#!/usr/bin/env python

# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License");
#    you may not use this file except in compliance with the License.
#    You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.

"""
Tools for working with logs generated by nova components
"""


import json
import os
import re
import sys


class Request(object):
    """Holds the pieces of one logged error as they are parsed from log lines.

    All fields start out as empty strings and are filled in by
    add_error_line() and add_environment_line().
    """

    def __init__(self):
        self.time = self.host = self.logger = ""
        self.message = self.trace = self.env = ""
        self.request_id = ""

    def add_error_line(self, error_line):
        """Populate time, host, logger, request id, message and trace.

        The first three space-separated fields become the time and the
        fourth the host.  The logger is the word following the first "("
        and the request id the word following the first "[".  An IndexError
        escapes if the line has fewer than four fields or lacks "(" or "[".
        """
        fields = error_line.split(" ")
        self.time = " ".join(fields[:3])
        self.host = fields[3]

        after_paren = error_line.split("(")[1]
        self.logger = after_paren.split(" ")[0]

        after_bracket = error_line.split("[")[1]
        self.request_id = after_bracket.split(" ")[0]

        # "#012" separates the message from the individual traceback lines
        # (presumably the logger's escaped newline -- confirm).
        segments = error_line.split("#012")
        self.message = self.clean_log_line(segments[0])
        self.trace = "\n".join(
            self.clean_trace(segment) for segment in segments[1:])

    def add_environment_line(self, env_line):
        """Store the environment dump carried by env_line."""
        self.env = self.clean_env_line(env_line)

    def clean_log_line(self, line):
        """Drop the log prefix: keep what follows the last '] ' (context)."""
        _, _, remainder = line.rpartition('] ')
        return remainder

    def clean_env_line(self, line):
        """Like clean_log_line, minus a leading 'Environment: ' marker."""
        payload = self.clean_log_line(line)
        return re.sub(r'^Environment: ', '', payload)

    def clean_trace(self, line):
        """Trace lines are formatted differently: keep text after 'TRACE: '."""
        _, _, remainder = line.rpartition('TRACE: ')
        return remainder

    def to_dict(self):
        """Return the fields that get serialized to the spool file."""
        return {
            'traceback': self.trace,
            'message': self.message,
            'host': self.host,
            'env': self.env,
            'logger': self.logger,
            'request_id': self.request_id,
        }


class LogReader(object):
    """Scan a log file for ERROR lines and spool each error as a JSON file.

    Errors are grouped by request id; one file named after the request id
    is written into the spool directory for every error that does not
    already have an entry in its "processed" subdirectory.
    """

    def __init__(self, filename):
        """Remember the log file to scan; nothing is read until process().

        :param filename: path of the log file
        """
        self.filename = filename
        # request id -> Request accumulated for that id
        self._errors = {}
        # Most recently handled error; twisted trace lines are appended to
        # it.  Set here so a log that *starts* with such a line does not
        # fail on a missing attribute.
        self.last_error = None

    def process(self, spooldir):
        """Parse the whole log file, then bring the spool directory up to date.

        :param spooldir: directory that receives one JSON file per error
        """
        with open(self.filename) as f:
            for line in f:
                parts = line.split(" ")
                # The level is the sixth space-separated field; shorter
                # lines have none and simply end the current error.
                level = parts[5] if len(parts) >= 6 else None
                if level == 'ERROR':
                    self.handle_logged_error(line)
                elif level == '[-]' and self.last_error:
                    # twisted stack trace line
                    clean_line = " ".join(parts[6:])
                    self.last_error.trace = self.last_error.trace + clean_line
                else:
                    self.last_error = None
        self.update_spool(spooldir)

    def handle_logged_error(self, line):
        """Fold one ERROR line into the Request for its request id.

        :param line: a log line whose level field is ERROR
        :raises Exception: if no request id can be found in the line
        """
        # NOTE(review): the id pattern admits "/", and the id is later used
        # as a file name in update_spool() -- confirm ids never contain it.
        match = re.search(r' \[([A-Z0-9\-/]+)', line)
        if not match:
            raise Exception("Unable to parse request id from %s" % line)
        request_id = match.group(1)
        data = self._errors.get(request_id)
        if data is None:
            data = Request()
        if self.is_env_line(line):
            data.add_environment_line(line)
        else:
            # Both recognised error lines (see is_error_line) and anything
            # else -- possibly an error from twisted -- are parsed alike.
            data.add_error_line(line)
        self.last_error = data
        self._errors[request_id] = data

    def is_env_line(self, line):
        """Return a match (truthy) if the line carries an environment dump."""
        return re.search('Environment: ', line)

    def is_error_line(self, line):
        """Return a match (truthy) if the line mentions 'raised'."""
        return re.search('raised', line)

    def update_spool(self, directory):
        """Write unprocessed errors to the spool and prune stale markers.

        :param directory: spool directory; "<directory>/processed" is
            created if missing
        """
        processed_dir = "%s/processed" % directory
        self._ensure_dir_exists(processed_dir)
        # items() rather than iteritems() so this runs on Python 2 and 3.
        for rid, value in self._errors.items():
            if not self.has_been_processed(processed_dir, rid):
                with open("%s/%s" % (directory, rid), "w") as spool:
                    spool.write(json.dumps(value.to_dict()))
        self.flush_old_processed_spool(processed_dir)

    def _ensure_dir_exists(self, d):
        """Create directory d, including missing parents, if it is absent."""
        if not os.path.exists(d):
            os.makedirs(d)

    def has_been_processed(self, processed_dir, rid):
        """Return True if processed_dir holds an entry named after rid."""
        return os.path.exists("%s/%s" % (processed_dir, rid))

    def flush_old_processed_spool(self, processed_dir):
        """Delete processed entries whose request id is not in this log."""
        for p in os.listdir(processed_dir):
            if p not in self._errors:
                # log has rotated and the old error won't be seen again
                os.unlink("%s/%s" % (processed_dir, p))

if __name__ == '__main__':
    # Optional positional arguments override the defaults:
    #   argv[1] -> log file to read, argv[2] -> spool directory to write.
    cli_args = sys.argv[1:]
    filename = cli_args[0] if len(cli_args) >= 1 else '/var/log/nova.log'
    spooldir = cli_args[1] if len(cli_args) >= 2 else '/var/spool/nova'
    LogReader(filename).process(spooldir)
