Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 01 Aug 2012 21:32:53 +0000
From:      tzabal@FreeBSD.org
To:        svn-soc-all@FreeBSD.org
Subject:   socsvn commit: r240000 - soc2012/tzabal/server-side/akcrs-release/9.0.0/usr.sbin/crashreportd
Message-ID:  <20120801213253.4B31A106566B@hub.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: tzabal
Date: Wed Aug  1 21:32:52 2012
New Revision: 240000
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=240000

Log:
  The phases receive-check-store are complete. Basic testing and debugging have occurred for these phases. A clean-up of the code will take place in the near future.

Modified:
  soc2012/tzabal/server-side/akcrs-release/9.0.0/usr.sbin/crashreportd/crashreportd.py

Modified: soc2012/tzabal/server-side/akcrs-release/9.0.0/usr.sbin/crashreportd/crashreportd.py
==============================================================================
--- soc2012/tzabal/server-side/akcrs-release/9.0.0/usr.sbin/crashreportd/crashreportd.py	Wed Aug  1 19:27:12 2012	(r239999)
+++ soc2012/tzabal/server-side/akcrs-release/9.0.0/usr.sbin/crashreportd/crashreportd.py	Wed Aug  1 21:32:52 2012	(r240000)
@@ -1,123 +1,385 @@
 #!/usr/local/bin/python -tt
+"""This module is the software that implements the server side part of the
+Automated Kernel Crash Reporting System.
+"""
 
+import argparse
+import hashlib
+import logging
 import os
+import random
 import re
+import smtplib
+import string
+from StringIO import StringIO
 import time
 import tarfile
 
-import argparse
-import logging
+from lxml import etree
+import psycopg2
 
+from email.mime.text import MIMEText
 
 # Module Variables
 _interval_time = 10
 _pid_file = '/var/run/crashreportd.pid'
 _crashreports_dir = '/var/spool/crashreports'
-_logging_file = ''
+_auxiliary_dir = '/tmp/crashreports'
+_logging_file = '/home/tzabal/crashreportd.log'
+_conn = ''
+_curs = ''
 
 
-def valid_report_name(report):
-    """Checks if the filename matches the pattern of a valid crash report name."""
-    filename = os.path.basename(report)
-    match_obj = re.match('^crashreport\.[A-Za-z0-9]{6}\.tar\.gz$', filename)
+class CrashReport(object):
+    """This class represents a crash report."""
     
-    if not match_obj:
-        logging.info('Invalid crash report name: %s' % filename)
-        return
+    valid_name = re.compile('^crashreport\.[A-Za-z0-9]{6}\.tar\.gz$')
     
-    return True
+    
+    def __init__(self, path):
+        name = os.path.basename(path)
+        
+        self.path = path
+        self.name = name
+        self.data = CrashData(name)
+    
+    
+    def has_valid_name(self):
+        """Returns True if the report's name matches the name of a valid crash
+        report. Otherwise it returns implicit False."""
+        match = re.match(self.__class__.valid_name, self.name)
+        
+        if not match:
+            return
+        
+        return True
+    
+    
+    def has_valid_type(self):
+        """Returns True if the report's file type matches the file type of a
+        valid crash report. Otherwise it returns implicit False."""
+        if not tarfile.is_tarfile(self.path):
+            return
+        
+        try:
+            tarfileobj = tarfile.open(self.path, 'r:gz')
+        except tarfile.ReadError:
+            return
+        except tarfile.CompressionError:
+            return
+        finally:
+            tarfileobj.close()
+        
+        return True
+    
+    
+    def has_valid_contents_number(self):
+        """Returns True if the report contains the same number of files that a
+        valid crash report has. Otherwise it returns implicit False."""
+        try:
+            tarfileobj = tarfile.open(self.path, 'r:gz')
+        except tarfile.ReadError:
+            return
+        except tarfile.CompressionError:
+            return
+        else:
+            contents_list = tarfileobj.getnames()
+            if not len(contents_list) == 1:
+                return
+        finally:
+            tarfileobj.close()
+        
+        return True
+
+
+class CrashData(object):
+    """This class represents the crash data that a crash report contains."""
+    
+    valid_name = re.compile('^crashreport\.[A-Za-z0-9]{6}\.xml$')
+    
+    
+    def __init__(self, reportname):
+        name = re.sub('tar.gz', 'xml', reportname)
+        
+        self.path = _auxiliary_dir + '/' + name
+        self.name = name
+        self.info = {}
+        self.commands = {}
+    
+    
+    def has_valid_name(self):
+        """Returns True if the report's crash data name matches the name of a
+        valid crash data. Otherwise it returns implicit False."""
+        if not os.path.isfile(self.path):
+            return
+        
+        self.name = os.path.basename(self.path)
+        
+        match = re.match(self.__class__.valid_name, self.name)
+        
+        if not match:
+            return
+        
+        return True
+    
+    def has_valid_crashdata(self):
+        """Returns True if the crash data is a well formed and valid XML file.
+        Otherwise implicit False."""
+        dtdfile = StringIO("""<!ELEMENT crashreport (header, body)>
+                           <!ELEMENT header (email)>
+                           <!ELEMENT email (#PCDATA)>
+                           <!ELEMENT body (command+)>
+                           <!ELEMENT command (name, result)>
+                           <!ELEMENT name (#PCDATA)>
+                           <!ELEMENT result (#PCDATA)>""")
+        
+        try:
+            elemtree = etree.parse(self.path)
+        except:
+            logging.info('%s is not a well formed crash report data.' %
+                         (self.path))
+            return
+        else:
+            dtd = etree.DTD(dtdfile)
+            if not dtd.validate(elemtree):
+                logging.info('%s is not a valid crash report data.' %
+                             (self.path))
+                return
+        
+        return True
 
 
-def valid_report_type(report):
-    """Check if the report's file type matches the file type of a valid crash report."""
-    if not tarfile.is_tarfile(report):
-        logging.info('Report %s is not a tar file.' % report)
-        return
+
+def send_confirmation_email(report):
+    sender = 'Automated Kernel Crash Reporting System <akcrs@freebsd.org>'
+    #receiver = report.data.info['email']
+    receiver = 'invalid@it.teithe.gr'
+    subject = 'Kernel Crash Report Confirmation'
+    text = 'Confirm your kernel crash report by clicking here.'
+    smtpserver = 'smtp.hol.gr'
+    
+    message = MIMEText(text)
+    message['From'] = sender
+    message['To'] = receiver
+    message['Subject'] = subject
+    
+    #print message
     
     try:
-        tarfile_obj = tarfile.open(report, 'r:gz')
-    except ReadError:
-        logging.info('ReadError exception.')
-        return
-    except CompressionError:
-        logging.info('CompressionError exception')
+        smtpconn = smtplib.SMTP(smtpserver)
+        smtpconn.sendmail(sender, receiver, message.as_string())
+    except smtplib.SMTPException, err:
+        logging.info(err)
         return
     finally:
-        tarfile_obj.close()
+        smtpconn.quit()
+    
+    return True
+
+
+def parse_crashdata(report):
+    """Parses the crash data XML file of the given report and stores the data
+    in instance variables of the report."""
+    validnames = ['crashtype', 'crashdate', 'hostname', 'ostype', 'osrelease',
+                  'version', 'machine', 'panic', 'backtrace', 'ps_axl',
+                  'vmstat_s', 'vmstat_m', 'vmstat_z', 'vmstat_i', 'pstat_T',
+                  'pstat_s', 'iostat', 'ipcs_a', 'ipcs_T', 'nfsstat',
+                  'netstat_s', 'netstat_m', 'netstat_id', 'netstat_anr',
+                  'netstat_anA', 'netstat_aL', 'fstat', 'dmesg', 'kernelconfig',
+                  'ddbcapturebuffer']
+    
+    elemtree = etree.parse(report.data.path)
+    root = elemtree.getroot()
+    
+    report.data.info['email'] = re.sub(r'\s', '', root[0][0].text)
+    
+    for elem in elemtree.iter():
+        if elem.tag == 'command':
+            children = list(elem)
+            #print 'children[0].text: %s' % (children[0].text)
+            name = re.sub(r'\s', '', children[0].text)
+            result = children[1].text
+            if name in validnames:
+                #print 'name: %s' % (name)
+                report.data.commands[name] = result
     
     return True
 
 
-def valid_contents_number(report):
-    """Checks if the report contains the same number of files as a valid report."""
+def generate_password():
+    """Generates and returns a random password.
+    
+    Password is 8 characters in length and it may contain digits, lowercase and
+    uppercase letters.
+    """
+    size = 8
+    chars = string.letters + string.digits
+    return ''.join(random.choice(chars) for ch in range(size))
+
+
+def store_report(report):
+    query = """SELECT id FROM Submitters WHERE email = %s"""
+    values = (report.data.info['email'], )
+    
     try:
-        tarfile_obj = tarfile.open(report, 'r:gz')
-    except ReadError:
-        logging.info('ReadError exception.')
-        return
-    except CompressionError:
-        logging.info('CompressionError exception')
-        return
+        _curs.execute(query, values)
+    except Exception, err:
+        logging.info(err)
+    
+    if _curs.rowcount:
+        submitter_id = _curs.fetchone()
+        print 'Submitter_id: %s' % (submitter_id)
     else:
-        contents_list = tarfile_obj.getnames()
-        if not len(contents_list) == 1:
-            logging.info('Invalid number of files inside the crash report.')
-            return
-    finally:
-        tarfile_obj.close()
+        password = generate_password()
+        print 'password: %s' % (password)
+        hashobj = hashlib.sha256()
+        hashobj.update(password)
+        hashpass = hashobj.hexdigest()
+    
+        query = """INSERT INTO Submitters (email, password)
+        VALUES (%s, %s) RETURNING id;"""
+        values = (report.data.info['email'], hashpass)
+    
+        try:
+            _curs.execute(query, values)
+        except Exception, err:
+            logging.info(err)
+        else:
+            submitter_id = _curs.fetchone()
+            _conn.commit()
+    
+    # -1 declares that the report has not analyzed yet for similarities
+    # with previously logged reports
+    bug_id = -1
+    
+    query = """INSERT INTO Reports (bug_id, submitter_id, crashtype, crashdate,
+    hostname, ostype, osrelease, version, machine, panic, backtrace, ps_axl,
+    vmstat_s, vmstat_m, vmstat_z, vmstat_i, pstat_T, pstat_s, iostat, ipcs_a,
+    ipcs_T, nfsstat, netstat_s, netstat_m, netstat_id, netstat_anr,
+    netstat_anA, netstat_aL, fstat, dmesg, kernelconfig, ddbcapturebuffer)
+    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
+    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
+    
+    values = (bug_id,
+              submitter_id,
+              report.data.commands['crashtype'],
+              report.data.commands['crashdate'],
+              report.data.commands['hostname'],
+              report.data.commands['ostype'],
+              report.data.commands['osrelease'],
+              report.data.commands['version'],
+              report.data.commands['machine'],
+              report.data.commands['panic'],
+              report.data.commands['backtrace'],
+              report.data.commands['ps_axl'],
+              report.data.commands['vmstat_s'],
+              report.data.commands['vmstat_m'],
+              report.data.commands['vmstat_z'],
+              report.data.commands['vmstat_i'],
+              report.data.commands['pstat_T'],
+              report.data.commands['pstat_s'],
+              report.data.commands['iostat'],
+              report.data.commands['ipcs_a'],
+              report.data.commands['ipcs_T'],
+              report.data.commands['nfsstat'],
+              report.data.commands['netstat_s'],
+              report.data.commands['netstat_m'],
+              report.data.commands['netstat_id'],
+              report.data.commands['netstat_anr'],
+              report.data.commands['netstat_anA'],
+              report.data.commands['netstat_aL'],
+              report.data.commands['fstat'],
+              report.data.commands['dmesg'],
+              report.data.commands['kernelconfig'],
+              report.data.commands['ddbcapturebuffer'])
+    
+    try:
+        _curs.execute(query, values)
+    except Exception, err:
+        logging.info(err)
+        logging.info('Query failed.')
+    else:
+        _conn.commit()
     
     return True
 
 
-def valid_data_name(report):
-    """Checks if the filename matches the pattern of a valid crash data name."""
+def extract_report(report):
+    """Extracts the given report to the auxiliary directory."""
+    if not os.path.isdir(_auxiliary_dir):
+        logging.info('Auxiliary directory does not exist')
+        return
+    
     try:
-        tarfile_obj = tarfile.open(report, 'r:gz')
-    except ReadError:
-        logging.info('ReadError exception.')
+        tarfileobj = tarfile.open(report, 'r:gz')
+        tarfileobj.extractall(_auxiliary_dir)
+    except tarfile.ReadError:
         return
-    except CompressionError:
-        logging.info('CompressionError exception')
+    except tarfile.CompressionError:
         return
-    else:
-        contents_list = tarfile_obj.getnames()
-        match_obj = re.match('^crashreport\.[A-Za-z0-9]{6}\.xml$', contents_list[0])
     finally:
-        tarfile_obj.close()
-    
-    if not match_obj:
-        logging.info('Invalid crash data name: %s' % contents_list[0])
-        return
+        tarfileobj.close()
     
     return True
 
 
 def discard_report(report):
-    """Discard a crash report from the system."""
-    os.remove(report)
+    """Discards a crash report from the system."""
+    os.remove(report.path)
 
 
 def check_report(report):
-    """Container function that calls all the functions related to validity."""
-    if not valid_report_name(report):
-        discard_report(report)
+    """Checks a crash report for validity and security.
+    
+    It is a function that calls all the methods provided by the CrashReport and
+    the CrashData objects that are related with the validity of a report. The
+    methods are called with a stict order because some methods assign values
+    to the instance variables of the given object and some other methods depend
+    on them. This is done in order to avoid execution of the same code multiple
+    times and distinguish the checks easily.
+    """
+    if not report.has_valid_name():
+        logging.info('Invalid crash report name: %s' % report.path)
         return
     
-    if not valid_report_type(report):
-        discard_report(report)
+    if not report.has_valid_type():
+        logging.info('Invalid crash report type: %s' % report.path)
         return
     
-    if not valid_contents_number(report):
-        discard_report(report)
+    if not report.has_valid_contents_number():
+        logging.info('Invalid number of contents in crash report %s' %
+                     report.path)
         return
     
-    if not valid_data_name(report):
-        discard_report(report)
+    if not extract_report(report.path):
+        logging.info('Error occured while extract the report %s' % report.path)
+        return
+    
+    if not report.data.has_valid_name():
+        logging.info('Invalid crash data name: %s' % report.data.path)
+        return
+    
+    if not report.data.has_valid_crashdata():
+        logging.info('Invalid crash data XML file: %s' % report.data.path)
+        return
+    
+    if not parse_crashdata(report):
         return
     
     return True
 
 
+def connect_database():
+    global _conn, _curs
+    
+    try:
+        _conn = psycopg2.connect(database='akcrsdb', user='akcrs', password='freebsd')
+    except:
+        logging.error('Could not connect to the database')
+    else:
+        _curs = _conn.cursor()
+
+
 def create_pid_file():
     """Creates the Process ID file that contains the PID of crashreportd.
     
@@ -125,29 +387,50 @@
     """
     pid = os.getpid()
     try:
-        file_obj = open(_pid_file, 'w')
-        file_obj.write(str(pid))
-        file_obj.close()
+        pidfile = open(_pid_file, 'w')
+        pidfile.write(str(pid))
+        pidfile.close()
     except IOError:
         return
     finally:
-        file_obj.close()
+        pidfile.close()
     
     return True
 
 
+def log():
+    """Turns on or off the logging facility."""
+    global _logging_file
+    if _logging_file:
+        logging.basicConfig(level=logging.DEBUG, filename=_logging_file,
+                            format='%(asctime)s in %(funcName)s() at '
+                            '%(lineno)s %(levelname)s: %(message)s',
+                            datefmt='%Y-%m-%d %H:%M:%S')
+
+
 def parse_arguments():
-    """Parse the command line arguments provided."""
+    """Parses the command line arguments."""
     parser = argparse.ArgumentParser()
-    parser.add_argument('-d', help='the directory where crash reports arrive in the system', dest='crashreports_dir')
-    parser.add_argument('-p', help='the file that stores the process id of crashreportd', dest='pid_file')
-    parser.add_argument('-t', type=int, help='the minimum delay between two checks of the crash reports directory', dest='interval_time')
-    parser.add_argument('-l', help='the file that various log messages will be stored (enables logging)', dest='logging_file')
+    parser.add_argument('-d', help='the directory where crash reports arrive '
+                        'in the system', dest='crashreports_dir')
+    parser.add_argument('-a', help='an auxiliary directory used for various '
+                        'actions', dest='auxiliary_dir')
+    parser.add_argument('-p', help='the file that stores the process id of '
+                        'this program', dest='pid_file')
+    parser.add_argument('-t', type=int, help='the minimum delay between two '
+                        'checks of the crash reports directory',
+                        dest='interval_time')
+    parser.add_argument('-l', help='the file that various log messages will be '
+                        'stored (implicitly enables logging)',
+                        dest='logging_file')
     args = parser.parse_args()
     
     if args.crashreports_dir:
         global _crashreports_dir
         _crashreports_dir = args.crashreports_dir
+    if args.auxiliary_dir:
+        global _auxiliary_dir
+        _auxiliary_dir = args.auxiliary_dir
     if args.pid_file:
         global _pid_file
         _pid_file = args.pid_file
@@ -159,57 +442,28 @@
         _logging_file = args.logging_file
 
 
-def log():
-    """Turns on or off the logging facility."""
-    global _logging_file
-    if _logging_file:
-        logging.basicConfig(level=logging.DEBUG, filename=_logging_file,
-                            format='%(asctime)s %(funcName)s() %(levelname)s: %(message)s',
-                            datefmt='%Y-%m-%d %H:%M:%S')
-
-
 def main():
-    """The infinite loop of the daemon.
-    
-    It is the starting point of the program when it is invoked directly. Here
-    lives the infinite loop that checks the directory that crash reports arrive
-    in the server. For every new crash report, a number of actions are executed.
-    """
     parse_arguments()
     log()
     create_pid_file()
+    connect_database()
+    logging.info('========================')
     while True:
         dirlist = os.listdir(_crashreports_dir)
         for filename in dirlist:
-            report = _crashreports_dir + '/' + filename
-            check_report(report)
-        time.sleep(_interval_time)
+            path = _crashreports_dir + '/' + filename
+            report = CrashReport(path)
+            if not check_report(report):
+                print 'Failed in check report'
+                #discard_report(report)
+                continue
+            if not store_report(report):
+                print 'Failed in store report'
+            if not send_confirmation_email(report):
+                print 'Failed in sending confirmation email'
+        #time.sleep(_interval_time)
+        break
 
 
 if __name__ == '__main__':
     main()
-    
-#def extract_report(filename):
-#    if not os.path.isdir(extraction_dir):
-#        logging.debug()
-#        return False
-#    
-#    try:
-#        tarfile_obj = tarfile.open(report, 'r:gz')
-#        tarfile_obj.extractall(extraction_dir);
-#        except ReadError:
-#        logging.debug()
-#        return
-#    except CompressionError:
-#       logging.debug()
-#       return
-#    
-#    dirlist = os.listdir(extraction_dir)
-#    if not len(dirlist) == 1:
-#       logging.debug()
-#       return False
-#    
-#    data_name = dirlist[0]
-#    #print "data_name: ", data_name
-#    
-#    return True
\ No newline at end of file



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20120801213253.4B31A106566B>