Date: Tue, 19 Feb 2013 02:37:42 +0000 (UTC) From: Mark Linimon <linimon@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r246971 - user/bugmeister/gnats/tools Message-ID: <201302190237.r1J2bgGH019221@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
# Author: linimon (doc,ports committer)
# Date: Tue Feb 19 02:37:41 2013
# New Revision: 246971
# URL: http://svnweb.freebsd.org/changeset/base/246971
#
# Log:
#   Ad-hoc script by linimon to rank the goodness of incoming raw data.
#
# Added:
#   user/bugmeister/gnats/tools/abuseHandler.py   (contents, props changed)
#
# Added: user/bugmeister/gnats/tools/abuseHandler.py
# ==============================================================================
# --- /dev/null 00:00:00 1970 (empty, because file is newly added)
# +++ user/bugmeister/gnats/tools/abuseHandler.py Tue Feb 19 02:37:41 2013 (r246971)
# @@ -0,0 +1,189 @@
"""Go through a set of submitted PRs in a directory and rename
   them for manual handling if they meet certain spammy criteria.
   Original author: Mark Linimon, Lonesome Dove Computing Services,
   www.lonesome.com.  License: BSD license.
   $FreeBSD$
"""

__version__ = "0.02"

# Only names that actually exist may be listed here; the previous value
# ("abuseHandler") named nothing and made "from abuseHandler import *" fail.
__all__ = [
    "QUEUE_DIR",
    "HEADER_SYNOPSIS",
    "HEADER_ORIGINATOR",
    "HEADER_ORGINATOR",
    "HEADER_RELEASE",
    "INTERESTING_HEADERS",
    "ABUSED_HEADERS",
    "ABUSIVE_ORIGINATORS",
    "IGNORE_FILE_PREFIX",
    "checkForAbusiveHeaders",
    "checkForAbusiveOriginators",
    "renamingScheme",
    "renameFile",
    "handleFiles",
    "handleFile",
    "main",
]

import io
import os
import string  # no longer used below; kept so the import block stays a superset
import sys

QUEUE_DIR = "/home/gnats/gnats-queue"

HEADER_SYNOPSIS = ">Synopsis:"
# The GNATS field is spelled ">Originator:".  The previous value
# (">Orginator:") never matched a real PR, which disabled both checks.
HEADER_ORIGINATOR = ">Originator:"
# Old, misspelled constant name kept as an alias for existing users.
HEADER_ORGINATOR = HEADER_ORIGINATOR
HEADER_RELEASE = ">Release:"

# Headers whose values are collected while reading a PR.
INTERESTING_HEADERS = (
    HEADER_SYNOPSIS,
    HEADER_ORIGINATOR,
    HEADER_RELEASE,
)

# Headers that must all be present and all carry the same value for the
# PR to be considered spam.
ABUSED_HEADERS = (
    HEADER_SYNOPSIS,
    HEADER_ORIGINATOR,
    HEADER_RELEASE,
)

# Originator values that mark a PR as spam on their own.
ABUSIVE_ORIGINATORS = (
    "Perry Keller",
)

IGNORE_FILE_PREFIX = "."

LINELEN = 1024  # try to prevent buffer overruns when reading files

DEBUG_LINE = 0
DEBUG_VERBOSE = 0
DEBUG_VERY_VERBOSE = 0


def _headerText( value ):
    """Return a header value as a single string.

    handleFile() stores each value as the list of whitespace-separated
    tokens that followed the header; such a list is joined with single
    spaces.  Any other value is returned unchanged.
    """
    if isinstance( value, ( list, tuple ) ):
        return " ".join( value )
    return value


def checkForAbusiveHeaders( headers ):
    """Look for all N headers in ABUSED_HEADERS existing, and having the
    same value.

    headers -- mapping of header name to value (token list or string).

    Returns True only when every header in ABUSED_HEADERS is present in
    `headers` and all of their values are equal; False otherwise.
    """
    compare = None
    for index, header in enumerate( ABUSED_HEADERS ):
        if DEBUG_VERY_VERBOSE:
            print( "examining header %s" % header )
        try:
            value = _headerText( headers[ header ] )
        except KeyError:
            if DEBUG_VERBOSE:
                print( "file is ok: no value for %s" % header )
            return False

        if index == 0:
            # the first header supplies the reference value
            compare = value
        elif compare != value:
            if DEBUG_VERBOSE:
                print( "file is ok: %s != %s" % ( compare, value ) )
            return False

    return True


def checkForAbusiveOriginators( headers ):
    """Return True if the PR's originator is listed in ABUSIVE_ORIGINATORS.

    headers -- mapping of header name to value (token list or string).

    A PR without an originator header is not considered abusive.
    """
    try:
        value = headers[ HEADER_ORIGINATOR ]
    except KeyError:
        return False

    # The stored value is a token list; compare it in its string form.
    return _headerText( value ) in ABUSIVE_ORIGINATORS


def renamingScheme( infile ):
    """Return the name `infile` gets when set aside for manual inspection.

    The prefix is applied to the final path component only, so both bare
    filenames ("pr" -> ".pr") and full pathnames ("/q/pr" -> "/q/.pr")
    work.  Prefixing the whole string turned "/q/pr" into "./q/pr", which
    is why renaming used to fail with a full pathname.
    """
    directory, basename = os.path.split( infile )
    # hard-coded by GNATS
    return os.path.join( directory, IGNORE_FILE_PREFIX + basename )


def renameFile( filename ):
    """Rename `filename` according to renamingScheme() and report it.

    Failures are reported on stdout rather than raised, so one bad file
    does not stop the run.  Returns True if the rename succeeded, False
    otherwise.
    """
    newfilename = renamingScheme( filename )
    try:
        os.rename( filename, newfilename )
    except OSError as e:
        print( "could not rename suspected spam PR %s:" % filename )
        print( str( e ) )
        return False

    print( "suspicious queued PR %s" % filename +
           " renamed to %s for manual inspection" % newfilename )
    return True


def handleFiles( dir ):
    """Examine every queued PR in directory `dir`.

    Files whose name already starts with IGNORE_FILE_PREFIX are skipped.
    Problems with the directory or with a single file are reported on
    stdout and do not abort the run.  The process working directory is
    left alone; files are addressed by their path inside `dir`.
    """
    try:
        filenames = os.listdir( dir )
    except OSError as e:
        print( "could not list directory %s:" % dir )
        print( str( e ) )
        return

    for filename in filenames:
        if filename.startswith( IGNORE_FILE_PREFIX ):
            if DEBUG_VERBOSE:
                print( "skippping file %s" % filename )
            continue

        if DEBUG_VERBOSE:
            print( "handling file %s" % filename )
        try:
            handleFile( os.path.join( dir, filename ) )
        except Exception as e:
            # per-file boundary: report and carry on with the next file
            print( "could not handle file %s:" % filename )
            print( str( e ) )


def handleFile( filename ):
    """Read one queued PR and rename it if it looks like spam.

    Collects the values of INTERESTING_HEADERS from the file, runs the
    checks on them and calls renameFile() if any check fires.  Open and
    read errors are reported on stdout.
    """
    try:
        # latin-1 maps every byte to a character, so arbitrary spam
        # content cannot cause a decoding error.
        infile = io.open( filename, "r", encoding="latin-1" )
    except ( IOError, OSError ) as e:
        print( "could not open %s:" % filename )
        print( str( e ) )
        return

    headers = {}

    # "with" guarantees the file is closed however the loop ends
    with infile:
        try:
            while True:
                line = infile.readline( LINELEN )
                if not line:
                    break

                if DEBUG_LINE:
                    print( 'line: ' + line )

                tokens = line.split()
                # TODO add more analysis in the future?
                if len( tokens ) > 1 and tokens[ 0 ] in INTERESTING_HEADERS:
                    headers[ tokens[ 0 ] ] = tokens[ 1 : ]
        except ( IOError, OSError ) as e:
            # keep whatever was read so far and still run the checks
            print( 'handleFile: IOError:' )
            print( str( e ) )

    # run algorithms and use results to determine whether to rename file
    hasAbusiveHeaders = checkForAbusiveHeaders( headers )
    hasAbusiveOriginator = checkForAbusiveOriginators( headers )

    # TODO add more tests as appropriate in the future

    if hasAbusiveHeaders or hasAbusiveOriginator:
        renameFile( filename )


def main( argv=None ):
    """Process the directory named by the first argument, or QUEUE_DIR."""
    if argv is None:
        argv = sys.argv

    queue_dir = argv[ 1 ] if len( argv ) > 1 else QUEUE_DIR
    handleFiles( queue_dir )


# main

if __name__ == '__main__':
    main()
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201302190237.r1J2bgGH019221>