From owner-svn-src-user@FreeBSD.ORG Mon Jun 9 17:55:24 2014 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 1B5B752D; Mon, 9 Jun 2014 17:55:24 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 0860D2048; Mon, 9 Jun 2014 17:55:24 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.8/8.14.8) with ESMTP id s59HtNsv045600; Mon, 9 Jun 2014 17:55:23 GMT (envelope-from nwhitehorn@svn.freebsd.org) Received: (from nwhitehorn@localhost) by svn.freebsd.org (8.14.8/8.14.8/Submit) id s59HtNU8045599; Mon, 9 Jun 2014 17:55:23 GMT (envelope-from nwhitehorn@svn.freebsd.org) Message-Id: <201406091755.s59HtNU8045599@svn.freebsd.org> From: Nathan Whitehorn Date: Mon, 9 Jun 2014 17:55:23 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r267289 - user/nwhitehorn/condorports X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 09 Jun 2014 17:55:24 -0000 Author: nwhitehorn Date: Mon Jun 9 17:55:23 2014 New Revision: 267289 URL: http://svnweb.freebsd.org/changeset/base/267289 Log: Improve analytics by allowing this to run on a live build rather than just as a post-mortem. 
Modified: user/nwhitehorn/condorports/dag_status_summary.py Modified: user/nwhitehorn/condorports/dag_status_summary.py ============================================================================== --- user/nwhitehorn/condorports/dag_status_summary.py Mon Jun 9 17:54:53 2014 (r267288) +++ user/nwhitehorn/condorports/dag_status_summary.py Mon Jun 9 17:55:23 2014 (r267289) @@ -1,16 +1,24 @@ -import sys +import sys, os dag = file(sys.argv[1], 'r') -rescue_dag = file(sys.argv[2], 'r') +rescue_dag = None +for i in range(1, 1000): + path = '%s.rescue%03d' % (sys.argv[1], i) + if not os.path.isfile(path): + break + rescue_dag = path +print 'Operating on rescue DAG %s' % rescue_dag +rescue_dag = file(rescue_dag, 'r') dag = dag.readlines() rescue_dag = rescue_dag.readlines() jobs = {} +jobstate = None for line in dag: line = line.split(' ') if line[0] == 'JOB': - jobs[line[1].strip()] = {'done': False, 'children': 0, 'deps': [], 'failed': False} + jobs[line[1].strip()] = {'done': False, 'children': [], 'deps': [], 'failed': False, 'last_state': ''} if line[0] == 'PARENT': deps = [] for dep in line[1:]: @@ -18,39 +26,78 @@ for line in dag: break deps.append(dep) jobs[line[-1].strip()]['deps'] = deps + if line[0] == 'JOBSTATE_LOG': + jobstate = file(line[1].strip(), 'r') for line in rescue_dag: line = line.split(' ') if line[0] == 'DONE': jobs[line[1].strip()]['done'] = True +if jobstate is not None: + jobstate = jobstate.readlines() + for line in jobstate: + line = line.split(' ') + if line[1] == 'INTERNAL': + continue + job = jobs[line[1]] + job['last_state'] = line[2] + if job['last_state'] == 'POST_SCRIPT_FAILURE' or job['last_state'] == 'PRE_SCRIPT_FAILURE': + job['failed'] = True + if job['last_state'] == 'POST_SCRIPT_SUCCESS': + job['done'] = True + job['failed'] = False + jdone = 0 jfailed = 0 -for job in jobs.values(): +for j,job in jobs.iteritems(): if job['done']: jdone += 1 continue deps_met = True for dep in job['deps']: dep = jobs[dep] - dep['children'] += 1 + 
if j not in dep['children']: + dep['children'].append(j) if not dep['done']: deps_met = False - if deps_met: + job['deps_met'] = deps_met + if deps_met and job['last_state'] == '': job['failed'] = True + if job['failed']: jfailed += 1 print '%d jobs complete' % jdone print '%d jobs failed' % jfailed +def deepchildren(job, children): + j = jobs[job] + for c in j['children']: + if c not in children: + children.append(c) + deepchildren(c, children) + return children + blocking = [] for job in jobs: j = jobs[job] if j['failed']: - blocking.append((job, j['children'])) + blocking.append((job, len(deepchildren(job, [])), len(j['children']), j['last_state'])) + +def missing_deps(port, depth=0): + p = jobs[port] + if p['failed'] or not p['done']: + indent = '\t'*depth + if p['failed']: + print '%s%s failed' % (indent, port) + else: + print '%s%s incomplete' % (indent, port) + for d in p['deps']: + missing_deps(d, depth+1) + blocking.sort(cmp=lambda a,b: cmp(a[1], b[1]), reverse=True) print 'Top ten failed jobs blocking other jobs:' for job in blocking[:10]: - print '\t%s blocking %d dependent jobs' % (job[0], job[1]) + print '\t%s blocking %d dependent jobs (%d directly). Last seen: %s' % job