Skip site navigation (1) Skip section navigation (2)
Date:      Mon, 10 Oct 2016 19:09:35 +0000 (UTC)
From:      Ed Maste <emaste@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r307003 - head/contrib/mdocml
Message-ID:  <201610101909.u9AJ9ZH0022085@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: emaste
Date: Mon Oct 10 19:09:35 2016
New Revision: 307003
URL: https://svnweb.freebsd.org/changeset/base/307003

Log:
  makewhatis: make output reproducible
  
  The mandoc search database generation uses each page's inode number as
  a hash key to index hard-linked pages only once.  However, it also
  processed the pages in hash-key order, resulting in effectively
  non-deterministic output.
  
  Instead:
  
  1) provide fts_open() with a comparison function to process directories
     and files in a deterministic order
  2) in addition to the existing hash, insert pages into a linked list
     which will be sorted (by virtue of 1)
  3) iterate over pages by the list in 2, instead of hash order
  
  I will work on upstreaming this change.
  
  Reviewed by:	bapt
  MFC after:	1 month
  Sponsored by:	The FreeBSD Foundation
  Differential Revision:	https://reviews.freebsd.org/D8213

Modified:
  head/contrib/mdocml/mandocdb.c

Modified: head/contrib/mdocml/mandocdb.c
==============================================================================
--- head/contrib/mdocml/mandocdb.c	Mon Oct 10 18:36:26 2016	(r307002)
+++ head/contrib/mdocml/mandocdb.c	Mon Oct 10 19:09:35 2016	(r307003)
@@ -103,6 +103,7 @@ struct	mpage {
 	char		*arch;    /* architecture from file content */
 	char		*title;   /* title from file content */
 	char		*desc;    /* description from file content */
+	struct mpage	*next;    /* singly linked list */
 	struct mlink	*mlinks;  /* singly linked list */
 	int		 form;    /* format from file content */
 	int		 name_head_done;
@@ -146,6 +147,7 @@ static	void	 dbadd_mlink_name(const stru
 static	int	 dbopen(int);
 static	void	 dbprune(void);
 static	void	 filescan(const char *);
+static	int	 fts_compare(const FTSENT *const *, const FTSENT *const *);
 static	void	 mlink_add(struct mlink *, const struct stat *);
 static	void	 mlink_check(struct mpage *, struct mlink *);
 static	void	 mlink_free(struct mlink *);
@@ -204,6 +206,7 @@ static	struct ohash	 strings; /* table o
 static	sqlite3		*db = NULL; /* current database */
 static	sqlite3_stmt	*stmts[STMT__MAX]; /* current statements */
 static	uint64_t	 name_mask;
+static	struct mpage	*mpage_head;
 
 static	const struct mdoc_handler mdocs[MDOC_MAX] = {
 	{ NULL, 0 },  /* Ap */
@@ -571,6 +574,20 @@ usage:
 	return (int)MANDOCLEVEL_BADARG;
 }
 
+static int
+fts_compare(const FTSENT *const *a, const FTSENT *const *b)
+{
+
+	/*
+	 * The mpage list is processed in the opposite order to which pages are
+	 * added, so traverse the hierarchy in reverse alpha order, resulting
+	 * in database inserts in alpha order. This is not required for correct
+	 * operation, but is helpful when inspecting the database during
+	 * development.
+	 */
+	return -strcmp((*a)->fts_name, (*b)->fts_name);
+}
+
 /*
  * Scan a directory tree rooted at "basedir" for manpages.
  * We use fts(), scanning directory parts along the way for clues to our
@@ -600,8 +617,8 @@ treescan(void)
 	argv[0] = ".";
 	argv[1] = (char *)NULL;
 
-	f = fts_open((char * const *)argv,
-	    FTS_PHYSICAL | FTS_NOCHDIR, NULL);
+	f = fts_open((char * const *)argv, FTS_PHYSICAL | FTS_NOCHDIR,
+	    fts_compare);
 	if (f == NULL) {
 		exitcode = (int)MANDOCLEVEL_SYSERR;
 		say("", "&fts_open");
@@ -966,6 +983,8 @@ mlink_add(struct mlink *mlink, const str
 		mpage = mandoc_calloc(1, sizeof(struct mpage));
 		mpage->inodev.st_ino = inodev.st_ino;
 		mpage->inodev.st_dev = inodev.st_dev;
+		mpage->next = mpage_head;
+		mpage_head = mpage;
 		ohash_insert(&mpages, slot, mpage);
 	} else
 		mlink->next = mpage->mlinks;
@@ -989,20 +1008,18 @@ mpages_free(void)
 {
 	struct mpage	*mpage;
 	struct mlink	*mlink;
-	unsigned int	 slot;
 
-	mpage = ohash_first(&mpages, &slot);
-	while (NULL != mpage) {
+	while (NULL != (mpage = mpage_head)) {
 		while (NULL != (mlink = mpage->mlinks)) {
 			mpage->mlinks = mlink->next;
 			mlink_free(mlink);
 		}
+		mpage_head = mpage->next;
 		free(mpage->sec);
 		free(mpage->arch);
 		free(mpage->title);
 		free(mpage->desc);
 		free(mpage);
-		mpage = ohash_next(&mpages, &slot);
 	}
 }
 
@@ -1123,16 +1140,14 @@ mpages_merge(struct mparse *mp)
 	char			*sodest;
 	char			*cp;
 	int			 fd;
-	unsigned int		 pslot;
 
 	if ( ! nodb)
 		SQL_EXEC("BEGIN TRANSACTION");
 
-	mpage = ohash_first(&mpages, &pslot);
-	while (mpage != NULL) {
+	for (mpage = mpage_head; mpage != NULL; mpage = mpage->next) {
 		mlinks_undupe(mpage);
 		if ((mlink = mpage->mlinks) == NULL) {
-			mpage = ohash_next(&mpages, &pslot);
+			mpage = mpage->next;
 			continue;
 		}
 
@@ -1256,7 +1271,6 @@ mpages_merge(struct mparse *mp)
 nextpage:
 		ohash_delete(&strings);
 		ohash_delete(&names);
-		mpage = ohash_next(&mpages, &pslot);
 	}
 
 	if (0 == nodb)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201610101909.u9AJ9ZH0022085>