Skip site navigation (1) Skip section navigation (2)
Date:      Fri, 25 May 2012 09:30:17 +0000 (UTC)
From:      Gabor Kovesdan <gabor@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r235987 - head/usr.bin/sort
Message-ID:  <201205250930.q4P9UHk2093518@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gabor
Date: Fri May 25 09:30:16 2012
New Revision: 235987
URL: http://svn.freebsd.org/changeset/base/235987

Log:
  - Only use multi-threading for large files
  - Do not use mmap() by default; it can be enabled by --mmap
  - Add some minor optimizations for -u
  - Update manual page according to the changes
  
  Submitted by:	Oleg Moskalenko <oleg.moskalenko@citrix.com>

Modified:
  head/usr.bin/sort/bwstring.c
  head/usr.bin/sort/file.c
  head/usr.bin/sort/file.h
  head/usr.bin/sort/radixsort.c
  head/usr.bin/sort/sort.1.in
  head/usr.bin/sort/sort.c
  head/usr.bin/sort/sort.h

Modified: head/usr.bin/sort/bwstring.c
==============================================================================
--- head/usr.bin/sort/bwstring.c	Fri May 25 09:27:16 2012	(r235986)
+++ head/usr.bin/sort/bwstring.c	Fri May 25 09:30:16 2012	(r235987)
@@ -499,6 +499,22 @@ bwsfgetln(FILE *f, size_t *len, bool zer
 		}
 		return (bwssbdup(ret, *len));
 
+	} else if (!zero_ended && (MB_CUR_MAX == 1)) {
+		char *ret;
+
+		ret = fgetln(f, len);
+
+		if (ret == NULL) {
+			if (!feof(f))
+				err(2, NULL);
+			return (NULL);
+		}
+		if (*len > 0) {
+			if (ret[*len - 1] == '\n')
+				--(*len);
+		}
+		return (bwscsbdup(ret, *len));
+
 	} else {
 		wchar_t c = 0;
 

Modified: head/usr.bin/sort/file.c
==============================================================================
--- head/usr.bin/sort/file.c	Fri May 25 09:27:16 2012	(r235986)
+++ head/usr.bin/sort/file.c	Fri May 25 09:30:16 2012	(r235987)
@@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
 unsigned long long free_memory = 1000000;
 unsigned long long available_free_memory = 1000000;
 
+bool use_mmap;
+
 const char *tmpdir = "/var/tmp";
 const char *compress_program;
 
@@ -404,23 +406,21 @@ sort_list_dump(struct sort_list *l, cons
 			err(2, NULL);
 
 		if (l->list) {
-			struct sort_list_item *last_printed_item;
 			size_t i;
-
-			last_printed_item = NULL;
-
-			for (i = 0; i < l->count; i++) {
-				struct sort_list_item *item;
-
-				item = l->list[i];
-
-				if (!(sort_opts_vals.uflag) ||
-				    (last_printed_item == NULL) ||
-				    list_coll(&last_printed_item, &item)) {
-					bwsfwrite(item->str, f,
+			if (!(sort_opts_vals.uflag)) {
+				for (i = 0; i < l->count; ++i)
+					bwsfwrite(l->list[i]->str, f,
 					    sort_opts_vals.zflag);
-					if (sort_opts_vals.uflag)
+			} else {
+				struct sort_list_item *last_printed_item = NULL;
+				struct sort_list_item *item;
+				for (i = 0; i < l->count; ++i) {
+					item = l->list[i];
+					if ((last_printed_item == NULL) ||
+					    list_coll(&last_printed_item, &item)) {
+						bwsfwrite(item->str, f, sort_opts_vals.zflag);
 						last_printed_item = item;
+					}
 				}
 			}
 		}
@@ -657,7 +657,7 @@ file_reader_init(const char *fsrc)
 
 	ret->fname = sort_strdup(fsrc);
 
-	if (strcmp(fsrc, "-") && (compress_program == NULL)) {
+	if (strcmp(fsrc, "-") && (compress_program == NULL) && use_mmap) {
 
 		do {
 			struct stat stat_buf;
@@ -1539,7 +1539,9 @@ mt_sort(struct sort_list *list,
     const char* fn)
 {
 #if defined(SORT_THREADS)
-	if (nthreads < 2 || list->count < nthreads) {
+	if (nthreads < 2 || list->count < MT_SORT_THRESHOLD) {
+		size_t nthreads_save = nthreads;
+		nthreads = 1;
 #endif
 		/* if single thread or small data, do simple sort */
 		sort_func(list->list, list->count,
@@ -1547,6 +1549,7 @@ mt_sort(struct sort_list *list,
 		    (int(*)(const void *, const void *)) list_coll);
 		sort_list_dump(list, fn);
 #if defined(SORT_THREADS)
+		nthreads = nthreads_save;
 	} else {
 		/* multi-threaded sort */
 		struct sort_list **parts;
@@ -1590,7 +1593,18 @@ mt_sort(struct sort_list *list,
 			pthread_attr_init(&attr);
 			pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED);
 
-			pthread_create(&pth, &attr, mt_sort_thread, parts[i]);
+			for (;;) {
+				int res = pthread_create(&pth, &attr,
+				    mt_sort_thread, parts[i]);
+
+				if (res >= 0)
+					break;
+				if (errno == EAGAIN) {
+					pthread_yield();
+					continue;
+				}
+				err(2, NULL);
+			}
 
 			pthread_attr_destroy(&attr);
 		}

Modified: head/usr.bin/sort/file.h
==============================================================================
--- head/usr.bin/sort/file.h	Fri May 25 09:27:16 2012	(r235986)
+++ head/usr.bin/sort/file.h	Fri May 25 09:30:16 2012	(r235987)
@@ -84,6 +84,9 @@ struct file0_reader
 extern unsigned long long free_memory;
 extern unsigned long long available_free_memory;
 
+/* Are we using mmap ? */
+extern bool use_mmap;
+
 /* temporary file dir */
 
 extern const char *tmpdir;

Modified: head/usr.bin/sort/radixsort.c
==============================================================================
--- head/usr.bin/sort/radixsort.c	Fri May 25 09:27:16 2012	(r235986)
+++ head/usr.bin/sort/radixsort.c	Fri May 25 09:30:16 2012	(r235987)
@@ -609,7 +609,17 @@ run_top_sort_level(struct sort_level *sl
 			pthread_attr_setdetachstate(&attr,
 			    PTHREAD_DETACHED);
 
-			pthread_create(&pth, &attr, sort_thread, NULL);
+			for (;;) {
+				int res = pthread_create(&pth, &attr,
+				    sort_thread, NULL);
+				if (res >= 0)
+					break;
+				if (errno == EAGAIN) {
+					pthread_yield();
+					continue;
+				}
+				err(2, NULL);
+			}
 
 			pthread_attr_destroy(&attr);
 		}
@@ -626,6 +636,10 @@ run_sort(struct sort_list_item **base, s
 	struct sort_level *sl;
 
 #if defined(SORT_THREADS)
+	size_t nthreads_save = nthreads;
+	if (nmemb < MT_SORT_THRESHOLD)
+		nthreads = 1;
+
 	if (nthreads > 1) {
 		pthread_mutexattr_t mattr;
 
@@ -663,6 +677,7 @@ run_sort(struct sort_list_item **base, s
 		pthread_mutex_destroy(&g_ls_mutex);
 		pthread_mutex_destroy(&sort_left_mutex);
 	}
+	nthreads = nthreads_save;
 #endif
 }
 

Modified: head/usr.bin/sort/sort.1.in
==============================================================================
--- head/usr.bin/sort/sort.1.in	Fri May 25 09:27:16 2012	(r235986)
+++ head/usr.bin/sort/sort.1.in	Fri May 25 09:30:16 2012	(r235987)
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)sort.1	8.1 (Berkeley) 6/6/93
 .\"
-.Dd May 6, 2012
+.Dd May 25, 2012
 .Dt SORT 1
 .Os
 .Sh NAME
@@ -358,6 +358,9 @@ This sort algorithm cannot be used with
 .Fl u
 and
 .Fl s .
+.It Fl Fl mmap
+Try to use file memory mapping system call.
+It may increase speed in some cases.
 .El
 .Pp
 The following operands are available:

Modified: head/usr.bin/sort/sort.c
==============================================================================
--- head/usr.bin/sort/sort.c	Fri May 25 09:27:16 2012	(r235986)
+++ head/usr.bin/sort/sort.c	Fri May 25 09:30:16 2012	(r235987)
@@ -89,6 +89,7 @@ const char *nlsstr[] = { "",
       "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
       "[-o outfile] [--batch-size size] [--files0-from file] "
       "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
+      "[--mmap] "
 #if defined(SORT_THREADS)
       "[--nthreads thread_no] "
 #endif
@@ -138,7 +139,8 @@ enum
 	QSORT_OPT,
 	MERGESORT_OPT,
 	HEAPSORT_OPT,
-	RADIXSORT_OPT
+	RADIXSORT_OPT,
+	MMAP_OPT
 };
 
 #define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
@@ -164,6 +166,7 @@ struct option long_options[] = {
 				{ "key", required_argument, NULL, 'k' },
 				{ "merge", no_argument, NULL, 'm' },
 				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
+				{ "mmap", no_argument, NULL, MMAP_OPT },
 				{ "month-sort", no_argument, NULL, 'M' },
 				{ "numeric-sort", no_argument, NULL, 'n' },
 				{ "output", required_argument, NULL, 'o' },
@@ -1063,12 +1066,16 @@ main(int argc, char **argv)
 				tmpdir = sort_strdup(optarg);
 				break;
 			case 't':
-				if (strlen(optarg) > 1) {
-					if (strcmp(optarg, "\\0")) {
+				while (strlen(optarg) > 1) {
+					if (optarg[0] != '\\') {
 						errx(2, "%s: %s\n",
 						    strerror(EINVAL), optarg);
 					}
-					*optarg = 0;
+					optarg += 1;
+					if (*optarg == '0') {
+						*optarg = 0;
+						break;
+					}
 				}
 				sort_opts_vals.tflag = true;
 				sort_opts_vals.field_sep = btowc(optarg[0]);
@@ -1126,6 +1133,9 @@ main(int argc, char **argv)
 			case MERGESORT_OPT:
 				sort_opts_vals.sort_method = SORT_MERGESORT;
 				break;
+			case MMAP_OPT:
+				use_mmap = true;
+				break;
 			case HEAPSORT_OPT:
 				sort_opts_vals.sort_method = SORT_HEAPSORT;
 				break;
@@ -1258,6 +1268,11 @@ main(int argc, char **argv)
 		}
 	}
 
+#if defined(SORT_THREADS)
+	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
+		nthreads = 1;
+#endif
+
 	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
 		struct file_list fl;
 		struct sort_list list;

Modified: head/usr.bin/sort/sort.h
==============================================================================
--- head/usr.bin/sort/sort.h	Fri May 25 09:27:16 2012	(r235986)
+++ head/usr.bin/sort/sort.h	Fri May 25 09:30:16 2012	(r235987)
@@ -55,6 +55,7 @@ extern nl_catd catalog;
 extern const char *nlsstr[];
 
 #if defined(SORT_THREADS)
+#define MT_SORT_THRESHOLD (10000)
 extern size_t ncpu;
 extern size_t nthreads;
 #endif



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201205250930.q4P9UHk2093518>