From owner-svn-src-head@FreeBSD.ORG Fri May 25 09:30:17 2012 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 8DBC3106566B; Fri, 25 May 2012 09:30:17 +0000 (UTC) (envelope-from gabor@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 790318FC16; Fri, 25 May 2012 09:30:17 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q4P9UHSh093526; Fri, 25 May 2012 09:30:17 GMT (envelope-from gabor@svn.freebsd.org) Received: (from gabor@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q4P9UHk2093518; Fri, 25 May 2012 09:30:17 GMT (envelope-from gabor@svn.freebsd.org) Message-Id: <201205250930.q4P9UHk2093518@svn.freebsd.org> From: Gabor Kovesdan Date: Fri, 25 May 2012 09:30:17 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r235987 - head/usr.bin/sort X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 25 May 2012 09:30:17 -0000 Author: gabor Date: Fri May 25 09:30:16 2012 New Revision: 235987 URL: http://svn.freebsd.org/changeset/base/235987 Log: - Only use multi-threading for large files - Do not use mmap() by default; it can be enabled by --mmap - Add some minor optimizations for -u - Update manual page according to the changes Submitted by: Oleg Moskalenko Modified: head/usr.bin/sort/bwstring.c head/usr.bin/sort/file.c head/usr.bin/sort/file.h head/usr.bin/sort/radixsort.c head/usr.bin/sort/sort.1.in head/usr.bin/sort/sort.c head/usr.bin/sort/sort.h Modified: head/usr.bin/sort/bwstring.c ============================================================================== --- head/usr.bin/sort/bwstring.c Fri May 25 09:27:16 2012 (r235986) +++ head/usr.bin/sort/bwstring.c Fri May 25 09:30:16 2012 (r235987) @@ -499,6 +499,22 @@ bwsfgetln(FILE *f, size_t *len, bool zer } return (bwssbdup(ret, *len)); + } else if (!zero_ended && (MB_CUR_MAX == 1)) { + char *ret; + + ret = fgetln(f, len); + + if (ret == NULL) { + if (!feof(f)) + err(2, NULL); + return (NULL); + } + if (*len > 0) { + if (ret[*len - 1] == '\n') + --(*len); + } + return (bwscsbdup(ret, *len)); + } else { wchar_t c = 0; Modified: head/usr.bin/sort/file.c ============================================================================== --- head/usr.bin/sort/file.c Fri May 25 09:27:16 2012 (r235986) +++ head/usr.bin/sort/file.c Fri May 25 09:30:16 2012 (r235987) @@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$"); unsigned long long free_memory = 1000000; unsigned long long available_free_memory = 1000000; +bool use_mmap; + const char *tmpdir = "/var/tmp"; const char *compress_program; @@ -404,23 +406,21 @@ sort_list_dump(struct sort_list *l, cons err(2, NULL); if (l->list) { - struct sort_list_item *last_printed_item; size_t i; - - last_printed_item = NULL; - - for (i = 0; i < l->count; i++) { - struct sort_list_item *item; - - item = l->list[i]; - - if (!(sort_opts_vals.uflag) || - (last_printed_item == NULL) || - list_coll(&last_printed_item, &item)) { - bwsfwrite(item->str, f, + if (!(sort_opts_vals.uflag)) { + for (i = 0; i < l->count; ++i) + bwsfwrite(l->list[i]->str, f, sort_opts_vals.zflag); - if (sort_opts_vals.uflag) + } else { + struct sort_list_item *last_printed_item = NULL; + struct sort_list_item *item; + for (i = 0; i < l->count; ++i) { + item = l->list[i]; + if ((last_printed_item == NULL) || + list_coll(&last_printed_item, &item)) { + bwsfwrite(item->str, f, sort_opts_vals.zflag); last_printed_item = item; + } } } } @@ -657,7 +657,7 @@ file_reader_init(const char *fsrc) ret->fname = sort_strdup(fsrc); - if (strcmp(fsrc, "-") && (compress_program == NULL)) { + if (strcmp(fsrc, "-") && (compress_program == NULL) && use_mmap) { do { struct stat stat_buf; @@ -1539,7 +1539,9 @@ mt_sort(struct sort_list *list, const char* fn) { #if defined(SORT_THREADS) - if (nthreads < 2 || list->count < nthreads) { + if (nthreads < 2 || list->count < MT_SORT_THRESHOLD) { + size_t nthreads_save = nthreads; + nthreads = 1; #endif /* if single thread or small data, do simple sort */ sort_func(list->list, list->count, @@ -1547,6 +1549,7 @@ mt_sort(struct sort_list *list, (int(*)(const void *, const void *)) list_coll); sort_list_dump(list, fn); #if defined(SORT_THREADS) + nthreads = nthreads_save; } else { /* multi-threaded sort */ struct sort_list **parts; @@ -1590,7 +1593,18 @@ mt_sort(struct sort_list *list, pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED); - pthread_create(&pth, &attr, mt_sort_thread, parts[i]); + for (;;) { + int res = pthread_create(&pth, &attr, + mt_sort_thread, parts[i]); + + if (res >= 0) + break; + if (errno == EAGAIN) { + pthread_yield(); + continue; + } + err(2, NULL); + } pthread_attr_destroy(&attr); } Modified: head/usr.bin/sort/file.h ============================================================================== --- head/usr.bin/sort/file.h Fri May 25 09:27:16 2012 (r235986) +++ head/usr.bin/sort/file.h Fri May 25 09:30:16 2012 (r235987) @@ -84,6 +84,9 @@ struct file0_reader extern unsigned long long free_memory; extern unsigned long long available_free_memory; +/* Are we using mmap ? */ +extern bool use_mmap; + /* temporary file dir */ extern const char *tmpdir; Modified: head/usr.bin/sort/radixsort.c ============================================================================== --- head/usr.bin/sort/radixsort.c Fri May 25 09:27:16 2012 (r235986) +++ head/usr.bin/sort/radixsort.c Fri May 25 09:30:16 2012 (r235987) @@ -609,7 +609,17 @@ run_top_sort_level(struct sort_level *sl pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED); - pthread_create(&pth, &attr, sort_thread, NULL); + for (;;) { + int res = pthread_create(&pth, &attr, + sort_thread, NULL); + if (res >= 0) + break; + if (errno == EAGAIN) { + pthread_yield(); + continue; + } + err(2, NULL); + } pthread_attr_destroy(&attr); } @@ -626,6 +636,10 @@ run_sort(struct sort_list_item **base, s struct sort_level *sl; #if defined(SORT_THREADS) + size_t nthreads_save = nthreads; + if (nmemb < MT_SORT_THRESHOLD) + nthreads = 1; + if (nthreads > 1) { pthread_mutexattr_t mattr; @@ -663,6 +677,7 @@ run_sort(struct sort_list_item **base, s pthread_mutex_destroy(&g_ls_mutex); pthread_mutex_destroy(&sort_left_mutex); } + nthreads = nthreads_save; #endif } Modified: head/usr.bin/sort/sort.1.in ============================================================================== --- head/usr.bin/sort/sort.1.in Fri May 25 09:27:16 2012 (r235986) +++ head/usr.bin/sort/sort.1.in Fri May 25 09:30:16 2012 (r235987) @@ -33,7 +33,7 @@ .\" .\" @(#)sort.1 8.1 (Berkeley) 6/6/93 .\" -.Dd May 6, 2012 +.Dd May 25, 2012 .Dt SORT 1 .Os .Sh NAME @@ -358,6 +358,9 @@ This sort algorithm cannot be used with .Fl u and .Fl s . +.It Fl Fl mmap +Try to use file memory mapping system call. +It may increase speed in some cases. .El .Pp The following operands are available: Modified: head/usr.bin/sort/sort.c ============================================================================== --- head/usr.bin/sort/sort.c Fri May 25 09:27:16 2012 (r235986) +++ head/usr.bin/sort/sort.c Fri May 25 09:30:16 2012 (r235987) @@ -89,6 +89,7 @@ const char *nlsstr[] = { "", "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " "[-o outfile] [--batch-size size] [--files0-from file] " "[--heapsort] [--mergesort] [--radixsort] [--qsort] " + "[--mmap] " #if defined(SORT_THREADS) "[--nthreads thread_no] " #endif @@ -138,7 +139,8 @@ enum QSORT_OPT, MERGESORT_OPT, HEAPSORT_OPT, - RADIXSORT_OPT + RADIXSORT_OPT, + MMAP_OPT }; #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 @@ -164,6 +166,7 @@ struct option long_options[] = { { "key", required_argument, NULL, 'k' }, { "merge", no_argument, NULL, 'm' }, { "mergesort", no_argument, NULL, MERGESORT_OPT }, + { "mmap", no_argument, NULL, MMAP_OPT }, { "month-sort", no_argument, NULL, 'M' }, { "numeric-sort", no_argument, NULL, 'n' }, { "output", required_argument, NULL, 'o' }, @@ -1063,12 +1066,16 @@ main(int argc, char **argv) tmpdir = sort_strdup(optarg); break; case 't': - if (strlen(optarg) > 1) { - if (strcmp(optarg, "\\0")) { + while (strlen(optarg) > 1) { + if (optarg[0] != '\\') { errx(2, "%s: %s\n", strerror(EINVAL), optarg); } - *optarg = 0; + optarg += 1; + if (*optarg == '0') { + *optarg = 0; + break; + } } sort_opts_vals.tflag = true; sort_opts_vals.field_sep = btowc(optarg[0]); @@ -1126,6 +1133,9 @@ main(int argc, char **argv) case MERGESORT_OPT: sort_opts_vals.sort_method = SORT_MERGESORT; break; + case MMAP_OPT: + use_mmap = true; + break; case HEAPSORT_OPT: sort_opts_vals.sort_method = SORT_HEAPSORT; break; @@ -1258,6 +1268,11 @@ main(int argc, char **argv) } } +#if defined(SORT_THREADS) + if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) + nthreads = 1; +#endif + if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { struct file_list fl; struct sort_list list; Modified: head/usr.bin/sort/sort.h ============================================================================== --- head/usr.bin/sort/sort.h Fri May 25 09:27:16 2012 (r235986) +++ head/usr.bin/sort/sort.h Fri May 25 09:30:16 2012 (r235987) @@ -55,6 +55,7 @@ extern nl_catd catalog; extern const char *nlsstr[]; #if defined(SORT_THREADS) +#define MT_SORT_THRESHOLD (10000) extern size_t ncpu; extern size_t nthreads; #endif