Date: Tue, 24 Jun 2008 06:46:38 GMT From: Konrad Jankowski <konrad@FreeBSD.org> To: Perforce Change Reviews <perforce@FreeBSD.org> Subject: PERFORCE change 144011 for review Message-ID: <200806240646.m5O6kcfI013356@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=144011 Change 144011 by konrad@vspredator on 2008/06/24 06:46:07 style(9) fixes. Additional functionality added. Affected files ... .. //depot/projects/soc2008/konrad_collation/colldef/colldef.c#4 edit Differences ... ==== //depot/projects/soc2008/konrad_collation/colldef/colldef.c#4 (text+ko) ==== @@ -37,7 +37,6 @@ #include <strings.h> #include <unistd.h> -#define DEFAULT_IN_FILE "../colldef_in.src.simple" #define DEFAULT_OUT_FILE "LC_COLLATE" #define WEIGHT_TABLE_SIZE (1 << 16) #define NWEIGHTS 4 @@ -45,21 +44,22 @@ struct sym_entry { char *name; int val; - SLIST_ENTRY(sym_entry) sym_next; + SLIST_ENTRY(sym_entry) next; }; -SLIST_HEAD(, sym_entry) head; +static SLIST_HEAD(, sym_entry) head; /* * This will have to be a structure, to at least accomodate symbol chaining. */ -uint8_t weight_table[WEIGHT_TABLE_SIZE][NWEIGHTS]; -struct weight_uncompressed { +static uint8_t weight_table[WEIGHT_TABLE_SIZE][NWEIGHTS]; + +static struct weight_uncompressed { uint16_t w[NWEIGHTS]; char used; } weights_uncompressed[WEIGHT_TABLE_SIZE]; -int verbose = 0; +static int verbose = 0; void usage(char *name); void assign_weights(int codepoint, char *weights); @@ -76,9 +76,10 @@ if ((sym = malloc(sizeof(*sym))) == NULL) err(1, "add_symbol: malloc(%d)", sizeof(*sym)); - sym->name = strdup(name); + if ((sym->name = strdup(name)) == NULL) + err(1, "add_symbol: strdup(%d)", strlen(name) + 1); sym->val = counter++; - SLIST_INSERT_HEAD(&head, sym, sym_next); + SLIST_INSERT_HEAD(&head, sym, next); } /* @@ -90,10 +91,10 @@ { struct sym_entry *sym; - SLIST_FOREACH(sym, &head, sym_next) { + SLIST_FOREACH(sym, &head, next) if (strcmp(sym->name, name) == 0) return sym->val; - } + return 0; } @@ -102,10 +103,9 @@ { struct sym_entry *sym; - SLIST_FOREACH(sym, &head, sym_next) { + SLIST_FOREACH(sym, &head, next) printf("sym->name=%s sym->val=%d\n", sym->name, sym->val); - } } void @@ -122,6 +122,10 @@ } } +/* + * Decode a unicode codepoint stored in UTF-8 format, 
each byte coded + * as a hexadecimal constant. + */ int get_codepoint(char *p, char **p_end) { @@ -131,7 +135,10 @@ wchar_t out = 0; do { - /* without leading "0x" will also work */ + /* + * Scanf without leading "0x" will also work, + * but we don't have such cases in our input data. + */ p[0] = '0'; sscanf(p, "%x%n", &tmp, &len); p += len; @@ -141,30 +148,31 @@ len = mbtowc(&out, synthesis, i); assert(len == i || synthesis[0] == 0); if (p_end != NULL) - *p_end = p; /* return where we got to */ + *p_end = p; /* Return where we got to. */ return tmp; } void -process_file(char *name) +process_file(FILE *f) { unsigned codepoint = 0; char buf[512], *p; bzero(weights_uncompressed, sizeof(weights_uncompressed)); SLIST_INIT(&head); - if (freopen(name, "r", stdin) == NULL) - err(1, "freopen: %s", name); - while (fgets(buf, sizeof(buf), stdin)) { + while (fgets(buf, sizeof(buf), f)) { #ifdef DEBUG fputs(buf, stdout); #endif buf[strlen(buf) - 1] = 0; switch (buf[0]) { case '<': - /*assert(buf[1] == 'X');*/ - if (buf[1] == 'X') /* XXX */ + /* + * XXX. The assumption here is that collating + * symbols start with 'X'. + */ + if (buf[1] == 'X') add_symbol(buf); break; case '\\': @@ -174,10 +182,15 @@ } } - - fclose(stdin); /* not really needed - freopen closes */ } +/* + * This function decodes a weight, which can be given as: + * 1. a symbol name in angle brackets - in which case we have to look it up in + * our symbol table. + * 2. IGNORE keyword - this weight should be ignored. + * 3. A literal hexadecimal value prefixed with "\x". + */ int get_weight_val(char **p) { @@ -189,17 +202,18 @@ case '<': p3 = strchr(p2, '>'); assert(p3); - p3[1] = 0; /* truncate the string for get_symval */ + p3[1] = 0; /* Truncate the string for get_symval. */ ret = get_symval(p2); - *p = p3 + 2; /* skip over ';' also */ - p3[1] = ';'; /* restore string; we shouldn't care... */ + if (ret == 0) + errx(1, "get_weight_val: symbol %s not found", + p2); + *p = p3 + 2; /* Skip over ';' also. 
*/ + p3[1] = ';'; /* Restore string; we shouldn't care... */ return ret; - case 'I': - /* IGNORE */ - *p += 7; - return 0; /* IGNORE means 0 (I think) */ - case '\\': - /* we get literal value, instead of symbol */ + case 'I': /* IGNORE */ + *p += 7; /* IGNORE has 6 letters, + ';'. */ + return 0; /* IGNORE means 0 (I think). */ + case '\\': /* Literal value. Decode it. */ ret = get_codepoint(p2, &p3); *p = p3 + 1; return ret; @@ -209,21 +223,28 @@ } } +/* + * Take a string of four weights, separated by semicolons, + * decode them and assign to the weight table at the given codepoint + * position. + */ void assign_weights(int codepoint, char *weights) { - int i; + int i, val; assert(weights != NULL); weights_uncompressed[codepoint].used = 1; for (i = 0; i < 4; i++) { - int val; - val = get_weight_val(&weights); weights_uncompressed[codepoint].w[i] = val; } } +/* + * Assign new weight value to all codepoint with the given value 'val'. + * Do this only for the first level (w[0]). + */ void reduce(int val, int new_val) { @@ -242,15 +263,15 @@ * Find 2 minimums from the given set. * Optimised to only make one pass throught the set. (data locality) */ -#define MIN_MAX (1<<15) +#define MIN_MAX (1 << 15) int find_min(int start, int *min2_ret) { int min = MIN_MAX, min2, min3 = MIN_MAX; - int i; + int i, val; for (i = 0; i < WEIGHT_TABLE_SIZE; i++) { - int val = weights_uncompressed[i].w[0]; + val = weights_uncompressed[i].w[0]; /* 1. 
case, at first we find no a minimum */ if (val >= start && val < min) { min2 = min; @@ -275,15 +296,14 @@ int i; printf("%d: ", level); - for (i = 0; i < WEIGHT_TABLE_SIZE; i++) { + for (i = 0; i < WEIGHT_TABLE_SIZE; i++) if (weights_uncompressed[i].used && weights_uncompressed[i].w[0] == level) printf(" (%d %d %d)", weights_uncompressed[i].w[1], - weights_uncompressed[i].w[2], - weights_uncompressed[i].w[3]); + weights_uncompressed[i].w[2], + weights_uncompressed[i].w[3]); - } - printf("\n"); + putchar('\n'); } /* @@ -319,66 +339,75 @@ } void -binary_output(char *out_file) +binary_output(FILE *f) { int i, j; - int out; - - /* - * I just use open and write, instead of stdio in this case. - * This program isn't meant to be portable from UNIX. - */ - if ((out = open(out_file, O_WRONLY | O_CREAT | O_TRUNC, 0644)) == -1) - err(1, "open(%s)", out_file); + for (i = 0; i < WEIGHT_TABLE_SIZE; i++) for (j = 0; j < NWEIGHTS; j++) weight_table[i][j] = weights_uncompressed[i].w[j]; - if (write(out, weight_table, sizeof(weight_table)) != - sizeof(weight_table)) + if (fwrite(weight_table, sizeof(weight_table), 1, f) != 1) errx(1, "not full write"); - close(out); } /* - * I divided the process into 3 main functions, so we could optionally - * process multiple input files with better argument processing. - * Another questionable optimisation. + * I divided the process into 3 main functions, so we can + * process multiple input files with one call. 
*/ int main(int argc, char *argv[]) { + FILE *f, *of; int ch; - char *in_file = DEFAULT_IN_FILE; - char *out_file = DEFAULT_OUT_FILE; + char name[512]; - while ((ch = getopt(argc, argv, "hf:o:v")) != -1) { + while ((ch = getopt(argc, argv, "h:v")) != -1) { switch (ch) { - case 'f': - in_file = optarg; - break; - case 'o': - out_file = optarg; - break; case 'v': verbose = 1; - break; + break; default: usage(argv[0]); } } + argv += optind; + argc -= optind; /* - * need to setlocale to an UTF-8 locale, so thet - * mbtowc works correctly + * Need to setlocale to an UTF-8 locale, so that + * mbtowc works correctly. */ setlocale(LC_ALL, "en_US.UTF-8"); - process_file(in_file); + if (argc) for (; argc; argc--, argv++) { + if ((f = fopen(argv[0], "r")) == NULL) + err(1, "fopen: %s", argv[0]); + process_file(f); + fclose(f); +#ifdef DEBUG2 + dump_table(); +#endif + compress_weights(); + snprintf(name, sizeof(name), + "%s.%s", argv[0], DEFAULT_OUT_FILE); + if ((of = fopen(name, "w")) == NULL) + err(1, "fopen: %s", name); + binary_output(of); + fclose(of); + } else { + process_file(stdin); #ifdef DEBUG2 - dump_table(); + dump_table(); #endif - compress_weights(); - binary_output(out_file); + compress_weights(); + /* + * We could write to stdout here... + */ + if ((of = fopen(DEFAULT_OUT_FILE, "w")) == NULL) + err(1, "fopen: %s", name); + binary_output(of); + fclose(of); + } return 0; } @@ -387,9 +416,11 @@ usage(char *name) { printf( "usage: " - "\t%s [-f input_file] [-o output_file]\n" - "\tdefault output file is LC_COLLATE\n" + "\t%s [-h] [-v] [input_file_1] ... [input_file_n] \n" + "\t output file name is LC_COLLATE\n" + "\t if one or more input files given as arguments, " + "\t output file name is the input file name with\n" + "LC_COLLATE concatenaded.\n" , name); exit(1); } -
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200806240646.m5O6kcfI013356>