]>
git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genprops/misc/ucdmerge.c
2 *******************************************************************************
4 * Copyright (C) 2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: ucdmerge.c
10 * tab size: 8 (not used)
13 * created on: 2003feb20
14 * created by: Markus W. Scherer
16 * Simple tool for Unicode Character Database files with semicolon-delimited fields.
17 * Merges adjacent, identical per-code point data lines into one line with range syntax.
19 * To compile, just call a C compiler/linker with this source file.
20 * On Windows: cl ucdmerge.c
28 skipWhitespace(const char *s
) {
29 while(*s
==' ' || *s
=='\t') {
35 /* return the first character position after the end of the data */
37 endOfData(const char *l
) {
43 /* ignore whitespace before the comment */
44 while(l
!=end
&& ((c
=*(end
-1))==' ' || c
=='\t')) {
54 sameData(const char *l1
, const char *l2
) {
58 /* find the first semicolon in each line - there must be one */
62 /* find the end of data: end of string or start of comment */
66 /* compare the line data portions */
68 return length
==(end2
-l2
) && 0==memcmp(l1
, l2
, length
);
72 main(int argc
, const char *argv
[]) {
73 static char line
[2000], firstLine
[2000], lastLine
[2000];
82 if(gets(line
)!=NULL
) {
83 /* parse the initial code point, if any */
84 c
=strtol(line
, &end
, 16);
85 if(end
!=line
&& *skipWhitespace(end
)==';') {
86 /* single code point followed by semicolon and data, keep c */
96 if(last
>=0 && (c
!=(last
+1) || !sameData(firstLine
, line
))) {
97 /* output the current range */
99 /* there was no range, just output the one line we found */
102 /* there was a real range, merge their lines */
103 end
=strchr(lastLine
, '#');
105 /* no comment in second line */
106 printf("%04lX..%04lX%s\n",
107 first
, last
, /* code point range */
108 strchr(firstLine
, ';'));/* first line starting from the first ; */
109 } else if(strchr(firstLine
, '#')==NULL
) {
110 /* no comment in first line */
111 printf("%04lX..%04lX%s%s\n",
112 first
, last
, /* code point range */
113 strchr(firstLine
, ';'), /* first line starting from the first ; */
114 end
); /* comment from second line */
116 /* merge comments from both lines */
117 printf("%04lX..%04lX%s..%s\n",
118 first
, last
, /* code point range */
119 strchr(firstLine
, ';'), /* first line starting from the first ; */
120 skipWhitespace(end
+1)); /* comment from second line, after # and spaces */
131 /* no data on this line, output as is */
134 /* data on this line, store for possible range compaction */
136 /* set as the first line in a possible range */
138 strcpy(firstLine
, line
);
140 } else /* must be c==(last+1) && sameData() because of previous conditions */ {
141 /* continue with the current range */
143 strcpy(lastLine
, line
);