Commit | Line | Data |
---|---|---|
5a419892 VZ |
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
3 | ||
/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 */
/*
 * The string is split in place: pointers to the starts of the fields are
 * stored in fields[], and the separator that ends each stored field is
 * overwritten with a NUL.
 *
 * sep selects the separator rule:
 *	""	white space: leading blanks/tabs are skipped, fields are
 *		separated by runs of blanks/tabs, and trailing white space
 *		does not produce an empty last field
 *	"c"	every single occurrence of c separates two fields, so
 *		adjacent separators give empty fields
 *	"ab..."	any run of the listed characters separates two fields
 *
 * The return value is the total number of fields found, which can exceed
 * nfields.  Only the first min(result, nfields) entries of fields[] are
 * set.  On overflow the last stored field is left unterminated and so
 * holds the whole rest of the string (in white-space mode, trailing white
 * space is trimmed from it when nothing else follows).
 *
 * If nfields <= 0 the fields are only counted: fields[] is not touched and
 * the string is not modified.
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	register char *p = string;
	register char c;			/* latest character */
	register char sepc = sep[0];
	register char sepc2;
	register int fn;
	register char **fp = fields;
	register char *sepp;
	register int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		if (nfields > 0) {
			/* fn counts down the free slots in fields[] */
			fn = nfields;
			for (;;) {
				*fp++ = p;
				fn--;
				if (fn == 0)
					break;
				while ((c = *p++) != sepc)
					if (c == '\0')
						return(nfields - fn);
				*(p-1) = '\0';
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else {
			/* nowhere to store anything; the text at p is field 1 */
			fn = 1;
		}
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		if (nfields <= 0) {
			register char *start;	/* start of current field */

			/* count only: store nothing, modify nothing */
			fn = 0;
			for (;;) {
				start = p;
				fn++;
				while ((c = *p++) != sepc && c != sepc2)
					if (c == '\0') {
						/* trailing white space is not a field */
						if (trimtrail && *start == '\0')
							fn--;
						return(fn);
					}
				while ((c = *p++) == sepc || c == sepc2)
					continue;
				p--;
			}
			/* not reached */
		}

		fn = nfields;
		for (;;) {
			*fp++ = p;
			fn--;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					/* empty last field came from trailing white space */
					if (trimtrail && **(fp-1) == '\0')
						fn++;
					return(nfields - fn);
				}
			if (fn == 0)
				break;
			*(p-1) = '\0';
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/* only trim if the sole overflow was the trailing blanks */
				if (fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
139 | ||
140 | #ifdef TEST_SPLIT | |
141 | ||
142 | ||
/*
 * test program
 * pgm			runs regression
 * pgm sep		splits stdin lines by sep
 * pgm str sep		splits str by sep
 * pgm str sep n	splits str by sep n times
 * pgm str sep n x	only copies str n times, without splitting
 *			(NOTE(review): looks like a baseline for timing
 *			the previous form -- confirm)
 */
int dosplit(char *string, char *seps);
int regress(void);

int
main(int argc, char *argv[])
{
	char buf[512];
	register int n;
#	define	MNF	10
	char *fields[MNF];

	/* the repeat forms copy argv[1] into buf, so it has to fit */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		fprintf(stderr, "%s: string too long (limit %lu)\n",
				argv[0], (unsigned long)(sizeof(buf) - 1));
		return(2);
	}

	if (argc > 4)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/* stomp the newline, if there is one */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	return(0);
}
181 | ||
/*
 * dosplit - split one string using a small field vector and print the result
 *
 * string is modified in place by split().  At most NF field pointers are
 * kept; print() is given the full count so overflow is visible.
 * Always returns 0.
 */
int print(int nf, int nfp, char *fields[]);

int
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];
	register int nf;

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
	return(0);
}
193 | ||
/*
 * print - show a field count and the fields that were actually stored
 *
 * nf is the count reported by split(), nfp the capacity of fields[].
 * Prints nf, a tab, then the first min(nf, nfp) fields, each quoted and
 * separated by ", ".  The line is always terminated with a newline, also
 * when there are no fields or when nf exceeds nfp.
 * Always returns 0.
 */
int
print(int nf, int nfp, char *fields[])
{
	register int fn;
	register int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("%s\"%s\"", (fn > 0) ? ", " : "", fields[fn]);
	printf("\n");
	return(0);
}
207 | ||
/*
 * Regression table: split str by seps, expect nf fields in total, of which
 * the first min(nf, RNF) must equal fi[].  A NULL str ends the table.
 */
#define	RNF	5		/* some table entries know this */
struct {
	char *str;		/* string to split */
	char *seps;		/* separator argument for split() */
	int nf;			/* expected return value */
	char *fi[RNF];		/* expected stored fields */
} tests[] = {
	/* single separator: every blank counts, so doubled blanks give "" */
	{ "",		" ",	0,	{ "" } },
	{ " ",		" ",	2,	{ "", "" } },
	{ "x",		" ",	1,	{ "x" } },
	{ "xy",		" ",	1,	{ "xy" } },
	{ "x y",	" ",	2,	{ "x", "y" } },
	{ "abc def  g ", " ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",	" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f", " ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separators */
	{ "",		" _",	0,	{ "" } },
	{ " ",		" _",	2,	{ "", "" } },
	{ "x",		" _",	1,	{ "x" } },
	{ "x y",	" _",	2,	{ "x", "y" } },
	{ "ab _ cd",	" _",	2,	{ "ab", "cd" } },
	{ " a_b c ",	" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f", " _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separators */
	{ "",		" _~",	0,	{ "" } },
	{ " ",		" _~",	2,	{ "", "" } },
	{ "x",		" _~",	1,	{ "x" } },
	{ "x y",	" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",	" _~",	2,	{ "ab", "cd" } },
	{ " a_b c~",	" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f", " _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",	" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separators */
	{ "",		" _~-",	0,	{ "" } },
	{ " ",		" _~-",	2,	{ "", "" } },
	{ "x",		" _~-",	1,	{ "x" } },
	{ "x y",	" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",	" _~-",	2,	{ "ab", "cd" } },
	{ " a_b c~",	" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f", " _~-", 6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",	" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/* the same separator twice: runs of blanks collapse */
	{ "",		"  ",	0,	{ "" } },
	{ " ",		"  ",	2,	{ "", "" } },
	{ "x",		"  ",	1,	{ "x" } },
	{ "xy",		"  ",	1,	{ "xy" } },
	{ "x y",	"  ",	2,	{ "x", "y" } },
	{ "abc def  g ", "  ",	4,	{ "abc", "def", "g", "" } },
	{ "  a bcd",	"  ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f", "  ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	"  ",	6,	{ "", "a", "b", "c", "d " } },

	/* white space */
	{ "",		"",	0,	{ "" } },
	{ " ",		"",	0,	{ "" } },
	{ "x",		"",	1,	{ "x" } },
	{ "xy",		"",	1,	{ "xy" } },
	{ "x y",	"",	2,	{ "x", "y" } },
	{ "abc def g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",	"",	2,	{ "a", "bcd" } },
	{ " a \tb\t c ", "",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",	"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f", "",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ", "",	6,	{ "a", "b", "c", "d", "e f " } },

	{ NULL,		NULL,	0,	{ NULL } },
};
276 | ||
277 | regress() | |
278 | { | |
279 | char buf[512]; | |
280 | register int n; | |
281 | char *fields[RNF+1]; | |
282 | register int nf; | |
283 | register int i; | |
284 | register int printit; | |
285 | register char *f; | |
286 | ||
287 | for (n = 0; tests[n].str != NULL; n++) { | |
288 | (void) strcpy(buf, tests[n].str); | |
289 | fields[RNF] = NULL; | |
290 | nf = split(buf, fields, RNF, tests[n].seps); | |
291 | printit = 0; | |
292 | if (nf != tests[n].nf) { | |
293 | printf("split `%s' by `%s' gave %d fields, not %d\n", | |
294 | tests[n].str, tests[n].seps, nf, tests[n].nf); | |
295 | printit = 1; | |
296 | } else if (fields[RNF] != NULL) { | |
297 | printf("split() went beyond array end\n"); | |
298 | printit = 1; | |
299 | } else { | |
300 | for (i = 0; i < nf && i < RNF; i++) { | |
301 | f = fields[i]; | |
302 | if (f == NULL) | |
303 | f = "(NULL)"; | |
304 | if (strcmp(f, tests[n].fi[i]) != 0) { | |
305 | printf("split `%s' by `%s', field %d is `%s', not `%s'\n", | |
306 | tests[n].str, tests[n].seps, | |
307 | i, fields[i], tests[n].fi[i]); | |
308 | printit = 1; | |
309 | } | |
310 | } | |
311 | } | |
312 | if (printit) | |
313 | print(nf, RNF, fields); | |
314 | } | |
315 | } | |
316 | #endif |