]> git.saurik.com Git - wxWidgets.git/blob - src/tiff/contrib/iptcutil/iptcutil.c
Define __VISUALC__ for ICC under Windows again.
[wxWidgets.git] / src / tiff / contrib / iptcutil / iptcutil.c
1
2 #include "tif_config.h"
3
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
8
9 #ifdef HAVE_STRINGS_H
10 # include <strings.h>
11 #endif
12
13 #ifdef HAVE_IO_H
14 # include <io.h>
15 #endif
16
17 #ifdef HAVE_FCNTL_H
18 # include <fcntl.h>
19 #endif
20
21 #ifdef WIN32
22 #define STRNICMP strnicmp
23 #else
24 #define STRNICMP strncasecmp
25 #endif
26
/*
 * One row of the IPTC name table: a numeric id and the human readable
 * label that formatIPTC() prints for a record carrying that id.
 */
typedef struct _tag_spec
{
  short id;
  char *name;
} tag_spec;

/*
 * Known IPTC ids and their labels.  Lookups are a linear scan, so the
 * order only matters for readability; ids without an entry are printed
 * without a label.
 */
static tag_spec tags[] = {
  {   5, "Image Name" },
  {   7, "Edit Status" },
  {  10, "Priority" },
  {  15, "Category" },
  {  20, "Supplemental Category" },
  {  22, "Fixture Identifier" },
  {  25, "Keyword" },
  {  30, "Release Date" },
  {  35, "Release Time" },
  {  40, "Special Instructions" },
  {  45, "Reference Service" },
  {  47, "Reference Date" },
  {  50, "Reference Number" },
  {  55, "Created Date" },
  {  60, "Created Time" },
  {  65, "Originating Program" },
  {  70, "Program Version" },
  {  75, "Object Cycle" },
  {  80, "Byline" },
  {  85, "Byline Title" },
  {  90, "City" },
  {  95, "Province State" },
  { 100, "Country Code" },
  { 101, "Country" },
  { 103, "Original Transmission Reference" },
  { 105, "Headline" },
  { 110, "Credit" },
  { 115, "Source" },
  { 116, "Copyright String" },
  { 120, "Caption" },
  { 121, "Local Caption" },
  { 122, "Caption Writer" },
  { 200, "Custom Field 1" },
  { 201, "Custom Field 2" },
  { 202, "Custom Field 3" },
  { 203, "Custom Field 4" },
  { 204, "Custom Field 5" },
  { 205, "Custom Field 6" },
  { 206, "Custom Field 7" },
  { 207, "Custom Field 8" },
  { 208, "Custom Field 9" },
  { 209, "Custom Field 10" },
  { 210, "Custom Field 11" },
  { 211, "Custom Field 12" },
  { 212, "Custom Field 13" },
  { 213, "Custom Field 14" },
  { 214, "Custom Field 15" },
  { 215, "Custom Field 16" },
  { 216, "Custom Field 17" },
  { 217, "Custom Field 18" },
  { 218, "Custom Field 19" },
  { 219, "Custom Field 20" }
};
90
/*
 * Write the len bytes starting at s to ofile as a single double-quoted
 * line, using HTML conventions so that control characters and the
 * quoting characters themselves survive a round trip through text:
 *
 *   '&'  -> "&amp;"
 *   '"'  -> "&quot;"
 *   '<'  -> "&lt;"  and  '>' -> "&gt;"   (only when HANDLE_GT_LT is defined)
 *   control characters -> "&#N;" with N the decimal byte value
 *
 * Every other byte is copied through unchanged.  The output always ends
 * with a closing quote and a newline.  Nothing is written for the data
 * when len <= 0 (just the empty quotes).
 */
void formatString(FILE *ofile, const char *s, int len)
{
  putc('"', ofile);
  for (; len > 0; --len, ++s) {
    /*
     * Go through unsigned char: the <ctype.h> classifiers are undefined
     * for negative arguments other than EOF, and plain char may be
     * signed, so bytes >= 0x80 must not be passed as negative ints.
     */
    unsigned char c = (unsigned char) *s;

    switch (c) {
    case '&':
      fputs("&amp;", ofile);
      break;
#ifdef HANDLE_GT_LT
    case '<':
      fputs("&lt;", ofile);
      break;
    case '>':
      fputs("&gt;", ofile);
      break;
#endif
    case '"':
      fputs("&quot;", ofile);
      break;
    default:
      if (iscntrl(c))
        fprintf(ofile, "&#%d;", (int) c);
      else
        putc(c, ofile);
      break;
    }
  }
  fputs("\"\n", ofile);
}
125
/*
 * A named HTML escape: the escape text, its length, and the single
 * character it stands for.
 */
typedef struct _html_code
{
  short
    len;
  const char
    *code,
    val;
} html_code;

static html_code html_codes[] = {
#ifdef HANDLE_GT_LT
  { 4,"&lt;",'<' },
  { 4,"&gt;",'>' },
#endif
  { 5,"&amp;",'&' },
  { 6,"&quot;",'"' }
};

/*
 * Return non-zero when s starts with entry->code, ignoring ASCII case.
 * The caller guarantees that at least entry->len characters (or a
 * terminating NUL before that) are readable at s.  Done by hand so the
 * block does not depend on the non-standard strnicmp/strncasecmp.
 */
static int html_code_matches(const char *s, const html_code *entry)
{
  int i;

  for (i = 0; i < entry->len; i++)
    if (tolower((unsigned char) s[i]) !=
        tolower((unsigned char) entry->code[i]))
      return 0;
  return 1;
}

/*
 * Convert the HTML escape sequence at the start of s back to the single
 * character it represents, in place: the character is stored in s[0] and
 * the remainder of the string is moved down to follow it.
 *
 *   s    NUL-terminated string; s[0] is expected to be the '&' that
 *        introduces the escape.
 *   len  number of characters available at s (callers pass strlen(s)).
 *
 * Recognised forms are "&#N;" with N of up to three characters, and the
 * named codes in html_codes[] (matched case-insensitively).
 *
 * Returns the number of characters dropped from the string, or 0 when
 * nothing was converted (in which case s is left untouched).
 */
int convertHTMLcodes(char *s, int len)
{
  if (len <= 0 || s == (char *) NULL || *s == '\0')
    return 0;

  if (s[1] == '#')
    {
      int val;

      if (sscanf(s, "&#%d;", &val) == 1)
        {
          /* s[2] is the first digit, so the ';' is at index 3..5. */
          int o = 3;

          while (o < 5 && o < len && s[o] != ';' && s[o] != '\0')
            o++;
          /* Unterminated or over-long code: leave the text as it is. */
          if (o >= len || s[o] != ';')
            return 0;

          /* Regions overlap, so memmove (not strcpy); +1 moves the NUL. */
          memmove(s + 1, s + o + 1, strlen(s + o + 1) + 1);
          *s = (char) val;
          return o;
        }
    }
  else
    {
      int
        i,
        codes = sizeof(html_codes) / sizeof(html_code);

      for (i = 0; i < codes; i++)
        {
          const html_code *entry = &html_codes[i];

          if (entry->len <= len && html_code_matches(s, entry))
            {
              memmove(s + 1, s + entry->len, strlen(s + entry->len) + 1);
              *s = entry->val;
              return entry->len - 1;
            }
        }
    }

  return 0;
}
193
194 int formatIPTC(FILE *ifile, FILE *ofile)
195 {
196 unsigned int
197 foundiptc,
198 tagsfound;
199
200 unsigned char
201 recnum,
202 dataset;
203
204 char
205 *readable,
206 *str;
207
208 long
209 tagindx,
210 taglen;
211
212 int
213 i,
214 tagcount = sizeof(tags) / sizeof(tag_spec);
215
216 char
217 c;
218
219 foundiptc = 0; /* found the IPTC-Header */
220 tagsfound = 0; /* number of tags found */
221
222 c = getc(ifile);
223 while (c != EOF)
224 {
225 if (c == 0x1c)
226 foundiptc = 1;
227 else
228 {
229 if (foundiptc)
230 return -1;
231 else
232 continue;
233 }
234
235 /* we found the 0x1c tag and now grab the dataset and record number tags */
236 dataset = getc(ifile);
237 if ((char) dataset == EOF)
238 return -1;
239 recnum = getc(ifile);
240 if ((char) recnum == EOF)
241 return -1;
242 /* try to match this record to one of the ones in our named table */
243 for (i=0; i< tagcount; i++)
244 {
245 if (tags[i].id == recnum)
246 break;
247 }
248 if (i < tagcount)
249 readable = tags[i].name;
250 else
251 readable = "";
252
253 /* then we decode the length of the block that follows - long or short fmt */
254 c = getc(ifile);
255 if (c == EOF)
256 return 0;
257 if (c & (unsigned char) 0x80)
258 {
259 unsigned char
260 buffer[4];
261
262 for (i=0; i<4; i++)
263 {
264 c = buffer[i] = getc(ifile);
265 if (c == EOF)
266 return -1;
267 }
268 taglen = (((long) buffer[ 0 ]) << 24) |
269 (((long) buffer[ 1 ]) << 16) |
270 (((long) buffer[ 2 ]) << 8) |
271 (((long) buffer[ 3 ]));
272 }
273 else
274 {
275 unsigned char
276 x = c;
277
278 taglen = ((long) x) << 8;
279 x = getc(ifile);
280 if ((char)x == EOF)
281 return -1;
282 taglen |= (long) x;
283 }
284 /* make a buffer to hold the tag data and snag it from the input stream */
285 str = (char *) malloc((unsigned int) (taglen+1));
286 if (str == (char *) NULL)
287 {
288 printf("Memory allocation failed");
289 return 0;
290 }
291 for (tagindx=0; tagindx<taglen; tagindx++)
292 {
293 c = str[tagindx] = getc(ifile);
294 if (c == EOF)
295 {
296 free(str);
297 return -1;
298 }
299 }
300 str[ taglen ] = 0;
301
302 /* now finish up by formatting this binary data into ASCII equivalent */
303 if (strlen(readable) > 0)
304 fprintf(ofile, "%d#%d#%s=",(unsigned int)dataset, (unsigned int) recnum, readable);
305 else
306 fprintf(ofile, "%d#%d=",(unsigned int)dataset, (unsigned int) recnum);
307 formatString( ofile, str, taglen );
308 free(str);
309
310 tagsfound++;
311
312 c = getc(ifile);
313 }
314 return tagsfound;
315 }
316
317 int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
318 char *white,char *brkchar,char *quote,char eschar,char *brkused,
319 int *next,char *quoted);
320
/*
 * Read one line of arbitrary length from file into the heap buffer b,
 * growing the buffer with realloc() as needed.
 *
 *   b     malloc'ed buffer to read into.  Ownership passes to this
 *         function: the buffer may be moved, so the caller must continue
 *         with the returned pointer only.
 *   blen  in:  number of bytes the caller guarantees are usable at b.
 *         out: length of the line read plus one (for the terminator), or
 *              0 when NULL is returned.  This is a valid value to pass
 *              back in on the next call with the returned buffer.
 *   file  stream to read from.
 *
 * The trailing newline is consumed but not stored.  An empty line yields
 * an empty string (*blen == 1); it is not confused with end of input.
 *
 * Returns the (possibly moved) buffer holding the NUL-terminated line.
 * Returns NULL at end of input with nothing read, when b is NULL, or
 * when the buffer cannot be grown; in those cases the buffer has been
 * freed and must not be used again.
 */
char *super_fgets(char *b, int *blen, FILE *file)
{
  int
    c = EOF,
    cap = *blen,   /* bytes we are allowed to use at b */
    used = 0;      /* characters stored so far */

  *blen = 0;
  if (b == (char *) NULL)
    return (char *) NULL;

  for ( ; ; )
    {
      /* Keep room for the next character plus the terminator before
         reading, so both stores below are always in bounds. */
      if (used + 1 >= cap)
        {
          char
            *grown;

          if (cap > (INT_MAX - 2) / 2)
            {
              free(b);
              return (char *) NULL;
            }
          cap = (cap > 0) ? cap * 2 : 2;
          /* Do not assign straight to b: on failure realloc() leaves the
             old block allocated and we still have to release it. */
          grown = (char *) realloc(b, (size_t) cap + 2);
          if (grown == (char *) NULL)
            {
              free(b);
              return (char *) NULL;
            }
          b = grown;
        }

      c = fgetc(file);
      if (c == EOF || c == '\n')
        break;
      b[used++] = (char) c;
    }

  /* Only a read that hit EOF without storing anything means "no more
     lines"; a bare newline is a legitimate empty line. */
  if (c == EOF && used == 0)
    {
      free(b);
      return (char *) NULL;
    }

  b[used] = '\0';
  *blen = used + 1;
  return b;
}
364
365 #define BUFFER_SZ 4096
366
367 int main(int argc, char *argv[])
368 {
369 unsigned int
370 length;
371
372 unsigned char
373 *buffer;
374
375 int
376 i,
377 mode; /* iptc binary, or iptc text */
378
379 FILE
380 *ifile = stdin,
381 *ofile = stdout;
382
383 char
384 c,
385 *usage = "usage: iptcutil -t | -b [-i file] [-o file] <input >output";
386
387 if( argc < 2 )
388 {
389 puts(usage);
390 return 1;
391 }
392
393 mode = 0;
394 length = -1;
395 buffer = (unsigned char *)NULL;
396
397 for (i=1; i<argc; i++)
398 {
399 c = argv[i][0];
400 if (c == '-' || c == '/')
401 {
402 c = argv[i][1];
403 switch( c )
404 {
405 case 't':
406 mode = 1;
407 #ifdef WIN32
408 /* Set "stdout" to binary mode: */
409 _setmode( _fileno( ofile ), _O_BINARY );
410 #endif
411 break;
412 case 'b':
413 mode = 0;
414 #ifdef WIN32
415 /* Set "stdin" to binary mode: */
416 _setmode( _fileno( ifile ), _O_BINARY );
417 #endif
418 break;
419 case 'i':
420 if (mode == 0)
421 ifile = fopen(argv[++i], "rb");
422 else
423 ifile = fopen(argv[++i], "rt");
424 if (ifile == (FILE *)NULL)
425 {
426 printf("Unable to open: %s\n", argv[i]);
427 return 1;
428 }
429 break;
430 case 'o':
431 if (mode == 0)
432 ofile = fopen(argv[++i], "wt");
433 else
434 ofile = fopen(argv[++i], "wb");
435 if (ofile == (FILE *)NULL)
436 {
437 printf("Unable to open: %s\n", argv[i]);
438 return 1;
439 }
440 break;
441 default:
442 printf("Unknown option: %s\n", argv[i]);
443 return 1;
444 }
445 }
446 else
447 {
448 puts(usage);
449 return 1;
450 }
451 }
452
453 if (mode == 0) /* handle binary iptc info */
454 formatIPTC(ifile, ofile);
455
456 if (mode == 1) /* handle text form of iptc info */
457 {
458 char
459 brkused,
460 quoted,
461 *line,
462 *token,
463 *newstr;
464
465 int
466 state,
467 next;
468
469 unsigned char
470 recnum = 0,
471 dataset = 0;
472
473 int
474 inputlen = BUFFER_SZ;
475
476 line = (char *) malloc(inputlen);
477 token = (char *)NULL;
478 while((line = super_fgets(line,&inputlen,ifile))!=NULL)
479 {
480 state=0;
481 next=0;
482
483 token = (char *) malloc(inputlen);
484 newstr = (char *) malloc(inputlen);
485 while(tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
486 &brkused,&next,&quoted)==0)
487 {
488 if (state == 0)
489 {
490 int
491 state,
492 next;
493
494 char
495 brkused,
496 quoted;
497
498 state=0;
499 next=0;
500 while(tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
501 &brkused, &next, &quoted)==0)
502 {
503 if (state == 0)
504 dataset = (unsigned char) atoi(newstr);
505 else
506 if (state == 1)
507 recnum = (unsigned char) atoi(newstr);
508 state++;
509 }
510 }
511 else
512 if (state == 1)
513 {
514 int
515 next;
516
517 unsigned long
518 len;
519
520 char
521 brkused,
522 quoted;
523
524 next=0;
525 len = strlen(token);
526 while(tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
527 &brkused, &next, &quoted)==0)
528 {
529 if (brkused && next > 0)
530 {
531 char
532 *s = &token[next-1];
533
534 len -= convertHTMLcodes(s, strlen(s));
535 }
536 }
537
538 fputc(0x1c, ofile);
539 fputc(dataset, ofile);
540 fputc(recnum, ofile);
541 if (len < 0x10000)
542 {
543 fputc((len >> 8) & 255, ofile);
544 fputc(len & 255, ofile);
545 }
546 else
547 {
548 fputc(((len >> 24) & 255) | 0x80, ofile);
549 fputc((len >> 16) & 255, ofile);
550 fputc((len >> 8) & 255, ofile);
551 fputc(len & 255, ofile);
552 }
553 next=0;
554 while (len--)
555 fputc(token[next++], ofile);
556 }
557 state++;
558 }
559 free(token);
560 token = (char *)NULL;
561 free(newstr);
562 newstr = (char *)NULL;
563 }
564 free(line);
565
566 fclose( ifile );
567 fclose( ofile );
568 }
569
570 return 0;
571 }
572
573 /*
   This routine is a generalized, finite state token parser. It allows
   you to extract tokens one at a time from a string of characters.  The
576 characters used for white space, for break characters, and for quotes
577 can be specified. Also, characters in the string can be preceded by
578 a specifiable escape character which removes any special meaning the
579 character may have.
580
581 There are a lot of formal parameters in this subroutine call, but
582 once you get familiar with them, this routine is fairly easy to use.
583 "#define" macros can be used to generate simpler looking calls for
584 commonly used applications of this routine.
585
586 First, some terminology:
587
588 token: used here, a single unit of information in
589 the form of a group of characters.
590
591 white space: space that gets ignored (except within quotes
592 or when escaped), like blanks and tabs. in
593 addition, white space terminates a non-quoted
594 token.
595
596 break character: a character that separates non-quoted tokens.
597 commas are a common break character. the
598 usage of break characters to signal the end
599 of a token is the same as that of white space,
600 except multiple break characters with nothing
601 or only white space between generate a null
602 token for each two break characters together.
603
604 for example, if blank is set to be the white
605 space and comma is set to be the break
606 character, the line ...
607
608 A, B, C , , DEF
609
610 ... consists of 5 tokens:
611
612 1) "A"
613 2) "B"
614 3) "C"
615 4) "" (the null string)
616 5) "DEF"
617
618 quote character: a character that, when surrounding a group
619 of other characters, causes the group of
620 characters to be treated as a single token,
621 no matter how many white spaces or break
622 characters exist in the group. also, a
623 token always terminates after the closing
624 quote. for example, if ' is the quote
625 character, blank is white space, and comma
626 is the break character, the following
627 string ...
628
629 A, ' B, CD'EF GHI
630
631 ... consists of 4 tokens:
632
633 1) "A"
634 2) " B, CD" (note the blanks & comma)
635 3) "EF"
636 4) "GHI"
637
638 the quote characters themselves do
639 not appear in the resultant tokens. the
640 double quotes are delimiters i use here for
641 documentation purposes only.
642
643 escape character: a character which itself is ignored but
644 which causes the next character to be
645 used as is. ^ and \ are often used as
646 escape characters. an escape in the last
647 position of the string gets treated as a
648 "normal" (i.e., non-quote, non-white,
649 non-break, and non-escape) character.
650 for example, assume white space, break
651 character, and quote are the same as in the
652 above examples, and further, assume that
653 ^ is the escape character. then, in the
654 string ...
655
656 ABC, ' DEF ^' GH' I ^ J K^ L ^
657
658 ... there are 7 tokens:
659
660 1) "ABC"
661 2) " DEF ' GH"
662 3) "I"
663 4) " " (a lone blank)
664 5) "J"
665 6) "K L"
666 7) "^" (passed as is at end of line)
667
668
669 OK, now that you have this background, here's how to call "tokenizer":
670
671 result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
672 brkused,next,quoted)
673
674 result: 0 if we haven't reached EOS (end of string), and
675 1 if we have (this is an "int").
676
   flag:    right now, only the low order 2 bits are used.
               1 => convert non-quoted tokens to upper case
               2 => convert non-quoted tokens to lower case
               0 => do not convert non-quoted tokens
            (this is an "unsigned").
682
683 token: a character string containing the returned next token
684 (this is a "char[]").
685
686 maxtok: the maximum size of "token". characters beyond
687 "maxtok" are truncated (this is an "int").
688
689 string: the string to be parsed (this is a "char[]").
690
691 white: a string of the valid white spaces. example:
692
693 char whitesp[]={" \t"};
694
695 blank and tab will be valid white space (this is
696 a "char[]").
697
698 break: a string of the valid break characters. example:
699
700 char breakch[]={";,"};
701
702 semicolon and comma will be valid break characters
703 (this is a "char[]").
704
705 IMPORTANT: do not use the name "break" as a C
706 variable, as this is a reserved word in C.
707
708 quote: a string of the valid quote characters. an example
709 would be
710
               char quotech[]={"'\""};
712
713 (this causes single and double quotes to be valid)
714 note that a token starting with one of these characters
715 needs the same quote character to terminate it.
716
717 for example,
718
719 "ABC '
720
721 is unterminated, but
722
723 "DEF" and 'GHI'
724
725 are properly terminated. note that different quote
726 characters can appear on the same line; only for
727 a given token do the quote characters have to be
728 the same (this is a "char[]").
729
730 escape: the escape character (NOT a string ... only one
731 allowed). use zero if none is desired (this is
732 a "char").
733
734 brkused: the break character used to terminate the current
735 token. if the token was quoted, this will be the
736 quote used. if the token is the last one on the
737 line, this will be zero (this is a pointer to a
738 "char").
739
740 next: this variable points to the first character of the
741 next token. it gets reset by "tokenizer" as it steps
742 through the string. set it to 0 upon initialization,
743 and leave it alone after that. you can change it
744 if you want to jump around in the string or re-parse
745 from the beginning, but be careful (this is a
746 pointer to an "int").
747
748 quoted: set to 1 (true) if the token was quoted and 0 (false)
749 if not. you may need this information (for example:
750 in C, a string with quotes around it is a character
751 string, while one without is an identifier).
752
753 (this is a pointer to a "char").
754 */
755
756 /* states */
757
#define IN_WHITE 0   /* skipping white space, no token started yet */
#define IN_TOKEN 1   /* inside an unquoted token */
#define IN_QUOTE 2   /* inside a quoted token */
#define IN_OZONE 3   /* token finished, looking for what ends it */

/*
 * Parser state shared between tokenizer() and chstore().  It is reset at
 * the start of every tokenizer() call, but because it lives at file
 * scope the parser is not reentrant.
 */
int      _p_state;     /* current state */
unsigned _p_flag;      /* option flag */
char     _p_curquote;  /* current quote char */
int      _p_tokpos;    /* current token pos */

/*
 * Find a character in a string ... used only by "tokenizer".
 * Returns the zero-based position of the first occurrence of ch in
 * string, or -1 when ch does not occur before the terminating NUL.
 */
int sindex(char ch,char *string)
{
  char *cp;

  for (cp = string; *cp; ++cp)
    if (ch == *cp)
      return (int) (cp - string);   /* return position of character */
  return -1;                        /* eol ... no match found */
}

/*
 * Store a character in the token being built ... used only by
 * "tokenizer".
 *
 * Appends ch at _p_tokpos and advances _p_tokpos, unless that would
 * leave no room for the terminator in a buffer of max bytes, in which
 * case the character is silently dropped (truncation).  Outside quotes
 * the character is case-converted according to the low two bits of
 * _p_flag (1 = upper, 2 = lower, anything else = unchanged).
 */
void chstore(char *string,int max,char ch)
{
  char c;

  if (_p_tokpos >= 0 && _p_tokpos < max - 1)
    {
      if (_p_state == IN_QUOTE)
        c = ch;
      else
        switch (_p_flag & 3)
          {
          /* toupper/tolower are undefined for negative arguments, and
             plain char may be signed: convert via unsigned char. */
          case 1:             /* convert to upper */
            c = (char) toupper((unsigned char) ch);
            break;

          case 2:             /* convert to lower */
            c = (char) tolower((unsigned char) ch);
            break;

          default:            /* use as is */
            c = ch;
            break;
          }
      string[_p_tokpos++] = c;
    }
  return;
}

/*
 * Extract the next token from line, starting at line[*next].
 *
 *   inflag   case conversion for unquoted tokens (see chstore).
 *   token    receives the NUL-terminated token.
 *   tokmax   size of token in bytes; longer tokens are truncated.
 *   line     the string being parsed.
 *   white    characters treated as white space.
 *   brkchar  characters that separate tokens.
 *   quote    characters that may open/close a quoted token.
 *   eschar   escape character, or 0 for none.
 *   brkused  out: the break or quote character that ended the token, or
 *            0 if it ended some other way.
 *   next     in/out: index into line where parsing starts/resumes.
 *   quoted   out: 1 if any part of the token was quoted, else 0.
 *
 * Returns 1 when line[*next] is already the end of the string (no token
 * is produced and token is not written), otherwise 0.
 */
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
char *white,char *brkchar,char *quote,char eschar,char *brkused,
int *next,char *quoted)
{
  int qp;
  char c,nc;

  *brkused = 0;         /* initialize to null */
  *quoted = 0;          /* assume not quoted */

  if (!line[*next])     /* if we're at end of line, indicate such */
    return 1;

  _p_state = IN_WHITE;  /* initialize state */
  _p_curquote = 0;      /* initialize previous quote char */
  _p_flag = inflag;     /* set option flag */

  for (_p_tokpos = 0; (c = line[*next]); ++(*next))   /* main loop */
    {
      if ((qp = sindex(c, brkchar)) >= 0)             /* break */
        {
          switch (_p_state)
            {
            case IN_WHITE:    /* these are the same here ... */
            case IN_TOKEN:    /* ... just get out */
            case IN_OZONE:    /* ditto */
              ++(*next);      /* consume the break character */
              *brkused = brkchar[qp];
              goto byebye;

            case IN_QUOTE:    /* just keep going */
              chstore(token, tokmax, c);
              break;
            }
        }
      else if ((qp = sindex(c, quote)) >= 0)          /* quote */
        {
          switch (_p_state)
            {
            case IN_WHITE:
              _p_state = IN_QUOTE;      /* change states */
              _p_curquote = quote[qp];  /* save quote char */
              *quoted = 1;  /* set to true as long as something is in quotes */
              break;

            case IN_QUOTE:
              if (quote[qp] == _p_curquote) /* same as the beginning quote? */
                {
                  _p_state = IN_OZONE;
                  _p_curquote = 0;
                }
              else
                chstore(token, tokmax, c);  /* treat as regular char */
              break;

            case IN_TOKEN:
            case IN_OZONE:
              /* The quote is left unconsumed: it opens the next token. */
              *brkused = c;   /* uses quote as break char */
              goto byebye;
            }
        }
      else if ((qp = sindex(c, white)) >= 0)          /* white */
        {
          switch (_p_state)
            {
            case IN_WHITE:
            case IN_OZONE:
              break;          /* keep going */

            case IN_TOKEN:
              _p_state = IN_OZONE;
              break;

            case IN_QUOTE:
              chstore(token, tokmax, c);    /* it's valid here */
              break;
            }
        }
      else if (c == eschar)                           /* escape */
        {
          nc = line[(*next) + 1];
          if (nc == 0)        /* end of line */
            {
              *brkused = 0;
              chstore(token, tokmax, c);
              ++(*next);
              goto byebye;
            }
          switch (_p_state)
            {
            case IN_WHITE:
              /* Step back so the loop increment revisits the escape
                 character, this time in the IN_TOKEN state. */
              --(*next);
              _p_state = IN_TOKEN;
              break;

            case IN_TOKEN:
            case IN_QUOTE:
              ++(*next);      /* skip the escape, store what follows */
              chstore(token, tokmax, nc);
              break;

            case IN_OZONE:
              goto byebye;
            }
        }
      else        /* anything else is just a real character */
        {
          switch (_p_state)
            {
            case IN_WHITE:
              _p_state = IN_TOKEN;    /* switch states */
              /* fall through */

            case IN_TOKEN:            /* these 2 are */
            case IN_QUOTE:            /* identical here */
              chstore(token, tokmax, c);
              break;

            case IN_OZONE:
              goto byebye;
            }
        }
    }             /* end of main loop */

byebye:
  /* make sure token ends with EOS; chstore keeps _p_tokpos < tokmax-1,
     so this is in bounds whenever the buffer has any room at all */
  if (tokmax > 0)
    token[_p_tokpos] = 0;

  return 0;
}
936 /*
937 * Local Variables:
938 * mode: c
939 * c-basic-offset: 8
940 * fill-column: 78
941 * End:
942 */