]> git.saurik.com Git - wxWidgets.git/blame - src/tiff/contrib/iptcutil/iptcutil.c
Merged libtiff 4.0.3 changes into the trunk.
[wxWidgets.git] / src / tiff / contrib / iptcutil / iptcutil.c
CommitLineData
8414a40c
VZ
1/* $Id$ */
2
#include "tif_config.h"

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif

#ifdef HAVE_IO_H
# include <io.h>
#endif

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif
21
22#ifdef WIN32
23#define STRNICMP strnicmp
24#else
25#define STRNICMP strncasecmp
26#endif
27
/* One entry of the lookup table that maps a numeric tag id to a label. */
typedef struct _tag_spec
{
    short id;       /* numeric id that is compared against the record byte */
    char *name;     /* human-readable label printed next to the id */
} tag_spec;

/*
 * Known tag ids and their labels.  Ids that are not listed here are
 * still processed by the rest of the file; they simply have no label.
 */
static tag_spec tags[] = {
    {   5, "Image Name" },
    {   7, "Edit Status" },
    {  10, "Priority" },
    {  15, "Category" },
    {  20, "Supplemental Category" },
    {  22, "Fixture Identifier" },
    {  25, "Keyword" },
    {  30, "Release Date" },
    {  35, "Release Time" },
    {  40, "Special Instructions" },
    {  45, "Reference Service" },
    {  47, "Reference Date" },
    {  50, "Reference Number" },
    {  55, "Created Date" },
    {  60, "Created Time" },
    {  65, "Originating Program" },
    {  70, "Program Version" },
    {  75, "Object Cycle" },
    {  80, "Byline" },
    {  85, "Byline Title" },
    {  90, "City" },
    {  95, "Province State" },
    { 100, "Country Code" },
    { 101, "Country" },
    { 103, "Original Transmission Reference" },
    { 105, "Headline" },
    { 110, "Credit" },
    { 115, "Source" },
    { 116, "Copyright String" },
    { 120, "Caption" },
    { 121, "Local Caption" },
    { 122, "Caption Writer" },
    /* application-defined fields */
    { 200, "Custom Field 1" },
    { 201, "Custom Field 2" },
    { 202, "Custom Field 3" },
    { 203, "Custom Field 4" },
    { 204, "Custom Field 5" },
    { 205, "Custom Field 6" },
    { 206, "Custom Field 7" },
    { 207, "Custom Field 8" },
    { 208, "Custom Field 9" },
    { 209, "Custom Field 10" },
    { 210, "Custom Field 11" },
    { 211, "Custom Field 12" },
    { 212, "Custom Field 13" },
    { 213, "Custom Field 14" },
    { 214, "Custom Field 15" },
    { 215, "Custom Field 16" },
    { 216, "Custom Field 17" },
    { 217, "Custom Field 18" },
    { 218, "Custom Field 19" },
    { 219, "Custom Field 20" }
};
91
/*
 * Write the first `len` bytes of `s` to `ofile` as one double-quoted line.
 *
 * We format the output using HTML conventions to preserve control
 * characters and such:
 *   - '&' becomes "&amp;" and '"' becomes "&quot;";
 *   - '<' and '>' become "&lt;" / "&gt;" only when HANDLE_GT_LT is defined;
 *   - bytes for which iscntrl() is true become a decimal reference "&#N;";
 *   - every other byte is written unchanged.
 * The output is terminated by a closing quote and a newline.  Nothing is
 * written between the quotes when `len` is zero or negative.
 */
void formatString(FILE *ofile, const char *s, int len)
{
    int i;

    putc('"', ofile);
    for (i = 0; i < len; i++)
    {
        /*
         * Work on the unsigned value of the byte: passing a negative
         * plain char to iscntrl() is undefined behaviour.
         */
        const unsigned char c = (unsigned char) s[i];

        switch (c)
        {
        case '&':
            fputs("&amp;", ofile);
            break;
#ifdef HANDLE_GT_LT
        case '<':
            fputs("&lt;", ofile);
            break;
        case '>':
            fputs("&gt;", ofile);
            break;
#endif
        case '"':
            fputs("&quot;", ofile);
            break;
        default:
            if (iscntrl(c))
                fprintf(ofile, "&#%d;", (int) c);
            else
                putc(c, ofile);
            break;
        }
    }
    fputs("\"\n", ofile);
}
126
/* One named HTML escape sequence and the character it stands for. */
typedef struct _html_code
{
    short len;          /* number of characters in `code` */
    const char *code;   /* the escape sequence, e.g. "&amp;" */
    const char val;     /* the character the sequence is replaced with */
} html_code;

static const html_code html_codes[] = {
#ifdef HANDLE_GT_LT
    { 4, "&lt;", '<' },
    { 4, "&gt;", '>' },
#endif
    { 5, "&amp;", '&' },
    { 6, "&quot;", '"' }
};

/*
 * Return non-zero when the first `n` characters of `s` equal `code`,
 * ignoring case.  Stops at the first mismatch, so a terminator inside
 * `s` ends the comparison before anything past it is read.
 */
static int htmlCodeMatches(const char *s, const char *code, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        if (tolower((unsigned char) s[i]) != tolower((unsigned char) code[i]))
            return 0;
    }
    return 1;
}

/*
 * This routine converts an HTML escape sequence at the start of `s`
 * back to the original character, in place.
 *
 *   s   - NUL-terminated, writable string; a sequence is recognised only
 *         when it starts at s[0].
 *   len - number of characters available in `s`; named sequences longer
 *         than this are not considered.
 *
 * Recognised forms are "&#N;" with one to three decimal digits and a
 * value of at most 255, and the named sequences in html_codes (matched
 * without regard to case).  On a match s[0] becomes the decoded
 * character and the rest of the string is shifted down over the
 * sequence.
 *
 * Returns the number of characters dropped, or 0 when nothing was
 * converted (in which case the string is left untouched).
 */
int convertHTMLcodes(char *s, int len)
{
    if (len <= 0 || s == (char *) NULL || *s != '&')
        return 0;

    if (s[1] == '#')
    {
        int val = 0;
        int end = 2;    /* index of the first digit, later of the ';' */

        while (end < 5 && isdigit((unsigned char) s[end]))
        {
            val = val * 10 + (s[end] - '0');
            end++;
        }
        /* Require at least one digit, the closing ';' and a byte value. */
        if (end == 2 || s[end] != ';' || val > 255)
            return 0;

        /* Source and destination overlap, so strcpy() must not be used. */
        memmove(s + 1, s + end + 1, strlen(s + end + 1) + 1);
        *s = (char) val;
        return end;
    }
    else
    {
        const int codes = (int) (sizeof(html_codes) / sizeof(html_codes[0]));
        int i;

        for (i = 0; i < codes; i++)
        {
            const int n = html_codes[i].len;

            if (n <= len && htmlCodeMatches(s, html_codes[i].code, n))
            {
                memmove(s + 1, s + n, strlen(s + n) + 1);
                *s = html_codes[i].val;
                return n - 1;
            }
        }
    }

    return 0;
}
194
195int formatIPTC(FILE *ifile, FILE *ofile)
196{
197 unsigned int
198 foundiptc,
199 tagsfound;
200
201 unsigned char
202 recnum,
203 dataset;
204
205 char
206 *readable,
207 *str;
208
209 long
210 tagindx,
211 taglen;
212
213 int
214 i,
215 tagcount = sizeof(tags) / sizeof(tag_spec);
216
217 char
218 c;
219
220 foundiptc = 0; /* found the IPTC-Header */
221 tagsfound = 0; /* number of tags found */
222
223 c = getc(ifile);
224 while (c != EOF)
225 {
226 if (c == 0x1c)
227 foundiptc = 1;
228 else
229 {
230 if (foundiptc)
231 return -1;
232 else
233 continue;
234 }
235
236 /* we found the 0x1c tag and now grab the dataset and record number tags */
237 dataset = getc(ifile);
238 if ((char) dataset == EOF)
239 return -1;
240 recnum = getc(ifile);
241 if ((char) recnum == EOF)
242 return -1;
243 /* try to match this record to one of the ones in our named table */
244 for (i=0; i< tagcount; i++)
245 {
246 if (tags[i].id == recnum)
247 break;
248 }
249 if (i < tagcount)
250 readable = tags[i].name;
251 else
252 readable = "";
253
254 /* then we decode the length of the block that follows - long or short fmt */
255 c = getc(ifile);
256 if (c == EOF)
257 return 0;
258 if (c & (unsigned char) 0x80)
259 {
260 unsigned char
261 buffer[4];
262
263 for (i=0; i<4; i++)
264 {
265 c = buffer[i] = getc(ifile);
266 if (c == EOF)
267 return -1;
268 }
269 taglen = (((long) buffer[ 0 ]) << 24) |
270 (((long) buffer[ 1 ]) << 16) |
271 (((long) buffer[ 2 ]) << 8) |
272 (((long) buffer[ 3 ]));
273 }
274 else
275 {
276 unsigned char
277 x = c;
278
279 taglen = ((long) x) << 8;
280 x = getc(ifile);
281 if ((char)x == EOF)
282 return -1;
283 taglen |= (long) x;
284 }
285 /* make a buffer to hold the tag data and snag it from the input stream */
286 str = (char *) malloc((unsigned int) (taglen+1));
287 if (str == (char *) NULL)
288 {
289 printf("Memory allocation failed");
290 return 0;
291 }
292 for (tagindx=0; tagindx<taglen; tagindx++)
293 {
294 c = str[tagindx] = getc(ifile);
295 if (c == EOF)
80ed523f
VZ
296 {
297 free(str);
298 return -1;
299 }
8414a40c
VZ
300 }
301 str[ taglen ] = 0;
302
303 /* now finish up by formatting this binary data into ASCII equivalent */
304 if (strlen(readable) > 0)
305 fprintf(ofile, "%d#%d#%s=",(unsigned int)dataset, (unsigned int) recnum, readable);
306 else
307 fprintf(ofile, "%d#%d=",(unsigned int)dataset, (unsigned int) recnum);
308 formatString( ofile, str, taglen );
309 free(str);
310
311 tagsfound++;
312
313 c = getc(ifile);
314 }
315 return tagsfound;
316}
317
int tokenizer(unsigned inflag, char *token, int tokmax, char *line,
              char *white, char *brkchar, char *quote, char eschar,
              char *brkused, int *next, char *quoted);

/*
 * Read one line of arbitrary length from `file` into a heap buffer.
 *
 *   b    - buffer obtained from malloc()/realloc(); this function takes
 *          ownership of it and may move it while growing.
 *   blen - in: the capacity of `b` in bytes; out: length of the line
 *          read plus one for the terminator, or 0 when NULL is
 *          returned.  Note that the value written back is the line
 *          size, not the allocated size.
 *   file - stream to read from; reading stops at '\n' (not stored) or
 *          at end of file.
 *
 * Returns the (possibly moved) buffer holding the NUL-terminated line.
 * Returns NULL when no character was read before the newline or end of
 * file - an empty line is therefore reported like end of input - or
 * when memory cannot be obtained.  Whenever NULL is returned the buffer
 * has been released, so callers must always continue with the return
 * value and never with the pointer they passed in.
 */
char *super_fgets(char *b, int *blen, FILE *file)
{
    int cap = *blen;    /* capacity the caller claims for b */
    size_t used = 0;    /* characters stored so far */
    int c;

    *blen = 0;
    if (b == (char *) NULL)
        return (char *) NULL;

    for (;;)
    {
        c = fgetc(file);
        if (c == EOF || c == '\n')
            break;

        /* Keep room for this character and the terminator. */
        if (cap < 1 || used + 1 >= (size_t) cap)
        {
            char *grown;

            if (cap > (INT_MAX - 2) / 2)
            {
                free(b);
                return (char *) NULL;
            }
            cap = (cap < 1) ? 2 : cap * 2;
            /* Go through a temporary so a failed realloc() cannot leak b. */
            grown = realloc(b, (size_t) cap + 2);
            if (grown == NULL)
            {
                free(b);
                return (char *) NULL;
            }
            b = grown;
        }
        b[used++] = (char) (unsigned char) c;
    }

    if (used == 0)
    {
        free(b);
        return (char *) NULL;
    }
    b[used] = '\0';
    *blen = (int) used + 1;
    return b;
}
365
#define BUFFER_SZ 4096

/*
 * Write one binary record to `ofile`: the 0x1c marker, the two id
 * bytes, the length field and `len` bytes taken from `data`.
 *
 * NOTE(review): lengths below 0x10000 are written as two bytes and
 * larger ones as four bytes with the top bit set, exactly as before.
 * formatIPTC() in this file treats a set top bit of the first length
 * byte as the long form and then reads four further bytes, so neither
 * a short length of 0x8000..0xFFFF nor the long form written here
 * would be read back by it unchanged - confirm the intended layout
 * before changing either side.
 */
static void writeIPTCRecord(FILE *ofile, unsigned char dataset,
                            unsigned char recnum, const char *data,
                            unsigned long len)
{
    fputc(0x1c, ofile);
    fputc(dataset, ofile);
    fputc(recnum, ofile);
    if (len < 0x10000)
    {
        fputc((int) ((len >> 8) & 255), ofile);
        fputc((int) (len & 255), ofile);
    }
    else
    {
        fputc((int) (((len >> 24) & 255) | 0x80), ofile);
        fputc((int) ((len >> 16) & 255), ofile);
        fputc((int) ((len >> 8) & 255), ofile);
        fputc((int) (len & 255), ofile);
    }
    fwrite(data, 1, (size_t) len, ofile);
}

/*
 * Convert the text form read from `ifile` into binary records on
 * `ofile`.  Each input line is split at '=' (honouring double quotes):
 * the first part is split at '#' to get the two id numbers, the second
 * part has its HTML escapes decoded and becomes the record data.
 *
 * Returns 0 when the input was consumed and -1 when memory ran out.
 */
static int convertTextToIPTC(FILE *ifile, FILE *ofile)
{
    /* The ids persist across lines, as they always have. */
    unsigned char dataset = 0;
    unsigned char recnum = 0;
    int inputlen = BUFFER_SZ;
    char *line = malloc((size_t) inputlen);

    if (line == NULL)
        return -1;

    while ((line = super_fgets(line, &inputlen, ifile)) != NULL)
    {
        char *token = malloc((size_t) inputlen);
        char *newstr = malloc((size_t) inputlen);
        char brkused;
        char quoted;
        int part = 0;   /* 0: "ids" before '=', 1: value after '=' */
        int next = 0;

        if (token == NULL || newstr == NULL)
        {
            free(token);
            free(newstr);
            free(line);
            return -1;
        }

        while (tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
                         &brkused, &next, &quoted) == 0)
        {
            if (part == 0)
            {
                char idbrk;
                char idquoted;
                int field = 0;
                int pos = 0;

                while (tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
                                 &idbrk, &pos, &idquoted) == 0)
                {
                    if (field == 0)
                        dataset = (unsigned char) atoi(newstr);
                    else if (field == 1)
                        recnum = (unsigned char) atoi(newstr);
                    field++;
                }
            }
            else if (part == 1)
            {
                unsigned long len = strlen(token);
                char valbrk;
                char valquoted;
                int pos = 0;

                /*
                 * Walk the value from '&' to '&'; every escape that is
                 * decoded shrinks the string in place, so the running
                 * length is reduced by the number of dropped characters.
                 */
                while (tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
                                 &valbrk, &pos, &valquoted) == 0)
                {
                    if (valbrk && pos > 0)
                    {
                        char *s = &token[pos - 1];

                        len -= convertHTMLcodes(s, (int) strlen(s));
                    }
                }

                writeIPTCRecord(ofile, dataset, recnum, token, len);
            }
            part++;
        }
        free(token);
        free(newstr);
    }
    /* The loop only ends with line == NULL, so nothing is left to free. */
    return 0;
}

/*
 * iptcutil entry point.
 *
 *   -t        convert the text form to binary records
 *   -b        convert binary records to the text form (the default)
 *   -i file   read from `file` instead of stdin
 *   -o file   write to `file` instead of stdout
 *
 * The open mode used for -i / -o depends on the mode selected by the
 * options that precede it.  Returns 1 for usage errors, files that
 * cannot be opened, memory exhaustion and output that cannot be
 * flushed; 0 otherwise.
 */
int main(int argc, char *argv[])
{
    const char *usage = "usage: iptcutil -t | -b [-i file] [-o file] <input >output";
    FILE *ifile = stdin;
    FILE *ofile = stdout;
    int mode = 0;       /* 0: iptc binary, 1: iptc text */
    int status = 0;
    int i;

    if (argc < 2)
    {
        puts(usage);
        return 1;
    }

    for (i = 1; i < argc; i++)
    {
        const char *arg = argv[i];

        if (arg[0] != '-' && arg[0] != '/')
        {
            puts(usage);
            return 1;
        }

        switch (arg[1])
        {
        case 't':
            mode = 1;
#ifdef WIN32
            /* Set "stdout" to binary mode: */
            _setmode( _fileno( ofile ), _O_BINARY );
#endif
            break;
        case 'b':
            mode = 0;
#ifdef WIN32
            /* Set "stdin" to binary mode: */
            _setmode( _fileno( ifile ), _O_BINARY );
#endif
            break;
        case 'i':
            /* The file name is the next argument; make sure there is one. */
            if (i + 1 >= argc)
            {
                puts(usage);
                return 1;
            }
            i++;
            ifile = fopen(argv[i], mode == 0 ? "rb" : "rt");
            if (ifile == (FILE *) NULL)
            {
                printf("Unable to open: %s\n", argv[i]);
                return 1;
            }
            break;
        case 'o':
            if (i + 1 >= argc)
            {
                puts(usage);
                return 1;
            }
            i++;
            ofile = fopen(argv[i], mode == 0 ? "wt" : "wb");
            if (ofile == (FILE *) NULL)
            {
                printf("Unable to open: %s\n", argv[i]);
                return 1;
            }
            break;
        default:
            printf("Unknown option: %s\n", argv[i]);
            return 1;
        }
    }

    if (mode == 0)  /* handle binary iptc info */
    {
        formatIPTC(ifile, ofile);
    }
    else            /* handle text form of iptc info */
    {
        if (convertTextToIPTC(ifile, ofile) != 0)
        {
            fputs("Memory allocation failed\n", stderr);
            status = 1;
        }
    }

    fclose(ifile);
    /* fclose() flushes; a failure here means output was lost. */
    if (fclose(ofile) != 0)
        status = 1;

    return status;
}
573
574/*
575 This routine is a generalized, finite state token parser. It allows
  you to extract tokens one at a time from a string of characters.  The
577 characters used for white space, for break characters, and for quotes
578 can be specified. Also, characters in the string can be preceded by
579 a specifiable escape character which removes any special meaning the
580 character may have.
581
582 There are a lot of formal parameters in this subroutine call, but
583 once you get familiar with them, this routine is fairly easy to use.
584 "#define" macros can be used to generate simpler looking calls for
585 commonly used applications of this routine.
586
587 First, some terminology:
588
589 token: used here, a single unit of information in
590 the form of a group of characters.
591
592 white space: space that gets ignored (except within quotes
593 or when escaped), like blanks and tabs. in
594 addition, white space terminates a non-quoted
595 token.
596
597 break character: a character that separates non-quoted tokens.
598 commas are a common break character. the
599 usage of break characters to signal the end
600 of a token is the same as that of white space,
601 except multiple break characters with nothing
602 or only white space between generate a null
603 token for each two break characters together.
604
605 for example, if blank is set to be the white
606 space and comma is set to be the break
607 character, the line ...
608
609 A, B, C , , DEF
610
611 ... consists of 5 tokens:
612
613 1) "A"
614 2) "B"
615 3) "C"
616 4) "" (the null string)
617 5) "DEF"
618
619 quote character: a character that, when surrounding a group
620 of other characters, causes the group of
621 characters to be treated as a single token,
622 no matter how many white spaces or break
623 characters exist in the group. also, a
624 token always terminates after the closing
625 quote. for example, if ' is the quote
626 character, blank is white space, and comma
627 is the break character, the following
628 string ...
629
630 A, ' B, CD'EF GHI
631
632 ... consists of 4 tokens:
633
634 1) "A"
635 2) " B, CD" (note the blanks & comma)
636 3) "EF"
637 4) "GHI"
638
639 the quote characters themselves do
640 not appear in the resultant tokens. the
641 double quotes are delimiters i use here for
642 documentation purposes only.
643
644 escape character: a character which itself is ignored but
645 which causes the next character to be
646 used as is. ^ and \ are often used as
647 escape characters. an escape in the last
648 position of the string gets treated as a
649 "normal" (i.e., non-quote, non-white,
650 non-break, and non-escape) character.
651 for example, assume white space, break
652 character, and quote are the same as in the
653 above examples, and further, assume that
654 ^ is the escape character. then, in the
655 string ...
656
657 ABC, ' DEF ^' GH' I ^ J K^ L ^
658
659 ... there are 7 tokens:
660
661 1) "ABC"
662 2) " DEF ' GH"
663 3) "I"
664 4) " " (a lone blank)
665 5) "J"
666 6) "K L"
667 7) "^" (passed as is at end of line)
668
669
670 OK, now that you have this background, here's how to call "tokenizer":
671
672 result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
673 brkused,next,quoted)
674
675 result: 0 if we haven't reached EOS (end of string), and
676 1 if we have (this is an "int").
677
  flag:    right now, only the low order 2 bits are used.
           1 => convert non-quoted tokens to upper case
           2 => convert non-quoted tokens to lower case
           0 => do not convert non-quoted tokens
           (this is an "unsigned").
683
684 token: a character string containing the returned next token
685 (this is a "char[]").
686
687 maxtok: the maximum size of "token". characters beyond
688 "maxtok" are truncated (this is an "int").
689
690 string: the string to be parsed (this is a "char[]").
691
692 white: a string of the valid white spaces. example:
693
694 char whitesp[]={" \t"};
695
696 blank and tab will be valid white space (this is
697 a "char[]").
698
699 break: a string of the valid break characters. example:
700
701 char breakch[]={";,"};
702
703 semicolon and comma will be valid break characters
704 (this is a "char[]").
705
706 IMPORTANT: do not use the name "break" as a C
707 variable, as this is a reserved word in C.
708
709 quote: a string of the valid quote characters. an example
710 would be
711
               char quotech[]={"'\""};
713
714 (this causes single and double quotes to be valid)
715 note that a token starting with one of these characters
716 needs the same quote character to terminate it.
717
718 for example,
719
720 "ABC '
721
722 is unterminated, but
723
724 "DEF" and 'GHI'
725
726 are properly terminated. note that different quote
727 characters can appear on the same line; only for
728 a given token do the quote characters have to be
729 the same (this is a "char[]").
730
731 escape: the escape character (NOT a string ... only one
732 allowed). use zero if none is desired (this is
733 a "char").
734
735 brkused: the break character used to terminate the current
736 token. if the token was quoted, this will be the
737 quote used. if the token is the last one on the
738 line, this will be zero (this is a pointer to a
739 "char").
740
741 next: this variable points to the first character of the
742 next token. it gets reset by "tokenizer" as it steps
743 through the string. set it to 0 upon initialization,
744 and leave it alone after that. you can change it
745 if you want to jump around in the string or re-parse
746 from the beginning, but be careful (this is a
747 pointer to an "int").
748
749 quoted: set to 1 (true) if the token was quoted and 0 (false)
750 if not. you may need this information (for example:
751 in C, a string with quotes around it is a character
752 string, while one without is an identifier).
753
754 (this is a pointer to a "char").
755*/
756
/* states */

#define IN_WHITE 0  /* before a token: nothing stored yet */
#define IN_TOKEN 1  /* inside an unquoted token */
#define IN_QUOTE 2  /* inside a quoted token */
#define IN_OZONE 3  /* token finished, waiting for a break or the next token */

/*
 * Parser state shared by tokenizer() and chstore().  It is reset at the
 * start of every tokenizer() call that has input left; because it is
 * file-level state, these routines are not reentrant.
 */
int      _p_state;          /* current state */
unsigned _p_flag;           /* option flag */
char     _p_curquote;       /* current quote char */
int      _p_tokpos;         /* current token pos */

/*
 * routine to find character in string ... used only by "tokenizer"
 *
 * Returns the index of the first occurrence of `ch` in `string`, or -1
 * when it does not occur.  The terminating NUL never matches.
 */
int sindex(char ch, char *string)
{
    char *cp;

    for (cp = string; *cp; ++cp)
    {
        if (ch == *cp)
            return (int) (cp - string);     /* return position of character */
    }
    return -1;                              /* eol ... no match found */
}

/*
 * routine to store a character in a string ... used only by "tokenizer"
 *
 * Appends `ch` at _p_tokpos in `string` and advances _p_tokpos.  The
 * character is dropped when fewer than two bytes remain below `max`, so
 * there is always room for a terminator.  Outside of quotes the low two
 * bits of _p_flag select upper-case (1) or lower-case (2) conversion.
 */
void chstore(char *string, int max, char ch)
{
    char c = ch;

    if (_p_tokpos < 0 || _p_tokpos >= max - 1)
        return;

    if (_p_state != IN_QUOTE)
    {
        /*
         * toupper()/tolower() need the unsigned value of the byte;
         * a negative plain char is undefined behaviour there.
         */
        switch (_p_flag & 3)
        {
        case 1:     /* convert to upper */
            c = (char) toupper((unsigned char) ch);
            break;

        case 2:     /* convert to lower */
            c = (char) tolower((unsigned char) ch);
            break;

        default:    /* use as is */
            break;
        }
    }
    string[_p_tokpos++] = c;
}

/*
 * Extract the next token from `line`, starting at index *next.
 *
 * Returns 1 without touching `token` when line[*next] is already the
 * end of the string, 0 otherwise.  On a 0 return `token` holds the
 * NUL-terminated token (truncated to fit `tokmax`), *next is the index
 * to continue from, *brkused is the break or quote character that ended
 * the token (0 if none) and *quoted is 1 when an opening quote was seen.
 */
int tokenizer(unsigned inflag, char *token, int tokmax, char *line,
              char *white, char *brkchar, char *quote, char eschar,
              char *brkused, int *next, char *quoted)
{
    int qp;
    char c;
    char nc;

    *brkused = 0;               /* initialize to null */
    *quoted = 0;                /* assume not quoted */

    if (!line[*next])           /* if we're at end of line, indicate such */
        return 1;

    _p_state = IN_WHITE;        /* initialize state */
    _p_curquote = 0;            /* initialize previous quote char */
    _p_flag = inflag;           /* set option flag */

    for (_p_tokpos = 0; (c = line[*next]) != 0; ++(*next))  /* main loop */
    {
        if ((qp = sindex(c, brkchar)) >= 0)                 /* break */
        {
            if (_p_state == IN_QUOTE)
            {
                chstore(token, tokmax, c);      /* just keep going */
            }
            else
            {
                /* consume the break character and finish the token */
                ++(*next);
                *brkused = brkchar[qp];
                goto finished;
            }
        }
        else if ((qp = sindex(c, quote)) >= 0)              /* quote */
        {
            switch (_p_state)
            {
            case IN_WHITE:
                _p_state = IN_QUOTE;            /* change states */
                _p_curquote = quote[qp];        /* save quote char */
                *quoted = 1;    /* set to true as long as something is in quotes */
                break;

            case IN_QUOTE:
                if (quote[qp] == _p_curquote)   /* same as the beginning quote? */
                {
                    _p_state = IN_OZONE;
                    _p_curquote = 0;
                }
                else
                {
                    chstore(token, tokmax, c);  /* treat as regular char */
                }
                break;

            case IN_TOKEN:
            case IN_OZONE:
                /* the quote stays unread: it opens the next token */
                *brkused = c;                   /* uses quote as break char */
                goto finished;
            }
        }
        else if ((qp = sindex(c, white)) >= 0)              /* white */
        {
            switch (_p_state)
            {
            case IN_WHITE:
            case IN_OZONE:
                break;                          /* keep going */

            case IN_TOKEN:
                _p_state = IN_OZONE;
                break;

            case IN_QUOTE:
                chstore(token, tokmax, c);      /* it's valid here */
                break;
            }
        }
        else if (c == eschar)                               /* escape */
        {
            nc = line[(*next) + 1];
            if (nc == 0)                        /* end of line */
            {
                *brkused = 0;
                chstore(token, tokmax, c);
                ++(*next);
                goto finished;
            }
            switch (_p_state)
            {
            case IN_WHITE:
                /* step back so the escape is seen again, now in a token */
                --(*next);
                _p_state = IN_TOKEN;
                break;

            case IN_TOKEN:
            case IN_QUOTE:
                ++(*next);
                chstore(token, tokmax, nc);
                break;

            case IN_OZONE:
                goto finished;
            }
        }
        else                    /* anything else is just a real character */
        {
            switch (_p_state)
            {
            case IN_WHITE:
                _p_state = IN_TOKEN;            /* switch states */
                /* fallthrough */

            case IN_TOKEN:                      /* these 2 are */
            case IN_QUOTE:                      /* identical here */
                chstore(token, tokmax, c);
                break;

            case IN_OZONE:
                goto finished;
            }
        }
    }                           /* end of main loop */

finished:
    /* make sure token ends with EOS, unless the buffer has no room at all */
    if (tokmax > 0)
        token[_p_tokpos] = 0;

    return 0;
}
80ed523f
VZ
937/*
938 * Local Variables:
939 * mode: c
940 * c-basic-offset: 8
941 * fill-column: 78
942 * End:
943 */