]>
git.saurik.com Git - wxWidgets.git/blob - src/tiff/contrib/iptcutil/iptcutil.c
2 #include "tif_config.h"
22 #define STRNICMP strnicmp
24 #define STRNICMP strncasecmp
27 typedef struct _tag_spec
36 static tag_spec tags
[] = {
41 { 20,"Supplemental Category" },
42 { 22,"Fixture Identifier" },
44 { 30,"Release Date" },
45 { 35,"Release Time" },
46 { 40,"Special Instructions" },
47 { 45,"Reference Service" },
48 { 47,"Reference Date" },
49 { 50,"Reference Number" },
50 { 55,"Created Date" },
51 { 60,"Created Time" },
52 { 65,"Originating Program" },
53 { 70,"Program Version" },
54 { 75,"Object Cycle" },
56 { 85,"Byline Title" },
58 { 95,"Province State" },
59 { 100,"Country Code" },
61 { 103,"Original Transmission Reference" },
65 { 116,"Copyright String" },
67 { 121,"Local Caption" },
68 { 122,"Caption Writer" },
69 { 200,"Custom Field 1" },
70 { 201,"Custom Field 2" },
71 { 202,"Custom Field 3" },
72 { 203,"Custom Field 4" },
73 { 204,"Custom Field 5" },
74 { 205,"Custom Field 6" },
75 { 206,"Custom Field 7" },
76 { 207,"Custom Field 8" },
77 { 208,"Custom Field 9" },
78 { 209,"Custom Field 10" },
79 { 210,"Custom Field 11" },
80 { 211,"Custom Field 12" },
81 { 212,"Custom Field 13" },
82 { 213,"Custom Field 14" },
83 { 214,"Custom Field 15" },
84 { 215,"Custom Field 16" },
85 { 216,"Custom Field 17" },
86 { 217,"Custom Field 18" },
87 { 218,"Custom Field 19" },
88 { 219,"Custom Field 20" }
92 * We format the output using HTML conventions
93 * to preserve control characters and such.
95 void formatString(FILE *ofile
, const char *s
, int len
)
98 for (; len
> 0; --len
, ++s
) {
102 fputs("&", ofile
);
106 fputs("<", ofile
);
109 fputs(">", ofile
);
113 fputs(""", ofile
);
117 fprintf(ofile
, "&#%d;", c
);
123 fputs("\"\n", ofile
);
126 typedef struct _html_code
135 static html_code html_codes
[] = {
145 * This routine converts HTML escape sequences
146 * back to their original ASCII representation.
147 * - returns the number of characters dropped.
149 int convertHTMLcodes(char *s
, int len
)
151 if (len
<=0 || s
==(char*)NULL
|| *s
=='\0')
158 if (sscanf(s
,"&#%d;",&val
) == 1)
177 codes
= sizeof(html_codes
) / sizeof(html_code
);
179 for (i
=0; i
< codes
; i
++)
181 if (html_codes
[i
].len
<= len
)
182 if (STRNICMP(s
, html_codes
[i
].code
, html_codes
[i
].len
) == 0)
184 strcpy(s
+1, s
+html_codes
[i
].len
);
185 *s
= html_codes
[i
].val
;
186 return html_codes
[i
].len
-1;
194 int formatIPTC(FILE *ifile
, FILE *ofile
)
214 tagcount
= sizeof(tags
) / sizeof(tag_spec
);
219 foundiptc
= 0; /* found the IPTC-Header */
220 tagsfound
= 0; /* number of tags found */
235 /* we found the 0x1c tag and now grab the dataset and record number tags */
236 dataset
= getc(ifile
);
237 if ((char) dataset
== EOF
)
239 recnum
= getc(ifile
);
240 if ((char) recnum
== EOF
)
242 /* try to match this record to one of the ones in our named table */
243 for (i
=0; i
< tagcount
; i
++)
245 if (tags
[i
].id
== recnum
)
249 readable
= tags
[i
].name
;
253 /* then we decode the length of the block that follows - long or short fmt */
257 if (c
& (unsigned char) 0x80)
264 c
= buffer
[i
] = getc(ifile
);
268 taglen
= (((long) buffer
[ 0 ]) << 24) |
269 (((long) buffer
[ 1 ]) << 16) |
270 (((long) buffer
[ 2 ]) << 8) |
271 (((long) buffer
[ 3 ]));
278 taglen
= ((long) x
) << 8;
284 /* make a buffer to hold the tag data and snag it from the input stream */
285 str
= (char *) malloc((unsigned int) (taglen
+1));
286 if (str
== (char *) NULL
)
288 printf("Memory allocation failed");
291 for (tagindx
=0; tagindx
<taglen
; tagindx
++)
293 c
= str
[tagindx
] = getc(ifile
);
302 /* now finish up by formatting this binary data into ASCII equivalent */
303 if (strlen(readable
) > 0)
304 fprintf(ofile
, "%d#%d#%s=",(unsigned int)dataset
, (unsigned int) recnum
, readable
);
306 fprintf(ofile
, "%d#%d=",(unsigned int)dataset
, (unsigned int) recnum
);
307 formatString( ofile
, str
, taglen
);
317 int tokenizer(unsigned inflag
,char *token
,int tokmax
,char *line
,
318 char *white
,char *brkchar
,char *quote
,char eschar
,char *brkused
,
319 int *next
,char *quoted
);
321 char *super_fgets(char *b
, int *blen
, FILE *file
)
334 if (c
== EOF
|| c
== '\n')
336 if (((long)q
- (long)b
+ 1 ) >= (long) len
)
341 tlen
=(long)q
-(long)b
;
343 b
=(char *) realloc((char *) b
,(len
+2));
344 if ((char *) b
== (char *) NULL
)
348 *q
=(unsigned char) c
;
351 if ((unsigned char *)b
!= (unsigned char *) NULL
)
356 tlen
=(long)q
- (long)b
;
358 return (char *) NULL
;
365 #define BUFFER_SZ 4096
367 int main(int argc
, char *argv
[])
377 mode
; /* iptc binary, or iptc text */
385 *usage
= "usage: iptcutil -t | -b [-i file] [-o file] <input >output";
395 buffer
= (unsigned char *)NULL
;
397 for (i
=1; i
<argc
; i
++)
400 if (c
== '-' || c
== '/')
408 /* Set "stdout" to binary mode: */
409 _setmode( _fileno( ofile
), _O_BINARY
);
415 /* Set "stdin" to binary mode: */
416 _setmode( _fileno( ifile
), _O_BINARY
);
421 ifile
= fopen(argv
[++i
], "rb");
423 ifile
= fopen(argv
[++i
], "rt");
424 if (ifile
== (FILE *)NULL
)
426 printf("Unable to open: %s\n", argv
[i
]);
432 ofile
= fopen(argv
[++i
], "wt");
434 ofile
= fopen(argv
[++i
], "wb");
435 if (ofile
== (FILE *)NULL
)
437 printf("Unable to open: %s\n", argv
[i
]);
442 printf("Unknown option: %s\n", argv
[i
]);
453 if (mode
== 0) /* handle binary iptc info */
454 formatIPTC(ifile
, ofile
);
456 if (mode
== 1) /* handle text form of iptc info */
474 inputlen
= BUFFER_SZ
;
476 line
= (char *) malloc(inputlen
);
477 token
= (char *)NULL
;
478 while((line
= super_fgets(line
,&inputlen
,ifile
))!=NULL
)
483 token
= (char *) malloc(inputlen
);
484 newstr
= (char *) malloc(inputlen
);
485 while(tokenizer(0, token
, inputlen
, line
, "", "=", "\"", 0,
486 &brkused
,&next
,"ed
)==0)
500 while(tokenizer(0, newstr
, inputlen
, token
, "", "#", "", 0,
501 &brkused
, &next
, "ed
)==0)
504 dataset
= (unsigned char) atoi(newstr
);
507 recnum
= (unsigned char) atoi(newstr
);
526 while(tokenizer(0, newstr
, inputlen
, token
, "", "&", "", 0,
527 &brkused
, &next
, "ed
)==0)
529 if (brkused
&& next
> 0)
534 len
-= convertHTMLcodes(s
, strlen(s
));
539 fputc(dataset
, ofile
);
540 fputc(recnum
, ofile
);
543 fputc((len
>> 8) & 255, ofile
);
544 fputc(len
& 255, ofile
);
548 fputc(((len
>> 24) & 255) | 0x80, ofile
);
549 fputc((len
>> 16) & 255, ofile
);
550 fputc((len
>> 8) & 255, ofile
);
551 fputc(len
& 255, ofile
);
555 fputc(token
[next
++], ofile
);
560 token
= (char *)NULL
;
562 newstr
= (char *)NULL
;
574 This routine is a generalized, finite state token parser. It allows
575 you to extract tokens one at a time from a string of characters. The
576 characters used for white space, for break characters, and for quotes
577 can be specified. Also, characters in the string can be preceded by
578 a specifiable escape character which removes any special meaning the
581 There are a lot of formal parameters in this subroutine call, but
582 once you get familiar with them, this routine is fairly easy to use.
583 "#define" macros can be used to generate simpler looking calls for
584 commonly used applications of this routine.
586 First, some terminology:
588 token: used here, a single unit of information in
589 the form of a group of characters.
591 white space: space that gets ignored (except within quotes
592 or when escaped), like blanks and tabs. in
593 addition, white space terminates a non-quoted
596 break character: a character that separates non-quoted tokens.
597 commas are a common break character. the
598 usage of break characters to signal the end
599 of a token is the same as that of white space,
600 except multiple break characters with nothing
601 or only white space between generate a null
602 token for each two break characters together.
604 for example, if blank is set to be the white
605 space and comma is set to be the break
606 character, the line ...
610 ... consists of 5 tokens:
615 4) "" (the null string)
618 quote character: a character that, when surrounding a group
619 of other characters, causes the group of
620 characters to be treated as a single token,
621 no matter how many white spaces or break
622 characters exist in the group. also, a
623 token always terminates after the closing
624 quote. for example, if ' is the quote
625 character, blank is white space, and comma
626 is the break character, the following
631 ... consists of 4 tokens:
634 2) " B, CD" (note the blanks & comma)
638 the quote characters themselves do
639 not appear in the resultant tokens. the
640 double quotes are delimiters I use here for
641 documentation purposes only.
643 escape character: a character which itself is ignored but
644 which causes the next character to be
645 used as is. ^ and \ are often used as
646 escape characters. an escape in the last
647 position of the string gets treated as a
648 "normal" (i.e., non-quote, non-white,
649 non-break, and non-escape) character.
650 for example, assume white space, break
651 character, and quote are the same as in the
652 above examples, and further, assume that
653 ^ is the escape character. then, in the
656 ABC, ' DEF ^' GH' I ^ J K^ L ^
658 ... there are 7 tokens:
663 4) " " (a lone blank)
666 7) "^" (passed as is at end of line)
669 OK, now that you have this background, here's how to call "tokenizer":
671 result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
674 result: 0 if we haven't reached EOS (end of string), and
675 1 if we have (this is an "int").
677 flag: right now, only the low order 3 bits are used.
678 1 => convert non-quoted tokens to upper case
679 2 => convert non-quoted tokens to lower case
680 0 => do not convert non-quoted tokens
683 token: a character string containing the returned next token
684 (this is a "char[]").
686 maxtok: the maximum size of "token". characters beyond
687 "maxtok" are truncated (this is an "int").
689 string: the string to be parsed (this is a "char[]").
691 white: a string of the valid white spaces. example:
693 char whitesp[]={" \t"};
695 blank and tab will be valid white space (this is
698 break: a string of the valid break characters. example:
700 char breakch[]={";,"};
702 semicolon and comma will be valid break characters
703 (this is a "char[]").
705 IMPORTANT: do not use the name "break" as a C
706 variable, as this is a reserved word in C.
708 quote: a string of the valid quote characters. an example
711 char whitesp[]={"'\""};
713 (this causes single and double quotes to be valid)
714 note that a token starting with one of these characters
715 needs the same quote character to terminate it.
725 are properly terminated. note that different quote
726 characters can appear on the same line; only for
727 a given token do the quote characters have to be
728 the same (this is a "char[]").
730 escape: the escape character (NOT a string ... only one
731 allowed). use zero if none is desired (this is
734 brkused: the break character used to terminate the current
735 token. if the token was quoted, this will be the
736 quote used. if the token is the last one on the
737 line, this will be zero (this is a pointer to a
740 next: this variable points to the first character of the
741 next token. it gets reset by "tokenizer" as it steps
742 through the string. set it to 0 upon initialization,
743 and leave it alone after that. you can change it
744 if you want to jump around in the string or re-parse
745 from the beginning, but be careful (this is a
746 pointer to an "int").
748 quoted: set to 1 (true) if the token was quoted and 0 (false)
749 if not. you may need this information (for example:
750 in C, a string with quotes around it is a character
751 string, while one without is an identifier).
753 (this is a pointer to a "char").
763 int _p_state
; /* current state */
764 unsigned _p_flag
; /* option flag */
765 char _p_curquote
; /* current quote char */
766 int _p_tokpos
; /* current token pos */
768 /* routine to find character in string ... used only by "tokenizer" */
770 int sindex(char ch
,char *string
)
773 for(cp
=string
;*cp
;++cp
)
775 return (int)(cp
-string
); /* return postion of character */
776 return -1; /* eol ... no match found */
779 /* routine to store a character in a string ... used only by "tokenizer" */
781 void chstore(char *string
,int max
,char ch
)
784 if(_p_tokpos
>=0&&_p_tokpos
<max
-1)
786 if(_p_state
==IN_QUOTE
)
791 case 1: /* convert to upper */
795 case 2: /* convert to lower */
799 default: /* use as is */
803 string
[_p_tokpos
++]=c
;
808 int tokenizer(unsigned inflag
,char *token
,int tokmax
,char *line
,
809 char *white
,char *brkchar
,char *quote
,char eschar
,char *brkused
,
810 int *next
,char *quoted
)
815 *brkused
=0; /* initialize to null */
816 *quoted
=0; /* assume not quoted */
818 if(!line
[*next
]) /* if we're at end of line, indicate such */
821 _p_state
=IN_WHITE
; /* initialize state */
822 _p_curquote
=0; /* initialize previous quote char */
823 _p_flag
=inflag
; /* set option flag */
825 for(_p_tokpos
=0;(c
=line
[*next
]);++(*next
)) /* main loop */
827 if((qp
=sindex(c
,brkchar
))>=0) /* break */
831 case IN_WHITE
: /* these are the same here ... */
832 case IN_TOKEN
: /* ... just get out */
833 case IN_OZONE
: /* ditto */
835 *brkused
=brkchar
[qp
];
838 case IN_QUOTE
: /* just keep going */
839 chstore(token
,tokmax
,c
);
843 else if((qp
=sindex(c
,quote
))>=0) /* quote */
847 case IN_WHITE
: /* these are identical, */
848 _p_state
=IN_QUOTE
; /* change states */
849 _p_curquote
=quote
[qp
]; /* save quote char */
850 *quoted
=1; /* set to true as long as something is in quotes */
854 if(quote
[qp
]==_p_curquote
) /* same as the beginning quote? */
860 chstore(token
,tokmax
,c
); /* treat as regular char */
865 *brkused
=c
; /* uses quote as break char */
869 else if((qp
=sindex(c
,white
))>=0) /* white */
875 break; /* keep going */
882 chstore(token
,tokmax
,c
); /* it's valid here */
886 else if(c
==eschar
) /* escape */
889 if(nc
==0) /* end of line */
892 chstore(token
,tokmax
,c
);
906 chstore(token
,tokmax
,nc
);
913 else /* anything else is just a real character */
918 _p_state
=IN_TOKEN
; /* switch states */
920 case IN_TOKEN
: /* these 2 are */
921 case IN_QUOTE
: /* identical here */
922 chstore(token
,tokmax
,c
);
929 } /* end of main loop */
932 token
[_p_tokpos
]=0; /* make sure token ends with EOS */