]>
git.saurik.com Git - wxWidgets.git/blob - src/tiff/contrib/iptcutil/iptcutil.c
3 #include "tif_config.h"
24 #define STRNICMP strnicmp
26 #define STRNICMP strncasecmp
29 typedef struct _tag_spec
38 static tag_spec tags
[] = {
43 { 20,"Supplemental Category" },
44 { 22,"Fixture Identifier" },
46 { 30,"Release Date" },
47 { 35,"Release Time" },
48 { 40,"Special Instructions" },
49 { 45,"Reference Service" },
50 { 47,"Reference Date" },
51 { 50,"Reference Number" },
52 { 55,"Created Date" },
53 { 60,"Created Time" },
54 { 65,"Originating Program" },
55 { 70,"Program Version" },
56 { 75,"Object Cycle" },
58 { 85,"Byline Title" },
60 { 95,"Province State" },
61 { 100,"Country Code" },
63 { 103,"Original Transmission Reference" },
67 { 116,"Copyright String" },
69 { 121,"Local Caption" },
70 { 122,"Caption Writer" },
71 { 200,"Custom Field 1" },
72 { 201,"Custom Field 2" },
73 { 202,"Custom Field 3" },
74 { 203,"Custom Field 4" },
75 { 204,"Custom Field 5" },
76 { 205,"Custom Field 6" },
77 { 206,"Custom Field 7" },
78 { 207,"Custom Field 8" },
79 { 208,"Custom Field 9" },
80 { 209,"Custom Field 10" },
81 { 210,"Custom Field 11" },
82 { 211,"Custom Field 12" },
83 { 212,"Custom Field 13" },
84 { 213,"Custom Field 14" },
85 { 214,"Custom Field 15" },
86 { 215,"Custom Field 16" },
87 { 216,"Custom Field 17" },
88 { 217,"Custom Field 18" },
89 { 218,"Custom Field 19" },
90 { 219,"Custom Field 20" }
94 * We format the output using HTML conventions
95 * to preserve control characters and such.
97 void formatString(FILE *ofile
, const char *s
, int len
)
100 for (; len
> 0; --len
, ++s
) {
104 fputs("&", ofile
);
108 fputs("<", ofile
);
111 fputs(">", ofile
);
115 fputs(""", ofile
);
119 fprintf(ofile
, "&#%d;", c
);
125 fputs("\"\n", ofile
);
128 typedef struct _html_code
137 static html_code html_codes
[] = {
147 * This routine converts an HTML escape sequence
148 * back to the original ASCII representation.
149 * - returns the number of characters dropped.
151 int convertHTMLcodes(char *s
, int len
)
153 if (len
<=0 || s
==(char*)NULL
|| *s
=='\0')
160 if (sscanf(s
,"&#%d;",&val
) == 1)
179 codes
= sizeof(html_codes
) / sizeof(html_code
);
181 for (i
=0; i
< codes
; i
++)
183 if (html_codes
[i
].len
<= len
)
184 if (STRNICMP(s
, html_codes
[i
].code
, html_codes
[i
].len
) == 0)
186 strcpy(s
+1, s
+html_codes
[i
].len
);
187 *s
= html_codes
[i
].val
;
188 return html_codes
[i
].len
-1;
196 int formatIPTC(FILE *ifile
, FILE *ofile
)
216 tagcount
= sizeof(tags
) / sizeof(tag_spec
);
221 foundiptc
= 0; /* found the IPTC-Header */
222 tagsfound
= 0; /* number of tags found */
237 /* we found the 0x1c tag and now grab the dataset and record number tags */
238 dataset
= getc(ifile
);
239 if ((char) dataset
== EOF
)
241 recnum
= getc(ifile
);
242 if ((char) recnum
== EOF
)
244 /* try to match this record to one of the ones in our named table */
245 for (i
=0; i
< tagcount
; i
++)
247 if (tags
[i
].id
== recnum
)
251 readable
= tags
[i
].name
;
255 /* then we decode the length of the block that follows - long or short fmt */
259 if (c
& (unsigned char) 0x80)
266 c
= buffer
[i
] = getc(ifile
);
270 taglen
= (((long) buffer
[ 0 ]) << 24) |
271 (((long) buffer
[ 1 ]) << 16) |
272 (((long) buffer
[ 2 ]) << 8) |
273 (((long) buffer
[ 3 ]));
280 taglen
= ((long) x
) << 8;
286 /* make a buffer to hold the tag data and snag it from the input stream */
287 str
= (char *) malloc((unsigned int) (taglen
+1));
288 if (str
== (char *) NULL
)
290 printf("Memory allocation failed");
293 for (tagindx
=0; tagindx
<taglen
; tagindx
++)
295 c
= str
[tagindx
] = getc(ifile
);
301 /* now finish up by formatting this binary data into ASCII equivalent */
302 if (strlen(readable
) > 0)
303 fprintf(ofile
, "%d#%d#%s=",(unsigned int)dataset
, (unsigned int) recnum
, readable
);
305 fprintf(ofile
, "%d#%d=",(unsigned int)dataset
, (unsigned int) recnum
);
306 formatString( ofile
, str
, taglen
);
316 int tokenizer(unsigned inflag
,char *token
,int tokmax
,char *line
,
317 char *white
,char *brkchar
,char *quote
,char eschar
,char *brkused
,
318 int *next
,char *quoted
);
320 char *super_fgets(char *b
, int *blen
, FILE *file
)
333 if (c
== EOF
|| c
== '\n')
335 if (((int)q
- (int)b
+ 1 ) >= (int) len
)
342 b
=(char *) realloc((char *) b
,(len
+2));
343 if ((char *) b
== (char *) NULL
)
347 *q
=(unsigned char) c
;
350 if ((unsigned char *)b
!= (unsigned char *) NULL
)
355 tlen
=(int)q
- (int)b
;
357 return (char *) NULL
;
364 #define BUFFER_SZ 4096
366 int main(int argc
, char *argv
[])
376 mode
; /* iptc binary, or iptc text */
384 *usage
= "usage: iptcutil -t | -b [-i file] [-o file] <input >output";
394 buffer
= (unsigned char *)NULL
;
396 for (i
=1; i
<argc
; i
++)
399 if (c
== '-' || c
== '/')
407 /* Set "stdout" to binary mode: */
408 _setmode( _fileno( ofile
), _O_BINARY
);
414 /* Set "stdin" to binary mode: */
415 _setmode( _fileno( ifile
), _O_BINARY
);
420 ifile
= fopen(argv
[++i
], "rb");
422 ifile
= fopen(argv
[++i
], "rt");
423 if (ifile
== (FILE *)NULL
)
425 printf("Unable to open: %s\n", argv
[i
]);
431 ofile
= fopen(argv
[++i
], "wt");
433 ofile
= fopen(argv
[++i
], "wb");
434 if (ofile
== (FILE *)NULL
)
436 printf("Unable to open: %s\n", argv
[i
]);
441 printf("Unknown option: %s\n", argv
[i
]);
452 if (mode
== 0) /* handle binary iptc info */
453 formatIPTC(ifile
, ofile
);
455 if (mode
== 1) /* handle text form of iptc info */
473 inputlen
= BUFFER_SZ
;
475 line
= (char *) malloc(inputlen
);
476 token
= (char *)NULL
;
477 while((line
= super_fgets(line
,&inputlen
,ifile
))!=NULL
)
482 token
= (char *) malloc(inputlen
);
483 newstr
= (char *) malloc(inputlen
);
484 while(tokenizer(0, token
, inputlen
, line
, "", "=", "\"", 0,
485 &brkused
,&next
,"ed
)==0)
499 while(tokenizer(0, newstr
, inputlen
, token
, "", "#", "", 0,
500 &brkused
, &next
, "ed
)==0)
503 dataset
= (unsigned char) atoi(newstr
);
506 recnum
= (unsigned char) atoi(newstr
);
525 while(tokenizer(0, newstr
, inputlen
, token
, "", "&", "", 0,
526 &brkused
, &next
, "ed
)==0)
528 if (brkused
&& next
> 0)
533 len
-= convertHTMLcodes(s
, strlen(s
));
538 fputc(dataset
, ofile
);
539 fputc(recnum
, ofile
);
542 fputc((len
>> 8) & 255, ofile
);
543 fputc(len
& 255, ofile
);
547 fputc(((len
>> 24) & 255) | 0x80, ofile
);
548 fputc((len
>> 16) & 255, ofile
);
549 fputc((len
>> 8) & 255, ofile
);
550 fputc(len
& 255, ofile
);
554 fputc(token
[next
++], ofile
);
559 token
= (char *)NULL
;
561 newstr
= (char *)NULL
;
573 This routine is a generalized, finite state token parser. It allows
574 you to extract tokens one at a time from a string of characters. The
575 characters used for white space, for break characters, and for quotes
576 can be specified. Also, characters in the string can be preceded by
577 a specifiable escape character which removes any special meaning the
580 There are a lot of formal parameters in this subroutine call, but
581 once you get familiar with them, this routine is fairly easy to use.
582 "#define" macros can be used to generate simpler looking calls for
583 commonly used applications of this routine.
585 First, some terminology:
587 token: used here, a single unit of information in
588 the form of a group of characters.
590 white space: space that gets ignored (except within quotes
591 or when escaped), like blanks and tabs. in
592 addition, white space terminates a non-quoted
595 break character: a character that separates non-quoted tokens.
596 commas are a common break character. the
597 usage of break characters to signal the end
598 of a token is the same as that of white space,
599 except multiple break characters with nothing
600 or only white space between generate a null
601 token for each two break characters together.
603 for example, if blank is set to be the white
604 space and comma is set to be the break
605 character, the line ...
609 ... consists of 5 tokens:
614 4) "" (the null string)
617 quote character: a character that, when surrounding a group
618 of other characters, causes the group of
619 characters to be treated as a single token,
620 no matter how many white spaces or break
621 characters exist in the group. also, a
622 token always terminates after the closing
623 quote. for example, if ' is the quote
624 character, blank is white space, and comma
625 is the break character, the following
630 ... consists of 4 tokens:
633 2) " B, CD" (note the blanks & comma)
637 the quote characters themselves do
638 not appear in the resultant tokens. the
639 double quotes are delimiters I use here for
640 documentation purposes only.
642 escape character: a character which itself is ignored but
643 which causes the next character to be
644 used as is. ^ and \ are often used as
645 escape characters. an escape in the last
646 position of the string gets treated as a
647 "normal" (i.e., non-quote, non-white,
648 non-break, and non-escape) character.
649 for example, assume white space, break
650 character, and quote are the same as in the
651 above examples, and further, assume that
652 ^ is the escape character. then, in the
655 ABC, ' DEF ^' GH' I ^ J K^ L ^
657 ... there are 7 tokens:
662 4) " " (a lone blank)
665 7) "^" (passed as is at end of line)
668 OK, now that you have this background, here's how to call "tokenizer":
670 result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
673 result: 0 if we haven't reached EOS (end of string), and
674 1 if we have (this is an "int").
676 flag: right now, only the low order 3 bits are used.
677 1 => convert non-quoted tokens to upper case
678 2 => convert non-quoted tokens to lower case
679 0 => do not convert non-quoted tokens
682 token: a character string containing the returned next token
683 (this is a "char[]").
685 maxtok: the maximum size of "token". characters beyond
686 "maxtok" are truncated (this is an "int").
688 string: the string to be parsed (this is a "char[]").
690 white: a string of the valid white spaces. example:
692 char whitesp[]={" \t"};
694 blank and tab will be valid white space (this is
697 break: a string of the valid break characters. example:
699 char breakch[]={";,"};
701 semicolon and comma will be valid break characters
702 (this is a "char[]").
704 IMPORTANT: do not use the name "break" as a C
705 variable, as this is a reserved word in C.
707 quote: a string of the valid quote characters. an example
710 char quotech[]={"'\""};
712 (this causes single and double quotes to be valid)
713 note that a token starting with one of these characters
714 needs the same quote character to terminate it.
724 are properly terminated. note that different quote
725 characters can appear on the same line; only for
726 a given token do the quote characters have to be
727 the same (this is a "char[]").
729 escape: the escape character (NOT a string ... only one
730 allowed). use zero if none is desired (this is
733 brkused: the break character used to terminate the current
734 token. if the token was quoted, this will be the
735 quote used. if the token is the last one on the
736 line, this will be zero (this is a pointer to a
739 next: this variable points to the first character of the
740 next token. it gets reset by "tokenizer" as it steps
741 through the string. set it to 0 upon initialization,
742 and leave it alone after that. you can change it
743 if you want to jump around in the string or re-parse
744 from the beginning, but be careful (this is a
745 pointer to an "int").
747 quoted: set to 1 (true) if the token was quoted and 0 (false)
748 if not. you may need this information (for example:
749 in C, a string with quotes around it is a character
750 string, while one without is an identifier).
752 (this is a pointer to a "char").
762 int _p_state
; /* current state */
763 unsigned _p_flag
; /* option flag */
764 char _p_curquote
; /* current quote char */
765 int _p_tokpos
; /* current token pos */
767 /* routine to find character in string ... used only by "tokenizer" */
769 int sindex(char ch
,char *string
)
772 for(cp
=string
;*cp
;++cp
)
774 return (int)(cp
-string
); /* return position of character */
775 return -1; /* eol ... no match found */
778 /* routine to store a character in a string ... used only by "tokenizer" */
780 void chstore(char *string
,int max
,char ch
)
783 if(_p_tokpos
>=0&&_p_tokpos
<max
-1)
785 if(_p_state
==IN_QUOTE
)
790 case 1: /* convert to upper */
794 case 2: /* convert to lower */
798 default: /* use as is */
802 string
[_p_tokpos
++]=c
;
807 int tokenizer(unsigned inflag
,char *token
,int tokmax
,char *line
,
808 char *white
,char *brkchar
,char *quote
,char eschar
,char *brkused
,
809 int *next
,char *quoted
)
814 *brkused
=0; /* initialize to null */
815 *quoted
=0; /* assume not quoted */
817 if(!line
[*next
]) /* if we're at end of line, indicate such */
820 _p_state
=IN_WHITE
; /* initialize state */
821 _p_curquote
=0; /* initialize previous quote char */
822 _p_flag
=inflag
; /* set option flag */
824 for(_p_tokpos
=0;(c
=line
[*next
]);++(*next
)) /* main loop */
826 if((qp
=sindex(c
,brkchar
))>=0) /* break */
830 case IN_WHITE
: /* these are the same here ... */
831 case IN_TOKEN
: /* ... just get out */
832 case IN_OZONE
: /* ditto */
834 *brkused
=brkchar
[qp
];
837 case IN_QUOTE
: /* just keep going */
838 chstore(token
,tokmax
,c
);
842 else if((qp
=sindex(c
,quote
))>=0) /* quote */
846 case IN_WHITE
: /* these are identical, */
847 _p_state
=IN_QUOTE
; /* change states */
848 _p_curquote
=quote
[qp
]; /* save quote char */
849 *quoted
=1; /* set to true as long as something is in quotes */
853 if(quote
[qp
]==_p_curquote
) /* same as the beginning quote? */
859 chstore(token
,tokmax
,c
); /* treat as regular char */
864 *brkused
=c
; /* uses quote as break char */
868 else if((qp
=sindex(c
,white
))>=0) /* white */
874 break; /* keep going */
881 chstore(token
,tokmax
,c
); /* it's valid here */
885 else if(c
==eschar
) /* escape */
888 if(nc
==0) /* end of line */
891 chstore(token
,tokmax
,c
);
905 chstore(token
,tokmax
,nc
);
912 else /* anything else is just a real character */
917 _p_state
=IN_TOKEN
; /* switch states */
919 case IN_TOKEN
: /* these 2 are */
920 case IN_QUOTE
: /* identical here */
921 chstore(token
,tokmax
,c
);
928 } /* end of main loop */
931 token
[_p_tokpos
]=0; /* make sure token ends with EOS */