]>
git.saurik.com Git - wxWidgets.git/blob - src/tiff/contrib/iptcutil/iptcutil.c
3 #include "tif_config.h"
23 #define STRNICMP strnicmp
25 #define STRNICMP strncasecmp
28 typedef struct _tag_spec
37 static tag_spec tags
[] = {
42 { 20,"Supplemental Category" },
43 { 22,"Fixture Identifier" },
45 { 30,"Release Date" },
46 { 35,"Release Time" },
47 { 40,"Special Instructions" },
48 { 45,"Reference Service" },
49 { 47,"Reference Date" },
50 { 50,"Reference Number" },
51 { 55,"Created Date" },
52 { 60,"Created Time" },
53 { 65,"Originating Program" },
54 { 70,"Program Version" },
55 { 75,"Object Cycle" },
57 { 85,"Byline Title" },
59 { 95,"Province State" },
60 { 100,"Country Code" },
62 { 103,"Original Transmission Reference" },
66 { 116,"Copyright String" },
68 { 121,"Local Caption" },
69 { 122,"Caption Writer" },
70 { 200,"Custom Field 1" },
71 { 201,"Custom Field 2" },
72 { 202,"Custom Field 3" },
73 { 203,"Custom Field 4" },
74 { 204,"Custom Field 5" },
75 { 205,"Custom Field 6" },
76 { 206,"Custom Field 7" },
77 { 207,"Custom Field 8" },
78 { 208,"Custom Field 9" },
79 { 209,"Custom Field 10" },
80 { 210,"Custom Field 11" },
81 { 211,"Custom Field 12" },
82 { 212,"Custom Field 13" },
83 { 213,"Custom Field 14" },
84 { 214,"Custom Field 15" },
85 { 215,"Custom Field 16" },
86 { 216,"Custom Field 17" },
87 { 217,"Custom Field 18" },
88 { 218,"Custom Field 19" },
89 { 219,"Custom Field 20" }
93 * We format the output using HTML conventions
94 * to preserve control characters and such.
96 void formatString(FILE *ofile
, const char *s
, int len
)
99 for (; len
> 0; --len
, ++s
) {
103 fputs("&", ofile
);
107 fputs("<", ofile
);
110 fputs(">", ofile
);
114 fputs(""", ofile
);
118 fprintf(ofile
, "&#%d;", c
);
124 fputs("\"\n", ofile
);
127 typedef struct _html_code
136 static html_code html_codes
[] = {
146 * This routine converts HTML escape sequences
147 * back to the original ASCII representation.
148 * - returns the number of characters dropped.
150 int convertHTMLcodes(char *s
, int len
)
152 if (len
<=0 || s
==(char*)NULL
|| *s
=='\0')
159 if (sscanf(s
,"&#%d;",&val
) == 1)
178 codes
= sizeof(html_codes
) / sizeof(html_code
);
180 for (i
=0; i
< codes
; i
++)
182 if (html_codes
[i
].len
<= len
)
183 if (STRNICMP(s
, html_codes
[i
].code
, html_codes
[i
].len
) == 0)
185 strcpy(s
+1, s
+html_codes
[i
].len
);
186 *s
= html_codes
[i
].val
;
187 return html_codes
[i
].len
-1;
195 int formatIPTC(FILE *ifile
, FILE *ofile
)
215 tagcount
= sizeof(tags
) / sizeof(tag_spec
);
220 foundiptc
= 0; /* found the IPTC-Header */
221 tagsfound
= 0; /* number of tags found */
236 /* we found the 0x1c tag and now grab the dataset and record number tags */
237 dataset
= getc(ifile
);
238 if ((char) dataset
== EOF
)
240 recnum
= getc(ifile
);
241 if ((char) recnum
== EOF
)
243 /* try to match this record to one of the ones in our named table */
244 for (i
=0; i
< tagcount
; i
++)
246 if (tags
[i
].id
== recnum
)
250 readable
= tags
[i
].name
;
254 /* then we decode the length of the block that follows - long or short fmt */
258 if (c
& (unsigned char) 0x80)
265 c
= buffer
[i
] = getc(ifile
);
269 taglen
= (((long) buffer
[ 0 ]) << 24) |
270 (((long) buffer
[ 1 ]) << 16) |
271 (((long) buffer
[ 2 ]) << 8) |
272 (((long) buffer
[ 3 ]));
279 taglen
= ((long) x
) << 8;
285 /* make a buffer to hold the tag data and snag it from the input stream */
286 str
= (char *) malloc((unsigned int) (taglen
+1));
287 if (str
== (char *) NULL
)
289 printf("Memory allocation failed");
292 for (tagindx
=0; tagindx
<taglen
; tagindx
++)
294 c
= str
[tagindx
] = getc(ifile
);
303 /* now finish up by formatting this binary data into ASCII equivalent */
304 if (strlen(readable
) > 0)
305 fprintf(ofile
, "%d#%d#%s=",(unsigned int)dataset
, (unsigned int) recnum
, readable
);
307 fprintf(ofile
, "%d#%d=",(unsigned int)dataset
, (unsigned int) recnum
);
308 formatString( ofile
, str
, taglen
);
318 int tokenizer(unsigned inflag
,char *token
,int tokmax
,char *line
,
319 char *white
,char *brkchar
,char *quote
,char eschar
,char *brkused
,
320 int *next
,char *quoted
);
322 char *super_fgets(char *b
, int *blen
, FILE *file
)
335 if (c
== EOF
|| c
== '\n')
337 if (((long)q
- (long)b
+ 1 ) >= (long) len
)
342 tlen
=(long)q
-(long)b
;
344 b
=(char *) realloc((char *) b
,(len
+2));
345 if ((char *) b
== (char *) NULL
)
349 *q
=(unsigned char) c
;
352 if ((unsigned char *)b
!= (unsigned char *) NULL
)
357 tlen
=(long)q
- (long)b
;
359 return (char *) NULL
;
366 #define BUFFER_SZ 4096
368 int main(int argc
, char *argv
[])
378 mode
; /* iptc binary, or iptc text */
386 *usage
= "usage: iptcutil -t | -b [-i file] [-o file] <input >output";
396 buffer
= (unsigned char *)NULL
;
398 for (i
=1; i
<argc
; i
++)
401 if (c
== '-' || c
== '/')
409 /* Set "stdout" to binary mode: */
410 _setmode( _fileno( ofile
), _O_BINARY
);
416 /* Set "stdin" to binary mode: */
417 _setmode( _fileno( ifile
), _O_BINARY
);
422 ifile
= fopen(argv
[++i
], "rb");
424 ifile
= fopen(argv
[++i
], "rt");
425 if (ifile
== (FILE *)NULL
)
427 printf("Unable to open: %s\n", argv
[i
]);
433 ofile
= fopen(argv
[++i
], "wt");
435 ofile
= fopen(argv
[++i
], "wb");
436 if (ofile
== (FILE *)NULL
)
438 printf("Unable to open: %s\n", argv
[i
]);
443 printf("Unknown option: %s\n", argv
[i
]);
454 if (mode
== 0) /* handle binary iptc info */
455 formatIPTC(ifile
, ofile
);
457 if (mode
== 1) /* handle text form of iptc info */
475 inputlen
= BUFFER_SZ
;
477 line
= (char *) malloc(inputlen
);
478 token
= (char *)NULL
;
479 while((line
= super_fgets(line
,&inputlen
,ifile
))!=NULL
)
484 token
= (char *) malloc(inputlen
);
485 newstr
= (char *) malloc(inputlen
);
486 while(tokenizer(0, token
, inputlen
, line
, "", "=", "\"", 0,
487 &brkused
,&next
,"ed
)==0)
501 while(tokenizer(0, newstr
, inputlen
, token
, "", "#", "", 0,
502 &brkused
, &next
, "ed
)==0)
505 dataset
= (unsigned char) atoi(newstr
);
508 recnum
= (unsigned char) atoi(newstr
);
527 while(tokenizer(0, newstr
, inputlen
, token
, "", "&", "", 0,
528 &brkused
, &next
, "ed
)==0)
530 if (brkused
&& next
> 0)
535 len
-= convertHTMLcodes(s
, strlen(s
));
540 fputc(dataset
, ofile
);
541 fputc(recnum
, ofile
);
544 fputc((len
>> 8) & 255, ofile
);
545 fputc(len
& 255, ofile
);
549 fputc(((len
>> 24) & 255) | 0x80, ofile
);
550 fputc((len
>> 16) & 255, ofile
);
551 fputc((len
>> 8) & 255, ofile
);
552 fputc(len
& 255, ofile
);
556 fputc(token
[next
++], ofile
);
561 token
= (char *)NULL
;
563 newstr
= (char *)NULL
;
575 This routine is a generalized, finite state token parser. It allows
576 you to extract tokens one at a time from a string of characters. The
577 characters used for white space, for break characters, and for quotes
578 can be specified. Also, characters in the string can be preceded by
579 a specifiable escape character which removes any special meaning the
582 There are a lot of formal parameters in this subroutine call, but
583 once you get familiar with them, this routine is fairly easy to use.
584 "#define" macros can be used to generate simpler looking calls for
585 commonly used applications of this routine.
587 First, some terminology:
589 token: used here, a single unit of information in
590 the form of a group of characters.
592 white space: space that gets ignored (except within quotes
593 or when escaped), like blanks and tabs. in
594 addition, white space terminates a non-quoted
597 break character: a character that separates non-quoted tokens.
598 commas are a common break character. the
599 usage of break characters to signal the end
600 of a token is the same as that of white space,
601 except multiple break characters with nothing
602 or only white space between generate a null
603 token for each two break characters together.
605 for example, if blank is set to be the white
606 space and comma is set to be the break
607 character, the line ...
611 ... consists of 5 tokens:
616 4) "" (the null string)
619 quote character: a character that, when surrounding a group
620 of other characters, causes the group of
621 characters to be treated as a single token,
622 no matter how many white spaces or break
623 characters exist in the group. also, a
624 token always terminates after the closing
625 quote. for example, if ' is the quote
626 character, blank is white space, and comma
627 is the break character, the following
632 ... consists of 4 tokens:
635 2) " B, CD" (note the blanks & comma)
639 the quote characters themselves do
640 not appear in the resultant tokens. the
641 double quotes are delimiters I use here for
642 documentation purposes only.
644 escape character: a character which itself is ignored but
645 which causes the next character to be
646 used as is. ^ and \ are often used as
647 escape characters. an escape in the last
648 position of the string gets treated as a
649 "normal" (i.e., non-quote, non-white,
650 non-break, and non-escape) character.
651 for example, assume white space, break
652 character, and quote are the same as in the
653 above examples, and further, assume that
654 ^ is the escape character. then, in the
657 ABC, ' DEF ^' GH' I ^ J K^ L ^
659 ... there are 7 tokens:
664 4) " " (a lone blank)
667 7) "^" (passed as is at end of line)
670 OK, now that you have this background, here's how to call "tokenizer":
672 result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
675 result: 0 if we haven't reached EOS (end of string), and
676 1 if we have (this is an "int").
678 flag: right now, only the low order 3 bits are used.
679 1 => convert non-quoted tokens to upper case
680 2 => convert non-quoted tokens to lower case
681 0 => do not convert non-quoted tokens
684 token: a character string containing the returned next token
685 (this is a "char[]").
687 maxtok: the maximum size of "token". characters beyond
688 "maxtok" are truncated (this is an "int").
690 string: the string to be parsed (this is a "char[]").
692 white: a string of the valid white spaces. example:
694 char whitesp[]={" \t"};
696 blank and tab will be valid white space (this is
699 break: a string of the valid break characters. example:
701 char breakch[]={";,"};
703 semicolon and comma will be valid break characters
704 (this is a "char[]").
706 IMPORTANT: do not use the name "break" as a C
707 variable, as this is a reserved word in C.
709 quote: a string of the valid quote characters. an example
712 char quotech[]={"'\""};
714 (this causes single and double quotes to be valid)
715 note that a token starting with one of these characters
716 needs the same quote character to terminate it.
726 are properly terminated. note that different quote
727 characters can appear on the same line; only for
728 a given token do the quote characters have to be
729 the same (this is a "char[]").
731 escape: the escape character (NOT a string ... only one
732 allowed). use zero if none is desired (this is
735 brkused: the break character used to terminate the current
736 token. if the token was quoted, this will be the
737 quote used. if the token is the last one on the
738 line, this will be zero (this is a pointer to a
741 next: this variable points to the first character of the
742 next token. it gets reset by "tokenizer" as it steps
743 through the string. set it to 0 upon initialization,
744 and leave it alone after that. you can change it
745 if you want to jump around in the string or re-parse
746 from the beginning, but be careful (this is a
747 pointer to an "int").
749 quoted: set to 1 (true) if the token was quoted and 0 (false)
750 if not. you may need this information (for example:
751 in C, a string with quotes around it is a character
752 string, while one without is an identifier).
754 (this is a pointer to a "char").
764 int _p_state
; /* current state */
765 unsigned _p_flag
; /* option flag */
766 char _p_curquote
; /* current quote char */
767 int _p_tokpos
; /* current token pos */
769 /* routine to find character in string ... used only by "tokenizer" */
771 int sindex(char ch
,char *string
)
774 for(cp
=string
;*cp
;++cp
)
776 return (int)(cp
-string
); /* return postion of character */
777 return -1; /* eol ... no match found */
780 /* routine to store a character in a string ... used only by "tokenizer" */
782 void chstore(char *string
,int max
,char ch
)
785 if(_p_tokpos
>=0&&_p_tokpos
<max
-1)
787 if(_p_state
==IN_QUOTE
)
792 case 1: /* convert to upper */
796 case 2: /* convert to lower */
800 default: /* use as is */
804 string
[_p_tokpos
++]=c
;
809 int tokenizer(unsigned inflag
,char *token
,int tokmax
,char *line
,
810 char *white
,char *brkchar
,char *quote
,char eschar
,char *brkused
,
811 int *next
,char *quoted
)
816 *brkused
=0; /* initialize to null */
817 *quoted
=0; /* assume not quoted */
819 if(!line
[*next
]) /* if we're at end of line, indicate such */
822 _p_state
=IN_WHITE
; /* initialize state */
823 _p_curquote
=0; /* initialize previous quote char */
824 _p_flag
=inflag
; /* set option flag */
826 for(_p_tokpos
=0;(c
=line
[*next
]);++(*next
)) /* main loop */
828 if((qp
=sindex(c
,brkchar
))>=0) /* break */
832 case IN_WHITE
: /* these are the same here ... */
833 case IN_TOKEN
: /* ... just get out */
834 case IN_OZONE
: /* ditto */
836 *brkused
=brkchar
[qp
];
839 case IN_QUOTE
: /* just keep going */
840 chstore(token
,tokmax
,c
);
844 else if((qp
=sindex(c
,quote
))>=0) /* quote */
848 case IN_WHITE
: /* these are identical, */
849 _p_state
=IN_QUOTE
; /* change states */
850 _p_curquote
=quote
[qp
]; /* save quote char */
851 *quoted
=1; /* set to true as long as something is in quotes */
855 if(quote
[qp
]==_p_curquote
) /* same as the beginning quote? */
861 chstore(token
,tokmax
,c
); /* treat as regular char */
866 *brkused
=c
; /* uses quote as break char */
870 else if((qp
=sindex(c
,white
))>=0) /* white */
876 break; /* keep going */
883 chstore(token
,tokmax
,c
); /* it's valid here */
887 else if(c
==eschar
) /* escape */
890 if(nc
==0) /* end of line */
893 chstore(token
,tokmax
,c
);
907 chstore(token
,tokmax
,nc
);
914 else /* anything else is just a real character */
919 _p_state
=IN_TOKEN
; /* switch states */
921 case IN_TOKEN
: /* these 2 are */
922 case IN_QUOTE
: /* identical here */
923 chstore(token
,tokmax
,c
);
930 } /* end of main loop */
933 token
[_p_tokpos
]=0; /* make sure token ends with EOS */