[wxWidgets.git] / src / tiff / contrib / iptcutil / iptcutil.c

/* $Id$ */

#include "tif_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <memory.h>
#include <ctype.h>

#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif

#ifdef HAVE_IO_H
# include <io.h>
#endif

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif

#ifdef WIN32
#define STRNICMP strnicmp
#else 
#define STRNICMP strncasecmp
#endif 

/*
 * One entry of the tag-name lookup table: pairs a numeric id with the
 * human-readable label that formatIPTC() prints for it.
 */
typedef struct _tag_spec
{
  short
    id;    /* compared against the record-number byte read by formatIPTC() */

  char
    *name; /* label written between the numeric prefix and '=' in text output */
} tag_spec;

/*
 * Lookup table of the ids this tool can label.  formatIPTC() searches it
 * linearly; an id that is not listed here is still written out, just
 * without a name.
 */
static tag_spec tags[] = {
    { 5,"Image Name" },
    { 7,"Edit Status" },
    { 10,"Priority" },
    { 15,"Category" },
    { 20,"Supplemental Category" },
    { 22,"Fixture Identifier" },
    { 25,"Keyword" },
    { 30,"Release Date" },
    { 35,"Release Time" },
    { 40,"Special Instructions" },
    { 45,"Reference Service" },
    { 47,"Reference Date" },
    { 50,"Reference Number" },
    { 55,"Created Date" },
    { 60,"Created Time" },
    { 65,"Originating Program" },
    { 70,"Program Version" },
    { 75,"Object Cycle" },
    { 80,"Byline" },
    { 85,"Byline Title" },
    { 90,"City" },
    { 95,"Province State" },
    { 100,"Country Code" },
    { 101,"Country" },
    { 103,"Original Transmission Reference" },
    { 105,"Headline" },
    { 110,"Credit" },
    { 115,"Source" },
    { 116,"Copyright String" },
    { 120,"Caption" },
    { 121,"Local Caption" },
    { 122,"Caption Writer" },
    /* ids 200-219: generic "Custom Field n" slots */
    { 200,"Custom Field 1" },
    { 201,"Custom Field 2" },
    { 202,"Custom Field 3" },
    { 203,"Custom Field 4" },
    { 204,"Custom Field 5" },
    { 205,"Custom Field 6" },
    { 206,"Custom Field 7" },
    { 207,"Custom Field 8" },
    { 208,"Custom Field 9" },
    { 209,"Custom Field 10" },
    { 210,"Custom Field 11" },
    { 211,"Custom Field 12" },
    { 212,"Custom Field 13" },
    { 213,"Custom Field 14" },
    { 214,"Custom Field 15" },
    { 215,"Custom Field 16" },
    { 216,"Custom Field 17" },
    { 217,"Custom Field 18" },
    { 218,"Custom Field 19" },
    { 219,"Custom Field 20" }
};

/*
 * Write the first `len` bytes of `s` to `ofile` as one double-quoted line.
 *
 * We format the output using HTML conventions to preserve control
 * characters and such:
 *   '&'  becomes "&amp;"
 *   '"'  becomes "&quot;"
 *   '<' and '>' become "&lt;" / "&gt;" (only when HANDLE_GT_LT is defined)
 *   any byte for which iscntrl() is true becomes "&#<decimal>;"
 * Every other byte is copied unchanged.  The line is terminated with a
 * closing quote and a newline.  Embedded NUL bytes are handled because the
 * loop is driven by `len`, not by a terminator.
 */
void formatString(FILE *ofile, const char *s, int len)
{
  putc('"', ofile);
  for (; len > 0; --len, ++s) {
    /*
     * Go through unsigned char: passing a negative value (a byte >= 0x80
     * where plain char is signed) to iscntrl() is undefined behavior, and
     * a numeric escape must never be printed as a negative number.
     */
    int c = (unsigned char) *s;

    switch (c) {
    case '&':
      fputs("&amp;", ofile);
      break;
#ifdef HANDLE_GT_LT
    case '<':
      fputs("&lt;", ofile);
      break;
    case '>':
      fputs("&gt;", ofile);
      break;
#endif
    case '"':
      fputs("&quot;", ofile);
      break;
    default:
      if (iscntrl(c))
        fprintf(ofile, "&#%d;", c);
      else
        putc(c, ofile);
      break;
    }
  }
  fputs("\"\n", ofile);
}

/*
 * One named HTML entity understood by convertHTMLcodes().
 */
typedef struct _html_code
{
  short
    len;   /* number of characters in `code`, including '&' and ';' */
  const char
    *code, /* the escape sequence as it appears in the text form */
    val;   /* the single character it stands for */
} html_code;

/* Named entities accepted on input; must mirror what formatString() emits. */
static const html_code html_codes[] = {
#ifdef HANDLE_GT_LT
    { 4,"&lt;",'<' },
    { 4,"&gt;",'>' },
#endif
    { 5,"&amp;",'&' },
    { 6,"&quot;",'"' }
};

/*
 * Return non-zero when the first `n` characters of `s` equal `code`,
 * ignoring ASCII case.  Stops (no match) if `s` ends early.
 */
static int htmlCodeMatches(const char *s, const char *code, int n)
{
  int
    k;

  for (k=0; k < n; k++)
    if (s[k] == '\0' ||
        tolower((unsigned char) s[k]) != tolower((unsigned char) code[k]))
      return 0;
  return 1;
}

/*
 * This routine converts the HTML escape sequence at the start of `s`
 * back to the original character, in place: the decoded character is
 * stored in s[0] and the remainder of the string is moved down behind it.
 *
 *   s    - NUL-terminated, writable string positioned on the '&'
 *   len  - number of characters available at `s`
 *
 * Recognized forms are "&#N;" with one to three decimal digits and a
 * value of at most 255, and the named entities in html_codes (matched
 * without regard to case).
 *
 * - returns the number of characters dropped, or 0 when nothing at `s`
 *   was recognized (the string is then left untouched).
 */
int convertHTMLcodes(char *s, int len)
{
  if (len <=0 || s==(char*)NULL || *s=='\0')
    return 0;

  if (s[1] == '#')
    {
      int
        val = 0,
        o = 2;    /* index of the character being examined */

      if (s[0] != '&')
        return 0;

      /* at most three digits: positions 2..4, so ';' is at 3, 4 or 5 */
      while (o < 5 && isdigit((unsigned char) s[o]))
      {
        val = val*10 + (s[o] - '0');
        o++;
      }

      /*
       * Reject a missing digit, a missing terminator (this also stops at
       * the end of the string) and values that do not fit in one byte,
       * rather than corrupting the text.
       */
      if (o == 2 || s[o] != ';' || val > 255)
        return 0;

      /* regions overlap, so memmove (not strcpy); +1 carries the NUL */
      memmove(s+1, s+o+1, strlen(s+o+1)+1);
      *s = (char) val;
      return o;
    }
  else
    {
      int
        i,
        codes = sizeof(html_codes) / sizeof(html_code);

      for (i=0; i < codes; i++)
      {
        if (html_codes[i].len <= len)
          if (htmlCodeMatches(s, html_codes[i].code, html_codes[i].len))
            {
              memmove(s+1, s+html_codes[i].len,
                strlen(s+html_codes[i].len)+1);
              *s = html_codes[i].val;
              return html_codes[i].len-1;
            }
      }
    }

  return 0;
}

/*
 * Convert a binary IPTC stream read from `ifile` into the line-oriented
 * text form written to `ofile`.
 *
 * Bytes are skipped until the first 0x1c marker.  Each block is then read
 * as: marker, dataset byte, record-number byte, length, data.  For every
 * block one line is written,
 *
 *     <dataset>#<recnum>[#<name>]="<escaped data>"
 *
 * where <name> comes from the tags table (matched on the record number)
 * and the data is escaped by formatString().
 *
 * Returns the number of blocks written, or -1 when the stream is
 * malformed (a non-marker byte where a marker is expected, premature end
 * of input, or an unusable length).  For historical reasons 0 is returned
 * when the input ends right before the length field or when memory
 * cannot be allocated.
 */
int formatIPTC(FILE *ifile, FILE *ofile)
{
  unsigned int
    foundiptc,
    tagsfound;

  unsigned char
    recnum,
    dataset;

  const char
    *readable;

  char
    *str;

  long
    tagindx,
    taglen;

  int
    i,
    tagcount = sizeof(tags) / sizeof(tag_spec),
    c;  /* must be int: a char cannot tell the byte 0xFF apart from EOF */

  foundiptc = 0; /* found the IPTC-Header */
  tagsfound = 0; /* number of tags found */

  c = getc(ifile);
  while (c != EOF)
  {
    if (c == 0x1c)
      foundiptc = 1;
    else
      {
        if (foundiptc)
          return -1;
        /* still hunting for the first marker: fetch the next byte,
           otherwise the loop would spin forever on the same value */
        c = getc(ifile);
        continue;
      }

    /* we found the 0x1c tag and now grab the dataset and record number tags */
    c = getc(ifile);
    if (c == EOF)
      return -1;
    dataset = (unsigned char) c;
    c = getc(ifile);
    if (c == EOF)
      return -1;
    recnum = (unsigned char) c;

    /* try to match this record to one of the ones in our named table */
    for (i=0; i< tagcount; i++)
    {
      if (tags[i].id == recnum)
          break;
    }
    if (i < tagcount)
      readable = tags[i].name;
    else
      readable = "";

    /* then we decode the length of the block that follows - long or short fmt */
    c = getc(ifile);
    if (c == EOF)
      return 0;
    if (c & 0x80)
      {
        /*
         * NOTE(review): the text-to-binary writer in main() emits a long
         * length as four bytes whose first byte carries the 0x80 flag,
         * whereas this reader consumes the flag byte and then four further
         * bytes.  Confirm which layout is intended before relying on
         * round-tripping blocks of 64 KiB or more.
         */
        unsigned long
          longlen = 0;

        for (i=0; i<4; i++)
        {
          c = getc(ifile);
          if (c == EOF)
            return -1;
          longlen = (longlen << 8) | (unsigned long) c;
        }
        /* keep taglen+1 representable and non-negative on every platform */
        if (longlen > 0x7ffffffeUL)
          return -1;
        taglen = (long) longlen;
      }
    else
      {
        taglen = ((long) c) << 8;
        c = getc(ifile);
        if (c == EOF)
          return -1;
        taglen |= (long) c;
      }

    /* make a buffer to hold the tag data and snag it from the input stream */
    str = (char *) malloc((size_t) taglen + 1);
    if (str == (char *) NULL)
      {
        fprintf(stderr, "Memory allocation failed\n");
        return 0;
      }
    for (tagindx=0; tagindx<taglen; tagindx++)
    {
      c = getc(ifile);
      if (c == EOF)
        {
          free(str);
          return -1;
        }
      str[tagindx] = (char) c;
    }
    str[ taglen ] = 0;

    /* now finish up by formatting this binary data into ASCII equivalent */
    if (readable[0] != '\0')
      fprintf(ofile, "%u#%u#%s=",(unsigned int)dataset, (unsigned int) recnum, readable);
    else
      fprintf(ofile, "%u#%u=",(unsigned int)dataset, (unsigned int) recnum);
    formatString( ofile, str, (int) taglen );
    free(str);

    tagsfound++;

    c = getc(ifile);
  }
  return (int) tagsfound;
}

/*
 * Forward declaration: tokenizer() is defined further down in this file
 * but is called from main().  Its parameters are described in the long
 * comment that precedes the definition.
 */
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
char *white,char *brkchar,char *quote,char eschar,char *brkused,
int *next,char *quoted);

/*
 * Read one line of arbitrary length from `file`.
 *
 *   b     - heap buffer (from malloc/realloc) to read into; may be NULL
 *   blen  - in:  usable size of `b` in bytes
 *           out: length of the line plus one for the terminator, or 0
 *                when NULL is returned
 *   file  - stream to read from
 *
 * Characters are stored up to, but not including, the next '\n' or the
 * end of input, and the result is NUL-terminated.  The buffer is grown
 * with realloc() as needed, so the caller must continue with the RETURNED
 * pointer only.
 *
 * Returns the (possibly moved) buffer, or NULL when no character was read
 * - that is at end of input and also for an empty line - or when memory
 * could not be obtained.  Whenever NULL is returned the buffer has been
 * released, so the caller must not free or reuse the pointer it passed in.
 */
char *super_fgets(char *b, int *blen, FILE *file)
{
  enum { FIRST_GROWTH = 16 };  /* size used when no capacity was supplied */

  int
    c,
    len;

  size_t
    used = 0;  /* characters stored so far */

  /* a missing buffer or a nonsensical size means "no room yet" */
  len = (b != (char *) NULL && *blen > 0) ? *blen : 0;

  for ( ; ; )
  {
    c=fgetc(file);
    if (c == EOF || c == '\n')
      break;
    /* always keep one byte spare for the terminator */
    if (used + 1 >= (size_t) len)
      {
        char
          *grown = (char *) NULL;

        /* refuse to double once that would overflow an int */
        if (len <= (int) (((unsigned int) -1) >> 2))
          {
            len = (len > 0) ? (len << 1) : FIRST_GROWTH;
            /* keep the old pointer until realloc is known to have worked */
            grown = (char *) realloc(b, (size_t) len + 2);
          }
        if (grown == (char *) NULL)
          {
            free(b);
            *blen=0;
            return (char *) NULL;
          }
        b=grown;
      }
    b[used++]=(char) c;
  }

  *blen=0;
  if (used == 0)
    {
      /* nothing read: hand back NULL and do not leak the buffer */
      free(b);
      return (char *) NULL;
    }
  b[used] = '\0';
  *blen=(int) used + 1;
  return b;
}

#define BUFFER_SZ 4096  /* initial size of the line buffer in text mode */

/*
 * Command-line entry point.
 *
 *   -b        binary mode (default): read binary IPTC, write the text form
 *   -t        text mode: read the text form, write binary IPTC
 *   -i file   read from `file` instead of stdin
 *   -o file   write to `file` instead of stdout
 *
 * Options may start with '-' or '/'.  They are processed left to right
 * and the open mode used for -i / -o depends on the mode selected so far,
 * so -t or -b has to come before -i and -o.
 *
 * Returns 1 for a usage error, a file that cannot be opened or an
 * allocation failure, and 0 otherwise (the result of formatIPTC() is not
 * reflected in the exit status).
 */
int main(int argc, char *argv[])
{
  int
    i,
    mode; /* 0: iptc binary input, 1: iptc text input */

  FILE
    *ifile = stdin,
    *ofile = stdout;

  char
    c;

  const char
    *usage = "usage: iptcutil -t | -b [-i file] [-o file] <input >output";

  if( argc < 2 )
    {
      fputs(usage, stdout);
      return 1;
    }

  mode = 0;

  for (i=1; i<argc; i++)
  {
    c = argv[i][0];
    if (c == '-' || c == '/')
      {
        c = argv[i][1];
        switch( c )
        {
        case 't':
          mode = 1;
#ifdef WIN32
          /* Set "stdout" to binary mode: */
          _setmode( _fileno( ofile ), _O_BINARY );
#endif
          break;
        case 'b':
          mode = 0;
#ifdef WIN32
          /* Set "stdin" to binary mode: */
          _setmode( _fileno( ifile ), _O_BINARY );
#endif
          break;
        case 'i':
          /* the file name is a separate argument; make sure it is there */
          if (i + 1 >= argc)
            {
              fputs(usage, stdout);
              return 1;
            }
          if (mode == 0)
            ifile = fopen(argv[++i], "rb");
          else
            ifile = fopen(argv[++i], "rt");
          if (ifile == (FILE *)NULL)
            {
              printf("Unable to open: %s\n", argv[i]);
              return 1;
            }
          break;
        case 'o':
          if (i + 1 >= argc)
            {
              fputs(usage, stdout);
              return 1;
            }
          if (mode == 0)
            ofile = fopen(argv[++i], "wt");
          else
            ofile = fopen(argv[++i], "wb");
          if (ofile == (FILE *)NULL)
            {
              printf("Unable to open: %s\n", argv[i]);
              return 1;
            }
          break;
        default:
          printf("Unknown option: %s\n", argv[i]);
          return 1;
        }
      }
    else
      {
        fputs(usage, stdout);
        return 1;
      }
  }

  if (mode == 0) /* handle binary iptc info */
    formatIPTC(ifile, ofile);

  if (mode == 1) /* handle text form of iptc info */
    {
      char
        brkused,
        quoted,
        *line,
        *token,
        *newstr;

      int
        state,
        next;

      unsigned char
        recnum = 0,
        dataset = 0;

      int
        inputlen = BUFFER_SZ;

      line = (char *) malloc(inputlen);
      if (line == (char *) NULL)
        {
          fprintf(stderr, "Memory allocation failed\n");
          return 1;
        }

      /* one input line per block: <dataset>#<recnum>[#<name>]="<value>" */
      while((line = super_fgets(line,&inputlen,ifile))!=NULL)
      {
        state=0;
        next=0;

        /* inputlen now holds the line length + 1, enough for any piece of it */
        token = (char *) malloc(inputlen);
        newstr = (char *) malloc(inputlen);
        if (token == (char *) NULL || newstr == (char *) NULL)
          {
            fprintf(stderr, "Memory allocation failed\n");
            free(token);
            free(newstr);
            free(line);
            return 1;
          }

        /* split at '=': piece 0 is the key, piece 1 the quoted value */
        while(tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
          &brkused,&next,&quoted)==0)
        {
          if (state == 0)
            {
              /* key: split at '#', field 0 = dataset, field 1 = recnum */
              int
                field = 0,
                pos = 0;

              char
                keybrk,
                keyquoted;

              while(tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
                &keybrk, &pos, &keyquoted)==0)
              {
                if (field == 0)
                  dataset = (unsigned char) atoi(newstr);
                else
                   if (field == 1)
                     recnum = (unsigned char) atoi(newstr);
                field++;
              }
            }
          else
            if (state == 1)
              {
                int
                  pos = 0;

                unsigned long
                  len;

                char
                  valbrk,
                  valquoted;

                /* walk the value from '&' to '&' and decode each escape
                   in place; len tracks the shrinking data length */
                len = strlen(token);
                while(tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
                  &valbrk, &pos, &valquoted)==0)
                {
                  if (valbrk && pos > 0)
                    {
                      char
                        *s = &token[pos-1]; /* the '&' just consumed */

                      len -= convertHTMLcodes(s, (int) strlen(s));
                    }
                }

                fputc(0x1c, ofile);
                fputc(dataset, ofile);
                fputc(recnum, ofile);
                if (len < 0x10000)
                  {
                    /* short form: two-byte big-endian length */
                    fputc((len >> 8) & 255, ofile);
                    fputc(len & 255, ofile);
                  }
                else
                  {
                    /* long form: four bytes, top bit of the first one set */
                    fputc(((len >> 24) & 255) | 0x80, ofile);
                    fputc((len >> 16) & 255, ofile);
                    fputc((len >> 8) & 255, ofile);
                    fputc(len & 255, ofile);
                  }
                pos=0;
                while (len--)
                  fputc(token[pos++], ofile);
              }
          state++;
        }
        free(token);
        token = (char *)NULL;
        free(newstr);
        newstr = (char *)NULL;
      }
      free(line);
    }

  /* close (and thereby flush) the streams in both modes */
  fclose( ifile );
  fclose( ofile );

  return 0;
}

/*
	This routine is a generalized, finite state token parser. It allows
    you to extract tokens one at a time from a string of characters.  The
    characters used for white space, for break characters, and for quotes
    can be specified. Also, characters in the string can be preceded by
    a specifiable escape character which removes any special meaning the
    character may have.

	There are a lot of formal parameters in this subroutine call, but
	once you get familiar with them, this routine is fairly easy to use.
	"#define" macros can be used to generate simpler looking calls for
	commonly used applications of this routine.

	First, some terminology:

	token:		used here, a single unit of information in
				the form of a group of characters.

	white space:	space that gets ignored (except within quotes
				or when escaped), like blanks and tabs.  in
				addition, white space terminates a non-quoted
				token.

	break character: a character that separates non-quoted tokens.
				commas are a common break character.  the
				usage of break characters to signal the end
				of a token is the same as that of white space,
				except multiple break characters with nothing
				or only white space between generate a null
				token for each two break characters together.

				for example, if blank is set to be the white
				space and comma is set to be the break
				character, the line ...

				A, B, C ,  , DEF

				... consists of 5 tokens:

				1)	"A"
				2)	"B"
				3)	"C"
				4)	""      (the null string)
				5)	"DEF"

	quote character: 	a character that, when surrounding a group
				of other characters, causes the group of
				characters to be treated as a single token,
				no matter how many white spaces or break
				characters exist in the group.	also, a
				token always terminates after the closing
				quote.	for example, if ' is the quote
				character, blank is white space, and comma
				is the break character, the following
				string ...

				A, ' B, CD'EF GHI

				... consists of 4 tokens:

				1)	"A"
				2)	" B, CD" (note the blanks & comma)
				3)	"EF"
				4)	"GHI"

				the quote characters themselves do
				not appear in the resultant tokens.  the
				double quotes are delimiters i use here for
				documentation purposes only.

	escape character:	a character which itself is ignored but
				which causes the next character to be
				used as is.  ^ and \ are often used as
				escape characters.  an escape in the last
				position of the string gets treated as a
				"normal" (i.e., non-quote, non-white,
				non-break, and non-escape) character.
				for example, assume white space, break
				character, and quote are the same as in the
				above examples, and further, assume that
				^ is the escape character.  then, in the
				string ...

				ABC, ' DEF ^' GH' I ^ J K^ L ^

				... there are 7 tokens:

				1)	"ABC"
				2)	" DEF ' GH"
				3)	"I"
				4)	" "     (a lone blank)
				5)	"J"
				6)	"K L"
				7)	"^"     (passed as is at end of line)


	OK, now that you have this background, here's how to call "tokenizer":

	result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
		      brkused,next,quoted)

	result: 	0 if we haven't reached EOS (end of string), and
			1 if we have (this is an "int").

	flag:		right now, only the low order 2 bits are used.
			1 => convert non-quoted tokens to upper case
			2 => convert non-quoted tokens to lower case
			0 => do not convert non-quoted tokens
			(this is an "unsigned").

	token:		a character string containing the returned next token
			(this is a "char[]").

	maxtok: 	the maximum size of "token".  characters beyond
			"maxtok" are truncated (this is an "int").

	string: 	the string to be parsed (this is a "char[]").

	white:		a string of the valid white spaces.  example:

			char whitesp[]={" \t"};

			blank and tab will be valid white space (this is
			a "char[]").

	break:		a string of the valid break characters.  example:

			char breakch[]={";,"};

			semicolon and comma will be valid break characters
			(this is a "char[]").

			IMPORTANT:  do not use the name "break" as a C
			variable, as this is a reserved word in C.

	quote:		a string of the valid quote characters.  an example
			would be

			char quotech[]={"'\""};

			(this causes single and double quotes to be valid)
			note that a token starting with one of these characters
			needs the same quote character to terminate it.

			for example,

			"ABC '

			is unterminated, but

			"DEF" and 'GHI'

			are properly terminated.  note that different quote
			characters can appear on the same line; only for
			a given token do the quote characters have to be
			the same (this is a "char[]").

	escape: 	the escape character (NOT a string ... only one
			allowed).  use zero if none is desired (this is
			a "char").

	brkused:	the break character used to terminate the current
			token.	if the token was quoted, this will be the
			quote used.  if the token is the last one on the
			line, this will be zero (this is a pointer to a
			"char").

	next:		this variable points to the first character of the
			next token.  it gets reset by "tokenizer" as it steps
			through the string.  set it to 0 upon initialization,
			and leave it alone after that.	you can change it
			if you want to jump around in the string or re-parse
			from the beginning, but be careful (this is a
			pointer to an "int").

	quoted: 	set to 1 (true) if the token was quoted and 0 (false)
			if not.  you may need this information (for example:
			in C, a string with quotes around it is a character
			string, while one without is an identifier).

			(this is a pointer to a "char").
*/

/* states of the tokenizer() scanner */

#define IN_WHITE 0  /* before the token: nothing has been stored yet */
#define IN_TOKEN 1  /* inside an unquoted token */
#define IN_QUOTE 2  /* inside a quoted token; break and white chars are stored as data */
#define IN_OZONE 3  /* token text is complete (closing quote or white space seen);
                       scanning on for a break char or the start of the next token */

/*
 * Scanner state shared between tokenizer() and chstore().  These are
 * file-scope variables that tokenizer() re-initializes on every call, so
 * the parser cannot be used from two places at the same time.
 */
int _p_state;	   /* current state	 */
unsigned _p_flag;  /* option flag	 */
char _p_curquote;  /* current quote char */
int _p_tokpos;	   /* current token pos  */

/*
 * Locate `ch` in the NUL-terminated `string` ... used only by "tokenizer".
 * Returns the zero-based position of the first occurrence, or -1 when the
 * character does not occur.  The terminator itself is never matched, so
 * searching for '\0' yields -1.
 */

int sindex(char ch,char *string)
{
  int pos = 0;

  while (string[pos] != '\0')
  {
    if (string[pos] == ch)
      return pos;		/* found: report where */
    pos++;
  }
  return -1;			/* reached the end without a match */
}

/*
 * Routine to store a character in a string ... used only by "tokenizer".
 *
 * Appends `ch` to `string` at the current token position (_p_tokpos) and
 * advances that position.  `max` is the size of `string`; once only the
 * slot reserved for the terminator is left, further characters are
 * silently dropped, which is how over-long tokens get truncated.
 *
 * Outside of quotes the character is case-converted according to the low
 * two bits of _p_flag (1 = upper, 2 = lower, anything else = unchanged).
 * Quoted characters are always stored as they are.
 */

void chstore(char *string,int max,char ch)
{
  char c;

  if(_p_tokpos<0||_p_tokpos>=max-1)
    return;			/* no room left: truncate */

  if(_p_state==IN_QUOTE)
    c=ch;
  else
    switch(_p_flag&3)
    {
      /*
       * <ctype.h> functions take an int that must hold an unsigned char
       * value (or EOF); handing them a negative plain char is undefined.
       */
      case 1: 	    /* convert to upper */
        c=(char) toupper((unsigned char) ch);
        break;

      case 2: 	    /* convert to lower */
        c=(char) tolower((unsigned char) ch);
        break;

      default:	    /* use as is */
        c=ch;
        break;
    }
  string[_p_tokpos++]=c;
}

/*
 * Extract the next token from `line`, starting at index *next.
 *
 * The token text is copied into `token` (at most tokmax-1 characters plus
 * a terminating NUL; the rest is dropped by chstore()).  *next is advanced
 * so that the following call continues where this one stopped.  *brkused
 * receives the character that ended the token (0 at end of line) and
 * *quoted is set to 1 when a quote was opened.
 *
 * Returns 1, without touching `token`, when *next already points at the
 * end of `line`; returns 0 in every other case.
 *
 * The scanner keeps its working state in the file-scope variables
 * _p_state, _p_curquote, _p_flag and _p_tokpos, which are reset on each
 * call.
 */
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
  char *white,char *brkchar,char *quote,char eschar,char *brkused,
    int *next,char *quoted)
{
  int qp;
  char c,nc;

  *brkused=0;		/* initialize to null */
  *quoted=0;		/* assume not quoted  */

  if(!line[*next])	/* if we're at end of line, indicate such */
    return 1;

  _p_state=IN_WHITE;   /* initialize state */
  _p_curquote=0;	   /* initialize previous quote char */
  _p_flag=inflag;	   /* set option flag */

  /* each character is classified in this order: break, quote, white,
     escape, ordinary; the first class that matches decides what happens */
  for(_p_tokpos=0;(c=line[*next]);++(*next))	/* main loop */
  {
    if((qp=sindex(c,brkchar))>=0)  /* break */
    {
      switch(_p_state)
      {
	    case IN_WHITE:		/* these are the same here ...	*/
	    case IN_TOKEN:		/* ... just get out		*/
	    case IN_OZONE:		/* ditto			*/
	      ++(*next);		/* consume the break character itself */
	      *brkused=brkchar[qp];
	      goto byebye;

	    case IN_QUOTE:		 /* just keep going */
	      chstore(token,tokmax,c);
	      break;
      }
    }
    else if((qp=sindex(c,quote))>=0)  /* quote */
    {
      switch(_p_state)
      {
	    case IN_WHITE:	 /* these are identical, */
	      _p_state=IN_QUOTE; /* change states   */
	      _p_curquote=quote[qp]; /* save quote char */
	      *quoted=1;	/* set to true as long as something is in quotes */
	      break;

	    case IN_QUOTE:
	      if(quote[qp]==_p_curquote) /* same as the beginning quote? */
	      {
	        _p_state=IN_OZONE;
	        _p_curquote=0;
	      }
	      else
	        chstore(token,tokmax,c); /* treat as regular char */
	      break;

	    case IN_TOKEN:
	    case IN_OZONE:
	      /* *next is left on the quote so the next call starts with it */
	      *brkused=c; /* uses quote as break char */
	      goto byebye;
      }
    }
    else if((qp=sindex(c,white))>=0) /* white */
    {
      switch(_p_state)
      {
	    case IN_WHITE:
	    case IN_OZONE:
	      break;		/* keep going */

	    case IN_TOKEN:
	      _p_state=IN_OZONE;
	      break;

	    case IN_QUOTE:
	      chstore(token,tokmax,c); /* it's valid here */
	      break;
      }
    }
    else if(c==eschar)  /* escape */
    {
      /* never taken when eschar is 0: the loop stops at the NUL first */
      nc=line[(*next)+1];
      if(nc==0) 		/* end of line */
      {
	    *brkused=0;
	    chstore(token,tokmax,c);	/* a trailing escape is kept as data */
	    ++(*next);
	    goto byebye;
      }
      switch(_p_state)
      {
	    case IN_WHITE:
	      --(*next);	/* step back so the loop's ++ revisits this escape */
	      _p_state=IN_TOKEN;	/* ... now in the IN_TOKEN state */
	      break;

	    case IN_TOKEN:
	    case IN_QUOTE:
	      ++(*next);	/* skip the escape, store the following char as is */
	      chstore(token,tokmax,nc);
	      break;

	    case IN_OZONE:
	      goto byebye;
      }
    }
    else	/* anything else is just a real character */
    {
      switch(_p_state)
      {
	    case IN_WHITE:
	      _p_state=IN_TOKEN; /* switch states */
	      /* deliberate fall through: the character starts the token */

	    case IN_TOKEN:		 /* these 2 are     */
	    case IN_QUOTE:		 /*  identical here */
	      chstore(token,tokmax,c);
	      break;

	    case IN_OZONE:
	      goto byebye;	/* belongs to the next token; leave *next on it */
      }
    }
  }		/* end of main loop */

byebye:
  token[_p_tokpos]=0;	/* make sure token ends with EOS */

  return 0;
}
Commit	Line	Data
8414a40c VZ	1	/* $Id$ */
	2
	3	#include "tif_config.h"
	4
	5	#include <stdio.h>
	6	#include <stdlib.h>
	7	#include <string.h>
	8	#include <memory.h>
	9	#include <ctype.h>
	10
	11	#ifdef HAVE_STRINGS_H
	12	# include <strings.h>
	13	#endif
	14
	15	#ifdef HAVE_IO_H
	16	# include <io.h>
	17	#endif
	18
	19	#ifdef HAVE_FCNTL_H
	20	# include <fcntl.h>
	21	#endif
	22
	23	#ifdef WIN32
	24	#define STRNICMP strnicmp
	25	#else
	26	#define STRNICMP strncasecmp
	27	#endif
	28
	29	typedef struct _tag_spec
	30	{
	31	short
	32	id;
	33
	34	char
	35	*name;
	36	} tag_spec;
	37
	38	static tag_spec tags[] = {
	39	{ 5,"Image Name" },
	40	{ 7,"Edit Status" },
	41	{ 10,"Priority" },
	42	{ 15,"Category" },
	43	{ 20,"Supplemental Category" },
	44	{ 22,"Fixture Identifier" },
	45	{ 25,"Keyword" },
	46	{ 30,"Release Date" },
	47	{ 35,"Release Time" },
	48	{ 40,"Special Instructions" },
	49	{ 45,"Reference Service" },
	50	{ 47,"Reference Date" },
	51	{ 50,"Reference Number" },
	52	{ 55,"Created Date" },
	53	{ 60,"Created Time" },
	54	{ 65,"Originating Program" },
	55	{ 70,"Program Version" },
	56	{ 75,"Object Cycle" },
	57	{ 80,"Byline" },
	58	{ 85,"Byline Title" },
	59	{ 90,"City" },
	60	{ 95,"Province State" },
	61	{ 100,"Country Code" },
	62	{ 101,"Country" },
	63	{ 103,"Original Transmission Reference" },
	64	{ 105,"Headline" },
65	{ 110,"Credit" },
66	{ 115,"Source" },
67	{ 116,"Copyright String" },
68	{ 120,"Caption" },
69	{ 121,"Local Caption" },
70	{ 122,"Caption Writer" },
71	{ 200,"Custom Field 1" },
72	{ 201,"Custom Field 2" },
73	{ 202,"Custom Field 3" },
74	{ 203,"Custom Field 4" },
75	{ 204,"Custom Field 5" },
76	{ 205,"Custom Field 6" },
77	{ 206,"Custom Field 7" },
78	{ 207,"Custom Field 8" },
79	{ 208,"Custom Field 9" },
80	{ 209,"Custom Field 10" },
81	{ 210,"Custom Field 11" },
82	{ 211,"Custom Field 12" },
83	{ 212,"Custom Field 13" },
84	{ 213,"Custom Field 14" },
85	{ 214,"Custom Field 15" },
86	{ 215,"Custom Field 16" },
87	{ 216,"Custom Field 17" },
88	{ 217,"Custom Field 18" },
89	{ 218,"Custom Field 19" },
90	{ 219,"Custom Field 20" }
91	};
92
93	/*
94	* We format the output using HTML conventions
95	* to preserve control characters and such.
96	*/
97	void formatString(FILE ofile, const char s, int len)
98	{
99	putc('"', ofile);
100	for (; len > 0; --len, ++s) {
101	int c = *s;
102	switch (c) {
103	case '&':
104	fputs("&", ofile);
105	break;
106	#ifdef HANDLE_GT_LT
107	case '<':
108	fputs("<", ofile);
109	break;
110	case '>':
111	fputs(">", ofile);
112	break;
113	#endif
114	case '"':
115	fputs(""", ofile);
116	break;
117	default:
118	if (iscntrl(c))
119	fprintf(ofile, "&#%d;", c);
120	else
121	putc(*s, ofile);
122	break;
123	}
124	}
125	fputs("\"\n", ofile);
126	}
127
128	typedef struct _html_code
129	{
130	short
131	len;
132	const char
133	*code,
134	val;
135	} html_code;
136
137	static html_code html_codes[] = {
138	#ifdef HANDLE_GT_LT
139	{ 4,"<",'<' },
140	{ 4,">",'>' },
141	#endif
142	{ 5,"&",'&' },
143	{ 6,""",'"' }
144	};
145
146	/*
147	* This routine converts HTML escape sequence
148	* back to the original ASCII representation.
149	* - returns the number of characters dropped.
150	*/
151	int convertHTMLcodes(char *s, int len)
152	{
153	if (len <=0 \|\| s==(char)NULL \|\| s=='\0')
154	return 0;
155
156	if (s[1] == '#')
157	{
158	int val, o;
159
160	if (sscanf(s,"&#%d;",&val) == 1)
161	{
162	o = 3;
163	while (s[o] != ';')
164	{
165	o++;
166	if (o > 5)
167	break;
168	}
169	if (o < 5)
170	strcpy(s+1, s+1+o);
171	*s = val;
172	return o;
173	}
174	}
175	else
176	{
177	int
178	i,
179	codes = sizeof(html_codes) / sizeof(html_code);
180
181	for (i=0; i < codes; i++)
182	{
183	if (html_codes[i].len <= len)
184	if (STRNICMP(s, html_codes[i].code, html_codes[i].len) == 0)
185	{
186	strcpy(s+1, s+html_codes[i].len);
187	*s = html_codes[i].val;
188	return html_codes[i].len-1;
189	}
190	}
191	}
192
193	return 0;
194	}
195
196	int formatIPTC(FILE ifile, FILE ofile)
197	{
198	unsigned int
199	foundiptc,
200	tagsfound;
201
202	unsigned char
203	recnum,
204	dataset;
205
206	char
207	*readable,
208	*str;
209
210	long
211	tagindx,
212	taglen;
213
214	int
215	i,
216	tagcount = sizeof(tags) / sizeof(tag_spec);
217
218	char
219	c;
220
221	foundiptc = 0; /* found the IPTC-Header */
222	tagsfound = 0; /* number of tags found */
223
224	c = getc(ifile);
225	while (c != EOF)
226	{
227	if (c == 0x1c)
228	foundiptc = 1;
229	else
230	{
231	if (foundiptc)
232	return -1;
233	else
234	continue;
235	}
236
237	/* we found the 0x1c tag and now grab the dataset and record number tags */
238	dataset = getc(ifile);
239	if ((char) dataset == EOF)
240	return -1;
241	recnum = getc(ifile);
242	if ((char) recnum == EOF)
243	return -1;
244	/* try to match this record to one of the ones in our named table */
245	for (i=0; i< tagcount; i++)
246	{
247	if (tags[i].id == recnum)
248	break;
249	}
250	if (i < tagcount)
251	readable = tags[i].name;
252	else
253	readable = "";
254
255	/* then we decode the length of the block that follows - long or short fmt */
256	c = getc(ifile);
257	if (c == EOF)
258	return 0;
259	if (c & (unsigned char) 0x80)
260	{
261	unsigned char
262	buffer[4];
263
264	for (i=0; i<4; i++)
265	{
266	c = buffer[i] = getc(ifile);
267	if (c == EOF)
268	return -1;
269	}
270	taglen = (((long) buffer[ 0 ]) << 24) \|
271	(((long) buffer[ 1 ]) << 16) \|
272	(((long) buffer[ 2 ]) << 8) \|
273	(((long) buffer[ 3 ]));
274	}
275	else
276	{
277	unsigned char
278	x = c;
279
280	taglen = ((long) x) << 8;
281	x = getc(ifile);
282	if ((char)x == EOF)
283	return -1;
284	taglen \|= (long) x;
285	}
286	/* make a buffer to hold the tag data and snag it from the input stream */
287	str = (char *) malloc((unsigned int) (taglen+1));
288	if (str == (char *) NULL)
289	{
290	printf("Memory allocation failed");
291	return 0;
292	}
293	for (tagindx=0; tagindx<taglen; tagindx++)
294	{
295	c = str[tagindx] = getc(ifile);
296	if (c == EOF)
297	return -1;
298	}
299	str[ taglen ] = 0;
300
301	/* now finish up by formatting this binary data into ASCII equivalent */
302	if (strlen(readable) > 0)
303	fprintf(ofile, "%d#%d#%s=",(unsigned int)dataset, (unsigned int) recnum, readable);
304	else
305	fprintf(ofile, "%d#%d=",(unsigned int)dataset, (unsigned int) recnum);
306	formatString( ofile, str, taglen );
307	free(str);
308
309	tagsfound++;
310
311	c = getc(ifile);
312	}
313	return tagsfound;
314	}
315
316	int tokenizer(unsigned inflag,char token,int tokmax,char line,
317	char white,char brkchar,char quote,char eschar,char brkused,
318	int next,char quoted);
319
320	char super_fgets(char b, int blen, FILE file)
321	{
322	int
323	c,
324	len;
325
326	char
327	*q;
328
329	len=*blen;
330	for (q=b; ; q++)
331	{
332	c=fgetc(file);
333	if (c == EOF \|\| c == '\n')
334	break;
335	if (((int)q - (int)b + 1 ) >= (int) len)
336	{
337	int
338	tlen;
339
340	tlen=(int)q-(int)b;
341	len<<=1;
342	b=(char ) realloc((char ) b,(len+2));
343	if ((char ) b == (char ) NULL)
344	break;
345	q=b+tlen;
346	}
347	*q=(unsigned char) c;
348	}
349	*blen=0;
350	if ((unsigned char )b != (unsigned char ) NULL)
351	{
352	int
353	tlen;
354
355	tlen=(int)q - (int)b;
356	if (tlen == 0)
357	return (char *) NULL;
358	b[tlen] = '\0';
359	*blen=++tlen;
360	}
361	return b;
362	}
363
364	#define BUFFER_SZ 4096
365
366	int main(int argc, char *argv[])
367	{
368	unsigned int
369	length;
370
371	unsigned char
372	*buffer;
373
374	int
375	i,
376	mode; /* iptc binary, or iptc text */
377
378	FILE
379	*ifile = stdin,
380	*ofile = stdout;
381
382	char
383	c,
384	*usage = "usage: iptcutil -t \| -b [-i file] [-o file] <input >output";
385
386	if( argc < 2 )
387	{
388	printf(usage);
389	return 1;
390	}
391
392	mode = 0;
393	length = -1;
394	buffer = (unsigned char *)NULL;
395
396	for (i=1; i<argc; i++)
397	{
398	c = argv[i][0];
399	if (c == '-' \|\| c == '/')
400	{
401	c = argv[i][1];
402	switch( c )
403	{
404	case 't':
405	mode = 1;
406	#ifdef WIN32
407	/* Set "stdout" to binary mode: */
408	_setmode( _fileno( ofile ), _O_BINARY );
409	#endif
410	break;
411	case 'b':
412	mode = 0;
413	#ifdef WIN32
414	/* Set "stdin" to binary mode: */
415	_setmode( _fileno( ifile ), _O_BINARY );
416	#endif
417	break;
418	case 'i':
419	if (mode == 0)
420	ifile = fopen(argv[++i], "rb");
421	else
422	ifile = fopen(argv[++i], "rt");
423	if (ifile == (FILE *)NULL)
424	{
425	printf("Unable to open: %s\n", argv[i]);
426	return 1;
427	}
428	break;
429	case 'o':
430	if (mode == 0)
431	ofile = fopen(argv[++i], "wt");
432	else
433	ofile = fopen(argv[++i], "wb");
434	if (ofile == (FILE *)NULL)
435	{
436	printf("Unable to open: %s\n", argv[i]);
437	return 1;
438	}
439	break;
440	default:
441	printf("Unknown option: %s\n", argv[i]);
442	return 1;
443	}
444	}
445	else
446	{
447	printf(usage);
448	return 1;
449	}
450	}
451
452	if (mode == 0) /* handle binary iptc info */
453	formatIPTC(ifile, ofile);
454
455	if (mode == 1) /* handle text form of iptc info */
456	{
457	char
458	brkused,
459	quoted,
460	*line,
461	*token,
462	*newstr;
463
464	int
465	state,
466	next;
467
468	unsigned char
469	recnum = 0,
470	dataset = 0;
471
472	int
473	inputlen = BUFFER_SZ;
474
475	line = (char *) malloc(inputlen);
476	token = (char *)NULL;
477	while((line = super_fgets(line,&inputlen,ifile))!=NULL)
478	{
479	state=0;
480	next=0;
481
482	token = (char *) malloc(inputlen);
483	newstr = (char *) malloc(inputlen);
484	while(tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
485	&brkused,&next,&quoted)==0)
486	{
487	if (state == 0)
488	{
489	int
490	state,
491	next;
492
493	char
494	brkused,
495	quoted;
496
497	state=0;
498	next=0;
499	while(tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
500	&brkused, &next, &quoted)==0)
501	{
502	if (state == 0)
503	dataset = (unsigned char) atoi(newstr);
504	else
505	if (state == 1)
506	recnum = (unsigned char) atoi(newstr);
507	state++;
508	}
509	}
510	else
511	if (state == 1)
512	{
513	int
514	next;
515
516	unsigned long
517	len;
518
519	char
520	brkused,
521	quoted;
522
523	next=0;
524	len = strlen(token);
525	while(tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
526	&brkused, &next, &quoted)==0)
527	{
528	if (brkused && next > 0)
529	{
530	char
531	*s = &token[next-1];
532
533	len -= convertHTMLcodes(s, strlen(s));
534	}
535	}
536
537	fputc(0x1c, ofile);
538	fputc(dataset, ofile);
539	fputc(recnum, ofile);
540	if (len < 0x10000)
541	{
542	fputc((len >> 8) & 255, ofile);
543	fputc(len & 255, ofile);
544	}
545	else
546	{
547	fputc(((len >> 24) & 255) \| 0x80, ofile);
548	fputc((len >> 16) & 255, ofile);
549	fputc((len >> 8) & 255, ofile);
550	fputc(len & 255, ofile);
551	}
552	next=0;
553	while (len--)
554	fputc(token[next++], ofile);
555	}
556	state++;
557	}
558	free(token);
559	token = (char *)NULL;
560	free(newstr);
561	newstr = (char *)NULL;
562	}
563	free(line);
564
565	fclose( ifile );
566	fclose( ofile );
567	}
568
569	return 0;
570	}
571
572	/*
This routine is a generalized, finite state token parser. It allows
you to extract tokens one at a time from a string of characters. The
575	characters used for white space, for break characters, and for quotes
576	can be specified. Also, characters in the string can be preceded by
577	a specifiable escape character which removes any special meaning the
578	character may have.
579
580	There are a lot of formal parameters in this subroutine call, but
581	once you get familiar with them, this routine is fairly easy to use.
582	"#define" macros can be used to generate simpler looking calls for
583	commonly used applications of this routine.
584
585	First, some terminology:
586
587	token: used here, a single unit of information in
588	the form of a group of characters.
589
590	white space: space that gets ignored (except within quotes
591	or when escaped), like blanks and tabs. in
592	addition, white space terminates a non-quoted
593	token.
594
595	break character: a character that separates non-quoted tokens.
596	commas are a common break character. the
597	usage of break characters to signal the end
598	of a token is the same as that of white space,
599	except multiple break characters with nothing
600	or only white space between generate a null
601	token for each two break characters together.
602
603	for example, if blank is set to be the white
604	space and comma is set to be the break
605	character, the line ...
606
607	A, B, C , , DEF
608
609	... consists of 5 tokens:
610
611	1) "A"
612	2) "B"
613	3) "C"
614	4) "" (the null string)
615	5) "DEF"
616
617	quote character: a character that, when surrounding a group
618	of other characters, causes the group of
619	characters to be treated as a single token,
620	no matter how many white spaces or break
621	characters exist in the group. also, a
622	token always terminates after the closing
623	quote. for example, if ' is the quote
624	character, blank is white space, and comma
625	is the break character, the following
626	string ...
627
628	A, ' B, CD'EF GHI
629
630	... consists of 4 tokens:
631
632	1) "A"
633	2) " B, CD" (note the blanks & comma)
634	3) "EF"
635	4) "GHI"
636
637	the quote characters themselves do
638	not appear in the resultant tokens. the
639	double quotes are delimiters i use here for
640	documentation purposes only.
641
642	escape character: a character which itself is ignored but
643	which causes the next character to be
644	used as is. ^ and \ are often used as
645	escape characters. an escape in the last
646	position of the string gets treated as a
647	"normal" (i.e., non-quote, non-white,
648	non-break, and non-escape) character.
649	for example, assume white space, break
650	character, and quote are the same as in the
651	above examples, and further, assume that
652	^ is the escape character. then, in the
653	string ...
654
655	ABC, ' DEF ^' GH' I ^ J K^ L ^
656
657	... there are 7 tokens:
658
659	1) "ABC"
660	2) " DEF ' GH"
661	3) "I"
662	4) " " (a lone blank)
663	5) "J"
664	6) "K L"
665	7) "^" (passed as is at end of line)
666
667
668	OK, now that you have this background, here's how to call "tokenizer":
669
670	result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
671	brkused,next,quoted)
672
673	result: 0 if we haven't reached EOS (end of string), and
674	1 if we have (this is an "int").
675
flag: right now, only the low order 2 bits are used (the value
is masked with 3).
1 => convert non-quoted tokens to upper case
2 => convert non-quoted tokens to lower case
0 => do not convert non-quoted tokens
(this is an "unsigned").
681
682	token: a character string containing the returned next token
683	(this is a "char[]").
684
685	maxtok: the maximum size of "token". characters beyond
686	"maxtok" are truncated (this is an "int").
687
688	string: the string to be parsed (this is a "char[]").
689
690	white: a string of the valid white spaces. example:
691
692	char whitesp[]={" \t"};
693
694	blank and tab will be valid white space (this is
695	a "char[]").
696
697	break: a string of the valid break characters. example:
698
699	char breakch[]={";,"};
700
701	semicolon and comma will be valid break characters
702	(this is a "char[]").
703
704	IMPORTANT: do not use the name "break" as a C
705	variable, as this is a reserved word in C.
706
707	quote: a string of the valid quote characters. an example
708	would be
709
char quotech[]={"'\""};
711
712	(this causes single and double quotes to be valid)
713	note that a token starting with one of these characters
714	needs the same quote character to terminate it.
715
716	for example,
717
718	"ABC '
719
720	is unterminated, but
721
722	"DEF" and 'GHI'
723
724	are properly terminated. note that different quote
725	characters can appear on the same line; only for
726	a given token do the quote characters have to be
727	the same (this is a "char[]").
728
729	escape: the escape character (NOT a string ... only one
730	allowed). use zero if none is desired (this is
731	a "char").
732
733	brkused: the break character used to terminate the current
734	token. if the token was quoted, this will be the
735	quote used. if the token is the last one on the
736	line, this will be zero (this is a pointer to a
737	"char").
738
739	next: this variable points to the first character of the
740	next token. it gets reset by "tokenizer" as it steps
741	through the string. set it to 0 upon initialization,
742	and leave it alone after that. you can change it
743	if you want to jump around in the string or re-parse
744	from the beginning, but be careful (this is a
745	pointer to an "int").
746
747	quoted: set to 1 (true) if the token was quoted and 0 (false)
748	if not. you may need this information (for example:
749	in C, a string with quotes around it is a character
750	string, while one without is an identifier).
751
752	(this is a pointer to a "char").
753	*/
754
/*
 * Parser states used by "tokenizer" and "chstore".
 */

#define IN_WHITE 0   /* skipping white space, no token started yet */
#define IN_TOKEN 1   /* collecting the characters of a non-quoted token */
#define IN_QUOTE 2   /* collecting the characters of a quoted token */
#define IN_OZONE 3   /* a token has ended; looking for what follows it */

/*
 * Working state shared between "tokenizer" and "chstore".  Every call to
 * "tokenizer" re-initializes all four variables before it starts scanning.
 */

int _p_state;        /* current state (one of the IN_* values above) */
unsigned _p_flag;    /* option flag; "chstore" looks at the low two bits */
char _p_curquote;    /* quote char that opened the current quoted token */
int _p_tokpos;       /* index in the token buffer of the next char to store */
766
/*
 * sindex - find a character in a string (used only by "tokenizer").
 *
 *   ch:     the character to look for.
 *   string: the NUL-terminated string to search.
 *
 * Returns the zero-based position of the first occurrence of "ch" in
 * "string", or -1 if it does not occur.  The terminating NUL is never
 * matched, so searching for '\0' always yields -1.
 */

int sindex(char ch,char *string)
{
  char *cp;

  for(cp=string;*cp;++cp)
    {
      if(ch==*cp)
        return (int)(cp-string);   /* return position of character */
    }
  return -1;                       /* end of string ... no match found */
}
777
778	/* routine to store a character in a string ... used only by "tokenizer" */
779
780	void chstore(char *string,int max,char ch)
781	{
782	char c;
783	if(_p_tokpos>=0&&_p_tokpos<max-1)
784	{
785	if(_p_state==IN_QUOTE)
786	c=ch;
787	else
788	switch(_p_flag&3)
789	{
790	case 1: /* convert to upper */
791	c=toupper(ch);
792	break;
793
794	case 2: /* convert to lower */
795	c=tolower(ch);
796	break;
797
798	default: /* use as is */
799	c=ch;
800	break;
801	}
802	string[_p_tokpos++]=c;
803	}
804	return;
805	}
806
807	int tokenizer(unsigned inflag,char token,int tokmax,char line,
808	char white,char brkchar,char quote,char eschar,char brkused,
809	int next,char quoted)
810	{
811	int qp;
812	char c,nc;
813
814	brkused=0; / initialize to null */
815	quoted=0; / assume not quoted */
816
817	if(!line[next]) / if we're at end of line, indicate such */
818	return 1;
819
820	_p_state=IN_WHITE; /* initialize state */
821	_p_curquote=0; /* initialize previous quote char */
822	_p_flag=inflag; /* set option flag */
823
824	for(_p_tokpos=0;(c=line[next]);++(next)) /* main loop */
825	{
826	if((qp=sindex(c,brkchar))>=0) /* break */
827	{
828	switch(_p_state)
829	{
830	case IN_WHITE: /* these are the same here ... */
831	case IN_TOKEN: /* ... just get out */
832	case IN_OZONE: /* ditto */
833	++(*next);
834	*brkused=brkchar[qp];
835	goto byebye;
836
837	case IN_QUOTE: /* just keep going */
838	chstore(token,tokmax,c);
839	break;
840	}
841	}
842	else if((qp=sindex(c,quote))>=0) /* quote */
843	{
844	switch(_p_state)
845	{
846	case IN_WHITE: /* these are identical, */
847	_p_state=IN_QUOTE; /* change states */
848	_p_curquote=quote[qp]; /* save quote char */
849	quoted=1; / set to true as long as something is in quotes */
850	break;
851
852	case IN_QUOTE:
853	if(quote[qp]==_p_curquote) /* same as the beginning quote? */
854	{
855	_p_state=IN_OZONE;
856	_p_curquote=0;
857	}
858	else
859	chstore(token,tokmax,c); /* treat as regular char */
860	break;
861
862	case IN_TOKEN:
863	case IN_OZONE:
864	brkused=c; / uses quote as break char */
865	goto byebye;
866	}
867	}
868	else if((qp=sindex(c,white))>=0) /* white */
869	{
870	switch(_p_state)
871	{
872	case IN_WHITE:
873	case IN_OZONE:
874	break; /* keep going */
875
876	case IN_TOKEN:
877	_p_state=IN_OZONE;
878	break;
879
880	case IN_QUOTE:
881	chstore(token,tokmax,c); /* it's valid here */
882	break;
883	}
884	}
885	else if(c==eschar) /* escape */
886	{
887	nc=line[(*next)+1];
888	if(nc==0) /* end of line */
889	{
890	*brkused=0;
891	chstore(token,tokmax,c);
892	++(*next);
893	goto byebye;
894	}
895	switch(_p_state)
896	{
897	case IN_WHITE:
898	--(*next);
899	_p_state=IN_TOKEN;
900	break;
901
902	case IN_TOKEN:
903	case IN_QUOTE:
904	++(*next);
905	chstore(token,tokmax,nc);
906	break;
907
908	case IN_OZONE:
909	goto byebye;
910	}
911	}
912	else /* anything else is just a real character */
913	{
914	switch(_p_state)
915	{
916	case IN_WHITE:
917	_p_state=IN_TOKEN; /* switch states */
918
919	case IN_TOKEN: /* these 2 are */
920	case IN_QUOTE: /* identical here */
921	chstore(token,tokmax,c);
922	break;
923
924	case IN_OZONE:
925	goto byebye;
926	}
927	}
928	} /* end of main loop */
929
930	byebye:
931	token[_p_tokpos]=0; /* make sure token ends with EOS */
932
933	return 0;
934	}