src/tiff/contrib/iptcutil/iptcutil.c

/* $Id$ */

#include "tif_config.h"

#include <ctype.h>
#include <limits.h>
#include <memory.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif

#ifdef HAVE_IO_H
# include <io.h>
#endif

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif

#ifdef WIN32
#define STRNICMP strnicmp
#else
#define STRNICMP strncasecmp
#endif

/*
 * One row of the lookup table that maps a numeric id to the label
 * printed for it in the textual output.
 */
typedef struct _tag_spec
{
  short id;    /* numeric id, compared against the record number byte */
  char *name;  /* human readable label for that id */
} tag_spec;

/* Known ids and their labels, in ascending id order. */
static tag_spec tags[] = {
    {   5, "Image Name" },
    {   7, "Edit Status" },
    {  10, "Priority" },
    {  15, "Category" },
    {  20, "Supplemental Category" },
    {  22, "Fixture Identifier" },
    {  25, "Keyword" },
    {  30, "Release Date" },
    {  35, "Release Time" },
    {  40, "Special Instructions" },
    {  45, "Reference Service" },
    {  47, "Reference Date" },
    {  50, "Reference Number" },
    {  55, "Created Date" },
    {  60, "Created Time" },
    {  65, "Originating Program" },
    {  70, "Program Version" },
    {  75, "Object Cycle" },
    {  80, "Byline" },
    {  85, "Byline Title" },
    {  90, "City" },
    {  95, "Province State" },
    { 100, "Country Code" },
    { 101, "Country" },
    { 103, "Original Transmission Reference" },
    { 105, "Headline" },
    { 110, "Credit" },
    { 115, "Source" },
    { 116, "Copyright String" },
    { 120, "Caption" },
    { 121, "Local Caption" },
    { 122, "Caption Writer" },

    /* free-form fields */
    { 200, "Custom Field 1" },
    { 201, "Custom Field 2" },
    { 202, "Custom Field 3" },
    { 203, "Custom Field 4" },
    { 204, "Custom Field 5" },
    { 205, "Custom Field 6" },
    { 206, "Custom Field 7" },
    { 207, "Custom Field 8" },
    { 208, "Custom Field 9" },
    { 209, "Custom Field 10" },
    { 210, "Custom Field 11" },
    { 211, "Custom Field 12" },
    { 212, "Custom Field 13" },
    { 213, "Custom Field 14" },
    { 214, "Custom Field 15" },
    { 215, "Custom Field 16" },
    { 216, "Custom Field 17" },
    { 217, "Custom Field 18" },
    { 218, "Custom Field 19" },
    { 219, "Custom Field 20" }
};

/*
 * Write the first `len` bytes of `s` to `ofile` as a double-quoted string
 * followed by a newline.
 *
 * We format the output using HTML conventions to preserve control
 * characters and such: '&' becomes "&amp;", '"' becomes "&quot;", and
 * (only when HANDLE_GT_LT is defined) '<' and '>' become "&lt;" and
 * "&gt;".  Every byte for which iscntrl() is true is written as the
 * numeric entity "&#N;".  All other bytes are copied unchanged.
 *
 * `s` does not have to be NUL-terminated; exactly `len` bytes are read.
 * Nothing but the two quotes and the newline is written when len <= 0.
 */
void formatString(FILE *ofile, const char *s, int len)
{
  putc('"', ofile);
  for (; len > 0; --len, ++s) {
    /*
     * Go through unsigned char: plain char may be signed, and handing a
     * negative value to iscntrl() is undefined.  It also keeps numeric
     * entities in the 0..255 range.
     */
    int c = (unsigned char) *s;

    switch (c) {
    case '&':
      fputs("&amp;", ofile);
      break;
#ifdef HANDLE_GT_LT
    case '<':
      fputs("&lt;", ofile);
      break;
    case '>':
      fputs("&gt;", ofile);
      break;
#endif
    case '"':
      fputs("&quot;", ofile);
      break;
    default:
      if (iscntrl(c))
        fprintf(ofile, "&#%d;", c);
      else
        putc(c, ofile);
      break;
    }
  }
  fputs("\"\n", ofile);
}

/* Describes one named HTML escape sequence and the character it encodes. */
typedef struct _html_code
{
  short len;         /* number of characters in `code` */
  const char *code;  /* the escape sequence as it appears in the text */
  const char val;    /* the single character the sequence stands for */
} html_code;

/* Named escape sequences recognised when converting text back to binary. */
static html_code html_codes[] = {
#ifdef HANDLE_GT_LT
    { 4, "&lt;",   '<' },
    { 4, "&gt;",   '>' },
#endif
    { 5, "&amp;",  '&' },
    { 6, "&quot;", '"' }
};

/*
 * This routine converts the HTML escape sequence at the very start of
 * `s` back to the character it stands for.  The character is stored in
 * s[0] and the rest of the string is moved down over the remainder of
 * the sequence.
 *
 *   s    NUL-terminated, writable string beginning at the '&'
 *   len  number of characters available in `s`; named sequences longer
 *        than this are not considered
 *
 * Two forms are recognised:
 *   - numeric, "&#N;", where the ';' must appear at offset 3, 4 or 5
 *     (i.e. one to three characters after "&#" in the usual case);
 *   - named, one of the entries of html_codes, compared without regard
 *     to case.
 *
 * Returns the number of characters dropped from the string, or 0 when
 * nothing was converted.  A malformed, unterminated or over-long numeric
 * sequence leaves `s` untouched and returns 0.
 */
int convertHTMLcodes(char *s, int len)
{
  if (len <=0 || s==(char*)NULL || *s=='\0')
    return 0;

  if (s[1] == '#')
    {
      int val, o;

      if (sscanf(s,"&#%d;",&val) == 1)
      {
        /*
         * sscanf() succeeded, so s[2] starts the number and s[3] exists
         * (it may be the terminator).  Look for the closing ';' without
         * ever stepping past the end of the string.
         */
        o = 3;
        while (o <= 5 && s[o] != ';' && s[o] != '\0')
          o++;
        if (o > 5 || s[o] != ';')
          return 0;

        /* source and destination overlap, so strcpy() is not allowed */
        memmove(s+1, s+1+o, strlen(s+1+o)+1);
        *s = (char) val;
        return o;
      }
    }
  else
    {
      int
        i,
        codes = (int) (sizeof(html_codes) / sizeof(html_codes[0]));

      for (i=0; i < codes; i++)
      {
        const int n = html_codes[i].len;

        if (n <= len && STRNICMP(s, html_codes[i].code, n) == 0)
          {
            /* overlapping move: keep s[0], drop the other n-1 characters */
            memmove(s+1, s+n, strlen(s+n)+1);
            *s = html_codes[i].val;
            return n-1;
          }
      }
    }

  return 0;
}

/*
 * Read binary IPTC records from `ifile` and write one text line per
 * record to `ofile`.
 *
 * Bytes in front of the first 0x1c marker are skipped.  Each record is
 * read as: marker 0x1c, one byte stored in `dataset`, one byte stored in
 * `recnum`, a length, then that many payload bytes.  The length is two
 * bytes (big endian) when the first length byte has its top bit clear;
 * otherwise that byte is followed by four more bytes holding a 32-bit
 * big-endian length.
 * NOTE(review): the text-to-binary writer in main() emits only four bytes
 * in total for long records, so the two directions look inconsistent for
 * payloads of 64K and more - confirm the intended layout.
 *
 * A line has the form  dataset#recnum#name="payload"  where `name` comes
 * from the tags table and is omitted (together with its '#') when the
 * record number is not listed there.  The payload is escaped by
 * formatString().
 *
 * Returns the number of records written.  Returns -1 when the input is
 * truncated inside a record, when a record is not followed by 0x1c or
 * end of file, or when a declared length cannot be handled.  Returns 0
 * when the input ends right after the record number, or when memory for
 * the payload cannot be obtained.
 */
int formatIPTC(FILE *ifile, FILE *ofile)
{
  const int
    tagcount = (int) (sizeof(tags) / sizeof(tags[0]));

  unsigned int
    foundiptc = 0, /* found the IPTC-Header */
    tagsfound = 0; /* number of tags found */

  int
    c; /* int, not char: must hold every byte value as well as EOF */

  c = getc(ifile);
  while (c != EOF)
  {
    int
      dataset,
      recnum,
      i;

    const char
      *readable;

    unsigned long
      tagindx,
      taglen;

    char
      *str;

    if (c == 0x1c)
      foundiptc = 1;
    else
      {
        if (foundiptc)
          return -1;
        /* still looking for the first marker: consume this byte */
        c = getc(ifile);
        continue;
      }

    /* we found the 0x1c tag and now grab the dataset and record number tags */
    dataset = getc(ifile);
    if (dataset == EOF)
      return -1;
    recnum = getc(ifile);
    if (recnum == EOF)
      return -1;

    /* try to match this record to one of the ones in our named table */
    readable = "";
    for (i=0; i < tagcount; i++)
    {
      if (tags[i].id == recnum)
        {
          readable = tags[i].name;
          break;
        }
    }

    /* then we decode the length of the block that follows - long or short fmt */
    c = getc(ifile);
    if (c == EOF)
      return 0;
    if (c & 0x80)
      {
        taglen = 0;
        for (i=0; i<4; i++)
        {
          c = getc(ifile);
          if (c == EOF)
            return -1;
          taglen = (taglen << 8) | (unsigned long) c;
        }
      }
    else
      {
        int
          lo = getc(ifile);

        if (lo == EOF)
          return -1;
        taglen = ((unsigned long) c << 8) | (unsigned long) lo;
      }

    /*
     * The length is handed to formatString() as an int and taglen+1 bytes
     * are allocated, so anything that does not leave room for that is
     * rejected instead of being allowed to wrap around.
     */
    if (taglen >= (unsigned long) INT_MAX)
      return -1;

    /* make a buffer to hold the tag data and snag it from the input stream */
    str = (char *) malloc((size_t) taglen + 1);
    if (str == (char *) NULL)
      {
        printf("Memory allocation failed");
        return 0;
      }
    for (tagindx=0; tagindx<taglen; tagindx++)
    {
      c = getc(ifile);
      if (c == EOF)
        {
          free(str);
          return -1;
        }
      str[tagindx] = (char) c;
    }
    str[taglen] = '\0';

    /* now finish up by formatting this binary data into ASCII equivalent */
    if (readable[0] != '\0')
      fprintf(ofile, "%d#%d#%s=", dataset, recnum, readable);
    else
      fprintf(ofile, "%d#%d=", dataset, recnum);
    formatString(ofile, str, (int) taglen);
    free(str);

    tagsfound++;

    c = getc(ifile);
  }
  return (int) tagsfound;
}

/* Forward declaration of the token parser; it is defined after main(). */
int tokenizer(unsigned inflag, char *token, int tokmax, char *line,
              char *white, char *brkchar, char *quote, char eschar,
              char *brkused, int *next, char *quoted);

/*
 * Read one line of any length from `file` into a heap buffer.
 *
 *   b     buffer obtained from malloc()/realloc(), or NULL
 *   blen  in: capacity of `b` in bytes; out: length of the line read
 *         including its terminating NUL, or 0 when NULL is returned
 *   file  stream to read from
 *
 * Characters are read up to, and not including, the next '\n' or end of
 * file.  The buffer is enlarged with realloc() as needed, so the returned
 * pointer may differ from `b` and must replace it:
 *
 *     line = super_fgets(line, &len, file);
 *
 * Returns the NUL-terminated line.  Returns NULL when no character was
 * read before the '\n' or end of file - an empty line therefore looks the
 * same as end of input - or when the buffer could not be enlarged.
 *
 * Ownership: the function takes over `b`.  Whenever NULL is returned the
 * buffer has already been freed, so the caller must not free the pointer
 * it passed in.
 */
char *super_fgets(char *b, int *blen, FILE *file)
{
  int
    c,
    len,
    used = 0;

  /* no buffer, or a nonsensical capacity, means everything must be allocated */
  len = (b != (char *) NULL && *blen > 0) ? *blen : 0;
  *blen = 0;

  for ( ; ; )
  {
    c = fgetc(file);
    if (c == EOF || c == '\n')
      break;
    if (used + 1 >= len)
      {
        char
          *grown;

        /* double the capacity; the first growth from nothing starts at 1 */
        len = (len > 0) ? len * 2 : 1;
        grown = (char *) realloc(b, (size_t) len + 2);
        if (grown == (char *) NULL)
          {
            /* realloc() left the old block allocated: release it */
            free(b);
            return (char *) NULL;
          }
        b = grown;
      }
    b[used++] = (char) (unsigned char) c;
  }

  if (used == 0)
    {
      free(b);
      return (char *) NULL;
    }
  b[used] = '\0';
  *blen = used + 1;
  return b;
}

#define BUFFER_SZ 4096

/*
 * Convert the text form of IPTC data read from `ifile` into binary
 * records written to `ofile`.
 *
 * Every input line is split at '=' (outside double quotes).  The first
 * piece is split again at '#': its first field is taken as the dataset
 * and its second as the record number, both through atoi().  The second
 * piece is the payload; each '&' in it is offered to convertHTMLcodes()
 * so that escape sequences are turned back into single characters.
 * Further pieces on the same line are ignored.
 *
 * A record is written as 0x1c, dataset, record number, length, payload.
 * Lengths below 0x10000 are written as two bytes, larger ones as four
 * bytes with the top bit of the first byte set.
 * NOTE(review): a two-byte length of 0x8000 or more also has its top bit
 * set, and formatIPTC() reads long lengths as one byte plus four - confirm
 * the intended on-disk layout before relying on records that large.
 *
 * Returns 0 on success and 1 when memory could not be obtained.
 */
static int textToIPTC(FILE *ifile, FILE *ofile)
{
  /* not reset per line: a line without these fields reuses the last values */
  unsigned char
    recnum = 0,
    dataset = 0;

  int
    inputlen = BUFFER_SZ;

  char
    *line;

  line = (char *) malloc((size_t) inputlen);
  if (line == (char *) NULL)
    {
      printf("Memory allocation failed");
      return 1;
    }

  while ((line = super_fgets(line, &inputlen, ifile)) != NULL)
  {
    int
      state = 0,
      next = 0;

    char
      brkused,
      quoted,
      *token,
      *newstr;

    /* super_fgets() left the size of this line, NUL included, in inputlen */
    token = (char *) malloc((size_t) inputlen);
    newstr = (char *) malloc((size_t) inputlen);
    if (token == (char *) NULL || newstr == (char *) NULL)
      {
        free(token);
        free(newstr);
        free(line);
        printf("Memory allocation failed");
        return 1;
      }

    while (tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
      &brkused, &next, &quoted) == 0)
    {
      if (state == 0)
        {
          /* key part: dataset#recnum[#name] */
          int
            field = 0,
            pos = 0;

          char
            keybrk,
            keyquoted;

          while (tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
            &keybrk, &pos, &keyquoted) == 0)
          {
            if (field == 0)
              dataset = (unsigned char) atoi(newstr);
            else if (field == 1)
              recnum = (unsigned char) atoi(newstr);
            field++;
          }
        }
      else if (state == 1)
        {
          /* value part: undo the escapes in place, then emit the record */
          int
            pos = 0;

          unsigned long
            len = (unsigned long) strlen(token),
            k;

          char
            valbrk,
            valquoted;

          while (tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
            &valbrk, &pos, &valquoted) == 0)
          {
            if (valbrk && pos > 0)
              {
                char
                  *s = &token[pos-1]; /* the '&' that ended this piece */

                int
                  dropped = convertHTMLcodes(s, (int) strlen(s));

                /* never let the unsigned length wrap below zero */
                if (dropped > 0 && (unsigned long) dropped <= len)
                  len -= (unsigned long) dropped;
              }
          }

          fputc(0x1c, ofile);
          fputc(dataset, ofile);
          fputc(recnum, ofile);
          if (len < 0x10000)
            {
              fputc((int) ((len >> 8) & 255), ofile);
              fputc((int) (len & 255), ofile);
            }
          else
            {
              fputc((int) (((len >> 24) & 255) | 0x80), ofile);
              fputc((int) ((len >> 16) & 255), ofile);
              fputc((int) ((len >> 8) & 255), ofile);
              fputc((int) (len & 255), ofile);
            }
          for (k = 0; k < len; k++)
            fputc(token[k], ofile);
        }
      state++;
    }
    free(token);
    free(newstr);
  }
  free(line);

  return 0;
}

/*
 * Command line entry point.
 *
 *   -b       input is binary IPTC data, output is text (the default)
 *   -t       input is text, output is binary IPTC data
 *   -i file  read from `file` instead of stdin
 *   -o file  write to `file` instead of stdout
 *
 * Options may also be introduced by '/'.  Options are handled in order,
 * and the open mode used for -i/-o depends on the conversion mode in
 * effect at that point, so -t/-b should come first.
 *
 * Returns 1 after printing a message for a usage error, a file that
 * cannot be opened, or an allocation failure, and 0 otherwise.  The
 * result of the binary-to-text conversion is not reflected in the exit
 * status.
 */
int main(int argc, char *argv[])
{
  int
    i,
    mode = 0; /* 0: iptc binary input, 1: iptc text input */

  FILE
    *ifile = stdin,
    *ofile = stdout;

  char
    c;

  const char
    *usage = "usage: iptcutil -t | -b [-i file] [-o file] <input >output";

  if (argc < 2)
    {
      /* never use a variable as the format string itself */
      printf("%s", usage);
      return 1;
    }

  for (i=1; i<argc; i++)
  {
    c = argv[i][0];
    if (c != '-' && c != '/')
      {
        printf("%s", usage);
        return 1;
      }

    c = argv[i][1];
    switch (c)
    {
    case 't':
      mode = 1;
#ifdef WIN32
      /* Set "stdout" to binary mode: */
      _setmode( _fileno( ofile ), _O_BINARY );
#endif
      break;
    case 'b':
      mode = 0;
#ifdef WIN32
      /* Set "stdin" to binary mode: */
      _setmode( _fileno( ifile ), _O_BINARY );
#endif
      break;
    case 'i':
      if (i + 1 >= argc) /* the file name is missing */
        {
          printf("%s", usage);
          return 1;
        }
      ifile = fopen(argv[++i], (mode == 0) ? "rb" : "rt");
      if (ifile == (FILE *)NULL)
        {
          printf("Unable to open: %s\n", argv[i]);
          return 1;
        }
      break;
    case 'o':
      if (i + 1 >= argc) /* the file name is missing */
        {
          printf("%s", usage);
          return 1;
        }
      ofile = fopen(argv[++i], (mode == 0) ? "wt" : "wb");
      if (ofile == (FILE *)NULL)
        {
          printf("Unable to open: %s\n", argv[i]);
          return 1;
        }
      break;
    default:
      printf("Unknown option: %s\n", argv[i]);
      return 1;
    }
  }

  if (mode == 0) /* handle binary iptc info */
    formatIPTC(ifile, ofile);

  if (mode == 1) /* handle text form of iptc info */
    {
      int
        status = textToIPTC(ifile, ofile);

      fclose( ifile );
      fclose( ofile );
      return status;
    }

  return 0;
}

/*
	This routine is a generalized, finite state token parser. It allows
    you to extract tokens one at a time from a string of characters.  The
    characters used for white space, for break characters, and for quotes
    can be specified. Also, characters in the string can be preceded by
    a specifiable escape character which removes any special meaning the
    character may have.

	There are a lot of formal parameters in this subroutine call, but
	once you get familiar with them, this routine is fairly easy to use.
	"#define" macros can be used to generate simpler looking calls for
	commonly used applications of this routine.

	First, some terminology:

	token:		used here, a single unit of information in
				the form of a group of characters.

	white space:	space that gets ignored (except within quotes
				or when escaped), like blanks and tabs.  in
				addition, white space terminates a non-quoted
				token.

	break character: a character that separates non-quoted tokens.
				commas are a common break character.  the
				usage of break characters to signal the end
				of a token is the same as that of white space,
				except multiple break characters with nothing
				or only white space between generate a null
				token for each two break characters together.

				for example, if blank is set to be the white
				space and comma is set to be the break
				character, the line ...

				A, B, C ,  , DEF

				... consists of 5 tokens:

				1)	"A"
				2)	"B"
				3)	"C"
				4)	""      (the null string)
				5)	"DEF"

	quote character: 	a character that, when surrounding a group
				of other characters, causes the group of
				characters to be treated as a single token,
				no matter how many white spaces or break
				characters exist in the group.	also, a
				token always terminates after the closing
				quote.	for example, if ' is the quote
				character, blank is white space, and comma
				is the break character, the following
				string ...

				A, ' B, CD'EF GHI

				... consists of 4 tokens:

				1)	"A"
				2)	" B, CD" (note the blanks & comma)
				3)	"EF"
				4)	"GHI"

				the quote characters themselves do
				not appear in the resultant tokens.  the
				double quotes are delimiters i use here for
				documentation purposes only.

	escape character:	a character which itself is ignored but
				which causes the next character to be
				used as is.  ^ and \ are often used as
				escape characters.  an escape in the last
				position of the string gets treated as a
				"normal" (i.e., non-quote, non-white,
				non-break, and non-escape) character.
				for example, assume white space, break
				character, and quote are the same as in the
				above examples, and further, assume that
				^ is the escape character.  then, in the
				string ...

				ABC, ' DEF ^' GH' I ^ J K^ L ^

				... there are 7 tokens:

				1)	"ABC"
				2)	" DEF ' GH"
				3)	"I"
				4)	" "     (a lone blank)
				5)	"J"
				6)	"K L"
				7)	"^"     (passed as is at end of line)


	OK, now that you have this background, here's how to call "tokenizer":

	result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
		      brkused,next,quoted)

	result: 	0 if we haven't reached EOS (end of string), and
			1 if we have (this is an "int").

	flag:		right now, only the low order 2 bits are used.
			1 => convert non-quoted tokens to upper case
			2 => convert non-quoted tokens to lower case
			0 => do not convert non-quoted tokens
			(this is an "unsigned").

	token:		a character string containing the returned next token
			(this is a "char[]").

	maxtok: 	the maximum size of "token".  characters beyond
			"maxtok" are truncated (this is an "int").

	string: 	the string to be parsed (this is a "char[]").

	white:		a string of the valid white spaces.  example:

			char whitesp[]={" \t"};

			blank and tab will be valid white space (this is
			a "char[]").

	break:		a string of the valid break characters.  example:

			char breakch[]={";,"};

			semicolon and comma will be valid break characters
			(this is a "char[]").

			IMPORTANT:  do not use the name "break" as a C
			variable, as this is a reserved word in C.

	quote:		a string of the valid quote characters.  an example
			would be

			char quotech[]={"'\""};

			(this causes single and double quotes to be valid)
			note that a token starting with one of these characters
			needs the same quote character to terminate it.

			for example,

			"ABC '

			is unterminated, but

			"DEF" and 'GHI'

			are properly terminated.  note that different quote
			characters can appear on the same line; only for
			a given token do the quote characters have to be
			the same (this is a "char[]").

	escape: 	the escape character (NOT a string ... only one
			allowed).  use zero if none is desired (this is
			a "char").

	brkused:	the break character used to terminate the current
			token.	if the token was quoted, this will be the
			quote used.  if the token is the last one on the
			line, this will be zero (this is a pointer to a
			"char").

	next:		this variable points to the first character of the
			next token.  it gets reset by "tokenizer" as it steps
			through the string.  set it to 0 upon initialization,
			and leave it alone after that.	you can change it
			if you want to jump around in the string or re-parse
			from the beginning, but be careful (this is a
			pointer to an "int").

	quoted: 	set to 1 (true) if the token was quoted and 0 (false)
			if not.  you may need this information (for example:
			in C, a string with quotes around it is a character
			string, while one without is an identifier).

			(this is a pointer to a "char").
*/

/* states of the "tokenizer" state machine */

/* no character of the current token has been seen yet (state on entry) */
#define IN_WHITE 0
/* collecting the characters of an unquoted token */
#define IN_TOKEN 1
/* inside a quoted section; break and white characters are stored as is */
#define IN_QUOTE 2
/* the token text is finished (closing quote seen, or white space after a
   token); white space is skipped and anything else ends the token */
#define IN_OZONE 3

/* working variables shared by "tokenizer" and "chstore"; "tokenizer"
   sets all of them afresh on every call that finds something to parse */
int _p_state;	   /* current state, one of the IN_* values above */
unsigned _p_flag;  /* option flag: copy of the caller's case-conversion flag */
char _p_curquote;  /* current quote char: the one that opened the quoted section, 0 if none */
int _p_tokpos;	   /* current token pos: index at which "chstore" writes next */

/*
 * Return the zero-based position of the first occurrence of `ch` in the
 * NUL-terminated `string`, or -1 when it does not occur.  The terminating
 * NUL itself is never matched.  Used only by "tokenizer".
 */
int sindex(char ch,char *string)
{
  int pos = 0;

  while (string[pos] != '\0')
    {
      if (string[pos] == ch)
        return pos;		/* found it */
      pos++;
    }
  return -1;			/* reached the end without a match */
}

/*
 * Append `ch` to the token being built in `string`, at index _p_tokpos,
 * and advance _p_tokpos.  Used only by "tokenizer".
 *
 *   string  token buffer
 *   max     size of the buffer; the last element is kept free for the
 *           terminating NUL, so at most max-1 characters are stored and
 *           any further character is silently dropped
 *   ch      character to store
 *
 * Outside a quoted section (_p_state != IN_QUOTE) the character is
 * converted to upper case when the low two bits of _p_flag are 1 and to
 * lower case when they are 2; inside quotes, and for any other flag
 * value, it is stored unchanged.
 */
void chstore(char *string,int max,char ch)
{
  char c = ch;

  if (_p_tokpos < 0 || _p_tokpos >= max-1)
    return;			/* no room left: drop the character */

  if (_p_state != IN_QUOTE)
    {
      /*
       * The <ctype.h> functions require a value representable as
       * unsigned char; a plain char may be negative, so convert first.
       */
      switch (_p_flag & 3)
      {
      case 1:			/* convert to upper */
        c = (char) toupper((unsigned char) ch);
        break;

      case 2:			/* convert to lower */
        c = (char) tolower((unsigned char) ch);
        break;

      default:			/* use as is */
        break;
      }
    }
  string[_p_tokpos++] = c;
}

/*
 * Extract the next token from `line`, starting at offset *next.
 *
 * The token text is stored NUL-terminated in `token`; "chstore" drops
 * characters once tokmax-1 of them have been stored.  *next is advanced
 * so that a following call carries on behind this token.  *brkused
 * receives the break or quote character that ended the token, or 0 when
 * the end of `line` ended it.  *quoted is set to 1 when a quoted section
 * was entered and to 0 otherwise.
 *
 * Returns 1, leaving `token` untouched, when line[*next] is already the
 * terminating NUL on entry; returns 0 in every other case.
 *
 * The file-scope variables _p_state, _p_flag, _p_curquote and _p_tokpos
 * hold the working state and are re-initialised on each call that gets
 * past the end-of-line check.
 */
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
  char *white,char *brkchar,char *quote,char eschar,char *brkused,
    int *next,char *quoted)
{
  int qp;
  char c,nc;

  *brkused=0;		/* initialize to null */
  *quoted=0;		/* assume not quoted  */

  if(!line[*next])	/* if we're at end of line, indicate such */
    return 1;

  _p_state=IN_WHITE;   /* initialize state */
  _p_curquote=0;	   /* initialize previous quote char */
  _p_flag=inflag;	   /* set option flag */

  /* each character is classified as break, quote, white, escape or plain,
     in that order of precedence, and handled according to the state */
  for(_p_tokpos=0;(c=line[*next]);++(*next))	/* main loop */
  {
    if((qp=sindex(c,brkchar))>=0)  /* break */
    {
      switch(_p_state)
      {
	    case IN_WHITE:		/* these are the same here ...	*/
	    case IN_TOKEN:		/* ... just get out		*/
	    case IN_OZONE:		/* ditto			*/
	      /* the goto bypasses the loop increment, so step over the
	         break character by hand */
	      ++(*next);
	      *brkused=brkchar[qp];
	      goto byebye;

	    case IN_QUOTE:		 /* just keep going */
	      chstore(token,tokmax,c);
	      break;
      }
    }
    else if((qp=sindex(c,quote))>=0)  /* quote */
    {
      switch(_p_state)
      {
	    case IN_WHITE:	 /* these are identical, */
	      _p_state=IN_QUOTE; /* change states   */
	      _p_curquote=quote[qp]; /* save quote char */
	      *quoted=1;	/* set to true as long as something is in quotes */
	      break;

	    case IN_QUOTE:
	      if(quote[qp]==_p_curquote) /* same as the beginning quote? */
	      {
	        _p_state=IN_OZONE;
	        _p_curquote=0;
	      }
	      else
	        chstore(token,tokmax,c); /* treat as regular char */
	      break;

	    case IN_TOKEN:
	    case IN_OZONE:
	      /* *next is left on the quote, so the following call starts
	         with it and opens a quoted token */
	      *brkused=c; /* uses quote as break char */
	      goto byebye;
      }
    }
    else if((qp=sindex(c,white))>=0) /* white */
    {
      switch(_p_state)
      {
	    case IN_WHITE:
	    case IN_OZONE:
	      break;		/* keep going */

	    case IN_TOKEN:
	      _p_state=IN_OZONE;
	      break;

	    case IN_QUOTE:
	      chstore(token,tokmax,c); /* it's valid here */
	      break;
      }
    }
    else if(c==eschar)  /* escape */
    {
      nc=line[(*next)+1];
      if(nc==0) 		/* end of line */
      {
	    /* an escape in the last position is stored like a plain character */
	    *brkused=0;
	    chstore(token,tokmax,c);
	    ++(*next);
	    goto byebye;
      }
      switch(_p_state)
      {
	    case IN_WHITE:
	      /* back up one position so that the loop increment lands on
	         the escape again, this time in state IN_TOKEN */
	      --(*next);
	      _p_state=IN_TOKEN;
	      break;

	    case IN_TOKEN:
	    case IN_QUOTE:
	      /* store the escaped character and skip over it */
	      ++(*next);
	      chstore(token,tokmax,nc);
	      break;

	    case IN_OZONE:
	      goto byebye;
      }
    }
    else	/* anything else is just a real character */
    {
      switch(_p_state)
      {
	    case IN_WHITE:
	      _p_state=IN_TOKEN; /* switch states */
	      /* deliberate fall through: the character is stored below */

	    case IN_TOKEN:		 /* these 2 are     */
	    case IN_QUOTE:		 /*  identical here */
	      chstore(token,tokmax,c);
	      break;

	    case IN_OZONE:
	      /* *next stays on this character; it begins the next token */
	      goto byebye;
      }
    }
  }		/* end of main loop */

  /* reached by goto, or by running into the end of `line` */
byebye:
  token[_p_tokpos]=0;	/* make sure token ends with EOS */

  return 0;
}
Commit	Line	Data
	1	/* $Id$ */
	2
	3	#include "tif_config.h"
	4
	5	#include <stdio.h>
	6	#include <stdlib.h>
	7	#include <string.h>
	8	#include <memory.h>
	9	#include <ctype.h>
	10
	11	#ifdef HAVE_STRINGS_H
	12	# include <strings.h>
	13	#endif
	14
	15	#ifdef HAVE_IO_H
	16	# include <io.h>
	17	#endif
	18
	19	#ifdef HAVE_FCNTL_H
	20	# include <fcntl.h>
	21	#endif
	22
	23	#ifdef WIN32
	24	#define STRNICMP strnicmp
	25	#else
	26	#define STRNICMP strncasecmp
	27	#endif
	28
	29	typedef struct _tag_spec
	30	{
	31	short
	32	id;
	33
	34	char
	35	*name;
	36	} tag_spec;
	37
	38	static tag_spec tags[] = {
	39	{ 5,"Image Name" },
	40	{ 7,"Edit Status" },
	41	{ 10,"Priority" },
	42	{ 15,"Category" },
	43	{ 20,"Supplemental Category" },
	44	{ 22,"Fixture Identifier" },
	45	{ 25,"Keyword" },
	46	{ 30,"Release Date" },
	47	{ 35,"Release Time" },
	48	{ 40,"Special Instructions" },
	49	{ 45,"Reference Service" },
	50	{ 47,"Reference Date" },
	51	{ 50,"Reference Number" },
	52	{ 55,"Created Date" },
	53	{ 60,"Created Time" },
	54	{ 65,"Originating Program" },
	55	{ 70,"Program Version" },
	56	{ 75,"Object Cycle" },
	57	{ 80,"Byline" },
	58	{ 85,"Byline Title" },
	59	{ 90,"City" },
	60	{ 95,"Province State" },
	61	{ 100,"Country Code" },
	62	{ 101,"Country" },
	63	{ 103,"Original Transmission Reference" },
	64	{ 105,"Headline" },
	65	{ 110,"Credit" },
	66	{ 115,"Source" },
	67	{ 116,"Copyright String" },
	68	{ 120,"Caption" },
	69	{ 121,"Local Caption" },
	70	{ 122,"Caption Writer" },
	71	{ 200,"Custom Field 1" },
	72	{ 201,"Custom Field 2" },
	73	{ 202,"Custom Field 3" },
	74	{ 203,"Custom Field 4" },
	75	{ 204,"Custom Field 5" },
	76	{ 205,"Custom Field 6" },
	77	{ 206,"Custom Field 7" },
	78	{ 207,"Custom Field 8" },
	79	{ 208,"Custom Field 9" },
	80	{ 209,"Custom Field 10" },
	81	{ 210,"Custom Field 11" },
	82	{ 211,"Custom Field 12" },
	83	{ 212,"Custom Field 13" },
	84	{ 213,"Custom Field 14" },
	85	{ 214,"Custom Field 15" },
	86	{ 215,"Custom Field 16" },
	87	{ 216,"Custom Field 17" },
	88	{ 217,"Custom Field 18" },
	89	{ 218,"Custom Field 19" },
	90	{ 219,"Custom Field 20" }
	91	};
	92
	93	/*
	94	* We format the output using HTML conventions
	95	* to preserve control characters and such.
	96	*/
	97	void formatString(FILE ofile, const char s, int len)
	98	{
	99	putc('"', ofile);
	100	for (; len > 0; --len, ++s) {
	101	int c = *s;
	102	switch (c) {
	103	case '&':
	104	fputs("&", ofile);
	105	break;
	106	#ifdef HANDLE_GT_LT
	107	case '<':
	108	fputs("<", ofile);
	109	break;
	110	case '>':
	111	fputs(">", ofile);
	112	break;
	113	#endif
	114	case '"':
	115	fputs(""", ofile);
	116	break;
	117	default:
	118	if (iscntrl(c))
	119	fprintf(ofile, "&#%d;", c);
	120	else
	121	putc(*s, ofile);
	122	break;
	123	}
	124	}
	125	fputs("\"\n", ofile);
	126	}
	127
	128	typedef struct _html_code
	129	{
	130	short
	131	len;
	132	const char
	133	*code,
	134	val;
	135	} html_code;
	136
	137	static html_code html_codes[] = {
	138	#ifdef HANDLE_GT_LT
	139	{ 4,"<",'<' },
	140	{ 4,">",'>' },
	141	#endif
	142	{ 5,"&",'&' },
	143	{ 6,""",'"' }
	144	};
	145
	146	/*
	147	* This routine converts HTML escape sequence
	148	* back to the original ASCII representation.
	149	* - returns the number of characters dropped.
	150	*/
	151	int convertHTMLcodes(char *s, int len)
	152	{
	153	if (len <=0 \|\| s==(char)NULL \|\| s=='\0')
	154	return 0;
	155
	156	if (s[1] == '#')
	157	{
	158	int val, o;
	159
	160	if (sscanf(s,"&#%d;",&val) == 1)
	161	{
	162	o = 3;
	163	while (s[o] != ';')
	164	{
	165	o++;
	166	if (o > 5)
	167	break;
	168	}
	169	if (o < 5)
	170	strcpy(s+1, s+1+o);
	171	*s = val;
	172	return o;
	173	}
	174	}
	175	else
	176	{
	177	int
	178	i,
	179	codes = sizeof(html_codes) / sizeof(html_code);
	180
	181	for (i=0; i < codes; i++)
	182	{
	183	if (html_codes[i].len <= len)
	184	if (STRNICMP(s, html_codes[i].code, html_codes[i].len) == 0)
	185	{
	186	strcpy(s+1, s+html_codes[i].len);
	187	*s = html_codes[i].val;
	188	return html_codes[i].len-1;
	189	}
	190	}
	191	}
	192
	193	return 0;
	194	}
	195
	196	int formatIPTC(FILE ifile, FILE ofile)
	197	{
	198	unsigned int
	199	foundiptc,
	200	tagsfound;
	201
	202	unsigned char
	203	recnum,
	204	dataset;
	205
	206	char
	207	*readable,
	208	*str;
	209
	210	long
	211	tagindx,
	212	taglen;
	213
	214	int
	215	i,
	216	tagcount = sizeof(tags) / sizeof(tag_spec);
	217
	218	char
	219	c;
	220
	221	foundiptc = 0; /* found the IPTC-Header */
	222	tagsfound = 0; /* number of tags found */
	223
	224	c = getc(ifile);
	225	while (c != EOF)
	226	{
	227	if (c == 0x1c)
	228	foundiptc = 1;
	229	else
	230	{
	231	if (foundiptc)
	232	return -1;
	233	else
	234	continue;
	235	}
	236
	237	/* we found the 0x1c tag and now grab the dataset and record number tags */
	238	dataset = getc(ifile);
	239	if ((char) dataset == EOF)
	240	return -1;
	241	recnum = getc(ifile);
	242	if ((char) recnum == EOF)
	243	return -1;
	244	/* try to match this record to one of the ones in our named table */
	245	for (i=0; i< tagcount; i++)
	246	{
	247	if (tags[i].id == recnum)
	248	break;
	249	}
	250	if (i < tagcount)
	251	readable = tags[i].name;
	252	else
	253	readable = "";
	254
	255	/* then we decode the length of the block that follows - long or short fmt */
	256	c = getc(ifile);
	257	if (c == EOF)
	258	return 0;
	259	if (c & (unsigned char) 0x80)
	260	{
	261	unsigned char
	262	buffer[4];
	263
	264	for (i=0; i<4; i++)
	265	{
	266	c = buffer[i] = getc(ifile);
	267	if (c == EOF)
	268	return -1;
	269	}
	270	taglen = (((long) buffer[ 0 ]) << 24) \|
	271	(((long) buffer[ 1 ]) << 16) \|
	272	(((long) buffer[ 2 ]) << 8) \|
	273	(((long) buffer[ 3 ]));
	274	}
	275	else
	276	{
	277	unsigned char
	278	x = c;
	279
	280	taglen = ((long) x) << 8;
	281	x = getc(ifile);
	282	if ((char)x == EOF)
	283	return -1;
	284	taglen \|= (long) x;
	285	}
	286	/* make a buffer to hold the tag data and snag it from the input stream */
	287	str = (char *) malloc((unsigned int) (taglen+1));
	288	if (str == (char *) NULL)
	289	{
	290	printf("Memory allocation failed");
	291	return 0;
	292	}
	293	for (tagindx=0; tagindx<taglen; tagindx++)
	294	{
	295	c = str[tagindx] = getc(ifile);
	296	if (c == EOF)
	297	return -1;
	298	}
	299	str[ taglen ] = 0;
	300
	301	/* now finish up by formatting this binary data into ASCII equivalent */
	302	if (strlen(readable) > 0)
	303	fprintf(ofile, "%d#%d#%s=",(unsigned int)dataset, (unsigned int) recnum, readable);
	304	else
	305	fprintf(ofile, "%d#%d=",(unsigned int)dataset, (unsigned int) recnum);
	306	formatString( ofile, str, taglen );
	307	free(str);
	308
	309	tagsfound++;
	310
	311	c = getc(ifile);
	312	}
	313	return tagsfound;
	314	}
	315
	316	int tokenizer(unsigned inflag,char token,int tokmax,char line,
	317	char white,char brkchar,char quote,char eschar,char brkused,
	318	int next,char quoted);
	319
	320	char super_fgets(char b, int blen, FILE file)
	321	{
	322	int
	323	c,
	324	len;
	325
	326	char
	327	*q;
	328
	329	len=*blen;
	330	for (q=b; ; q++)
	331	{
	332	c=fgetc(file);
	333	if (c == EOF \|\| c == '\n')
	334	break;
	335	if (((int)q - (int)b + 1 ) >= (int) len)
	336	{
	337	int
	338	tlen;
	339
	340	tlen=(int)q-(int)b;
	341	len<<=1;
	342	b=(char ) realloc((char ) b,(len+2));
	343	if ((char ) b == (char ) NULL)
	344	break;
	345	q=b+tlen;
	346	}
	347	*q=(unsigned char) c;
	348	}
	349	*blen=0;
	350	if ((unsigned char )b != (unsigned char ) NULL)
	351	{
	352	int
	353	tlen;
	354
	355	tlen=(int)q - (int)b;
	356	if (tlen == 0)
	357	return (char *) NULL;
	358	b[tlen] = '\0';
	359	*blen=++tlen;
	360	}
	361	return b;
	362	}
	363
	364	#define BUFFER_SZ 4096
	365
	366	int main(int argc, char *argv[])
	367	{
	368	unsigned int
	369	length;
	370
	371	unsigned char
	372	*buffer;
	373
	374	int
	375	i,
	376	mode; /* iptc binary, or iptc text */
	377
	378	FILE
	379	*ifile = stdin,
	380	*ofile = stdout;
	381
	382	char
	383	c,
	384	*usage = "usage: iptcutil -t \| -b [-i file] [-o file] <input >output";
	385
	386	if( argc < 2 )
	387	{
	388	printf(usage);
	389	return 1;
	390	}
	391
	392	mode = 0;
	393	length = -1;
	394	buffer = (unsigned char *)NULL;
	395
	396	for (i=1; i<argc; i++)
	397	{
	398	c = argv[i][0];
	399	if (c == '-' \|\| c == '/')
	400	{
	401	c = argv[i][1];
	402	switch( c )
	403	{
	404	case 't':
	405	mode = 1;
	406	#ifdef WIN32
	407	/* Set "stdout" to binary mode: */
	408	_setmode( _fileno( ofile ), _O_BINARY );
	409	#endif
	410	break;
	411	case 'b':
	412	mode = 0;
	413	#ifdef WIN32
	414	/* Set "stdin" to binary mode: */
	415	_setmode( _fileno( ifile ), _O_BINARY );
	416	#endif
	417	break;
	418	case 'i':
	419	if (mode == 0)
	420	ifile = fopen(argv[++i], "rb");
	421	else
	422	ifile = fopen(argv[++i], "rt");
	423	if (ifile == (FILE *)NULL)
	424	{
	425	printf("Unable to open: %s\n", argv[i]);
	426	return 1;
	427	}
	428	break;
	429	case 'o':
	430	if (mode == 0)
	431	ofile = fopen(argv[++i], "wt");
	432	else
	433	ofile = fopen(argv[++i], "wb");
	434	if (ofile == (FILE *)NULL)
	435	{
	436	printf("Unable to open: %s\n", argv[i]);
	437	return 1;
	438	}
	439	break;
	440	default:
	441	printf("Unknown option: %s\n", argv[i]);
	442	return 1;
	443	}
	444	}
	445	else
	446	{
	447	printf(usage);
	448	return 1;
	449	}
	450	}
	451
	452	if (mode == 0) /* handle binary iptc info */
	453	formatIPTC(ifile, ofile);
	454
	455	if (mode == 1) /* handle text form of iptc info */
	456	{
	457	char
	458	brkused,
	459	quoted,
	460	*line,
	461	*token,
	462	*newstr;
	463
	464	int
	465	state,
	466	next;
	467
	468	unsigned char
	469	recnum = 0,
	470	dataset = 0;
	471
	472	int
	473	inputlen = BUFFER_SZ;
	474
	475	line = (char *) malloc(inputlen);
	476	token = (char *)NULL;
	477	while((line = super_fgets(line,&inputlen,ifile))!=NULL)
	478	{
	479	state=0;
	480	next=0;
	481
	482	token = (char *) malloc(inputlen);
	483	newstr = (char *) malloc(inputlen);
	484	while(tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
	485	&brkused,&next,&quoted)==0)
	486	{
	487	if (state == 0)
	488	{
	489	int
	490	state,
	491	next;
	492
	493	char
	494	brkused,
	495	quoted;
	496
	497	state=0;
	498	next=0;
	499	while(tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
	500	&brkused, &next, &quoted)==0)
	501	{
	502	if (state == 0)
	503	dataset = (unsigned char) atoi(newstr);
	504	else
	505	if (state == 1)
	506	recnum = (unsigned char) atoi(newstr);
	507	state++;
	508	}
	509	}
	510	else
	511	if (state == 1)
	512	{
	513	int
	514	next;
	515
	516	unsigned long
	517	len;
	518
	519	char
	520	brkused,
	521	quoted;
	522
	523	next=0;
	524	len = strlen(token);
	525	while(tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
	526	&brkused, &next, &quoted)==0)
	527	{
	528	if (brkused && next > 0)
	529	{
	530	char
	531	*s = &token[next-1];
	532
	533	len -= convertHTMLcodes(s, strlen(s));
	534	}
	535	}
	536
	537	fputc(0x1c, ofile);
	538	fputc(dataset, ofile);
	539	fputc(recnum, ofile);
	540	if (len < 0x10000)
	541	{
	542	fputc((len >> 8) & 255, ofile);
	543	fputc(len & 255, ofile);
	544	}
	545	else
	546	{
	547	fputc(((len >> 24) & 255) \| 0x80, ofile);
	548	fputc((len >> 16) & 255, ofile);
	549	fputc((len >> 8) & 255, ofile);
	550	fputc(len & 255, ofile);
	551	}
	552	next=0;
	553	while (len--)
	554	fputc(token[next++], ofile);
	555	}
	556	state++;
	557	}
	558	free(token);
	559	token = (char *)NULL;
	560	free(newstr);
	561	newstr = (char *)NULL;
	562	}
	563	free(line);
	564
	565	fclose( ifile );
	566	fclose( ofile );
	567	}
	568
	569	return 0;
	570	}
	571
	572	/*
This routine is a generalized, finite state token parser. It allows
you to extract tokens one at a time from a string of characters. The
	575	characters used for white space, for break characters, and for quotes
	576	can be specified. Also, characters in the string can be preceded by
	577	a specifiable escape character which removes any special meaning the
	578	character may have.
	579
	580	There are a lot of formal parameters in this subroutine call, but
	581	once you get familiar with them, this routine is fairly easy to use.
	582	"#define" macros can be used to generate simpler looking calls for
	583	commonly used applications of this routine.
	584
	585	First, some terminology:
	586
	587	token: used here, a single unit of information in
	588	the form of a group of characters.
	589
	590	white space: space that gets ignored (except within quotes
	591	or when escaped), like blanks and tabs. in
	592	addition, white space terminates a non-quoted
	593	token.
	594
	595	break character: a character that separates non-quoted tokens.
	596	commas are a common break character. the
	597	usage of break characters to signal the end
	598	of a token is the same as that of white space,
	599	except multiple break characters with nothing
	600	or only white space between generate a null
	601	token for each two break characters together.
	602
	603	for example, if blank is set to be the white
	604	space and comma is set to be the break
	605	character, the line ...
	606
	607	A, B, C , , DEF
	608
	609	... consists of 5 tokens:
	610
	611	1) "A"
	612	2) "B"
	613	3) "C"
	614	4) "" (the null string)
	615	5) "DEF"
	616
	617	quote character: a character that, when surrounding a group
	618	of other characters, causes the group of
	619	characters to be treated as a single token,
	620	no matter how many white spaces or break
	621	characters exist in the group. also, a
	622	token always terminates after the closing
	623	quote. for example, if ' is the quote
	624	character, blank is white space, and comma
	625	is the break character, the following
	626	string ...
	627
	628	A, ' B, CD'EF GHI
	629
	630	... consists of 4 tokens:
	631
	632	1) "A"
	633	2) " B, CD" (note the blanks & comma)
	634	3) "EF"
	635	4) "GHI"
	636
	637	the quote characters themselves do
	638	not appear in the resultant tokens. the
	639	double quotes are delimiters i use here for
	640	documentation purposes only.
	641
	642	escape character: a character which itself is ignored but
	643	which causes the next character to be
	644	used as is. ^ and \ are often used as
	645	escape characters. an escape in the last
	646	position of the string gets treated as a
	647	"normal" (i.e., non-quote, non-white,
	648	non-break, and non-escape) character.
	649	for example, assume white space, break
	650	character, and quote are the same as in the
	651	above examples, and further, assume that
	652	^ is the escape character. then, in the
	653	string ...
	654
	655	ABC, ' DEF ^' GH' I ^ J K^ L ^
	656
	657	... there are 7 tokens:
	658
	659	1) "ABC"
	660	2) " DEF ' GH"
	661	3) "I"
	662	4) " " (a lone blank)
	663	5) "J"
	664	6) "K L"
	665	7) "^" (passed as is at end of line)
	666
	667
	668	OK, now that you have this background, here's how to call "tokenizer":
	669
	670	result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
	671	brkused,next,quoted)
	672
	673	result: 0 if we haven't reached EOS (end of string), and
	674	1 if we have (this is an "int").
	675
flag: right now, only the low order 2 bits are used.
1 => convert non-quoted tokens to upper case
2 => convert non-quoted tokens to lower case
0 => do not convert non-quoted tokens
(this is an "unsigned").
	681
	682	token: a character string containing the returned next token
	683	(this is a "char[]").
	684
	685	maxtok: the maximum size of "token". characters beyond
	686	"maxtok" are truncated (this is an "int").
	687
	688	string: the string to be parsed (this is a "char[]").
	689
	690	white: a string of the valid white spaces. example:
	691
	692	char whitesp[]={" \t"};
	693
	694	blank and tab will be valid white space (this is
	695	a "char[]").
	696
	697	break: a string of the valid break characters. example:
	698
	699	char breakch[]={";,"};
	700
	701	semicolon and comma will be valid break characters
	702	(this is a "char[]").
	703
	704	IMPORTANT: do not use the name "break" as a C
	705	variable, as this is a reserved word in C.
	706
	707	quote: a string of the valid quote characters. an example
	708	would be
	709
char quotech[]={"'\""};
	711
	712	(this causes single and double quotes to be valid)
	713	note that a token starting with one of these characters
	714	needs the same quote character to terminate it.
	715
	716	for example,
	717
	718	"ABC '
	719
	720	is unterminated, but
	721
	722	"DEF" and 'GHI'
	723
	724	are properly terminated. note that different quote
	725	characters can appear on the same line; only for
	726	a given token do the quote characters have to be
	727	the same (this is a "char[]").
	728
	729	escape: the escape character (NOT a string ... only one
	730	allowed). use zero if none is desired (this is
	731	a "char").
	732
	733	brkused: the break character used to terminate the current
	734	token. if the token was quoted, this will be the
	735	quote used. if the token is the last one on the
	736	line, this will be zero (this is a pointer to a
	737	"char").
	738
	739	next: this variable points to the first character of the
	740	next token. it gets reset by "tokenizer" as it steps
	741	through the string. set it to 0 upon initialization,
	742	and leave it alone after that. you can change it
	743	if you want to jump around in the string or re-parse
	744	from the beginning, but be careful (this is a
	745	pointer to an "int").
	746
	747	quoted: set to 1 (true) if the token was quoted and 0 (false)
	748	if not. you may need this information (for example:
	749	in C, a string with quotes around it is a character
	750	string, while one without is an identifier).
	751
	752	(this is a pointer to a "char").
	753	*/
	754
	755	/* states */
	756
	757	#define IN_WHITE 0
	758	#define IN_TOKEN 1
	759	#define IN_QUOTE 2
	760	#define IN_OZONE 3
	761
	762	int _p_state; /* current state */
	763	unsigned _p_flag; /* option flag */
	764	char _p_curquote; /* current quote char */
	765	int _p_tokpos; /* current token pos */
	766
	767	/* routine to find character in string ... used only by "tokenizer" */
	768
	769	int sindex(char ch,char *string)
	770	{
	771	char *cp;
	772	for(cp=string;*cp;++cp)
	773	if(ch==*cp)
	774	return (int)(cp-string); /* return postion of character */
	775	return -1; /* eol ... no match found */
	776	}
	777
	778	/* routine to store a character in a string ... used only by "tokenizer" */
	779
	780	void chstore(char *string,int max,char ch)
	781	{
	782	char c;
	783	if(_p_tokpos>=0&&_p_tokpos<max-1)
	784	{
	785	if(_p_state==IN_QUOTE)
	786	c=ch;
	787	else
	788	switch(_p_flag&3)
	789	{
	790	case 1: /* convert to upper */
	791	c=toupper(ch);
	792	break;
	793
	794	case 2: /* convert to lower */
	795	c=tolower(ch);
	796	break;
	797
	798	default: /* use as is */
	799	c=ch;
	800	break;
	801	}
	802	string[_p_tokpos++]=c;
	803	}
	804	return;
	805	}
	806
	807	int tokenizer(unsigned inflag,char token,int tokmax,char line,
	808	char white,char brkchar,char quote,char eschar,char brkused,
	809	int next,char quoted)
	810	{
	811	int qp;
	812	char c,nc;
	813
	814	brkused=0; / initialize to null */
	815	quoted=0; / assume not quoted */
	816
	817	if(!line[next]) / if we're at end of line, indicate such */
	818	return 1;
	819
	820	_p_state=IN_WHITE; /* initialize state */
	821	_p_curquote=0; /* initialize previous quote char */
	822	_p_flag=inflag; /* set option flag */
	823
	824	for(_p_tokpos=0;(c=line[next]);++(next)) /* main loop */
	825	{
	826	if((qp=sindex(c,brkchar))>=0) /* break */
	827	{
	828	switch(_p_state)
	829	{
	830	case IN_WHITE: /* these are the same here ... */
	831	case IN_TOKEN: /* ... just get out */
	832	case IN_OZONE: /* ditto */
	833	++(*next);
	834	*brkused=brkchar[qp];
	835	goto byebye;
	836
	837	case IN_QUOTE: /* just keep going */
	838	chstore(token,tokmax,c);
	839	break;
	840	}
	841	}
	842	else if((qp=sindex(c,quote))>=0) /* quote */
	843	{
	844	switch(_p_state)
	845	{
	846	case IN_WHITE: /* these are identical, */
	847	_p_state=IN_QUOTE; /* change states */
	848	_p_curquote=quote[qp]; /* save quote char */
	849	quoted=1; / set to true as long as something is in quotes */
	850	break;
	851
	852	case IN_QUOTE:
	853	if(quote[qp]==_p_curquote) /* same as the beginning quote? */
	854	{
	855	_p_state=IN_OZONE;
	856	_p_curquote=0;
	857	}
	858	else
	859	chstore(token,tokmax,c); /* treat as regular char */
	860	break;
	861
	862	case IN_TOKEN:
	863	case IN_OZONE:
	864	brkused=c; / uses quote as break char */
	865	goto byebye;
	866	}
	867	}
	868	else if((qp=sindex(c,white))>=0) /* white */
	869	{
	870	switch(_p_state)
	871	{
	872	case IN_WHITE:
	873	case IN_OZONE:
	874	break; /* keep going */
	875
	876	case IN_TOKEN:
	877	_p_state=IN_OZONE;
	878	break;
	879
	880	case IN_QUOTE:
	881	chstore(token,tokmax,c); /* it's valid here */
	882	break;
	883	}
	884	}
	885	else if(c==eschar) /* escape */
	886	{
	887	nc=line[(*next)+1];
	888	if(nc==0) /* end of line */
	889	{
	890	*brkused=0;
	891	chstore(token,tokmax,c);
	892	++(*next);
	893	goto byebye;
	894	}
	895	switch(_p_state)
	896	{
	897	case IN_WHITE:
	898	--(*next);
	899	_p_state=IN_TOKEN;
	900	break;
	901
	902	case IN_TOKEN:
	903	case IN_QUOTE:
	904	++(*next);
	905	chstore(token,tokmax,nc);
	906	break;
	907
	908	case IN_OZONE:
	909	goto byebye;
	910	}
	911	}
	912	else /* anything else is just a real character */
	913	{
	914	switch(_p_state)
	915	{
	916	case IN_WHITE:
	917	_p_state=IN_TOKEN; /* switch states */
	918
	919	case IN_TOKEN: /* these 2 are */
	920	case IN_QUOTE: /* identical here */
	921	chstore(token,tokmax,c);
	922	break;
	923
	924	case IN_OZONE:
	925	goto byebye;
	926	}
	927	}
	928	} /* end of main loop */
	929
	930	byebye:
	931	token[_p_tokpos]=0; /* make sure token ends with EOS */
	932
	933	return 0;
	934	}