[wxWidgets.git] / src / tiff / contrib / iptcutil / iptcutil.c


#include "tif_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif

#ifdef HAVE_IO_H
# include <io.h>
#endif

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif

/*
 * STRNICMP: length-bounded, case-insensitive string comparison.
 * Maps to strnicmp when WIN32 is defined and to strncasecmp otherwise.
 */
#ifdef WIN32
#define STRNICMP strnicmp
#else 
#define STRNICMP strncasecmp
#endif 

/* Associates a numeric tag id with a human-readable tag name. */
typedef struct _tag_spec
{
  short id;     /* numeric tag id */
  char *name;   /* display name for that id */
} tag_spec;

/*
 * Table of known record numbers and their display names.
 * formatIPTC() searches this table linearly for the record number it
 * read from the stream and, when found, adds the name to the output line.
 */
static tag_spec tags[] = {
    { 5,"Image Name" },
    { 7,"Edit Status" },
    { 10,"Priority" },
    { 15,"Category" },
    { 20,"Supplemental Category" },
    { 22,"Fixture Identifier" },
    { 25,"Keyword" },
    { 30,"Release Date" },
    { 35,"Release Time" },
    { 40,"Special Instructions" },
    { 45,"Reference Service" },
    { 47,"Reference Date" },
    { 50,"Reference Number" },
    { 55,"Created Date" },
    { 60,"Created Time" },
    { 65,"Originating Program" },
    { 70,"Program Version" },
    { 75,"Object Cycle" },
    { 80,"Byline" },
    { 85,"Byline Title" },
    { 90,"City" },
    { 95,"Province State" },
    { 100,"Country Code" },
    { 101,"Country" },
    { 103,"Original Transmission Reference" },
    { 105,"Headline" },
    { 110,"Credit" },
    { 115,"Source" },
    { 116,"Copyright String" },
    { 120,"Caption" },
    { 121,"Local Caption" },
    { 122,"Caption Writer" },
    /* 200-219: generic custom fields */
    { 200,"Custom Field 1" },
    { 201,"Custom Field 2" },
    { 202,"Custom Field 3" },
    { 203,"Custom Field 4" },
    { 204,"Custom Field 5" },
    { 205,"Custom Field 6" },
    { 206,"Custom Field 7" },
    { 207,"Custom Field 8" },
    { 208,"Custom Field 9" },
    { 209,"Custom Field 10" },
    { 210,"Custom Field 11" },
    { 211,"Custom Field 12" },
    { 212,"Custom Field 13" },
    { 213,"Custom Field 14" },
    { 214,"Custom Field 15" },
    { 215,"Custom Field 16" },
    { 216,"Custom Field 17" },
    { 217,"Custom Field 18" },
    { 218,"Custom Field 19" },
    { 219,"Custom Field 20" }
};

/*
 * Write the first len bytes of s to ofile as one double-quoted line.
 *
 * We format the output using HTML conventions to preserve control
 * characters and such:
 *   '&'  is written as "&amp;"
 *   '"'  is written as "&quot;"
 *   '<' and '>' are written as "&lt;" / "&gt;" only when HANDLE_GT_LT
 *        is defined
 *   any byte for which iscntrl() is true is written as "&#N;" where N is
 *        the byte value in decimal
 * Every other byte is copied unchanged.  The closing quote is followed by
 * a newline.
 *
 *   ofile  stream to write to
 *   s      data to format; need not be NUL-terminated
 *   len    number of bytes of s to format; nothing but the quotes and
 *          newline is written when len <= 0
 */
void formatString(FILE *ofile, const char *s, int len)
{
  putc('"', ofile);
  for (; len > 0; --len, ++s) {
    /*
     * Read the byte as unsigned char: the <ctype.h> classifiers are only
     * defined for EOF and values representable as unsigned char, and a
     * negative value would also produce a bogus "&#-N;" escape.
     */
    int c = (unsigned char) *s;
    switch (c) {
    case '&':
      fputs("&amp;", ofile);
      break;
#ifdef HANDLE_GT_LT
    case '<':
      fputs("&lt;", ofile);
      break;
    case '>':
      fputs("&gt;", ofile);
      break;
#endif
    case '"':
      fputs("&quot;", ofile);
      break;
    default:
      if (iscntrl(c))
        fprintf(ofile, "&#%d;", c);
      else
        putc(c, ofile);
      break;
    }
  }
  fputs("\"\n", ofile);
}

/* One named HTML escape sequence and the character it stands for. */
typedef struct _html_code
{
  short len;          /* number of characters in code */
  const char *code;   /* the escape sequence text */
  const char val;     /* the character the sequence decodes to */
} html_code;

/*
 * Named escape sequences recognised by convertHTMLcodes().
 * The "&lt;" / "&gt;" entries exist only when HANDLE_GT_LT is defined,
 * mirroring the conditional cases in formatString().
 */
static html_code html_codes[] = {
#ifdef HANDLE_GT_LT
    { 4,"&lt;",'<' },
    { 4,"&gt;",'>' },
#endif
    { 5,"&amp;",'&' },
    { 6,"&quot;",'"' }
};

/*
 * This routine converts the HTML escape sequence at the start of s
 * back to the original character, in place.
 *
 * Two forms are recognised:
 *   - numeric:  "&#N;" where the terminating ';' is at index 3, 4 or 5
 *               (i.e. up to three characters between '#' and ';')
 *   - named:    any entry of html_codes[], compared case-insensitively
 *
 * On a match the sequence is replaced by the single decoded character and
 * the rest of the string (including its terminating NUL) is moved down.
 *
 *   s    NUL-terminated string; expected to start at the '&'
 *   len  number of characters available at s (the caller passes strlen(s))
 *
 * - returns the number of characters dropped, or 0 when s is NULL/empty,
 *   len <= 0, or no complete escape sequence starts at s (s is then left
 *   unmodified).
 */
int convertHTMLcodes(char *s, int len)
{
  if (len <=0 || s==(char*)NULL || *s=='\0')
    return 0;

  if (s[1] == '#')
    {
      int val, o;

      if (sscanf(s,"&#%d;",&val) == 1)
      {
        /*
         * sscanf() reports success as soon as the number is parsed, even
         * if the ';' is missing, so locate the terminator explicitly and
         * never look beyond len or the end of the string.
         */
        o = 3;
        while (o <= 5 && o < len && s[o] != ';' && s[o] != '\0')
          o++;
        if (o > 5 || o >= len || s[o] != ';')
          return 0;   /* unterminated or over-long: leave text untouched */

        /* source and destination overlap, so strcpy() must not be used */
        memmove(s+1, s+1+o, strlen(s+1+o)+1);
        *s = (char) val;
        return o;
      }
    }
  else
    {
      int
        i,
        codes = sizeof(html_codes) / sizeof(html_code);

      for (i=0; i < codes; i++)
      {
        if (html_codes[i].len <= len)
          if (STRNICMP(s, html_codes[i].code, html_codes[i].len) == 0)
            {
              /* overlapping move of the tail, terminator included */
              memmove(s+1, s+html_codes[i].len,
                      strlen(s+html_codes[i].len)+1);
              *s = html_codes[i].val;
              return html_codes[i].len-1;
            }
      }
    }

  return 0;
}

/*
 * Decode a binary IPTC stream read from ifile into text written to ofile,
 * one record per line.
 *
 * Leading bytes are skipped until the first 0x1c marker.  From then on
 * every record must start with 0x1c and is read as:
 *     marker, dataset byte, record-number byte, length, data
 * The length is two bytes, most significant first, unless the first length
 * byte has its high bit set; in that case the four bytes following it hold
 * the length, most significant first.
 *
 * Each record is written as
 *     dataset#recnum#name="data"     when recnum is listed in tags[]
 *     dataset#recnum="data"          otherwise
 * with the data escaped by formatString().
 *
 * Returns the number of records written.  Returns -1 when a byte other
 * than 0x1c appears where a record should start, when the input ends in
 * the middle of a record, or when a four-byte length has its top bit set
 * (it could not be held in a non-negative long).  Returns 0 when the input
 * ends exactly where the length field should begin or when the data buffer
 * cannot be allocated.
 */
int formatIPTC(FILE *ifile, FILE *ofile)
{
  unsigned int
    foundiptc,
    tagsfound;

  unsigned char
    recnum,
    dataset;

  char
    *readable,
    *str;

  long
    tagindx,
    taglen;

  int
    i,
    tagcount = sizeof(tags) / sizeof(tag_spec);

  int
    c;  /* int, not char: EOF must stay distinguishable from byte 0xFF */

  foundiptc = 0; /* found the IPTC-Header */
  tagsfound = 0; /* number of tags found */

  c = getc(ifile);
  while (c != EOF)
  {
    if (c == 0x1c)
      foundiptc = 1;
    else
      {
        if (foundiptc)
          return -1;
        /* still looking for the first marker: consume this byte,
           otherwise the loop would spin on it forever */
        c = getc(ifile);
        continue;
      }

    /* we found the 0x1c tag and now grab the dataset and record number tags */
    c = getc(ifile);
    if (c == EOF)
      return -1;
    dataset = (unsigned char) c;
    c = getc(ifile);
    if (c == EOF)
      return -1;
    recnum = (unsigned char) c;

    /* try to match this record to one of the ones in our named table */
    for (i=0; i< tagcount; i++)
    {
      if (tags[i].id == recnum)
          break;
    }
    if (i < tagcount)
      readable = tags[i].name;
    else
      readable = "";

    /* then we decode the length of the block that follows - long or short fmt */
    c = getc(ifile);
    if (c == EOF)
      return 0;
    if (c & 0x80)
      {
        unsigned char
          buffer[4];

        for (i=0; i<4; i++)
        {
          c = getc(ifile);
          if (c == EOF)
            return -1;
          buffer[i] = (unsigned char) c;
        }
        /* a set top bit would shift into the sign bit of a 32-bit long
           and yield a negative length, so reject it up front */
        if (buffer[0] & 0x80)
          return -1;
        taglen = (((long) buffer[ 0 ]) << 24) |
                 (((long) buffer[ 1 ]) << 16) |
                 (((long) buffer[ 2 ]) <<  8) |
                 (((long) buffer[ 3 ]));
      }
    else
      {
        taglen = ((long) c) << 8;
        c = getc(ifile);
        if (c == EOF)
          return -1;
        taglen |= (long) c;
      }

    /* make a buffer to hold the tag data and snag it from the input stream */
    str = (char *) malloc((size_t) taglen + 1);
    if (str == (char *) NULL)
      {
        printf("Memory allocation failed");
        return 0;
      }
    for (tagindx=0; tagindx<taglen; tagindx++)
    {
      c = getc(ifile);
      if (c == EOF)
      {
          free(str);
          return -1;
      }
      str[tagindx] = (char) c;
    }
    str[ taglen ] = 0;

    /* now finish up by formatting this binary data into ASCII equivalent */
    if (strlen(readable) > 0)
      fprintf(ofile, "%d#%d#%s=",(int) dataset, (int) recnum, readable);
    else
      fprintf(ofile, "%d#%d=",(int) dataset, (int) recnum);
    formatString( ofile, str, (int) taglen );
    free(str);

    tagsfound++;

    c = getc(ifile);
  }
  return tagsfound;
}

/* Forward declaration: tokenizer() is defined after main(), which calls it. */
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
char *white,char *brkchar,char *quote,char eschar,char *brkused,
int *next,char *quoted);

/*
 * Read one line of arbitrary length from file into a heap buffer.
 *
 * Characters are read up to, but not including, the next '\n' or EOF.
 * The buffer is grown with realloc() as needed, so the returned pointer
 * may differ from b; the caller must continue with the returned pointer.
 *
 *   b     buffer obtained from malloc()/realloc(), or NULL
 *   blen  in:  number of bytes usable in b (ignored when b is NULL)
 *         out: length of the line plus one for the terminating NUL,
 *              or 0 when NULL is returned
 *   file  stream to read from
 *
 * Returns the NUL-terminated line.  Returns NULL when no characters were
 * read before '\n' or EOF (so an empty line ends the input just like EOF)
 * or when the buffer could not be grown.  Whenever NULL is returned the
 * buffer has been released, so the caller must not use or free b again.
 */
char *super_fgets(char *b, int *blen, FILE *file)
{
  int
    c,
    len,
    used;

  /* a NULL buffer has no capacity whatever *blen says */
  len = (b != (char *) NULL) ? *blen : 0;
  used = 0;
  for ( ; ; )
  {
    c=fgetc(file);
    if (c == EOF || c == '\n')
      break;
    if (used + 1 >= len)
      {
        char
          *grown;

        /* double the capacity; start from 2 if none was given so the
           size actually increases */
        len = (len > 0) ? (len << 1) : 2;
        grown = (char *) realloc(b, (size_t) len + 2);
        if (grown == (char *) NULL)
          {
            /* realloc() left the old block intact: release it here since
               the caller only ever sees our return value */
            free(b);
            *blen = 0;
            return (char *) NULL;
          }
        b = grown;
      }
    b[used++] = (char) c;
  }
  *blen=0;
  if (b == (char *) NULL)
    return (char *) NULL;
  if (used == 0)
    {
      /* nothing read: end of input for the caller */
      free(b);
      return (char *) NULL;
    }
  b[used] = '\0';
  *blen = used + 1;
  return b;
}

/* Initial size, in bytes, of the line buffer main() allocates for text input. */
#define BUFFER_SZ 4096

/*
 * iptcutil entry point.
 *
 * Options (introduced by '-' or '/'):
 *   -b       binary mode (default): read binary IPTC, write text
 *   -t       text mode: read text, write binary IPTC
 *   -i file  read from file instead of stdin
 *   -o file  write to file instead of stdout
 * The open mode chosen for -i / -o depends on the mode selected by the
 * options that precede it on the command line.
 *
 * In text mode each input line has the form  dataset#recnum[#name]="data".
 * The part before '=' supplies the dataset and record numbers; the quoted
 * part is un-escaped with convertHTMLcodes() and written as a record:
 * 0x1c, dataset, recnum, length, data.
 *
 * Returns 0 on success and 1 on a usage error, an unopenable file or an
 * allocation failure.
 */
int main(int argc, char *argv[])
{
  int
    i,
    mode; /* iptc binary, or iptc text */

  FILE
    *ifile = stdin,
    *ofile = stdout;

  char
    c,
    *usage = "usage: iptcutil -t | -b [-i file] [-o file] <input >output";

  if( argc < 2 )
    {
      puts(usage);
      return 1;
    }

  mode = 0;

  for (i=1; i<argc; i++)
  {
    c = argv[i][0];
    if (c == '-' || c == '/')
      {
        c = argv[i][1];
        switch( c )
        {
        case 't':
          mode = 1;
#ifdef WIN32
          /* Set "stdout" to binary mode: */
          _setmode( _fileno( ofile ), _O_BINARY );
#endif
          break;
        case 'b':
          mode = 0;
#ifdef WIN32
          /* Set "stdin" to binary mode: */
          _setmode( _fileno( ifile ), _O_BINARY );
#endif
          break;
        case 'i':
          /* the option needs a file name after it */
          if (i + 1 >= argc)
            {
              puts(usage);
              return 1;
            }
          if (mode == 0)
            ifile = fopen(argv[++i], "rb");
          else
            ifile = fopen(argv[++i], "rt");
          if (ifile == (FILE *)NULL)
            {
              printf("Unable to open: %s\n", argv[i]);
              return 1;
            }
          break;
        case 'o':
          /* the option needs a file name after it */
          if (i + 1 >= argc)
            {
              puts(usage);
              return 1;
            }
          if (mode == 0)
            ofile = fopen(argv[++i], "wt");
          else
            ofile = fopen(argv[++i], "wb");
          if (ofile == (FILE *)NULL)
            {
              printf("Unable to open: %s\n", argv[i]);
              return 1;
            }
          break;
        default:
          printf("Unknown option: %s\n", argv[i]);
          return 1;
        }
      }
    else
      {
        puts(usage);
        return 1;
      }
  }

  if (mode == 0) /* handle binary iptc info */
    formatIPTC(ifile, ofile);

  if (mode == 1) /* handle text form of iptc info */
    {
      char
        brkused,
        quoted,
        *line,
        *token,
        *newstr;

      int
        state,
        next;

      unsigned char
        recnum = 0,
        dataset = 0;

      int
        inputlen = BUFFER_SZ;

      line = (char *) malloc(inputlen);
      if (line == (char *) NULL)
        {
          fprintf(stderr, "Memory allocation failed\n");
          return 1;
        }
      token = (char *)NULL;
      while((line = super_fgets(line,&inputlen,ifile))!=NULL)
      {
        state=0;
        next=0;

        /* inputlen now holds the line length + 1, so neither the whole
           line nor any piece of it can overflow these buffers */
        token = (char *) malloc(inputlen);
        newstr = (char *) malloc(inputlen);
        if (token == (char *) NULL || newstr == (char *) NULL)
          {
            fprintf(stderr, "Memory allocation failed\n");
            free(token);
            free(newstr);
            free(line);
            return 1;
          }
        /* split the line at '=': first token is the key, second the value */
        while(tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
          &brkused,&next,&quoted)==0)
        {
          if (state == 0)
            {
              /* key: "dataset#recnum[#name]" - only the first two
                 '#'-separated fields are used */
              int
                field,
                pos;

              char
                sub_brk,
                sub_quoted;

              field=0;
              pos=0;
              while(tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
                &sub_brk, &pos, &sub_quoted)==0)
              {
                if (field == 0)
                  dataset = (unsigned char) atoi(newstr);
                else
                   if (field == 1)
                     recnum = (unsigned char) atoi(newstr);
                field++;
              }
            }
          else
            if (state == 1)
              {
                /* value: undo the HTML escapes in place, then emit */
                int
                  pos;

                unsigned long
                  len;

                char
                  sub_brk,
                  sub_quoted;

                pos=0;
                len = strlen(token);
                /* walk token from '&' to '&'; after each break, pos-1 is
                   the index of the '&' that ended the piece */
                while(tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
                  &sub_brk, &pos, &sub_quoted)==0)
                {
                  if (sub_brk && pos > 0)
                    {
                      char
                        *s = &token[pos-1];

                      len -= convertHTMLcodes(s, strlen(s));
                    }
                }

                fputc(0x1c, ofile);
                fputc(dataset, ofile);
                fputc(recnum, ofile);
                if (len < 0x10000)
                  {
                    /* short form: two length bytes */
                    fputc((len >> 8) & 255, ofile);
                    fputc(len & 255, ofile);
                  }
                else
                  {
                    /* long form: four length bytes, high bit of the
                       first one set */
                    fputc(((len >> 24) & 255) | 0x80, ofile);
                    fputc((len >> 16) & 255, ofile);
                    fputc((len >> 8) & 255, ofile);
                    fputc(len & 255, ofile);
                  }
                pos=0;
                while (len--)
                  fputc(token[pos++], ofile);
              }
          state++;
        }
        free(token);
        token = (char *)NULL;
        free(newstr);
        newstr = (char *)NULL;
      }
      free(line);

      fclose( ifile );
      fclose( ofile );
    }

  return 0;
}

/*
	This routine is a generalized, finite state token parser. It allows
    you to extract tokens one at a time from a string of characters.  The
    characters used for white space, for break characters, and for quotes
    can be specified. Also, characters in the string can be preceded by
    a specifiable escape character which removes any special meaning the
    character may have.

	There are a lot of formal parameters in this subroutine call, but
	once you get familiar with them, this routine is fairly easy to use.
	"#define" macros can be used to generate simpler looking calls for
	commonly used applications of this routine.

	First, some terminology:

	token:		used here, a single unit of information in
				the form of a group of characters.

	white space:	space that gets ignored (except within quotes
				or when escaped), like blanks and tabs.  in
				addition, white space terminates a non-quoted
				token.

	break character: a character that separates non-quoted tokens.
				commas are a common break character.  the
				usage of break characters to signal the end
				of a token is the same as that of white space,
				except multiple break characters with nothing
				or only white space between generate a null
				token for each two break characters together.

				for example, if blank is set to be the white
				space and comma is set to be the break
				character, the line ...

				A, B, C ,  , DEF

				... consists of 5 tokens:

				1)	"A"
				2)	"B"
				3)	"C"
				4)	""      (the null string)
				5)	"DEF"

	quote character: 	a character that, when surrounding a group
				of other characters, causes the group of
				characters to be treated as a single token,
				no matter how many white spaces or break
				characters exist in the group.	also, a
				token always terminates after the closing
				quote.	for example, if ' is the quote
				character, blank is white space, and comma
				is the break character, the following
				string ...

				A, ' B, CD'EF GHI

				... consists of 4 tokens:

				1)	"A"
				2)	" B, CD" (note the blanks & comma)
				3)	"EF"
				4)	"GHI"

				the quote characters themselves do
				not appear in the resultant tokens.  the
				double quotes are delimiters i use here for
				documentation purposes only.

	escape character:	a character which itself is ignored but
				which causes the next character to be
				used as is.  ^ and \ are often used as
				escape characters.  an escape in the last
				position of the string gets treated as a
				"normal" (i.e., non-quote, non-white,
				non-break, and non-escape) character.
				for example, assume white space, break
				character, and quote are the same as in the
				above examples, and further, assume that
				^ is the escape character.  then, in the
				string ...

				ABC, ' DEF ^' GH' I ^ J K^ L ^

				... there are 7 tokens:

				1)	"ABC"
				2)	" DEF ' GH"
				3)	"I"
				4)	" "     (a lone blank)
				5)	"J"
				6)	"K L"
				7)	"^"     (passed as is at end of line)


	OK, now that you have this background, here's how to call "tokenizer":

	result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
		      brkused,next,quoted)

	result: 	0 if we haven't reached EOS (end of string), and
			1 if we have (this is an "int").

	flag:		right now, only the low order 2 bits are used.
			1 => convert non-quoted tokens to upper case
			2 => convert non-quoted tokens to lower case
			0 => do not convert non-quoted tokens
			(this is an "unsigned").

	token:		a character string containing the returned next token
			(this is a "char[]").

	maxtok: 	the maximum size of "token".  characters beyond
			"maxtok" are truncated (this is an "int").

	string: 	the string to be parsed (this is a "char[]").

	white:		a string of the valid white spaces.  example:

			char whitesp[]={" \t"};

			blank and tab will be valid white space (this is
			a "char[]").

	break:		a string of the valid break characters.  example:

			char breakch[]={";,"};

			semicolon and comma will be valid break characters
			(this is a "char[]").

			IMPORTANT:  do not use the name "break" as a C
			variable, as this is a reserved word in C.

	quote:		a string of the valid quote characters.  an example
			would be

			char quotech[]={"'\""};

			(this causes single and double quotes to be valid)
			note that a token starting with one of these characters
			needs the same quote character to terminate it.

			for example,

			"ABC '

			is unterminated, but

			"DEF" and 'GHI'

			are properly terminated.  note that different quote
			characters can appear on the same line; only for
			a given token do the quote characters have to be
			the same (this is a "char[]").

	escape: 	the escape character (NOT a string ... only one
			allowed).  use zero if none is desired (this is
			a "char").

	brkused:	the break character used to terminate the current
			token.	if the token was quoted, this will be the
			quote used.  if the token is the last one on the
			line, this will be zero (this is a pointer to a
			"char").

	next:		this variable points to the first character of the
			next token.  it gets reset by "tokenizer" as it steps
			through the string.  set it to 0 upon initialization,
			and leave it alone after that.	you can change it
			if you want to jump around in the string or re-parse
			from the beginning, but be careful (this is a
			pointer to an "int").

	quoted: 	set to 1 (true) if the token was quoted and 0 (false)
			if not.  you may need this information (for example:
			in C, a string with quotes around it is a character
			string, while one without is an identifier).

			(this is a pointer to a "char").
*/

/*
 * Parser states used by tokenizer() and chstore().
 *
 *   IN_WHITE  initial state; nothing of the token has been stored yet
 *   IN_TOKEN  inside an unquoted token
 *   IN_QUOTE  inside a quoted token
 *   IN_OZONE  the token has ended (closing quote seen, or white space
 *             after an unquoted token); the next ordinary, quote or
 *             escape character terminates the scan
 */

#define IN_WHITE 0
#define IN_TOKEN 1
#define IN_QUOTE 2
#define IN_OZONE 3

/*
 * Parser working state.  tokenizer() re-initialises all four on every
 * call that gets past its end-of-line check; chstore() reads them and
 * advances _p_tokpos.
 */
int _p_state;	   /* current state, one of the IN_* values */
unsigned _p_flag;  /* option flag (case conversion) for the current call */
char _p_curquote;  /* quote character that opened the current quoted token */
int _p_tokpos;	   /* index in the token buffer of the next character to store */

/*
 * Locate a character in a string ... used only by "tokenizer".
 * Returns the zero-based position of the first occurrence of ch in
 * string, or -1 when ch does not occur before the terminating NUL
 * (so searching for '\0' itself always yields -1).
 */

int sindex(char ch,char *string)
{
  int pos = 0;

  while (string[pos] != '\0')
  {
    if (string[pos] == ch)
      return pos;	/* position of the first match */
    pos++;
  }
  return -1;		/* reached end of string without a match */
}

/*
 * Routine to store a character in a string ... used only by "tokenizer".
 *
 * Appends ch to string at index _p_tokpos and advances _p_tokpos.  The
 * character is silently dropped when the index is negative or when
 * storing it would leave no room for a terminating NUL (index >= max-1).
 *
 * Outside quotes the low two bits of _p_flag select case conversion:
 * 1 => upper case, 2 => lower case, anything else => stored unchanged.
 * Inside quotes (IN_QUOTE) the character is always stored unchanged.
 *
 *   string  destination token buffer
 *   max     size of the destination buffer in bytes
 *   ch      character to store
 */

void chstore(char *string,int max,char ch)
{
  char c;
  if(_p_tokpos>=0&&_p_tokpos<max-1)
  {
    if(_p_state==IN_QUOTE)
      c=ch;
    else
      switch(_p_flag&3)
      {
	    case 1: 	    /* convert to upper */
	      /* cast first: toupper() is undefined for negative values */
	      c=(char) toupper((unsigned char) ch);
	      break;

	    case 2: 	    /* convert to lower */
	      /* cast first: tolower() is undefined for negative values */
	      c=(char) tolower((unsigned char) ch);
	      break;

	    default:	    /* use as is */
	      c=ch;
	      break;
      }
    string[_p_tokpos++]=c;
  }
  return;
}

/*
 * Extract the next token from line, starting at index *next.
 *
 *   inflag   case-conversion option, handed to chstore() through _p_flag
 *   token    receives the NUL-terminated token
 *   tokmax   size of the token buffer; chstore() drops characters that
 *            would not fit
 *   line     NUL-terminated string being parsed
 *   white    characters treated as white space
 *   brkchar  characters treated as break characters
 *   quote    characters treated as quotes
 *   eschar   escape character
 *   brkused  receives the break or quote character that ended the token,
 *            or 0 when the end of line was reached
 *   next     in/out: index in line where scanning starts / resumes
 *   quoted   set to 1 when the token was opened by a quote character,
 *            otherwise 0
 *
 * Returns 1, without touching token, when line[*next] is already the
 * terminating NUL; returns 0 after storing a token.
 *
 * The parser state lives in the file-scope _p_* variables, which are
 * re-initialised on every call that gets past the end-of-line check.
 */
int tokenizer(unsigned inflag,char *token,int tokmax,char *line,
  char *white,char *brkchar,char *quote,char eschar,char *brkused,
    int *next,char *quoted)
{
  int qp;
  char c,nc;

  *brkused=0;		/* initialize to null */
  *quoted=0;		/* assume not quoted  */

  if(!line[*next])	/* if we're at end of line, indicate such */
    return 1;

  _p_state=IN_WHITE;   /* initialize state */
  _p_curquote=0;	   /* initialize previous quote char */
  _p_flag=inflag;	   /* set option flag */

  /* Each character is classified in priority order:
     break, quote, white space, escape, ordinary. */
  for(_p_tokpos=0;(c=line[*next]);++(*next))	/* main loop */
  {
    if((qp=sindex(c,brkchar))>=0)  /* break */
    {
      switch(_p_state)
      {
	    case IN_WHITE:		/* these are the same here ...	*/
	    case IN_TOKEN:		/* ... just get out		*/
	    case IN_OZONE:		/* ditto			*/
	      ++(*next);		/* step past the break character */
	      *brkused=brkchar[qp];
	      goto byebye;

	    case IN_QUOTE:		 /* just keep going */
	      chstore(token,tokmax,c);
	      break;
      }
    }
    else if((qp=sindex(c,quote))>=0)  /* quote */
    {
      switch(_p_state)
      {
	    case IN_WHITE:	 /* opening quote: */
	      _p_state=IN_QUOTE; /* change states   */
	      _p_curquote=quote[qp]; /* save quote char */
	      *quoted=1;	/* set to true as long as something is in quotes */
	      break;

	    case IN_QUOTE:
	      if(quote[qp]==_p_curquote) /* same as the beginning quote? */
	      {
	        _p_state=IN_OZONE;
	        _p_curquote=0;
	      }
	      else
	        chstore(token,tokmax,c); /* treat as regular char */
	      break;

	    case IN_TOKEN:
	    case IN_OZONE:
	      /* *next is left on the quote so the next call starts there */
	      *brkused=c; /* uses quote as break char */
	      goto byebye;
      }
    }
    else if((qp=sindex(c,white))>=0) /* white */
    {
      switch(_p_state)
      {
	    case IN_WHITE:
	    case IN_OZONE:
	      break;		/* keep going */

	    case IN_TOKEN:
	      _p_state=IN_OZONE;
	      break;

	    case IN_QUOTE:
	      chstore(token,tokmax,c); /* it's valid here */
	      break;
      }
    }
    else if(c==eschar)  /* escape */
    {
      nc=line[(*next)+1];
      if(nc==0) 		/* end of line */
      {
	    /* a trailing escape is stored as an ordinary character */
	    *brkused=0;
	    chstore(token,tokmax,c);
	    ++(*next);
	    goto byebye;
      }
      switch(_p_state)
      {
	    case IN_WHITE:
	      /* back up so the loop increment revisits this escape
	         character, this time in the IN_TOKEN state */
	      --(*next);
	      _p_state=IN_TOKEN;
	      break;

	    case IN_TOKEN:
	    case IN_QUOTE:
	      ++(*next);	/* skip the escape, store what follows it */
	      chstore(token,tokmax,nc);
	      break;

	    case IN_OZONE:
	      goto byebye;
      }
    }
    else	/* anything else is just a real character */
    {
      switch(_p_state)
      {
	    case IN_WHITE:
	      _p_state=IN_TOKEN; /* switch states */
	      /* intentional fall through: store the first character */

	    case IN_TOKEN:		 /* these 2 are     */
	    case IN_QUOTE:		 /*  identical here */
	      chstore(token,tokmax,c);
	      break;

	    case IN_OZONE:
	      goto byebye;
      }
    }
  }		/* end of main loop */

byebye:
  token[_p_tokpos]=0;	/* make sure token ends with EOS */

  return 0;
}
/*
 * Local Variables:
 * mode: c
 * c-basic-offset: 8
 * fill-column: 78
 * End:
 */
Commit	Line	Data
8414a40c VZ	1
	2	#include "tif_config.h"
	3
	4	#include <stdio.h>
	5	#include <stdlib.h>
	6	#include <string.h>
8414a40c VZ	7	#include <ctype.h>
	8
	9	#ifdef HAVE_STRINGS_H
	10	# include <strings.h>
	11	#endif
	12
	13	#ifdef HAVE_IO_H
	14	# include <io.h>
	15	#endif
	16
	17	#ifdef HAVE_FCNTL_H
	18	# include <fcntl.h>
	19	#endif
	20
	21	#ifdef WIN32
	22	#define STRNICMP strnicmp
	23	#else
	24	#define STRNICMP strncasecmp
	25	#endif
	26
	27	typedef struct _tag_spec
	28	{
	29	short
	30	id;
	31
	32	char
	33	*name;
	34	} tag_spec;
	35
	36	static tag_spec tags[] = {
	37	{ 5,"Image Name" },
	38	{ 7,"Edit Status" },
	39	{ 10,"Priority" },
	40	{ 15,"Category" },
	41	{ 20,"Supplemental Category" },
	42	{ 22,"Fixture Identifier" },
	43	{ 25,"Keyword" },
	44	{ 30,"Release Date" },
	45	{ 35,"Release Time" },
	46	{ 40,"Special Instructions" },
	47	{ 45,"Reference Service" },
	48	{ 47,"Reference Date" },
	49	{ 50,"Reference Number" },
	50	{ 55,"Created Date" },
	51	{ 60,"Created Time" },
	52	{ 65,"Originating Program" },
	53	{ 70,"Program Version" },
	54	{ 75,"Object Cycle" },
	55	{ 80,"Byline" },
	56	{ 85,"Byline Title" },
	57	{ 90,"City" },
	58	{ 95,"Province State" },
	59	{ 100,"Country Code" },
	60	{ 101,"Country" },
	61	{ 103,"Original Transmission Reference" },
	62	{ 105,"Headline" },
	63	{ 110,"Credit" },
	64	{ 115,"Source" },
	65	{ 116,"Copyright String" },
	66	{ 120,"Caption" },
	67	{ 121,"Local Caption" },
	68	{ 122,"Caption Writer" },
	69	{ 200,"Custom Field 1" },
	70	{ 201,"Custom Field 2" },
71	{ 202,"Custom Field 3" },
72	{ 203,"Custom Field 4" },
73	{ 204,"Custom Field 5" },
74	{ 205,"Custom Field 6" },
75	{ 206,"Custom Field 7" },
76	{ 207,"Custom Field 8" },
77	{ 208,"Custom Field 9" },
78	{ 209,"Custom Field 10" },
79	{ 210,"Custom Field 11" },
80	{ 211,"Custom Field 12" },
81	{ 212,"Custom Field 13" },
82	{ 213,"Custom Field 14" },
83	{ 214,"Custom Field 15" },
84	{ 215,"Custom Field 16" },
85	{ 216,"Custom Field 17" },
86	{ 217,"Custom Field 18" },
87	{ 218,"Custom Field 19" },
88	{ 219,"Custom Field 20" }
89	};
90
91	/*
92	* We format the output using HTML conventions
93	* to preserve control characters and such.
94	*/
95	void formatString(FILE ofile, const char s, int len)
96	{
97	putc('"', ofile);
98	for (; len > 0; --len, ++s) {
99	int c = *s;
100	switch (c) {
101	case '&':
102	fputs("&", ofile);
103	break;
104	#ifdef HANDLE_GT_LT
105	case '<':
106	fputs("<", ofile);
107	break;
108	case '>':
109	fputs(">", ofile);
110	break;
111	#endif
112	case '"':
113	fputs(""", ofile);
114	break;
115	default:
116	if (iscntrl(c))
117	fprintf(ofile, "&#%d;", c);
118	else
119	putc(*s, ofile);
120	break;
121	}
122	}
123	fputs("\"\n", ofile);
124	}
125
126	typedef struct _html_code
127	{
128	short
129	len;
130	const char
131	*code,
132	val;
133	} html_code;
134
135	static html_code html_codes[] = {
136	#ifdef HANDLE_GT_LT
137	{ 4,"<",'<' },
138	{ 4,">",'>' },
139	#endif
140	{ 5,"&",'&' },
141	{ 6,""",'"' }
142	};
143
144	/*
145	* This routine converts HTML escape sequence
146	* back to the original ASCII representation.
147	* - returns the number of characters dropped.
148	*/
149	int convertHTMLcodes(char *s, int len)
150	{
151	if (len <=0 \|\| s==(char)NULL \|\| s=='\0')
152	return 0;
153
154	if (s[1] == '#')
155	{
156	int val, o;
157
158	if (sscanf(s,"&#%d;",&val) == 1)
159	{
160	o = 3;
161	while (s[o] != ';')
162	{
163	o++;
164	if (o > 5)
165	break;
166	}
167	if (o < 5)
168	strcpy(s+1, s+1+o);
169	*s = val;
170	return o;
171	}
172	}
173	else
174	{
175	int
176	i,
177	codes = sizeof(html_codes) / sizeof(html_code);
178
179	for (i=0; i < codes; i++)
180	{
181	if (html_codes[i].len <= len)
182	if (STRNICMP(s, html_codes[i].code, html_codes[i].len) == 0)
183	{
184	strcpy(s+1, s+html_codes[i].len);
185	*s = html_codes[i].val;
186	return html_codes[i].len-1;
187	}
188	}
189	}
190
191	return 0;
192	}
193
194	int formatIPTC(FILE ifile, FILE ofile)
195	{
196	unsigned int
197	foundiptc,
198	tagsfound;
199
200	unsigned char
201	recnum,
202	dataset;
203
204	char
205	*readable,
206	*str;
207
208	long
209	tagindx,
210	taglen;
211
212	int
213	i,
214	tagcount = sizeof(tags) / sizeof(tag_spec);
215
216	char
217	c;
218
219	foundiptc = 0; /* found the IPTC-Header */
220	tagsfound = 0; /* number of tags found */
221
222	c = getc(ifile);
223	while (c != EOF)
224	{
225	if (c == 0x1c)
226	foundiptc = 1;
227	else
228	{
229	if (foundiptc)
230	return -1;
231	else
232	continue;
233	}
234
235	/* we found the 0x1c tag and now grab the dataset and record number tags */
236	dataset = getc(ifile);
237	if ((char) dataset == EOF)
238	return -1;
239	recnum = getc(ifile);
240	if ((char) recnum == EOF)
241	return -1;
242	/* try to match this record to one of the ones in our named table */
243	for (i=0; i< tagcount; i++)
244	{
245	if (tags[i].id == recnum)
246	break;
247	}
248	if (i < tagcount)
249	readable = tags[i].name;
250	else
251	readable = "";
252
253	/* then we decode the length of the block that follows - long or short fmt */
254	c = getc(ifile);
255	if (c == EOF)
256	return 0;
257	if (c & (unsigned char) 0x80)
258	{
259	unsigned char
260	buffer[4];
261
262	for (i=0; i<4; i++)
263	{
264	c = buffer[i] = getc(ifile);
265	if (c == EOF)
266	return -1;
267	}
268	taglen = (((long) buffer[ 0 ]) << 24) \|
269	(((long) buffer[ 1 ]) << 16) \|
270	(((long) buffer[ 2 ]) << 8) \|
271	(((long) buffer[ 3 ]));
272	}
273	else
274	{
275	unsigned char
276	x = c;
277
278	taglen = ((long) x) << 8;
279	x = getc(ifile);
280	if ((char)x == EOF)
281	return -1;
282	taglen \|= (long) x;
283	}
284	/* make a buffer to hold the tag data and snag it from the input stream */
285	str = (char *) malloc((unsigned int) (taglen+1));
286	if (str == (char *) NULL)
287	{
288	printf("Memory allocation failed");
289	return 0;
290	}
291	for (tagindx=0; tagindx<taglen; tagindx++)
292	{
293	c = str[tagindx] = getc(ifile);
294	if (c == EOF)
80ed523f VZ	295	{
	296	free(str);
	297	return -1;
	298	}
8414a40c VZ	299	}
	300	str[ taglen ] = 0;
	301
	302	/* now finish up by formatting this binary data into ASCII equivalent */
	303	if (strlen(readable) > 0)
	304	fprintf(ofile, "%d#%d#%s=",(unsigned int)dataset, (unsigned int) recnum, readable);
	305	else
	306	fprintf(ofile, "%d#%d=",(unsigned int)dataset, (unsigned int) recnum);
	307	formatString( ofile, str, taglen );
	308	free(str);
	309
	310	tagsfound++;
	311
	312	c = getc(ifile);
	313	}
	314	return tagsfound;
	315	}
	316
	317	int tokenizer(unsigned inflag,char token,int tokmax,char line,
	318	char white,char brkchar,char quote,char eschar,char brkused,
	319	int next,char quoted);
	320
	321	char super_fgets(char b, int blen, FILE file)
	322	{
	323	int
	324	c,
	325	len;
	326
	327	char
	328	*q;
	329
	330	len=*blen;
	331	for (q=b; ; q++)
	332	{
	333	c=fgetc(file);
	334	if (c == EOF \|\| c == '\n')
	335	break;
80ed523f	336	if (((long)q - (long)b + 1 ) >= (long) len)
8414a40c	337	{
80ed523f	338	long
8414a40c VZ	339	tlen;
8414a40c VZ	340
80ed523f	341	tlen=(long)q-(long)b;
8414a40c VZ	342	len<<=1;
	343	b=(char ) realloc((char ) b,(len+2));
	344	if ((char ) b == (char ) NULL)
	345	break;
	346	q=b+tlen;
	347	}
	348	*q=(unsigned char) c;
	349	}
	350	*blen=0;
	351	if ((unsigned char )b != (unsigned char ) NULL)
	352	{
	353	int
	354	tlen;
	355
80ed523f	356	tlen=(long)q - (long)b;
8414a40c VZ	357	if (tlen == 0)
	358	return (char *) NULL;
	359	b[tlen] = '\0';
	360	*blen=++tlen;
	361	}
	362	return b;
	363	}
	364
	365	#define BUFFER_SZ 4096
	366
	367	int main(int argc, char *argv[])
	368	{
	369	unsigned int
	370	length;
	371
	372	unsigned char
	373	*buffer;
	374
	375	int
	376	i,
	377	mode; /* iptc binary, or iptc text */
	378
	379	FILE
	380	*ifile = stdin,
	381	*ofile = stdout;
	382
	383	char
	384	c,
	385	*usage = "usage: iptcutil -t \| -b [-i file] [-o file] <input >output";
	386
	387	if( argc < 2 )
	388	{
80ed523f	389	puts(usage);
8414a40c VZ	390	return 1;
	391	}
	392
	393	mode = 0;
	394	length = -1;
	395	buffer = (unsigned char *)NULL;
	396
	397	for (i=1; i<argc; i++)
	398	{
	399	c = argv[i][0];
	400	if (c == '-' \|\| c == '/')
	401	{
	402	c = argv[i][1];
	403	switch( c )
	404	{
	405	case 't':
	406	mode = 1;
	407	#ifdef WIN32
	408	/* Set "stdout" to binary mode: */
	409	_setmode( _fileno( ofile ), _O_BINARY );
	410	#endif
	411	break;
	412	case 'b':
	413	mode = 0;
	414	#ifdef WIN32
	415	/* Set "stdin" to binary mode: */
	416	_setmode( _fileno( ifile ), _O_BINARY );
	417	#endif
	418	break;
	419	case 'i':
	420	if (mode == 0)
	421	ifile = fopen(argv[++i], "rb");
	422	else
	423	ifile = fopen(argv[++i], "rt");
	424	if (ifile == (FILE *)NULL)
	425	{
	426	printf("Unable to open: %s\n", argv[i]);
	427	return 1;
	428	}
	429	break;
	430	case 'o':
	431	if (mode == 0)
	432	ofile = fopen(argv[++i], "wt");
	433	else
	434	ofile = fopen(argv[++i], "wb");
	435	if (ofile == (FILE *)NULL)
	436	{
	437	printf("Unable to open: %s\n", argv[i]);
	438	return 1;
	439	}
	440	break;
	441	default:
	442	printf("Unknown option: %s\n", argv[i]);
	443	return 1;
	444	}
	445	}
	446	else
	447	{
80ed523f	448	puts(usage);
8414a40c VZ	449	return 1;
	450	}
	451	}
	452
	453	if (mode == 0) /* handle binary iptc info */
	454	formatIPTC(ifile, ofile);
	455
	456	if (mode == 1) /* handle text form of iptc info */
	457	{
	458	char
	459	brkused,
	460	quoted,
	461	*line,
	462	*token,
	463	*newstr;
	464
	465	int
	466	state,
	467	next;
	468
	469	unsigned char
	470	recnum = 0,
	471	dataset = 0;
	472
	473	int
	474	inputlen = BUFFER_SZ;
	475
	476	line = (char *) malloc(inputlen);
	477	token = (char *)NULL;
	478	while((line = super_fgets(line,&inputlen,ifile))!=NULL)
	479	{
	480	state=0;
	481	next=0;
	482
	483	token = (char *) malloc(inputlen);
	484	newstr = (char *) malloc(inputlen);
	485	while(tokenizer(0, token, inputlen, line, "", "=", "\"", 0,
	486	&brkused,&next,&quoted)==0)
	487	{
	488	if (state == 0)
	489	{
	490	int
	491	state,
	492	next;
	493
	494	char
	495	brkused,
	496	quoted;
	497
	498	state=0;
	499	next=0;
	500	while(tokenizer(0, newstr, inputlen, token, "", "#", "", 0,
	501	&brkused, &next, &quoted)==0)
	502	{
	503	if (state == 0)
	504	dataset = (unsigned char) atoi(newstr);
	505	else
	506	if (state == 1)
	507	recnum = (unsigned char) atoi(newstr);
	508	state++;
	509	}
	510	}
	511	else
	512	if (state == 1)
513	{
514	int
515	next;
516
517	unsigned long
518	len;
519
520	char
521	brkused,
522	quoted;
523
524	next=0;
525	len = strlen(token);
526	while(tokenizer(0, newstr, inputlen, token, "", "&", "", 0,
527	&brkused, &next, &quoted)==0)
528	{
529	if (brkused && next > 0)
530	{
531	char
532	*s = &token[next-1];
533
534	len -= convertHTMLcodes(s, strlen(s));
535	}
536	}
537
538	fputc(0x1c, ofile);
539	fputc(dataset, ofile);
540	fputc(recnum, ofile);
541	if (len < 0x10000)
542	{
543	fputc((len >> 8) & 255, ofile);
544	fputc(len & 255, ofile);
545	}
546	else
547	{
548	fputc(((len >> 24) & 255) \| 0x80, ofile);
549	fputc((len >> 16) & 255, ofile);
550	fputc((len >> 8) & 255, ofile);
551	fputc(len & 255, ofile);
552	}
553	next=0;
554	while (len--)
555	fputc(token[next++], ofile);
556	}
557	state++;
558	}
559	free(token);
560	token = (char *)NULL;
561	free(newstr);
562	newstr = (char *)NULL;
563	}
564	free(line);
565
566	fclose( ifile );
567	fclose( ofile );
568	}
569
570	return 0;
571	}
572
573	/*
574	This routine is a generalized, finite state token parser. It allows
	575	you to extract tokens one at a time from a string of characters. The
576	characters used for white space, for break characters, and for quotes
577	can be specified. Also, characters in the string can be preceded by
578	a specifiable escape character which removes any special meaning the
579	character may have.
580
581	There are a lot of formal parameters in this subroutine call, but
582	once you get familiar with them, this routine is fairly easy to use.
583	"#define" macros can be used to generate simpler looking calls for
584	commonly used applications of this routine.
585
586	First, some terminology:
587
588	token: used here, a single unit of information in
589	the form of a group of characters.
590
591	white space: space that gets ignored (except within quotes
592	or when escaped), like blanks and tabs. in
593	addition, white space terminates a non-quoted
594	token.
595
596	break character: a character that separates non-quoted tokens.
597	commas are a common break character. the
598	usage of break characters to signal the end
599	of a token is the same as that of white space,
600	except multiple break characters with nothing
601	or only white space between generate a null
602	token for each two break characters together.
603
604	for example, if blank is set to be the white
605	space and comma is set to be the break
606	character, the line ...
607
608	A, B, C , , DEF
609
610	... consists of 5 tokens:
611
612	1) "A"
613	2) "B"
614	3) "C"
615	4) "" (the null string)
616	5) "DEF"
617
618	quote character: a character that, when surrounding a group
619	of other characters, causes the group of
620	characters to be treated as a single token,
621	no matter how many white spaces or break
622	characters exist in the group. also, a
623	token always terminates after the closing
624	quote. for example, if ' is the quote
625	character, blank is white space, and comma
626	is the break character, the following
627	string ...
628
629	A, ' B, CD'EF GHI
630
631	... consists of 4 tokens:
632
633	1) "A"
634	2) " B, CD" (note the blanks & comma)
635	3) "EF"
636	4) "GHI"
637
638	the quote characters themselves do
639	not appear in the resultant tokens. the
640	double quotes are delimiters i use here for
641	documentation purposes only.
642
643	escape character: a character which itself is ignored but
644	which causes the next character to be
645	used as is. ^ and \ are often used as
646	escape characters. an escape in the last
647	position of the string gets treated as a
648	"normal" (i.e., non-quote, non-white,
649	non-break, and non-escape) character.
650	for example, assume white space, break
651	character, and quote are the same as in the
652	above examples, and further, assume that
653	^ is the escape character. then, in the
654	string ...
655
656	ABC, ' DEF ^' GH' I ^ J K^ L ^
657
658	... there are 7 tokens:
659
660	1) "ABC"
661	2) " DEF ' GH"
662	3) "I"
663	4) " " (a lone blank)
664	5) "J"
665	6) "K L"
666	7) "^" (passed as is at end of line)
667
668
669	OK, now that you have this background, here's how to call "tokenizer":
670
671	result=tokenizer(flag,token,maxtok,string,white,break,quote,escape,
672	brkused,next,quoted)
673
674	result: 0 if we haven't reached EOS (end of string), and
675	1 if we have (this is an "int").
676
	677	flag: right now, only the low order 2 bits are used.
678	1 => convert non-quoted tokens to upper case
679	2 => convert non-quoted tokens to lower case
680	0 => do not convert non-quoted tokens
	681	(this is an "unsigned").
682
683	token: a character string containing the returned next token
684	(this is a "char[]").
685
686	maxtok: the maximum size of "token". characters beyond
687	"maxtok" are truncated (this is an "int").
688
689	string: the string to be parsed (this is a "char[]").
690
691	white: a string of the valid white spaces. example:
692
693	char whitesp[]={" \t"};
694
695	blank and tab will be valid white space (this is
696	a "char[]").
697
698	break: a string of the valid break characters. example:
699
700	char breakch[]={";,"};
701
702	semicolon and comma will be valid break characters
703	(this is a "char[]").
704
705	IMPORTANT: do not use the name "break" as a C
706	variable, as this is a reserved word in C.
707
708	quote: a string of the valid quote characters. an example
709	would be
710
	711	char quotech[]={"'\""};
712
713	(this causes single and double quotes to be valid)
714	note that a token starting with one of these characters
715	needs the same quote character to terminate it.
716
717	for example,
718
719	"ABC '
720
721	is unterminated, but
722
723	"DEF" and 'GHI'
724
725	are properly terminated. note that different quote
726	characters can appear on the same line; only for
727	a given token do the quote characters have to be
728	the same (this is a "char[]").
729
730	escape: the escape character (NOT a string ... only one
731	allowed). use zero if none is desired (this is
732	a "char").
733
734	brkused: the break character used to terminate the current
735	token. if the token was quoted, this will be the
736	quote used. if the token is the last one on the
737	line, this will be zero (this is a pointer to a
738	"char").
739
740	next: this variable points to the first character of the
741	next token. it gets reset by "tokenizer" as it steps
742	through the string. set it to 0 upon initialization,
743	and leave it alone after that. you can change it
744	if you want to jump around in the string or re-parse
745	from the beginning, but be careful (this is a
746	pointer to an "int").
747
748	quoted: set to 1 (true) if the token was quoted and 0 (false)
749	if not. you may need this information (for example:
750	in C, a string with quotes around it is a character
751	string, while one without is an identifier).
752
753	(this is a pointer to a "char").
754	*/
755
756	/* states */
757
758	#define IN_WHITE 0
759	#define IN_TOKEN 1
760	#define IN_QUOTE 2
761	#define IN_OZONE 3
762
763	int _p_state; /* current state */
764	unsigned _p_flag; /* option flag */
765	char _p_curquote; /* current quote char */
766	int _p_tokpos; /* current token pos */
767
768	/* routine to find character in string ... used only by "tokenizer" */
769
770	int sindex(char ch,char *string)
771	{
772	char *cp;
773	for(cp=string;*cp;++cp)
774	if(ch==*cp)
775	return (int)(cp-string); /* return postion of character */
776	return -1; /* eol ... no match found */
777	}
778
779	/* routine to store a character in a string ... used only by "tokenizer" */
780
781	void chstore(char *string,int max,char ch)
782	{
783	char c;
784	if(_p_tokpos>=0&&_p_tokpos<max-1)
785	{
786	if(_p_state==IN_QUOTE)
787	c=ch;
788	else
789	switch(_p_flag&3)
790	{
791	case 1: /* convert to upper */
792	c=toupper(ch);
793	break;
794
795	case 2: /* convert to lower */
796	c=tolower(ch);
797	break;
798
799	default: /* use as is */
800	c=ch;
801	break;
802	}
803	string[_p_tokpos++]=c;
804	}
805	return;
806	}
807
808	int tokenizer(unsigned inflag,char token,int tokmax,char line,
809	char white,char brkchar,char quote,char eschar,char brkused,
810	int next,char quoted)
811	{
812	int qp;
813	char c,nc;
814
815	brkused=0; / initialize to null */
816	quoted=0; / assume not quoted */
817
818	if(!line[next]) / if we're at end of line, indicate such */
819	return 1;
820
821	_p_state=IN_WHITE; /* initialize state */
822	_p_curquote=0; /* initialize previous quote char */
823	_p_flag=inflag; /* set option flag */
824
825	for(_p_tokpos=0;(c=line[next]);++(next)) /* main loop */
826	{
827	if((qp=sindex(c,brkchar))>=0) /* break */
828	{
829	switch(_p_state)
830	{
831	case IN_WHITE: /* these are the same here ... */
832	case IN_TOKEN: /* ... just get out */
833	case IN_OZONE: /* ditto */
834	++(*next);
835	*brkused=brkchar[qp];
836	goto byebye;
837
838	case IN_QUOTE: /* just keep going */
839	chstore(token,tokmax,c);
840	break;
841	}
842	}
843	else if((qp=sindex(c,quote))>=0) /* quote */
844	{
845	switch(_p_state)
846	{
847	case IN_WHITE: /* these are identical, */
848	_p_state=IN_QUOTE; /* change states */
849	_p_curquote=quote[qp]; /* save quote char */
850	quoted=1; / set to true as long as something is in quotes */
851	break;
852
853	case IN_QUOTE:
854	if(quote[qp]==_p_curquote) /* same as the beginning quote? */
855	{
856	_p_state=IN_OZONE;
857	_p_curquote=0;
858	}
859	else
860	chstore(token,tokmax,c); /* treat as regular char */
861	break;
862
863	case IN_TOKEN:
864	case IN_OZONE:
865	brkused=c; / uses quote as break char */
866	goto byebye;
867	}
868	}
869	else if((qp=sindex(c,white))>=0) /* white */
870	{
871	switch(_p_state)
872	{
873	case IN_WHITE:
874	case IN_OZONE:
875	break; /* keep going */
876
877	case IN_TOKEN:
878	_p_state=IN_OZONE;
879	break;
880
881	case IN_QUOTE:
882	chstore(token,tokmax,c); /* it's valid here */
883	break;
884	}
885	}
886	else if(c==eschar) /* escape */
887	{
888	nc=line[(*next)+1];
889	if(nc==0) /* end of line */
890	{
891	*brkused=0;
892	chstore(token,tokmax,c);
893	++(*next);
894	goto byebye;
895	}
896	switch(_p_state)
897	{
898	case IN_WHITE:
899	--(*next);
900	_p_state=IN_TOKEN;
901	break;
902
903	case IN_TOKEN:
904	case IN_QUOTE:
905	++(*next);
906	chstore(token,tokmax,nc);
907	break;
908
909	case IN_OZONE:
910	goto byebye;
911	}
912	}
913	else /* anything else is just a real character */
914	{
915	switch(_p_state)
916	{
917	case IN_WHITE:
918	_p_state=IN_TOKEN; /* switch states */
919
920	case IN_TOKEN: /* these 2 are */
921	case IN_QUOTE: /* identical here */
922	chstore(token,tokmax,c);
923	break;
924
925	case IN_OZONE:
926	goto byebye;
927	}
928	}
929	} /* end of main loop */
930
931	byebye:
932	token[_p_tokpos]=0; /* make sure token ends with EOS */
933
934	return 0;
935	}
80ed523f VZ	936	/*
	937	* Local Variables:
	938	* mode: c
	939	* c-basic-offset: 8
	940	* fill-column: 78
	941	* End:
	942	*/