1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd 
   2    See the file COPYING for copying permission. 
  19 /* This ensures proper sorting. */ 
  21 #define NSSEP T('\001') 
  24 characterData(void *userData
, const XML_Char 
*s
, int len
) 
  27   for (; len 
> 0; --len
, ++s
) { 
  30       fputts(T("&"), fp
); 
  33       fputts(T("<"), fp
); 
  36       fputts(T(">"), fp
); 
  40       fputts(T("
"), fp
); 
  44       fputts(T("""), fp
); 
  49       ftprintf(fp
, T("&#%d;"), *s
); 
  60 attributeValue(FILE *fp
, const XML_Char 
*s
) 
  71       fputts(T("&"), fp
); 
  74       fputts(T("<"), fp
); 
  77       fputts(T("""), fp
); 
  81       fputts(T("	"), fp
); 
  84       fputts(T("
"), fp
); 
  87       fputts(T("
"), fp
); 
  91       fputts(T(">"), fp
); 
  96       ftprintf(fp
, T("&#%d;"), *s
); 
 107 /* Lexicographically comparing UTF-8 encoded attribute values, 
 108 is equivalent to lexicographically comparing based on the character number. */ 
 111 attcmp(const void *att1
, const void *att2
) 
 113   return tcscmp(*(const XML_Char 
**)att1
, *(const XML_Char 
**)att2
); 
 117 startElement(void *userData
, const XML_Char 
*name
, const XML_Char 
**atts
) 
 128   nAtts 
= (p 
- atts
) >> 1; 
 130     qsort((void *)atts
, nAtts
, sizeof(XML_Char 
*) * 2, attcmp
); 
 134     attributeValue(fp
, *atts
); 
 141 endElement(void *userData
, const XML_Char 
*name
) 
 151 nsattcmp(const void *p1
, const void *p2
) 
 153   const XML_Char 
*att1 
= *(const XML_Char 
**)p1
; 
 154   const XML_Char 
*att2 
= *(const XML_Char 
**)p2
; 
 155   int sep1 
= (tcsrchr(att1
, NSSEP
) != 0); 
 156   int sep2 
= (tcsrchr(att1
, NSSEP
) != 0); 
 159   return tcscmp(att1
, att2
); 
 163 startElementNS(void *userData
, const XML_Char 
*name
, const XML_Char 
**atts
) 
 172   sep 
= tcsrchr(name
, NSSEP
); 
 174     fputts(T("n1:"), fp
); 
 176     fputts(T(" xmlns:n1"), fp
); 
 177     attributeValue(fp
, name
); 
 188   nAtts 
= (p 
- atts
) >> 1; 
 190     qsort((void *)atts
, nAtts
, sizeof(XML_Char 
*) * 2, nsattcmp
); 
 193     sep 
= tcsrchr(name
, NSSEP
); 
 196       ftprintf(fp
, T("n%d:"), nsi
); 
 201     attributeValue(fp
, *atts
); 
 203       ftprintf(fp
, T(" xmlns:n%d"), nsi
++); 
 204       attributeValue(fp
, name
); 
 212 endElementNS(void *userData
, const XML_Char 
*name
) 
 218   sep 
= tcsrchr(name
, NSSEP
); 
 220     fputts(T("n1:"), fp
); 
 231 processingInstruction(void *userData
, const XML_Char 
*target
, 
 232                       const XML_Char 
*data
) 
 244 #endif /* not W3C14N */ 
 247 defaultCharacterData(void *userData
, const XML_Char 
*s
, int len
) 
 249   XML_DefaultCurrent((XML_Parser
) userData
); 
 253 defaultStartElement(void *userData
, const XML_Char 
*name
, 
 254                     const XML_Char 
**atts
) 
 256   XML_DefaultCurrent((XML_Parser
) userData
); 
 260 defaultEndElement(void *userData
, const XML_Char 
*name
) 
 262   XML_DefaultCurrent((XML_Parser
) userData
); 
 266 defaultProcessingInstruction(void *userData
, const XML_Char 
*target
, 
 267                              const XML_Char 
*data
) 
 269   XML_DefaultCurrent((XML_Parser
) userData
); 
 273 nopCharacterData(void *userData
, const XML_Char 
*s
, int len
) 
 278 nopStartElement(void *userData
, const XML_Char 
*name
, const XML_Char 
**atts
) 
 283 nopEndElement(void *userData
, const XML_Char 
*name
) 
 288 nopProcessingInstruction(void *userData
, const XML_Char 
*target
, 
 289                          const XML_Char 
*data
) 
 294 markup(void *userData
, const XML_Char 
*s
, int len
) 
 296   FILE *fp 
= XML_GetUserData((XML_Parser
) userData
); 
 297   for (; len 
> 0; --len
, ++s
) 
 302 metaLocation(XML_Parser parser
) 
 304   const XML_Char 
*uri 
= XML_GetBase(parser
); 
 306     ftprintf(XML_GetUserData(parser
), T(" uri=\"%s\""), uri
); 
 307   ftprintf(XML_GetUserData(parser
), 
 308            T(" byte=\"%ld\" nbytes=\"%d\" line=\"%d\" col=\"%d\""), 
 309            XML_GetCurrentByteIndex(parser
), 
 310            XML_GetCurrentByteCount(parser
), 
 311            XML_GetCurrentLineNumber(parser
), 
 312            XML_GetCurrentColumnNumber(parser
)); 
 316 metaStartDocument(void *userData
) 
 318   fputts(T("<document>\n"), XML_GetUserData((XML_Parser
) userData
)); 
 322 metaEndDocument(void *userData
) 
 324   fputts(T("</document>\n"), XML_GetUserData((XML_Parser
) userData
)); 
 328 metaStartElement(void *userData
, const XML_Char 
*name
, 
 329                  const XML_Char 
**atts
) 
 331   XML_Parser parser 
= (XML_Parser
) userData
; 
 332   FILE *fp 
= XML_GetUserData(parser
); 
 333   const XML_Char 
**specifiedAttsEnd
 
 334     = atts 
+ XML_GetSpecifiedAttributeCount(parser
); 
 335   const XML_Char 
**idAttPtr
; 
 336   int idAttIndex 
= XML_GetIdAttributeIndex(parser
); 
 340     idAttPtr 
= atts 
+ idAttIndex
; 
 342   ftprintf(fp
, T("<starttag name=\"%s\""), name
); 
 343   metaLocation(parser
); 
 345     fputts(T(">\n"), fp
); 
 347       ftprintf(fp
, T("<attribute name=\"%s\" value=\""), atts
[0]); 
 348       characterData(fp
, atts
[1], tcslen(atts
[1])); 
 349       if (atts 
>= specifiedAttsEnd
) 
 350         fputts(T("\" defaulted=\"yes\"/>\n"), fp
); 
 351       else if (atts 
== idAttPtr
) 
 352         fputts(T("\" id=\"yes\"/>\n"), fp
); 
 354         fputts(T("\"/>\n"), fp
); 
 355     } while (*(atts 
+= 2)); 
 356     fputts(T("</starttag>\n"), fp
); 
 359     fputts(T("/>\n"), fp
); 
 363 metaEndElement(void *userData
, const XML_Char 
*name
) 
 365   XML_Parser parser 
= (XML_Parser
) userData
; 
 366   FILE *fp 
= XML_GetUserData(parser
); 
 367   ftprintf(fp
, T("<endtag name=\"%s\""), name
); 
 368   metaLocation(parser
); 
 369   fputts(T("/>\n"), fp
); 
 373 metaProcessingInstruction(void *userData
, const XML_Char 
*target
, 
 374                           const XML_Char 
*data
) 
 376   XML_Parser parser 
= (XML_Parser
) userData
; 
 377   FILE *fp 
= XML_GetUserData(parser
); 
 378   ftprintf(fp
, T("<pi target=\"%s\" data=\""), target
); 
 379   characterData(fp
, data
, tcslen(data
)); 
 381   metaLocation(parser
); 
 382   fputts(T("/>\n"), fp
); 
 386 metaComment(void *userData
, const XML_Char 
*data
) 
 388   XML_Parser parser 
= (XML_Parser
) userData
; 
 389   FILE *fp 
= XML_GetUserData(parser
); 
 390   fputts(T("<comment data=\""), fp
); 
 391   characterData(fp
, data
, tcslen(data
)); 
 393   metaLocation(parser
); 
 394   fputts(T("/>\n"), fp
); 
 398 metaStartCdataSection(void *userData
) 
 400   XML_Parser parser 
= (XML_Parser
) userData
; 
 401   FILE *fp 
= XML_GetUserData(parser
); 
 402   fputts(T("<startcdata"), fp
); 
 403   metaLocation(parser
); 
 404   fputts(T("/>\n"), fp
); 
 408 metaEndCdataSection(void *userData
) 
 410   XML_Parser parser 
= (XML_Parser
) userData
; 
 411   FILE *fp 
= XML_GetUserData(parser
); 
 412   fputts(T("<endcdata"), fp
); 
 413   metaLocation(parser
); 
 414   fputts(T("/>\n"), fp
); 
 418 metaCharacterData(void *userData
, const XML_Char 
*s
, int len
) 
 420   XML_Parser parser 
= (XML_Parser
) userData
; 
 421   FILE *fp 
= XML_GetUserData(parser
); 
 422   fputts(T("<chars str=\""), fp
); 
 423   characterData(fp
, s
, len
); 
 425   metaLocation(parser
); 
 426   fputts(T("/>\n"), fp
); 
 430 metaStartDoctypeDecl(void *userData
, 
 431                      const XML_Char 
*doctypeName
, 
 432                      const XML_Char 
*sysid
, 
 433                      const XML_Char 
*pubid
, 
 434                      int has_internal_subset
) 
 436   XML_Parser parser 
= (XML_Parser
) userData
; 
 437   FILE *fp 
= XML_GetUserData(parser
); 
 438   ftprintf(fp
, T("<startdoctype name=\"%s\""), doctypeName
); 
 439   metaLocation(parser
); 
 440   fputts(T("/>\n"), fp
); 
 444 metaEndDoctypeDecl(void *userData
) 
 446   XML_Parser parser 
= (XML_Parser
) userData
; 
 447   FILE *fp 
= XML_GetUserData(parser
); 
 448   fputts(T("<enddoctype"), fp
); 
 449   metaLocation(parser
); 
 450   fputts(T("/>\n"), fp
); 
 454 metaNotationDecl(void *userData
, 
 455                  const XML_Char 
*notationName
, 
 456                  const XML_Char 
*base
, 
 457                  const XML_Char 
*systemId
, 
 458                  const XML_Char 
*publicId
) 
 460   XML_Parser parser 
= (XML_Parser
) userData
; 
 461   FILE *fp 
= XML_GetUserData(parser
); 
 462   ftprintf(fp
, T("<notation name=\"%s\""), notationName
); 
 464     ftprintf(fp
, T(" public=\"%s\""), publicId
); 
 466     fputts(T(" system=\""), fp
); 
 467     characterData(fp
, systemId
, tcslen(systemId
)); 
 470   metaLocation(parser
); 
 471   fputts(T("/>\n"), fp
); 
 476 metaEntityDecl(void *userData
, 
 477                const XML_Char 
*entityName
, 
 479                const XML_Char 
*value
, 
 481                const XML_Char 
*base
, 
 482                const XML_Char 
*systemId
, 
 483                const XML_Char 
*publicId
, 
 484                const XML_Char 
*notationName
) 
 486   XML_Parser parser 
= (XML_Parser
) userData
; 
 487   FILE *fp 
= XML_GetUserData(parser
); 
 490     ftprintf(fp
, T("<entity name=\"%s\""), entityName
); 
 491     metaLocation(parser
); 
 493     characterData(fp
, value
, value_length
); 
 494     fputts(T("</entity/>\n"), fp
); 
 496   else if (notationName
) { 
 497     ftprintf(fp
, T("<entity name=\"%s\""), entityName
); 
 499       ftprintf(fp
, T(" public=\"%s\""), publicId
); 
 500     fputts(T(" system=\""), fp
); 
 501     characterData(fp
, systemId
, tcslen(systemId
)); 
 503     ftprintf(fp
, T(" notation=\"%s\""), notationName
); 
 504     metaLocation(parser
); 
 505     fputts(T("/>\n"), fp
); 
 508     ftprintf(fp
, T("<entity name=\"%s\""), entityName
); 
 510       ftprintf(fp
, T(" public=\"%s\""), publicId
); 
 511     fputts(T(" system=\""), fp
); 
 512     characterData(fp
, systemId
, tcslen(systemId
)); 
 514     metaLocation(parser
); 
 515     fputts(T("/>\n"), fp
); 
 520 metaStartNamespaceDecl(void *userData
, 
 521                        const XML_Char 
*prefix
, 
 524   XML_Parser parser 
= (XML_Parser
) userData
; 
 525   FILE *fp 
= XML_GetUserData(parser
); 
 526   fputts(T("<startns"), fp
); 
 528     ftprintf(fp
, T(" prefix=\"%s\""), prefix
); 
 530     fputts(T(" ns=\""), fp
); 
 531     characterData(fp
, uri
, tcslen(uri
)); 
 532     fputts(T("\"/>\n"), fp
); 
 535     fputts(T("/>\n"), fp
); 
 539 metaEndNamespaceDecl(void *userData
, const XML_Char 
*prefix
) 
 541   XML_Parser parser 
= (XML_Parser
) userData
; 
 542   FILE *fp 
= XML_GetUserData(parser
); 
 544     fputts(T("<endns/>\n"), fp
); 
 546     ftprintf(fp
, T("<endns prefix=\"%s\"/>\n"), prefix
); 
 550 unknownEncodingConvert(void *data
, const char *p
) 
 552   return codepageConvert(*(int *)data
, p
); 
 556 unknownEncoding(void *userData
, const XML_Char 
*name
, XML_Encoding 
*info
) 
 559   static const XML_Char prefixL
[] = T("windows-"); 
 560   static const XML_Char prefixU
[] = T("WINDOWS-"); 
 563   for (i 
= 0; prefixU
[i
]; i
++) 
 564     if (name
[i
] != prefixU
[i
] && name
[i
] != prefixL
[i
]) 
 568   for (; name
[i
]; i
++) { 
 569     static const XML_Char digits
[] = T("0123456789"); 
 570     const XML_Char 
*s 
= tcschr(digits
, name
[i
]); 
 578   if (!codepageMap(cp
, info
->map
)) 
 580   info
->convert 
= unknownEncodingConvert
; 
 581   /* We could just cast the code page integer to a void *, 
 582   and avoid the use of release. */ 
 583   info
->release 
= free
; 
 584   info
->data 
= malloc(sizeof(int)); 
 587   *(int *)info
->data 
= cp
; 
 592 notStandalone(void *userData
) 
 598 showVersion(XML_Char 
*prog
) 
 602   const XML_Feature 
*features 
= XML_GetFeatureList(); 
 603   while ((ch 
= *s
) != 0) { 
 612   ftprintf(stdout
, T("%s using %s\n"), prog
, XML_ExpatVersion()); 
 613   if (features 
!= NULL 
&& features
[0].feature 
!= XML_FEATURE_END
) { 
 615     ftprintf(stdout
, T("%s"), features
[0].name
); 
 616     if (features
[0].value
) 
 617       ftprintf(stdout
, T("=%ld"), features
[0].value
); 
 618     while (features
[i
].feature 
!= XML_FEATURE_END
) { 
 619       ftprintf(stdout
, T(", %s"), features
[i
].name
); 
 620       if (features
[i
].value
) 
 621         ftprintf(stdout
, T("=%ld"), features
[i
].value
); 
 624     ftprintf(stdout
, T("\n")); 
 629 usage(const XML_Char 
*prog
, int rc
) 
 632            T("usage: %s [-n] [-p] [-r] [-s] [-w] [-x] [-d output-dir] " 
 633              "[-e encoding] file ...\n"), prog
); 
 638 tmain(int argc
, XML_Char 
**argv
) 
 641   const XML_Char 
*outputDir 
= NULL
; 
 642   const XML_Char 
*encoding 
= NULL
; 
 643   unsigned processFlags 
= XML_MAP_FILE
; 
 644   int windowsCodePages 
= 0; 
 646   int useNamespaces 
= 0; 
 647   int requireStandalone 
= 0; 
 648   int paramEntityParsing 
= XML_PARAM_ENTITY_PARSING_NEVER
; 
 652   _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF
|_CRTDBG_LEAK_CHECK_DF
); 
 659       if (argv
[i
][0] != T('-')) 
 661       if (argv
[i
][1] == T('-') && argv
[i
][2] == T('\0')) { 
 667     switch (argv
[i
][j
]) { 
 669       processFlags 
&= ~XML_MAP_FILE
; 
 673       requireStandalone 
= 1; 
 681       paramEntityParsing 
= XML_PARAM_ENTITY_PARSING_ALWAYS
; 
 684       processFlags 
|= XML_EXTERNAL_ENTITIES
; 
 688       windowsCodePages 
= 1; 
 705       if (argv
[i
][j 
+ 1] == T('\0')) { 
 711         outputDir 
= argv
[i
] + j 
+ 1; 
 716       if (argv
[i
][j 
+ 1] == T('\0')) { 
 722         encoding 
= argv
[i
] + j 
+ 1; 
 730       showVersion(argv
[0]); 
 745     processFlags 
&= ~XML_MAP_FILE
; 
 748   for (; i 
< argc
; i
++) { 
 750     XML_Char 
*outName 
= 0; 
 754       parser 
= XML_ParserCreateNS(encoding
, NSSEP
); 
 756       parser 
= XML_ParserCreate(encoding
); 
 757     if (requireStandalone
) 
 758       XML_SetNotStandaloneHandler(parser
, notStandalone
); 
 759     XML_SetParamEntityParsing(parser
, paramEntityParsing
); 
 760     if (outputType 
== 't') { 
 761       /* This is for doing timings; this gives a more realistic estimate of 
 764       XML_SetElementHandler(parser
, nopStartElement
, nopEndElement
); 
 765       XML_SetCharacterDataHandler(parser
, nopCharacterData
); 
 766       XML_SetProcessingInstructionHandler(parser
, nopProcessingInstruction
); 
 768     else if (outputDir
) { 
 769       const XML_Char 
*file 
= useStdin 
? T("STDIN") : argv
[i
]; 
 770       if (tcsrchr(file
, T('/'))) 
 771         file 
= tcsrchr(file
, T('/')) + 1; 
 773       if (tcsrchr(file
, T('\\'))) 
 774         file 
= tcsrchr(file
, T('\\')) + 1; 
 776       outName 
= malloc((tcslen(outputDir
) + tcslen(file
) + 2) 
 778       tcscpy(outName
, outputDir
); 
 779       tcscat(outName
, T("/")); 
 780       tcscat(outName
, file
); 
 781       fp 
= tfopen(outName
, T("wb")); 
 786       setvbuf(fp
, NULL
, _IOFBF
, 16384); 
 790       XML_SetUserData(parser
, fp
); 
 791       switch (outputType
) { 
 793         XML_UseParserAsHandlerArg(parser
); 
 794         XML_SetElementHandler(parser
, metaStartElement
, metaEndElement
); 
 795         XML_SetProcessingInstructionHandler(parser
, metaProcessingInstruction
); 
 796         XML_SetCommentHandler(parser
, metaComment
); 
 797         XML_SetCdataSectionHandler(parser
, metaStartCdataSection
, 
 798                                    metaEndCdataSection
); 
 799         XML_SetCharacterDataHandler(parser
, metaCharacterData
); 
 800         XML_SetDoctypeDeclHandler(parser
, metaStartDoctypeDecl
, 
 802         XML_SetEntityDeclHandler(parser
, metaEntityDecl
); 
 803         XML_SetNotationDeclHandler(parser
, metaNotationDecl
); 
 804         XML_SetNamespaceDeclHandler(parser
, metaStartNamespaceDecl
, 
 805                                     metaEndNamespaceDecl
); 
 806         metaStartDocument(parser
); 
 809         XML_UseParserAsHandlerArg(parser
); 
 810         XML_SetDefaultHandler(parser
, markup
); 
 811         XML_SetElementHandler(parser
, defaultStartElement
, defaultEndElement
); 
 812         XML_SetCharacterDataHandler(parser
, defaultCharacterData
); 
 813         XML_SetProcessingInstructionHandler(parser
, 
 814                                             defaultProcessingInstruction
); 
 818           XML_SetElementHandler(parser
, startElementNS
, endElementNS
); 
 820           XML_SetElementHandler(parser
, startElement
, endElement
); 
 821         XML_SetCharacterDataHandler(parser
, characterData
); 
 823         XML_SetProcessingInstructionHandler(parser
, processingInstruction
); 
 824 #endif /* not W3C14N */ 
 828     if (windowsCodePages
) 
 829       XML_SetUnknownEncodingHandler(parser
, unknownEncoding
, 0); 
 830     result 
= XML_ProcessFile(parser
, useStdin 
? NULL 
: argv
[i
], processFlags
); 
 832       if (outputType 
== 'm') 
 833         metaEndDocument(parser
); 
 839     XML_ParserFree(parser
);