[apple/icu.git] / icuSources / tools / escapesrc / escapesrc.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include <stdio.h>
#include <string>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <iostream>
#include <fstream>

// We only use U8_* macros, which are entirely inline.
#include "unicode/utf8.h"

// This contains a codepage and ISO 14882:1998 illegality table.
// Use "make gen-table" to rebuild it.
#include "cptbl.h"

/**
 * What is this?
 *
 * "This" is a preprocessor that makes an attempt to convert fully valid C++11 source code
 * in utf-8 into something consumable by certain compilers (Solaris, xlC)
 * which aren't quite standards compliant.
 *
 * - u"<unicode>" or u'<unicode>' gets converted to u"\uNNNN" or u'\uNNNN'
 * - u8"<unicode>" gets converted to "\xAA\xBB\xCC\xDD" etc.
 *   (some compilers do not support the u8 prefix correctly.)
 * - if the system is EBCDIC-based, that is used to correct the input characters.
 *
 * Usage:
 *   escapesrc infile.cpp outfile.cpp
 * Normally this is invoked by the build stage, with a rule such as:
 *
 * _%.cpp: $(srcdir)/%.cpp
 *       @$(BINDIR)/escapesrc$(EXEEXT) $< $@
 * %.o: _%.cpp
 *       $(COMPILE.cc) ... $@ $<
 *
 * In the Makefiles, SKIP_ESCAPING=YES is used to prevent escapesrc.cpp 
 * from being itself escaped.
 */


// Code values (0x20, 0x09, 0x0A, 0x0D) of the characters that skipws()
// treats as whitespace.
static const char kSPACE = 0x20;
static const char kTAB   = 0x09;
static const char kLF    = 0x0A;
static const char kCR    = 0x0D;

// Shorthand for a lookup in the cp1047_8859_1 table
// (presumably declared in cptbl.h - confirm).
#define cp1047_to_8859(c) cp1047_8859_1[c]

// Name of this program as used in diagnostics; assigned in main().
std::string prog;

/**
 * Print the one-line usage summary to stderr.
 * This function only prints; the caller decides whether to exit.
 */
void usage() {
  const char *name = prog.c_str();
  fprintf(stderr, "%s: usage: %s infile.cpp outfile.cpp\n", name, name);
}

/**
 * Remove the output file, if one was named.
 * The file is removed even when this run did not create it, since a
 * leftover file from an earlier run might be stale.
 * @param outfile path of the file to remove; an empty path is a no-op
 * @return 0 if the file was removed or did not exist, 1 on any other error
 */
int cleanup(const std::string &outfile) {
  const char *path = outfile.c_str();
  if(!path || !*path) {
    return 0; // nothing was named, nothing to remove
  }

  if(std::remove(path) != 0) {
    if(errno == ENOENT) {
      return 0; // the file was not there to begin with - not an error
    }
    perror("std::remove");
    return 1;
  }

  fprintf(stderr, "%s: deleted %s\n", prog.c_str(), path);
  return 0;
}

/**
 * Advance past a run of whitespace (kSPACE, kTAB, kLF, kCR).
 * @param p where to start looking
 * @param e limit; characters at or beyond e are never examined
 * @return pointer to the first non-whitespace character in [p, e),
 *         or the position where scanning stopped if there is none
 */
inline const char *skipws(const char *p, const char *e) {
  while(p < e) {
    const char ch = *p;
    if(ch != kSPACE && ch != kTAB && ch != kLF && ch != kCR) {
      break; // found something that is not whitespace
    }
    ++p;
  }
  return p;
}

/**
 * Append one byte to a string as a four-character hex escape, e.g. "\x4F".
 * @param outstr string to append to
 * @param byte the byte value to append
 */
void appendByte(std::string &outstr,
                uint8_t byte) {
    // Backslash, 'x', two hex digits and the terminating NUL: exactly 5 chars.
    char tmp2[5];
    // snprintf cannot write past the buffer, unlike the unbounded sprintf.
    snprintf(tmp2, sizeof(tmp2), "\\x%02X", 0xFF & (int)(byte));
    outstr += tmp2;
}

/**
 * Append the bytes from 'linestr' into outstr, with escaping
 * @param outstr the output buffer
 * @param linestr the input buffer
 * @param pos in/out: the current char under consideration
 * @param chars the number of chars to consider
 * @return true on failure
 */
bool appendUtf8(std::string &outstr,
                const std::string &linestr,
                size_t &pos,
                size_t chars) {
  char tmp[9];
  for(size_t i=0;i<chars;i++) {
    tmp[i] = linestr[++pos];
  }
  tmp[chars] = 0;
  unsigned int c;
  sscanf(tmp, "%X", &c);
  UChar32 ch = c & 0x1FFFFF; 

  // now to append \\x%% etc
  uint8_t bytesNeeded = U8_LENGTH(ch);
  if(bytesNeeded == 0) {
    fprintf(stderr, "Illegal code point U+%X\n", ch);
    return true;
  }
  uint8_t bytes[4];
  uint8_t *s = bytes;
  size_t i = 0;
  U8_APPEND_UNSAFE(s, i, ch);
  for(size_t t = 0; t<i; t++) {
    appendByte(outstr, s[t]);
  }
  return false;
}

/**
 * Fixup u8"x": replace the whole literal with a plain "..." literal in which
 * every byte is written as a \xNN escape.
 * @param linestr string to mutate. Already escaped into \u format.
 * @param origpos beginning, points to the 'u' of 'u8"'
 * @param endpos end, points to the closing ". It is not updated after the
 *               replacement, so it is stale once this function returns.
 * @return false for no-problem, true for failure!
 */
bool fixu8(std::string &linestr, size_t origpos, size_t &endpos) {
  size_t pos = origpos + 3; // first character after u8"
  std::string outstr;
  outstr += '\"'; // local encoding
  for(;pos<endpos;pos++) {
    char c = linestr[pos];
    if(c == '\\') {
      char c2 = linestr[++pos];
      switch(c2) {
      case '\'':
      case '"':
      case '\\': // an escaped backslash is a literal backslash character
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
        c2 = cp1047_to_8859(c2);
#endif
        appendByte(outstr, c2);
        break;
      case 'u':
        if(appendUtf8(outstr, linestr, pos, 4)) {
          return true; // diagnostic already printed by appendUtf8
        }
        break;
      case 'U':
        if(appendUtf8(outstr, linestr, pos, 8)) {
          return true; // diagnostic already printed by appendUtf8
        }
        break;
      // Simple escapes: in a UTF-8 literal each is one fixed byte value.
      case 'n': appendByte(outstr, 0x0A); break;
      case 't': appendByte(outstr, 0x09); break;
      case 'r': appendByte(outstr, 0x0D); break;
      case 'a': appendByte(outstr, 0x07); break;
      case 'b': appendByte(outstr, 0x08); break;
      case 'f': appendByte(outstr, 0x0C); break;
      case 'v': appendByte(outstr, 0x0B); break;
      case '?': appendByte(outstr, 0x3F); break;
      default:
        // Octal and hex escapes are not supported: as before, the escape
        // introducer is dropped and any following digits are treated as
        // ordinary characters. Say so instead of corrupting silently.
        fprintf(stderr, "Warning: unsupported escape '\\%c' in u8 string was dropped\n", c2);
        break;
      }
    } else {
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
      c = cp1047_to_8859(c);
#endif
      appendByte(outstr, c);
    }
  }
  outstr += ('\"');

  // Swap u8"..." (including both quotes) for the escaped plain literal.
  linestr.replace(origpos, (endpos-origpos+1), outstr);

  return false; // OK
}

/**
 * Fix the u"x" / u'x' / u8"x" literal that starts at the given position.
 * u8'x' is not supported, sorry.
 *
 * The literal is scanned up to its closing quote. Backslash escapes are
 * stepped over. Any other byte is either left alone (a single byte that is
 * flagged in the oldIllegal table) or decoded as UTF-8 and replaced in place
 * by a \uXXXX or \UXXXXXXXX escape. When the closing quote of a u8"..."
 * literal is reached, the literal is handed to fixu8().
 *
 * @param linestr the line to modify in place
 * @param pos index of the 'u' that starts the literal
 * @return false = no err, true = had err. Reaching the end of the line
 *         without finding a closing quote also returns false.
 */
bool fixAt(std::string &linestr, size_t pos) {
  size_t origpos = pos; // remembered so fixu8() can replace the whole literal
  
  if(linestr[pos] != 'u') {
    fprintf(stderr, "Not a 'u'?");
    return true;
  }

  pos++; // past 'u'

  bool utf8 = false;
  
  if(linestr[pos] == '8') { // u8"
    utf8 = true;
    pos++;
  }
  
  // pos now indexes the opening quote character.
  char quote = linestr[pos];

  if(quote != '\'' && quote != '\"') {
    fprintf(stderr, "Quote is '%c' - not sure what to do.\n", quote);
    return true;
  }

  if(quote == '\'' && utf8) {
    fprintf(stderr, "Cannot do u8'...'\n");
    return true;
  }

  pos ++; // past the opening quote

  //printf("u%c…%c\n", quote, quote);

  for(; pos < linestr.size(); pos++) {
    if(linestr[pos] == quote) {
      // Unescaped closing quote: the literal ends here.
      if(utf8) {
        return fixu8(linestr, origpos, pos); // fix u8"..."
      } else {
        return false; // end of quote
      }
    }
    if(linestr[pos] == '\\') {
      // Step over the escaped character. All three 'continue's and the
      // fall-through case behave the same: nothing else runs this iteration.
      pos++;
      if(linestr[pos] == quote) continue; // quoted quote
      if(linestr[pos] == 'u') continue; // for now ... unicode escape
      if(linestr[pos] == '\\') continue;
      // some other escape… ignore
    } else {
      size_t old_pos = pos; // column reported in the error message below
      int32_t i = pos;      // decode index; U8_NEXT advances it past the sequence
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
      // mogrify 1-4 bytes from 1047 'back' to utf-8
      char old_byte = linestr[pos];
      linestr[pos] = cp1047_to_8859(linestr[pos]);
      // how many more?
      int32_t trail = U8_COUNT_TRAIL_BYTES(linestr[pos]);
      // NOTE(review): pos2 is not checked against linestr.size() here -
      // confirm a truncated sequence at end of line cannot overrun.
      for(size_t pos2 = pos+1; trail>0; pos2++,trail--) {
        linestr[pos2] = cp1047_to_8859(linestr[pos2]);
        if(linestr[pos2] == 0x0A) {
          linestr[pos2] = 0x85; // NL is ambiguous here
        }
      }
#endif
      
      // Proceed to decode utf-8
      // s points into linestr's buffer; it is not used after the replace() below.
      const uint8_t *s = (const uint8_t*) (linestr.c_str());
      int32_t length = linestr.size();
      UChar32 c;
      if(U8_IS_SINGLE((uint8_t)s[i]) && oldIllegal[s[i]]) {
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
        linestr[pos] = old_byte; // put it back
#endif
        continue; // single code point not previously legal for \u escaping
      }

      // otherwise, convert it to \u / \U
      {
        U8_NEXT(s, i, length, c);
      }
      // A negative c signals that the bytes at old_pos could not be decoded.
      if(c<0) {
        fprintf(stderr, "Illegal utf-8 sequence at Column: %d\n", (int)old_pos);
        fprintf(stderr, "Line: >>%s<<\n", linestr.c_str());
        return true;
      }

      // Number of input bytes that U8_NEXT consumed for this code point.
      size_t seqLen = (i-pos);

      //printf("U+%04X pos %d [len %d]\n", c, pos, seqLen);fflush(stdout);

      // Longest output is "\UXXXXXXXX": 10 characters plus the NUL.
      char newSeq[20];
      if( c <= 0xFFFF) {
        sprintf(newSeq, "\\u%04X", c);
      } else {
        sprintf(newSeq, "\\U%08X", c);
      }
      linestr.replace(pos, seqLen, newSeq);
      // Land on the last character of the new escape; the loop's pos++
      // then moves to the first character after it.
      pos += strlen(newSeq) - 1;
    }
  }

  // End of line reached without a closing quote: nothing more to do.
  return false;
}

/**
 * Fix up every u"...", u'...' and u8"..." literal on one line.
 * Each kind of prefix gets its own pass over the line, and every pass
 * searches backwards from the end of the (possibly already modified) line.
 * @param no the line number (not used)
 * @param linestr the line to fix, modified in place
 * @return true if any err (including an over-long line), else false
 */
bool fixLine(int /*no*/, std::string &linestr) {
  const char *line = linestr.c_str();

  // Quick exit: the line holds none of the prefixes we care about.
  const bool hasCandidate = strstr(line, "u'") != NULL
                         || strstr(line, "u\"") != NULL
                         || strstr(line, "u8\"") != NULL;
  if(!hasCandidate) {
    return false;
  }

  // Refuse lines too long for the 32-bit indices used by fixAt().
  if(linestr.size() > INT32_MAX/2) {
    return true;
  }

  // Passes run in this order: u" first, then u', then u8".
  static const char * const kPrefixes[] = { "u\"", "u'", "u8\"" };
  const size_t kPrefixCount = sizeof(kPrefixes) / sizeof(kPrefixes[0]);

  for(size_t k = 0; k < kPrefixCount; k++) {
    // Start each pass from the end of the line as it is now.
    size_t at = linestr.size();
    while(at > 0) {
      at = linestr.rfind(kPrefixes[k], at);
      if(at == std::string::npos) {
        break; // no further occurrence to the left
      }
      if(fixAt(linestr, at)) {
        return true;
      }
      if(at == 0) {
        break; // cannot step further left
      }
      at--;
    }
  }

  return false;
}

/**
 * Convert a whole file
 * @param infile
 * @param outfile
 * @return 1 on err, 0 otherwise
 */
int convert(const std::string &infile, const std::string &outfile) {
  fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str());

  std::ifstream inf;
  
  inf.open(infile.c_str(), std::ios::in);

  if(!inf.is_open()) {
    fprintf(stderr, "%s: could not open input file %s\n", prog.c_str(), infile.c_str());
    cleanup(outfile);
    return 1;
  }

  std::ofstream outf;

  outf.open(outfile.c_str(), std::ios::out);

  if(!outf.is_open()) {
    fprintf(stderr, "%s: could not open output file %s\n", prog.c_str(), outfile.c_str());
    return 1;
  }

  // TODO: any platform variations of #line?
  outf << "#line 1 \"" << infile << "\"" << '\n';

  int no = 0;
  std::string linestr;
  while( getline( inf, linestr)) {
    no++;
    if(fixLine(no, linestr)) {
      goto fail;
    }
    outf << linestr << '\n';
  }

  if(inf.eof()) {
    return 0;
  }
fail:
  outf.close();
  fprintf(stderr, "%s:%d: Fixup failed by %s\n", infile.c_str(), no, prog.c_str());
  cleanup(outfile);
  return 1;
}

/**
 * Main function.
 * Expects exactly two arguments, the input and the output file:
 *   escapesrc infile.cpp outfile.cpp
 * @return the result of convert(), or 1 after printing usage when the
 *         argument count is wrong
 */
int main(int argc, const char *argv[]) {
  // argv[0] may be missing when the program is started with an empty
  // argument vector; building a std::string from a null pointer is undefined.
  prog = (argc > 0 && argv[0]) ? argv[0] : "escapesrc";

  if(argc != 3) {
    usage();
    return 1;
  }

  std::string infile = argv[1];
  std::string outfile = argv[2];

  return convert(infile, outfile);
}
Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3
	4	#include <stdio.h>
	5	#include <string>
	6	#include <stdlib.h>
f3c0d7a5 A	7	#include <errno.h>
	8	#include <string.h>
	9	#include <iostream>
	10	#include <fstream>
	11
0f5d89e8	12	// We only use U8_* macros, which are entirely inline.
f3c0d7a5 A	13	#include "unicode/utf8.h"
f3c0d7a5 A	14
0f5d89e8 A	15	// This contains a codepage and ISO 14882:1998 illegality table.
	16	// Use "make gen-table" to rebuild it.
	17	#include "cptbl.h"
	18
	19	/**
	20	* What is this?
	21	*
	22	* "This" is a preprocessor that makes an attempt to convert fully valid C++11 source code
	23	* in utf-8 into something consumable by certain compilers (Solaris, xlC)
	24	* which aren't quite standards compliant.
	25	*
	26	* - u"<unicode>" or u'<unicode>' gets converted to u"\uNNNN" or u'\uNNNN'
	27	* - u8"<unicode>" gets converted to "\xAA\xBB\xCC\xDD" etc.
	28	* (some compilers do not support the u8 prefix correctly.)
	29	* - if the system is EBCDIC-based, that is used to correct the input characters.
	30	*
	31	* Usage:
	32	* escapesrc infile.cpp outfile.cpp
	33	* Normally this is invoked by the build stage, with a rule such as:
	34	*
	35	* _%.cpp: $(srcdir)/%.cpp
	36	* @$(BINDIR)/escapesrc$(EXEEXT) $< $@
	37	* %.o: _%.cpp
	38	* $(COMPILE.cc) ... $@ $<
	39	*
	40	* In the Makefiles, SKIP_ESCAPING=YES is used to prevent escapesrc.cpp
	41	* from being itself escaped.
	42	*/
	43
	44
f3c0d7a5 A	45	static const char
	46	kSPACE = 0x20,
	47	kTAB = 0x09,
	48	kLF = 0x0A,
0f5d89e8	49	kCR = 0x0D;
f3c0d7a5	50
0f5d89e8	51	// For convenience
f3c0d7a5 A	52	# define cp1047_to_8859(c) cp1047_8859_1[c]
f3c0d7a5 A	53
0f5d89e8	54	// Our app's name
f3c0d7a5 A	55	std::string prog;
f3c0d7a5 A	56
0f5d89e8 A	57	/**
	58	* Give the usual 1-line documentation and exit
	59	*/
f3c0d7a5 A	60	void usage() {
	61	fprintf(stderr, "%s: usage: %s infile.cpp outfile.cpp\n", prog.c_str(), prog.c_str());
	62	}
	63
0f5d89e8 A	64	/**
	65	* Delete the output file (if any)
	66	* We want to delete even if we didn't generate, because it might be stale.
	67	*/
f3c0d7a5 A	68	int cleanup(const std::string &outfile) {
	69	const char *outstr = outfile.c_str();
	70	if(outstr && *outstr) {
0f5d89e8	71	int rc = std::remove(outstr);
f3c0d7a5 A	72	if(rc == 0) {
	73	fprintf(stderr, "%s: deleted %s\n", prog.c_str(), outstr);
	74	return 0;
	75	} else {
	76	if( errno == ENOENT ) {
	77	return 0; // File did not exist - no error.
	78	} else {
0f5d89e8	79	perror("std::remove");
f3c0d7a5 A	80	return 1;
	81	}
	82	}
	83	}
	84	return 0;
	85	}
	86
0f5d89e8 A	87	/**
	88	* Skip across any known whitespace.
	89	* @param p startpoint
	90	* @param e limit
	91	* @return first non-whitespace char
	92	*/
f3c0d7a5 A	93	inline const char skipws(const char p, const char *e) {
	94	for(;p<e;p++) {
	95	switch(*p) {
	96	case kSPACE:
	97	case kTAB:
	98	case kLF:
	99	case kCR:
	100	break;
	101	default:
	102	return p; // non ws
	103	}
	104	}
	105	return p;
	106	}
	107
0f5d89e8 A	108	/**
	109	* Append a byte, hex encoded
	110	* @param outstr sstring to append to
	111	* @param byte the byte to append
	112	*/
f3c0d7a5 A	113	void appendByte(std::string &outstr,
	114	uint8_t byte) {
	115	char tmp2[5];
	116	sprintf(tmp2, "\\x%02X", 0xFF & (int)(byte));
	117	outstr += tmp2;
	118	}
	119
	120	/**
0f5d89e8 A	121	* Append the bytes from 'linestr' into outstr, with escaping
	122	* @param outstr the output buffer
	123	* @param linestr the input buffer
	124	* @param pos in/out: the current char under consideration
	125	* @param chars the number of chars to consider
f3c0d7a5 A	126	* @return true on failure
	127	*/
	128	bool appendUtf8(std::string &outstr,
	129	const std::string &linestr,
	130	size_t &pos,
	131	size_t chars) {
	132	char tmp[9];
	133	for(size_t i=0;i<chars;i++) {
	134	tmp[i] = linestr[++pos];
	135	}
	136	tmp[chars] = 0;
	137	unsigned int c;
	138	sscanf(tmp, "%X", &c);
	139	UChar32 ch = c & 0x1FFFFF;
	140
	141	// now to append \\x%% etc
	142	uint8_t bytesNeeded = U8_LENGTH(ch);
	143	if(bytesNeeded == 0) {
	144	fprintf(stderr, "Illegal code point U+%X\n", ch);
	145	return true;
	146	}
	147	uint8_t bytes[4];
	148	uint8_t *s = bytes;
	149	size_t i = 0;
	150	U8_APPEND_UNSAFE(s, i, ch);
	151	for(size_t t = 0; t<i; t++) {
	152	appendByte(outstr, s[t]);
	153	}
	154	return false;
	155	}
	156
	157	/**
0f5d89e8	158	* Fixup u8"x"
f3c0d7a5 A	159	* @param linestr string to mutate. Already escaped into \u format.
	160	* @param origpos beginning, points to 'u8"'
	161	* @param pos end, points to "
	162	* @return false for no-problem, true for failure!
	163	*/
	164	bool fixu8(std::string &linestr, size_t origpos, size_t &endpos) {
	165	size_t pos = origpos + 3;
	166	std::string outstr;
	167	outstr += '\"'; // local encoding
	168	for(;pos<endpos;pos++) {
	169	char c = linestr[pos];
	170	if(c == '\\') {
	171	char c2 = linestr[++pos];
	172	switch(c2) {
	173	case '\'':
	174	case '"':
	175	#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
	176	c2 = cp1047_to_8859(c2);
	177	#endif
	178	appendByte(outstr, c2);
	179	break;
	180	case 'u':
	181	appendUtf8(outstr, linestr, pos, 4);
	182	break;
	183	case 'U':
	184	appendUtf8(outstr, linestr, pos, 8);
	185	break;
	186	}
	187	} else {
	188	#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
	189	c = cp1047_to_8859(c);
	190	#endif
	191	appendByte(outstr, c);
	192	}
	193	}
	194	outstr += ('\"');
	195
	196	linestr.replace(origpos, (endpos-origpos+1), outstr);
	197
	198	return false; // OK
	199	}
	200
	201	/**
0f5d89e8 A	202	* fix the u"x"/u'x'/u8"x" string at the position
	203	* u8'x' is not supported, sorry.
	204	* @param linestr the input string
	205	* @param pos the position
	206	* @return false = no err, true = had err
f3c0d7a5 A	207	*/
	208	bool fixAt(std::string &linestr, size_t pos) {
	209	size_t origpos = pos;
	210
	211	if(linestr[pos] != 'u') {
	212	fprintf(stderr, "Not a 'u'?");
	213	return true;
	214	}
	215
	216	pos++; // past 'u'
	217
	218	bool utf8 = false;
	219
	220	if(linestr[pos] == '8') { // u8"
	221	utf8 = true;
	222	pos++;
	223	}
	224
	225	char quote = linestr[pos];
	226
	227	if(quote != '\'' && quote != '\"') {
	228	fprintf(stderr, "Quote is '%c' - not sure what to do.\n", quote);
	229	return true;
	230	}
	231
	232	if(quote == '\'' && utf8) {
	233	fprintf(stderr, "Cannot do u8'...'\n");
	234	return true;
	235	}
	236
	237	pos ++;
	238
	239	//printf("u%c…%c\n", quote, quote);
	240
	241	for(; pos < linestr.size(); pos++) {
	242	if(linestr[pos] == quote) {
	243	if(utf8) {
	244	return fixu8(linestr, origpos, pos); // fix u8"..."
	245	} else {
	246	return false; // end of quote
	247	}
	248	}
	249	if(linestr[pos] == '\\') {
	250	pos++;
	251	if(linestr[pos] == quote) continue; // quoted quote
	252	if(linestr[pos] == 'u') continue; // for now ... unicode escape
	253	if(linestr[pos] == '\\') continue;
	254	// some other escape… ignore
	255	} else {
	256	size_t old_pos = pos;
	257	int32_t i = pos;
	258	#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
	259	// mogrify 1-4 bytes from 1047 'back' to utf-8
	260	char old_byte = linestr[pos];
	261	linestr[pos] = cp1047_to_8859(linestr[pos]);
	262	// how many more?
	263	int32_t trail = U8_COUNT_TRAIL_BYTES(linestr[pos]);
	264	for(size_t pos2 = pos+1; trail>0; pos2++,trail--) {
	265	linestr[pos2] = cp1047_to_8859(linestr[pos2]);
	266	if(linestr[pos2] == 0x0A) {
	267	linestr[pos2] = 0x85; // NL is ambiguous here
	268	}
	269	}
	270	#endif
271
272	// Proceed to decode utf-8
273	const uint8_t s = (const uint8_t) (linestr.c_str());
274	int32_t length = linestr.size();
275	UChar32 c;
276	if(U8_IS_SINGLE((uint8_t)s[i]) && oldIllegal[s[i]]) {
277	#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
278	linestr[pos] = old_byte; // put it back
279	#endif
280	continue; // single code point not previously legal for \u escaping
281	}
282
283	// otherwise, convert it to \u / \U
284	{
285	U8_NEXT(s, i, length, c);
286	}
287	if(c<0) {
0f5d89e8	288	fprintf(stderr, "Illegal utf-8 sequence at Column: %d\n", (int)old_pos);
f3c0d7a5 A	289	fprintf(stderr, "Line: >>%s<<\n", linestr.c_str());
	290	return true;
	291	}
	292
	293	size_t seqLen = (i-pos);
	294
	295	//printf("U+%04X pos %d [len %d]\n", c, pos, seqLen);fflush(stdout);
	296
	297	char newSeq[20];
	298	if( c <= 0xFFFF) {
	299	sprintf(newSeq, "\\u%04X", c);
	300	} else {
	301	sprintf(newSeq, "\\U%08X", c);
	302	}
	303	linestr.replace(pos, seqLen, newSeq);
	304	pos += strlen(newSeq) - 1;
	305	}
	306	}
	307
	308	return false;
	309	}
	310
	311	/**
0f5d89e8	312	* Fixup an entire line
f3c0d7a5 A	313	* false = no err
f3c0d7a5 A	314	* true = had err
0f5d89e8 A	315	* @param no the line number (not used)
	316	* @param linestr the string to fix
	317	* @return true if any err, else false
f3c0d7a5 A	318	*/
	319	bool fixLine(int /no/, std::string &linestr) {
	320	const char *line = linestr.c_str();
	321	size_t len = linestr.size();
	322
	323	// no u' in the line?
	324	if(!strstr(line, "u'") && !strstr(line, "u\"") && !strstr(line, "u8\"")) {
	325	return false; // Nothing to do. No u' or u" detected
	326	}
	327
f3c0d7a5 A	328	// start from the end and find all u" cases
f3c0d7a5 A	329	size_t pos = len = linestr.size();
3d1f044b A	330	if(len>INT32_MAX/2) {
	331	return true;
	332	}
f3c0d7a5 A	333	while((pos>0) && (pos = linestr.rfind("u\"", pos)) != std::string::npos) {
	334	//printf("found doublequote at %d\n", pos);
	335	if(fixAt(linestr, pos)) return true;
	336	if(pos == 0) break;
	337	pos--;
	338	}
	339
	340	// reset and find all u' cases
	341	pos = len = linestr.size();
	342	while((pos>0) && (pos = linestr.rfind("u'", pos)) != std::string::npos) {
	343	//printf("found singlequote at %d\n", pos);
	344	if(fixAt(linestr, pos)) return true;
	345	if(pos == 0) break;
	346	pos--;
	347	}
	348
	349	// reset and find all u8" cases
	350	pos = len = linestr.size();
	351	while((pos>0) && (pos = linestr.rfind("u8\"", pos)) != std::string::npos) {
	352	if(fixAt(linestr, pos)) return true;
	353	if(pos == 0) break;
	354	pos--;
	355	}
	356
	357	//fprintf(stderr, "%d - fixed\n", no);
	358	return false;
	359	}
	360
0f5d89e8 A	361	/**
	362	* Convert a whole file
	363	* @param infile
	364	* @param outfile
	365	* @return 1 on err, 0 otherwise
	366	*/
f3c0d7a5 A	367	int convert(const std::string &infile, const std::string &outfile) {
	368	fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str());
	369
	370	std::ifstream inf;
	371
	372	inf.open(infile.c_str(), std::ios::in);
	373
	374	if(!inf.is_open()) {
	375	fprintf(stderr, "%s: could not open input file %s\n", prog.c_str(), infile.c_str());
	376	cleanup(outfile);
	377	return 1;
	378	}
	379
	380	std::ofstream outf;
	381
	382	outf.open(outfile.c_str(), std::ios::out);
	383
	384	if(!outf.is_open()) {
	385	fprintf(stderr, "%s: could not open output file %s\n", prog.c_str(), outfile.c_str());
	386	return 1;
	387	}
	388
	389	// TODO: any platform variations of #line?
	390	outf << "#line 1 \"" << infile << "\"" << '\n';
	391
	392	int no = 0;
	393	std::string linestr;
	394	while( getline( inf, linestr)) {
	395	no++;
	396	if(fixLine(no, linestr)) {
3d1f044b	397	goto fail;
f3c0d7a5 A	398	}
	399	outf << linestr << '\n';
	400	}
	401
3d1f044b A	402	if(inf.eof()) {
	403	return 0;
	404	}
	405	fail:
	406	outf.close();
	407	fprintf(stderr, "%s:%d: Fixup failed by %s\n", infile.c_str(), no, prog.c_str());
	408	cleanup(outfile);
	409	return 1;
f3c0d7a5 A	410	}
f3c0d7a5 A	411
0f5d89e8 A	412	/**
	413	* Main function
	414	*/
f3c0d7a5 A	415	int main(int argc, const char *argv[]) {
	416	prog = argv[0];
	417
	418	if(argc != 3) {
	419	usage();
	420	return 1;
	421	}
	422
	423	std::string infile = argv[1];
	424	std::string outfile = argv[2];
	425
	426	return convert(infile, outfile);
	427	}