git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/intltest/tokiter.cpp

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	**********************************************************************
	5	* Copyright (c) 2004-2011, International Business Machines
	6	* Corporation and others. All Rights Reserved.
	7	**********************************************************************
	8	* Author: Alan Liu
	9	* Created: March 22 2004
	10	* Since: ICU 3.0
	11	**********************************************************************
	12	*/
	13	#include "tokiter.h"
	14	#include "textfile.h"
	15	#include "patternprops.h"
	16	#include "util.h"
	17	#include "uprops.h"
	18
	19	TokenIterator::TokenIterator(TextFile* r) {
	20	reader = r;
	21	done = haveLine = FALSE;
	22	pos = lastpos = -1;
	23	}
	24
	25	TokenIterator::~TokenIterator() {
	26	}
	27
	28	UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
	29	if (done \|\| U_FAILURE(ec)) {
	30	return FALSE;
	31	}
	32	token.truncate(0);
	33	for (;;) {
	34	if (!haveLine) {
	35	if (!reader->readLineSkippingComments(line, ec)) {
	36	done = TRUE;
	37	return FALSE;
	38	}
	39	haveLine = TRUE;
	40	pos = 0;
	41	}
	42	lastpos = pos;
	43	if (!nextToken(token, ec)) {
	44	haveLine = FALSE;
	45	if (U_FAILURE(ec)) return FALSE;
	46	continue;
	47	}
	48	return TRUE;
	49	}
	50	}
	51
	52	int32_t TokenIterator::getLineNumber() const {
	53	return reader->getLineNumber();
	54	}
	55
	56	/**
	57	* Read the next token from 'this->line' and append it to 'token'.
	58	* Tokens are separated by Pattern_White_Space. Tokens may also be
	59	* delimited by double or single quotes. The closing quote must match
	60	* the opening quote. If a '#' is encountered, the rest of the line
	61	* is ignored, unless it is backslash-escaped or within quotes.
	62	* @param token the token is appended to this StringBuffer
	63	* @param ec input-output error code
	64	* @return TRUE if a valid token is found, or FALSE if the end
	65	* of the line is reached or an error occurs
	66	*/
	67	UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
	68	ICU_Utility::skipWhitespace(line, pos, TRUE);
	69	if (pos == line.length()) {
	70	return FALSE;
	71	}
	72	UChar c = line.charAt(pos++);
	73	UChar quote = 0;
	74	switch (c) {
	75	case 34/'"'/:
	76	case 39/'\\'/:
	77	quote = c;
	78	break;
	79	case 35/'#'/:
	80	return FALSE;
	81	default:
	82	token.append(c);
	83	break;
	84	}
	85	while (pos < line.length()) {
	86	c = line.charAt(pos); // 16-bit ok
	87	if (c == 92/'\\'/) {
	88	UChar32 c32 = line.unescapeAt(pos);
	89	if (c32 < 0) {
	90	ec = U_MALFORMED_UNICODE_ESCAPE;
	91	return FALSE;
	92	}
	93	token.append(c32);
	94	} else if ((quote != 0 && c == quote) \|\|
	95	(quote == 0 && PatternProps::isWhiteSpace(c))) {
	96	++pos;
	97	return TRUE;
	98	} else if (quote == 0 && c == '#') {
	99	return TRUE; // do NOT increment
	100	} else {
	101	token.append(c);
	102	++pos;
	103	}
	104	}
	105	if (quote != 0) {
	106	ec = U_UNTERMINATED_QUOTE;
	107	return FALSE;
	108	}
	109	return TRUE;
	110	}