[apple/icu.git] / icuSources / test / intltest / tokiter.cpp

/*
**********************************************************************
* Copyright (c) 2004, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: March 22 2004
* Since: ICU 3.0
**********************************************************************
*/
#include "tokiter.h"
#include "textfile.h"
#include "util.h"
#include "uprops.h"

/**
 * Construct an iterator over the given reader.  The reader pointer is
 * stored as-is.  No line is loaded yet, so the end-of-input and
 * have-a-line flags start out FALSE and both positions start at -1.
 * @param r the TextFile that next() will read lines from
 */
TokenIterator::TokenIterator(TextFile* r)
    : reader(r),
      done(FALSE),
      haveLine(FALSE),
      pos(-1),
      lastpos(-1) {
}

/**
 * Destructor.  The body is empty: nothing is released explicitly, and
 * in particular the TextFile handed to the constructor is not deleted
 * here.
 */
TokenIterator::~TokenIterator() {}

/**
 * Fetch the next token, pulling additional lines from the reader
 * whenever the current line has no more tokens.
 * @param token receives the token text; it is emptied first, unless
 * the call is rejected up front (input already exhausted, or 'ec'
 * already indicating a failure)
 * @param ec input-output error code
 * @return TRUE if a token was produced, or FALSE if the reader has no
 * further lines or 'ec' indicates a failure
 */
UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
    if (U_FAILURE(ec) || done) {
        return FALSE;
    }
    token.truncate(0);
    for (;;) {
        if (!haveLine) {
            // Need a fresh line; once the reader declines, we are done
            // for good.
            if (reader->readLineSkippingComments(line, ec)) {
                pos = 0;
                haveLine = TRUE;
            } else {
                done = TRUE;
                return FALSE;
            }
        }
        // Remember where this token attempt started.
        lastpos = pos;
        if (nextToken(token, ec)) {
            return TRUE;
        }
        // No token on the remainder of this line: drop it, and either
        // report the error or move on to the next line.
        haveLine = FALSE;
        if (U_FAILURE(ec)) {
            return FALSE;
        }
    }
}

/**
 * Report the current line number as tracked by the underlying reader.
 * @return the value of reader->getLineNumber()
 */
int32_t TokenIterator::getLineNumber() const {
    const int32_t current = reader->getLineNumber();
    return current;
}

/**
 * Read the next token from 'this->line', starting at 'this->pos', and
 * append it to 'token'.  Tokens are separated by rule white space.
 * Tokens may also be delimited by double or single quotes.  The
 * closing quote must match the opening quote.  If a '#' is
 * encountered, the rest of the line is ignored, unless it is
 * backslash-escaped or within quotes.  Backslash escapes are decoded
 * with UnicodeString::unescapeAt() wherever they occur in the token,
 * including at its very first character.
 *
 * On return 'this->pos' is just past the token and its terminating
 * white space or closing quote; a terminating '#' is left unconsumed
 * so that the following call reports end of line.
 *
 * @param token the token is appended to this UnicodeString
 * @param ec input-output error code; set to
 * U_MALFORMED_UNICODE_ESCAPE if a backslash escape cannot be decoded,
 * or to U_UNTERMINATED_QUOTE if a quoted token has no closing quote
 * @return TRUE if a valid token is found, or FALSE if the end
 * of the line is reached or an error occurs
 */
UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
    ICU_Utility::skipWhitespace(line, pos, TRUE);
    if (pos == line.length()) {
        return FALSE;
    }
    UChar c = line.charAt(pos);
    UChar quote = 0;
    switch (c) {
    case 34/*'"'*/:
    case 39/*'\''*/:
        quote = c;
        ++pos; // consume the opening quote
        break;
    case 35/*'#'*/:
        ++pos;
        return FALSE; // comment: nothing more on this line
    case 92/*'\\'*/:
        // Leave pos on the backslash so that the loop below decodes
        // the escape, exactly as it does for escapes later in the token.
        break;
    default:
        token.append(c);
        ++pos;
        break;
    }
    while (pos < line.length()) {
        c = line.charAt(pos); // 16-bit ok
        if (c == 92/*'\\'*/) {
            // unescapeAt() expects the offset of the character that
            // follows the backslash, and leaves the offset unchanged
            // on failure.
            int32_t afterBackslash = pos + 1;
            UChar32 c32 = line.unescapeAt(afterBackslash);
            if (c32 < 0) {
                ec = U_MALFORMED_UNICODE_ESCAPE;
                return FALSE;
            }
            token.append(c32);
            pos = afterBackslash;
        } else if ((quote != 0 && c == quote) ||
                   (quote == 0 && uprv_isRuleWhiteSpace(c))) {
            ++pos;
            return TRUE;
        } else if (quote == 0 && c == 35/*'#'*/) {
            return TRUE; // do NOT increment
        } else {
            token.append(c);
            ++pos;
        }
    }
    if (quote != 0) {
        ec = U_UNTERMINATED_QUOTE;
        return FALSE;
    }
    return TRUE;
}
Commit	Line	Data
374ca955 A	1	/*
	2	**********************************************************************
	3	* Copyright (c) 2004, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	* Author: Alan Liu
	7	* Created: March 22 2004
	8	* Since: ICU 3.0
	9	**********************************************************************
	10	*/
	11	#include "tokiter.h"
	12	#include "textfile.h"
	13	#include "util.h"
	14	#include "uprops.h"
	15
	16	TokenIterator::TokenIterator(TextFile* r) {
	17	reader = r;
	18	done = haveLine = FALSE;
	19	pos = lastpos = -1;
	20	}
	21
	22	TokenIterator::~TokenIterator() {
	23	}
	24
	25	UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
	26	if (done \|\| U_FAILURE(ec)) {
	27	return FALSE;
	28	}
	29	token.truncate(0);
	30	for (;;) {
	31	if (!haveLine) {
	32	if (!reader->readLineSkippingComments(line, ec)) {
	33	done = TRUE;
	34	return FALSE;
	35	}
	36	haveLine = TRUE;
	37	pos = 0;
	38	}
	39	lastpos = pos;
	40	if (!nextToken(token, ec)) {
	41	haveLine = FALSE;
	42	if (U_FAILURE(ec)) return FALSE;
	43	continue;
	44	}
	45	return TRUE;
	46	}
	47	}
	48
	49	int32_t TokenIterator::getLineNumber() const {
	50	return reader->getLineNumber();
	51	}
	52
	53	/**
	54	* Read the next token from 'this->line' and append it to 'token'.
	55	* Tokens are separated by rule white space. Tokens may also be
	56	* delimited by double or single quotes. The closing quote must match
	57	* the opening quote. If a '#' is encountered, the rest of the line
	58	* is ignored, unless it is backslash-escaped or within quotes.
	59	* @param token the token is appended to this StringBuffer
	60	* @param ec input-output error code
	61	* @return TRUE if a valid token is found, or FALSE if the end
	62	* of the line is reached or an error occurs
	63	*/
	64	UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
65	ICU_Utility::skipWhitespace(line, pos, TRUE);
66	if (pos == line.length()) {
67	return FALSE;
68	}
69	UChar c = line.charAt(pos++);
70	UChar quote = 0;
71	switch (c) {
72	case 34/'"'/:
73	case 39/'\\'/:
74	quote = c;
75	break;
76	case 35/'#'/:
77	return FALSE;
78	default:
79	token.append(c);
80	break;
81	}
82	while (pos < line.length()) {
83	c = line.charAt(pos); // 16-bit ok
84	if (c == 92/'\\'/) {
85	UChar32 c32 = line.unescapeAt(pos);
86	if (c32 < 0) {
87	ec = U_MALFORMED_UNICODE_ESCAPE;
88	return FALSE;
89	}
90	token.append(c32);
91	} else if ((quote != 0 && c == quote) \|\|
92	(quote == 0 && uprv_isRuleWhiteSpace(c))) {
93	++pos;
94	return TRUE;
95	} else if (quote == 0 && c == '#') {
96	return TRUE; // do NOT increment
97	} else {
98	token.append(c);
99	++pos;
100	}
101	}
102	if (quote != 0) {
103	ec = U_UNTERMINATED_QUOTE;
104	return FALSE;
105	}
106	return TRUE;
107	}