git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	**********************************************************************
	3	* Copyright (C) 2001-2011, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	* Date Name Description
	7	* 05/24/01 aliu Creation.
	8	**********************************************************************
	9	*/
	10
	11	#include "unicode/utypes.h"
	12
	13	#if !UCONFIG_NO_TRANSLITERATION
	14
	15	#include "unicode/uchar.h"
	16	#include "unicode/uniset.h"
	17	#include "unicode/ustring.h"
	18	#include "unicode/utf16.h"
	19	#include "titletrn.h"
	20	#include "umutex.h"
	21	#include "ucase.h"
	22	#include "cpputils.h"
	23
	24	U_NAMESPACE_BEGIN
	25
	26	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TitlecaseTransliterator)
	27
	28	TitlecaseTransliterator::TitlecaseTransliterator() :
	29	CaseMapTransliterator(UNICODE_STRING("Any-Title", 9), NULL)
	30	{
	31	// Need to look back 2 characters in the case of "can't"
	32	setMaximumContextLength(2);
	33	}
	34
	35	/**
	36	* Destructor.
	37	*/
	38	TitlecaseTransliterator::~TitlecaseTransliterator() {
	39	}
	40
	41	/**
	42	* Copy constructor.
	43	*/
	44	TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
	45	CaseMapTransliterator(o)
	46	{
	47	}
	48
	49	/**
	50	* Assignment operator.
	51	*/
	52	/*TitlecaseTransliterator& TitlecaseTransliterator::operator=(
	53	const TitlecaseTransliterator& o) {
	54	CaseMapTransliterator::operator=(o);
	55	return *this;
	56	}*/
	57
	58	/**
	59	* Transliterator API.
	60	*/
	61	Transliterator* TitlecaseTransliterator::clone(void) const {
	62	return new TitlecaseTransliterator(*this);
	63	}
	64
	65	/**
	66	* Implements {@link Transliterator#handleTransliterate}.
	67	*/
	68	void TitlecaseTransliterator::handleTransliterate(
	69	Replaceable& text, UTransPosition& offsets,
	70	UBool isIncremental) const
	71	{
	72	// TODO reimplement, see ustrcase.c
	73	// using a real word break iterator
	74	// instead of just looking for a transition between cased and uncased characters
	75	// call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
	76	// needs to take isIncremental into account because case mappings are context-sensitive
	77	// also detect when lowercasing function did not finish because of context
	78
	79	if (offsets.start >= offsets.limit) {
	80	return;
	81	}
	82
	83	// case type: >0 cased (UCASE_LOWER etc.) ==0 uncased <0 case-ignorable
	84	int32_t type;
	85
	86	// Our mode; we are either converting letter toTitle or
	87	// toLower.
	88	UBool doTitle = TRUE;
	89
	90	// Determine if there is a preceding context of cased case-ignorable*,
	91	// in which case we want to start in toLower mode. If the
	92	// prior context is anything else (including empty) then start
	93	// in toTitle mode.
	94	UChar32 c;
	95	int32_t start;
	96	for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) {
	97	c = text.char32At(start);
	98	type=ucase_getTypeOrIgnorable(fCsp, c);
	99	if(type>0) { // cased
	100	doTitle=FALSE;
	101	break;
	102	} else if(type==0) { // uncased but not ignorable
	103	break;
	104	}
	105	// else (type<0) case-ignorable: continue
	106	}
	107
	108	// Convert things after a cased character toLower; things
	109	// after an uncased, non-case-ignorable character toTitle. Case-ignorable
	110	// characters are copied directly and do not change the mode.
	111	UCaseContext csc;
	112	uprv_memset(&csc, 0, sizeof(csc));
	113	csc.p = &text;
	114	csc.start = offsets.contextStart;
	115	csc.limit = offsets.contextLimit;
	116
	117	UnicodeString tmp;
	118	const UChar *s;
	119	int32_t textPos, delta, result, locCache=0;
	120
	121	for(textPos=offsets.start; textPos<offsets.limit;) {
	122	csc.cpStart=textPos;
	123	c=text.char32At(textPos);
	124	csc.cpLimit=textPos+=U16_LENGTH(c);
	125
	126	type=ucase_getTypeOrIgnorable(fCsp, c);
	127	if(type>=0) { // not case-ignorable
	128	if(doTitle) {
	129	result=ucase_toFullTitle(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
	130	} else {
	131	result=ucase_toFullLower(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
	132	}
	133	doTitle = (UBool)(type==0); // doTitle=isUncased
	134
	135	if(csc.b1 && isIncremental) {
	136	// fMap() tried to look beyond the context limit
	137	// wait for more input
	138	offsets.start=csc.cpStart;
	139	return;
	140	}
	141
	142	if(result>=0) {
	143	// replace the current code point with its full case mapping result
	144	// see UCASE_MAX_STRING_LENGTH
	145	if(result<=UCASE_MAX_STRING_LENGTH) {
	146	// string s[result]
	147	tmp.setTo(FALSE, s, result);
	148	delta=result-U16_LENGTH(c);
	149	} else {
	150	// single code point
	151	tmp.setTo(result);
	152	delta=tmp.length()-U16_LENGTH(c);
	153	}
	154	text.handleReplaceBetween(csc.cpStart, textPos, tmp);
	155	if(delta!=0) {
	156	textPos+=delta;
	157	csc.limit=offsets.contextLimit+=delta;
	158	offsets.limit+=delta;
	159	}
	160	}
	161	}
	162	}
	163	offsets.start=textPos;
	164	}
	165
	166	U_NAMESPACE_END
	167
	168	#endif /* #if !UCONFIG_NO_TRANSLITERATION */