git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	**********************************************************************
	5	* Copyright (C) 2001-2011, International Business Machines
	6	* Corporation and others. All Rights Reserved.
	7	**********************************************************************
	8	* Date Name Description
	9	* 05/24/01 aliu Creation.
	10	**********************************************************************
	11	*/
	12
	13	#include "unicode/utypes.h"
	14
	15	#if !UCONFIG_NO_TRANSLITERATION
	16
	17	#include "unicode/uchar.h"
	18	#include "unicode/uniset.h"
	19	#include "unicode/ustring.h"
	20	#include "unicode/utf16.h"
	21	#include "titletrn.h"
	22	#include "umutex.h"
	23	#include "ucase.h"
	24	#include "cpputils.h"
	25
	26	U_NAMESPACE_BEGIN
	27
	28	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TitlecaseTransliterator)
	29
	30	TitlecaseTransliterator::TitlecaseTransliterator() :
	31	CaseMapTransliterator(UNICODE_STRING("Any-Title", 9), NULL)
	32	{
	33	// Need to look back 2 characters in the case of "can't"
	34	setMaximumContextLength(2);
	35	}
	36
	37	/**
	38	* Destructor.
	39	*/
	40	TitlecaseTransliterator::~TitlecaseTransliterator() {
	41	}
	42
	43	/**
	44	* Copy constructor.
	45	*/
	46	TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
	47	CaseMapTransliterator(o)
	48	{
	49	}
	50
	51	/**
	52	* Assignment operator.
	53	*/
	54	/*TitlecaseTransliterator& TitlecaseTransliterator::operator=(
	55	const TitlecaseTransliterator& o) {
	56	CaseMapTransliterator::operator=(o);
	57	return *this;
	58	}*/
	59
	60	/**
	61	* Transliterator API.
	62	*/
	63	Transliterator* TitlecaseTransliterator::clone(void) const {
	64	return new TitlecaseTransliterator(*this);
	65	}
	66
	67	/**
	68	* Implements {@link Transliterator#handleTransliterate}.
	69	*/
	70	void TitlecaseTransliterator::handleTransliterate(
	71	Replaceable& text, UTransPosition& offsets,
	72	UBool isIncremental) const
	73	{
	74	// TODO reimplement, see ustrcase.c
	75	// using a real word break iterator
	76	// instead of just looking for a transition between cased and uncased characters
	77	// call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
	78	// needs to take isIncremental into account because case mappings are context-sensitive
	79	// also detect when lowercasing function did not finish because of context
	80
	81	if (offsets.start >= offsets.limit) {
	82	return;
	83	}
	84
	85	// case type: >0 cased (UCASE_LOWER etc.) ==0 uncased <0 case-ignorable
	86	int32_t type;
	87
	88	// Our mode; we are either converting letter toTitle or
	89	// toLower.
	90	UBool doTitle = TRUE;
	91
	92	// Determine if there is a preceding context of cased case-ignorable*,
	93	// in which case we want to start in toLower mode. If the
	94	// prior context is anything else (including empty) then start
	95	// in toTitle mode.
	96	UChar32 c;
	97	int32_t start;
	98	for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) {
	99	c = text.char32At(start);
	100	type=ucase_getTypeOrIgnorable(c);
	101	if(type>0) { // cased
	102	doTitle=FALSE;
	103	break;
	104	} else if(type==0) { // uncased but not ignorable
	105	break;
	106	}
	107	// else (type<0) case-ignorable: continue
	108	}
	109
	110	// Convert things after a cased character toLower; things
	111	// after an uncased, non-case-ignorable character toTitle. Case-ignorable
	112	// characters are copied directly and do not change the mode.
	113	UCaseContext csc;
	114	uprv_memset(&csc, 0, sizeof(csc));
	115	csc.p = &text;
	116	csc.start = offsets.contextStart;
	117	csc.limit = offsets.contextLimit;
	118
	119	UnicodeString tmp;
	120	const UChar *s;
	121	int32_t textPos, delta, result;
	122
	123	for(textPos=offsets.start; textPos<offsets.limit;) {
	124	csc.cpStart=textPos;
	125	c=text.char32At(textPos);
	126	csc.cpLimit=textPos+=U16_LENGTH(c);
	127
	128	type=ucase_getTypeOrIgnorable(c);
	129	if(type>=0) { // not case-ignorable
	130	if(doTitle) {
	131	result=ucase_toFullTitle(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
	132	} else {
	133	result=ucase_toFullLower(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
	134	}
	135	doTitle = (UBool)(type==0); // doTitle=isUncased
	136
	137	if(csc.b1 && isIncremental) {
	138	// fMap() tried to look beyond the context limit
	139	// wait for more input
	140	offsets.start=csc.cpStart;
	141	return;
	142	}
	143
	144	if(result>=0) {
	145	// replace the current code point with its full case mapping result
	146	// see UCASE_MAX_STRING_LENGTH
	147	if(result<=UCASE_MAX_STRING_LENGTH) {
	148	// string s[result]
	149	tmp.setTo(FALSE, s, result);
	150	delta=result-U16_LENGTH(c);
	151	} else {
	152	// single code point
	153	tmp.setTo(result);
	154	delta=tmp.length()-U16_LENGTH(c);
	155	}
	156	text.handleReplaceBetween(csc.cpStart, textPos, tmp);
	157	if(delta!=0) {
	158	textPos+=delta;
	159	csc.limit=offsets.contextLimit+=delta;
	160	offsets.limit+=delta;
	161	}
	162	}
	163	}
	164	}
	165	offsets.start=textPos;
	166	}
	167
	168	U_NAMESPACE_END
	169
	170	#endif /* #if !UCONFIG_NO_TRANSLITERATION */