2 *******************************************************************************
4 * Copyright (C) 2003-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: unorm_it.c
10 * tab size: 8 (not used)
13 * created on: 2003jan21
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
21 #include "unicode/uiter.h"
22 #include "unicode/unorm.h"
23 #include "unicode/utf.h"
27 /* UNormIterator ------------------------------------------------------------ */
/*
 * State of the normalizing iterator wrapper. Only part of the definition is
 * visible here: the boundary flags, the normalization mode, and the two
 * built-in buffers. statesBuffer has one more slot than charsBuffer because
 * states[limit] is a valid entry.
 */
33 struct UNormIterator
{
38 * chars and states either use the static buffers
39 * or are allocated in the same memory block
41 * They are parallel arrays with states[] holding the getState() values
42 * from normalization boundaries, and UITER_NO_STATE in between.
48 * api.start: first valid character & state in the arrays
49 * api.index: current position
50 * api.limit: one past the last valid character in chars[], but states[limit] is valid
51 * capacity: length of allocated arrays
55 /* the current iter->getState(), saved to avoid unnecessary setState() calls; may not correspond to api->index! */
58 /* there are UChars available before start or after limit? */
59 UBool hasPrevious
, hasNext
, isStackAllocated
;
/* mode is handed to unorm_next()/unorm_previous() by readNext()/readPrevious() */
61 UNormalizationMode mode
;
/* initial storage; unorm_openIter() points chars/states at these two buffers */
63 UChar charsBuffer
[INITIAL_CAPACITY
];
64 uint32_t statesBuffer
[INITIAL_CAPACITY
+1]; /* one more than charsBuffer[]! */
/*
 * Reset api->start/index/limit to one empty position and set the boundary
 * flags of uni from the current position of iter:
 *  - iter has no previous text: position 0
 *  - iter has no next text: position uni->capacity
 *  - otherwise: position uni->capacity/2, with both flags TRUE
 * The caller stores the matching state into states[limit] afterwards
 * (see unorm_setIter() and unormIteratorSetState()).
 */
68 initIndexes(UNormIterator
*uni
, UCharIterator
*iter
) {
69 /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
70 UCharIterator
*api
=&uni
->api
;
72 if(!iter
->hasPrevious(iter
)) {
73 /* set indexes to the beginning of the arrays */
74 api
->start
=api
->index
=api
->limit
=0;
75 uni
->hasPrevious
=FALSE
;
76 uni
->hasNext
=iter
->hasNext(iter
);
77 } else if(!iter
->hasNext(iter
)) {
78 /* set indexes to the end of the arrays */
79 api
->start
=api
->index
=api
->limit
=uni
->capacity
;
81 uni
->hasPrevious
=iter
->hasPrevious(iter
);
83 /* set indexes into the middle of the arrays */
84 api
->start
=api
->index
=api
->limit
=uni
->capacity
/2;
85 uni
->hasPrevious
=uni
->hasNext
=TRUE
;
/*
 * Allocate one memory block that holds capacity+1 uint32_t states followed by
 * capacity UChars, and copy the buffered range [start, limit) plus
 * states[limit] from the old arrays into it.
 * One branch is meant to place the old contents at the end of the new arrays
 * (indexes shifted by delta), the other keeps them at the same indexes;
 * addAtStart presumably selects between them (the test itself is not visible).
 * Callers negate the result (readNext()/readPrevious()), so a false value
 * signals failure.
 *
 * NOTE(review): parts of this function are not visible in this view; confirm
 * the two points flagged below against the complete source.
 */
90 reallocArrays(UNormIterator
*uni
, int32_t capacity
, UBool addAtStart
) {
91 /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
92 UCharIterator
*api
=&uni
->api
;
/* single block: (capacity+1) states of 4 bytes, then capacity chars of 2 bytes */
98 states
=(uint32_t *)uprv_malloc((capacity
+1)*4+capacity
*2);
/* chars[] starts directly behind the last state slot */
103 chars
=(UChar
*)(states
+(capacity
+1));
/*
 * NOTE(review): uni->capacity is overwritten with the new value here, before
 * delta is computed from it below, so delta=capacity-uni->capacity looks like
 * it is always 0 and the contents are never shifted - confirm, and compute
 * delta from the old capacity if so.
 */
104 uni
->capacity
=capacity
;
110 /* copy old contents to the end of the new arrays */
113 delta
=capacity
-uni
->capacity
;
114 uprv_memcpy(states
+delta
+start
, uni
->states
+start
, (limit
-start
+1)*4);
/*
 * NOTE(review): chars is a UChar array that got capacity*2 bytes in the
 * allocation above, yet the byte count here uses *4 like the uint32_t copy.
 * This looks like it copies twice as many bytes as intended and can run past
 * the end of both arrays - presumably it should be *2; confirm.
 */
115 uprv_memcpy(chars
+delta
+start
, uni
->chars
+start
, (limit
-start
)*4);
117 api
->start
=start
+delta
;
119 api
->limit
=limit
+delta
;
121 /* copy old contents to the beginning of the new arrays */
122 uprv_memcpy(states
+start
, uni
->states
+start
, (limit
-start
+1)*4);
/* NOTE(review): same *4 byte count on the 2-byte chars array as above - confirm. */
123 uprv_memcpy(chars
+start
, uni
->chars
+start
, (limit
-start
)*4);
/*
 * Slide the buffered chars/states toward index 0 of the arrays.
 * Entries from srcIndex up to limit are copied down to begin at index 0,
 * states[limit] is copied along, and api->start/api->limit are updated.
 * When srcIndex lies inside the buffered range it is first advanced to an
 * entry with a known state, so the new start has a usable state.
 * How srcIndex and limit are initialized is not visible in this view;
 * presumably delta is the amount of room requested by the caller.
 */
133 moveContentsTowardStart(UCharIterator
*api
, UChar chars
[], uint32_t states
[], int32_t delta
) {
134 /* move array contents up to make room */
135 int32_t srcIndex
, destIndex
, limit
;
139 if(srcIndex
>api
->start
) {
140 /* look for a position in the arrays with a known state */
141 while(srcIndex
<limit
&& states
[srcIndex
]==UITER_NO_STATE
) {
146 /* now actually move the array contents */
147 api
->start
=destIndex
=0;
148 while(srcIndex
<limit
) {
149 chars
[destIndex
]=chars
[srcIndex
];
150 states
[destIndex
++]=states
[srcIndex
++];
153 /* copy states[limit] as well! */
154 states
[destIndex
]=states
[srcIndex
];
156 api
->limit
=destIndex
;
/*
 * Slide the buffered chars/states toward the end of the arrays.
 * destIndex starts at the capacity (read by casting api back to its
 * enclosing UNormIterator) and srcIndex at capacity-delta. When srcIndex lies
 * inside the buffered range it is first moved backward to an entry with a
 * known state. That state becomes the new states[limit]; the entries below
 * it, down to start, are then copied backward, and api->limit/api->start are
 * updated.
 * The initialization of start is not visible in this view.
 */
160 moveContentsTowardEnd(UCharIterator
*api
, UChar chars
[], uint32_t states
[], int32_t delta
) {
161 /* move array contents up to make room */
162 int32_t srcIndex
, destIndex
, start
;
165 destIndex
=((UNormIterator
*)api
)->capacity
;
166 srcIndex
=destIndex
-delta
;
167 if(srcIndex
<api
->limit
) {
168 /* look for a position in the arrays with a known state */
169 while(srcIndex
>start
&& states
[srcIndex
]==UITER_NO_STATE
) {
174 /* now actually move the array contents */
175 api
->limit
=destIndex
;
177 /* copy states[limit] as well! */
178 states
[destIndex
]=states
[srcIndex
];
180 while(srcIndex
>start
) {
181 chars
[--destIndex
]=chars
[--srcIndex
];
182 states
[destIndex
]=states
[srcIndex
];
185 api
->start
=destIndex
;
188 /* normalize forward from the limit, assume hasNext is true */
/*
 * Append the next normalized piece of text behind api->limit.
 * Steps visible here:
 *  1. if there is not enough room behind limit, slide the contents toward
 *     the array start (text before the buffer then exists: hasPrevious=TRUE)
 *  2. position iter at states[limit] unless it is already there
 *  3. call unorm_next() to write into chars+limit
 *  4. on U_BUFFER_OVERFLOW_ERROR drop the buffered contents, call
 *     reallocArrays() and retry once
 *  5. mark all new positions except the boundaries with UITER_NO_STATE and
 *     record the iterator state and hasNext at the new end
 * Callers use the result as a boolean (for example "!readNext(uni, iter)").
 * The return statements and the update of api->limit are not visible here.
 */
190 readNext(UNormIterator
*uni
, UCharIterator
*iter
) {
191 /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
192 UCharIterator
*api
=&uni
->api
;
194 /* make capacity/4 room at the end of the arrays */
195 int32_t limit
, capacity
, room
;
196 UErrorCode errorCode
;
199 capacity
=uni
->capacity
;
201 if(room
>(capacity
-limit
)) {
202 /* move array contents to make room */
203 moveContentsTowardStart(api
, uni
->chars
, uni
->states
, room
);
204 api
->index
=limit
=api
->limit
;
205 uni
->hasPrevious
=TRUE
;
208 /* normalize starting from the limit position */
209 errorCode
=U_ZERO_ERROR
;
/* skip the setState() call when iter is already at the state saved for limit */
210 if(uni
->state
!=uni
->states
[limit
]) {
211 uiter_setState(iter
, uni
->states
[limit
], &errorCode
);
212 if(U_FAILURE(errorCode
)) {
213 uni
->state
=UITER_NO_STATE
;
219 room
=unorm_next(iter
, uni
->chars
+limit
, capacity
-limit
, uni
->mode
, 0, TRUE
, NULL
, &errorCode
);
220 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
222 /* empty and re-use the arrays */
223 uni
->states
[0]=uni
->states
[limit
];
224 api
->start
=api
->index
=api
->limit
=limit
=0;
225 uni
->hasPrevious
=TRUE
;
228 if(!reallocArrays(uni
, capacity
, FALSE
)) {
229 uni
->state
=UITER_NO_STATE
;
/* second attempt after the overflow handling: reposition iter and normalize again */
236 errorCode
=U_ZERO_ERROR
;
237 uiter_setState(iter
, uni
->states
[limit
], &errorCode
);
238 room
=unorm_next(iter
, uni
->chars
+limit
, capacity
-limit
, uni
->mode
, 0, TRUE
, NULL
, &errorCode
);
240 if(U_FAILURE(errorCode
) || room
==0) {
241 uni
->state
=UITER_NO_STATE
;
/* from here on room counts the UChars that unorm_next() produced */
247 ++limit
; /* leave the known states[limit] alone */
248 for(--room
; room
>0; --room
) {
249 /* set unknown states for all but the normalization boundaries */
250 uni
->states
[limit
++]=UITER_NO_STATE
;
/* the new end is a normalization boundary: remember the iterator state there */
252 uni
->states
[limit
]=uni
->state
=uiter_getState(iter
);
253 uni
->hasNext
=iter
->hasNext(iter
);
258 /* normalize backward from the start, assume hasPrevious is true */
/*
 * Prepend the previous normalized piece of text in front of api->start.
 * Steps visible here:
 *  1. slide the contents toward the array end to make room (the condition
 *     guarding this is not visible here)
 *  2. position iter at states[start] unless it is already there
 *  3. call unorm_previous(), which writes its output at chars[0..room[
 *  4. on U_BUFFER_OVERFLOW_ERROR drop the buffered contents, call
 *     reallocArrays() and retry once
 *  5. copy the output from chars[0..room[ to directly below start, mark those
 *     positions with UITER_NO_STATE, and record the iterator state and
 *     hasPrevious at the new start
 * Every visible failure path sets state=UITER_NO_STATE and hasPrevious=FALSE.
 * Callers use the result as a boolean (for example "!readPrevious(uni, iter)").
 * The return statements and the update of api->start are not visible here.
 */
260 readPrevious(UNormIterator
*uni
, UCharIterator
*iter
) {
261 /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
262 UCharIterator
*api
=&uni
->api
;
264 /* make capacity/4 room at the start of the arrays */
265 int32_t start
, capacity
, room
;
266 UErrorCode errorCode
;
269 capacity
=uni
->capacity
;
272 /* move array contents to make room */
273 moveContentsTowardEnd(api
, uni
->chars
, uni
->states
, room
);
274 api
->index
=start
=api
->start
;
278 /* normalize ending at the start position */
279 errorCode
=U_ZERO_ERROR
;
/* skip the setState() call when iter is already at the state saved for start */
280 if(uni
->state
!=uni
->states
[start
]) {
281 uiter_setState(iter
, uni
->states
[start
], &errorCode
);
282 if(U_FAILURE(errorCode
)) {
283 uni
->state
=UITER_NO_STATE
;
284 uni
->hasPrevious
=FALSE
;
/* output goes to the front of chars[]; start is passed as the available capacity */
289 room
=unorm_previous(iter
, uni
->chars
, start
, uni
->mode
, 0, TRUE
, NULL
, &errorCode
);
290 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
292 /* empty and re-use the arrays */
293 uni
->states
[capacity
]=uni
->states
[start
];
294 api
->start
=api
->index
=api
->limit
=start
=capacity
;
298 if(!reallocArrays(uni
, capacity
, TRUE
)) {
299 uni
->state
=UITER_NO_STATE
;
300 uni
->hasPrevious
=FALSE
;
/* second attempt after the overflow handling: reposition iter and normalize again */
306 errorCode
=U_ZERO_ERROR
;
307 uiter_setState(iter
, uni
->states
[start
], &errorCode
);
308 room
=unorm_previous(iter
, uni
->chars
, start
, uni
->mode
, 0, TRUE
, NULL
, &errorCode
);
310 if(U_FAILURE(errorCode
) || room
==0) {
311 uni
->state
=UITER_NO_STATE
;
312 uni
->hasPrevious
=FALSE
;
318 /* copy the UChars from chars[0..room[ to chars[(start-room)..start[ */
319 uni
->chars
[--start
]=uni
->chars
[--room
];
320 /* set unknown states for all but the normalization boundaries */
321 uni
->states
[start
]=UITER_NO_STATE
;
/* the new start is a normalization boundary: remember the iterator state there */
323 uni
->states
[start
]=uni
->state
=uiter_getState(iter
);
324 uni
->hasPrevious
=iter
->hasPrevious(iter
);
329 /* Iterator runtime API functions ------------------------------------------- */
/*
 * getIndex callback of the normalizing iterator.
 * The only return visible in this view is UITER_UNKNOWN_INDEX; the handling
 * of the individual origin values is not visible here.
 */
331 static int32_t U_CALLCONV
332 unormIteratorGetIndex(UCharIterator
*api
, UCharIteratorOrigin origin
) {
340 return UITER_UNKNOWN_INDEX
;
342 /* not a valid origin */
343 /* Should never get here! */
/*
 * move callback of the normalizing iterator.
 * Branches visible here:
 *  - restart from the beginning: when text exists before the buffer, rewind
 *    the underlying iterator with UITER_START and reset to an empty buffer at
 *    index 0; otherwise just set index to start
 *  - restart from the end: move the underlying iterator to UITER_LIMIT and
 *    reset to an empty buffer at capacity, or just set index to limit when
 *    the buffer already holds the end
 *  - relative movement: step by delta normalized UChars, extending the buffer
 *    with readNext()/readPrevious() until the requested position is buffered
 *    or the text ends
 * Visible return values are -1 on the error path and UITER_UNKNOWN_INDEX at
 * the end; the dispatch on origin and the remaining returns are not visible
 * in this view.
 */
348 static int32_t U_CALLCONV
349 unormIteratorMove(UCharIterator
*api
, int32_t delta
, UCharIteratorOrigin origin
) {
350 UNormIterator
*uni
=(UNormIterator
*)api
;
351 UCharIterator
*iter
=uni
->iter
;
357 /* restart from the beginning */
358 if(uni
->hasPrevious
) {
359 iter
->move(iter
, 0, UITER_START
);
360 api
->start
=api
->index
=api
->limit
=0;
361 uni
->states
[api
->limit
]=uni
->state
=uiter_getState(iter
);
362 uni
->hasPrevious
=FALSE
;
363 uni
->hasNext
=iter
->hasNext(iter
);
365 /* we already have the beginning of the normalized text */
366 api
->index
=api
->start
;
373 /* restart from the end */
375 iter
->move(iter
, 0, UITER_LIMIT
);
376 api
->start
=api
->index
=api
->limit
=uni
->capacity
;
377 uni
->states
[api
->limit
]=uni
->state
=uiter_getState(iter
);
378 uni
->hasPrevious
=iter
->hasPrevious(iter
);
381 /* we already have the end of the normalized text */
382 api
->index
=api
->limit
;
386 return -1; /* Error */
389 /* move relative to the current position by delta normalized UChars */
393 /* go forward until the requested position is in the buffer */
395 pos
=api
->index
+delta
; /* requested position */
396 delta
=pos
-api
->limit
; /* remainder beyond buffered text */
398 api
->index
=pos
; /* position reached */
402 /* go to end of buffer and normalize further */
403 api
->index
=api
->limit
;
404 if(!uni
->hasNext
|| !readNext(uni
, iter
)) {
405 break; /* reached end of text */
408 } else /* delta<0 */ {
409 /* go backward until the requested position is in the buffer */
411 pos
=api
->index
+delta
; /* requested position */
412 delta
=pos
-api
->start
; /* remainder beyond buffered text */
414 api
->index
=pos
; /* position reached */
418 /* go to start of buffer and normalize further */
419 api
->index
=api
->start
;
420 if(!uni
->hasPrevious
|| !readPrevious(uni
, iter
)) {
421 break; /* reached start of text */
/* true only when the index sits at the very beginning of the whole text */
426 if(api
->index
==api
->start
&& !uni
->hasPrevious
) {
429 return UITER_UNKNOWN_INDEX
;
/*
 * hasNext callback: true when the index is before the end of the buffered
 * text, or when the hasNext flag says more text exists behind the buffer.
 */
433 static UBool U_CALLCONV
434 unormIteratorHasNext(UCharIterator
*api
) {
435 return api
->index
<api
->limit
|| ((UNormIterator
*)api
)->hasNext
;
/*
 * hasPrevious callback: true when the index is behind the start of the
 * buffered text, or when the hasPrevious flag says more text exists before
 * the buffer.
 */
438 static UBool U_CALLCONV
439 unormIteratorHasPrevious(UCharIterator
*api
) {
440 return api
->index
>api
->start
|| ((UNormIterator
*)api
)->hasPrevious
;
/*
 * current callback: return the UChar at the index without moving.
 * When the index is at the end of the buffer, readNext() is called first to
 * normalize more text. The return for the no-more-text case is not visible
 * in this view.
 */
443 static UChar32 U_CALLCONV
444 unormIteratorCurrent(UCharIterator
*api
) {
445 UNormIterator
*uni
=(UNormIterator
*)api
;
/* readNext() is only attempted when the buffer is exhausted and hasNext is set */
447 if( api
->index
<api
->limit
||
448 (uni
->hasNext
&& readNext(uni
, uni
->iter
))
450 return uni
->chars
[api
->index
];
/*
 * next callback: return the UChar at the index and advance the index by one.
 * When the index is at the end of the buffer, readNext() is called first to
 * normalize more text. The return for the no-more-text case is not visible
 * in this view.
 */
456 static UChar32 U_CALLCONV
457 unormIteratorNext(UCharIterator
*api
) {
458 UNormIterator
*uni
=(UNormIterator
*)api
;
/* readNext() is only attempted when the buffer is exhausted and hasNext is set */
460 if( api
->index
<api
->limit
||
461 (uni
->hasNext
&& readNext(uni
, uni
->iter
))
463 return uni
->chars
[api
->index
++];
/*
 * previous callback: move the index back by one and return the UChar there.
 * When the index is at the start of the buffer, readPrevious() is called
 * first to normalize more text. The return for the no-more-text case is not
 * visible in this view.
 */
469 static UChar32 U_CALLCONV
470 unormIteratorPrevious(UCharIterator
*api
) {
471 UNormIterator
*uni
=(UNormIterator
*)api
;
/* readPrevious() is only attempted when the index is at start and hasPrevious is set */
473 if( api
->index
>api
->start
||
474 (uni
->hasPrevious
&& readPrevious(uni
, uni
->iter
))
476 return uni
->chars
[--api
->index
];
/*
 * getState callback: return the state stored for the current index.
 * The stored value is UITER_NO_STATE for positions that readNext() and
 * readPrevious() marked as lying between normalization boundaries.
 */
482 static uint32_t U_CALLCONV
483 unormIteratorGetState(const UCharIterator
*api
) {
484 /* not uni->state because that may not be at api->index */
485 return ((UNormIterator
*)api
)->states
[api
->index
];
/*
 * setState callback: reposition the normalizing iterator at a saved state.
 * Error handling visible here:
 *  - pErrorCode is NULL or already a failure: nothing is reported
 *  - api is NULL: U_ILLEGAL_ARGUMENT_ERROR
 *  - state is UITER_NO_STATE: U_INDEX_OUTOFBOUNDS_ERROR
 * Otherwise the underlying iterator is set to state if it differs from the
 * cached uni->state. The state is then looked up in the buffered states[]:
 * first at the index, then at the limit, then by a linear search from start.
 * If it is not buffered, the buffer is reset with initIndexes() and state is
 * stored at states[limit].
 * What happens when the search finds a match is not visible in this view.
 */
488 static void U_CALLCONV
489 unormIteratorSetState(UCharIterator
*api
, uint32_t state
, UErrorCode
*pErrorCode
) {
490 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
492 } else if(api
==NULL
) {
493 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
494 } else if(state
==UITER_NO_STATE
) {
495 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
497 UNormIterator
*uni
=(UNormIterator
*)api
;
498 UCharIterator
*iter
=((UNormIterator
*)api
)->iter
;
/* only touch the underlying iterator when it is not already at this state */
499 if(state
!=uni
->state
) {
501 uiter_setState(iter
, state
, pErrorCode
);
505 * Try shortcuts: If the requested state is in the array contents
506 * then just set the index there.
508 * We assume that the state is unique per position!
510 if(state
==uni
->states
[api
->index
]) {
512 } else if(state
==uni
->states
[api
->limit
]) {
513 api
->index
=api
->limit
;
516 /* search for the index with this state */
519 for(i
=api
->start
; i
<api
->limit
; ++i
) {
520 if(state
==uni
->states
[i
]) {
527 /* there is no array index for this state, reset for fresh contents */
528 initIndexes((UNormIterator
*)api
, iter
);
529 uni
->states
[api
->limit
]=state
;
/*
 * Template UCharIterator holding the callbacks of the normalizing iterator.
 * unorm_setIter() copies it into uni->api with uprv_memcpy().
 * Not every initializer entry is visible in this view.
 */
533 static const UCharIterator unormIterator
={
535 unormIteratorGetIndex
,
537 unormIteratorHasNext
,
538 unormIteratorHasPrevious
,
539 unormIteratorCurrent
,
541 unormIteratorPrevious
,
543 unormIteratorGetState
,
544 unormIteratorSetState
547 /* Setup functions ---------------------------------------------------------- */
/*
 * Create a UNormIterator, either inside caller-provided memory or on the heap.
 *
 * stackMem/stackMemSize: optional caller memory. It is used directly when it
 *   is large enough and aligned, or at the next aligned address when the
 *   remaining size still fits; isStackAllocated is then TRUE.
 *   Otherwise the object comes from uprv_malloc() and isStackAllocated is
 *   FALSE.
 * pErrorCode: checked on entry; U_MEMORY_ALLOCATION_ERROR is stored on one
 *   path (presumably when the allocation fails - the test is not visible).
 *
 * The new object starts with the built-in buffers, INITIAL_CAPACITY,
 * UNORM_NONE, no known state, and a no-op string iterator in api.
 * The return statements are not visible in this view.
 */
549 U_CAPI UNormIterator
* U_EXPORT2
550 unorm_openIter(void *stackMem
, int32_t stackMemSize
, UErrorCode
*pErrorCode
) {
553 /* argument checking */
554 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
/*
 * NOTE(review): this compares the signed stackMemSize with an unsigned sizeof
 * value without the (int32_t) cast used in the check further down. With the
 * usual integer conversions a negative stackMemSize would be treated as a
 * huge unsigned value and pass - confirm and cast like the second check.
 */
560 if(stackMem
!=NULL
&& stackMemSize
>=sizeof(UNormIterator
)) {
561 if(U_ALIGNMENT_OFFSET(stackMem
)==0) {
562 /* already aligned */
563 uni
=(UNormIterator
*)stackMem
;
565 int32_t align
=(int32_t)U_ALIGNMENT_OFFSET_UP(stackMem
);
/* the bytes skipped for alignment are no longer available to the object */
566 if((stackMemSize
-=align
)>=(int32_t)sizeof(UNormIterator
)) {
567 /* needs alignment */
568 uni
=(UNormIterator
*)((char *)stackMem
+align
);
571 /* else does not fit */
575 uni
->isStackAllocated
=TRUE
;
577 uni
=(UNormIterator
*)uprv_malloc(sizeof(UNormIterator
));
579 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
582 uni
->isStackAllocated
=FALSE
;
587 * do not memset because that would unnecessarily initialize the arrays
590 uni
->chars
=uni
->charsBuffer
;
591 uni
->states
=uni
->statesBuffer
;
592 uni
->capacity
=INITIAL_CAPACITY
;
593 uni
->state
=UITER_NO_STATE
;
594 uni
->hasPrevious
=uni
->hasNext
=FALSE
;
595 uni
->mode
=UNORM_NONE
;
597 /* set a no-op iterator into the api */
598 uiter_setString(&uni
->api
, NULL
, 0);
/*
 * Release a UNormIterator created by unorm_openIter().
 * Arrays that were replaced by a heap block (states no longer points at
 * statesBuffer) are freed with one uprv_free() call because chars and states
 * share that block. The object itself is only released when it was not placed
 * in caller-provided memory; that release, and any NULL check on uni, are not
 * visible in this view.
 */
602 U_CAPI
void U_EXPORT2
603 unorm_closeIter(UNormIterator
*uni
) {
605 if(uni
->states
!=uni
->statesBuffer
) {
606 /* chars and states are allocated in the same memory block */
607 uprv_free(uni
->states
);
609 if(!uni
->isStackAllocated
) {
/*
 * Attach the source iterator iter to uni and prepare it for normalizing
 * iteration in the given mode.
 *
 * pErrorCode: checked on entry. U_ILLEGAL_ARGUMENT_ERROR is stored for bad
 *   arguments: iter is NULL, iter lacks getState or setState, or mode is
 *   outside [UNORM_NONE, UNORM_MODE_COUNT). In that case uni->api is turned
 *   into a no-op string iterator.
 *   A first U_ILLEGAL_ARGUMENT_ERROR path is also visible, presumably for a
 *   NULL uni (the test is not visible here).
 *
 * On success the callback table is copied into uni->api, the indexes are
 * initialized from the position of iter, and the current state of iter is
 * stored at states[limit].
 * The return statements and the stores of iter and mode into uni are not
 * visible in this view.
 */
615 U_CAPI UCharIterator
* U_EXPORT2
616 unorm_setIter(UNormIterator
*uni
, UCharIterator
*iter
, UNormalizationMode mode
, UErrorCode
*pErrorCode
) {
617 /* argument checking */
618 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
622 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* getState/setState are required because the buffer is addressed through iterator states */
625 if( iter
==NULL
|| iter
->getState
==NULL
|| iter
->setState
==NULL
||
626 mode
<UNORM_NONE
|| UNORM_MODE_COUNT
<=mode
628 /* set a no-op iterator into the api */
629 uiter_setString(&uni
->api
, NULL
, 0);
630 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
634 /* set the iterator and initialize */
635 uprv_memcpy(&uni
->api
, &unormIterator
, sizeof(unormIterator
));
640 initIndexes(uni
, iter
);
641 uni
->states
[uni
->api
.limit
]=uni
->state
=uiter_getState(iter
);
646 #endif /* uconfig.h switches */