2 *******************************************************************************
4 * Copyright (C) 2003-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: unorm_it.c
10 * tab size: 8 (not used)
13 * created on: 2003jan21
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
21 #include "unicode/uiter.h"
22 #include "unicode/unorm.h"
26 /* UNormIterator ------------------------------------------------------------ */
32 struct UNormIterator
{
37 * chars and states either use the static buffers
38 * or are allocated in the same memory block
40 * They are parallel arrays with states[] holding the getState() values
41 * from normalization boundaries, and UITER_NO_STATE in between.
47 * api.start: first valid character & state in the arrays
48 * api.index: current position
49 * api.limit: one past the last valid character in chars[], but states[limit] is valid
50 * capacity: length of allocated arrays
54 /* the current iter->getState(), saved to avoid unnecessary setState() calls; may not correspond to api->index! */
57 /* there are UChars available before start or after limit? */
58 UBool hasPrevious
, hasNext
, isStackAllocated
;
60 UNormalizationMode mode
;
62 UChar charsBuffer
[INITIAL_CAPACITY
];
63 uint32_t statesBuffer
[INITIAL_CAPACITY
+1]; /* one more than charsBuffer[]! */
67 initIndexes(UNormIterator
*uni
, UCharIterator
*iter
) {
68 /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
69 UCharIterator
*api
=&uni
->api
;
71 if(!iter
->hasPrevious(iter
)) {
72 /* set indexes to the beginning of the arrays */
73 api
->start
=api
->index
=api
->limit
=0;
74 uni
->hasPrevious
=FALSE
;
75 uni
->hasNext
=iter
->hasNext(iter
);
76 } else if(!iter
->hasNext(iter
)) {
77 /* set indexes to the end of the arrays */
78 api
->start
=api
->index
=api
->limit
=uni
->capacity
;
80 uni
->hasPrevious
=iter
->hasPrevious(iter
);
82 /* set indexes into the middle of the arrays */
83 api
->start
=api
->index
=api
->limit
=uni
->capacity
/2;
84 uni
->hasPrevious
=uni
->hasNext
=TRUE
;
89 reallocArrays(UNormIterator
*uni
, int32_t capacity
, UBool addAtStart
) {
90 /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
91 UCharIterator
*api
=&uni
->api
;
97 states
=(uint32_t *)uprv_malloc((capacity
+1)*4+capacity
*2);
102 chars
=(UChar
*)(states
+(capacity
+1));
103 uni
->capacity
=capacity
;
109 /* copy old contents to the end of the new arrays */
112 delta
=capacity
-uni
->capacity
;
113 uprv_memcpy(states
+delta
+start
, uni
->states
+start
, (limit
-start
+1)*4);
114 uprv_memcpy(chars
+delta
+start
, uni
->chars
+start
, (limit
-start
)*4);
116 api
->start
=start
+delta
;
118 api
->limit
=limit
+delta
;
120 /* copy old contents to the beginning of the new arrays */
121 uprv_memcpy(states
+start
, uni
->states
+start
, (limit
-start
+1)*4);
122 uprv_memcpy(chars
+start
, uni
->chars
+start
, (limit
-start
)*4);
132 moveContentsTowardStart(UCharIterator
*api
, UChar chars
[], uint32_t states
[], int32_t delta
) {
133 /* move array contents up to make room */
134 int32_t srcIndex
, destIndex
, limit
;
138 if(srcIndex
>api
->start
) {
139 /* look for a position in the arrays with a known state */
140 while(srcIndex
<limit
&& states
[srcIndex
]==UITER_NO_STATE
) {
145 /* now actually move the array contents */
146 api
->start
=destIndex
=0;
147 while(srcIndex
<limit
) {
148 chars
[destIndex
]=chars
[srcIndex
];
149 states
[destIndex
++]=states
[srcIndex
++];
152 /* copy states[limit] as well! */
153 states
[destIndex
]=states
[srcIndex
];
155 api
->limit
=destIndex
;
159 moveContentsTowardEnd(UCharIterator
*api
, UChar chars
[], uint32_t states
[], int32_t delta
) {
160 /* move array contents up to make room */
161 int32_t srcIndex
, destIndex
, start
;
164 destIndex
=((UNormIterator
*)api
)->capacity
;
165 srcIndex
=destIndex
-delta
;
166 if(srcIndex
<api
->limit
) {
167 /* look for a position in the arrays with a known state */
168 while(srcIndex
>start
&& states
[srcIndex
]==UITER_NO_STATE
) {
173 /* now actually move the array contents */
174 api
->limit
=destIndex
;
176 /* copy states[limit] as well! */
177 states
[destIndex
]=states
[srcIndex
];
179 while(srcIndex
>start
) {
180 chars
[--destIndex
]=chars
[--srcIndex
];
181 states
[destIndex
]=states
[srcIndex
];
184 api
->start
=destIndex
;
187 /* normalize forward from the limit, assume hasNext is true */
189 readNext(UNormIterator
*uni
, UCharIterator
*iter
) {
190 /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
191 UCharIterator
*api
=&uni
->api
;
193 /* make capacity/4 room at the end of the arrays */
194 int32_t limit
, capacity
, room
;
195 UErrorCode errorCode
;
198 capacity
=uni
->capacity
;
200 if(room
>(capacity
-limit
)) {
201 /* move array contents to make room */
202 moveContentsTowardStart(api
, uni
->chars
, uni
->states
, room
);
203 api
->index
=limit
=api
->limit
;
204 uni
->hasPrevious
=TRUE
;
207 /* normalize starting from the limit position */
208 errorCode
=U_ZERO_ERROR
;
209 if(uni
->state
!=uni
->states
[limit
]) {
210 uiter_setState(iter
, uni
->states
[limit
], &errorCode
);
211 if(U_FAILURE(errorCode
)) {
212 uni
->state
=UITER_NO_STATE
;
218 room
=unorm_next(iter
, uni
->chars
+limit
, capacity
-limit
, uni
->mode
, 0, TRUE
, NULL
, &errorCode
);
219 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
221 /* empty and re-use the arrays */
222 uni
->states
[0]=uni
->states
[limit
];
223 api
->start
=api
->index
=api
->limit
=limit
=0;
224 uni
->hasPrevious
=TRUE
;
227 if(!reallocArrays(uni
, capacity
, FALSE
)) {
228 uni
->state
=UITER_NO_STATE
;
235 errorCode
=U_ZERO_ERROR
;
236 uiter_setState(iter
, uni
->states
[limit
], &errorCode
);
237 room
=unorm_next(iter
, uni
->chars
+limit
, capacity
-limit
, uni
->mode
, 0, TRUE
, NULL
, &errorCode
);
239 if(U_FAILURE(errorCode
) || room
==0) {
240 uni
->state
=UITER_NO_STATE
;
246 ++limit
; /* leave the known states[limit] alone */
247 for(--room
; room
>0; --room
) {
248 /* set unknown states for all but the normalization boundaries */
249 uni
->states
[limit
++]=UITER_NO_STATE
;
251 uni
->states
[limit
]=uni
->state
=uiter_getState(iter
);
252 uni
->hasNext
=iter
->hasNext(iter
);
257 /* normalize backward from the start, assume hasPrevious is true */
259 readPrevious(UNormIterator
*uni
, UCharIterator
*iter
) {
260 /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
261 UCharIterator
*api
=&uni
->api
;
263 /* make capacity/4 room at the start of the arrays */
264 int32_t start
, capacity
, room
;
265 UErrorCode errorCode
;
268 capacity
=uni
->capacity
;
271 /* move array contents to make room */
272 moveContentsTowardEnd(api
, uni
->chars
, uni
->states
, room
);
273 api
->index
=start
=api
->start
;
277 /* normalize ending at the start position */
278 errorCode
=U_ZERO_ERROR
;
279 if(uni
->state
!=uni
->states
[start
]) {
280 uiter_setState(iter
, uni
->states
[start
], &errorCode
);
281 if(U_FAILURE(errorCode
)) {
282 uni
->state
=UITER_NO_STATE
;
283 uni
->hasPrevious
=FALSE
;
288 room
=unorm_previous(iter
, uni
->chars
, start
, uni
->mode
, 0, TRUE
, NULL
, &errorCode
);
289 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
291 /* empty and re-use the arrays */
292 uni
->states
[capacity
]=uni
->states
[start
];
293 api
->start
=api
->index
=api
->limit
=start
=capacity
;
297 if(!reallocArrays(uni
, capacity
, TRUE
)) {
298 uni
->state
=UITER_NO_STATE
;
299 uni
->hasPrevious
=FALSE
;
305 errorCode
=U_ZERO_ERROR
;
306 uiter_setState(iter
, uni
->states
[start
], &errorCode
);
307 room
=unorm_previous(iter
, uni
->chars
, start
, uni
->mode
, 0, TRUE
, NULL
, &errorCode
);
309 if(U_FAILURE(errorCode
) || room
==0) {
310 uni
->state
=UITER_NO_STATE
;
311 uni
->hasPrevious
=FALSE
;
317 /* copy the UChars from chars[0..room[ to chars[(start-room)..start[ */
318 uni
->chars
[--start
]=uni
->chars
[--room
];
319 /* set unknown states for all but the normalization boundaries */
320 uni
->states
[start
]=UITER_NO_STATE
;
322 uni
->states
[start
]=uni
->state
=uiter_getState(iter
);
323 uni
->hasPrevious
=iter
->hasPrevious(iter
);
328 /* Iterator runtime API functions ------------------------------------------- */
330 static int32_t U_CALLCONV
331 unormIteratorGetIndex(UCharIterator
*api
, UCharIteratorOrigin origin
) {
339 return UITER_UNKNOWN_INDEX
;
341 /* not a valid origin */
342 /* Should never get here! */
347 static int32_t U_CALLCONV
348 unormIteratorMove(UCharIterator
*api
, int32_t delta
, UCharIteratorOrigin origin
) {
349 UNormIterator
*uni
=(UNormIterator
*)api
;
350 UCharIterator
*iter
=uni
->iter
;
356 /* restart from the beginning */
357 if(uni
->hasPrevious
) {
358 iter
->move(iter
, 0, UITER_START
);
359 api
->start
=api
->index
=api
->limit
=0;
360 uni
->states
[api
->limit
]=uni
->state
=uiter_getState(iter
);
361 uni
->hasPrevious
=FALSE
;
362 uni
->hasNext
=iter
->hasNext(iter
);
364 /* we already have the beginning of the normalized text */
365 api
->index
=api
->start
;
372 /* restart from the end */
374 iter
->move(iter
, 0, UITER_LIMIT
);
375 api
->start
=api
->index
=api
->limit
=uni
->capacity
;
376 uni
->states
[api
->limit
]=uni
->state
=uiter_getState(iter
);
377 uni
->hasPrevious
=iter
->hasPrevious(iter
);
380 /* we already have the end of the normalized text */
381 api
->index
=api
->limit
;
385 return -1; /* Error */
388 /* move relative to the current position by delta normalized UChars */
392 /* go forward until the requested position is in the buffer */
394 pos
=api
->index
+delta
; /* requested position */
395 delta
=pos
-api
->limit
; /* remainder beyond buffered text */
397 api
->index
=pos
; /* position reached */
401 /* go to end of buffer and normalize further */
402 api
->index
=api
->limit
;
403 if(!uni
->hasNext
|| !readNext(uni
, iter
)) {
404 break; /* reached end of text */
407 } else /* delta<0 */ {
408 /* go backward until the requested position is in the buffer */
410 pos
=api
->index
+delta
; /* requested position */
411 delta
=pos
-api
->start
; /* remainder beyond buffered text */
413 api
->index
=pos
; /* position reached */
417 /* go to start of buffer and normalize further */
418 api
->index
=api
->start
;
419 if(!uni
->hasPrevious
|| !readPrevious(uni
, iter
)) {
420 break; /* reached start of text */
425 if(api
->index
==api
->start
&& !uni
->hasPrevious
) {
428 return UITER_UNKNOWN_INDEX
;
432 static UBool U_CALLCONV
433 unormIteratorHasNext(UCharIterator
*api
) {
434 return api
->index
<api
->limit
|| ((UNormIterator
*)api
)->hasNext
;
437 static UBool U_CALLCONV
438 unormIteratorHasPrevious(UCharIterator
*api
) {
439 return api
->index
>api
->start
|| ((UNormIterator
*)api
)->hasPrevious
;
442 static UChar32 U_CALLCONV
443 unormIteratorCurrent(UCharIterator
*api
) {
444 UNormIterator
*uni
=(UNormIterator
*)api
;
446 if( api
->index
<api
->limit
||
447 (uni
->hasNext
&& readNext(uni
, uni
->iter
))
449 return uni
->chars
[api
->index
];
455 static UChar32 U_CALLCONV
456 unormIteratorNext(UCharIterator
*api
) {
457 UNormIterator
*uni
=(UNormIterator
*)api
;
459 if( api
->index
<api
->limit
||
460 (uni
->hasNext
&& readNext(uni
, uni
->iter
))
462 return uni
->chars
[api
->index
++];
468 static UChar32 U_CALLCONV
469 unormIteratorPrevious(UCharIterator
*api
) {
470 UNormIterator
*uni
=(UNormIterator
*)api
;
472 if( api
->index
>api
->start
||
473 (uni
->hasPrevious
&& readPrevious(uni
, uni
->iter
))
475 return uni
->chars
[--api
->index
];
481 static uint32_t U_CALLCONV
482 unormIteratorGetState(const UCharIterator
*api
) {
483 /* not uni->state because that may not be at api->index */
484 return ((UNormIterator
*)api
)->states
[api
->index
];
487 static void U_CALLCONV
488 unormIteratorSetState(UCharIterator
*api
, uint32_t state
, UErrorCode
*pErrorCode
) {
489 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
491 } else if(api
==NULL
) {
492 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
493 } else if(state
==UITER_NO_STATE
) {
494 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
496 UNormIterator
*uni
=(UNormIterator
*)api
;
497 UCharIterator
*iter
=((UNormIterator
*)api
)->iter
;
498 if(state
!=uni
->state
) {
500 uiter_setState(iter
, state
, pErrorCode
);
504 * Try shortcuts: If the requested state is in the array contents
505 * then just set the index there.
507 * We assume that the state is unique per position!
509 if(state
==uni
->states
[api
->index
]) {
511 } else if(state
==uni
->states
[api
->limit
]) {
512 api
->index
=api
->limit
;
515 /* search for the index with this state */
518 for(i
=api
->start
; i
<api
->limit
; ++i
) {
519 if(state
==uni
->states
[i
]) {
526 /* there is no array index for this state, reset for fresh contents */
527 initIndexes((UNormIterator
*)api
, iter
);
528 uni
->states
[api
->limit
]=state
;
532 static const UCharIterator unormIterator
={
534 unormIteratorGetIndex
,
536 unormIteratorHasNext
,
537 unormIteratorHasPrevious
,
538 unormIteratorCurrent
,
540 unormIteratorPrevious
,
542 unormIteratorGetState
,
543 unormIteratorSetState
546 /* Setup functions ---------------------------------------------------------- */
548 U_CAPI UNormIterator
* U_EXPORT2
549 unorm_openIter(void *stackMem
, int32_t stackMemSize
, UErrorCode
*pErrorCode
) {
552 /* argument checking */
553 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
559 if(stackMem
!=NULL
&& stackMemSize
>=sizeof(UNormIterator
)) {
560 size_t align
=U_ALIGNMENT_OFFSET(stackMem
);
562 /* already aligned */
563 uni
=(UNormIterator
*)stackMem
;
564 } else if((stackMemSize
-=(int32_t)align
)>=(int32_t)sizeof(UNormIterator
)) {
565 /* needs alignment */
566 uni
=(UNormIterator
*)((char *)stackMem
+align
);
568 /* else does not fit */
572 uni
->isStackAllocated
=TRUE
;
574 uni
=(UNormIterator
*)uprv_malloc(sizeof(UNormIterator
));
576 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
579 uni
->isStackAllocated
=FALSE
;
584 * do not memset because that would unnecessarily initialize the arrays
587 uni
->chars
=uni
->charsBuffer
;
588 uni
->states
=uni
->statesBuffer
;
589 uni
->capacity
=INITIAL_CAPACITY
;
590 uni
->state
=UITER_NO_STATE
;
591 uni
->hasPrevious
=uni
->hasNext
=FALSE
;
592 uni
->mode
=UNORM_NONE
;
594 /* set a no-op iterator into the api */
595 uiter_setString(&uni
->api
, NULL
, 0);
599 U_CAPI
void U_EXPORT2
600 unorm_closeIter(UNormIterator
*uni
) {
602 if(uni
->states
!=uni
->statesBuffer
) {
603 /* chars and states are allocated in the same memory block */
604 uprv_free(uni
->states
);
606 if(!uni
->isStackAllocated
) {
612 U_CAPI UCharIterator
* U_EXPORT2
613 unorm_setIter(UNormIterator
*uni
, UCharIterator
*iter
, UNormalizationMode mode
, UErrorCode
*pErrorCode
) {
614 /* argument checking */
615 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
619 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
622 if( iter
==NULL
|| iter
->getState
==NULL
|| iter
->setState
==NULL
||
623 mode
<UNORM_NONE
|| UNORM_MODE_COUNT
<=mode
625 /* set a no-op iterator into the api */
626 uiter_setString(&uni
->api
, NULL
, 0);
627 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
631 /* set the iterator and initialize */
632 uprv_memcpy(&uni
->api
, &unormIterator
, sizeof(unormIterator
));
637 initIndexes(uni
, iter
);
638 uni
->states
[uni
->api
.limit
]=uni
->state
=uiter_getState(iter
);
643 #endif /* uconfig.h switches */