]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/toolutil/ucbuf.c
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / ucbuf.c
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
73c04bcf 4* Copyright (C) 1998-2006, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8*
9* File ucbuf.c
10*
11* Modification History:
12*
13* Date Name Description
14* 05/10/01 Ram Creation.
15*******************************************************************************
16*/
17
18#include "unicode/utypes.h"
374ca955 19#include "unicode/putil.h"
b75a7d8f
A
20#include "unicode/ucnv.h"
21#include "unicode/ucnv_err.h"
22#include "filestrm.h"
23#include "cstring.h"
24#include "cmemory.h"
25#include "ustrfmt.h"
26#include "unicode/ustring.h"
27#include "unicode/uchar.h"
28#include "ucbuf.h"
29#include <stdio.h>
30
73c04bcf
A
31#if !UCONFIG_NO_CONVERSION
32
33
b75a7d8f
A
34#define MAX_IN_BUF 1000
35#define MAX_U_BUF 1500
36#define CONTEXT_LEN 15
37
38struct UCHARBUF {
39 UChar* buffer;
40 UChar* currentPos;
41 UChar* bufLimit;
42 int32_t bufCapacity;
43 int32_t remaining;
44 int32_t signatureLength;
45 FileStream* in;
46 UConverter* conv;
47 UBool showWarning; /* makes this API not produce any errors */
48 UBool isBuffered;
49};
50
51U_CAPI UBool U_EXPORT2
52ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){
53 char start[8];
54 int32_t numRead;
55
56 UChar target[1]={ 0 };
57 UChar* pTarget;
58 const char* pStart;
59
60 /* read a few bytes */
61 numRead=T_FileStream_read(in, start, sizeof(start));
62
63 *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error);
64
65 /* unread the bytes beyond what was consumed for U+FEFF */
66 T_FileStream_rewind(in);
67 if (*signatureLength > 0) {
68 numRead = T_FileStream_read(in, start, *signatureLength);
69 }
70
71 if(*cp==NULL){
72 *conv =NULL;
73 return FALSE;
74 }
75
76 /* open the converter for the detected Unicode charset */
77 *conv = ucnv_open(*cp,error);
78
79 /* convert and ignore initial U+FEFF, and the buffer overflow */
80 pTarget = target;
81 pStart = start;
82 ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error);
374ca955 83 *signatureLength = (int32_t)(pStart - start);
b75a7d8f
A
84 if(*error==U_BUFFER_OVERFLOW_ERROR) {
85 *error=U_ZERO_ERROR;
86 }
87
88 /* verify that we successfully read exactly U+FEFF */
89 if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) {
90 *error=U_INTERNAL_PROGRAM_ERROR;
91 }
92
93
b75a7d8f
A
94 return TRUE;
95}
96static UBool ucbuf_isCPKnown(const char* cp){
97 if(ucnv_compareNames("UTF-8",cp)==0){
98 return TRUE;
99 }
100 if(ucnv_compareNames("UTF-16BE",cp)==0){
101 return TRUE;
102 }
103 if(ucnv_compareNames("UTF-16LE",cp)==0){
104 return TRUE;
105 }
106 if(ucnv_compareNames("UTF-16",cp)==0){
107 return TRUE;
108 }
109 if(ucnv_compareNames("UTF-32",cp)==0){
110 return TRUE;
111 }
112 if(ucnv_compareNames("UTF-32BE",cp)==0){
113 return TRUE;
114 }
115 if(ucnv_compareNames("UTF-32LE",cp)==0){
116 return TRUE;
117 }
b75a7d8f
A
118 if(ucnv_compareNames("SCSU",cp)==0){
119 return TRUE;
120 }
374ca955 121 if(ucnv_compareNames("BOCU-1",cp)==0){
b75a7d8f
A
122 return TRUE;
123 }
124 if(ucnv_compareNames("UTF-7",cp)==0){
125 return TRUE;
126 }
127 return FALSE;
128}
129
130U_CAPI FileStream * U_EXPORT2
131ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){
132 FileStream* in=NULL;
133 if(error==NULL || U_FAILURE(*error)){
134 return NULL;
135 }
136 if(conv==NULL || cp==NULL || fileName==NULL){
137 *error = U_ILLEGAL_ARGUMENT_ERROR;
138 return NULL;
139 }
140 /* open the file */
141 in= T_FileStream_open(fileName,"rb");
142
143 if(in == NULL){
144 *error=U_FILE_ACCESS_ERROR;
145 return NULL;
146 }
147
148 if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) {
149 return in;
150 } else {
151 ucnv_close(*conv);
152 *conv=NULL;
153 T_FileStream_close(in);
154 return NULL;
155 }
156}
157
158/* fill the uchar buffer */
159static UCHARBUF*
160ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
161 UChar* pTarget=NULL;
162 UChar* target=NULL;
163 const char* source=NULL;
164 char carr[MAX_IN_BUF] = {'\0'};
165 char* cbuf = carr;
166 int32_t inputRead=0;
167 int32_t outputWritten=0;
168 int32_t offset=0;
169 const char* sourceLimit =NULL;
170 int32_t cbufSize=0;
171 pTarget = buf->buffer;
172 /* check if we arrived here without exhausting the buffer*/
173 if(buf->currentPos<buf->bufLimit){
174 offset = (int32_t)(buf->bufLimit-buf->currentPos);
175 memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
176 }
177
178#if DEBUG
179 memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset));
180#endif
181 if(buf->isBuffered){
182 cbufSize = MAX_IN_BUF;
183 /* read the file */
184 inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
185 buf->remaining-=inputRead;
186
187 }else{
188 cbufSize = T_FileStream_size(buf->in);
189 cbuf = (char*)uprv_malloc(cbufSize);
190 inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
191 buf->remaining-=inputRead;
192 }
193
194 /* just to be sure...*/
195 if ( 0 == inputRead )
196 buf->remaining = 0;
197
198 target=pTarget;
199 /* convert the bytes */
200 if(buf->conv){
201 /* set the callback to stop */
202 UConverterToUCallback toUOldAction ;
203 void* toUOldContext;
204 void* toUNewContext=NULL;
205 ucnv_setToUCallBack(buf->conv,
206 UCNV_TO_U_CALLBACK_STOP,
207 toUNewContext,
208 &toUOldAction,
209 (const void**)&toUOldContext,
210 error);
211 /* since state is saved in the converter we add offset to source*/
212 target = pTarget+offset;
213 source = cbuf;
214 sourceLimit = source + inputRead;
215 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
216 &source,sourceLimit,NULL,
217 (UBool)(buf->remaining==0),error);
218
219 if(U_FAILURE(*error)){
220 char context[CONTEXT_LEN];
221 char preContext[CONTEXT_LEN];
222 char postContext[CONTEXT_LEN];
223 int8_t len = CONTEXT_LEN;
224 int32_t start=0;
225 int32_t stop =0;
226 int32_t pos =0;
227 /* use erro1 to preserve the error code */
228 UErrorCode error1 =U_ZERO_ERROR;
229
230 if( buf->showWarning==TRUE){
231 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
232 " converting input stream to target encoding: %s\n",
233 u_errorName(*error));
234 }
235
236
237 /* now get the context chars */
238 ucnv_getInvalidChars(buf->conv,context,&len,&error1);
239 context[len]= 0 ; /* null terminate the buffer */
240
241 pos = (int32_t)(source - cbuf - len);
242
243 /* for pre-context */
244 start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
245 stop = pos-len;
246
247 memcpy(preContext,cbuf+start,stop-start);
248 /* null terminate the buffer */
249 preContext[stop-start] = 0;
250
251 /* for post-context */
252 start = pos+len;
253 stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));
254
255 memcpy(postContext,source,stop-start);
256 /* null terminate the buffer */
257 postContext[stop-start] = 0;
258
259 if(buf->showWarning ==TRUE){
260 /* print out the context */
261 fprintf(stderr,"\tPre-context: %s\n",preContext);
262 fprintf(stderr,"\tContext: %s\n",context);
263 fprintf(stderr,"\tPost-context: %s\n", postContext);
264 }
265
266 /* reset the converter */
267 ucnv_reset(buf->conv);
268
269 /* set the call back to substitute
270 * and restart conversion
271 */
272 ucnv_setToUCallBack(buf->conv,
273 UCNV_TO_U_CALLBACK_SUBSTITUTE,
274 toUNewContext,
275 &toUOldAction,
276 (const void**)&toUOldContext,
277 &error1);
278
279 /* reset source and target start positions */
280 target = pTarget+offset;
281 source = cbuf;
282
283 /* re convert */
284 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
285 &source,sourceLimit,NULL,
286 (UBool)(buf->remaining==0),&error1);
287
288 }
289 outputWritten = (int32_t)(target - pTarget);
290
291
292#if DEBUG
293 {
294 int i;
295 target = pTarget;
296 for(i=0;i<numRead;i++){
297 /* printf("%c", (char)(*target++));*/
298 }
299 }
300#endif
301
302 }else{
303 u_charsToUChars(cbuf,target+offset,inputRead);
304 outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
305 }
306 buf->currentPos = pTarget;
307 buf->bufLimit=pTarget+outputWritten;
374ca955 308 *buf->bufLimit=0; /*NUL terminate*/
b75a7d8f
A
309 if(cbuf!=carr){
310 uprv_free(cbuf);
311 }
312 return buf;
313}
314
315
316
317/* get a UChar from the stream*/
318U_CAPI int32_t U_EXPORT2
319ucbuf_getc(UCHARBUF* buf,UErrorCode* error){
320 if(error==NULL || U_FAILURE(*error)){
321 return FALSE;
322 }
323 if(buf->currentPos>=buf->bufLimit){
324 if(buf->remaining==0){
325 return U_EOF;
326 }
327 buf=ucbuf_fillucbuf(buf,error);
328 if(U_FAILURE(*error)){
329 return U_EOF;
330 }
331 }
332
333 return *(buf->currentPos++);
334}
335
336/* get a UChar32 from the stream*/
337U_CAPI int32_t U_EXPORT2
338ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
339 int32_t retVal = (int32_t)U_EOF;
340 if(error==NULL || U_FAILURE(*error)){
341 return FALSE;
342 }
343 if(buf->currentPos+1>=buf->bufLimit){
344 if(buf->remaining==0){
345 return U_EOF;
346 }
347 buf=ucbuf_fillucbuf(buf,error);
348 if(U_FAILURE(*error)){
349 return U_EOF;
350 }
351 }
352 if(UTF_IS_LEAD(*(buf->currentPos))){
353 retVal=UTF16_GET_PAIR_VALUE(*(buf->currentPos++),*(buf->currentPos++));
354 }else{
355 retVal = *(buf->currentPos++);
356 }
357 return retVal;
358}
359
360/* u_unescapeAt() callback to return a UChar*/
361static UChar U_CALLCONV
362_charAt(int32_t offset, void *context) {
363 return ((UCHARBUF*) context)->currentPos[offset];
364}
365
366/* getc and escape it */
367U_CAPI int32_t U_EXPORT2
368ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) {
369 int32_t length;
370 int32_t offset;
371 UChar32 c32,c1,c2;
372 if(error==NULL || U_FAILURE(*error)){
373 return FALSE;
374 }
375 /* Fill the buffer if it is empty */
376 if (buf->currentPos >=buf->bufLimit-2) {
377 ucbuf_fillucbuf(buf,error);
378 }
379
380 /* Get the next character in the buffer */
381 if (buf->currentPos < buf->bufLimit) {
382 c1 = *(buf->currentPos)++;
383 } else {
384 c1 = U_EOF;
385 }
386
387 c2 = *(buf->currentPos);
388
389 /* If it isn't a backslash, return it */
390 if (c1 != 0x005C) {
391 return c1;
392 }
393
394 /* Determine the amount of data in the buffer */
395 length = (int32_t)(buf->bufLimit - buf->currentPos);
396
397 /* The longest escape sequence is \Uhhhhhhhh; make sure
398 we have at least that many characters */
399 if (length < 10) {
400
401 /* fill the buffer */
402 ucbuf_fillucbuf(buf,error);
403 length = (int32_t)(buf->bufLimit - buf->buffer);
404 }
405
406 /* Process the escape */
407 offset = 0;
408 c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);
409
410 /* check if u_unescapeAt unescaped and converted
411 * to c32 or not
412 */
413 if(c32==0xFFFFFFFF){
374ca955
A
414 if(buf->showWarning) {
415 char context[20];
416 int32_t len = 20;
417 if(length < len) {
418 len = length;
419 }
420 context[len]= 0 ; /* null terminate the buffer */
421 u_UCharsToChars( buf->currentPos, context, len);
422 fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context);
423 }
b75a7d8f
A
424 *error= U_ILLEGAL_ESCAPE_SEQUENCE;
425 return c1;
426 }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){
427 /* Update the current buffer position */
428 buf->currentPos += offset;
429 }else{
430 /* unescaping failed so we just return
431 * c1 and not consume the buffer
432 * this is useful for rules with escapes
433 * in resouce bundles
434 * eg: \' \\ \"
435 */
436 return c1;
437 }
438
439 return c32;
440}
441
442U_CAPI UCHARBUF* U_EXPORT2
443ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){
444
445 FileStream* in = NULL;
446 int32_t fileSize=0;
447 const char* knownCp;
448 if(error==NULL || U_FAILURE(*error)){
449 return NULL;
450 }
451 if(cp==NULL || fileName==NULL){
452 *error = U_ILLEGAL_ARGUMENT_ERROR;
453 return FALSE;
454 }
455 if (!uprv_strcmp(fileName, "-")) {
456 in = T_FileStream_stdin();
457 }else{
458 in = T_FileStream_open(fileName, "rb");
459 }
460
461 if(in!=NULL){
462 UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
463 fileSize = T_FileStream_size(in);
73c04bcf
A
464 if(buf == NULL){
465 *error = U_MEMORY_ALLOCATION_ERROR;
466 T_FileStream_close(in);
467 return NULL;
468 }
469 buf->in=in;
470 buf->conv=NULL;
471 buf->showWarning = showWarning;
472 buf->isBuffered = buffered;
473 buf->signatureLength=0;
474 if(*cp==NULL || **cp=='\0'){
475 /* don't have code page name... try to autodetect */
476 ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error);
477 }else if(ucbuf_isCPKnown(*cp)){
478 /* discard BOM */
479 ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error);
480 }
481 if(U_SUCCESS(*error) && buf->conv==NULL) {
482 buf->conv=ucnv_open(*cp,error);
483 }
484 if(U_FAILURE(*error)){
485 ucnv_close(buf->conv);
486 uprv_free(buf);
487 T_FileStream_close(in);
488 return NULL;
489 }
490
491 if((buf->conv==NULL) && (buf->showWarning==TRUE)){
492 fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n");
493 }
494 buf->remaining=fileSize-buf->signatureLength;
495 if(buf->isBuffered){
496 buf->bufCapacity=MAX_U_BUF;
b75a7d8f 497 }else{
73c04bcf
A
498 buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/;
499 }
500 buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity );
501 if (buf->buffer == NULL) {
b75a7d8f 502 *error = U_MEMORY_ALLOCATION_ERROR;
73c04bcf
A
503 ucnv_close(buf->conv);
504 uprv_free(buf);
505 T_FileStream_close(in);
b75a7d8f
A
506 return NULL;
507 }
73c04bcf
A
508 buf->currentPos=buf->buffer;
509 buf->bufLimit=buf->buffer;
510 if(U_FAILURE(*error)){
511 fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error));
512 ucnv_close(buf->conv);
513 uprv_free(buf);
514 T_FileStream_close(in);
515 return NULL;
516 }
517 buf=ucbuf_fillucbuf(buf,error);
518 return buf;
b75a7d8f
A
519 }
520 *error =U_FILE_ACCESS_ERROR;
521 return NULL;
522}
523
524
525
526/* TODO: this method will fail if at the
527 * begining of buffer and the uchar to unget
528 * is from the previous buffer. Need to implement
529 * system to take care of that situation.
530 */
531U_CAPI void U_EXPORT2
532ucbuf_ungetc(int32_t c,UCHARBUF* buf){
533 /* decrement currentPos pointer
534 * if not at the begining of buffer
535 */
536 UChar escaped[8] ={'\0'};
537 int32_t len =0;
538 if(c > 0xFFFF){
539 len = uprv_itou(escaped,8,c,16,8);
540 }else{
541 len=uprv_itou(escaped,8,c,16,4);
542 }
543 if(buf->currentPos!=buf->buffer){
544 if(*(buf->currentPos-1)==c){
545 buf->currentPos--;
546 }else if(u_strncmp(buf->currentPos-len,escaped,len) == 0){
547 while(--len>0){
548 buf->currentPos--;
549 }
550 }
551 }
552}
553
554/* frees the resources of UChar* buffer */
555static void
556ucbuf_closebuf(UCHARBUF* buf){
557 uprv_free(buf->buffer);
558 buf->buffer = NULL;
559}
560
561/* close the buf and release resources*/
562U_CAPI void U_EXPORT2
563ucbuf_close(UCHARBUF* buf){
564 if(buf!=NULL){
565 if(buf->conv){
566 ucnv_close(buf->conv);
567 }
568 T_FileStream_close(buf->in);
569 ucbuf_closebuf(buf);
570 uprv_free(buf);
571 }
572}
573
574/* rewind the buf and file stream */
575U_CAPI void U_EXPORT2
576ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){
577 if(error==NULL || U_FAILURE(*error)){
578 return;
579 }
580 if(buf){
581 buf->currentPos=buf->buffer;
582 buf->bufLimit=buf->buffer;
583 T_FileStream_rewind(buf->in);
584 buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength;
585
586 ucnv_resetToUnicode(buf->conv);
587 if(buf->signatureLength>0) {
588 UChar target[1]={ 0 };
589 UChar* pTarget;
590 char start[8];
591 const char* pStart;
592 int32_t numRead;
593
594 /* read the signature bytes */
595 numRead=T_FileStream_read(buf->in, start, buf->signatureLength);
596
597 /* convert and ignore initial U+FEFF, and the buffer overflow */
598 pTarget = target;
599 pStart = start;
600 ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error);
601 if(*error==U_BUFFER_OVERFLOW_ERROR) {
602 *error=U_ZERO_ERROR;
603 }
604
605 /* verify that we successfully read exactly U+FEFF */
606 if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) {
607 *error=U_INTERNAL_PROGRAM_ERROR;
608 }
609 }
610 }
611}
612
613
614U_CAPI int32_t U_EXPORT2
615ucbuf_size(UCHARBUF* buf){
616 if(buf){
617 if(buf->isBuffered){
618 return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv);
619 }else{
374ca955 620 return (int32_t)(buf->bufLimit - buf->buffer);
b75a7d8f
A
621 }
622 }
623 return 0;
624}
625
626U_CAPI const UChar* U_EXPORT2
627ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){
628 if(error==NULL || U_FAILURE(*error)){
629 return NULL;
630 }
631 if(buf==NULL || len==NULL){
632 *error = U_ILLEGAL_ARGUMENT_ERROR;
633 return NULL;
634 }
374ca955 635 *len = (int32_t)(buf->bufLimit - buf->buffer);
b75a7d8f
A
636 return buf->buffer;
637}
638
639U_CAPI const char* U_EXPORT2
640ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){
641 int32_t requiredLen = 0;
642 int32_t dirlen = 0;
643 int32_t filelen = 0;
644 if(status==NULL || U_FAILURE(*status)){
645 return NULL;
646 }
647
648 if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){
649 *status = U_ILLEGAL_ARGUMENT_ERROR;
650 return NULL;
651 }
652
653
654 dirlen = (int32_t)uprv_strlen(inputDir);
655 filelen = (int32_t)uprv_strlen(fileName);
656 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
657 requiredLen = dirlen + filelen + 2;
658 if((*len < requiredLen) || target==NULL){
659 *len = requiredLen;
660 *status = U_BUFFER_OVERFLOW_ERROR;
661 return NULL;
662 }
663
664 target[0] = '\0';
665 /*
666 * append the input dir to openFileName if the first char in
667 * filename is not file seperation char and the last char input directory is not '.'.
668 * This is to support :
669 * genrb -s. /home/icu/data
670 * genrb -s. icu/data
671 * The user cannot mix notations like
672 * genrb -s. /icu/data --- the absolute path specified. -s redundant
673 * user should use
674 * genrb -s. icu/data --- start from CWD and look in icu/data dir
675 */
676 if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
677 uprv_strcpy(target, inputDir);
678 target[dirlen] = U_FILE_SEP_CHAR;
679 }
680 target[dirlen + 1] = '\0';
681 } else {
682 requiredLen = dirlen + filelen + 1;
683 if((*len < requiredLen) || target==NULL){
684 *len = requiredLen;
685 *status = U_BUFFER_OVERFLOW_ERROR;
686 return NULL;
687 }
688
689 uprv_strcpy(target, inputDir);
690 }
691
692 uprv_strcat(target, fileName);
693 return target;
694}
695/*
696 * Unicode TR 13 says any of the below chars is
697 * a new line char in a readline function in addition
698 * to CR+LF combination which needs to be
699 * handled seperately
700 */
701static UBool ucbuf_isCharNewLine(UChar c){
702 switch(c){
703 case 0x000A: /* LF */
704 case 0x000D: /* CR */
705 case 0x000C: /* FF */
706 case 0x0085: /* NEL */
707 case 0x2028: /* LS */
708 case 0x2029: /* PS */
709 return TRUE;
710 default:
711 return FALSE;
712 }
713}
714
715U_CAPI const UChar* U_EXPORT2
716ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){
717 UChar* temp = buf->currentPos;
718 UChar* savePos =NULL;
719 UChar c=0x0000;
720 if(buf->isBuffered){
374ca955
A
721 /* The input is buffered we have to do more
722 * for returning a pointer U_TRUNCATED_CHAR_FOUND
b75a7d8f
A
723 */
724 for(;;){
725 c = *temp++;
726 if(buf->remaining==0){
374ca955 727 return NULL; /* end of file is reached return NULL */
b75a7d8f
A
728 }
729 if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){
730 *err= U_TRUNCATED_CHAR_FOUND;
731 return NULL;
732 }else{
733 ucbuf_fillucbuf(buf,err);
734 if(U_FAILURE(*err)){
374ca955 735 return NULL;
b75a7d8f
A
736 }
737 }
738 /*
739 * Accoding to TR 13 readLine functions must interpret
740 * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators
741 */
742 /* Windows CR LF */
743 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
374ca955 744 *len = (int32_t)(temp++ - buf->currentPos);
b75a7d8f
A
745 savePos = buf->currentPos;
746 buf->currentPos = temp;
747 return savePos;
748 }
749 /* else */
750
751 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */
374ca955 752 *len = (int32_t)(temp - buf->currentPos);
b75a7d8f
A
753 savePos = buf->currentPos;
754 buf->currentPos = temp;
755 return savePos;
756 }
757 }
758 }else{
759 /* we know that all input is read into the internal
760 * buffer so we can safely return pointers
761 */
762 for(;;){
763 c = *temp++;
764
765 if(buf->currentPos==buf->bufLimit){
374ca955 766 return NULL; /* end of file is reached return NULL */
b75a7d8f
A
767 }
768 /* Windows CR LF */
769 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
374ca955 770 *len = (int32_t)(temp++ - buf->currentPos);
b75a7d8f
A
771 savePos = buf->currentPos;
772 buf->currentPos = temp;
773 return savePos;
774 }
775 /* else */
776 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */
374ca955 777 *len = (int32_t)(temp - buf->currentPos);
b75a7d8f
A
778 savePos = buf->currentPos;
779 buf->currentPos = temp;
780 return savePos;
781 }
782 }
783 }
784 /* not reached */
785 /* A compiler warning will appear if all paths don't contain a return statement. */
786/* return NULL;*/
787}
73c04bcf 788#endif