]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/toolutil/ucbuf.c
ICU-6.2.10.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / ucbuf.c
CommitLineData
b75a7d8f
A
/*
*******************************************************************************
*
*   Copyright (C) 1998-2004, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File ucbuf.c
*
* Modification History:
*
*   Date        Name        Description
*   05/10/01    Ram         Creation.
*******************************************************************************
*/
17
18#include "unicode/utypes.h"
374ca955 19#include "unicode/putil.h"
b75a7d8f
A
20#include "unicode/ucnv.h"
21#include "unicode/ucnv_err.h"
22#include "filestrm.h"
23#include "cstring.h"
24#include "cmemory.h"
25#include "ustrfmt.h"
26#include "unicode/ustring.h"
27#include "unicode/uchar.h"
28#include "ucbuf.h"
29#include <stdio.h>
30
/* Number of raw bytes read from the file per refill when the reader is buffered. */
#define MAX_IN_BUF 1000
/* Capacity, in UChars, of the internal buffer when the reader is buffered. */
#define MAX_U_BUF 1500
/* Size used for the byte arrays that hold context printed in conversion warnings. */
#define CONTEXT_LEN 15
34
35struct UCHARBUF {
36 UChar* buffer;
37 UChar* currentPos;
38 UChar* bufLimit;
39 int32_t bufCapacity;
40 int32_t remaining;
41 int32_t signatureLength;
42 FileStream* in;
43 UConverter* conv;
44 UBool showWarning; /* makes this API not produce any errors */
45 UBool isBuffered;
46};
47
48U_CAPI UBool U_EXPORT2
49ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){
50 char start[8];
51 int32_t numRead;
52
53 UChar target[1]={ 0 };
54 UChar* pTarget;
55 const char* pStart;
56
57 /* read a few bytes */
58 numRead=T_FileStream_read(in, start, sizeof(start));
59
60 *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error);
61
62 /* unread the bytes beyond what was consumed for U+FEFF */
63 T_FileStream_rewind(in);
64 if (*signatureLength > 0) {
65 numRead = T_FileStream_read(in, start, *signatureLength);
66 }
67
68 if(*cp==NULL){
69 *conv =NULL;
70 return FALSE;
71 }
72
73 /* open the converter for the detected Unicode charset */
74 *conv = ucnv_open(*cp,error);
75
76 /* convert and ignore initial U+FEFF, and the buffer overflow */
77 pTarget = target;
78 pStart = start;
79 ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error);
374ca955 80 *signatureLength = (int32_t)(pStart - start);
b75a7d8f
A
81 if(*error==U_BUFFER_OVERFLOW_ERROR) {
82 *error=U_ZERO_ERROR;
83 }
84
85 /* verify that we successfully read exactly U+FEFF */
86 if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) {
87 *error=U_INTERNAL_PROGRAM_ERROR;
88 }
89
90
b75a7d8f
A
91 return TRUE;
92}
93static UBool ucbuf_isCPKnown(const char* cp){
94 if(ucnv_compareNames("UTF-8",cp)==0){
95 return TRUE;
96 }
97 if(ucnv_compareNames("UTF-16BE",cp)==0){
98 return TRUE;
99 }
100 if(ucnv_compareNames("UTF-16LE",cp)==0){
101 return TRUE;
102 }
103 if(ucnv_compareNames("UTF-16",cp)==0){
104 return TRUE;
105 }
106 if(ucnv_compareNames("UTF-32",cp)==0){
107 return TRUE;
108 }
109 if(ucnv_compareNames("UTF-32BE",cp)==0){
110 return TRUE;
111 }
112 if(ucnv_compareNames("UTF-32LE",cp)==0){
113 return TRUE;
114 }
b75a7d8f
A
115 if(ucnv_compareNames("SCSU",cp)==0){
116 return TRUE;
117 }
374ca955 118 if(ucnv_compareNames("BOCU-1",cp)==0){
b75a7d8f
A
119 return TRUE;
120 }
121 if(ucnv_compareNames("UTF-7",cp)==0){
122 return TRUE;
123 }
124 return FALSE;
125}
126
127U_CAPI FileStream * U_EXPORT2
128ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){
129 FileStream* in=NULL;
130 if(error==NULL || U_FAILURE(*error)){
131 return NULL;
132 }
133 if(conv==NULL || cp==NULL || fileName==NULL){
134 *error = U_ILLEGAL_ARGUMENT_ERROR;
135 return NULL;
136 }
137 /* open the file */
138 in= T_FileStream_open(fileName,"rb");
139
140 if(in == NULL){
141 *error=U_FILE_ACCESS_ERROR;
142 return NULL;
143 }
144
145 if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) {
146 return in;
147 } else {
148 ucnv_close(*conv);
149 *conv=NULL;
150 T_FileStream_close(in);
151 return NULL;
152 }
153}
154
155/* fill the uchar buffer */
156static UCHARBUF*
157ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
158 UChar* pTarget=NULL;
159 UChar* target=NULL;
160 const char* source=NULL;
161 char carr[MAX_IN_BUF] = {'\0'};
162 char* cbuf = carr;
163 int32_t inputRead=0;
164 int32_t outputWritten=0;
165 int32_t offset=0;
166 const char* sourceLimit =NULL;
167 int32_t cbufSize=0;
168 pTarget = buf->buffer;
169 /* check if we arrived here without exhausting the buffer*/
170 if(buf->currentPos<buf->bufLimit){
171 offset = (int32_t)(buf->bufLimit-buf->currentPos);
172 memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
173 }
174
175#if DEBUG
176 memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset));
177#endif
178 if(buf->isBuffered){
179 cbufSize = MAX_IN_BUF;
180 /* read the file */
181 inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
182 buf->remaining-=inputRead;
183
184 }else{
185 cbufSize = T_FileStream_size(buf->in);
186 cbuf = (char*)uprv_malloc(cbufSize);
187 inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
188 buf->remaining-=inputRead;
189 }
190
191 /* just to be sure...*/
192 if ( 0 == inputRead )
193 buf->remaining = 0;
194
195 target=pTarget;
196 /* convert the bytes */
197 if(buf->conv){
198 /* set the callback to stop */
199 UConverterToUCallback toUOldAction ;
200 void* toUOldContext;
201 void* toUNewContext=NULL;
202 ucnv_setToUCallBack(buf->conv,
203 UCNV_TO_U_CALLBACK_STOP,
204 toUNewContext,
205 &toUOldAction,
206 (const void**)&toUOldContext,
207 error);
208 /* since state is saved in the converter we add offset to source*/
209 target = pTarget+offset;
210 source = cbuf;
211 sourceLimit = source + inputRead;
212 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
213 &source,sourceLimit,NULL,
214 (UBool)(buf->remaining==0),error);
215
216 if(U_FAILURE(*error)){
217 char context[CONTEXT_LEN];
218 char preContext[CONTEXT_LEN];
219 char postContext[CONTEXT_LEN];
220 int8_t len = CONTEXT_LEN;
221 int32_t start=0;
222 int32_t stop =0;
223 int32_t pos =0;
224 /* use erro1 to preserve the error code */
225 UErrorCode error1 =U_ZERO_ERROR;
226
227 if( buf->showWarning==TRUE){
228 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
229 " converting input stream to target encoding: %s\n",
230 u_errorName(*error));
231 }
232
233
234 /* now get the context chars */
235 ucnv_getInvalidChars(buf->conv,context,&len,&error1);
236 context[len]= 0 ; /* null terminate the buffer */
237
238 pos = (int32_t)(source - cbuf - len);
239
240 /* for pre-context */
241 start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
242 stop = pos-len;
243
244 memcpy(preContext,cbuf+start,stop-start);
245 /* null terminate the buffer */
246 preContext[stop-start] = 0;
247
248 /* for post-context */
249 start = pos+len;
250 stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));
251
252 memcpy(postContext,source,stop-start);
253 /* null terminate the buffer */
254 postContext[stop-start] = 0;
255
256 if(buf->showWarning ==TRUE){
257 /* print out the context */
258 fprintf(stderr,"\tPre-context: %s\n",preContext);
259 fprintf(stderr,"\tContext: %s\n",context);
260 fprintf(stderr,"\tPost-context: %s\n", postContext);
261 }
262
263 /* reset the converter */
264 ucnv_reset(buf->conv);
265
266 /* set the call back to substitute
267 * and restart conversion
268 */
269 ucnv_setToUCallBack(buf->conv,
270 UCNV_TO_U_CALLBACK_SUBSTITUTE,
271 toUNewContext,
272 &toUOldAction,
273 (const void**)&toUOldContext,
274 &error1);
275
276 /* reset source and target start positions */
277 target = pTarget+offset;
278 source = cbuf;
279
280 /* re convert */
281 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
282 &source,sourceLimit,NULL,
283 (UBool)(buf->remaining==0),&error1);
284
285 }
286 outputWritten = (int32_t)(target - pTarget);
287
288
289#if DEBUG
290 {
291 int i;
292 target = pTarget;
293 for(i=0;i<numRead;i++){
294 /* printf("%c", (char)(*target++));*/
295 }
296 }
297#endif
298
299 }else{
300 u_charsToUChars(cbuf,target+offset,inputRead);
301 outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
302 }
303 buf->currentPos = pTarget;
304 buf->bufLimit=pTarget+outputWritten;
374ca955 305 *buf->bufLimit=0; /*NUL terminate*/
b75a7d8f
A
306 if(cbuf!=carr){
307 uprv_free(cbuf);
308 }
309 return buf;
310}
311
312
313
314/* get a UChar from the stream*/
315U_CAPI int32_t U_EXPORT2
316ucbuf_getc(UCHARBUF* buf,UErrorCode* error){
317 if(error==NULL || U_FAILURE(*error)){
318 return FALSE;
319 }
320 if(buf->currentPos>=buf->bufLimit){
321 if(buf->remaining==0){
322 return U_EOF;
323 }
324 buf=ucbuf_fillucbuf(buf,error);
325 if(U_FAILURE(*error)){
326 return U_EOF;
327 }
328 }
329
330 return *(buf->currentPos++);
331}
332
333/* get a UChar32 from the stream*/
334U_CAPI int32_t U_EXPORT2
335ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
336 int32_t retVal = (int32_t)U_EOF;
337 if(error==NULL || U_FAILURE(*error)){
338 return FALSE;
339 }
340 if(buf->currentPos+1>=buf->bufLimit){
341 if(buf->remaining==0){
342 return U_EOF;
343 }
344 buf=ucbuf_fillucbuf(buf,error);
345 if(U_FAILURE(*error)){
346 return U_EOF;
347 }
348 }
349 if(UTF_IS_LEAD(*(buf->currentPos))){
350 retVal=UTF16_GET_PAIR_VALUE(*(buf->currentPos++),*(buf->currentPos++));
351 }else{
352 retVal = *(buf->currentPos++);
353 }
354 return retVal;
355}
356
357/* u_unescapeAt() callback to return a UChar*/
358static UChar U_CALLCONV
359_charAt(int32_t offset, void *context) {
360 return ((UCHARBUF*) context)->currentPos[offset];
361}
362
363/* getc and escape it */
364U_CAPI int32_t U_EXPORT2
365ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) {
366 int32_t length;
367 int32_t offset;
368 UChar32 c32,c1,c2;
369 if(error==NULL || U_FAILURE(*error)){
370 return FALSE;
371 }
372 /* Fill the buffer if it is empty */
373 if (buf->currentPos >=buf->bufLimit-2) {
374 ucbuf_fillucbuf(buf,error);
375 }
376
377 /* Get the next character in the buffer */
378 if (buf->currentPos < buf->bufLimit) {
379 c1 = *(buf->currentPos)++;
380 } else {
381 c1 = U_EOF;
382 }
383
384 c2 = *(buf->currentPos);
385
386 /* If it isn't a backslash, return it */
387 if (c1 != 0x005C) {
388 return c1;
389 }
390
391 /* Determine the amount of data in the buffer */
392 length = (int32_t)(buf->bufLimit - buf->currentPos);
393
394 /* The longest escape sequence is \Uhhhhhhhh; make sure
395 we have at least that many characters */
396 if (length < 10) {
397
398 /* fill the buffer */
399 ucbuf_fillucbuf(buf,error);
400 length = (int32_t)(buf->bufLimit - buf->buffer);
401 }
402
403 /* Process the escape */
404 offset = 0;
405 c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);
406
407 /* check if u_unescapeAt unescaped and converted
408 * to c32 or not
409 */
410 if(c32==0xFFFFFFFF){
374ca955
A
411 if(buf->showWarning) {
412 char context[20];
413 int32_t len = 20;
414 if(length < len) {
415 len = length;
416 }
417 context[len]= 0 ; /* null terminate the buffer */
418 u_UCharsToChars( buf->currentPos, context, len);
419 fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context);
420 }
b75a7d8f
A
421 *error= U_ILLEGAL_ESCAPE_SEQUENCE;
422 return c1;
423 }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){
424 /* Update the current buffer position */
425 buf->currentPos += offset;
426 }else{
427 /* unescaping failed so we just return
428 * c1 and not consume the buffer
429 * this is useful for rules with escapes
430 * in resouce bundles
431 * eg: \' \\ \"
432 */
433 return c1;
434 }
435
436 return c32;
437}
438
439U_CAPI UCHARBUF* U_EXPORT2
440ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){
441
442 FileStream* in = NULL;
443 int32_t fileSize=0;
444 const char* knownCp;
445 if(error==NULL || U_FAILURE(*error)){
446 return NULL;
447 }
448 if(cp==NULL || fileName==NULL){
449 *error = U_ILLEGAL_ARGUMENT_ERROR;
450 return FALSE;
451 }
452 if (!uprv_strcmp(fileName, "-")) {
453 in = T_FileStream_stdin();
454 }else{
455 in = T_FileStream_open(fileName, "rb");
456 }
457
458 if(in!=NULL){
459 UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
460 fileSize = T_FileStream_size(in);
461 if(buf){
462 buf->in=in;
463 buf->conv=NULL;
464 buf->showWarning = showWarning;
465 buf->isBuffered = buffered;
466 buf->signatureLength=0;
467 if(*cp==NULL || **cp=='\0'){
468 /* don't have code page name... try to autodetect */
469 ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error);
470 }else if(ucbuf_isCPKnown(*cp)){
471 /* discard BOM */
472 ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error);
473 }
474 if(U_SUCCESS(*error) && buf->conv==NULL) {
475 buf->conv=ucnv_open(*cp,error);
476 }
477 if(U_FAILURE(*error)){
478 ucnv_close(buf->conv);
479 uprv_free(buf);
480 return NULL;
481 }
482
483 if((buf->conv==NULL) && (buf->showWarning==TRUE)){
484 fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n");
485 }
486 buf->remaining=fileSize-buf->signatureLength;
487 if(buf->isBuffered){
b75a7d8f
A
488 buf->bufCapacity=MAX_U_BUF;
489 }else{
374ca955 490 buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/;
b75a7d8f 491 }
374ca955 492 buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity );
b75a7d8f
A
493 if (buf->buffer == NULL) {
494 *error = U_MEMORY_ALLOCATION_ERROR;
495 return NULL;
496 }
497 buf->currentPos=buf->buffer;
498 buf->bufLimit=buf->buffer;
499 if(U_FAILURE(*error)){
500 fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error));
501 return NULL;
502 }
503 buf=ucbuf_fillucbuf(buf,error);
504 return buf;
505 }else{
506 *error = U_MEMORY_ALLOCATION_ERROR;
507 return NULL;
508 }
509
510 }
511 *error =U_FILE_ACCESS_ERROR;
512 return NULL;
513}
514
515
516
517/* TODO: this method will fail if at the
518 * begining of buffer and the uchar to unget
519 * is from the previous buffer. Need to implement
520 * system to take care of that situation.
521 */
522U_CAPI void U_EXPORT2
523ucbuf_ungetc(int32_t c,UCHARBUF* buf){
524 /* decrement currentPos pointer
525 * if not at the begining of buffer
526 */
527 UChar escaped[8] ={'\0'};
528 int32_t len =0;
529 if(c > 0xFFFF){
530 len = uprv_itou(escaped,8,c,16,8);
531 }else{
532 len=uprv_itou(escaped,8,c,16,4);
533 }
534 if(buf->currentPos!=buf->buffer){
535 if(*(buf->currentPos-1)==c){
536 buf->currentPos--;
537 }else if(u_strncmp(buf->currentPos-len,escaped,len) == 0){
538 while(--len>0){
539 buf->currentPos--;
540 }
541 }
542 }
543}
544
545/* frees the resources of UChar* buffer */
546static void
547ucbuf_closebuf(UCHARBUF* buf){
548 uprv_free(buf->buffer);
549 buf->buffer = NULL;
550}
551
552/* close the buf and release resources*/
553U_CAPI void U_EXPORT2
554ucbuf_close(UCHARBUF* buf){
555 if(buf!=NULL){
556 if(buf->conv){
557 ucnv_close(buf->conv);
558 }
559 T_FileStream_close(buf->in);
560 ucbuf_closebuf(buf);
561 uprv_free(buf);
562 }
563}
564
565/* rewind the buf and file stream */
566U_CAPI void U_EXPORT2
567ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){
568 if(error==NULL || U_FAILURE(*error)){
569 return;
570 }
571 if(buf){
572 buf->currentPos=buf->buffer;
573 buf->bufLimit=buf->buffer;
574 T_FileStream_rewind(buf->in);
575 buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength;
576
577 ucnv_resetToUnicode(buf->conv);
578 if(buf->signatureLength>0) {
579 UChar target[1]={ 0 };
580 UChar* pTarget;
581 char start[8];
582 const char* pStart;
583 int32_t numRead;
584
585 /* read the signature bytes */
586 numRead=T_FileStream_read(buf->in, start, buf->signatureLength);
587
588 /* convert and ignore initial U+FEFF, and the buffer overflow */
589 pTarget = target;
590 pStart = start;
591 ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error);
592 if(*error==U_BUFFER_OVERFLOW_ERROR) {
593 *error=U_ZERO_ERROR;
594 }
595
596 /* verify that we successfully read exactly U+FEFF */
597 if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) {
598 *error=U_INTERNAL_PROGRAM_ERROR;
599 }
600 }
601 }
602}
603
604
605U_CAPI int32_t U_EXPORT2
606ucbuf_size(UCHARBUF* buf){
607 if(buf){
608 if(buf->isBuffered){
609 return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv);
610 }else{
374ca955 611 return (int32_t)(buf->bufLimit - buf->buffer);
b75a7d8f
A
612 }
613 }
614 return 0;
615}
616
617U_CAPI const UChar* U_EXPORT2
618ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){
619 if(error==NULL || U_FAILURE(*error)){
620 return NULL;
621 }
622 if(buf==NULL || len==NULL){
623 *error = U_ILLEGAL_ARGUMENT_ERROR;
624 return NULL;
625 }
374ca955 626 *len = (int32_t)(buf->bufLimit - buf->buffer);
b75a7d8f
A
627 return buf->buffer;
628}
629
630U_CAPI const char* U_EXPORT2
631ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){
632 int32_t requiredLen = 0;
633 int32_t dirlen = 0;
634 int32_t filelen = 0;
635 if(status==NULL || U_FAILURE(*status)){
636 return NULL;
637 }
638
639 if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){
640 *status = U_ILLEGAL_ARGUMENT_ERROR;
641 return NULL;
642 }
643
644
645 dirlen = (int32_t)uprv_strlen(inputDir);
646 filelen = (int32_t)uprv_strlen(fileName);
647 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
648 requiredLen = dirlen + filelen + 2;
649 if((*len < requiredLen) || target==NULL){
650 *len = requiredLen;
651 *status = U_BUFFER_OVERFLOW_ERROR;
652 return NULL;
653 }
654
655 target[0] = '\0';
656 /*
657 * append the input dir to openFileName if the first char in
658 * filename is not file seperation char and the last char input directory is not '.'.
659 * This is to support :
660 * genrb -s. /home/icu/data
661 * genrb -s. icu/data
662 * The user cannot mix notations like
663 * genrb -s. /icu/data --- the absolute path specified. -s redundant
664 * user should use
665 * genrb -s. icu/data --- start from CWD and look in icu/data dir
666 */
667 if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
668 uprv_strcpy(target, inputDir);
669 target[dirlen] = U_FILE_SEP_CHAR;
670 }
671 target[dirlen + 1] = '\0';
672 } else {
673 requiredLen = dirlen + filelen + 1;
674 if((*len < requiredLen) || target==NULL){
675 *len = requiredLen;
676 *status = U_BUFFER_OVERFLOW_ERROR;
677 return NULL;
678 }
679
680 uprv_strcpy(target, inputDir);
681 }
682
683 uprv_strcat(target, fileName);
684 return target;
685}
686/*
687 * Unicode TR 13 says any of the below chars is
688 * a new line char in a readline function in addition
689 * to CR+LF combination which needs to be
690 * handled seperately
691 */
692static UBool ucbuf_isCharNewLine(UChar c){
693 switch(c){
694 case 0x000A: /* LF */
695 case 0x000D: /* CR */
696 case 0x000C: /* FF */
697 case 0x0085: /* NEL */
698 case 0x2028: /* LS */
699 case 0x2029: /* PS */
700 return TRUE;
701 default:
702 return FALSE;
703 }
704}
705
706U_CAPI const UChar* U_EXPORT2
707ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){
708 UChar* temp = buf->currentPos;
709 UChar* savePos =NULL;
710 UChar c=0x0000;
711 if(buf->isBuffered){
374ca955
A
712 /* The input is buffered we have to do more
713 * for returning a pointer U_TRUNCATED_CHAR_FOUND
b75a7d8f
A
714 */
715 for(;;){
716 c = *temp++;
717 if(buf->remaining==0){
374ca955 718 return NULL; /* end of file is reached return NULL */
b75a7d8f
A
719 }
720 if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){
721 *err= U_TRUNCATED_CHAR_FOUND;
722 return NULL;
723 }else{
724 ucbuf_fillucbuf(buf,err);
725 if(U_FAILURE(*err)){
374ca955 726 return NULL;
b75a7d8f
A
727 }
728 }
729 /*
730 * Accoding to TR 13 readLine functions must interpret
731 * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators
732 */
733 /* Windows CR LF */
734 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
374ca955 735 *len = (int32_t)(temp++ - buf->currentPos);
b75a7d8f
A
736 savePos = buf->currentPos;
737 buf->currentPos = temp;
738 return savePos;
739 }
740 /* else */
741
742 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */
374ca955 743 *len = (int32_t)(temp - buf->currentPos);
b75a7d8f
A
744 savePos = buf->currentPos;
745 buf->currentPos = temp;
746 return savePos;
747 }
748 }
749 }else{
750 /* we know that all input is read into the internal
751 * buffer so we can safely return pointers
752 */
753 for(;;){
754 c = *temp++;
755
756 if(buf->currentPos==buf->bufLimit){
374ca955 757 return NULL; /* end of file is reached return NULL */
b75a7d8f
A
758 }
759 /* Windows CR LF */
760 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
374ca955 761 *len = (int32_t)(temp++ - buf->currentPos);
b75a7d8f
A
762 savePos = buf->currentPos;
763 buf->currentPos = temp;
764 return savePos;
765 }
766 /* else */
767 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */
374ca955 768 *len = (int32_t)(temp - buf->currentPos);
b75a7d8f
A
769 savePos = buf->currentPos;
770 buf->currentPos = temp;
771 return savePos;
772 }
773 }
774 }
775 /* not reached */
776 /* A compiler warning will appear if all paths don't contain a return statement. */
777/* return NULL;*/
778}