]> git.saurik.com Git - apple/icu.git/blob - icuSources/samples/layout/UnicodeReader.cpp
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / samples / layout / UnicodeReader.cpp
1 /*
2 ******************************************************************************
3 * Copyright (C) 1998-2005, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 ******************************************************************************
6 */
7
8 #include <errno.h>
9 #include <stdio.h>
10 #include <string.h>
11
12 #include "unicode/utypes.h"
13 #include "unicode/unistr.h"
14
15 #include "layout/LETypes.h"
16
17 #include "GUISupport.h"
18 #include "UnicodeReader.h"
19
/*
 * Reduce a (possibly signed) char value to its unsigned byte value, 0..255,
 * so that it can be printed with %X without sign-extension artifacts.
 * The argument is parenthesized so that compound expressions are cast
 * as a whole rather than only their first operand.
 */
#define BYTE(b) (((int) (b)) & 0xFF)
21
22 /*
23 * Read the text from a file. The text must start with a Unicode Byte
24 * Order Mark (BOM) so that we know what order to read the bytes in.
25 */
26 const UChar *UnicodeReader::readFile(const char *fileName, GUISupport *guiSupport, int32_t &charCount)
27 {
28 FILE *f;
29 int32_t fileSize;
30
31 UChar *charBuffer;
32 char *byteBuffer;
33 char startBytes[4] = {'\xA5', '\xA5', '\xA5', '\xA5'};
34 char errorMessage[128];
35 const char *cp = "";
36 int32_t signatureLength = 0;
37
38 f = fopen(fileName, "rb");
39
40 if( f == NULL ) {
41 sprintf(errorMessage,"Couldn't open %s: %s \n", fileName, strerror(errno));
42 guiSupport->postErrorMessage(errorMessage, "Text File Error");
43 return 0;
44 }
45
46 fseek(f, 0, SEEK_END);
47 fileSize = ftell(f);
48
49 fseek(f, 0, SEEK_SET);
50 fread(startBytes, sizeof(char), 4, f);
51
52 if (startBytes[0] == '\xFE' && startBytes[1] == '\xFF') {
53 cp = "UTF-16BE";
54 signatureLength = 2;
55 } else if (startBytes[0] == '\xFF' && startBytes[1] == '\xFE') {
56 if (startBytes[2] == '\x00' && startBytes[3] == '\x00') {
57 cp = "UTF-32LE";
58 signatureLength = 4;
59 } else {
60 cp = "UTF-16LE";
61 signatureLength = 2;
62 }
63 } else if (startBytes[0] == '\xEF' && startBytes[1] == '\xBB' && startBytes[2] == '\xBF') {
64 cp = "UTF-8";
65 signatureLength = 3;
66 } else if (startBytes[0] == '\x0E' && startBytes[1] == '\xFE' && startBytes[2] == '\xFF') {
67 cp = "SCSU";
68 signatureLength = 3;
69 } else if (startBytes[0] == '\x00' && startBytes[1] == '\x00' &&
70 startBytes[2] == '\xFE' && startBytes[3] == '\xFF') {
71 cp = "UTF-32BE";
72 signatureLength = 4;
73 } else {
74 sprintf(errorMessage, "Couldn't detect the encoding of %s: (%2.2X, %2.2X, %2.2X, %2.2X)\n", fileName,
75 BYTE(startBytes[0]), BYTE(startBytes[1]), BYTE(startBytes[2]), BYTE(startBytes[3]));
76 guiSupport->postErrorMessage(errorMessage, "Text File Error");
77 fclose(f);
78 return 0;
79 }
80
81 fileSize -= signatureLength;
82 fseek(f, signatureLength, SEEK_SET);
83 byteBuffer = new char[fileSize];
84
85 if(byteBuffer == 0) {
86 sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
87 guiSupport->postErrorMessage(errorMessage, "Text File Error");
88 fclose(f);
89 return 0;
90 }
91
92 fread(byteBuffer, sizeof(char), fileSize, f);
93 if( ferror(f) ) {
94 sprintf(errorMessage,"Couldn't read %s: %s \n", fileName, strerror(errno));
95 guiSupport->postErrorMessage(errorMessage, "Text File Error");
96 fclose(f);
97 delete[] byteBuffer;
98 return 0;
99 }
100 fclose(f);
101
102 UnicodeString myText(byteBuffer, fileSize, cp);
103
104 delete[] byteBuffer;
105
106 charCount = myText.length();
107 charBuffer = LE_NEW_ARRAY(UChar, charCount + 1);
108 if(charBuffer == 0) {
109 sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
110 guiSupport->postErrorMessage(errorMessage, "Text File Error");
111 return 0;
112 }
113
114 myText.extract(0, myText.length(), charBuffer);
115 charBuffer[charCount] = 0; // NULL terminate for easier reading in the debugger
116
117 return charBuffer;
118 }
119