]> git.saurik.com Git - apple/icu.git/blob - icuSources/samples/layout/UnicodeReader.cpp
ICU-64243.0.1.tar.gz
[apple/icu.git] / icuSources / samples / layout / UnicodeReader.cpp
1 /*
2 ******************************************************************************
3 * © 2016 and later: Unicode, Inc. and others. *
4 * License & terms of use: http://www.unicode.org/copyright.html#License *
5 ******************************************************************************
6 ******************************************************************************
7 * Copyright (C) 1998-2005, International Business Machines Corporation and *
8 * others. All Rights Reserved. *
9 ******************************************************************************
10 */
11
12 #include <errno.h>
13 #include <stdio.h>
14 #include <string.h>
15
16 #include "unicode/utypes.h"
17 #include "unicode/unistr.h"
18
19 #include "layout/LETypes.h"
20
21 #include "GUISupport.h"
22 #include "UnicodeReader.h"
23
/* Convert a (possibly signed) char-sized value to its unsigned byte value, 0..255, as an int. */
#define BYTE(b) (static_cast<int>(b) & 0xFF)
25
26 /*
27 * Read the text from a file. The text must start with a Unicode Byte
28 * Order Mark (BOM) so that we know what order to read the bytes in.
29 */
30 const UChar *UnicodeReader::readFile(const char *fileName, GUISupport *guiSupport, int32_t &charCount)
31 {
32 FILE *f;
33 int32_t fileSize;
34
35 UChar *charBuffer;
36 char *byteBuffer;
37 char startBytes[4] = {'\xA5', '\xA5', '\xA5', '\xA5'};
38 char errorMessage[128];
39 const char *cp = "";
40 int32_t signatureLength = 0;
41
42 f = fopen(fileName, "rb");
43
44 if( f == NULL ) {
45 sprintf(errorMessage,"Couldn't open %s: %s \n", fileName, strerror(errno));
46 guiSupport->postErrorMessage(errorMessage, "Text File Error");
47 return 0;
48 }
49
50 fseek(f, 0, SEEK_END);
51 fileSize = ftell(f);
52
53 fseek(f, 0, SEEK_SET);
54 fread(startBytes, sizeof(char), 4, f);
55
56 if (startBytes[0] == '\xFE' && startBytes[1] == '\xFF') {
57 cp = "UTF-16BE";
58 signatureLength = 2;
59 } else if (startBytes[0] == '\xFF' && startBytes[1] == '\xFE') {
60 if (startBytes[2] == '\x00' && startBytes[3] == '\x00') {
61 cp = "UTF-32LE";
62 signatureLength = 4;
63 } else {
64 cp = "UTF-16LE";
65 signatureLength = 2;
66 }
67 } else if (startBytes[0] == '\xEF' && startBytes[1] == '\xBB' && startBytes[2] == '\xBF') {
68 cp = "UTF-8";
69 signatureLength = 3;
70 } else if (startBytes[0] == '\x0E' && startBytes[1] == '\xFE' && startBytes[2] == '\xFF') {
71 cp = "SCSU";
72 signatureLength = 3;
73 } else if (startBytes[0] == '\x00' && startBytes[1] == '\x00' &&
74 startBytes[2] == '\xFE' && startBytes[3] == '\xFF') {
75 cp = "UTF-32BE";
76 signatureLength = 4;
77 } else {
78 sprintf(errorMessage, "Couldn't detect the encoding of %s: (%2.2X, %2.2X, %2.2X, %2.2X)\n", fileName,
79 BYTE(startBytes[0]), BYTE(startBytes[1]), BYTE(startBytes[2]), BYTE(startBytes[3]));
80 guiSupport->postErrorMessage(errorMessage, "Text File Error");
81 fclose(f);
82 return 0;
83 }
84
85 fileSize -= signatureLength;
86 fseek(f, signatureLength, SEEK_SET);
87 byteBuffer = new char[fileSize];
88
89 if(byteBuffer == 0) {
90 sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
91 guiSupport->postErrorMessage(errorMessage, "Text File Error");
92 fclose(f);
93 return 0;
94 }
95
96 fread(byteBuffer, sizeof(char), fileSize, f);
97 if( ferror(f) ) {
98 sprintf(errorMessage,"Couldn't read %s: %s \n", fileName, strerror(errno));
99 guiSupport->postErrorMessage(errorMessage, "Text File Error");
100 fclose(f);
101 delete[] byteBuffer;
102 return 0;
103 }
104 fclose(f);
105
106 UnicodeString myText(byteBuffer, fileSize, cp);
107
108 delete[] byteBuffer;
109
110 charCount = myText.length();
111 charBuffer = LE_NEW_ARRAY(UChar, charCount + 1);
112 if(charBuffer == 0) {
113 sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
114 guiSupport->postErrorMessage(errorMessage, "Text File Error");
115 return 0;
116 }
117
118 myText.extract(0, myText.length(), charBuffer);
119 charBuffer[charCount] = 0; // NULL terminate for easier reading in the debugger
120
121 return charBuffer;
122 }
123