icuSources/samples/layout/UnicodeReader.cpp

   1 /*
   2  ******************************************************************************
   3  * © 2016 and later: Unicode, Inc. and others.                    *
   4  * License & terms of use: http://www.unicode.org/copyright.html#License      *
   5  ******************************************************************************
   6  ******************************************************************************
   7  * Copyright (C) 1998-2005, International Business Machines Corporation and   *
   8  * others. All Rights Reserved.                                               *
   9  ******************************************************************************
  10  */
  11
  12 #include <errno.h>
  13 #include <stdio.h>
  14 #include <string.h>
  15
  16 #include "unicode/utypes.h"
  17 #include "unicode/unistr.h"
  18
  19 #include "layout/LETypes.h"
  20
  21 #include "GUISupport.h"
  22 #include "UnicodeReader.h"
  23
  24 #define BYTE(b) (((int) b) & 0xFF)
  25
  26 /*
  27  * Read the text from a file. The text must start with a Unicode Byte
  28  * Order Mark (BOM) so that we know what order to read the bytes in.
  29  */
  30 const UChar *UnicodeReader::readFile(const char *fileName, GUISupport *guiSupport, int32_t &charCount)
  31 {
  32     FILE *f;
  33     int32_t fileSize;
  34
  35     UChar *charBuffer;
  36     char *byteBuffer;
  37     char startBytes[4] = {'\xA5', '\xA5', '\xA5', '\xA5'};
  38     char errorMessage[128];
  39     const char *cp = "";
  40     int32_t signatureLength = 0;
  41
  42     f = fopen(fileName, "rb");
  43
  44     if( f == NULL ) {
  45         sprintf(errorMessage,"Couldn't open %s: %s \n", fileName, strerror(errno));
  46         guiSupport->postErrorMessage(errorMessage, "Text File Error");
  47         return 0;
  48     }
  49
  50     fseek(f, 0, SEEK_END);
  51     fileSize = ftell(f);
  52
  53     fseek(f, 0, SEEK_SET);
  54     fread(startBytes, sizeof(char), 4, f);
  55
  56     if (startBytes[0] == '\xFE' && startBytes[1] == '\xFF') {
  57         cp = "UTF-16BE";
  58         signatureLength = 2;
  59     } else if (startBytes[0] == '\xFF' && startBytes[1] == '\xFE') {
  60         if (startBytes[2] == '\x00' && startBytes[3] == '\x00') {
  61             cp = "UTF-32LE";
  62             signatureLength = 4;
  63         } else {
  64             cp = "UTF-16LE";
  65             signatureLength = 2;
  66         }
  67     } else if (startBytes[0] == '\xEF' && startBytes[1] == '\xBB' && startBytes[2] == '\xBF') {
  68         cp = "UTF-8";
  69         signatureLength = 3;
  70     } else if (startBytes[0] == '\x0E' && startBytes[1] == '\xFE' && startBytes[2] == '\xFF') {
  71         cp = "SCSU";
  72         signatureLength = 3;
  73     } else if (startBytes[0] == '\x00' && startBytes[1] == '\x00' &&
  74         startBytes[2] == '\xFE' && startBytes[3] == '\xFF') {
  75         cp = "UTF-32BE";
  76         signatureLength = 4;
  77     } else {
  78         sprintf(errorMessage, "Couldn't detect the encoding of %s: (%2.2X, %2.2X, %2.2X, %2.2X)\n", fileName,
  79                     BYTE(startBytes[0]), BYTE(startBytes[1]), BYTE(startBytes[2]), BYTE(startBytes[3]));
  80         guiSupport->postErrorMessage(errorMessage, "Text File Error");
  81         fclose(f);
  82         return 0;
  83     }
  84
  85     fileSize -= signatureLength;
  86     fseek(f, signatureLength, SEEK_SET);
  87     byteBuffer = new char[fileSize];
  88
  89     if(byteBuffer == 0) {
  90         sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
  91         guiSupport->postErrorMessage(errorMessage, "Text File Error");
  92         fclose(f);
  93         return 0;
  94     }
  95
  96     fread(byteBuffer, sizeof(char), fileSize, f);
  97     if( ferror(f) ) {
  98         sprintf(errorMessage,"Couldn't read %s: %s \n", fileName, strerror(errno));
  99         guiSupport->postErrorMessage(errorMessage, "Text File Error");
 100         fclose(f);
 101         delete[] byteBuffer;
 102         return 0;
 103     }
 104     fclose(f);
 105
 106     UnicodeString myText(byteBuffer, fileSize, cp);
 107
 108     delete[] byteBuffer;
 109
 110     charCount = myText.length();
 111     charBuffer = LE_NEW_ARRAY(UChar, charCount + 1);
 112     if(charBuffer == 0) {
 113         sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
 114         guiSupport->postErrorMessage(errorMessage, "Text File Error");
 115         return 0;
 116     }
 117
 118     myText.extract(0, myText.length(), charBuffer);
 119     charBuffer[charCount] = 0;    // NULL terminate for easier reading in the debugger
 120
 121     return charBuffer;
 122 }
 123