[apple/icu.git] / icuSources / samples / layout / UnicodeReader.cpp

/*
 ******************************************************************************
 * Copyright (C) 1998-2005, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */

#include <errno.h>
#include <stdio.h>
#include <string.h>

#include <new>

#include "unicode/utypes.h"
#include "unicode/unistr.h"

#include "layout/LETypes.h"

#include "GUISupport.h"
#include "UnicodeReader.h"

/* Widen b to int and keep only its low 8 bits, so a (possibly signed) char prints as 00..FF. */
/* The argument is parenthesized so the cast applies to the whole expression passed in. */
#define BYTE(b) (((int) (b)) & 0xFF)

/*
 * Read the text from a file. The text must start with a Unicode Byte
 * Order Mark (BOM) so that we know what order to read the bytes in.
 *
 * fileName   - path of the file to open (opened in binary mode).
 * guiSupport - receives a "Text File Error" message for every failure.
 * charCount  - set to the number of UChars produced by the conversion.
 *
 * Returns a zero-terminated UChar buffer allocated with LE_NEW_ARRAY, or 0
 * if the file can't be opened, sized, recognized, read, or if memory can't
 * be obtained. The buffer is not freed here; releasing it is left to the
 * caller.
 */
const UChar *UnicodeReader::readFile(const char *fileName, GUISupport *guiSupport, int32_t &charCount)
{
    FILE *f;
    long endPos = -1;
    int32_t fileSize;
    size_t bytesRead;
    
    UChar *charBuffer;
    char *byteBuffer;
    // Filler bytes that match no signature, so a file shorter than four
    // bytes can't be mistaken for a longer BOM.
    char startBytes[4] = {'\xA5', '\xA5', '\xA5', '\xA5'};
    char errorMessage[128];
    const char *cp = "";
    int32_t signatureLength = 0;
    
    f = fopen(fileName, "rb");
    
    if( f == NULL ) {
        // snprintf, not sprintf: fileName is caller-supplied and may be
        // longer than errorMessage.
        snprintf(errorMessage, sizeof(errorMessage), "Couldn't open %s: %s \n", fileName, strerror(errno));
        guiSupport->postErrorMessage(errorMessage, "Text File Error");
        return 0;
    }
    
    // Determine the file size by seeking to the end.
    if (fseek(f, 0, SEEK_END) == 0) {
        endPos = ftell(f);
    }

    if (endPos < 0 || fseek(f, 0, SEEK_SET) != 0) {
        snprintf(errorMessage, sizeof(errorMessage), "Couldn't read %s: %s \n", fileName, strerror(errno));
        guiSupport->postErrorMessage(errorMessage, "Text File Error");
        fclose(f);
        return 0;
    }

    // The size is carried in an int32_t from here on; refuse anything that
    // doesn't fit instead of silently truncating it.
    if (endPos > 0x7FFFFFFFL) {
        snprintf(errorMessage, sizeof(errorMessage), "%s is too large to read\n", fileName);
        guiSupport->postErrorMessage(errorMessage, "Text File Error");
        fclose(f);
        return 0;
    }

    fileSize = (int32_t) endPos;

    // A short read is fine here: the unread bytes keep their filler value.
    bytesRead = fread(startBytes, sizeof(char), 4, f);
    if (bytesRead < 4 && ferror(f)) {
        snprintf(errorMessage, sizeof(errorMessage), "Couldn't read %s: %s \n", fileName, strerror(errno));
        guiSupport->postErrorMessage(errorMessage, "Text File Error");
        fclose(f);
        return 0;
    }

    if (startBytes[0] == '\xFE' && startBytes[1] == '\xFF') {
        cp = "UTF-16BE";
        signatureLength = 2;
    } else if (startBytes[0] == '\xFF' && startBytes[1] == '\xFE') {
        // FF FE is also the start of the UTF-32LE signature, so look at
        // the next two bytes to tell them apart.
        if (startBytes[2] == '\x00' && startBytes[3] == '\x00') {
            cp = "UTF-32LE";
            signatureLength = 4;
        } else {
            cp = "UTF-16LE";
            signatureLength = 2;
        }
    } else if (startBytes[0] == '\xEF' && startBytes[1] == '\xBB' && startBytes[2] == '\xBF') {
        cp = "UTF-8";
        signatureLength = 3;
    } else if (startBytes[0] == '\x0E' && startBytes[1] == '\xFE' && startBytes[2] == '\xFF') {
        cp = "SCSU";
        signatureLength = 3;
    } else if (startBytes[0] == '\x00' && startBytes[1] == '\x00' &&
        startBytes[2] == '\xFE' && startBytes[3] == '\xFF') {
        cp = "UTF-32BE";
        signatureLength = 4;
    } else {
        snprintf(errorMessage, sizeof(errorMessage),
                    "Couldn't detect the encoding of %s: (%2.2X, %2.2X, %2.2X, %2.2X)\n", fileName,
                    BYTE(startBytes[0]), BYTE(startBytes[1]), BYTE(startBytes[2]), BYTE(startBytes[3]));
        guiSupport->postErrorMessage(errorMessage, "Text File Error");
        fclose(f);
        return 0;
    }
        
    // Skip the signature; only the text after it is converted.
    fileSize -= signatureLength;
    if (fileSize < 0 || fseek(f, signatureLength, SEEK_SET) != 0) {
        snprintf(errorMessage, sizeof(errorMessage), "Couldn't read %s: %s \n", fileName, strerror(errno));
        guiSupport->postErrorMessage(errorMessage, "Text File Error");
        fclose(f);
        return 0;
    }

    // nothrow, so that an allocation failure reaches the check below
    // instead of throwing past it.
    byteBuffer = new (std::nothrow) char[fileSize];
    
    if(byteBuffer == 0) {
        snprintf(errorMessage, sizeof(errorMessage), "Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
        guiSupport->postErrorMessage(errorMessage, "Text File Error");
        fclose(f);
        return 0;
    }
    
    bytesRead = fread(byteBuffer, sizeof(char), fileSize, f);
    if( ferror(f) ) {
        snprintf(errorMessage, sizeof(errorMessage), "Couldn't read %s: %s \n", fileName, strerror(errno));
        guiSupport->postErrorMessage(errorMessage, "Text File Error");
        fclose(f);
        delete[] byteBuffer;
        return 0;
    }
    fclose(f);
    
    // Convert only the bytes that were actually read, so a short read never
    // feeds uninitialized memory to the converter.
    UnicodeString myText(byteBuffer, (int32_t) bytesRead, cp);

    delete[] byteBuffer;
    
    charCount = myText.length();
    charBuffer = LE_NEW_ARRAY(UChar, charCount + 1);
    if(charBuffer == 0) {
        snprintf(errorMessage, sizeof(errorMessage), "Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
        guiSupport->postErrorMessage(errorMessage, "Text File Error");
        return 0;
    }
    
    myText.extract(0, myText.length(), charBuffer);
    charBuffer[charCount] = 0;    // NULL terminate for easier reading in the debugger
    
    return charBuffer;
}
Commit	Line	Data
b75a7d8f A	1	/*
b75a7d8f A	2	******************************************************************************
73c04bcf	3	* Copyright (C) 1998-2005, International Business Machines Corporation and *
b75a7d8f A	4	* others. All Rights Reserved. *
	5	******************************************************************************
	6	*/
	7
	8	#include <errno.h>
	9	#include <stdio.h>
	10	#include <string.h>
	11
	12	#include "unicode/utypes.h"
	13	#include "unicode/unistr.h"
	14
	15	#include "layout/LETypes.h"
	16
	17	#include "GUISupport.h"
	18	#include "UnicodeReader.h"
	19
	20	#define BYTE(b) (((int) b) & 0xFF)
	21
	22	/*
	23	* Read the text from a file. The text must start with a Unicode Byte
	24	* Order Mark (BOM) so that we know what order to read the bytes in.
	25	*/
	26	const UChar *UnicodeReader::readFile(const char *fileName, GUISupport *guiSupport, int32_t &charCount)
	27	{
	28	FILE *f;
	29	int32_t fileSize;
	30
	31	UChar *charBuffer;
	32	char *byteBuffer;
	33	char startBytes[4] = {'\xA5', '\xA5', '\xA5', '\xA5'};
	34	char errorMessage[128];
73c04bcf	35	const char *cp = "";
b75a7d8f A	36	int32_t signatureLength = 0;
	37
	38	f = fopen(fileName, "rb");
	39
	40	if( f == NULL ) {
	41	sprintf(errorMessage,"Couldn't open %s: %s \n", fileName, strerror(errno));
	42	guiSupport->postErrorMessage(errorMessage, "Text File Error");
	43	return 0;
	44	}
	45
	46	fseek(f, 0, SEEK_END);
	47	fileSize = ftell(f);
	48
	49	fseek(f, 0, SEEK_SET);
	50	fread(startBytes, sizeof(char), 4, f);
	51
	52	if (startBytes[0] == '\xFE' && startBytes[1] == '\xFF') {
	53	cp = "UTF-16BE";
	54	signatureLength = 2;
	55	} else if (startBytes[0] == '\xFF' && startBytes[1] == '\xFE') {
	56	if (startBytes[2] == '\x00' && startBytes[3] == '\x00') {
	57	cp = "UTF-32LE";
	58	signatureLength = 4;
	59	} else {
	60	cp = "UTF-16LE";
	61	signatureLength = 2;
	62	}
	63	} else if (startBytes[0] == '\xEF' && startBytes[1] == '\xBB' && startBytes[2] == '\xBF') {
	64	cp = "UTF-8";
	65	signatureLength = 3;
	66	} else if (startBytes[0] == '\x0E' && startBytes[1] == '\xFE' && startBytes[2] == '\xFF') {
	67	cp = "SCSU";
	68	signatureLength = 3;
	69	} else if (startBytes[0] == '\x00' && startBytes[1] == '\x00' &&
	70	startBytes[2] == '\xFE' && startBytes[3] == '\xFF') {
	71	cp = "UTF-32BE";
	72	signatureLength = 4;
	73	} else {
	74	sprintf(errorMessage, "Couldn't detect the encoding of %s: (%2.2X, %2.2X, %2.2X, %2.2X)\n", fileName,
	75	BYTE(startBytes[0]), BYTE(startBytes[1]), BYTE(startBytes[2]), BYTE(startBytes[3]));
	76	guiSupport->postErrorMessage(errorMessage, "Text File Error");
	77	fclose(f);
	78	return 0;
	79	}
	80
	81	fileSize -= signatureLength;
	82	fseek(f, signatureLength, SEEK_SET);
	83	byteBuffer = new char[fileSize];
	84
	85	if(byteBuffer == 0) {
	86	sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
	87	guiSupport->postErrorMessage(errorMessage, "Text File Error");
	88	fclose(f);
	89	return 0;
	90	}
	91
	92	fread(byteBuffer, sizeof(char), fileSize, f);
	93	if( ferror(f) ) {
	94	sprintf(errorMessage,"Couldn't read %s: %s \n", fileName, strerror(errno));
	95	guiSupport->postErrorMessage(errorMessage, "Text File Error");
	96	fclose(f);
	97	delete[] byteBuffer;
	98	return 0;
	99	}
100	fclose(f);
101
102	UnicodeString myText(byteBuffer, fileSize, cp);
103
104	delete[] byteBuffer;
105
106	charCount = myText.length();
107	charBuffer = LE_NEW_ARRAY(UChar, charCount + 1);
108	if(charBuffer == 0) {
109	sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
110	guiSupport->postErrorMessage(errorMessage, "Text File Error");
111	return 0;
112	}
113
114	myText.extract(0, myText.length(), charBuffer);
115	charBuffer[charCount] = 0; // NULL terminate for easier reading in the debugger
116
117	return charBuffer;
118	}
119