git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/samples/layout/UnicodeReader.cpp

... / ...

Commit	Line	Data
	1	/*
	2	******************************************************************************
	3	* Copyright (C) 1998-2005, International Business Machines Corporation and *
	4	* others. All Rights Reserved. *
	5	******************************************************************************
	6	*/
	7
	8	#include <errno.h>
	9	#include <stdio.h>
	10	#include <string.h>
	11
	12	#include "unicode/utypes.h"
	13	#include "unicode/unistr.h"
	14
	15	#include "layout/LETypes.h"
	16
	17	#include "GUISupport.h"
	18	#include "UnicodeReader.h"
	19
	20	#define BYTE(b) (((int) b) & 0xFF)
	21
	22	/*
	23	* Read the text from a file. The text must start with a Unicode Byte
	24	* Order Mark (BOM) so that we know what order to read the bytes in.
	25	*/
	26	const UChar UnicodeReader::readFile(const char fileName, GUISupport *guiSupport, int32_t &charCount)
	27	{
	28	FILE *f;
	29	int32_t fileSize;
	30
	31	UChar *charBuffer;
	32	char *byteBuffer;
	33	char startBytes[4] = {'\xA5', '\xA5', '\xA5', '\xA5'};
	34	char errorMessage[128];
	35	const char *cp = "";
	36	int32_t signatureLength = 0;
	37
	38	f = fopen(fileName, "rb");
	39
	40	if( f == NULL ) {
	41	sprintf(errorMessage,"Couldn't open %s: %s \n", fileName, strerror(errno));
	42	guiSupport->postErrorMessage(errorMessage, "Text File Error");
	43	return 0;
	44	}
	45
	46	fseek(f, 0, SEEK_END);
	47	fileSize = ftell(f);
	48
	49	fseek(f, 0, SEEK_SET);
	50	fread(startBytes, sizeof(char), 4, f);
	51
	52	if (startBytes[0] == '\xFE' && startBytes[1] == '\xFF') {
	53	cp = "UTF-16BE";
	54	signatureLength = 2;
	55	} else if (startBytes[0] == '\xFF' && startBytes[1] == '\xFE') {
	56	if (startBytes[2] == '\x00' && startBytes[3] == '\x00') {
	57	cp = "UTF-32LE";
	58	signatureLength = 4;
	59	} else {
	60	cp = "UTF-16LE";
	61	signatureLength = 2;
	62	}
	63	} else if (startBytes[0] == '\xEF' && startBytes[1] == '\xBB' && startBytes[2] == '\xBF') {
	64	cp = "UTF-8";
	65	signatureLength = 3;
	66	} else if (startBytes[0] == '\x0E' && startBytes[1] == '\xFE' && startBytes[2] == '\xFF') {
	67	cp = "SCSU";
	68	signatureLength = 3;
	69	} else if (startBytes[0] == '\x00' && startBytes[1] == '\x00' &&
	70	startBytes[2] == '\xFE' && startBytes[3] == '\xFF') {
	71	cp = "UTF-32BE";
	72	signatureLength = 4;
	73	} else {
	74	sprintf(errorMessage, "Couldn't detect the encoding of %s: (%2.2X, %2.2X, %2.2X, %2.2X)\n", fileName,
	75	BYTE(startBytes[0]), BYTE(startBytes[1]), BYTE(startBytes[2]), BYTE(startBytes[3]));
	76	guiSupport->postErrorMessage(errorMessage, "Text File Error");
	77	fclose(f);
	78	return 0;
	79	}
	80
	81	fileSize -= signatureLength;
	82	fseek(f, signatureLength, SEEK_SET);
	83	byteBuffer = new char[fileSize];
	84
	85	if(byteBuffer == 0) {
	86	sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
	87	guiSupport->postErrorMessage(errorMessage, "Text File Error");
	88	fclose(f);
	89	return 0;
	90	}
	91
	92	fread(byteBuffer, sizeof(char), fileSize, f);
	93	if( ferror(f) ) {
	94	sprintf(errorMessage,"Couldn't read %s: %s \n", fileName, strerror(errno));
	95	guiSupport->postErrorMessage(errorMessage, "Text File Error");
	96	fclose(f);
	97	delete[] byteBuffer;
	98	return 0;
	99	}
	100	fclose(f);
	101
	102	UnicodeString myText(byteBuffer, fileSize, cp);
	103
	104	delete[] byteBuffer;
	105
	106	charCount = myText.length();
	107	charBuffer = LE_NEW_ARRAY(UChar, charCount + 1);
	108	if(charBuffer == 0) {
	109	sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
	110	guiSupport->postErrorMessage(errorMessage, "Text File Error");
	111	return 0;
	112	}
	113
	114	myText.extract(0, myText.length(), charBuffer);
	115	charBuffer[charCount] = 0; // NULL terminate for easier reading in the debugger
	116
	117	return charBuffer;
	118	}
	119