]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/inputext.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 2005-2008, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
17 * This is an internal header for the Character Set Detection code. The
18 * name is probably too generic...
22 #include "unicode/uobject.h"
24 #if !UCONFIG_NO_CONVERSION
// InputText: internal helper class of the Character Set Detection code.
// It carries the input bytes, their length, an optional declared encoding,
// and a few summary flags about the input. Derives from UMemory.
// NOTE(review): this excerpt is missing lines of the original header (braces,
// access specifiers, and some member declarations); confirm against the full file.
28 class InputText
: public UMemory
// Copy constructor declaration.
// NOTE(review): its access level is not visible in this excerpt.
31 InputText(const InputText
&);
// Constructor taking a UErrorCode by reference.
// Presumably failures are reported through `status`; confirm in the implementation.
33 InputText(UErrorCode
&status
);
// Supplies the input: `in` points to the bytes and `len` is their count.
// Presumably recorded in fRawInput / fRawLength; confirm in the implementation.
36 void setText(const char *in
, int32_t len
);
// Supplies an encoding name (`encoding`, with length `len`) for the input.
// Presumably stored in fDeclaredEncoding; confirm in the implementation.
37 void setDeclaredEncoding(const char *encoding
, int32_t len
);
// Prepares the input for checking; `fStripTags` is a boolean option.
// Presumably it controls the markup removal mentioned below; confirm in the implementation.
39 void MungeInput(UBool fStripTags
);
// NOTE(review): the member described by the next two comment lines is not
// visible in this excerpt (the fInputLen comment refers to it as fInputBytes).
41 // The text to be checked. Markup will have been
42 // removed if appropriate.
44 int32_t fInputLen
; // Length of the byte data in fInputBytes.
// NOTE(review): the member described by the next three comment lines is not
// visible in this excerpt.
45 // Byte frequency statistics for the input text.
46 // Value is percent, not absolute.
47 // Value is rounded up, so zero really means zero occurrences.
49 UBool fC1Bytes
; // True if any bytes in the range 0x80 - 0x9F are in the input; false by default.
50 UBool fOnlyTypicalASCII
; // True if the input has only byte values that are typical for ASCII // rdar://56373519
// Declared encoding of the input; see setDeclaredEncoding().
// NOTE(review): ownership and lifetime of this buffer are not visible here.
51 char *fDeclaredEncoding
;
53 const uint8_t *fRawInput
; // Original, untouched input bytes.
54 // If user gave us a byte array, this is it.
55 // If user gave us a stream, it's read to a
// NOTE(review): the sentence above is cut off; its continuation line is
// missing from this excerpt.
57 int32_t fRawLength
; // Length of data in fRawInput array.
64 #endif /* __INPUTEXT_H */