| 1 | ///////////////////////////////////////////////////////////////////////////// |
| 2 | // Name: convauto.h |
| 3 | // Purpose: interface of wxConvAuto |
| 4 | // Author: wxWidgets team |
| 5 | // RCS-ID: $Id$ |
| 6 | // Licence: wxWindows licence |
| 7 | ///////////////////////////////////////////////////////////////////////////// |
| 8 | |
| 9 | /** |
| 10 | @class wxConvAuto |
| 11 | |
| 12 | This class implements a Unicode to/from multibyte converter capable of |
| 13 | automatically recognizing the encoding of the multibyte text on input. The |
| 14 | logic used is very simple: the class uses the BOM (byte order mark) if it's |
| 15 | present and tries to interpret the input as UTF-8 otherwise. If this fails, |
| 16 | the input is interpreted as being in the default multibyte encoding which |
| 17 | can be specified in the constructor of a wxConvAuto instance and, in turn, |
| 18 | defaults to the value of GetFallbackEncoding() if not explicitly given. |
| 19 | |
| 20 | For the conversion from Unicode to multibyte, the same encoding as was |
| 21 | previously used for multibyte to Unicode conversion is reused. If there had |
| 22 | been no previous multibyte to Unicode conversion, UTF-8 is used by default. |
| 23 | Notice that once the multibyte encoding is automatically detected, it |
| 24 | doesn't change any more, i.e. it is entirely determined by the first use of |
| 25 | the wxConvAuto object in the multibyte-to-Unicode direction. However, creating |
| 26 | a copy of a wxConvAuto object, either via the usual copy constructor or |
| 27 | assignment operator, or using wxMBConv::Clone(), resets the automatically |
| 28 | detected encoding so that the new copy will try to detect the encoding of |
| 29 | the input on first use. |
| 30 | |
| 31 | This class is used by default in wxWidgets classes and functions reading |
| 32 | text from files such as wxFile, wxFFile, wxTextFile, wxFileConfig and |
| 33 | various stream classes so the encoding set with its SetFallbackEncoding() |
| 34 | method will affect how these classes treat input files. In particular, use |
| 35 | this method to change the fall-back multibyte encoding used to interpret |
| 36 | the contents of files whose contents aren't valid UTF-8, or to disallow |
| 37 | the use of a fall-back encoding completely. |
| 38 | |
| 39 | @library{wxbase} |
| 40 | @category{data} |
| 41 | |
| 42 | @see @ref overview_mbconv |
| 43 | */ |
| 44 | class wxConvAuto : public wxMBConv |
| 45 | { |
| 46 | public: |
| 47 | /** |
| 48 | Constructs a new wxConvAuto instance. The object will try to detect the |
| 49 | encoding of the multibyte text given to its wxMBConv::ToWChar() method |
| 50 | automatically but if the automatic detection of Unicode encodings |
| 51 | fails, the fall-back encoding @a enc will be used to interpret it as |
| 52 | multibyte text. |
| 53 | |
| 54 | The default value of @a enc, @c wxFONTENCODING_DEFAULT, means that the |
| 55 | global default value (which can be set using SetFallbackEncoding()) |
| 56 | should be used. As with that method, passing @c wxFONTENCODING_MAX |
| 57 | inhibits using this encoding completely so the input multibyte text |
| 58 | will always be interpreted as UTF-8 in the absence of a BOM and the |
| 59 | conversion will fail if the input doesn't form a valid UTF-8 sequence. |
| 60 | |
| 61 | Another special value is @c wxFONTENCODING_SYSTEM which means to use |
| 62 | the encoding currently used on the user system, i.e. the encoding |
| 63 | returned by wxLocale::GetSystemEncoding(). Any other encoding will be |
| 64 | used as is, e.g. passing @c wxFONTENCODING_ISO8859_1 ensures that |
| 65 | non-UTF-8 input will be treated as latin1. |
| 66 | */ |
| 67 | wxConvAuto(wxFontEncoding enc = wxFONTENCODING_DEFAULT); |
| 68 | |
| 69 | /** |
| 70 | Disables the use of the fall-back encoding: if the input doesn't have a |
| 71 | BOM and is not valid UTF-8, the conversion will fail. |
| 72 | */ |
| 73 | static void DisableFallbackEncoding(); |
| 74 | |
| 75 | /** |
| 76 | Returns the encoding used by default by wxConvAuto if no other encoding |
| 77 | is explicitly specified in the constructor. By default, returns |
| 78 | @c wxFONTENCODING_ISO8859_1 but this can be changed using |
| 79 | SetFallbackEncoding(). |
| 80 | */ |
| 81 | static wxFontEncoding GetFallbackEncoding(); |
| 82 | |
| 83 | /** |
| 84 | Changes the encoding used by default by wxConvAuto if no other encoding |
| 85 | is explicitly specified in the constructor. The default value, which can |
| 86 | be retrieved using GetFallbackEncoding(), is @c wxFONTENCODING_ISO8859_1. |
| 87 | |
| 88 | Special values of @c wxFONTENCODING_SYSTEM or @c wxFONTENCODING_MAX can |
| 89 | be used for the @a enc parameter to use the encoding of the current |
| 90 | user locale as the fall-back or to not use any fall-back encoding at all, |
| 91 | respectively (just as with the similar constructor parameter). However, |
| 92 | @c wxFONTENCODING_DEFAULT can't be used here. |
| 93 | */ |
| 94 | static void SetFallbackEncoding(wxFontEncoding enc); |
| 95 | }; |
| 96 | |