[wxWidgets.git] / src / common / convauto.cpp

///////////////////////////////////////////////////////////////////////////////
// Name:        src/common/convauto.cpp
// Purpose:     implementation of wxConvAuto
// Author:      Vadim Zeitlin
// Created:     2006-04-04
// RCS-ID:      $Id$
// Copyright:   (c) 2006 Vadim Zeitlin <vadim@wxwindows.org>
// Licence:     wxWindows licence
///////////////////////////////////////////////////////////////////////////////

// ============================================================================
// declarations
// ============================================================================

// ----------------------------------------------------------------------------
// headers
// ----------------------------------------------------------------------------

// for compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"

#ifdef __BORLANDC__
    #pragma hdrstop
#endif

#if wxUSE_WCHAR_T

#ifndef WX_PRECOMP
#endif //WX_PRECOMP

#include "wx/convauto.h"

// ============================================================================
// implementation
// ============================================================================

/* static */
wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen)
{
    // Look at the first bytes of the buffer and report which BOM (byte order
    // mark), if any, it starts with.
    //
    // src     start of the input to examine
    // srcLen  number of bytes available at src
    //
    // Returns one of the BOM_XXX constants, BOM_None if no BOM is recognized.
    //
    // The BOM byte sequences are listed at
    // http://www.unicode.org/faq/utf_bom.html#BOM
    //
    // NB: a srcLen of (size_t)-1 satisfies all the length checks below, so in
    //     this case up to 4 bytes of src may be examined.

    // the shortest BOM takes 2 bytes, so nothing shorter can contain one and
    // rejecting such input here lets the UTF-16 tests below look at the second
    // byte without any further length check
    if ( srcLen < 2 )
        return BOM_None;

    const char lead = src[0];

    if ( lead == '\0' )
    {
        // only big endian UTF-32 (00 00 FE FF) starts with a NUL byte
        if ( srcLen >= 4 &&
                src[1] == '\0' && src[2] == '\xfe' && src[3] == '\xff' )
            return BOM_UTF32BE;
    }
    else if ( lead == '\xfe' )
    {
        // only big endian UTF-16 (FE FF) starts with FE
        if ( src[1] == '\xff' )
            return BOM_UTF16BE;
    }
    else if ( lead == '\xff' )
    {
        // FF FE starts both little endian UTF-16 and UTF-32, the latter
        // continues with two NUL bytes
        if ( src[1] == '\xfe' )
        {
            if ( srcLen >= 4 && src[2] == '\0' && src[3] == '\0' )
                return BOM_UTF32LE;

            return BOM_UTF16LE;
        }
    }
    else if ( lead == '\xef' )
    {
        // the UTF-8 BOM is EF BB BF
        if ( srcLen >= 3 && src[1] == '\xbb' && src[2] == '\xbf' )
            return BOM_UTF8;
    }

    // either the first byte can't start any BOM or the following ones didn't
    // match
    return BOM_None;
}

void wxConvAuto::InitFromBOM(BOMType bomType)
{
    // Select the conversion object matching the given BOM type and record
    // whether this object owns it (and so, presumably, is responsible for
    // deleting it -- the deletion itself is not done in this function).
    //
    // bomType  the BOM type, normally the result of DetectBOM()

    // until proven otherwise below, there is a BOM which hasn't been skipped
    // yet
    m_consumedBOM = false;

    if ( bomType == BOM_UTF8 )
    {
        // use the global UTF-8 converter, not owned by us
        m_conv = &wxConvUTF8;
        m_ownsConv = false;
    }
    else if ( bomType == BOM_UTF16LE )
    {
        m_conv = new wxMBConvUTF16LE;
        m_ownsConv = true;
    }
    else if ( bomType == BOM_UTF16BE )
    {
        m_conv = new wxMBConvUTF16BE;
        m_ownsConv = true;
    }
    else if ( bomType == BOM_UTF32LE )
    {
        m_conv = new wxMBConvUTF32LE;
        m_ownsConv = true;
    }
    else if ( bomType == BOM_UTF32BE )
    {
        m_conv = new wxMBConvUTF32BE;
        m_ownsConv = true;
    }
    else // no BOM (or a value we don't know about)
    {
        if ( bomType != BOM_None )
        {
            // this is a programming error but we still handle it in the same
            // way as absence of BOM because some conversion must be set up
            wxFAIL_MSG( _T("unexpected BOM type") );
        }

        InitWithDefault();
        m_consumedBOM = true; // as there is nothing to consume
    }
}

void wxConvAuto::SkipBOM(const char **src, size_t *len) const
{
    // Advance the input pointer past the BOM whose type is stored in m_bomType
    // and decrease the remaining length accordingly.
    //
    // src  in/out: pointer to the start of the input, moved forward by the
    //      size of the BOM
    // len  in/out: input length, reduced by the size of the BOM unless it is
    //      (size_t)-1 in which case it is left unchanged

    // size of the BOM in bytes, remains 0 if there is none
    int bomSize = 0;

    switch ( m_bomType )
    {
        case BOM_None:
            break;

        case BOM_UTF8:
            bomSize = 3;
            break;

        case BOM_UTF16LE:
        case BOM_UTF16BE:
            bomSize = 2;
            break;

        case BOM_UTF32LE:
        case BOM_UTF32BE:
            bomSize = 4;
            break;

        default:
            // an unknown value is handled as absence of BOM: skip nothing
            wxFAIL_MSG( _T("unexpected BOM type") );
            break;
    }

    *src += bomSize;

    // (size_t)-1 is not a real byte count so don't adjust it
    if ( *len != (size_t)-1 )
        *len -= bomSize;
}

void wxConvAuto::InitFromInput(const char **src, size_t *len)
{
    m_bomType = DetectBOM(*src, *len);
    InitFromBOM(m_bomType);
    SkipBOM(src, len);
}

size_t
wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
                    const char *src, size_t srcLen) const
{
    // we check BOM and create the appropriate conversion the first time we're
    // called but we also need to ensure that the BOM is skipped not only
    // during this initial call but also during the first call with non-NULL
    // dst as typically we're first called with NULL dst to calculate the
    // needed buffer size
    wxConvAuto *self = wx_const_cast(wxConvAuto *, this);
    if ( !m_conv )
    {
        self->InitFromInput(&src, &srcLen);
        if ( dst )
            self->m_consumedBOM = true;
    }

    if ( !m_consumedBOM && dst )
    {
        self->m_consumedBOM = true;
        SkipBOM(&src, &srcLen);
    }

    return m_conv->ToWChar(dst, dstLen, src, srcLen);
}

size_t
wxConvAuto::FromWChar(char *dst, size_t dstLen,
                      const wchar_t *src, size_t srcLen) const
{
    if ( !m_conv )
    {
        // default to UTF-8 for the multibyte output
        wx_const_cast(wxConvAuto *, this)->InitWithDefault();
    }

    return m_conv->FromWChar(dst, dstLen, src, srcLen);
}

#endif // wxUSE_WCHAR_T
Commit	Line	Data
830f8f11 VZ	1	///////////////////////////////////////////////////////////////////////////////
	2	// Name: src/common/convauto.cpp
	3	// Purpose: implementation of wxConvAuto
	4	// Author: Vadim Zeitlin
	5	// Created: 2006-04-04
	6	// RCS-ID: $Id$
	7	// Copyright: (c) 2006 Vadim Zeitlin <vadim@wxwindows.org>
	8	// Licence: wxWindows licence
	9	///////////////////////////////////////////////////////////////////////////////
	10
	11	// ============================================================================
	12	// declarations
	13	// ============================================================================
	14
	15	// ----------------------------------------------------------------------------
	16	// headers
	17	// ----------------------------------------------------------------------------
	18
	19	// for compilers that support precompilation, includes "wx.h".
	20	#include "wx/wxprec.h"
	21
	22	#ifdef __BORLANDC__
	23	#pragma hdrstop
	24	#endif
	25
	26	#if wxUSE_WCHAR_T
	27
	28	#ifndef WX_PRECOMP
	29	#endif //WX_PRECOMP
	30
	31	#include "wx/convauto.h"
	32
	33	// ============================================================================
	34	// implementation
	35	// ============================================================================
	36
	37	/* static */
	38	wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen)
	39	{
	40	if ( srcLen < 2 )
	41	{
	42	// minimal BOM is 2 bytes so bail out immediately and simplify the code
	43	// below which wouldn't need to check for length for UTF-16 cases
	44	return BOM_None;
	45	}
	46
	47	// examine the buffer for BOM presence
	48	//
	49	// see http://www.unicode.org/faq/utf_bom.html#BOM
	50	switch ( *src++ )
	51	{
	52	case '\0':
	53	// could only be big endian UTF-32 (00 00 FE FF)
	54	if ( srcLen >= 4 &&
	55	src[0] == '\0' &&
	56	src[1] == '\xfe' &&
	57	src[2] == '\xff' )
	58	{
	59	return BOM_UTF32BE;
	60	}
	61	break;
	62
	63	case '\xfe':
	64	// could only be big endian UTF-16 (FE FF)
65	if ( *src++ == '\xff' )
66	{
67	return BOM_UTF16BE;
68	}
69	break;
70
71	case '\xff':
72	// could be either little endian UTF-16 or UTF-32, both start
73	// with FF FE
74	if ( *src++ == '\xfe' )
75	{
76	return srcLen >= 4 && src[0] == '\0' && src[1] == '\0'
77	? BOM_UTF32LE
78	: BOM_UTF16LE;
79	}
80	break;
81
82	case '\xef':
83	// is this UTF-8 BOM (EF BB BF)?
84	if ( srcLen >= 3 && src[0] == '\xbb' && src[1] == '\xbf' )
85	{
86	return BOM_UTF8;
87	}
88	break;
89	}
90
91	return BOM_None;
92	}
93
94	void wxConvAuto::InitFromBOM(BOMType bomType)
95	{
96	m_consumedBOM = false;
97
98	switch ( bomType )
99	{
100	case BOM_UTF32BE:
101	m_conv = new wxMBConvUTF32BE;
102	m_ownsConv = true;
103	break;
104
105	case BOM_UTF32LE:
106	m_conv = new wxMBConvUTF32LE;
107	m_ownsConv = true;
108	break;
109
110	case BOM_UTF16BE:
111	m_conv = new wxMBConvUTF16BE;
112	m_ownsConv = true;
113	break;
114
115	case BOM_UTF16LE:
116	m_conv = new wxMBConvUTF16LE;
117	m_ownsConv = true;
118	break;
119
120	case BOM_UTF8:
121	m_conv = &wxConvUTF8;
122	m_ownsConv = false;
123	break;
124
125	default:
126	wxFAIL_MSG( _T("unexpected BOM type") );
127	// fall through: still need to create something
128
129	case BOM_None:
130	InitWithDefault();
131	m_consumedBOM = true; // as there is nothing to consume
132	}
133	}
134
	135	void wxConvAuto::SkipBOM(const char **src, size_t *len) const
136	{
137	int ofs;
138	switch ( m_bomType )
139	{
140	case BOM_UTF32BE:
141	case BOM_UTF32LE:
142	ofs = 4;
143	break;
144
145	case BOM_UTF16BE:
146	case BOM_UTF16LE:
147	ofs = 2;
148	break;
149
150	case BOM_UTF8:
151	ofs = 3;
152	break;
153
154	default:
155	wxFAIL_MSG( _T("unexpected BOM type") );
156	// fall through: still need to create something
157
158	case BOM_None:
159	ofs = 0;
160	}
161
162	*src += ofs;
163	if ( *len != (size_t)-1 )
164	*len -= ofs;
165	}
166
	167	void wxConvAuto::InitFromInput(const char **src, size_t *len)
	168	{
	169	m_bomType = DetectBOM(*src, *len);
170	InitFromBOM(m_bomType);
171	SkipBOM(src, len);
172	}
173
174	size_t
175	wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
176	const char *src, size_t srcLen) const
177	{
178	// we check BOM and create the appropriate conversion the first time we're
179	// called but we also need to ensure that the BOM is skipped not only
180	// during this initial call but also during the first call with non-NULL
181	// dst as typically we're first called with NULL dst to calculate the
182	// needed buffer size
	183	wxConvAuto *self = wx_const_cast(wxConvAuto *, this);
184	if ( !m_conv )
185	{
186	self->InitFromInput(&src, &srcLen);
187	if ( dst )
188	self->m_consumedBOM = true;
189	}
190
191	if ( !m_consumedBOM && dst )
192	{
193	self->m_consumedBOM = true;
194	SkipBOM(&src, &srcLen);
195	}
196
197	return m_conv->ToWChar(dst, dstLen, src, srcLen);
198	}
199
200	size_t
201	wxConvAuto::FromWChar(char *dst, size_t dstLen,
202	const wchar_t *src, size_t srcLen) const
203	{
204	if ( !m_conv )
205	{
206	// default to UTF-8 for the multibyte output
207	wx_const_cast(wxConvAuto *, this)->InitWithDefault();
208	}
209
210	return m_conv->FromWChar(dst, dstLen, src, srcLen);
211	}
212
213	#endif // wxUSE_WCHAR_T
214