src/common/convauto.cpp

///////////////////////////////////////////////////////////////////////////////
// Name:        src/common/convauto.cpp
// Purpose:     implementation of wxConvAuto
// Author:      Vadim Zeitlin
// Created:     2006-04-04
// RCS-ID:      $Id$
// Copyright:   (c) 2006 Vadim Zeitlin <vadim@wxwindows.org>
// Licence:     wxWindows licence
///////////////////////////////////////////////////////////////////////////////

// ============================================================================
// declarations
// ============================================================================

// ----------------------------------------------------------------------------
// headers
// ----------------------------------------------------------------------------

// for compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"

#ifdef __BORLANDC__
    #pragma hdrstop
#endif

#if wxUSE_WCHAR_T

#ifndef WX_PRECOMP
#endif //WX_PRECOMP

#include "wx/convauto.h"

// ============================================================================
// implementation
// ============================================================================

/* static */
// Inspect the first bytes of the buffer and report which Unicode byte order
// mark (BOM), if any, it starts with.
//
// src     the start of the input buffer
// srcLen  the number of bytes available in src
//
// Returns one of the BOM_XXX values, BOM_None if no BOM is recognized.
//
// The recognized signatures are those listed at
// http://www.unicode.org/faq/utf_bom.html#BOM
//
// NOTE(review): other functions in this file treat (size_t)-1 as a special
// length value (see SkipBOM()) and InitFromInput() forwards its length here
// unchanged; for that value none of the length tests below restricts
// anything -- confirm that such input is always long enough to be examined.
wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen)
{
    // the shortest BOM takes 2 bytes: rejecting anything shorter right away
    // means that the UTF-16 tests below don't need any length checks
    if ( srcLen < 2 )
        return BOM_None;

    const char lead = src[0];

    if ( lead == '\0' )
    {
        // the only BOM starting with NUL is big endian UTF-32 (00 00 FE FF)
        const bool isUTF32BE = srcLen >= 4 &&
                                   src[1] == '\0' &&
                                       src[2] == '\xfe' &&
                                           src[3] == '\xff';
        if ( isUTF32BE )
            return BOM_UTF32BE;
    }
    else if ( lead == '\xfe' )
    {
        // the only BOM starting with FE is big endian UTF-16 (FE FF)
        if ( src[1] == '\xff' )
            return BOM_UTF16BE;
    }
    else if ( lead == '\xff' )
    {
        // FF FE starts both little endian UTF-16 and UTF-32 BOMs, the latter
        // continues with two NUL bytes
        if ( src[1] == '\xfe' )
        {
            if ( srcLen >= 4 && src[2] == '\0' && src[3] == '\0' )
                return BOM_UTF32LE;

            return BOM_UTF16LE;
        }
    }
    else if ( lead == '\xef' )
    {
        // UTF-8 BOM is EF BB BF
        if ( srcLen >= 3 && src[1] == '\xbb' && src[2] == '\xbf' )
            return BOM_UTF8;
    }

    // either the first byte doesn't start any BOM or the following ones
    // didn't match
    return BOM_None;
}

// Select the conversion object to use for the given BOM type.
//
// bomType  the kind of BOM found in the input, one of BOM_XXX values
//
// Sets m_conv and m_ownsConv for all the real BOM types and delegates to
// InitWithDefault() for BOM_None (and, after asserting, for any unexpected
// value). m_consumedBOM is left false if there is a BOM which still needs to
// be skipped and set to true if there is none.
void wxConvAuto::InitFromBOM(BOMType bomType)
{
    // until proven otherwise, there is a BOM which remains to be skipped
    m_consumedBOM = false;

    // every converter except the UTF-8 one is allocated here and so is owned
    // by this object
    bool ownsConv = true;

    switch ( bomType )
    {
        default:
            wxFAIL_MSG( _T("unexpected BOM type") );
            // fall through: some conversion object is still needed

        case BOM_None:
            InitWithDefault();
            m_consumedBOM = true; // no BOM means nothing is left to consume
            return;

        case BOM_UTF8:
            // use the already existing object, it must not be deleted by us
            m_conv = &wxConvUTF8;
            ownsConv = false;
            break;

        case BOM_UTF16LE:
            m_conv = new wxMBConvUTF16LE;
            break;

        case BOM_UTF16BE:
            m_conv = new wxMBConvUTF16BE;
            break;

        case BOM_UTF32LE:
            m_conv = new wxMBConvUTF32LE;
            break;

        case BOM_UTF32BE:
            m_conv = new wxMBConvUTF32BE;
            break;
    }

    m_ownsConv = ownsConv;
}

// Advance the input position past the BOM of the type stored in m_bomType.
//
// src  in/out: pointer to the current input position, moved forward by the
//      size of the BOM
// len  in/out: remaining input length, decreased by the size of the BOM
//      unless it is (size_t)-1, in which case it is left unchanged
//
// NOTE(review): there is no check that *len is at least as big as the BOM,
// presumably this is only called for the buffer in which the BOM was
// detected -- confirm against the callers.
void wxConvAuto::SkipBOM(const char **src, size_t *len) const
{
    // size of the BOM in bytes, remains 0 if there is nothing to skip
    int bomSize = 0;

    switch ( m_bomType )
    {
        case BOM_None:
            break;

        case BOM_UTF8:
            bomSize = 3;
            break;

        case BOM_UTF16LE:
        case BOM_UTF16BE:
            bomSize = 2;
            break;

        case BOM_UTF32LE:
        case BOM_UTF32BE:
            bomSize = 4;
            break;

        default:
            // nothing is skipped for an unknown BOM type
            wxFAIL_MSG( _T("unexpected BOM type") );
    }

    *src += bomSize;

    // (size_t)-1 is a special value and not a real byte count, so it must not
    // be adjusted
    const bool hasRealLength = *len != (size_t)-1;
    if ( hasRealLength )
        *len -= bomSize;
}

void wxConvAuto::InitFromInput(const char **src, size_t *len)
{
    m_bomType = DetectBOM(*src, *len);
    InitFromBOM(m_bomType);
    SkipBOM(src, len);
}

size_t
wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
                    const char *src, size_t srcLen) const
{
    // we check BOM and create the appropriate conversion the first time we're
    // called but we also need to ensure that the BOM is skipped not only
    // during this initial call but also during the first call with non-NULL
    // dst as typically we're first called with NULL dst to calculate the
    // needed buffer size
    wxConvAuto *self = wx_const_cast(wxConvAuto *, this);
    if ( !m_conv )
    {
        self->InitFromInput(&src, &srcLen);
        if ( dst )
            self->m_consumedBOM = true;
    }

    if ( !m_consumedBOM && dst )
    {
        self->m_consumedBOM = true;
        SkipBOM(&src, &srcLen);
    }

    return m_conv->ToWChar(dst, dstLen, src, srcLen);
}

size_t
wxConvAuto::FromWChar(char *dst, size_t dstLen,
                      const wchar_t *src, size_t srcLen) const
{
    if ( !m_conv )
    {
        // default to UTF-8 for the multibyte output
        wx_const_cast(wxConvAuto *, this)->InitWithDefault();
    }

    return m_conv->FromWChar(dst, dstLen, src, srcLen);
}

#endif // wxUSE_WCHAR_T
Commit	Line	Data
	1	///////////////////////////////////////////////////////////////////////////////
	2	// Name: src/common/convauto.cpp
	3	// Purpose: implementation of wxConvAuto
	4	// Author: Vadim Zeitlin
	5	// Created: 2006-04-04
	6	// RCS-ID: $Id$
	7	// Copyright: (c) 2006 Vadim Zeitlin <vadim@wxwindows.org>
	8	// Licence: wxWindows licence
	9	///////////////////////////////////////////////////////////////////////////////
	10
	11	// ============================================================================
	12	// declarations
	13	// ============================================================================
	14
	15	// ----------------------------------------------------------------------------
	16	// headers
	17	// ----------------------------------------------------------------------------
	18
	19	// for compilers that support precompilation, includes "wx.h".
	20	#include "wx/wxprec.h"
	21
	22	#ifdef __BORLANDC__
	23	#pragma hdrstop
	24	#endif
	25
	26	#if wxUSE_WCHAR_T
	27
	28	#ifndef WX_PRECOMP
	29	#endif //WX_PRECOMP
	30
	31	#include "wx/convauto.h"
	32
	33	// ============================================================================
	34	// implementation
	35	// ============================================================================
	36
	37	/* static */
	38	wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen)
	39	{
	40	if ( srcLen < 2 )
	41	{
	42	// minimal BOM is 2 bytes so bail out immediately and simplify the code
	43	// below which wouldn't need to check for length for UTF-16 cases
	44	return BOM_None;
	45	}
	46
	47	// examine the buffer for BOM presence
	48	//
	49	// see http://www.unicode.org/faq/utf_bom.html#BOM
	50	switch ( *src++ )
	51	{
	52	case '\0':
	53	// could only be big endian UTF-32 (00 00 FE FF)
	54	if ( srcLen >= 4 &&
	55	src[0] == '\0' &&
	56	src[1] == '\xfe' &&
	57	src[2] == '\xff' )
	58	{
	59	return BOM_UTF32BE;
	60	}
	61	break;
	62
	63	case '\xfe':
	64	// could only be big endian UTF-16 (FE FF)
	65	if ( *src++ == '\xff' )
	66	{
	67	return BOM_UTF16BE;
	68	}
	69	break;
	70
	71	case '\xff':
	72	// could be either little endian UTF-16 or UTF-32, both start
	73	// with FF FE
	74	if ( *src++ == '\xfe' )
	75	{
	76	return srcLen >= 4 && src[0] == '\0' && src[1] == '\0'
	77	? BOM_UTF32LE
	78	: BOM_UTF16LE;
	79	}
	80	break;
	81
	82	case '\xef':
	83	// is this UTF-8 BOM (EF BB BF)?
	84	if ( srcLen >= 3 && src[0] == '\xbb' && src[1] == '\xbf' )
	85	{
	86	return BOM_UTF8;
	87	}
	88	break;
	89	}
	90
	91	return BOM_None;
	92	}
	93
	94	void wxConvAuto::InitFromBOM(BOMType bomType)
	95	{
	96	m_consumedBOM = false;
	97
	98	switch ( bomType )
	99	{
	100	case BOM_UTF32BE:
	101	m_conv = new wxMBConvUTF32BE;
	102	m_ownsConv = true;
	103	break;
	104
	105	case BOM_UTF32LE:
	106	m_conv = new wxMBConvUTF32LE;
	107	m_ownsConv = true;
	108	break;
	109
	110	case BOM_UTF16BE:
	111	m_conv = new wxMBConvUTF16BE;
	112	m_ownsConv = true;
	113	break;
	114
	115	case BOM_UTF16LE:
	116	m_conv = new wxMBConvUTF16LE;
	117	m_ownsConv = true;
	118	break;
	119
	120	case BOM_UTF8:
	121	m_conv = &wxConvUTF8;
	122	m_ownsConv = false;
	123	break;
	124
	125	default:
	126	wxFAIL_MSG( _T("unexpected BOM type") );
	127	// fall through: still need to create something
	128
	129	case BOM_None:
	130	InitWithDefault();
	131	m_consumedBOM = true; // as there is nothing to consume
	132	}
	133	}
	134
	135	void wxConvAuto::SkipBOM(const char *src, size_t len) const
	136	{
	137	int ofs;
	138	switch ( m_bomType )
	139	{
	140	case BOM_UTF32BE:
	141	case BOM_UTF32LE:
	142	ofs = 4;
	143	break;
	144
	145	case BOM_UTF16BE:
	146	case BOM_UTF16LE:
	147	ofs = 2;
	148	break;
	149
	150	case BOM_UTF8:
	151	ofs = 3;
	152	break;
	153
	154	default:
	155	wxFAIL_MSG( _T("unexpected BOM type") );
	156	// fall through: still need to create something
	157
	158	case BOM_None:
	159	ofs = 0;
	160	}
	161
	162	*src += ofs;
	163	if ( *len != (size_t)-1 )
	164	*len -= ofs;
	165	}
	166
	167	void wxConvAuto::InitFromInput(const char *src, size_t len)
	168	{
	169	m_bomType = DetectBOM(src, len);
	170	InitFromBOM(m_bomType);
	171	SkipBOM(src, len);
	172	}
	173
	174	size_t
	175	wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
	176	const char *src, size_t srcLen) const
	177	{
	178	// we check BOM and create the appropriate conversion the first time we're
	179	// called but we also need to ensure that the BOM is skipped not only
	180	// during this initial call but also during the first call with non-NULL
	181	// dst as typically we're first called with NULL dst to calculate the
	182	// needed buffer size
	183	wxConvAuto self = wx_const_cast(wxConvAuto , this);
	184	if ( !m_conv )
	185	{
	186	self->InitFromInput(&src, &srcLen);
	187	if ( dst )
	188	self->m_consumedBOM = true;
	189	}
	190
	191	if ( !m_consumedBOM && dst )
	192	{
	193	self->m_consumedBOM = true;
	194	SkipBOM(&src, &srcLen);
	195	}
	196
	197	return m_conv->ToWChar(dst, dstLen, src, srcLen);
	198	}
	199
	200	size_t
	201	wxConvAuto::FromWChar(char *dst, size_t dstLen,
	202	const wchar_t *src, size_t srcLen) const
	203	{
	204	if ( !m_conv )
	205	{
	206	// default to UTF-8 for the multibyte output
	207	wx_const_cast(wxConvAuto *, this)->InitWithDefault();
	208	}
	209
	210	return m_conv->FromWChar(dst, dstLen, src, srcLen);
	211	}
	212
	213	#endif // wxUSE_WCHAR_T
	214