git.saurik.com Git - wxWidgets.git/blame_incremental

... / ...

Commit	Line	Data
	1	///////////////////////////////////////////////////////////////////////////////
	2	// Name: wx/stringops.h
	3	// Purpose: implementation of wxString primitive operations
	4	// Author: Vaclav Slavik
	5	// Modified by:
	6	// Created: 2007-04-16
	7	// RCS-ID: $Id$
	8	// Copyright: (c) 2007 REA Elektronik GmbH
	9	// Licence: wxWindows licence
	10	///////////////////////////////////////////////////////////////////////////////
	11
	12	#ifndef _WX_WXSTRINGOPS_H__
	13	#define _WX_WXSTRINGOPS_H__
	14
	15	#include "wx/chartype.h"
	16	#include "wx/stringimpl.h"
	17	#include "wx/unichar.h"
	18	#include "wx/buffer.h"
	19
	20	// This header contains wxStringOperations "namespace" class that implements
	21	// elementary operations on string data as static methods; wxString methods and
	22	// iterators are implemented in terms of it. Two implementations are available,
	23	// one for UTF-8 encoded char* string and one for "raw" wchar_t* strings (or
	24	// char* in ANSI build).
	25
	26	// FIXME-UTF8: only wchar after we remove ANSI build
	27	#if wxUSE_UNICODE_WCHAR \|\| !wxUSE_UNICODE
	28	struct WXDLLIMPEXP_BASE wxStringOperationsWchar
	29	{
	30	// moves the iterator to the next Unicode character
	31	template <typename Iterator>
	32	static void IncIter(Iterator& i) { ++i; }
	33
	34	// moves the iterator to the previous Unicode character
	35	template <typename Iterator>
	36	static void DecIter(Iterator& i) { --i; }
	37
	38	// moves the iterator by n Unicode characters
	39	template <typename Iterator>
	40	static Iterator AddToIter(const Iterator& i, ptrdiff_t n)
	41	{ return i + n; }
	42
	43	// returns distance of the two iterators in Unicode characters
	44	template <typename Iterator>
	45	static ptrdiff_t DiffIters(const Iterator& i1, const Iterator& i2)
	46	{ return i1 - i2; }
	47
	48	// encodes the character to a form used to represent it in internal
	49	// representation (returns a string in UTF8 version)
	50	static wxChar EncodeChar(const wxUniChar& ch) { return (wxChar)ch; }
	51
	52	static wxUniChar DecodeChar(const wxStringImpl::const_iterator& i)
	53	{ return *i; }
	54	};
	55	#endif // wxUSE_UNICODE_WCHAR \|\| !wxUSE_UNICODE
	56
	57
	#if wxUSE_UNICODE_UTF8
	// Primitive string operations for the UTF-8 encoded char* representation.
	// A character may occupy several bytes, so iterators are moved character
	// by character using the lead byte of each sequence.
	struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
	{
	    // checks correctness of UTF-8 sequence
	    static bool IsValidUtf8String(const char *c,
	                                  size_t len = wxStringImpl::npos);

	    // returns true if c can start a UTF-8 sequence: either a single-byte
	    // character (0x00..0x7F) or a byte in the 0xC2..0xF4 range
	    static bool IsValidUtf8LeadByte(unsigned char c)
	    {
	        return (c <= 0x7F) || (c >= 0xC2 && c <= 0xF4);
	    }

	    // table of offsets to skip forward when iterating over UTF-8 sequence;
	    // indexed by the value of the lead byte
	    static const unsigned char ms_utf8IterTable[256];


	    // moves the iterator to the next Unicode character; the iterator must
	    // point at a lead byte (asserted)
	    template<typename Iterator>
	    static void IncIter(Iterator& i)
	    {
	        wxASSERT( IsValidUtf8LeadByte(*i) );
	        i += ms_utf8IterTable[(unsigned char)*i];
	    }

	    // moves the iterator to the previous Unicode character; the iterator
	    // must point at a lead byte on entry (asserted)
	    template<typename Iterator>
	    static void DecIter(Iterator& i)
	    {
	        wxASSERT( IsValidUtf8LeadByte(*i) );

	        // Non-lead bytes are all in the 0x80..0xBF range (i.e. 10xxxxxx in
	        // binary), so we just have to go back until we hit a byte that is
	        // either < 0x80 (i.e. 0xxxxxxx in binary) or 0xC0..0xFF (11xxxxxx in
	        // binary; this includes some invalid values, but we can ignore it
	        // here, because we assume valid UTF-8 input for the purpose of
	        // efficient implementation).
	        --i;
	        while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
	            --i;
	    }

	    // returns a copy of the iterator moved by n Unicode characters:
	    // forward for positive n, backward for negative n, unchanged for 0
	    template<typename Iterator>
	    static Iterator AddToIter(const Iterator& i, ptrdiff_t n)
	    {
	        Iterator out(i);

	        if ( n > 0 )
	        {
	            for ( ptrdiff_t j = 0; j < n; ++j )
	                IncIter(out);
	        }
	        else if ( n < 0 )
	        {
	            for ( ptrdiff_t j = 0; j > n; --j )
	                DecIter(out);
	        }

	        return out;
	    }

	    // returns the distance i1 - i2 in Unicode characters (negative when
	    // i1 precedes i2); the earlier iterator is stepped forward until it
	    // compares equal to the later one, so both are expected to sit on
	    // character boundaries
	    template<typename Iterator>
	    static ptrdiff_t DiffIters(Iterator i1, Iterator i2)
	    {
	        ptrdiff_t dist = 0;

	        if ( i1 < i2 )
	        {
	            while ( i1 != i2 )
	            {
	                IncIter(i1);
	                dist--;
	            }
	        }
	        else if ( i2 < i1 )
	        {
	            while ( i2 != i1 )
	            {
	                IncIter(i2);
	                dist++;
	            }
	        }

	        return dist;
	    }

	    // encodes the character as UTF-8:
	    typedef wxUniChar::Utf8CharBuffer Utf8CharBuffer;
	    static Utf8CharBuffer EncodeChar(const wxUniChar& ch)
	        { return ch.AsUTF8(); }

	    // returns n copies of ch encoded in UTF-8 string
	    static wxCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);

	    // returns the length of UTF-8 encoding of the character with lead byte 'c'
	    static size_t GetUtf8CharLength(char c)
	    {
	        wxASSERT( IsValidUtf8LeadByte(c) );
	        return ms_utf8IterTable[(unsigned char)c];
	    }

	    // decodes single UTF-8 character from UTF-8 string
	    static wxUniChar DecodeChar(wxStringImpl::const_iterator i)
	    {
	        // bytes below 0x80 are returned as is, without calling the
	        // multi-byte decoder
	        if ( (unsigned char)*i < 0x80 )
	            return (int)*i;
	        return DecodeNonAsciiChar(i);
	    }

	private:
	    // decodes a character whose first byte is >= 0x80 (only declared
	    // here)
	    static wxUniChar DecodeNonAsciiChar(wxStringImpl::const_iterator i);
	};
	#endif // wxUSE_UNICODE_UTF8
	167
	168
	169	#if wxUSE_UNICODE_UTF8
	170	typedef wxStringOperationsUtf8 wxStringOperations;
	171	#else
	172	typedef wxStringOperationsWchar wxStringOperations;
	173	#endif
	174
	175	#endif // _WX_WXSTRINGOPS_H__