git.saurik.com Git - wxWidgets.git/blame_incremental

... / ...

Commit	Line	Data
	1	///////////////////////////////////////////////////////////////////////////////
	2	// Name: wx/stringops.h
	3	// Purpose: implementation of wxString primitive operations
	4	// Author: Vaclav Slavik
	5	// Modified by:
	6	// Created: 2007-04-16
	7	// Copyright: (c) 2007 REA Elektronik GmbH
	8	// Licence: wxWindows licence
	9	///////////////////////////////////////////////////////////////////////////////
	10
	11	#ifndef _WX_WXSTRINGOPS_H__
	12	#define _WX_WXSTRINGOPS_H__
	13
	14	#include "wx/chartype.h"
	15	#include "wx/stringimpl.h"
	16	#include "wx/unichar.h"
	17	#include "wx/buffer.h"
	18
	19	// This header contains wxStringOperations "namespace" class that implements
	20	// elementary operations on string data as static methods; wxString methods and
	21	// iterators are implemented in terms of it. Two implementations are available,
	22	// one for UTF-8 encoded char* string and one for "raw" wchar_t* strings (or
	23	// char* in ANSI build).
	24
	25	// FIXME-UTF8: only wchar after we remove ANSI build
	26	#if wxUSE_UNICODE_WCHAR \|\| !wxUSE_UNICODE
	27	struct WXDLLIMPEXP_BASE wxStringOperationsWchar
	28	{
	29	// moves the iterator to the next Unicode character
	30	template <typename Iterator>
	31	static void IncIter(Iterator& i) { ++i; }
	32
	33	// moves the iterator to the previous Unicode character
	34	template <typename Iterator>
	35	static void DecIter(Iterator& i) { --i; }
	36
	37	// moves the iterator by n Unicode characters
	38	template <typename Iterator>
	39	static Iterator AddToIter(const Iterator& i, ptrdiff_t n)
	40	{ return i + n; }
	41
	42	// returns distance of the two iterators in Unicode characters
	43	template <typename Iterator>
	44	static ptrdiff_t DiffIters(const Iterator& i1, const Iterator& i2)
	45	{ return i1 - i2; }
	46
	47	// encodes the character to a form used to represent it in internal
	48	// representation (returns a string in UTF8 version)
	49	static wxChar EncodeChar(const wxUniChar& ch) { return (wxChar)ch; }
	50
	51	static wxUniChar DecodeChar(const wxStringImpl::const_iterator& i)
	52	{ return *i; }
	53	};
	54	#endif // wxUSE_UNICODE_WCHAR \|\| !wxUSE_UNICODE
	55
	56
	57	#if wxUSE_UNICODE_UTF8
	58	struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
	59	{
	60	// checks correctness of UTF-8 sequence
	61	static bool IsValidUtf8String(const char *c,
	62	size_t len = wxStringImpl::npos);
	63	static bool IsValidUtf8LeadByte(unsigned char c)
	64	{
	65	return (c <= 0x7F) \|\| (c >= 0xC2 && c <= 0xF4);
	66	}
	67
	68	// table of offsets to skip forward when iterating over UTF-8 sequence
	69	static const unsigned char ms_utf8IterTable[256];
	70
	71
	72	template<typename Iterator>
	73	static void IncIter(Iterator& i)
	74	{
	75	wxASSERT( IsValidUtf8LeadByte(*i) );
	76	i += ms_utf8IterTable[(unsigned char)*i];
	77	}
	78
	79	template<typename Iterator>
	80	static void DecIter(Iterator& i)
	81	{
	82	wxASSERT( IsValidUtf8LeadByte(*i) );
	83
	84	// Non-lead bytes are all in the 0x80..0xBF range (i.e. 10xxxxxx in
	85	// binary), so we just have to go back until we hit a byte that is
	86	// either < 0x80 (i.e. 0xxxxxxx in binary) or 0xC0..0xFF (11xxxxxx in
	87	// binary; this includes some invalid values, but we can ignore it
	88	// here, because we assume valid UTF-8 input for the purpose of
	89	// efficient implementation).
	90	--i;
	91	while ( ((i) & 0xC0) == 0x80 / 2 highest bits are '10' */ )
	92	--i;
	93	}
	94
	95	template<typename Iterator>
	96	static Iterator AddToIter(const Iterator& i, ptrdiff_t n)
	97	{
	98	Iterator out(i);
	99
	100	if ( n > 0 )
	101	{
	102	for ( ptrdiff_t j = 0; j < n; ++j )
	103	IncIter(out);
	104	}
	105	else if ( n < 0 )
	106	{
	107	for ( ptrdiff_t j = 0; j > n; --j )
	108	DecIter(out);
	109	}
	110
	111	return out;
	112	}
	113
	114	template<typename Iterator>
	115	static ptrdiff_t DiffIters(Iterator i1, Iterator i2)
	116	{
	117	ptrdiff_t dist = 0;
	118
	119	if ( i1 < i2 )
	120	{
	121	while ( i1 != i2 )
	122	{
	123	IncIter(i1);
	124	dist--;
	125	}
	126	}
	127	else if ( i2 < i1 )
	128	{
	129	while ( i2 != i1 )
	130	{
	131	IncIter(i2);
	132	dist++;
	133	}
	134	}
	135
	136	return dist;
	137	}
	138
	139	// encodes the character as UTF-8:
	140	typedef wxUniChar::Utf8CharBuffer Utf8CharBuffer;
	141	static Utf8CharBuffer EncodeChar(const wxUniChar& ch)
	142	{ return ch.AsUTF8(); }
	143
	144	// returns n copies of ch encoded in UTF-8 string
	145	static wxCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);
	146
	147	// returns the length of UTF-8 encoding of the character with lead byte 'c'
	148	static size_t GetUtf8CharLength(char c)
	149	{
	150	wxASSERT( IsValidUtf8LeadByte(c) );
	151	return ms_utf8IterTable[(unsigned char)c];
	152	}
	153
	154	// decodes single UTF-8 character from UTF-8 string
	155	static wxUniChar DecodeChar(wxStringImpl::const_iterator i)
	156	{
	157	if ( (unsigned char)*i < 0x80 )
	158	return (int)*i;
	159	return DecodeNonAsciiChar(i);
	160	}
	161
	162	private:
	163	static wxUniChar DecodeNonAsciiChar(wxStringImpl::const_iterator i);
	164	};
	165	#endif // wxUSE_UNICODE_UTF8
	166
	167
	168	#if wxUSE_UNICODE_UTF8
	169	typedef wxStringOperationsUtf8 wxStringOperations;
	170	#else
	171	typedef wxStringOperationsWchar wxStringOperations;
	172	#endif
	173
	174	#endif // _WX_WXSTRINGOPS_H__