X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/44c44c82a3fe5362bac5cc9536f06eeac52af00c..a6c7a0f826d5e156c177b4755365c3eb33e9b933:/src/common/intl.cpp diff --git a/src/common/intl.cpp b/src/common/intl.cpp index e78a33c9d0..470c5d12bb 100644 --- a/src/common/intl.cpp +++ b/src/common/intl.cpp @@ -2,7 +2,8 @@ // Name: src/common/intl.cpp // Purpose: Internationalization and localisation for wxWindows // Author: Vadim Zeitlin -// Modified by: +// Modified by: Michael N. Filippov +// (2003/09/30 - PluralForms support) // Created: 29/01/98 // RCS-ID: $Id$ // Copyright: (c) 1998 Vadim Zeitlin @@ -17,7 +18,7 @@ // headers // ---------------------------------------------------------------------------- -#ifdef __GNUG__ +#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) #pragma implementation "intl.h" #endif @@ -31,7 +32,11 @@ #if wxUSE_INTL // standard headers + +#ifndef __WXWINCE__ #include +#endif + #include #include #ifdef HAVE_LANGINFO_H @@ -48,18 +53,19 @@ #include "wx/dynarray.h" #endif // WX_PRECOMP +#ifdef __WIN32__ + #include "wx/msw/private.h" +#elif defined(__UNIX_LIKE__) + #include "wx/fontmap.h" // for CharsetToEncoding() +#endif + #include "wx/file.h" #include "wx/tokenzr.h" #include "wx/module.h" #include "wx/fontmap.h" #include "wx/encconv.h" #include "wx/hashmap.h" - -#ifdef __WIN32__ - #include "wx/msw/private.h" -#elif defined(__UNIX_LIKE__) - #include "wx/fontmap.h" // for CharsetToEncoding() -#endif +#include "wx/ptr_scpd.h" #if defined(__WXMAC__) #include "wx/mac/private.h" // includes mac headers @@ -141,6 +147,714 @@ static inline wxString ExtractNotLang(const wxString& langFull) #endif // __UNIX__ +// ---------------------------------------------------------------------------- +// Plural forms parser +// ---------------------------------------------------------------------------- + +/* + Simplified Grammar + +Expression: + LogicalOrExpression '?' Expression ':' Expression + LogicalOrExpression + +LogicalOrExpression: + LogicalAndExpression "||" LogicalOrExpression // to (a || b) || c + LogicalAndExpression + +LogicalAndExpression: + EqualityExpression "&&" LogicalAndExpression // to (a && b) && c + EqualityExpression + +EqualityExpression: + RelationalExpression "==" RelationalExperession + RelationalExpression "!=" RelationalExperession + RelationalExpression + +RelationalExpression: + MultiplicativeExpression '>' MultiplicativeExpression + MultiplicativeExpression '<' MultiplicativeExpression + MultiplicativeExpression ">=" MultiplicativeExpression + MultiplicativeExpression "<=" MultiplicativeExpression + MultiplicativeExpression + +MultiplicativeExpression: + PmExpression '%' PmExpression + PmExpression + +PmExpression: + N + Number + '(' Expression ')' +*/ + +class wxPluralFormsToken +{ +public: + enum Type + { + T_ERROR, T_EOF, T_NUMBER, T_N, T_PLURAL, T_NPLURALS, T_EQUAL, T_ASSIGN, + T_GREATER, T_GREATER_OR_EQUAL, T_LESS, T_LESS_OR_EQUAL, + T_REMINDER, T_NOT_EQUAL, + T_LOGICAL_AND, T_LOGICAL_OR, T_QUESTION, T_COLON, T_SEMICOLON, + T_LEFT_BRACKET, T_RIGHT_BRACKET + }; + Type type() const { return m_type; } + void setType(Type type) { m_type = type; } + // for T_NUMBER only + typedef int Number; + Number number() const { return m_number; } + void setNumber(Number num) { m_number = num; } +private: + Type m_type; + Number m_number; +}; + + +class wxPluralFormsScanner +{ +public: + wxPluralFormsScanner(const char* s); + const wxPluralFormsToken& token() const { return m_token; } + bool nextToken(); // returns false if error +private: + const char* m_s; + wxPluralFormsToken m_token; +}; + +wxPluralFormsScanner::wxPluralFormsScanner(const char* s) : m_s(s) +{ + nextToken(); +} + +bool wxPluralFormsScanner::nextToken() +{ + wxPluralFormsToken::Type type = wxPluralFormsToken::T_ERROR; + while (isspace(*m_s)) + { + ++m_s; + } + if (*m_s == 0) + { + type = wxPluralFormsToken::T_EOF; + } + else if (isdigit(*m_s)) + { + wxPluralFormsToken::Number number = *m_s++ - '0'; + while (isdigit(*m_s)) + { + number = number * 10 + (*m_s++ - '0'); + } + m_token.setNumber(number); + type = wxPluralFormsToken::T_NUMBER; + } + else if (isalpha(*m_s)) + { + const char* begin = m_s++; + while (isalnum(*m_s)) + { + ++m_s; + } + size_t size = m_s - begin; + if (size == 1 && memcmp(begin, "n", size) == 0) + { + type = wxPluralFormsToken::T_N; + } + else if (size == 6 && memcmp(begin, "plural", size) == 0) + { + type = wxPluralFormsToken::T_PLURAL; + } + else if (size == 8 && memcmp(begin, "nplurals", size) == 0) + { + type = wxPluralFormsToken::T_NPLURALS; + } + } + else if (*m_s == '=') + { + ++m_s; + if (*m_s == '=') + { + ++m_s; + type = wxPluralFormsToken::T_EQUAL; + } + else + { + type = wxPluralFormsToken::T_ASSIGN; + } + } + else if (*m_s == '>') + { + ++m_s; + if (*m_s == '=') + { + ++m_s; + type = wxPluralFormsToken::T_GREATER_OR_EQUAL; + } + else + { + type = wxPluralFormsToken::T_GREATER; + } + } + else if (*m_s == '<') + { + ++m_s; + if (*m_s == '=') + { + ++m_s; + type = wxPluralFormsToken::T_LESS_OR_EQUAL; + } + else + { + type = wxPluralFormsToken::T_LESS; + } + } + else if (*m_s == '%') + { + ++m_s; + type = wxPluralFormsToken::T_REMINDER; + } + else if (*m_s == '!' && m_s[1] == '=') + { + m_s += 2; + type = wxPluralFormsToken::T_NOT_EQUAL; + } + else if (*m_s == '&' && m_s[1] == '&') + { + m_s += 2; + type = wxPluralFormsToken::T_LOGICAL_AND; + } + else if (*m_s == '|' && m_s[1] == '|') + { + m_s += 2; + type = wxPluralFormsToken::T_LOGICAL_OR; + } + else if (*m_s == '?') + { + ++m_s; + type = wxPluralFormsToken::T_QUESTION; + } + else if (*m_s == ':') + { + ++m_s; + type = wxPluralFormsToken::T_COLON; + } else if (*m_s == ';') { + ++m_s; + type = wxPluralFormsToken::T_SEMICOLON; + } + else if (*m_s == '(') + { + ++m_s; + type = wxPluralFormsToken::T_LEFT_BRACKET; + } + else if (*m_s == ')') + { + ++m_s; + type = wxPluralFormsToken::T_RIGHT_BRACKET; + } + m_token.setType(type); + return type != wxPluralFormsToken::T_ERROR; +} + +class wxPluralFormsNode; + +// NB: Can't use wxDEFINE_SCOPED_PTR_TYPE because wxPluralFormsNode is not +// fully defined yet: +class wxPluralFormsNodePtr +{ +public: + wxPluralFormsNodePtr(wxPluralFormsNode *p = NULL) : m_p(p) {} + ~wxPluralFormsNodePtr(); + wxPluralFormsNode& operator*() const { return *m_p; } + wxPluralFormsNode* operator->() const { return m_p; } + wxPluralFormsNode* get() const { return m_p; } + wxPluralFormsNode* release(); + void reset(wxPluralFormsNode *p); + +private: + wxPluralFormsNode *m_p; +}; + +class wxPluralFormsNode +{ +public: + wxPluralFormsNode(const wxPluralFormsToken& token) : m_token(token) {} + const wxPluralFormsToken& token() const { return m_token; } + const wxPluralFormsNode* node(size_t i) const + { return m_nodes[i].get(); } + void setNode(size_t i, wxPluralFormsNode* n); + wxPluralFormsNode* releaseNode(size_t i); + wxPluralFormsToken::Number evaluate(wxPluralFormsToken::Number n) const; + +private: + wxPluralFormsToken m_token; + wxPluralFormsNodePtr m_nodes[3]; +}; + +wxPluralFormsNodePtr::~wxPluralFormsNodePtr() +{ + delete m_p; +} +wxPluralFormsNode* wxPluralFormsNodePtr::release() +{ + wxPluralFormsNode *p = m_p; + m_p = NULL; + return p; +} +void wxPluralFormsNodePtr::reset(wxPluralFormsNode *p) +{ + if (p != m_p) + { + delete m_p; + m_p = p; + } +} + + +void wxPluralFormsNode::setNode(size_t i, wxPluralFormsNode* n) +{ + m_nodes[i].reset(n); +} + +wxPluralFormsNode* wxPluralFormsNode::releaseNode(size_t i) +{ + return m_nodes[i].release(); +} + +wxPluralFormsToken::Number +wxPluralFormsNode::evaluate(wxPluralFormsToken::Number n) const +{ + switch (token().type()) + { + // leaf + case wxPluralFormsToken::T_NUMBER: + return token().number(); + case wxPluralFormsToken::T_N: + return n; + // 2 args + case wxPluralFormsToken::T_EQUAL: + return node(0)->evaluate(n) == node(1)->evaluate(n); + case wxPluralFormsToken::T_NOT_EQUAL: + return node(0)->evaluate(n) != node(1)->evaluate(n); + case wxPluralFormsToken::T_GREATER: + return node(0)->evaluate(n) > node(1)->evaluate(n); + case wxPluralFormsToken::T_GREATER_OR_EQUAL: + return node(0)->evaluate(n) >= node(1)->evaluate(n); + case wxPluralFormsToken::T_LESS: + return node(0)->evaluate(n) < node(1)->evaluate(n); + case wxPluralFormsToken::T_LESS_OR_EQUAL: + return node(0)->evaluate(n) <= node(1)->evaluate(n); + case wxPluralFormsToken::T_REMINDER: + { + wxPluralFormsToken::Number number = node(1)->evaluate(n); + if (number != 0) + { + return node(0)->evaluate(n) % number; + } + else + { + return 0; + } + } + case wxPluralFormsToken::T_LOGICAL_AND: + return node(0)->evaluate(n) && node(1)->evaluate(n); + case wxPluralFormsToken::T_LOGICAL_OR: + return node(0)->evaluate(n) || node(1)->evaluate(n); + // 3 args + case wxPluralFormsToken::T_QUESTION: + return node(0)->evaluate(n) + ? node(1)->evaluate(n) + : node(2)->evaluate(n); + default: + return 0; + } +} + + +class wxPluralFormsCalculator +{ +public: + wxPluralFormsCalculator() : m_nplurals(0), m_plural(0) {} + + // input: number, returns msgstr index + int evaluate(int n) const; + + // input: text after "Plural-Forms:" (e.g. "nplurals=2; plural=(n != 1);"), + // if s == 0, creates default handler + // returns 0 if error + static wxPluralFormsCalculator* make(const char* s = 0); + + ~wxPluralFormsCalculator() {} + + void init(wxPluralFormsToken::Number nplurals, wxPluralFormsNode* plural); + wxString getString() const; + +private: + wxPluralFormsToken::Number m_nplurals; + wxPluralFormsNodePtr m_plural; +}; + +wxDEFINE_SCOPED_PTR_TYPE(wxPluralFormsCalculator); + +void wxPluralFormsCalculator::init(wxPluralFormsToken::Number nplurals, + wxPluralFormsNode* plural) +{ + m_nplurals = nplurals; + m_plural.reset(plural); +} + +int wxPluralFormsCalculator::evaluate(int n) const +{ + if (m_plural.get() == 0) + { + return 0; + } + wxPluralFormsToken::Number number = m_plural->evaluate(n); + if (number < 0 || number > m_nplurals) + { + return 0; + } + return number; +} + + +class wxPluralFormsParser +{ +public: + wxPluralFormsParser(wxPluralFormsScanner& scanner) : m_scanner(scanner) {} + bool parse(wxPluralFormsCalculator& rCalculator); + +private: + wxPluralFormsNode* parsePlural(); + // stops at T_SEMICOLON, returns 0 if error + wxPluralFormsScanner& m_scanner; + const wxPluralFormsToken& token() const; + bool nextToken(); + + wxPluralFormsNode* expression(); + wxPluralFormsNode* logicalOrExpression(); + wxPluralFormsNode* logicalAndExpression(); + wxPluralFormsNode* equalityExpression(); + wxPluralFormsNode* multiplicativeExpression(); + wxPluralFormsNode* relationalExpression(); + wxPluralFormsNode* pmExpression(); +}; + +bool wxPluralFormsParser::parse(wxPluralFormsCalculator& rCalculator) +{ + if (token().type() != wxPluralFormsToken::T_NPLURALS) + return false; + if (!nextToken()) + return false; + if (token().type() != wxPluralFormsToken::T_ASSIGN) + return false; + if (!nextToken()) + return false; + if (token().type() != wxPluralFormsToken::T_NUMBER) + return false; + wxPluralFormsToken::Number nplurals = token().number(); + if (!nextToken()) + return false; + if (token().type() != wxPluralFormsToken::T_SEMICOLON) + return false; + if (!nextToken()) + return false; + if (token().type() != wxPluralFormsToken::T_PLURAL) + return false; + if (!nextToken()) + return false; + if (token().type() != wxPluralFormsToken::T_ASSIGN) + return false; + if (!nextToken()) + return false; + wxPluralFormsNode* plural = parsePlural(); + if (plural == 0) + return false; + if (token().type() != wxPluralFormsToken::T_SEMICOLON) + return false; + if (!nextToken()) + return false; + if (token().type() != wxPluralFormsToken::T_EOF) + return false; + rCalculator.init(nplurals, plural); + return true; +} + +wxPluralFormsNode* wxPluralFormsParser::parsePlural() +{ + wxPluralFormsNode* p = expression(); + if (p == NULL) + { + return NULL; + } + wxPluralFormsNodePtr n(p); + if (token().type() != wxPluralFormsToken::T_SEMICOLON) + { + return NULL; + } + return n.release(); +} + +const wxPluralFormsToken& wxPluralFormsParser::token() const +{ + return m_scanner.token(); +} + +bool wxPluralFormsParser::nextToken() +{ + if (!m_scanner.nextToken()) + return false; + return true; +} + +wxPluralFormsNode* wxPluralFormsParser::expression() +{ + wxPluralFormsNode* p = logicalOrExpression(); + if (p == NULL) + return NULL; + wxPluralFormsNodePtr n(p); + if (token().type() == wxPluralFormsToken::T_QUESTION) + { + wxPluralFormsNodePtr qn(new wxPluralFormsNode(token())); + if (!nextToken()) + { + return 0; + } + p = expression(); + if (p == 0) + { + return 0; + } + qn->setNode(1, p); + if (token().type() != wxPluralFormsToken::T_COLON) + { + return 0; + } + if (!nextToken()) + { + return 0; + } + p = expression(); + if (p == 0) + { + return 0; + } + qn->setNode(2, p); + qn->setNode(0, n.release()); + return qn.release(); + } + return n.release(); +} + +wxPluralFormsNode*wxPluralFormsParser::logicalOrExpression() +{ + wxPluralFormsNode* p = logicalAndExpression(); + if (p == NULL) + return NULL; + wxPluralFormsNodePtr ln(p); + if (token().type() == wxPluralFormsToken::T_LOGICAL_OR) + { + wxPluralFormsNodePtr un(new wxPluralFormsNode(token())); + if (!nextToken()) + { + return 0; + } + p = logicalOrExpression(); + if (p == 0) + { + return 0; + } + wxPluralFormsNodePtr rn(p); // right + if (rn->token().type() == wxPluralFormsToken::T_LOGICAL_OR) + { + // see logicalAndExpression comment + un->setNode(0, ln.release()); + un->setNode(1, rn->releaseNode(0)); + rn->setNode(0, un.release()); + return rn.release(); + } + + + un->setNode(0, ln.release()); + un->setNode(1, rn.release()); + return un.release(); + } + return ln.release(); +} + +wxPluralFormsNode* wxPluralFormsParser::logicalAndExpression() +{ + wxPluralFormsNode* p = equalityExpression(); + if (p == NULL) + return NULL; + wxPluralFormsNodePtr ln(p); // left + if (token().type() == wxPluralFormsToken::T_LOGICAL_AND) + { + wxPluralFormsNodePtr un(new wxPluralFormsNode(token())); // up + if (!nextToken()) + { + return NULL; + } + p = logicalAndExpression(); + if (p == 0) + { + return NULL; + } + wxPluralFormsNodePtr rn(p); // right + if (rn->token().type() == wxPluralFormsToken::T_LOGICAL_AND) + { +// transform 1 && (2 && 3) -> (1 && 2) && 3 +// u r +// l r -> u 3 +// 2 3 l 2 + un->setNode(0, ln.release()); + un->setNode(1, rn->releaseNode(0)); + rn->setNode(0, un.release()); + return rn.release(); + } + + un->setNode(0, ln.release()); + un->setNode(1, rn.release()); + return un.release(); + } + return ln.release(); +} + +wxPluralFormsNode* wxPluralFormsParser::equalityExpression() +{ + wxPluralFormsNode* p = relationalExpression(); + if (p == NULL) + return NULL; + wxPluralFormsNodePtr n(p); + if (token().type() == wxPluralFormsToken::T_EQUAL + || token().type() == wxPluralFormsToken::T_NOT_EQUAL) + { + wxPluralFormsNodePtr qn(new wxPluralFormsNode(token())); + if (!nextToken()) + { + return NULL; + } + p = relationalExpression(); + if (p == NULL) + { + return NULL; + } + qn->setNode(1, p); + qn->setNode(0, n.release()); + return qn.release(); + } + return n.release(); +} + +wxPluralFormsNode* wxPluralFormsParser::relationalExpression() +{ + wxPluralFormsNode* p = multiplicativeExpression(); + if (p == NULL) + return NULL; + wxPluralFormsNodePtr n(p); + if (token().type() == wxPluralFormsToken::T_GREATER + || token().type() == wxPluralFormsToken::T_LESS + || token().type() == wxPluralFormsToken::T_GREATER_OR_EQUAL + || token().type() == wxPluralFormsToken::T_LESS_OR_EQUAL) + { + wxPluralFormsNodePtr qn(new wxPluralFormsNode(token())); + if (!nextToken()) + { + return NULL; + } + p = multiplicativeExpression(); + if (p == NULL) + { + return NULL; + } + qn->setNode(1, p); + qn->setNode(0, n.release()); + return qn.release(); + } + return n.release(); +} + +wxPluralFormsNode* wxPluralFormsParser::multiplicativeExpression() +{ + wxPluralFormsNode* p = pmExpression(); + if (p == NULL) + return NULL; + wxPluralFormsNodePtr n(p); + if (token().type() == wxPluralFormsToken::T_REMINDER) + { + wxPluralFormsNodePtr qn(new wxPluralFormsNode(token())); + if (!nextToken()) + { + return NULL; + } + p = pmExpression(); + if (p == NULL) + { + return NULL; + } + qn->setNode(1, p); + qn->setNode(0, n.release()); + return qn.release(); + } + return n.release(); +} + +wxPluralFormsNode* wxPluralFormsParser::pmExpression() +{ + wxPluralFormsNodePtr n; + if (token().type() == wxPluralFormsToken::T_N + || token().type() == wxPluralFormsToken::T_NUMBER) + { + n.reset(new wxPluralFormsNode(token())); + if (!nextToken()) + { + return NULL; + } + } + else if (token().type() == wxPluralFormsToken::T_LEFT_BRACKET) { + if (!nextToken()) + { + return NULL; + } + wxPluralFormsNode* p = expression(); + if (p == NULL) + { + return NULL; + } + n.reset(p); + if (token().type() != wxPluralFormsToken::T_RIGHT_BRACKET) + { + return NULL; + } + if (!nextToken()) + { + return NULL; + } + } + else + { + return NULL; + } + return n.release(); +} + +wxPluralFormsCalculator* wxPluralFormsCalculator::make(const char* s) +{ + wxPluralFormsCalculatorPtr calculator(new wxPluralFormsCalculator); + if (s != NULL) + { + wxPluralFormsScanner scanner(s); + wxPluralFormsParser p(scanner); + if (!p.parse(*calculator)) + { + return NULL; + } + } + return calculator.release(); +} + + + + // ---------------------------------------------------------------------------- // wxMsgCatalogFile corresponds to one disk-file message catalog. // @@ -157,7 +871,8 @@ public: ~wxMsgCatalogFile(); // load the catalog from disk (szDirPrefix corresponds to language) - bool Load(const wxChar *szDirPrefix, const wxChar *szName); + bool Load(const wxChar *szDirPrefix, const wxChar *szName, + wxPluralFormsCalculatorPtr& rPluralFormsCalculator); // fills the hash with string-translation pairs void FillHash(wxMessagesHash& hash, bool convertEncoding) const; @@ -187,21 +902,39 @@ private: // all data is stored here, NULL if no data loaded size_t8 *m_pData; + // amount of memory pointed to by m_pData. + size_t32 m_nSize; + // data description size_t32 m_numStrings; // number of strings in this domain wxMsgTableEntry *m_pOrigTable, // pointer to original strings *m_pTransTable; // translated - const char *StringAtOfs(wxMsgTableEntry *pTable, size_t32 index) const - { return (const char *)(m_pData + Swap(pTable[index].ofsString)); } + wxString m_charset; + + // swap the 2 halves of 32 bit integer if needed + size_t32 Swap(size_t32 ui) const + { + return m_bSwapped ? (ui << 24) | ((ui & 0xff00) << 8) | + ((ui >> 8) & 0xff00) | (ui >> 24) + : ui; + } + + const char *StringAtOfs(wxMsgTableEntry *pTable, size_t32 n) const + { + const wxMsgTableEntry * const ent = pTable + n; - wxString GetCharset() const; + // this check could fail for a corrupt message catalog + size_t32 ofsString = Swap(ent->ofsString); + if ( ofsString + Swap(ent->nLen) > m_nSize) + { + return NULL; + } - // utility functions - // big<->little endian - inline size_t32 Swap(size_t32 ui) const; + return (const char *)(m_pData + ofsString); + } - bool m_bSwapped; // wrong endianness? + bool m_bSwapped; // wrong endianness? DECLARE_NO_COPY_CLASS(wxMsgCatalogFile) }; @@ -224,7 +957,7 @@ public: wxString GetName() const { return m_name; } // get the translated string: returns NULL if not found - const wxChar *GetString(const wxChar *sz) const; + const wxChar *GetString(const wxChar *sz, size_t n = size_t(-1)) const; // public variable pointing to the next element in a linked list (or NULL) wxMsgCatalog *m_pNext; @@ -232,6 +965,7 @@ public: private: wxMessagesHash m_messages; // all messages in the catalog wxString m_name; // name of the domain + wxPluralFormsCalculatorPtr m_pluralFormsCalculator; }; // ---------------------------------------------------------------------------- @@ -249,22 +983,15 @@ static wxArrayString s_searchPrefixes; // wxMsgCatalogFile class // ---------------------------------------------------------------------------- -// swap the 2 halves of 32 bit integer if needed -size_t32 wxMsgCatalogFile::Swap(size_t32 ui) const -{ - return m_bSwapped ? (ui << 24) | ((ui & 0xff00) << 8) | - ((ui >> 8) & 0xff00) | (ui >> 24) - : ui; -} - wxMsgCatalogFile::wxMsgCatalogFile() { - m_pData = NULL; + m_pData = NULL; + m_nSize = 0; } wxMsgCatalogFile::~wxMsgCatalogFile() { - wxDELETEA(m_pData); + wxDELETEA(m_pData); } // return all directories to search for given prefix @@ -298,9 +1025,11 @@ static wxString GetFullSearchPath(const wxChar *lang) // LC_PATH is a standard env var containing the search path for the .mo // files +#ifndef __WXWINCE__ const wxChar *pszLcPath = wxGetenv(wxT("LC_PATH")); if ( pszLcPath != NULL ) searchPath << GetAllMsgCatalogSubdirs(pszLcPath, lang); +#endif #ifdef __UNIX__ // add some standard ones and the one in the tree where wxWin was installed: @@ -327,7 +1056,8 @@ static wxString GetFullSearchPath(const wxChar *lang) } // open disk file and read in it's contents -bool wxMsgCatalogFile::Load(const wxChar *szDirPrefix, const wxChar *szName0) +bool wxMsgCatalogFile::Load(const wxChar *szDirPrefix, const wxChar *szName0, + wxPluralFormsCalculatorPtr& rPluralFormsCalculator) { /* We need to handle locales like de_AT.iso-8859-1 For this we first chop off the .CHARSET specifier and ignore it. @@ -413,41 +1143,75 @@ bool wxMsgCatalogFile::Load(const wxChar *szDirPrefix, const wxChar *szName0) Swap(pHeader->ofsOrigTable)); m_pTransTable = (wxMsgTableEntry *)(m_pData + Swap(pHeader->ofsTransTable)); + m_nSize = nSize; + + // now parse catalog's header and try to extract catalog charset and + // plural forms formula from it: + + const char* headerData = StringAtOfs(m_pOrigTable, 0); + if (headerData && headerData[0] == 0) + { + // Extract the charset: + wxString header = wxString::FromAscii(StringAtOfs(m_pTransTable, 0)); + int begin = header.Find(wxT("Content-Type: text/plain; charset=")); + if (begin != wxNOT_FOUND) + { + begin += 34; //strlen("Content-Type: text/plain; charset=") + size_t end = header.find('\n', begin); + if (end != size_t(-1)) + { + m_charset.assign(header, begin, end - begin); + if (m_charset == wxT("CHARSET")) + { + // "CHARSET" is not valid charset, but lazy translator + m_charset.Clear(); + } + } + } + // else: incorrectly filled Content-Type header + + // Extract plural forms: + begin = header.Find(wxT("Plural-Forms:")); + if (begin != wxNOT_FOUND) + { + begin += 13; + size_t end = header.find('\n', begin); + if (end != size_t(-1)) + { + wxString pfs(header, begin, end - begin); + wxPluralFormsCalculator* pCalculator = wxPluralFormsCalculator + ::make(pfs.ToAscii()); + if (pCalculator != 0) + { + rPluralFormsCalculator.reset(pCalculator); + } + else + { + wxLogVerbose(_("Cannot parse Plural-Forms:'%s'"), + pfs.c_str()); + } + } + } + if (rPluralFormsCalculator.get() == NULL) + { + rPluralFormsCalculator.reset(wxPluralFormsCalculator::make()); + } + } // everything is fine - return TRUE; + return true; } void wxMsgCatalogFile::FillHash(wxMessagesHash& hash, bool convertEncoding) const { - wxString charset = GetCharset(); - #if wxUSE_WCHAR_T wxCSConv *csConv = NULL; - if ( !!charset ) - csConv = new wxCSConv(charset); + if ( !!m_charset ) + csConv = new wxCSConv(m_charset); wxMBConv& inputConv = csConv ? *((wxMBConv*)csConv) : *wxConvCurrent; - - for (size_t i = 0; i < m_numStrings; i++) - { - wxString key(StringAtOfs(m_pOrigTable, i), inputConv); - - #if wxUSE_UNICODE - hash[key] = wxString(StringAtOfs(m_pTransTable, i), inputConv); - #else - if ( convertEncoding ) - hash[key] = - wxString(inputConv.cMB2WC(StringAtOfs(m_pTransTable, i)), - wxConvLocal); - else - hash[key] = StringAtOfs(m_pTransTable, i); - #endif - } - - delete csConv; -#else // !wxUSE_WCHAR_T - #if wxUSE_FONTMAP +#elif wxUSE_FONTMAP + wxEncodingConverter converter; if ( convertEncoding ) { wxFontEncoding targetEnc = wxFONTENCODING_SYSTEM; @@ -474,62 +1238,61 @@ void wxMsgCatalogFile::FillHash(wxMessagesHash& hash, bool convertEncoding) cons if ( convertEncoding ) { - wxEncodingConverter converter; converter.Init(enc, targetEnc); - - for (size_t i = 0; i < m_numStrings; i++) - { - wxString key(StringAtOfs(m_pOrigTable, i)); - hash[key] = - converter.Convert(wxString(StringAtOfs(m_pTransTable, i))); - } - } - } - - if ( !convertEncoding ) - #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP - { - for (size_t i = 0; i < m_numStrings; i++) - { - wxString key(StringAtOfs(m_pOrigTable, i)); - hash[key] = StringAtOfs(m_pTransTable, i); } } #endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T -} - -wxString wxMsgCatalogFile::GetCharset() const -{ - // first, find encoding header: - const char *hdr = StringAtOfs(m_pOrigTable, 0); - if ( hdr == NULL || hdr[0] != 0 ) - { - // not supported by this catalog, does not have correct header - return wxEmptyString; - } + (void)convertEncoding; // get rid of warnings about unused parameter - wxString header = wxString::FromAscii( StringAtOfs(m_pTransTable, 0)); - wxString charset; - int pos = header.Find(wxT("Content-Type: text/plain; charset=")); - if ( pos == wxNOT_FOUND ) + for (size_t i = 0; i < m_numStrings; i++) { - // incorrectly filled Content-Type header - return wxEmptyString; - } + const char *data = StringAtOfs(m_pOrigTable, i); +#if wxUSE_WCHAR_T + wxString msgid(data, inputConv); +#else + wxString msgid(data); +#endif - size_t n = pos + 34; /*strlen("Content-Type: text/plain; charset=")*/ - while ( header[n] != wxT('\n') ) - charset << header[n++]; + data = StringAtOfs(m_pTransTable, i); + size_t length = Swap(m_pTransTable[i].nLen); + size_t offset = 0; + size_t index = 0; + while (offset < length) + { + wxString msgstr; +#if wxUSE_WCHAR_T + #if wxUSE_UNICODE + msgstr = wxString(data + offset, inputConv); + #else + if ( convertEncoding ) + msgstr = wxString(inputConv.cMB2WC(data + offset), wxConvLocal); + else + msgstr = wxString(data + offset); + #endif +#else // !wxUSE_WCHAR_T + #if wxUSE_FONTMAP + if ( convertEncoding ) + msgstr = wxString(converter.Convert(data + offset)); + else + #endif + msgstr = wxString(data + offset); +#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T - if ( charset == wxT("CHARSET") ) - { - // "CHARSET" is not valid charset, but lazy translator - return wxEmptyString; + if ( !msgstr.empty() ) + { + hash[index == 0 ? msgid : msgid + wxChar(index)] = msgstr; + } + offset += strlen(data + offset) + 1; + ++index; + } } - return charset; +#if wxUSE_WCHAR_T + delete csConv; +#endif } + // ---------------------------------------------------------------------------- // wxMsgCatalog class // ---------------------------------------------------------------------------- @@ -541,7 +1304,7 @@ bool wxMsgCatalog::Load(const wxChar *szDirPrefix, const wxChar *szName, m_name = szName; - if ( file.Load(szDirPrefix, szName) ) + if ( file.Load(szDirPrefix, szName, m_pluralFormsCalculator) ) { file.FillHash(m_messages, bConvertEncoding); return TRUE; @@ -550,9 +1313,23 @@ bool wxMsgCatalog::Load(const wxChar *szDirPrefix, const wxChar *szName, return FALSE; } -const wxChar *wxMsgCatalog::GetString(const wxChar *sz) const +const wxChar *wxMsgCatalog::GetString(const wxChar *sz, size_t n) const { - wxMessagesHash::const_iterator i = m_messages.find(sz); + int index = 0; + if (n != size_t(-1)) + { + index = m_pluralFormsCalculator->evaluate(n); + } + wxMessagesHash::const_iterator i; + if (index != 0) + { + i = m_messages.find(wxString(sz) + wxChar(index)); // plural + } + else + { + i = m_messages.find(sz); + } + if ( i != m_messages.end() ) { return i->second.c_str(); @@ -592,6 +1369,7 @@ wxLocale::wxLocale() m_pszOldLocale = NULL; m_pMsgCat = NULL; m_language = wxLANGUAGE_UNKNOWN; + m_initialized = false; } // NB: this function has (desired) side effect of changing current locale @@ -601,6 +1379,10 @@ bool wxLocale::Init(const wxChar *szName, bool bLoadDefault, bool bConvertEncoding) { + wxASSERT_MSG( !m_initialized, + _T("you can't call wxLocale::Init more than once") ); + + m_initialized = true; m_strLocale = szName; m_strShort = szShort; m_bConvertEncoding = bConvertEncoding; @@ -614,7 +1396,27 @@ bool wxLocale::Init(const wxChar *szName, wxCHECK_MSG( szLocale, FALSE, _T("no locale to set in wxLocale::Init()") ); } + +#ifdef __WXWINCE__ + // FIXME: I'm guessing here + wxChar localeName[256]; + int ret = GetLocaleInfo(LOCALE_USER_DEFAULT, LOCALE_SLANGUAGE, localeName, + 256); + if (ret != 0) + { + m_pszOldLocale = wxStrdup(localeName); + } + else + m_pszOldLocale = NULL; + + // TODO: how to find languageId + // SetLocaleInfo(languageId, SORT_DEFAULT, localeName); +#else m_pszOldLocale = wxSetlocale(LC_ALL, szLocale); + if ( m_pszOldLocale ) + m_pszOldLocale = wxStrdup(m_pszOldLocale); +#endif + if ( m_pszOldLocale == NULL ) wxLogError(_("locale '%s' can not be set."), szLocale); @@ -623,7 +1425,7 @@ bool wxLocale::Init(const wxChar *szName, if ( m_strShort.IsEmpty() ) { // FIXME I don't know how these 2 letter abbreviations are formed, // this wild guess is surely wrong - if ( szLocale[0] ) + if ( szLocale && szLocale[0] ) { m_strShort += (wxChar)wxTolower(szLocale[0]); if ( szLocale[1] ) @@ -643,6 +1445,39 @@ bool wxLocale::Init(const wxChar *szName, return bOk; } + +#if defined(__UNIX__) && wxUSE_UNICODE +static wxWCharBuffer wxSetlocaleTryUTF(int c, const wxChar *lc) +{ + wxMB2WXbuf l = wxSetlocale(c, lc); + if ( !l && lc && lc[0] != 0 ) + { + wxString buf(lc); + wxString buf2; + buf2 = buf + wxT(".UTF-8"); + l = wxSetlocale(c, buf2.c_str()); + if ( !l ) + { + buf2 = buf + wxT(".utf-8"); + l = wxSetlocale(c, buf2.c_str()); + } + if ( !l ) + { + buf2 = buf + wxT(".UTF8"); + l = wxSetlocale(c, buf2.c_str()); + } + if ( !l ) + { + buf2 = buf + wxT(".utf8"); + l = wxSetlocale(c, buf2.c_str()); + } + } + return l; +} +#else +#define wxSetlocaleTryUTF(c, lc) wxSetlocale(c, lc) +#endif + bool wxLocale::Init(int language, int flags) { int lang = language; @@ -678,12 +1513,12 @@ bool wxLocale::Init(int language, int flags) else locale = info->CanonicalName; - wxMB2WXbuf retloc = wxSetlocale(LC_ALL, locale); + wxMB2WXbuf retloc = wxSetlocaleTryUTF(LC_ALL, locale); if ( !retloc ) { // Some C libraries don't like xx_YY form and require xx only - retloc = wxSetlocale(LC_ALL, locale.Mid(0,2)); + retloc = wxSetlocaleTryUTF(LC_ALL, locale.Mid(0,2)); } if ( !retloc ) { @@ -696,14 +1531,18 @@ bool wxLocale::Init(int language, int flags) locale = wxT("in") + locale.Mid(3); else if (mid == wxT("yi")) locale = wxT("ji") + locale.Mid(3); + else if (mid == wxT("nb")) + locale = wxT("no_NO"); + else if (mid == wxT("nn")) + locale = wxT("no_NY"); - retloc = wxSetlocale(LC_ALL, locale); + retloc = wxSetlocaleTryUTF(LC_ALL, locale); } if ( !retloc ) { // (This time, we changed locale in previous if-branch, so try again.) // Some C libraries don't like xx_YY form and require xx only - retloc = wxSetlocale(LC_ALL, locale.Mid(0,2)); + retloc = wxSetlocaleTryUTF(LC_ALL, locale.Mid(0,2)); } if ( !retloc ) { @@ -711,6 +1550,17 @@ bool wxLocale::Init(int language, int flags) return FALSE; } #elif defined(__WIN32__) + + #if wxUSE_UNICODE && (defined(__VISUALC__) || defined(__MINGW32__)) + // NB: setlocale() from msvcrt.dll (used by VC++ and Mingw) + // can't set locale to language that can only be written using + // Unicode. Therefore wxSetlocale call failed, but we don't want + // to report it as an error -- so that at least message catalogs + // can be used. Watch for code marked with + // #ifdef SETLOCALE_FAILS_ON_UNICODE_LANGS bellow. + #define SETLOCALE_FAILS_ON_UNICODE_LANGS + #endif + wxMB2WXbuf retloc = wxT("C"); if (language != wxLANGUAGE_DEFAULT) { @@ -721,42 +1571,73 @@ bool wxLocale::Init(int language, int flags) } else { + int codepage + #ifdef SETLOCALE_FAILS_ON_UNICODE_LANGS + = -1 + #endif + ; wxUint32 lcid = MAKELCID(MAKELANGID(info->WinLang, info->WinSublang), SORT_DEFAULT); - if (SetThreadLocale(lcid)) + // FIXME +#ifndef __WXWINCE__ + SetThreadLocale(lcid); +#endif + // NB: we must translate LCID to CRT's setlocale string ourselves, + // because SetThreadLocale does not modify change the + // interpretation of setlocale(LC_ALL, "") call: + wxChar buffer[256]; + buffer[0] = wxT('\0'); + GetLocaleInfo(lcid, LOCALE_SENGLANGUAGE, buffer, 256); + locale << buffer; + if (GetLocaleInfo(lcid, LOCALE_SENGCOUNTRY, buffer, 256) > 0) + locale << wxT("_") << buffer; + if (GetLocaleInfo(lcid, LOCALE_IDEFAULTANSICODEPAGE, buffer, 256) > 0) { - retloc = wxSetlocale(LC_ALL, wxEmptyString); + codepage = wxAtoi(buffer); + if (codepage != 0) + locale << wxT(".") << buffer; + } + if (locale.IsEmpty()) + { + wxLogLastError(wxT("SetThreadLocale")); + wxLogError(wxT("Cannot set locale to language %s."), name.c_str()); + return FALSE; } else { - // Windows9X doesn't support SetThreadLocale, so we must - // translate LCID to CRT's setlocale string ourselves - locale.Empty(); - if (GetLastError() == ERROR_CALL_NOT_IMPLEMENTED) - { - wxChar buffer[256]; - buffer[0] = wxT('\0'); - GetLocaleInfo(lcid, LOCALE_SENGLANGUAGE, buffer, 256); - locale << buffer; - if (GetLocaleInfo(lcid, LOCALE_SENGCOUNTRY, buffer, 256) > 0) - locale << wxT("_") << buffer; - } - if (locale.IsEmpty()) - { - wxLogLastError(wxT("SetThreadLocale")); - wxLogError(wxT("Cannot set locale to language %s."), name.c_str()); - return FALSE; - } - else + // FIXME +#ifndef __WXWINCE__ + retloc = wxSetlocale(LC_ALL, locale); +#endif +#ifdef SETLOCALE_FAILS_ON_UNICODE_LANGS + if (codepage == 0 && (const wxChar*)retloc == NULL) { - retloc = wxSetlocale(LC_ALL, locale); + retloc = wxT("C"); } +#endif } } } else { + // FIXME +#ifndef __WXWINCE__ retloc = wxSetlocale(LC_ALL, wxEmptyString); +#else + retloc = NULL; +#endif +#ifdef SETLOCALE_FAILS_ON_UNICODE_LANGS + if ((const wxChar*)retloc == NULL) + { + wxChar buffer[16]; + if (GetLocaleInfo(LOCALE_USER_DEFAULT, + LOCALE_IDEFAULTANSICODEPAGE, buffer, 16) > 0 && + wxStrcmp(buffer, wxT("0")) == 0) + { + retloc = wxT("C"); + } + } +#endif } if ( !retloc ) @@ -772,12 +1653,15 @@ bool wxLocale::Init(int language, int flags) #endif #ifndef WX_NO_LOCALE_SUPPORT - wxChar *szLocale = retloc ? wxStrdup(retloc) : NULL; + wxChar *szLocale = retloc ? wxStrdup(retloc) : NULL; bool ret = Init(name, canonical, retloc, (flags & wxLOCALE_LOAD_DEFAULT) != 0, (flags & wxLOCALE_CONV_ENCODING) != 0); - if (szLocale) - free(szLocale); + free(szLocale); + + if (IsOk()) // setlocale() succeeded + m_language = lang; + return ret; #endif } @@ -865,10 +1749,16 @@ void wxLocale::AddCatalogLookupPathPrefix(const wxString& prefix) wxString lang; if ( langOrig == wxT("iw")) lang = _T("he"); - else if ( langOrig == wxT("in") ) + else if (langOrig == wxT("in")) lang = wxT("id"); - else if ( langOrig == wxT("ji") ) + else if (langOrig == wxT("ji")) lang = wxT("yi"); + else if (langOrig == wxT("no_NO")) + lang = wxT("nb_NO"); + else if (langOrig == wxT("no_NY")) + lang = wxT("nn_NO"); + else if (langOrig == wxT("no")) + lang = wxT("nb_NO"); else lang = langOrig; @@ -964,7 +1854,7 @@ void wxLocale::AddCatalogLookupPathPrefix(const wxString& prefix) lc = wxT("fr_CA") ; break ; case verNorway: - lc = wxT("no_NO") ; + lc = wxT("nb_NO") ; break ; case verIsrael: lc = wxT("iw_IL") ; @@ -1262,7 +2152,7 @@ wxString wxLocale::GetSystemEncodingName() // to Unix98) char *oldLocale = strdup(setlocale(LC_CTYPE, NULL)); setlocale(LC_CTYPE, ""); - char *alang = nl_langinfo(CODESET); + const char *alang = nl_langinfo(CODESET); setlocale(LC_CTYPE, oldLocale); free(oldLocale); @@ -1407,6 +2297,16 @@ const wxLanguageInfo *wxLocale::GetLanguageInfo(int lang) return NULL; } +/* static */ +wxString wxLocale::GetLanguageName(int lang) +{ + const wxLanguageInfo *info = GetLanguageInfo(lang); + if ( !info ) + return wxEmptyString; + else + return info->Description; +} + /* static */ const wxLanguageInfo *wxLocale::FindLanguageInfo(const wxString& locale) { @@ -1445,7 +2345,12 @@ const wxLanguageInfo *wxLocale::FindLanguageInfo(const wxString& locale) wxString wxLocale::GetSysName() const { + // FIXME +#ifndef __WXWINCE__ return wxSetlocale(LC_ALL, NULL); +#else + return wxEmptyString; +#endif } // clean up @@ -1461,15 +2366,27 @@ wxLocale::~wxLocale() // restore old locale wxSetLocale(m_pOldLocale); + // FIXME +#ifndef __WXWINCE__ wxSetlocale(LC_ALL, m_pszOldLocale); +#endif + free((wxChar *)m_pszOldLocale); // const_cast } // get the translation of given string in current locale const wxChar *wxLocale::GetString(const wxChar *szOrigString, const wxChar *szDomain) const +{ + return GetString(szOrigString, szOrigString, size_t(-1), szDomain); +} + +const wxChar *wxLocale::GetString(const wxChar *szOrigString, + const wxChar *szOrigString2, + size_t n, + const wxChar *szDomain) const { if ( wxIsEmpty(szOrigString) ) - return _T(""); + return wxEmptyString; const wxChar *pszTrans = NULL; wxMsgCatalog *pMsgCat; @@ -1480,14 +2397,14 @@ const wxChar *wxLocale::GetString(const wxChar *szOrigString, // does the catalog exist? if ( pMsgCat != NULL ) - pszTrans = pMsgCat->GetString(szOrigString); + pszTrans = pMsgCat->GetString(szOrigString, n); } else { // search in all domains for ( pMsgCat = m_pMsgCat; pMsgCat != NULL; pMsgCat = pMsgCat->m_pNext ) { - pszTrans = pMsgCat->GetString(szOrigString); + pszTrans = pMsgCat->GetString(szOrigString, n); if ( pszTrans != NULL ) // take the first found break; } @@ -1503,19 +2420,23 @@ const wxChar *wxLocale::GetString(const wxChar *szOrigString, if ( szDomain != NULL ) { wxLogTrace(_T("i18n"), - _T("string '%s' not found in domain '%s' for locale '%s'."), - szOrigString, szDomain, m_strLocale.c_str()); + _T("string '%s'[%d] not found in domain '%s' for locale '%s'."), + szOrigString, n, szDomain, m_strLocale.c_str()); + } else { wxLogTrace(_T("i18n"), - _T("string '%s' not found in locale '%s'."), - szOrigString, m_strLocale.c_str()); + _T("string '%s'[%d] not found in locale '%s'."), + szOrigString, n, m_strLocale.c_str()); } } #endif // __WXDEBUG__ - return szOrigString; + if (n == size_t(-1)) + return szOrigString; + else + return n == 1 ? szOrigString : szOrigString2; } return pszTrans; @@ -1558,6 +2479,11 @@ bool wxLocale::AddCatalog(const wxChar *szDomain) // don't add it because it couldn't be loaded anyway delete pMsgCat; + // it's OK to not load English catalog, the texts are embedded in + // the program: + if (m_strShort.Mid(0, 2) == wxT("en")) + return TRUE; + return FALSE; } } @@ -2313,7 +3239,7 @@ void wxLocale::InitLanguagesDB() LNG(wxLANGUAGE_NAURU, "na" , 0 , 0 , "Nauru") LNG(wxLANGUAGE_NEPALI, "ne" , LANG_NEPALI , SUBLANG_DEFAULT , "Nepali") LNG(wxLANGUAGE_NEPALI_INDIA, "ne_IN", LANG_NEPALI , SUBLANG_NEPALI_INDIA , "Nepali (India)") - LNG(wxLANGUAGE_NORWEGIAN_BOKMAL, "no_NO", LANG_NORWEGIAN , SUBLANG_NORWEGIAN_BOKMAL , "Norwegian (Bokmal)") + LNG(wxLANGUAGE_NORWEGIAN_BOKMAL, "nb_NO", LANG_NORWEGIAN , SUBLANG_NORWEGIAN_BOKMAL , "Norwegian (Bokmal)") LNG(wxLANGUAGE_NORWEGIAN_NYNORSK, "nn_NO", LANG_NORWEGIAN , SUBLANG_NORWEGIAN_NYNORSK , "Norwegian (Nynorsk)") LNG(wxLANGUAGE_OCCITAN, "oc" , 0 , 0 , "Occitan") LNG(wxLANGUAGE_ORIYA, "or" , LANG_ORIYA , SUBLANG_DEFAULT , "Oriya") @@ -2370,7 +3296,7 @@ void wxLocale::InitLanguagesDB() LNG(wxLANGUAGE_SWAHILI, "sw_KE", LANG_SWAHILI , SUBLANG_DEFAULT , "Swahili") LNG(wxLANGUAGE_SWEDISH, "sv_SE", LANG_SWEDISH , SUBLANG_SWEDISH , "Swedish") LNG(wxLANGUAGE_SWEDISH_FINLAND, "sv_FI", LANG_SWEDISH , SUBLANG_SWEDISH_FINLAND , "Swedish (Finland)") - LNG(wxLANGUAGE_TAGALOG, "tl" , 0 , 0 , "Tagalog") + LNG(wxLANGUAGE_TAGALOG, "tl_PH", 0 , 0 , "Tagalog") LNG(wxLANGUAGE_TAJIK, "tg" , 0 , 0 , "Tajik") LNG(wxLANGUAGE_TAMIL, "ta" , LANG_TAMIL , SUBLANG_DEFAULT , "Tamil") LNG(wxLANGUAGE_TATAR, "tt" , LANG_TATAR , SUBLANG_DEFAULT , "Tatar")