Fixed a bug in the handling of escaped characters in entry/group names

[wxWidgets.git] / src / common / string.cpp
diff --git a/src/common/string.cpp b/src/common/string.cpp

index ba38f0559a0cccb5512dc3cc5b2867ed07661030..7759ed8766ed8c91d33f326ee257dfff63106e3d 100644 (file)
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -1935,9 +1935,12 @@ static void wxLoadCharacterSets(void)
  {
    static bool already_loaded = FALSE;
  
-#if defined(__UNIX__) && wxUSE_UNICODE
+  if (already_loaded) return;
+
+#if defined(__UNIX__)
    // search through files in /usr/share/i18n/charmaps
-  for (wxString fname = ::wxFindFirstFile(_T("/usr/share/i18n/charmaps/*"));
+  wxString fname;
+  for (fname = ::wxFindFirstFile(_T("/usr/share/i18n/charmaps/*"));
         !fname.IsEmpty();
         fname = ::wxFindNextFile()) {
      wxTextFile cmap(fname);
@@ -1946,12 +1949,13 @@ static void wxLoadCharacterSets(void)
        wxString comchar,escchar;
        bool in_charset = FALSE;
  
-      wxPrintf(_T("yup, loaded %s\n"),fname.c_str());
+      // wxFprintf(stderr,_T("Loaded: %s\n"),fname.c_str());
  
-      for (wxString line = cmap.GetFirstLine();
+      wxString line;
+      for (line = cmap.GetFirstLine();
            !cmap.Eof();
            line = cmap.GetNextLine()) {
-       wxPrintf(_T("line contents: %s\n"),line.c_str());
+       // wxFprintf(stderr,_T("line contents: %s\n"),line.c_str());
         wxStringTokenizer token(line);
         wxString cmd = token.GetNextToken();
         if (cmd == comchar) {
@@ -1968,12 +1972,13 @@ static void wxLoadCharacterSets(void)
           comchar = token.GetNextToken();
         else if (cmd == _T("<escape_char>"))
           escchar = token.GetNextToken();
-       else if (cmd == _T("<mb_cur_min")) {
+       else if (cmd == _T("<mb_cur_min>")) {
           delete cset;
-         goto forget_it; // we don't support multibyte charsets ourselves (yet)
+         cset = (wxCharacterSet *) NULL;
+         break; // we don't support multibyte charsets ourselves (yet)
         }
         else if (cmd == _T("CHARMAP")) {
-         cset->data = (wchar_t *)calloc(256, sizeof(wxChar));
+         cset->data = (wchar_t *)calloc(256, sizeof(wchar_t));
           in_charset = TRUE;
         }
         else if (cmd == _T("END")) {
@@ -1982,19 +1987,31 @@ static void wxLoadCharacterSets(void)
         }
         else if (in_charset) {
           // format: <NUL> /x00 <U0000> NULL (NUL)
+         //         <A>   /x41 <U0041> LATIN CAPITAL LETTER A
           wxString hex = token.GetNextToken();
+         // skip whitespace (why doesn't wxStringTokenizer do this?)
+         while (wxIsEmpty(hex) && token.HasMoreTokens()) hex = token.GetNextToken();
           wxString uni = token.GetNextToken();
-         // just assume that we've got the right format
-         int pos = ::wxHexToDec(hex.Mid(2,2));
-         unsigned long uni1 = ::wxHexToDec(uni.Mid(2,2));
-         unsigned long uni2 = ::wxHexToDec(uni.Mid(4,2));
-         cset->data[pos] = (uni1 << 16) | uni2;
+         // skip whitespace again
+         while (wxIsEmpty(uni) && token.HasMoreTokens()) uni = token.GetNextToken();
+
+         if ((hex.GetChar(0) == escchar) && (hex.GetChar(1) == _T('x')) &&
+             (uni.Left(2) == _T("<U"))) {
+           hex.MakeUpper(); uni.MakeUpper();
+           int pos = ::wxHexToDec(hex.Mid(2,2));
+           if (pos>=0) {
+             unsigned long uni1 = ::wxHexToDec(uni.Mid(2,2));
+             unsigned long uni2 = ::wxHexToDec(uni.Mid(4,2));
+             cset->data[pos] = (uni1 << 16) | uni2;
+             // wxFprintf(stderr,_T("char %02x mapped to %04x (%c)\n"),pos,cset->data[pos],cset->data[pos]);
+           }
+         }
         }
        }
-      cset->names.Shrink();
-      wxCharsets.Add(cset);
-    forget_it:
-      continue;
+      if (cset) {
+       cset->names.Shrink();
+       wxCharsets.Add(cset);
+      }
      }
    }
  #endif
@@ -2002,8 +2019,9 @@ static void wxLoadCharacterSets(void)
    already_loaded = TRUE;
  }
  
-static wxCharacterSet *wxFindCharacterSet(const wxString& charset)
+static wxCharacterSet *wxFindCharacterSet(const wxChar *charset)
  {
+  wxLoadCharacterSets();
    for (size_t n=0; n<wxCharsets.GetCount(); n++)
      if (wxCharsets[n].names.Index(charset) != wxNOT_FOUND)
        return &(wxCharsets[n]);
@@ -2014,47 +2032,60 @@ WXDLLEXPORT_DATA(wxCSConv) wxConv_local((const wxChar *)NULL);
  
  wxCSConv::wxCSConv(const wxChar *charset)
  {
-  wxLoadCharacterSets();
    if (!charset) {
  #ifdef __UNIX__
      wxChar *lang = wxGetenv(_T("LANG"));
-    wxChar *dot = wxStrchr(lang, _T('.'));
+    wxChar *dot = lang ? wxStrchr(lang, _T('.')) : (wxChar *)NULL;
      if (dot) charset = dot+1;
  #endif
    }
-  cset = (wxCharacterSet *) NULL;
-
+  m_cset = (wxCharacterSet *) NULL;
+  m_deferred = FALSE;
+  if (charset) {
  #ifdef __UNIX__
-  // first, convert the character set name to standard form
-  wxString codeset;
-  if (wxString(charset,3) == _T("ISO")) {
-    // make sure it's represented in the standard form: ISO_8859-1
-    codeset = _T("ISO_");
-    charset += 3;
-    if ((*charset == _T('-')) || (*charset == _T('_'))) charset++;
-    if (wxStrlen(charset)>4) {
-      if (wxString(charset,4) == _T("8859")) {
-       codeset << _T("8859-");
-       if (*charset == _T('-')) charset++;
+    // first, convert the character set name to standard form
+    wxString codeset;
+    if (wxString(charset,3).CmpNoCase(_T("ISO")) == 0) {
+      // make sure it's represented in the standard form: ISO_8859-1
+      codeset = _T("ISO_");
+      charset += 3;
+      if ((*charset == _T('-')) || (*charset == _T('_'))) charset++;
+      if (wxStrlen(charset)>4) {
+       if (wxString(charset,4) == _T("8859")) {
+         codeset << _T("8859-");
+         if (*charset == _T('-')) charset++;
+       }
        }
      }
-  }
-  codeset << charset;
-  codeset.MakeUpper();
-  cset = wxFindCharacterSet(codeset);
+    codeset << charset;
+    codeset.MakeUpper();
+    m_name = wxStrdup(codeset.c_str());
+    m_deferred = TRUE;
  #endif
+  }
  }
  
-wxCSConv::~wxCSConv(void)
+wxCSConv::~wxCSConv()
  {
+  free(m_name);
+}
+
+void wxCSConv::LoadNow()
+{
+//  wxPrintf(_T("Conversion request\n"));
+  if (m_deferred) {
+    m_cset = wxFindCharacterSet(m_name);
+    m_deferred = FALSE;
+  }
  }
  
  size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
  {
+  ((wxCSConv *)this)->LoadNow(); // discard constness
    if (buf) {
-    if (cset) {
+    if (m_cset) {
        for (size_t c=0; c<=n; c++)
-       buf[c] = cset->data[psz[c]];
+       buf[c] = m_cset->data[psz[c]];
      } else {
        // latin-1 (direct)
        for (size_t c=0; c<=n; c++)
@@ -2066,11 +2097,12 @@ size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
  
  size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
  {
+  ((wxCSConv *)this)->LoadNow(); // discard constness
    if (buf) {
-    if (cset) {
+    if (m_cset) {
        for (size_t c=0; c<=n; c++) {
         size_t n;
-       for (n=0; (n<256) && (cset->data[n] != psz[c]); n++);
+       for (n=0; (n<256) && (m_cset->data[n] != psz[c]); n++);
         buf[c] = (n>0xff) ? '?' : n;
        }
      } else {