From 66f22f4ae5bf7aac125780a5b18138e3afb54426 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Tue, 24 Mar 2009 00:11:42 +0000 Subject: [PATCH] fix parsing methods for non-ASCII strings (e.g. dates in non-"C" locales) (see #9560) git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@59798 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- src/common/datetimefmt.cpp | 262 +++++++++++++++----------------- tests/datetime/datetimetest.cpp | 70 +++++++-- 2 files changed, 183 insertions(+), 149 deletions(-) diff --git a/src/common/datetimefmt.cpp b/src/common/datetimefmt.cpp index 3ecb11b15e..19114a43c7 100644 --- a/src/common/datetimefmt.cpp +++ b/src/common/datetimefmt.cpp @@ -89,6 +89,9 @@ static const int MIN_PER_HOUR = 60; // parsing helpers // ---------------------------------------------------------------------------- +namespace +{ + #ifdef HAVE_STRPTIME #if wxUSE_UNIX && !defined(HAVE_STRPTIME_DECL) @@ -97,35 +100,31 @@ static const int MIN_PER_HOUR = 60; extern "C" char *strptime(const char *, const char *, struct tm *); #endif -// Unicode-friendly strptime() wrapper -static const wxStringCharType * -CallStrptime(const wxStringCharType *input, const char *fmt, tm *tm) +// strptime() wrapper: call strptime() for the string starting at the given +// iterator and fill output tm struct with the results and modify input to +// point to the end of the string consumed by strptime() if successful, +// otherwise return false and don't modify anything +bool +CallStrptime(const wxString& str, + wxString::const_iterator& p, + const char *fmt, + tm *tm) { - // the problem here is that strptime() returns pointer into the string we - // passed to it while we're really interested in the pointer into the - // original, Unicode, string so we try to transform the pointer back -#if wxUSE_UNICODE_WCHAR - wxCharBuffer inputMB(wxConvertWX2MB(input)); -#else // ASCII - const char * const inputMB = input; -#endif // Unicode/Ascii - - const char *result = strptime(inputMB, fmt, tm); - if ( !result ) - return NULL; + const char *start = str.mb_str(); + start = wxStringOperations::AddToIter(start, p - str.begin()); -#if wxUSE_UNICODE_WCHAR - // FIXME: this is wrong in presence of surrogates &c - return input + (result - inputMB.data()); -#else // ASCII - return result; -#endif // Unicode/Ascii + const char * const end = strptime(start, fmt, tm); + if ( !end ) + return false; + + p += wxStringOperations::DiffIters(end, start); + return true; } #endif // HAVE_STRPTIME // return the month if the string is a month name or Inv_Month otherwise -static wxDateTime::Month GetMonthFromName(const wxString& name, int flags) +wxDateTime::Month GetMonthFromName(const wxString& name, int flags) { wxDateTime::Month mon; for ( mon = wxDateTime::Jan; mon < wxDateTime::Inv_Month; wxNextMonth(mon) ) @@ -155,7 +154,7 @@ static wxDateTime::Month GetMonthFromName(const wxString& name, int flags) } // return the weekday if the string is a weekday name or Inv_WeekDay otherwise -static wxDateTime::WeekDay GetWeekDayFromName(const wxString& name, int flags) +wxDateTime::WeekDay GetWeekDayFromName(const wxString& name, int flags) { wxDateTime::WeekDay wd; for ( wd = wxDateTime::Sun; wd < wxDateTime::Inv_WeekDay; wxNextWDay(wd) ) @@ -185,9 +184,9 @@ static wxDateTime::WeekDay GetWeekDayFromName(const wxString& name, int flags) } // scans all digits (but no more than len) and returns the resulting number -static bool GetNumericToken(size_t len, - const wxStringCharType*& p, - unsigned long *number) +bool GetNumericToken(size_t len, + wxString::const_iterator& p, + unsigned long *number) { size_t n = 1; wxString s; @@ -203,7 +202,7 @@ static bool GetNumericToken(size_t len, } // scans all alphabetic characters and returns the resulting string -static wxString GetAlphaToken(const wxStringCharType*& p) +wxString GetAlphaToken(wxString::const_iterator& p) { wxString s; while ( wxIsalpha(*p) ) @@ -214,6 +213,36 @@ static wxString GetAlphaToken(const wxStringCharType*& p) return s; } +// parses string starting at given iterator using the specified format and, +// optionally, a fall back format (and optionally another one... but it stops +// there, really) +// +// if unsuccessful, returns invalid wxDateTime without changing p; otherwise +// advance p to the end of the match and returns wxDateTime containing the +// results of the parsing +wxDateTime +ParseFormatAt(wxString::const_iterator& p, + const wxString::const_iterator& end, + const wxString& fmt, + const wxString& fmtAlt = wxString(), + const wxString& fmtAlt2 = wxString()) +{ + const wxString str(p, end); + wxString::const_iterator endParse; + wxDateTime dt; + if ( dt.ParseFormat(str, fmt, &endParse) || + (!fmtAlt.empty() && dt.ParseFormat(str, fmtAlt, &endParse)) || + (!fmtAlt2.empty() && dt.ParseFormat(str, fmtAlt2, &endParse)) ) + { + p += endParse - str.begin(); + } + //else: all formats failed + + return dt; +} + +} // anonymous namespace + // ---------------------------------------------------------------------------- // wxDateTime to/from text representations // ---------------------------------------------------------------------------- @@ -1089,7 +1118,7 @@ wxDateTime::ParseFormat(const wxString& date, wxDateTime::Month mon = Inv_Month; int year = 0; - const wxStringCharType *input = date.wx_str(); + wxString::const_iterator input = date.begin(); for ( wxString::const_iterator fmt = format.begin(); fmt != format.end(); ++fmt ) { if ( *fmt != _T('%') ) @@ -1185,24 +1214,14 @@ wxDateTime::ParseFormat(const wxString& date, case _T('c'): // locale default date and time representation { - wxDateTime dt; - - const wxString inc(input); - - // NOTE: %c is locale-dependent; try strptime #ifdef HAVE_STRPTIME struct tm tm; // try using strptime() -- it may fail even if the input is // correct but the date is out of range, so we will fall back // to our generic code anyhow - const wxStringCharType * - result = CallStrptime(input, "%c", &tm); - if ( result ) + if ( CallStrptime(date, input, "%c", &tm) ) { - haveDay = haveMon = haveYear = - haveHour = haveMin = haveSec = true; - hour = tm.tm_hour; min = tm.tm_min; sec = tm.tm_sec; @@ -1210,28 +1229,25 @@ wxDateTime::ParseFormat(const wxString& date, year = 1900 + tm.tm_year; mon = (Month)tm.tm_mon; mday = tm.tm_mday; - - input = result; // proceed where strptime() ended } - else - { - // strptime() failed; try generic heuristic code + else // strptime() failed; try generic heuristic code #endif // HAVE_STRPTIME + { - // try the format which corresponds to ctime() output first - wxString::const_iterator endc; - if ( !dt.ParseFormat(inc, wxS("%a %b %d %H:%M:%S %Y"), &endc) && - !dt.ParseFormat(inc, wxS("%x %X"), &endc) && - !dt.ParseFormat(inc, wxS("%X %x"), &endc) ) - { - // we've tried everything and still no match + // try the format which corresponds to ctime() output + // first, then the generic date/time formats + const wxDateTime dt = ParseFormatAt + ( + input, + date.end(), + wxS("%a %b %d %H:%M:%S %Y"), + wxS("%x %X"), + wxS("%X %x") + ); + if ( !dt.IsValid() ) return NULL; - } Tm tm = dt.GetTm(); - - haveDay = haveMon = haveYear = - haveHour = haveMin = haveSec = true; hour = tm.hour; min = tm.min; @@ -1240,11 +1256,10 @@ wxDateTime::ParseFormat(const wxString& date, year = tm.year; mon = tm.mon; mday = tm.mday; - - input += endc - inc.begin(); -#ifdef HAVE_STRPTIME } -#endif // HAVE_STRPTIME + + haveDay = haveMon = haveYear = + haveHour = haveMin = haveSec = true; } break; @@ -1329,7 +1344,7 @@ wxDateTime::ParseFormat(const wxString& date, case _T('p'): // AM or PM string { wxString am, pm, token = GetAlphaToken(input); - + // some locales have empty AM/PM tokens and thus when formatting // dates with the %p specifier nothing is generated; when trying to // parse them back, we get an empty token here... but that's not @@ -1355,12 +1370,9 @@ wxDateTime::ParseFormat(const wxString& date, case _T('r'): // time as %I:%M:%S %p { wxDateTime dt; - input = dt.ParseFormat(input, wxS("%I:%M:%S %p")); - if ( !input ) - { - // no match + if ( !dt.ParseFormat(wxString(input, date.end()), + wxS("%I:%M:%S %p"), &input) ) return NULL; - } haveHour = haveMin = haveSec = true; @@ -1373,15 +1385,13 @@ wxDateTime::ParseFormat(const wxString& date, case _T('R'): // time as %H:%M { - wxDateTime dt; - input = dt.ParseFormat(input, wxS("%H:%M")); - if ( !input ) - { - // no match + const wxDateTime + dt = ParseFormatAt(input, date.end(), wxS("%H:%M")); + if ( !dt.IsValid() ) return NULL; - } - haveHour = haveMin = true; + haveHour = + haveMin = true; Tm tm = dt.GetTm(); hour = tm.hour; @@ -1402,15 +1412,14 @@ wxDateTime::ParseFormat(const wxString& date, case _T('T'): // time as %H:%M:%S { - wxDateTime dt; - input = dt.ParseFormat(input, _T("%H:%M:%S")); - if ( !input ) - { - // no match + const wxDateTime + dt = ParseFormatAt(input, date.end(), wxS("%H:%M:%S")); + if ( !dt.IsValid() ) return NULL; - } - haveHour = haveMin = haveSec = true; + haveHour = + haveMin = + haveSec = true; Tm tm = dt.GetTm(); hour = tm.hour; @@ -1438,12 +1447,8 @@ wxDateTime::ParseFormat(const wxString& date, { struct tm tm; - const wxStringCharType * - result = CallStrptime(input, "%x", &tm); - if ( result ) + if ( CallStrptime(date, input, "%x", &tm) ) { - input = result; - haveDay = haveMon = haveYear = true; year = 1900 + tm.tm_year; @@ -1456,7 +1461,6 @@ wxDateTime::ParseFormat(const wxString& date, #endif // HAVE_STRPTIME { - wxDateTime dt; wxString fmtDate, fmtDateAlt; @@ -1480,17 +1484,11 @@ wxDateTime::ParseFormat(const wxString& date, } } - const wxString indate(input); - wxString::const_iterator endDate; - if ( !dt.ParseFormat(indate, fmtDate, &endDate) ) - { - // try another one if we have it - if ( fmtDateAlt.empty() || - !dt.ParseFormat(indate, fmtDateAlt, &endDate) ) - { - return NULL; - } - } + const wxDateTime + dt = ParseFormatAt(input, date.end(), + fmtDate, fmtDateAlt); + if ( !dt.IsValid() ) + return NULL; Tm tm = dt.GetTm(); @@ -1501,8 +1499,6 @@ wxDateTime::ParseFormat(const wxString& date, year = tm.year; mon = tm.mon; mday = tm.mday; - - input += endDate - indate.begin(); } break; @@ -1512,11 +1508,8 @@ wxDateTime::ParseFormat(const wxString& date, { // use strptime() to do it for us (FIXME !Unicode friendly) struct tm tm; - input = CallStrptime(input, "%X", &tm); - if ( !input ) - { + if ( !CallStrptime(date, input, "%X", &tm) ) return NULL; - } haveHour = haveMin = haveSec = true; @@ -1534,18 +1527,10 @@ wxDateTime::ParseFormat(const wxString& date, // common cases wxDateTime dt; - const wxStringCharType * - result = dt.ParseFormat(input, wxS("%T")); - if ( !result ) - { - result = dt.ParseFormat(input, wxS("%r")); - } - - if ( !result ) - { - // no match + const wxDateTime + dt = ParseFormatAt(input, date.end(), "%T", "%r"); + if ( !dt.IsValid() ) return NULL; - } haveHour = haveMin = @@ -1555,8 +1540,6 @@ wxDateTime::ParseFormat(const wxString& date, hour = tm.hour; min = tm.min; sec = tm.sec; - - input = result; } #endif // HAVE_STRPTIME/!HAVE_STRPTIME break; @@ -1705,11 +1688,10 @@ wxDateTime::ParseFormat(const wxString& date, return NULL; } - const size_t endpos = input - date.wx_str(); if ( end ) - *end = date.begin() + endpos; + *end = input; - return date.c_str() + endpos; + return date.c_str() + (input - date.begin()); } const char * @@ -1773,7 +1755,9 @@ wxDateTime::ParseDate(const wxString& date, wxString::const_iterator *end) // "today" (for wxDate compatibility) and digits only otherwise (and not // all esoteric constructions ParseDateTime() knows about) - const wxStringCharType *p = date.wx_str(); + const wxString::const_iterator pBegin = date.begin(); + + wxString::const_iterator p = pBegin; while ( wxIsspace(*p) ) p++; @@ -1793,27 +1777,26 @@ wxDateTime::ParseDate(const wxString& date, wxString::const_iterator *end) { const wxString dateStr = wxGetTranslation(literalDates[n].str); size_t len = dateStr.length(); - if ( wxStrlen(p) >= len ) - { - wxString str(p, len); - if ( str.CmpNoCase(dateStr) == 0 ) - { - // nothing can follow this, so stop here - p += len; - int dayDiffFromToday = literalDates[n].dayDiffFromToday; - *this = Today(); - if ( dayDiffFromToday ) - { - *this += wxDateSpan::Days(dayDiffFromToday); - } + const wxString::const_iterator pEnd = p + len; + if ( wxString(p, pEnd).CmpNoCase(dateStr) == 0 ) + { + // nothing can follow this, so stop here - const size_t endpos = p - date.wx_str(); + p = pEnd; - if ( end ) - *end = date.begin() + endpos; - return date.c_str() + endpos; + int dayDiffFromToday = literalDates[n].dayDiffFromToday; + *this = Today(); + if ( dayDiffFromToday ) + { + *this += wxDateSpan::Days(dayDiffFromToday); } + + if ( end ) + *end = pEnd; + + return wxStringOperations::AddToIter(date.c_str().AsChar(), + pEnd - pBegin); } } @@ -1837,7 +1820,7 @@ wxDateTime::ParseDate(const wxString& date, wxString::const_iterator *end) // tokenize the string size_t nPosCur = 0; static const wxStringCharType *dateDelimiters = wxS(".,/-\t\r\n "); - wxStringTokenizer tok(p, dateDelimiters); + wxStringTokenizer tok(wxString(p, date.end()), dateDelimiters); while ( tok.HasMoreTokens() ) { wxString token = tok.GetNextToken(); @@ -2117,11 +2100,10 @@ wxDateTime::ParseDate(const wxString& date, wxString::const_iterator *end) p--; } - const size_t endpos = p - date.wx_str(); if ( end ) - *end = date.begin() + endpos; + *end = p; - return date.c_str() + endpos; + return wxStringOperations::AddToIter(date.c_str().AsChar(), p - pBegin); } const char * diff --git a/tests/datetime/datetimetest.cpp b/tests/datetime/datetimetest.cpp index 4e1ac7fb58..90df81d4ea 100644 --- a/tests/datetime/datetimetest.cpp +++ b/tests/datetime/datetimetest.cpp @@ -92,10 +92,64 @@ public: private: const char * const m_locOld; - wxDECLARE_NO_COPY_CLASS(CLocaleSetter); }; +// helper function translating week day/month names from English to the current +// locale +static wxString TranslateDate(const wxString& str) +{ + // small optimization: if there are no alphabetic characters in the string, + // there is nothing to translate + wxString::const_iterator i, end = str.end(); + for ( i = str.begin(); i != end; ++i ) + { + if ( isalpha(*i) ) + break; + } + + if ( i == end ) + return str; + + wxString trans(str); + + static const char *weekdays[] = + { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" + }; + + for ( wxDateTime::WeekDay wd = wxDateTime::Sun; + wd < wxDateTime::Inv_WeekDay; + wxNextWDay(wd) ) + { + trans.Replace + ( + weekdays[wd], + wxDateTime::GetWeekDayName(wd, wxDateTime::Name_Abbr) + ); + } + + + static const char *months[] = + { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", + "Nov", "Dec" + }; + + for ( wxDateTime::Month mon = wxDateTime::Jan; + mon < wxDateTime::Inv_Month; + wxNextMonth(mon) ) + { + trans.Replace + ( + months[mon], + wxDateTime::GetMonthName(mon, wxDateTime::Name_Abbr) + ); + } + + return trans; +} + // ---------------------------------------------------------------------------- // broken down date representation used for testing // ---------------------------------------------------------------------------- @@ -918,13 +972,9 @@ void DateTimeTestCase::TestDateParse() CPPUNIT_ASSERT( dt.ParseDate(_T("today")) ); CPPUNIT_ASSERT_EQUAL( wxDateTime::Today(), dt ); - // the other test strings use "C" locale so set it for the duration of this - // test - CLocaleSetter cloc; - for ( size_t n = 0; n < WXSIZEOF(parseTestDates); n++ ) { - const char * const datestr = parseTestDates[n].str; + const wxString datestr = TranslateDate(parseTestDates[n].str); const char * const end = dt.ParseDate(datestr); if ( end && !*end ) @@ -1033,15 +1083,17 @@ void DateTimeTestCase::TestDateTimeParse() { 22, wxDateTime::Nov, 2007, 19, 40, 0}, true }, }; - // the test strings use "C" locale so set it for the duration of this test + // the test strings here use "PM" which is not available in all locales so + // we need to use "C" locale for them CLocaleSetter cloc; wxDateTime dt; for ( size_t n = 0; n < WXSIZEOF(parseTestDates); n++ ) { - const char * const datestr = parseTestDates[n].str; + const wxString datestr = TranslateDate(parseTestDates[n].str); - if ( dt.ParseDateTime(datestr) ) + const char * const end = dt.ParseDateTime(datestr); + if ( end && !*end ) { WX_ASSERT_MESSAGE( ("Erroneously parsed \"%s\"", datestr), -- 2.47.2