]> git.saurik.com Git - apt.git/commitdiff
Add support for the LANGUAGE environment variable
authorDavid Kalnischkies <kalnischkies@gmail.com>
Wed, 17 Feb 2010 23:30:51 +0000 (00:30 +0100)
committerDavid Kalnischkies <kalnischkies@gmail.com>
Wed, 17 Feb 2010 23:30:51 +0000 (00:30 +0100)
apt-pkg/aptconfiguration.cc
apt-pkg/aptconfiguration.h
apt-pkg/contrib/strutl.cc
apt-pkg/contrib/strutl.h
test/libapt/getlanguages_test.cc

index 899004d9f0d964c1482386f4551471047e6d0014..9fd51ad5a95b6645ed8826f245d5365abe2d373f 100644 (file)
@@ -8,9 +8,11 @@
    ##################################################################### */
                                                                        /*}}}*/
 // Include Files                                                       /*{{{*/
-#include <apt-pkg/fileutl.h>
 #include <apt-pkg/aptconfiguration.h>
 #include <apt-pkg/configuration.h>
+#include <apt-pkg/fileutl.h>
+#include <apt-pkg/macros.h>
+#include <apt-pkg/strutl.h>
 
 #include <vector>
 #include <string>
@@ -96,7 +98,7 @@ const Configuration::getCompressionTypes(bool const &Cached) {
    will result in "de_DE, de, en".
    The special word "none" is the stopcode for the not-All code vector */
 std::vector<std::string> const Configuration::getLanguages(bool const &All,
-                               bool const &Cached, char const * const Locale) {
+                               bool const &Cached, char const ** const Locale) {
        using std::string;
 
        // The detection is boring and has a lot of cornercases,
@@ -117,27 +119,29 @@ std::vector<std::string> const Configuration::getLanguages(bool const &All,
                }
        }
 
-       // get the environment language code
+       // get the environment language codes: LC_MESSAGES (and later LANGUAGE)
        // we extract both, a long and a short code and then we will
        // check if we actually need both (rare) or if the short is enough
-       string const envMsg = string(Locale == 0 ? std::setlocale(LC_MESSAGES, NULL) : Locale);
+       string const envMsg = string(Locale == 0 ? std::setlocale(LC_MESSAGES, NULL) : *Locale);
        size_t const lenShort = (envMsg.find('_') != string::npos) ? envMsg.find('_') : 2;
-       size_t const lenLong = (envMsg.find('.') != string::npos) ? envMsg.find('.') : (lenShort + 3);
+       size_t const lenLong = (envMsg.find_first_of(".@") != string::npos) ? envMsg.find_first_of(".@") : (lenShort + 3);
 
        string envLong = envMsg.substr(0,lenLong);
        string const envShort = envLong.substr(0,lenShort);
-       bool envLongIncluded = true, envShortIncluded = false;
+       bool envLongIncluded = true;
 
        // first cornercase: LANG=C, so we use only "en" Translation
        if (envLong == "C") {
                codes.push_back("en");
+               allCodes = codes;
                return codes;
        }
 
+       // to save the servers from unneeded queries, we also try long codes only
+       // for languages for which a long code translation file realistically exists...
+       // TODO: Improve translation acquire system to drop them dynamically
+       char const *needLong[] = { "cs", "en", "pt", "sv", "zh", NULL };
        if (envLong != envShort) {
-               // to save the servers from unneeded queries, we only try also long codes
-               // for languages it is realistic to have a long code translation file...
-               char const *needLong[] = { "cs", "en", "pt", "sv", "zh", NULL };
                for (char const **l = needLong; *l != NULL; l++)
                        if (envShort.compare(*l) == 0) {
                                envLongIncluded = false;
@@ -155,33 +159,64 @@ std::vector<std::string> const Configuration::getLanguages(bool const &All,
        if (oldAcquire.empty() == false && oldAcquire != "environment") {
                if (oldAcquire != "none")
                        codes.push_back(oldAcquire);
+               allCodes = codes;
                return codes;
        }
 
+       // It is very likely we will need the environment codes later,
+       // so let us generate them now from LC_MESSAGES and LANGUAGE
+       std::vector<string> environment;
+       // take care of LC_MESSAGES
+       if (envLongIncluded == false)
+               environment.push_back(envLong);
+       environment.push_back(envShort);
+       // take care of LANGUAGE
+       string envLang = Locale == 0 ? getenv("LANGUAGE") : *(Locale+1);
+       if (envLang.empty() == false) {
+               std::vector<string> env = ExplodeString(envLang,':');
+               short addedLangs = 0; // add a maximum of 3 fallbacks from the environment
+               for (std::vector<string>::const_iterator e = env.begin();
+                    e != env.end() && addedLangs < 3; ++e) {
+                       if (unlikely(e->empty() == true) || *e == "en")
+                               continue;
+                       if (*e == envLong || *e == envShort)
+                               continue;
+                       if (std::find(environment.begin(), environment.end(), *e) != environment.end())
+                               continue;
+                       if (e->find('_') != string::npos) {
+                               // Drop LongCodes here - ShortCodes are also included
+                               string const shorty = e->substr(0, e->find('_'));
+                               char const **n = needLong;
+                               for (; *n != NULL; ++n)
+                                       if (shorty == *n)
+                                               break;
+                               if (*n == NULL)
+                                       continue;
+                       }
+                       ++addedLangs;
+                       environment.push_back(*e);
+               }
+       }
+
        // Support settings like Acquire::Translation=none on the command line to
        // override the configuration settings vector of languages.
        string const forceLang = _config->Find("Acquire::Languages","");
        if (forceLang.empty() == false) {
                if (forceLang == "environment") {
-                       if (envLongIncluded == false)
-                               codes.push_back(envLong);
-                       if (envShortIncluded == false)
-                               codes.push_back(envShort);
-                       return codes;
+                       codes = environment;
                } else if (forceLang != "none")
                        codes.push_back(forceLang);
+               allCodes = codes;
                return codes;
        }
 
        std::vector<string> const lang = _config->FindVector("Acquire::Languages");
        // the default setting -> "environment, en"
        if (lang.empty() == true) {
-               if (envLongIncluded == false)
-                       codes.push_back(envLong);
-               if (envShortIncluded == false)
-                       codes.push_back(envShort);
+               codes = environment;
                if (envShort != "en")
                        codes.push_back("en");
+               allCodes = codes;
                return codes;
        }
 
@@ -191,26 +226,19 @@ std::vector<std::string> const Configuration::getLanguages(bool const &All,
        for (std::vector<string>::const_iterator l = lang.begin();
             l != lang.end(); l++) {
                if (*l == "environment") {
-                       if (envLongIncluded == true && envShortIncluded == true)
-                               continue;
-                       if (envLongIncluded == false) {
-                               envLongIncluded = true;
-                               if (noneSeen == false)
-                                       codes.push_back(envLong);
-                               allCodes.push_back(envLong);
-                       }
-                       if (envShortIncluded == false) {
-                               envShortIncluded = true;
+                       for (std::vector<string>::const_iterator e = environment.begin();
+                            e != environment.end(); ++e) {
+                               if (std::find(allCodes.begin(), allCodes.end(), *e) != allCodes.end())
+                                       continue;
                                if (noneSeen == false)
-                                       codes.push_back(envShort);
-                               allCodes.push_back(envShort);
+                                       codes.push_back(*e);
+                               allCodes.push_back(*e);
                        }
                        continue;
                } else if (*l == "none") {
                        noneSeen = true;
                        continue;
-               } else if ((envLongIncluded == true && *l == envLong) ||
-                        (envShortIncluded == true && *l == envShort))
+               } else if (std::find(allCodes.begin(), allCodes.end(), *l) != allCodes.end())
                        continue;
 
                if (noneSeen == false)
index f2f04a39bd840a9a6db6bfd28f9a935105593a81..2ba1b38256699d8e9b76a811c9270d0c28e30580 100644 (file)
@@ -64,7 +64,7 @@ public:                                                                       /*{{{*/
         *  \return a vector of (all) Language Codes in the prefered usage order
         */
        std::vector<std::string> static const getLanguages(bool const &All = false,
-                       bool const &Cached = true, char const * const Locale = 0);
+                       bool const &Cached = true, char const ** const Locale = 0);
 
                                                                        /*}}}*/
 };
index 2913fbf44e42f74bf8104cb5b97051816c0940d7..3bbaf5f3045dc6d3464325dbe75179ab8c34b093 100644 (file)
@@ -1000,6 +1000,24 @@ bool TokSplitString(char Tok,char *Input,char **List,
    return true;
 }
                                                                        /*}}}*/
+// ExplodeString - Split a string up into a vector                     /*{{{*/
+// ---------------------------------------------------------------------
+/* Splits haystack at every occurrence of split and returns the pieces in order,
+   serving the same purpose as the method above but yielding an iterable vector.
+   An empty haystack gives one empty piece; a trailing split adds no piece. */
+vector<string> ExplodeString(string const &haystack, char const &split)
+{
+   string::const_iterator start = haystack.begin();
+   string::const_iterator end = start;
+   vector<string> exploded;
+   do {
+      for (; end != haystack.end() && *end != split; ++end);
+      exploded.push_back(string(start, end));
+      start = end + 1; // NOTE(review): steps past haystack.end() after the last piece; never dereferenced
+   } while (end != haystack.end() && (++end) != haystack.end());
+   return exploded;
+}
+                                                                       /*}}}*/
 // RegexChoice - Simple regex list/list matcher                                /*{{{*/
 // ---------------------------------------------------------------------
 /* */
index 2b2e147fb740c2a07f35ef196915806f833563ef..d65f975d212acc39443672e2cbf5c315c9b06fe8 100644 (file)
@@ -59,6 +59,7 @@ bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base = 0)
 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length);
 bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax);
+vector<string> ExplodeString(string const &haystack, char const &split);
 void ioprintf(ostream &out,const char *format,...) APT_FORMAT2;
 void strprintf(string &out,const char *format,...) APT_FORMAT2;
 char *safe_snprintf(char *Buffer,char *End,const char *Format,...) APT_FORMAT3;
index fd3c8269f0d54d8c5583143af7eb7eccd37aa4a3..fb7afb4ef675b0c11960ac67f01f686adf561bdf 100644 (file)
@@ -16,75 +16,108 @@ void dumpVector(std::vector<std::string> vec) {
 
 int main(int argc,char *argv[])
 {
-       std::vector<std::string> vec = APT::Configuration::getLanguages(false, false, "de_DE.UTF-8");
+       char const* env[2];
+       env[0] = "de_DE.UTF-8";
+       env[1] = "";
+       std::vector<std::string> vec = APT::Configuration::getLanguages(false, false, env);
        equals(vec.size(), 2);
        equals(vec[0], "de");
        equals(vec[1], "en");
 
        // Special: Check if the cache is actually in use
-               vec = APT::Configuration::getLanguages(false, true, "en_GB.UTF-8");
+               env[0] = "en_GB.UTF-8";
+               vec = APT::Configuration::getLanguages(false, true, env);
                equals(vec.size(), 2);
                equals(vec[0], "de");
                equals(vec[1], "en");
 
-       vec = APT::Configuration::getLanguages(false, false, "en_GB.UTF-8");
+       env[0] = "en_GB.UTF-8";
+       vec = APT::Configuration::getLanguages(false, false, env);
        equals(vec.size(), 2);
        equals(vec[0], "en_GB");
        equals(vec[1], "en");
 
-       vec = APT::Configuration::getLanguages(false, false, "pt_PR.UTF-8");
+       env[0] = "tr_DE@euro";
+       vec = APT::Configuration::getLanguages(false, false, env);
+       equals(vec.size(), 2);
+       equals(vec[0], "tr");
+       equals(vec[1], "en");
+
+       env[0] = "de_NO";
+       env[1] = "se_NO:en_GB:nb_NO:nb:no_NO:no:nn_NO:nn:da:sv:en";
+       vec = APT::Configuration::getLanguages(false, false, env);
+       equals(vec.size(), 5);
+       equals(vec[0], "de");
+       equals(vec[1], "en_GB");
+       equals(vec[2], "nb");
+       equals(vec[3], "no");
+       equals(vec[4], "en");
+
+       env[0] = "pt_PR.UTF-8";
+       env[1] = "";
+       vec = APT::Configuration::getLanguages(false, false, env);
        equals(vec.size(), 3);
        equals(vec[0], "pt_PR");
        equals(vec[1], "pt");
        equals(vec[2], "en");
 
-       vec = APT::Configuration::getLanguages(false, false, "ast_DE.UTF-8"); // bogus, but syntactical correct
+       env[0] = "ast_DE.UTF-8";
+       vec = APT::Configuration::getLanguages(false, false, env); // bogus, but syntactical correct
        equals(vec.size(), 2);
        equals(vec[0], "ast");
        equals(vec[1], "en");
 
-       vec = APT::Configuration::getLanguages(false, false, "C");
+       env[0] = "C";
+       vec = APT::Configuration::getLanguages(false, false, env);
        equals(vec.size(), 1);
        equals(vec[0], "en");
 
        _config->Set("Acquire::Languages::1", "environment");
        _config->Set("Acquire::Languages::2", "en");
-       vec = APT::Configuration::getLanguages(false, false, "de_DE.UTF-8");
+       env[0] = "de_DE.UTF-8";
+       vec = APT::Configuration::getLanguages(false, false, env);
        equals(vec.size(), 2);
        equals(vec[0], "de");
        equals(vec[1], "en");
 
        _config->Set("Acquire::Languages::3", "de");
-       vec = APT::Configuration::getLanguages(false, false, "de_DE.UTF-8");
+       env[0] = "de_DE.UTF-8";
+       vec = APT::Configuration::getLanguages(false, false, env);
        equals(vec.size(), 2);
        equals(vec[0], "de");
        equals(vec[1], "en");
 
        _config->Set("Acquire::Languages::1", "none");
-       vec = APT::Configuration::getLanguages(false, false, "de_DE.UTF-8");
+       env[0] = "de_DE.UTF-8";
+       vec = APT::Configuration::getLanguages(false, false, env);
        equals(vec.size(), 0);
-       vec = APT::Configuration::getLanguages(true, false, "de_DE.UTF-8");
+       env[0] = "de_DE.UTF-8";
+       vec = APT::Configuration::getLanguages(true, false, env);
        equals(vec[0], "en");
        equals(vec[1], "de");
 
        _config->Set("Acquire::Languages::1", "fr");
        _config->Set("Acquire::Languages", "de_DE");
-       vec = APT::Configuration::getLanguages(false, false, "de_DE.UTF-8");
+       env[0] = "de_DE.UTF-8";
+       vec = APT::Configuration::getLanguages(false, false, env);
        equals(vec.size(), 1);
        equals(vec[0], "de_DE");
 
        _config->Set("Acquire::Languages", "none");
-       vec = APT::Configuration::getLanguages(true, false, "de_DE.UTF-8");
+       env[0] = "de_DE.UTF-8";
+       vec = APT::Configuration::getLanguages(true, false, env);
        equals(vec.size(), 0);
 
        _config->Set("Acquire::Languages", "");
        //FIXME: Remove support for this deprecated setting
                _config->Set("APT::Acquire::Translation", "ast_DE");
-               vec = APT::Configuration::getLanguages(true, false, "de_DE.UTF-8");
+               env[0] = "de_DE.UTF-8";
+               vec = APT::Configuration::getLanguages(true, false, env);
                equals(vec.size(), 1);
                equals(vec[0], "ast_DE");
                _config->Set("APT::Acquire::Translation", "none");
-               vec = APT::Configuration::getLanguages(true, false, "de_DE.UTF-8");
+               env[0] = "de_DE.UTF-8";
+               vec = APT::Configuration::getLanguages(true, false, env);
                equals(vec.size(), 0);
 
        return 0;