diff --git a/src/Utils.py b/src/Utils.py
index 842035dcf..476edea1f 100644
--- a/src/Utils.py
+++ b/src/Utils.py
@@ -280,26 +280,78 @@ def encodingdefs():
     """
     pass
 
-if constfunc.win():
-    # python encoding is ascii, but C functions need to receive the
-    # windows codeset, so convert over to it
-    conv_utf8_tosrtkey = lambda x: locale.strxfrm(x.decode("utf-8").encode(
-                                   codeset))
-    conv_unicode_tosrtkey = lambda x: locale.strxfrm(x.encode(codeset))
-    #when gtk is imported the python defaultencoding is utf-8,
-    #so no need to specify it
-    conv_utf8_tosrtkey_ongtk = lambda x: locale.strxfrm(unicode(x).encode(
-                                   codeset))
-    conv_unicode_tosrtkey_ongtk = lambda x: locale.strxfrm(x.encode(codeset,'replace'))
-else:
-    # on unix C functions need to receive utf-8. Default conversion would
-    # use ascii, so it is needed to be explicit about the resulting encoding
-    conv_utf8_tosrtkey = lambda x: locale.strxfrm(x)
-    conv_unicode_tosrtkey = lambda x: locale.strxfrm(x.encode("utf-8"))
-    # when gtk loaded, default encoding (sys.getdefaultencoding ) is utf-8,
-    # so default conversion happens with utf-8
-    conv_utf8_tosrtkey_ongtk = lambda x: locale.strxfrm(x)
-    conv_unicode_tosrtkey_ongtk = lambda x: locale.strxfrm(x)
+try:
+    import PyICU
+    if "LC_COLLATE" in os.environ:
+        collation = os.environ['LC_COLLATE']
+    else:
+        collation = os.environ["LANG"]
+    language_and_country = collation.rsplit('.', 1)[0]
+    if language_and_country in PyICU.Collator.getAvailableLocales().keys():
+        loc = language_and_country
+    else:
+        language = collation.rsplit('_', 1)[0]
+        if language in PyICU.Collator.getAvailableLocales().keys():
+            # interpolate after translation so the msgid matches the catalog
+            LOG.warn(_("Language and country %s not supported by ICU: "
+                       "but language %s is supported and will be used") %
+                     (language_and_country, language))
+            loc = language
+        else:
+            # interpolate after translation so the msgid matches the catalog
+            LOG.warn(_("Neither Language and country %s nor language %s "
+                       "supported by ICU: using en_GB") %
+                     (language_and_country, language))
+            loc = "en_GB"
+
+    collator = PyICU.Collator.createInstance(PyICU.Locale(loc))
+    # on ICU, the functions need to receive unicode
+    conv_utf8_tosrtkey = lambda x: collator.getCollationKey(
+                x.decode("UTF-8")).getByteArray()
+    conv_unicode_tosrtkey = lambda x: collator.getCollationKey(
+                x).getByteArray()
+    conv_utf8_tosrtkey_ongtk = lambda x: collator.getCollationKey(
+                x.decode("UTF-8")).getByteArray()
+    conv_unicode_tosrtkey_ongtk = lambda x: collator.getCollationKey(
+                x).getByteArray()
+except Exception:
+    # Best effort: any failure above (PyICU import, missing LANG, collator
+    # set-up) falls back to the locale.strxfrm based sort keys below.
+    LOG.warn(_("PyICU not available: sorting may be incorrect"))
+    if constfunc.win():
+        # python encoding is ascii, but C functions need to receive the
+        # windows codeset, so convert over to it
+        conv_utf8_tosrtkey = lambda x: locale.strxfrm(x.decode("utf-8").encode(
+                                       codeset))
+        conv_unicode_tosrtkey = lambda x: locale.strxfrm(x.encode(codeset))
+        #when gtk is imported the python defaultencoding is utf-8,
+        #so no need to specify it
+        conv_utf8_tosrtkey_ongtk = lambda x: locale.strxfrm(unicode(x).encode(
+                                       codeset))
+        conv_unicode_tosrtkey_ongtk = lambda x: locale.strxfrm(x.encode(codeset,'replace'))
+    elif constfunc.mac():
+        # On mac strxfrm seems to be broken such that better results are
+        # obtained by applying strxfrm to each character individually, rather
+        # than applying the function to the whole string. See in particular
+        # greek names at bug 5645
+
+        # on mac C functions need to receive utf-8. Default conversion would
+        # use ascii, so it is needed to be explicit about the resulting encoding
+        conv_utf8_tosrtkey = lambda x: map(locale.strxfrm, x)
+        conv_unicode_tosrtkey = lambda x: map(locale.strxfrm, x.encode("utf-8"))
+        # when gtk loaded, default encoding (sys.getdefaultencoding ) is utf-8,
+        # so default conversion happens with utf-8
+        conv_utf8_tosrtkey_ongtk = lambda x: map(locale.strxfrm, x)
+        conv_unicode_tosrtkey_ongtk = lambda x: map(locale.strxfrm, x.encode("utf-8"))
+    else:
+        # on unix C functions need to receive utf-8. Default conversion would
+        # use ascii, so it is needed to be explicit about the resulting encoding
+        conv_utf8_tosrtkey = lambda x: locale.strxfrm(x)
+        conv_unicode_tosrtkey = lambda x: locale.strxfrm(x.encode("utf-8"))
+        # when gtk loaded, default encoding (sys.getdefaultencoding ) is utf-8,
+        # so default conversion happens with utf-8
+        conv_utf8_tosrtkey_ongtk = lambda x: locale.strxfrm(x)
+        conv_unicode_tosrtkey_ongtk = lambda x: locale.strxfrm(x.encode("utf-8"))
 
 #-------------------------------------------------------------------------
 #
diff --git a/src/gui/views/treemodels/flatbasemodel.py b/src/gui/views/treemodels/flatbasemodel.py
index 77bde601c..2dffa8108 100644
--- a/src/gui/views/treemodels/flatbasemodel.py
+++ b/src/gui/views/treemodels/flatbasemodel.py
@@ -107,8 +107,8 @@ class FlatNodeMap(object):
     the path, and a dictionary mapping hndl to index.
     To obtain index given a path, method real_index() is available
 
-    ..Note: If a string sortkey is used, apply conv_unicode_tosrtkey_ongtk
-            on it , so as to have localized sort
+    ..Note: conv_unicode_tosrtkey_ongtk is applied to the underlying sort key,
+            so as to have localized sort
     """
 
     def __init__(self):
@@ -381,6 +381,9 @@ class FlatBaseModel(gtk.GenericTreeModel):
     """
     The base class for all flat treeview models.
     It keeps a FlatNodeMap, and obtains data from database as needed
+
+    ..Note: conv_unicode_tosrtkey_ongtk is applied to the underlying sort key,
+            so as to have localized sort
     """
 
     def __init__(self, db, scol=0, order=gtk.SORT_ASCENDING,
@@ -399,9 +402,9 @@ class FlatBaseModel(gtk.GenericTreeModel):
             self.sort_map = [ f for f in sort_map if f[0]]
             #we need the model col, that corresponds with scol
             col = self.sort_map[scol][1]
-            self.sort_func = self.smap[col]
         else:
-            self.sort_func = self.smap[scol]
+            col = scol
+        self.sort_func = lambda x: conv_unicode_tosrtkey_ongtk(self.smap[col](x))
         self.sort_col = scol
         self.skip = skip
         self._in_build = False
@@ -505,15 +508,11 @@ class FlatBaseModel(gtk.GenericTreeModel):
         Return the (sort_key, handle) list of all data that can maximally
         be shown.
         This list is sorted ascending, via localized string sort.
-        conv_unicode_tosrtkey_ongtk which uses strxfrm, which is apparently
-        broken in Win ?? --> they should fix base lib, we need strxfrm, fix it
-        in the Utils module.
         """
         # use cursor as a context manager
         with self.gen_cursor() as cursor:
             #loop over database and store the sort field, and the handle
-            return sorted((map(conv_unicode_tosrtkey_ongtk,
-                           self.sort_func(data)), key) for key, data in cursor)
+            return sorted((self.sort_func(data), key) for key, data in cursor)
 
     def _rebuild_search(self, ignore=None):
         """ function called when view must be build, given a search text
@@ -582,8 +581,7 @@ class FlatBaseModel(gtk.GenericTreeModel):
         if self.node_map.get_path(handle) is not None:
             return # row is already displayed
         data = self.map(handle)
-        insert_val = (map(conv_unicode_tosrtkey_ongtk, self.sort_func(data)),
-                            handle)
+        insert_val = (self.sort_func(data), handle)
         if not self.search or \
                 (self.search and self.search.match(handle, self.db)):
             #row needs to be added to the model
@@ -616,8 +614,7 @@ class FlatBaseModel(gtk.GenericTreeModel):
             return # row is not currently displayed
         self.clear_cache(handle)
         oldsortkey = self.node_map.get_sortkey(handle)
-        newsortkey = map(conv_unicode_tosrtkey_ongtk, self.sort_func(self.map(
-                            handle)))
+        newsortkey = self.sort_func(self.map(handle))
         if oldsortkey is None or oldsortkey != newsortkey:
             #or the changed object is not present in the view due to filtering
             #or the order of the object must change.
diff --git a/src/gui/views/treemodels/treebasemodel.py b/src/gui/views/treemodels/treebasemodel.py
index e070f8583..1e0d38f1e 100644
--- a/src/gui/views/treemodels/treebasemodel.py
+++ b/src/gui/views/treemodels/treebasemodel.py
@@ -88,7 +88,7 @@ class Node(object):
     def __init__(self, ref, parent, sortkey, handle, secondary):
         self.name = sortkey
         if sortkey:
-            self.sortkey = map(conv_unicode_tosrtkey_ongtk, sortkey)
+            self.sortkey = conv_unicode_tosrtkey_ongtk(sortkey)
         else:
             self.sortkey = None
         self.ref = ref