diff --git a/gramps/plugins/webreport/alphabeticindex.py b/gramps/plugins/webreport/alphabeticindex.py new file mode 100644 index 000000000..bc4f768cb --- /dev/null +++ b/gramps/plugins/webreport/alphabeticindex.py @@ -0,0 +1,383 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python +# +# Gramps - a GTK+/GNOME based genealogy program +# +# Copyright (C) 2010- Serge Noiraud +# Copyright (C) 2021- T G L Lyons +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# + +""" +Narrative Web Page generator. 
+ +Class: + AlphabeticIndex - approximate emulation of ICU Alphabetic Index +""" + +#------------------------------------------------ +# python modules +#------------------------------------------------ +from unicodedata import normalize +from collections import defaultdict +import logging + +#------------------------------------------------ +# Gramps module +#------------------------------------------------ +from gramps.gen.const import GRAMPS_LOCALE as glocale + +HAVE_ICU = False +try: + from icu import Locale, Collator + HAVE_ICU = True +except ImportError: + try: + from PyICU import Locale, Collator + HAVE_ICU = True + except ImportError: + pass + +LOG = logging.getLogger(".NarrativeWeb") +COLLATE_LANG = glocale.collation +class U_ENUM_OUT_OF_SYNC_ERROR(Exception): # pylint: disable=invalid-name + """ + Exception to match the error in the ICU AlphabeticIndex + """ + pass + +# See : http://www.gramps-project.org/bugs/view.php?id=4423 + +# Contraction data taken from CLDR 22.1. Only the default variant is considered. +# The languages included below are, by no means, all the languages that have +# contractions - just a sample of languages that have been supported + +# At the time of writing (Feb 2013), the following languages have greater than +# 50% coverage of translation of Gramps: bg Bulgarian, ca Catalan, cs Czech, da +# Danish, de German, el Greek, en_GB, es Spanish, fi Finnish, fr French, he +# Hebrew, hr Croatian, hu Hungarian, it Italian, ja Japanese, lt Lithuanian, nb +# Norwegian Bokmål, nn Norwegian Nynorsk, nl Dutch, pl Polish, pt_BR Portuguese +# (Brazil), pt_PT Portuguese (Portugal), ru Russian, sk Slovak, sl Slovenian, sv +# Swedish, vi Vietnamese, zh_CN Chinese. + +# Key is the language (or language and country), Value is a list of +# contractions. Each contraction consists of a tuple. First element of the +# tuple is the list of characters, second element is the string to use as the +# index entry. + +# The DUCET contractions (e.g.
LATIN CAPITAL LETTER L, MIDDLE DOT) are ignored, +# as are the suppressContractions in some locales. + +CONTRACTIONS_DICT = { +# bg Bulgarian validSubLocales="bg_BG" no contractions +# ca Catalan validSubLocales="ca_AD ca_ES" +"ca" : [(("l·", "L·"), "L")], +# Czech, validSubLocales="cs_CZ" Czech_Czech Republic +"cs" : [(("ch", "cH", "Ch", "CH"), "CH")], +# Danish validSubLocales="da_DK" Danish_Denmark +"da" : [(("aa", "Aa", "AA"), "Å")], +# de German validSubLocales="de_AT de_BE de_CH de_DE de_LI de_LU" no +# contractions in standard collation. +# el Greek validSubLocales="el_CY el_GR" no contractions. +# es Spanish validSubLocales="es_419 es_AR es_BO es_CL es_CO es_CR es_CU +# es_DO es_EA es_EC es_ES es_GQ es_GT es_HN es_IC es_MX es_NI es_PA es_PE +# es_PH es_PR es_PY es_SV es_US es_UY es_VE" no contractions in standard +# collation. +# fi Finnish validSubLocales="fi_FI" no contractions in default (phonebook) +# collation. +# fr French no collation data. +# he Hebrew validSubLocales="he_IL" no contractions +# hr Croatian validSubLocales="hr_BA hr_HR" +"hr" : [(("dž", "Dž", "DŽ"), "DŽ"), + (("lj", "Lj", 'LJ'), "LJ"), + (("Nj", "NJ", "nj"), "NJ")], +# Hungarian hu_HU for two and three character contractions. +"hu" : [(("cs", "Cs", "CS"), "CS"), + (("dzs", "Dzs", "DZS"), "DZS"), # order is important + (("dz", "Dz", "DZ"), "DZ"), + (("gy", "Gy", "GY"), "GY"), + (("ly", "Ly", "LY"), "LY"), + (("ny", "Ny", "NY"), "NY"), + (("sz", "Sz", "SZ"), "SZ"), + (("ty", "Ty", "TY"), "TY"), + (("zs", "Zs", "ZS"), "ZS") + ], +# it Italian no collation data. +# ja Japanese unable to process the data as it is too complex. +# lt Lithuanian no contractions. +# Norwegian Bokmål +"nb" : [(("aa", "Aa", "AA"), "Å")], +# nn Norwegian Nynorsk validSubLocales="nn_NO" +"nn" : [(("aa", "Aa", "AA"), "Å")], +# nl Dutch no collation data. +# pl Polish validSubLocales="pl_PL" no contractions +# pt Portuguese no collation data.
+# ru Russian validSubLocales="ru_BY ru_KG ru_KZ ru_MD ru_RU ru_UA" no +# contractions +# Slovak, validSubLocales="sk_SK" Slovak_Slovakia +# having DZ in Slovak as a contraction was rejected in +# http://unicode.org/cldr/trac/ticket/2968 +"sk" : [(("ch", "cH", "Ch", "CH"), "Ch")], +# sl Slovenian validSubLocales="sl_SI" no contractions +# sv Swedish validSubLocales="sv_AX sv_FI sv_SE" default collation is +# "reformed" no contractions. +# vi Vietnamese validSubLocales="vi_VN" no contractions. +# zh Chinese validSubLocales="zh_Hans zh_Hans_CN zh_Hans_SG" no contractions +# in Latin characters the others are too complex. +} + +# The comment below from the glibc locale sv_SE in +# localedata/locales/sv_SE : +# +# % The letter w is normally not present in the Swedish alphabet. It +# % exists in some names in Swedish and foreign words, but is accounted +# % for as a variant of 'v'. Words and names with 'w' are in Swedish +# % ordered alphabetically among the words and names with 'v'. If two +# % words or names are only to be distinguished by 'v' or % 'w', 'v' is +# % placed before 'w'. +# +# See : http://www.gramps-project.org/bugs/view.php?id = 2933 +# + +# HOWEVER: the characters V and W in Swedish are not considered as a special +# case for several reasons. (1) The default collation for Swedish (called the +# 'reformed' collation type) regards the difference between 'v' and 'w' as a +# primary difference. (2) 'v' and 'w' in the 'standard' (non-default) collation +# type are not a contraction, just a case where the difference is secondary +# rather than primary. (3) There are plenty of other languages where a +# difference that is primary in other languages is secondary, and those are not +# specially handled. 
+ +def first_letter(string, rlocale=glocale): + """ + Return the upper-cased first letter, or the contraction index entry + """ + if string is None or len(string) < 1: + return ' ' + + norm_unicode = normalize('NFKC', str(string)) + contractions = CONTRACTIONS_DICT.get(rlocale.collation) + if contractions is None: + contractions = CONTRACTIONS_DICT.get(rlocale.collation.split("_")[0]) + + if contractions is not None: + for contraction in contractions: + count = len(contraction[0][0]) + if (len(norm_unicode) >= count and + norm_unicode[:count] in contraction[0]): + return contraction[1] + + # no special case + return norm_unicode[0].upper() + +if HAVE_ICU: + def primary_difference(prev_key, new_key, rlocale=glocale): + """ + Try to use the PyICU collation. + If we generate a report for another language, make sure we use the + correct collation sequence + """ + collate_lang = Locale(rlocale.collation) + collation = Collator.createInstance(collate_lang) + collation.setStrength(Collator.PRIMARY) + return collation.compare(prev_key, new_key) != 0 + +else: + def primary_difference(prev_key, new_key, rlocale=glocale): + """ + The PyICU collation is not available. + + Returns true if there is a primary difference between the two parameters + See http://www.gramps-project.org/bugs/view.php?id=2933#c9317 if + letter[i]+'a' < letter[i+1]+'b' and letter[i+1]+'a' < letter[i]+'b' is + true then the letters should be grouped together + + The test characters here must not be any that are used in contractions. + """ + + return rlocale.sort_key(prev_key + "e") >= \ + rlocale.sort_key(new_key + "f") or \ + rlocale.sort_key(new_key + "e") >= \ + rlocale.sort_key(prev_key + "f") + +def get_index_letter(letter, index_list, rlocale=glocale): + """ + This finds the letter in the index_list that has no primary difference from + the letter provided. See the discussion in AlphabeticIndex.__create_index + below. Continuing the example, if letter is Å and index_list is A, then + this would return A.
+ + @param: letter -- The letter to find in the index_list + @param: index_list -- The list of all first letters in use + @param: rlocale -- The locale to use + """ + for index in index_list: + if not primary_difference(letter, index, rlocale): + return index + + LOG.warning("Initial letter '%s' not found in alphabetic navigation list", + letter) + LOG.debug("filtered sorted index list %s", index_list) + return letter + +#------------------------------------------------------------ +# +# AlphabeticIndex (local non-ICU version) +# +#------------------------------------------------------------ +class AlphabeticIndex(): + """ + Approximately emulate the ICU AlphabeticIndex + """ + def __init__(self, rlocale): + self.rlocale = rlocale + self._record_list = [] + # self.index_list = [] + + self._bucket_list = [] + self._dirty = False + self._bucket = -1 + self._record = -1 + # Externally available properties + self.bucketLabel = "" # pylint: disable=invalid-name + self.recordName = "" # pylint: disable=invalid-name + self.recordData = "" # pylint: disable=invalid-name + self.bucketRecordCount = 0 # pylint: disable=invalid-name + + def __create_index(self): + """ + Internal routine to create the Alphabetic Index + """ + # The first letter (or letters if there is a contraction) are extracted + # from all the records in the _record_list. There may be duplicates, and + # there may be letters where there is only a secondary or tertiary + # difference, not a primary difference. + index_list = [] + for (name, dummy_data) in self._record_list: + ltr = first_letter(name, self.rlocale) + index_list.append(ltr) + # The list is sorted in collation order. + index_list.sort(key=self.rlocale.sort_key) + # For each group with secondary or tertiary differences, the first in + # collation sequence is retained. For example, assume the default + # collation sequence (DUCET) and names Ånström and Apple. These will + # sort in the order shown. Å and A have a secondary difference. 
If the + # first letter from these names was chosen then the index entry would be + # Å. This is not desirable. Instead, the initial letters are extracted + # (Å and A). These are sorted, which gives A and Å. Then the first of + # these is used for the index entry. + + # now remove letters where there is not a primary difference + first = True + prev_index = None + for nkey in index_list[:]: + # iterate over a slice copy + if first or primary_difference(prev_index, nkey, self.rlocale): + first = False + prev_index = nkey + else: + index_list.remove(nkey) + + # finally construct the buckets and contents + bucket_dict = defaultdict(list) + for (name, data) in \ + sorted(self._record_list, + key=lambda x: self.rlocale.sort_key(x[0])): + letter = first_letter(name, self.rlocale) + letter = get_index_letter(letter, index_list, self.rlocale) + bucket_dict[letter].append((name, data)) + + self._bucket_list = sorted(bucket_dict.items(), + key=lambda x : self.rlocale.sort_key(x[0])) + + self._dirty = False + + def addRecord(self, name, data): # pylint: disable=invalid-name + """ + Add a record to the index. + + Each record will be associated with an index Bucket based on the + record's name. The list of records for each bucket will be sorted + based on the collation ordering of the names in the index's locale. + Records with duplicate names are permitted; they will be kept in the + order that they were added. + + @param: name -- The display name + for the Record. The Record will be placed in + a bucket based on this name. + @param: data -- An optional pointer to user data associated + with this item. When iterating the contents + of a bucket, both the data pointer the name + will be available for each Record. + """ + self._record_list.append((name, data)) + self._dirty = True + + def resetBucketIterator(self): # pylint: disable=invalid-name + """ + Reset the Bucket iteration for this index. + + The next call to nextBucket() will restart the iteration at the + first label. 
+ """ + if self._dirty: + self.__create_index() + self._bucket = -1 + self.bucketLabel = "" + self._record = -1 + self.recordName = "" + self.recordData = "" + + def nextBucket(self): # pylint: disable=invalid-name + """ + Advance the iteration over the Buckets of this index. + + Return false if there are no more Buckets. + """ + if self._dirty: + raise U_ENUM_OUT_OF_SYNC_ERROR + self._bucket += 1 + self._record = -1 + if self._bucket < len(self._bucket_list): + self.bucketLabel = self._bucket_list[self._bucket][0] + self.bucketRecordCount = len(self._bucket_list[self._bucket][1]) + self.recordName = "" + self.recordData = None + return True + else: + return False + + def nextRecord(self): # pylint: disable=invalid-name + """ + Advance to the next record in the current Bucket. + + When nextBucket() is called, Record iteration is reset to just + before the first Record in the new Bucket. + """ + if self._dirty: + raise U_ENUM_OUT_OF_SYNC_ERROR + self._record += 1 + if self._record < len(self._bucket_list[self._bucket][1]): + curr_bucket = self._bucket_list[self._bucket] + bucket_value = curr_bucket[1] + curr_record = bucket_value[self._record] + self.recordName = curr_record[0] + self.recordData = curr_record[1] + return True + else: + self.recordName = "" + self.recordData = None + return False diff --git a/gramps/plugins/webreport/common.py b/gramps/plugins/webreport/common.py index 344921f22..13ff653c5 100644 --- a/gramps/plugins/webreport/common.py +++ b/gramps/plugins/webreport/common.py @@ -27,11 +27,9 @@ This module is used to share variables, enums and functions between all modules """ -from unicodedata import normalize from collections import defaultdict from hashlib import md5 import re -import locale # Used only with pyICU import logging from xml.sax.saxutils import escape @@ -45,6 +43,30 @@ from gramps.plugins.lib.libgedcom import make_gedcom_date, DATE_QUALITY from gramps.gen.plug.report import utils from gramps.plugins.lib.libhtml import Html +HAVE_ICU 
= False +HAVE_ALPHABETICINDEX = False #separate check as this is only in ICU 4.6+ +try: + from icu import Locale + HAVE_ICU = True + try: + from icu import AlphabeticIndex as icuAlphabeticIndex + HAVE_ALPHABETICINDEX = True + except ImportError: + from gramps.plugins.webreport.alphabeticindex \ + import AlphabeticIndex as localAlphabeticIndex +except ImportError: + try: + from PyICU import Locale + HAVE_ICU = True + try: + from PyICU import AlphabeticIndex as icuAlphabeticIndex + HAVE_ALPHABETICINDEX = True + except ImportError: + from gramps.plugins.webreport.alphabeticindex \ + import AlphabeticIndex as localAlphabeticIndex + except ImportError: + from gramps.plugins.webreport.alphabeticindex import AlphabeticIndex as localAlphabeticIndex + LOG = logging.getLogger(".NarrativeWeb") # define clear blank line for proper styling @@ -418,28 +440,57 @@ def do_we_have_holidays(lang): return idx return None +def get_surname_from_person(dbase, person): + """ + get the person's surname + get the primary name + if group as get the group_as surname + else get the primary surname of the primary name + and correct for [global] group_as name + correct for surnames that are space or None + """ + primary_name = person.get_primary_name() + + if primary_name.group_as: + surname = primary_name.group_as + else: + group_map = _nd.primary_surname(primary_name) + surname = dbase.get_name_group_mapping(group_map) + + # Treat people who have no name with those whose name is just + # 'whitespace' + if surname is None or surname.isspace(): + surname = '' + return surname + def sort_people(dbase, handle_list, rlocale=glocale): """ will sort the database people by surname + @param: dbase -- The instance of the database + @param: handle_list -- The list of handles of people to sort + @param: rlocale -- The locale related to the language used for the + sort + @result: -- A list sorted by surname, each element of which + consists of a tuple of (surname, list of handles) + where the list of handles is sorted by + primary surname, first name, suffix.
+ Surname uses group_as, but primary surname + does not. + get the primary name + if group as get the group_as surname + else get the primary surname of the primary name + and correct for [global] group_as name + correct for surnames that are space or None + for each surname sort handles by the surname, first name and suffix + construct a list of surnames and list of handles """ sname_sub = defaultdict(list) sortnames = {} for person_handle in handle_list: person = dbase.get_person_from_handle(person_handle) - primary_name = person.get_primary_name() - - if primary_name.group_as: - surname = primary_name.group_as - else: - group_map = _nd.primary_surname(primary_name) - surname = dbase.get_name_group_mapping(group_map) - - # Treat people who have no name with those whose name is just - # 'whitespace' - if surname is None or surname.isspace(): - surname = '' - sortnames[person_handle] = _nd.sort_string(primary_name) + surname = get_surname_from_person(dbase, person) + sortnames[person_handle] = _nd.sort_string(person.get_primary_name()) sname_sub[surname].append(person_handle) sorted_lists = [] @@ -536,242 +587,37 @@ def __get_place_keyname(dbase, handle): return utils.place_name(dbase, handle) -# See : http://www.gramps-project.org/bugs/view.php?id = 4423 - -# Contraction data taken from CLDR 22.1. Only the default variant is considered. 
-# The languages included below are, by no means, all the langauges that have -# contractions - just a sample of langauges that have been supported - -# At the time of writing (Feb 2013), the following langauges have greater that -# 50% coverage of translation of Gramps: bg Bulgarian, ca Catalan, cs Czech, da -# Danish, de German, el Greek, en_GB, es Spanish, fi Finish, fr French, he -# Hebrew, hr Croation, hu Hungarian, it Italian, ja Japanese, lt Lithuanian, nb -# Noregian Bokmål, nn Norwegian Nynorsk, nl Dutch, pl Polish, pt_BR Portuguese -# (Brazil), pt_P Portugeuse (Portugal), ru Russian, sk Slovak, sl Slovenian, sv -# Swedish, vi Vietnamese, zh_CN Chinese. - -# Key is the language (or language and country), Value is a list of -# contractions. Each contraction consists of a tuple. First element of the -# tuple is the list of characters, second element is the string to use as the -# index entry. - -# The DUCET contractions (e.g. LATIN CAPIAL LETTER L, MIDDLE DOT) are ignored, -# as are the supresscontractions in some locales. - -CONTRACTIONS_DICT = { - # bg Bulgarian validSubLocales="bg_BG" no contractions - # ca Catalan validSubLocales="ca_AD ca_ES" - "ca" : [(("l·", "L·"), "L")], - # Czech, validSubLocales="cs_CZ" Czech_Czech Republic - "cs" : [(("ch", "cH", "Ch", "CH"), "CH")], - # Danish validSubLocales="da_DK" Danish_Denmark - "da" : [(("aa", "Aa", "AA"), "Å")], - # de German validSubLocales="de_AT de_BE de_CH de_DE de_LI de_LU" no - # contractions in standard collation. - # el Greek validSubLocales="el_CY el_GR" no contractions. - # es Spanish validSubLocales="es_419 es_AR es_BO es_CL es_CO es_CR es_CU - # es_DO es_EA es_EC es_ES es_GQ es_GT es_HN es_IC es_MX es_NI es_PA es_PE - # es_PH es_PR es_PY es_SV es_US es_UY es_VE" no contractions in standard - # collation. - # fi Finish validSubLocales="fi_FI" no contractions in default (phonebook) - # collation. - # fr French no collation data. 
- # he Hebrew validSubLocales="he_IL" no contractions - # hr Croation validSubLocales="hr_BA hr_HR" - "hr" : [(("dž", "Dž"), "dž"), - (("lj", "Lj", 'LJ'), "LJ"), - (("Nj", "NJ", "nj"), "NJ")], - # Hungarian hu_HU for two and three character contractions. - "hu" : [(("cs", "Cs", "CS"), "CS"), - (("dzs", "Dzs", "DZS"), "DZS"), # order is important - (("dz", "Dz", "DZ"), "DZ"), - (("gy", "Gy", "GY"), "GY"), - (("ly", "Ly", "LY"), "LY"), - (("ny", "Ny", "NY"), "NY"), - (("sz", "Sz", "SZ"), "SZ"), - (("ty", "Ty", "TY"), "TY"), - (("zs", "Zs", "ZS"), "ZS") - ], - # it Italian no collation data. - # ja Japanese unable to process the data as it is too complex. - # lt Lithuanian no contractions. - # Norwegian Bokmål - "nb" : [(("aa", "Aa", "AA"), "Å")], - # nn Norwegian Nynorsk validSubLocales="nn_NO" - "nn" : [(("aa", "Aa", "AA"), "Å")], - # nl Dutch no collation data. - # pl Polish validSubLocales="pl_PL" no contractions - # pt Portuguese no collation data. - # ru Russian validSubLocales="ru_BY ru_KG ru_KZ ru_MD ru_RU ru_UA" no - # contractions - # Slovak, validSubLocales="sk_SK" Slovak_Slovakia - # having DZ in Slovak as a contraction was rejected in - # http://unicode.org/cldr/trac/ticket/2968 - "sk" : [(("ch", "cH", "Ch", "CH"), "Ch")], - # sl Slovenian validSubLocales="sl_SI" no contractions - # sv Swedish validSubLocales="sv_AX sv_FI sv_SE" default collation is - # "reformed" no contractions. - # vi Vietnamese validSubLocales="vi_VN" no contractions. - # zh Chinese validSubLocales="zh_Hans zh_Hans_CN zh_Hans_SG" no contractions - # in Latin characters the others are too complex. - } - - # The comment below from the glibc locale sv_SE in - # localedata/locales/sv_SE : - # - # % The letter w is normally not present in the Swedish alphabet. It - # % exists in some names in Swedish and foreign words, but is accounted - # % for as a variant of 'v'. Words and names with 'w' are in Swedish - # % ordered alphabetically among the words and names with 'v'. 
If two - # % words or names are only to be distinguished by 'v' or % 'w', 'v' is - # % placed before 'w'. - # - # See : http://www.gramps-project.org/bugs/view.php?id = 2933 - # - -# HOWEVER: the characters V and W in Swedish are not considered as a special -# case for several reasons. (1) The default collation for Swedish (called the -# 'reformed' collation type) regards the difference between 'v' and 'w' as a -# primary difference. (2) 'v' and 'w' in the 'standard' (non-default) collation -# type are not a contraction, just a case where the difference is secondary -# rather than primary. (3) There are plenty of other languages where a -# difference that is primary in other languages is secondary, and those are not -# specially handled. - -def first_letter(string, rlocale=glocale): - """ - Receives a string and returns the first letter - """ - dummy_rlocale = rlocale - if string is None or len(string) < 1: - return ' ' - - norm_unicode = normalize('NFKC', str(string)) - contractions = CONTRACTIONS_DICT.get(COLLATE_LANG) - if contractions is None: - contractions = CONTRACTIONS_DICT.get(COLLATE_LANG.split("_")[0]) - - if contractions is not None: - for contraction in contractions: - count = len(contraction[0][0]) - if (len(norm_unicode) >= count and - norm_unicode[:count] in contraction[0]): - return contraction[1] - - # no special case - return norm_unicode[0].upper() - - -try: - import PyICU # pylint : disable=wrong-import-position - PRIM_COLL = PyICU.Collator.createInstance(PyICU.Locale(COLLATE_LANG)) - PRIM_COLL.setStrength(PRIM_COLL.PRIMARY) - - def primary_difference(prev_key, new_key, rlocale=glocale): +if HAVE_ALPHABETICINDEX: + class AlphabeticIndex(icuAlphabeticIndex): """ - Try to use the PyICU collation. 
- If we generate a report for another language, make sure we use the good - collation sequence + Call the ICU AlphabeticIndex, passing the ICU Locale """ - collation = PRIM_COLL - if rlocale.lang != locale.getlocale(locale.LC_COLLATE)[0]: - encoding = rlocale.encoding if rlocale.encoding else "UTF-8" - collate_lang = PyICU.Locale(rlocale.collation+"."+encoding) - collation = PyICU.Collator.createInstance(collate_lang) - return collation.compare(prev_key, new_key) != 0 + def __init__(self, rlocale): + self.iculocale = Locale(rlocale.collation) + super().__init__(self.iculocale) -except: - def primary_difference(prev_key, new_key, rlocale=glocale): - """ - The PyICU collation is not available. + # set the maximum number of buckets, the undocumented default is 99 + # Latin + Greek + Cyrillic + Hebrew + Arabic + Tamil + Hiragana + + # CJK Unified is about 206 different buckets + self.maxLabelCount = 500 # pylint: disable=invalid-name - Returns true if there is a primary difference between the two parameters - See http://www.gramps-project.org/bugs/view.php?id=2933#c9317 if - letter[i]+'a' < letter[i+1]+'b' and letter[i+1]+'a' < letter[i]+'b' is - true then the letters should be grouped together + # Add bucket labels for scripts other than the one for the output + # which is being generated + self.iculocale.addLikelySubtags() + default_script = self.iculocale.getDisplayScript() + used_scripts = [default_script] - The test characters here must not be any that are used in contractions. 
- """ + for lang_code in glocale.get_language_dict().values(): + loc = Locale(lang_code) + loc.addLikelySubtags() + script = loc.getDisplayScript() + if script not in used_scripts: + used_scripts.append(script) + super().addLabels(loc) +else: + AlphabeticIndex = localAlphabeticIndex - return rlocale.sort_key(prev_key + "e") >= \ - rlocale.sort_key(new_key + "f") or \ - rlocale.sort_key(new_key + "e") >= \ - rlocale.sort_key(prev_key + "f") - -def get_first_letters(dbase, handle_list, key, rlocale=glocale): - """ - get the first letters of the handle_list - - @param: handle_list -- One of a handle list for either person or - place handles or an evt types list - @param: key -- Either a person, place, or event type - @param: rlocale -- The locale to use - - The first letter (or letters if there is a contraction) are extracted from - all the objects in the handle list. There may be duplicates, and there may - be letters where there is only a secondary or tertiary difference, not a - primary difference. The list is sorted in collation order. For each group - with secondary or tertiary differences, the first in collation sequence is - retained. For example, assume the default collation sequence (DUCET) and - names Ånström and Apple. These will sort in the order shown. Å and A have a - secondary difference. If the first letter from these names was chosen then - the inex entry would be Å. This is not desirable. Instead, the initial - letters are extracted (Å and A). These are sorted, which gives A and Å. Then - the first of these is used for the index entry. 
- """ - index_list = [] - - for handle in handle_list: - if key == _KEYPERSON: - keyname = __get_person_keyname(dbase, handle) - - elif key == _KEYPLACE: - keyname = __get_place_keyname(dbase, handle) - - else: - if rlocale != glocale: - keyname = rlocale.translation.sgettext(handle) - else: - keyname = handle - ltr = first_letter(keyname) - - index_list.append(ltr) - - # Now remove letters where there is not a primary difference - index_list.sort(key=rlocale.sort_key) - first = True - prev_index = None - for nkey in index_list[:]: #iterate over a slice copy of the list - if first or primary_difference(prev_index, nkey, rlocale): - first = False - prev_index = nkey - else: - index_list.remove(nkey) - - # return menu set letters for alphabet_navigation - return index_list - -def get_index_letter(letter, index_list, rlocale=glocale): - """ - This finds the letter in the index_list that has no primary difference from - the letter provided. See the discussion in get_first_letters above. - Continuing the example, if letter is Å and index_list is A, then this would - return A. 
- - @param: letter -- The letter to find in the index_list - @param: index_list -- The list of all first letters in use - @param: rlocale -- The locale to use - """ - for index in index_list: - if not primary_difference(letter, index, rlocale): - return index - - LOG.warning("Initial letter '%s' not found in alphabetic navigation list", - letter) - LOG.debug("filtered sorted index list %s", index_list) - return letter - -def alphabet_navigation(index_list, rlocale=glocale): +def alphabet_navigation(sorted_alpha_index, rlocale=glocale): """ Will create the alphabet navigation bar for classes IndividualListPage, SurnameListPage, PlaceListPage, and EventList @@ -779,14 +625,6 @@ def alphabet_navigation(index_list, rlocale=glocale): @param: index_list -- a dictionary of either letters or words @param: rlocale -- The locale to use """ - sorted_set = defaultdict(int) - - for menu_item in index_list: - sorted_set[menu_item] += 1 - - # remove the number of each occurance of each letter - sorted_alpha_index = sorted(sorted_set, key=rlocale.sort_key) - # if no letters, return None to its callers if not sorted_alpha_index: return None @@ -799,6 +637,8 @@ def alphabet_navigation(index_list, rlocale=glocale): with Html("div", id="alphanav") as alphabetnavigation: index = 0 + output = [] + dup_index = 0 for dummy_row in range(num_of_rows): unordered = Html("ul") @@ -811,8 +651,16 @@ def alphabet_navigation(index_list, rlocale=glocale): # braille writers title_txt = "Alphabet Menu: %s" % menu_item title_str = rlocale.translation.sgettext(title_txt) + # deal with multiple ellipsis which are generated for overflow, + # underflow and inflow labels + link = menu_item + if menu_item in output: + link = "%s (%i)" % (menu_item, dup_index) + dup_index += 1 + output.append(menu_item) + hyper = Html("a", menu_item, title=title_str, - href="#%s" % menu_item) + href="#%s" % link) unordered.extend(Html("li", hyper, inline=True)) index += 1 diff --git a/gramps/plugins/webreport/event.py 
b/gramps/plugins/webreport/event.py index 33cf60878..31c7b8620 100644 --- a/gramps/plugins/webreport/event.py +++ b/gramps/plugins/webreport/event.py @@ -58,11 +58,10 @@ from gramps.plugins.lib.libhtml import Html # specific narrative web import #------------------------------------------------ from gramps.plugins.webreport.basepage import BasePage -from gramps.plugins.webreport.common import (get_first_letters, _ALPHAEVENT, - _EVENTMAP, alphabet_navigation, +from gramps.plugins.webreport.common import (_EVENTMAP, + alphabet_navigation, FULLCLEAR, sort_event_types, - primary_difference, - get_index_letter) + AlphabeticIndex) _ = glocale.translation.sgettext LOG = logging.getLogger(".NarrativeWeb") @@ -128,6 +127,107 @@ class EventPages(BasePage): self.eventlistpage(self.report, the_lang, the_title, event_types, event_handle_list) + + def __output_event(self, ldatec, event_type, tbody, bucket_letter, + bucket_link, first_letter, _event_displayed, first_type, + event_handle): + """ + Generate and output the data for a single event + + @param: ldatec -- Last change date and time (updated) + @param: event_type -- The event type name processed for XML etc + @param: tbody -- The current HTML body into which the data is + assembled + @param: bucket_letter -- The AlphabeticIndex bucket for this event + @param: bucket_link -- Value used as the id of the letter's anchor + @param: first_letter -- Whether this is the first event for this + letter + @param: _event_displayed -- List of Gramps IDs already displayed + @param: first_type -- Whether this is the first event of this type + @param: event_handle -- Handle of the event to be output + + :returns: Returns a tuple of updated ldatec, first_letter, first_type, + _event_displayed + :rtype: tuple + + """ + event = self.r_db.get_event_from_handle(event_handle) + _type = event.get_type() + gid = event.get_gramps_id() + if event.get_change_time() > ldatec: + ldatec = event.get_change_time() + # check to see if we have listed this gramps_id yet?
+ if gid not in _event_displayed: + if int(_type) in _EVENTMAP: + handle_list = set(self.r_db.find_backlink_handles( + event_handle, + include_classes=['Family', 'Person'])) + else: + handle_list = set( + self.r_db.find_backlink_handles( + event_handle, + include_classes=['Person'])) + if handle_list: + trow = Html("tr") + tbody += trow + # set up hyperlinked letter for alphabet_navigation + tcell = Html("td", class_="ColumnLetter", inline=True) + trow += tcell + if first_letter: + first_letter = False + first_type = False + # Update the ColumnLetter cell and + # create a populated ColumnType + t_a = 'class = "BeginLetter BeginType"' + trow.attr = t_a + letter = bucket_letter + ttle = self._("Event types beginning " + "with letter %s") % letter + tcell += Html("a", letter, name=letter, id_=bucket_link, + title=ttle) + tcell = Html("td", class_="ColumnType", + title=self._(event_type), + inline=True) + tcell += self._(event_type) + elif first_type: + first_type = False + # Update the ColumnLetter cell and + # create a populated ColumnType cell + trow.attr = 'class = "BeginType"' + tcell = Html("td", class_="ColumnType", + title=self._(event_type), inline=True) + tcell += self._(event_type) + else: + tcell = Html("td", class_="ColumnType", + title=" ", inline=True) + tcell += " " # create a non-populated ColumnType + # Add the ColumnType cell + trow += tcell + # event date + tcell = Html("td", class_="ColumnDate", inline=True) + trow += tcell + date = Date.EMPTY + if event: + date = event.get_date_object() + if date and date is not Date.EMPTY: + tcell += self.rlocale.get_date(date) + else: + tcell += " " + # Gramps ID + trow += Html("td", class_="ColumnGRAMPSID") \ + + (self.event_grampsid_link(event_handle, gid, None)) + # Person(s) column + tcell = Html("td", class_="ColumnPerson") + trow += tcell + # classname can either be a person or a family + first_person = True + # get person(s) for ColumnPerson + sorted_list = sorted(handle_list) + 
self.complete_people(tcell, first_person, sorted_list, + uplink=False) + _event_displayed.append(gid) + return (ldatec, first_letter, first_type, _event_displayed) + def eventlistpage(self, report, the_lang, the_title, event_types, event_handle_list): """ @@ -142,7 +242,6 @@ class EventPages(BasePage): """ BasePage.__init__(self, report, the_lang, the_title) ldatec = 0 - prev_letter = " " output_file, sio = self.report.create_file("events") result = self.write_header(self._("Events")) @@ -159,8 +258,21 @@ class EventPages(BasePage): eventlist += Html("p", msg, id="description") # get alphabet navigation... - index_list = get_first_letters(self.r_db, event_types, - _ALPHAEVENT) + # Assemble all the event types + index = AlphabeticIndex(self.rlocale) + for (event_type, data_list) in sort_event_types(self.r_db, + event_types, + event_handle_list, + self.rlocale): + index.addRecord(event_type, data_list) + + # Extract the buckets from the index + index_list = [] + index.resetBucketIterator() + while index.nextBucket(): + if index.bucketRecordCount != 0: + index_list.append(index.bucketLabel) + # Output the navigation alpha_nav = alphabet_navigation(index_list, self.rlocale) if alpha_nav: eventlist += alpha_nav @@ -191,123 +303,37 @@ class EventPages(BasePage): tbody = Html("tbody") table += tbody - # separate events by their type and then thier event handles - savevtyp = " " - for (evt_type, - data_list) in sort_event_types(self.r_db, - event_types, - event_handle_list, - self.rlocale): - first = True - _event_displayed = [] - - # sort datalist by date of event and by event handle... - data_list = sorted(data_list, key=itemgetter(0, 1)) - first_event = True - - for (dummy_sort_value, event_handle) in data_list: - event = self.r_db.get_event_from_handle(event_handle) - _type = event.get_type() - gid = event.get_gramps_id() - if event.get_change_time() > ldatec: - ldatec = event.get_change_time() - - # check to see if we have listed this gramps_id yet? 
- if gid not in _event_displayed: - - # family event - if int(_type) in _EVENTMAP: - handle_list = set( - self.r_db.find_backlink_handles( - event_handle, - include_classes=['Family', 'Person'])) - else: - handle_list = set( - self.r_db.find_backlink_handles( - event_handle, - include_classes=['Person'])) - if handle_list: - - trow = Html("tr") - tbody += trow - - # set up hyperlinked letter for - # alphabet_navigation - tcell = Html("td", class_="ColumnLetter", - inline=True) - trow += tcell - - if evt_type and not evt_type.isspace(): - letter = get_index_letter( - self._(str(evt_type)[0].capitalize()), - index_list, self.rlocale) - if letter != savevtyp: - savevtyp = letter - else: - letter = " " - else: - letter = " " - - if first or primary_difference(letter, - prev_letter, - self.rlocale): - first = False - prev_letter = letter - t_a = 'class = "BeginLetter BeginType"' - trow.attr = t_a - ttle = self._("Event types beginning " - "with letter %s") % letter - tcell += Html("a", letter, name=letter, - id_=letter, title=ttle, - inline=True) - else: - tcell += " " - - # display Event type if first in the list - tcell = Html("td", class_="ColumnType", - title=self._(evt_type), - inline=True) - trow += tcell - if first_event: - tcell += self._(evt_type) - if trow.attr == "": - trow.attr = 'class = "BeginType"' - else: - tcell += " " - - # event date - tcell = Html("td", class_="ColumnDate", - inline=True) - trow += tcell - date = Date.EMPTY - if event: - date = event.get_date_object() - if date and date is not Date.EMPTY: - tcell += self.rlocale.get_date(date) - else: - tcell += " " - - # Gramps ID - trow += Html("td", class_="ColumnGRAMPSID") + ( - self.event_grampsid_link(event_handle, - gid, None) - ) - - # Person(s) column - tcell = Html("td", class_="ColumnPerson") - trow += tcell - - # classname can either be a person or a family - first_person = True - - # get person(s) for ColumnPerson - sorted_list = sorted(handle_list) - self.complete_people(tcell, 
first_person, - sorted_list, - uplink=False) - - _event_displayed.append(gid) - first_event = False + # for each bucket, output the events in that bucket + index.resetBucketIterator() + output = [] + dup_index = 0 + while index.nextBucket(): + if index.bucketRecordCount != 0: + bucket_letter = index.bucketLabel + bucket_link = bucket_letter + if bucket_letter in output: + bucket_link = "%s (%i)" % (bucket_letter, dup_index) + dup_index += 1 + output.append(bucket_letter) + first_letter = True + while index.nextRecord(): + _event_displayed = [] + first_type = True + event_type = index.recordName + data_list = index.recordData + # sort datalist by date of event and by event + # handle... + data_list = sorted(data_list, key=itemgetter(0, 1)) + for (dummy_sort_value, event_handle) in data_list: + (ldatec, first_letter, first_type, + _event_displayed) \ + = self.__output_event(ldatec, event_type, + tbody, bucket_letter, + bucket_link, + first_letter, + _event_displayed, + first_type, + event_handle) # add clearline for proper styling # add footer section diff --git a/gramps/plugins/webreport/family.py b/gramps/plugins/webreport/family.py index 22d60d81f..449ed27d7 100644 --- a/gramps/plugins/webreport/family.py +++ b/gramps/plugins/webreport/family.py @@ -41,7 +41,7 @@ Classe: #------------------------------------------------ # python modules #------------------------------------------------ -from collections import defaultdict +from collections import defaultdict, OrderedDict from decimal import getcontext import logging @@ -58,11 +58,11 @@ from gramps.plugins.lib.libhtml import Html #------------------------------------------------ from gramps.plugins.webreport.basepage import BasePage from gramps.gen.display.name import displayer as _nd -from gramps.plugins.webreport.common import (get_first_letters, _KEYPERSON, - alphabet_navigation, sort_people, - primary_difference, first_letter, +from gramps.plugins.webreport.common import (alphabet_navigation, html_escape, - 
FULLCLEAR, get_index_letter) + FULLCLEAR, + AlphabeticIndex, + get_surname_from_person) _ = glocale.translation.sgettext LOG = logging.getLogger(".NarrativeWeb") @@ -122,6 +122,97 @@ class FamilyPages(BasePage): self.familylistpage(self.report, the_lang, the_title, self.report.obj_dict[Family].keys()) + + def __output_family(self, ldatec, family_handle, person_handle, + tbody, letter, bucket_link, first_person, first_family): + """ + Generate and output the data for a single family + + @param: ldatec -- Last change date and time (updated) + @param: family_handle -- The family_handle to be output + @param: person_handle -- The person_handle to be output + @param: tbody -- The current HTML body into which the data is + assembled + @param: letter -- The AlphabeticIndex bucket for this person + @param: first_person -- Whether this is the first person for this + letter + @param: first_family -- Whether this is the first family of this + person + + @returns: Returns a tuple of updated (ldatec, first_person, + first_family) + @rtype: tuple + """ + family = self.r_db.get_family_from_handle(family_handle) + if family.get_change_time() > ldatec: + ldatec = family.get_change_time() + + trow = Html("tr") + tbody += trow + tcell = Html("td", class_="ColumnRowLabel") + trow += tcell + if first_person: + first_person = False + first_family = False + # Update the ColumnRowLabel cell + trow.attr = 'class="BeginLetter BeginFamily"' + ttle = self._("Families beginning with " + "letter ") + tcell += Html("a", letter, name=letter, title=ttle + letter, + id_=bucket_link) + # and create the populated ColumnPartner for the person + tcell = Html("td", class_="ColumnPartner") + tcell += self.new_person_link(person_handle, uplink=self.uplink) + trow += tcell + elif first_family: + first_family = False + # Update the ColumnRowLabel cell + trow.attr = 'class ="BeginFamily"' + # and create the populated ColumnPartner for the person + tcell = Html("td", class_="ColumnPartner") + tcell += 
self.new_person_link(person_handle, uplink=self.uplink) + trow += tcell + else: + # Create the blank ColumnPartner row for the person + tcell = Html("td", class_="ColumnPartner") + tcell += ' ' + trow += tcell + + tcell = Html("td", class_="ColumnPartner") + trow += tcell + tcell += self.family_link(family.get_handle(), + self.report.get_family_name(family), + family.get_gramps_id(), self.uplink) + # family events; such as marriage and divorce + # events + fam_evt_ref_list = family.get_event_ref_list() + tcell1 = Html("td", class_="ColumnDate", inline=True) + tcell2 = Html("td", class_="ColumnDate", inline=True) + trow += tcell1, tcell2 + if fam_evt_ref_list: + fam_evt_srt_ref_list = sorted(fam_evt_ref_list, + key=self.sort_on_grampsid) + for evt_ref in fam_evt_srt_ref_list: + evt = self.r_db.get_event_from_handle(evt_ref.ref) + if evt: + evt_type = evt.get_type() + if evt_type in [EventType.MARRIAGE, EventType.DIVORCE]: + cell = self.rlocale.get_date(evt.get_date_object()) + if evt_type == EventType.MARRIAGE: + tcell1 += cell + else: + tcell1 += ' ' + if evt_type == EventType.DIVORCE: + tcell2 += cell + else: + tcell2 += ' ' + + else: + tcell1 += ' ' + tcell2 += ' ' + first_family = False + return (ldatec, first_person, first_family) + def familylistpage(self, report, the_lang, the_title, fam_list): """ Create a family index @@ -138,7 +229,6 @@ class FamilyPages(BasePage): result = self.write_header(self._("Families")) familieslistpage, dummy_head, dummy_body, outerwrapper = result ldatec = 0 - prev_letter = " " # begin Family Division with Html("div", class_="content", id="Relationships") as relationlist: @@ -154,7 +244,7 @@ class FamilyPages(BasePage): relationlist += Html("p", msg, id="description") # go through all the families, and construct a dictionary of all the - # people and the families thay are involved in. Note that the people + # people and the families they are involved in. 
Note that the people # in the list may be involved in OTHER families, that are not listed # because they are not in the original family list. pers_fam_dict = defaultdict(list) @@ -170,9 +260,22 @@ class FamilyPages(BasePage): if spouse_handle: pers_fam_dict[spouse_handle].append(family) + # Assemble all the people, we no longer care about their families + index = AlphabeticIndex(self.rlocale) + for (person_handle, dummy_family) in pers_fam_dict.items(): + person = self.r_db.get_person_from_handle(person_handle) + surname = get_surname_from_person(self.r_db, person) + index.addRecord(surname, person_handle) + + # Extract the buckets from the index + index_list = [] + index.resetBucketIterator() + while index.nextBucket(): + if index.bucketRecordCount != 0: + index_list.append(index.bucketLabel) + + # Output the navigation # add alphabet navigation - index_list = get_first_letters(self.r_db, pers_fam_dict.keys(), - _KEYPERSON, rlocale=self.rlocale) alpha_nav = alphabet_navigation(index_list, self.rlocale) if alpha_nav: relationlist += alpha_nav @@ -187,7 +290,7 @@ class FamilyPages(BasePage): trow = Html("tr") thead += trow - # set up page columns + # set up page columns trow.extend( Html("th", trans, class_=colclass, inline=True) for trans, colclass in [(self._("Letter"), @@ -201,108 +304,56 @@ class FamilyPages(BasePage): tbody = Html("tbody") table += tbody - # begin displaying index list - ppl_handle_list = sort_people(self.r_db, pers_fam_dict.keys(), - self.rlocale) - first = True - for (surname, handle_list) in ppl_handle_list: - - if surname and not surname.isspace(): - letter = get_index_letter(first_letter(surname), - index_list, - self.rlocale) - else: - letter = ' ' - - # get person from sorted database list - for person_handle in sorted( - handle_list, key=self.sort_on_name_and_grampsid): - person = self.r_db.get_person_from_handle(person_handle) - if person: - family_list = person.get_family_handle_list() - first_family = True - for family_handle in 
family_list: - get_family = self.r_db.get_family_from_handle - family = get_family(family_handle) - trow = Html("tr") - tbody += trow - - tcell = Html("td", class_="ColumnRowLabel") - trow += tcell - - if first or primary_difference(letter, - prev_letter, - self.rlocale): - first = False - prev_letter = letter - trow.attr = 'class="BeginLetter"' - ttle = self._("Families beginning with " - "letter ") - tcell += Html("a", letter, name=letter, - title=ttle + letter, - inline=True) - else: - tcell += ' ' - - tcell = Html("td", class_="ColumnPartner") - trow += tcell - - if first_family: - trow.attr = 'class ="BeginFamily"' - - tcell += self.new_person_link( - person_handle, uplink=self.uplink) - - first_family = False - else: - tcell += ' ' - - tcell = Html("td", class_="ColumnPartner") - trow += tcell - - tcell += self.family_link( - family.get_handle(), - self.report.get_family_name(family), - family.get_gramps_id(), self.uplink) - - # family events; such as marriage and divorce - # events - fam_evt_ref_list = family.get_event_ref_list() - tcell1 = Html("td", class_="ColumnDate", - inline=True) - tcell2 = Html("td", class_="ColumnDate", - inline=True) - trow += (tcell1, tcell2) - - if fam_evt_ref_list: - fam_evt_srt_ref_list = sorted( - fam_evt_ref_list, - key=self.sort_on_grampsid) - for evt_ref in fam_evt_srt_ref_list: - evt = self.r_db.get_event_from_handle( - evt_ref.ref) - if evt: - evt_type = evt.get_type() - if evt_type in [EventType.MARRIAGE, - EventType.DIVORCE]: - - cell = self.rlocale.get_date( - evt.get_date_object()) - if (evt_type == - EventType.MARRIAGE): - tcell1 += cell - else: - tcell1 += ' ' - - if (evt_type == - EventType.DIVORCE): - tcell2 += cell - else: - tcell2 += ' ' - else: - tcell1 += ' ' - tcell2 += ' ' - first_family = False + # for each bucket, output the people and their families in that + # bucket + index.resetBucketIterator() + output = [] + dup_index = 0 + while index.nextBucket(): + if index.bucketRecordCount != 0: + bucket_letter 
= index.bucketLabel + bucket_link = bucket_letter + if bucket_letter in output: + bucket_link = "%s (%i)" % (bucket_letter, dup_index) + dup_index += 1 + output.append(bucket_letter) + # Assemble a dict of all the people in this bucket. + surname_ppl_handle_dict = OrderedDict() + while index.nextRecord(): + # The records are returned sorted by recordName, + # which is surname. we need to retain that order but + # in addition sort by the rest of the name + person_surname = index.recordName + person_handle = index.recordData + if person_surname in surname_ppl_handle_dict.keys(): + surname_ppl_handle_dict[person_surname]\ + .append(person_handle) + else: + surname_ppl_handle_dict[person_surname] = \ + [person_handle] + first_person = True + for (surname, handle_list) in \ + surname_ppl_handle_dict.items(): + # get person from sorted database list + for person_handle in sorted( + handle_list, + key=self.sort_on_name_and_grampsid): + person = self.r_db.get_person_from_handle\ + (person_handle) + if person: + family_list = person.\ + get_family_handle_list() + first_family = True + for family_handle in family_list: + (ldatec, first_person, first_family) \ + = self.__output_family(ldatec, + family_handle, + person_handle, + tbody, + bucket_letter, + bucket_link, + first_person, + first_family) # add clearline for proper styling # add footer section diff --git a/gramps/plugins/webreport/narrativeweb.py b/gramps/plugins/webreport/narrativeweb.py index a19245296..47ed57ef5 100644 --- a/gramps/plugins/webreport/narrativeweb.py +++ b/gramps/plugins/webreport/narrativeweb.py @@ -2887,105 +2887,3 @@ class NavWebOptions(MenuReportOptions): self.__alive.set_available(False) self.__death_anniv.set_available(False) self.__after_year.set_available(False) - -# See : http://www.gramps-project.org/bugs/view.php?id = 4423 - -# Contraction data taken from CLDR 22.1. Only the default variant is considered. 
-# The languages included below are, by no means, all the languages that have -# contractions - just a sample of languages that have been supported - -# At the time of writing (Feb 2013), the following languages have greater that -# 50% coverage of translation of Gramps: bg Bulgarian, ca Catalan, cs Czech, da -# Danish, de German, el Greek, en_GB, es Spanish, fi Finish, fr French, he -# Hebrew, hr Croation, hu Hungarian, it Italian, ja Japanese, lt Lithuanian, nb -# Noregian Bokmål, nn Norwegian Nynorsk, nl Dutch, pl Polish, pt_BR Portuguese -# (Brazil), pt_P Portugeuse (Portugal), ru Russian, sk Slovak, sl Slovenian, sv -# Swedish, vi Vietnamese, zh_CN Chinese. - -# Key is the language (or language and country), Value is a list of -# contractions. Each contraction consists of a tuple. First element of the -# tuple is the list of characters, second element is the string to use as the -# index entry. - -# The DUCET contractions (e.g. LATIN CAPIAL LETTER L, MIDDLE DOT) are ignored, -# as are the supresscontractions in some locales. - -CONTRACTIONS_DICT = { - # bg Bulgarian validSubLocales="bg_BG" no contractions - # ca Catalan validSubLocales="ca_AD ca_ES" - "ca" : [(("l·", "L·"), "L")], - # Czech, validSubLocales="cs_CZ" Czech_Czech Republic - "cs" : [(("ch", "cH", "Ch", "CH"), "CH")], - # Danish validSubLocales="da_DK" Danish_Denmark - "da" : [(("aa", "Aa", "AA"), "Å")], - # de German validSubLocales="de_AT de_BE de_CH de_DE de_LI de_LU" no - # contractions in standard collation. - # el Greek validSubLocales="el_CY el_GR" no contractions. - # es Spanish validSubLocales="es_419 es_AR es_BO es_CL es_CO es_CR es_CU - # es_DO es_EA es_EC es_ES es_GQ es_GT es_HN es_IC es_MX es_NI es_PA es_PE - # es_PH es_PR es_PY es_SV es_US es_UY es_VE" no contractions in standard - # collation. - # fi Finish validSubLocales="fi_FI" no contractions in default (phonebook) - # collation. - # fr French no collation data. 
- # he Hebrew validSubLocales="he_IL" no contractions - # hr Croation validSubLocales="hr_BA hr_HR" - "hr" : [(("dž", "Dž"), "dž"), - (("lj", "Lj", 'LJ'), "LJ"), - (("Nj", "NJ", "nj"), "NJ")], - # Hungarian hu_HU for two and three character contractions. - "hu" : [(("cs", "Cs", "CS"), "CS"), - (("dzs", "Dzs", "DZS"), "DZS"), # order is important - (("dz", "Dz", "DZ"), "DZ"), - (("gy", "Gy", "GY"), "GY"), - (("ly", "Ly", "LY"), "LY"), - (("ny", "Ny", "NY"), "NY"), - (("sz", "Sz", "SZ"), "SZ"), - (("ty", "Ty", "TY"), "TY"), - (("zs", "Zs", "ZS"), "ZS") - ], - # it Italian no collation data. - # ja Japanese unable to process the data as it is too complex. - # lt Lithuanian no contractions. - # Norwegian Bokmål - "nb" : [(("aa", "Aa", "AA"), "Å")], - # nn Norwegian Nynorsk validSubLocales="nn_NO" - "nn" : [(("aa", "Aa", "AA"), "Å")], - # nl Dutch no collation data. - # pl Polish validSubLocales="pl_PL" no contractions - # pt Portuguese no collation data. - # ru Russian validSubLocales="ru_BY ru_KG ru_KZ ru_MD ru_RU ru_UA" no - # contractions - # Slovak, validSubLocales="sk_SK" Slovak_Slovakia - # having DZ in Slovak as a contraction was rejected in - # http://unicode.org/cldr/trac/ticket/2968 - "sk" : [(("ch", "cH", "Ch", "CH"), "Ch")], - # sl Slovenian validSubLocales="sl_SI" no contractions - # sv Swedish validSubLocales="sv_AX sv_FI sv_SE" default collation is - # "reformed" no contractions. - # vi Vietnamese validSubLocales="vi_VN" no contractions. - # zh Chinese validSubLocales="zh_Hans zh_Hans_CN zh_Hans_SG" no contractions - # in Latin characters the others are too complex. - } - - # The comment below from the glibc locale sv_SE in - # localedata/locales/sv_SE : - # - # % The letter w is normally not present in the Swedish alphabet. It - # % exists in some names in Swedish and foreign words, but is accounted - # % for as a variant of 'v'. Words and names with 'w' are in Swedish - # % ordered alphabetically among the words and names with 'v'. 
If two - # % words or names are only to be distinguished by 'v' or % 'w', 'v' is - # % placed before 'w'. - # - # See : http://www.gramps-project.org/bugs/view.php?id = 2933 - # - -# HOWEVER: the characters V and W in Swedish are not considered as a special -# case for several reasons. (1) The default collation for Swedish (called the -# 'reformed' collation type) regards the difference between 'v' and 'w' as a -# primary difference. (2) 'v' and 'w' in the 'standard' (non-default) collation -# type are not a contraction, just a case where the difference is secondary -# rather than primary. (3) There are plenty of other languages where a -# difference that is primary in other languages is secondary, and those are not -# specially handled. diff --git a/gramps/plugins/webreport/person.py b/gramps/plugins/webreport/person.py index 1b3599303..9fe7f6acb 100644 --- a/gramps/plugins/webreport/person.py +++ b/gramps/plugins/webreport/person.py @@ -71,16 +71,15 @@ from gramps.gen.relationship import get_relationship_calculator # specific narrative web import #------------------------------------------------ from gramps.plugins.webreport.basepage import BasePage -from gramps.plugins.webreport.common import (get_first_letters, _KEYPERSON, - alphabet_navigation, sort_people, - first_letter, - get_index_letter, add_birthdate, - primary_difference, FULLCLEAR, +from gramps.plugins.webreport.common import (alphabet_navigation, + add_birthdate, FULLCLEAR, _find_birth_date, _find_death_date, MARKER_PATH, OPENLAYER, OSM_MARKERS, STAMEN_MARKERS, GOOGLE_MAPS, MARKERS, html_escape, - DROPMASTERS, FAMILYLINKS) + DROPMASTERS, FAMILYLINKS, + get_surname_from_person, + AlphabeticIndex) from gramps.plugins.webreport.layout import LayoutTree from gramps.plugins.webreport.buchheim import buchheim @@ -167,6 +166,139 @@ class PersonPages(BasePage): # creates the Individual List Page # ################################################# + + def __output_person(self, date, tbody, bucket_letter, 
bucket_link, + showbirth, showdeath, showpartner, showparents, + surname, surnamed, first_surname, first_individual, + person_handle): + """ + Generate and output the data for a single person + """ + person = self.r_db.get_person_from_handle(person_handle) + if person.get_change_time() > date: + date = person.get_change_time() + # surname column + trow = Html("tr") + tbody += trow + tcell = Html("td", class_="ColumnSurname", inline=True) + trow += tcell + if first_surname: + first_surname = False + first_individual = False + trow.attr = 'class = "BeginSurname"' + ttle = self._("Surnames %(surname)s beginning " + "with letter %(letter)s" % + {'surname':surname, 'letter':bucket_letter}) + tcell += Html("a", html_escape(surnamed), name=bucket_letter, + id_=bucket_link, title=ttle) + elif first_individual: + first_individual = False + tcell += Html("a", html_escape(surnamed), + title=self._("Surnames") + " " + surname) + else: + tcell += " " + # firstname column + link = self.new_person_link(person_handle, person=person) + trow += Html("td", link, class_="ColumnName") + # birth column + if showbirth: + tcell = Html("td", class_="ColumnBirth", inline=True) + trow += tcell + birth_date = _find_birth_date(self.r_db, person) + if birth_date is not None: + if birth_date.fallback: + tcell += Html('em', self.rlocale.get_date(birth_date), + inline=True) + else: + tcell += self.rlocale.get_date(birth_date) + else: + tcell += " " + # death column + if showdeath: + tcell = Html("td", class_="ColumnDeath", inline=True) + trow += tcell + death_date = _find_death_date(self.r_db, person) + if death_date is not None: + if death_date.fallback: + tcell += Html('em', self.rlocale.get_date(death_date), + inline=True) + else: + tcell += self.rlocale.get_date(death_date) + else: + tcell += " " + # partner column + if showpartner: + family_list = person.get_family_handle_list() + first_family = True + #partner_name = None + tcell = () + if family_list: + for family_handle in family_list: + 
family = self.r_db.get_family_from_handle(family_handle) + partner_handle = utils.find_spouse( + person, family) + if partner_handle: + if not first_family: + # have to do this to get the comma on + # the same line as the link + if isinstance(tcell[-1], Html): + # tcell is an instance of Html (or + # of a subclass thereof) + tcell[-1].inside += "," + else: + tcell = tcell[:-1] + ( + # TODO for Arabic, translate? + (tcell[-1] + ", "), ) + # Have to manipulate as tuples so that + # subsequent people are not nested + # within the first link + tcell += (self.new_person_link(partner_handle),) + first_family = False + + else: + tcell = " " + trow += Html("td", class_="ColumnPartner") + tcell + # parents column + if showparents: + parent_hdl_list = person.get_parent_family_handle_list() + if parent_hdl_list: + parent_handle = parent_hdl_list[0] + family = self.r_db.get_family_from_handle(parent_handle) + father_handle = family.get_father_handle() + mother_handle = family.get_mother_handle() + if father_handle: + father = self.r_db.get_person_from_handle(father_handle) + else: + father = None + if mother_handle: + mother = self.r_db.get_person_from_handle(mother_handle) + else: + mother = None + if father: + father_name = self.get_name(father) + if mother: + mother_name = self.get_name(mother) + samerow = False + if mother and father: + tcell = (Html("span", father_name, + class_="father fatherNmother", inline=True), + Html("span", mother_name, + class_="mother", inline=True)) + elif mother: + tcell = Html("span", mother_name, class_="mother", + inline=True) + elif father: + tcell = Html("span", father_name, class_="father", + inline=True) + else: + tcell = " " + samerow = True + else: + tcell = " " + samerow = True + trow += Html("td", class_="ColumnParents", inline=samerow) + tcell + return (date, first_surname, first_individual) + def individuallistpage(self, report, the_lang, the_title, ppl_handle_list): """ Creates an individual page @@ -179,7 +311,6 @@ class 
PersonPages(BasePage): to create a page. """ BasePage.__init__(self, report, the_lang, the_title) - prev_letter = " " # plugin variables for this module showbirth = report.options['showbirth'] @@ -205,8 +336,27 @@ class PersonPages(BasePage): individuallist += Html("p", msg, id="description") # add alphabet navigation - index_list = get_first_letters(self.r_db, ppl_handle_list, - _KEYPERSON, rlocale=self.rlocale) + # Assemble all the handles for each surname into a dictionary + # We don't call sort_people because we don't care about sorting + # individuals, only surnames + surname_handle_dict = defaultdict(list) + for person_handle in ppl_handle_list: + person = self.r_db.get_person_from_handle(person_handle) + surname = get_surname_from_person(self.r_db, person) + surname_handle_dict[surname].append(person_handle) + + # Assemble the alphabeticIndex + index = AlphabeticIndex(self.rlocale) + for surname, handle_list in surname_handle_dict.items(): + index.addRecord(surname, handle_list) + + # Extract the buckets from the index + index_list = [] + index.resetBucketIterator() + while index.nextBucket(): + if index.bucketRecordCount != 0: + index_list.append(index.bucketLabel) + # Output the navigation alpha_nav = alphabet_navigation(index_list, self.rlocale) if alpha_nav is not None: individuallist += alpha_nav @@ -248,179 +398,57 @@ class PersonPages(BasePage): tbody = Html("tbody") table += tbody - ppl_handle_list = sort_people(self.r_db, ppl_handle_list, - self.rlocale) - first = True - name_format = self.report.options['name_format'] - nme_format = _nd.name_formats[name_format][1] - for (surname, handle_list) in ppl_handle_list: + # for each bucket, output the surnames in that bucket + index.resetBucketIterator() + output = [] + dup_index = 0 + while index.nextBucket(): + if index.bucketRecordCount != 0: + surname_handle_dict = defaultdict(list) + bucket_letter = index.bucketLabel + bucket_link = bucket_letter + if bucket_letter in output: + bucket_link = "%s 
(%i)" % (bucket_letter, dup_index) + dup_index += 1 + output.append(bucket_letter) + while index.nextRecord(): + surname = index.recordName + handle_list = index.recordData + for handle in handle_list: + surname_handle_dict[surname].append(handle) + surname_handle_list = list(surname_handle_dict.items()) + # sort by surname + surname_handle_list.sort(key=lambda x: + self.rlocale.sort_key(x[0])) - if surname and not surname.isspace(): - letter = get_index_letter(first_letter(surname), index_list, - self.rlocale) - else: - letter = ' ' - surname = self._("") + name_format = self.report.options['name_format'] + nme_format = _nd.name_formats[name_format][1] + for (surname, handle_list) in surname_handle_list: + if not surname or surname.isspace(): + surname = self._("") - # In case the user choose a format name like "*SURNAME*" - # We must display this field in upper case. So we use the - # english format of format_name to find if this is the case. - # name_format = self.report.options['name_format'] - # nme_format = _nd.name_formats[name_format][1] - if "SURNAME" in nme_format: - surnamed = surname.upper() - else: - surnamed = surname - first_surname = True - for person_handle in sorted(handle_list, - key=self.sort_on_name_and_grampsid): - person = self.r_db.get_person_from_handle(person_handle) - if person.get_change_time() > date: - date = person.get_change_time() - - # surname column - trow = Html("tr") - tbody += trow - tcell = Html("td", class_="ColumnSurname", inline=True) - trow += tcell - - if first or primary_difference(letter, prev_letter, - self.rlocale): - first = False - first_surname = False - prev_letter = letter - trow.attr = 'class = "BeginSurname"' - ttle = self._("Surnames %(surname)s beginning " - "with letter %(letter)s" % - {'surname' : surname, - 'letter' : letter}) - tcell += Html( - "a", html_escape(surnamed), name=letter, - id_=letter, - title=ttle) - elif first_surname: - first_surname = False - tcell += Html("a", html_escape(surnamed), - 
title=self._("Surnames") + " " + surname) - else: - tcell += " " - - # firstname column - link = self.new_person_link(person_handle, person=person) - trow += Html("td", link, class_="ColumnName") - - # birth column - if showbirth: - tcell = Html("td", class_="ColumnBirth", inline=True) - trow += tcell - - birth_date = _find_birth_date(self.r_db, person) - if birth_date is not None: - if birth_date.fallback: - tcell += Html('em', - self.rlocale.get_date(birth_date), - inline=True) - else: - tcell += self.rlocale.get_date(birth_date) + # In case the user choose a format name like "*SURNAME*" + # We must display this field in upper case. So we use + # the english format of format_name to find if this is + # the case. name_format = + # self.report.options['name_format'] nme_format = + # _nd.name_formats[name_format][1] + if "SURNAME" in nme_format: + surnamed = surname.upper() else: - tcell += " " - - # death column - if showdeath: - tcell = Html("td", class_="ColumnDeath", inline=True) - trow += tcell - - death_date = _find_death_date(self.r_db, person) - if death_date is not None: - if death_date.fallback: - tcell += Html('em', - self.rlocale.get_date(death_date), - inline=True) - else: - tcell += self.rlocale.get_date(death_date) - else: - tcell += " " - - # partner column - if showpartner: - - family_list = person.get_family_handle_list() - first_family = True - #partner_name = None - tcell = () - if family_list: - for family_handle in family_list: - family = self.r_db.get_family_from_handle( - family_handle) - partner_handle = utils.find_spouse( - person, family) - if partner_handle: - if not first_family: - # have to do this to get the comma on - # the same line as the link - if isinstance(tcell[-1], Html): - # tcell is an instance of Html (or - # of a subclass thereof) - tcell[-1].inside += "," - else: - tcell = tcell[:-1] + ( - # TODO for Arabic, translate? 
- (tcell[-1] + ", "),) - # Have to manipulate as tuples so that - # subsequent people are not nested - # within the first link - tcell += ( - self.new_person_link(partner_handle),) - first_family = False - else: - tcell = " " - trow += Html("td", class_="ColumnPartner") + tcell - - # parents column - if showparents: - - parent_hdl_list = person.get_parent_family_handle_list() - if parent_hdl_list: - parent_handle = parent_hdl_list[0] - family = self.r_db.get_family_from_handle( - parent_handle) - father_handle = family.get_father_handle() - mother_handle = family.get_mother_handle() - if father_handle: - father = self.r_db.get_person_from_handle( - father_handle) - else: - father = None - if mother_handle: - mother = self.r_db.get_person_from_handle( - mother_handle) - else: - mother = None - if father: - father_name = self.get_name(father) - if mother: - mother_name = self.get_name(mother) - samerow = False - if mother and father: - tcell = (Html("span", father_name, - class_="father fatherNmother", - inline=True), - Html("span", mother_name, - class_="mother", inline=True)) - elif mother: - tcell = Html("span", mother_name, - class_="mother", inline=True) - elif father: - tcell = Html("span", father_name, - class_="father", inline=True) - else: - tcell = " " - samerow = True - else: - tcell = " " - samerow = True - trow += Html("td", class_="ColumnParents", - inline=samerow) + tcell + surnamed = surname + first_surname = True + first_individual = True + for person_handle in sorted(handle_list, + key=self.sort_on_name_and_grampsid): + (date, first_surname, first_individual) \ + = self.__output_person(date, tbody, bucket_letter, + bucket_link, showbirth, + showdeath, showpartner, + showparents, surname, + surnamed, first_surname, + first_individual, + person_handle) # create clear line for proper styling # create footer section diff --git a/gramps/plugins/webreport/place.py b/gramps/plugins/webreport/place.py index ffa50d64b..dada44daa 100644 --- 
a/gramps/plugins/webreport/place.py +++ b/gramps/plugins/webreport/place.py @@ -61,14 +61,12 @@ from gramps.gen.display.place import displayer as _pd # specific narrative web import #------------------------------------------------ from gramps.plugins.webreport.basepage import BasePage -from gramps.plugins.webreport.common import (first_letter, - alphabet_navigation, GOOGLE_MAPS, - primary_difference, - get_index_letter, FULLCLEAR, +from gramps.plugins.webreport.common import (alphabet_navigation, GOOGLE_MAPS, + FULLCLEAR, MARKER_PATH, OPENLAYER, OSM_MARKERS, STAMEN_MARKERS, MARKERS, html_escape, - sort_places) + sort_places, AlphabeticIndex) _ = glocale.translation.sgettext LOG = logging.getLogger(".NarrativeWeb") @@ -164,6 +162,57 @@ class PlacePages(BasePage): step() self.placelistpage(self.report, the_lang, the_title) + + def __output_place(self, ldatec, tbody, + first_place, pname, place_handle, letter, bucket_link): + place = self.r_db.get_place_from_handle(place_handle) + if place: + if place.get_change_time() > ldatec: + ldatec = place.get_change_time() + plc_title = pname + main_location = get_main_location(self.r_db, place) + if not plc_title or plc_title == " ": + letter = " " + trow = Html("tr") + tbody += trow + tcell = Html("td", class_="ColumnLetter", inline=True) + trow += tcell + if first_place: + # or primary_difference(letter, prev_letter, self.rlocale): + first_place = False + # prev_letter = letter + trow.attr = 'class = "BeginLetter"' + ttle = self._("Places beginning " + "with letter %s") % letter + tcell += Html("a", letter, name=letter, title=ttle, + id_=bucket_link) + else: + tcell += " " + trow += Html("td", self.place_link(place.get_handle(), + plc_title, + place.get_gramps_id()), + class_="ColumnName") + trow.extend(Html("td", data or " ", class_=colclass, + inline=True) for + (colclass, data) in [ + ["ColumnState", + main_location.get(PlaceType.STATE, '')], + ["ColumnCountry", + main_location.get(PlaceType.COUNTRY, '')]]) + if 
self.display_coordinates: + tcell1 = Html("td", class_="ColumnLatitude", inline=True) + tcell2 = Html("td", class_="ColumnLongitude", inline=True) + trow += tcell1, tcell2 + if place.lat and place.long: + latitude, longitude = conv_lat_lon(place.lat, place.long, + "DEG") + tcell1 += latitude + tcell2 += longitude + else: + tcell1 += ' ' + tcell2 += ' ' + return (ldatec, first_place) + def placelistpage(self, report, the_lang, the_title): """ Create a place index @@ -179,7 +228,6 @@ class PlacePages(BasePage): result = self.write_header(self._("Places")) placelistpage, dummy_head, dummy_body, outerwrapper = result ldatec = 0 - prev_letter = " " # begin places division with Html("div", class_="content", id="Places") as placelist: @@ -193,10 +241,22 @@ class PlacePages(BasePage): placelist += Html("p", msg, id="description") # begin alphabet navigation - pkeys = self.report.obj_dict[PlaceName].keys() - index_list = get_first_letters(pkeys, rlocale=self.rlocale) + # Assemble all the places + index = AlphabeticIndex(self.rlocale) + # self.report.obj_dict[PlaceName] is a dict with key place_name and + # values (place_fname, place_name, place.gramps_id, event) + for (place_name, value) in self.report.obj_dict[PlaceName].items(): + index.addRecord(place_name, value) + + # Extract the buckets from the index + index_list = [] + index.resetBucketIterator() + while index.nextBucket(): + if index.bucketRecordCount != 0: + index_list.append(index.bucketLabel) + # Output the navigation alpha_nav = alphabet_navigation(index_list, self.rlocale) - if alpha_nav is not None: + if alpha_nav: placelist += alpha_nav # begin places table and table head @@ -234,80 +294,40 @@ class PlacePages(BasePage): ] ) - handle_list = sort_places(self.r_db, - self.report.obj_dict[PlaceName], - self.rlocale) - first = True - # begin table body tbody = Html("tbody") table += tbody - for (pname, place_handle) in handle_list: - place = self.r_db.get_place_from_handle(place_handle) - if place: - if 
place.get_change_time() > ldatec: - ldatec = place.get_change_time() - plc_title = pname - main_location = get_main_location(self.r_db, place) + # For each bucket, output the places in that bucket + index.resetBucketIterator() + output = [] + dup_index = 0 + while index.nextBucket(): + if index.bucketRecordCount != 0: + bucket_letter = index.bucketLabel + bucket_link = bucket_letter + if bucket_letter in output: + bucket_link = "%s (%i)" % (bucket_letter, dup_index) + dup_index += 1 + output.append(bucket_letter) + # Assemble all the places in this bucket into a dict for + # sorting + place_dict = dict() + while index.nextRecord(): + place_name = index.recordName + value = index.recordData + place_dict[place_name] = value - if plc_title and plc_title != " ": - letter = get_index_letter(first_letter(plc_title), - index_list, - self.rlocale) - else: - letter = ' ' - - trow = Html("tr") - tbody += trow - - tcell = Html("td", class_="ColumnLetter", inline=True) - trow += tcell - if first or primary_difference(letter, prev_letter, - self.rlocale): - first = False - prev_letter = letter - trow.attr = 'class = "BeginLetter"' - - ttle = self._("Places beginning " - "with letter %s") % letter - tcell += Html("a", letter, name=letter, title=ttle) - else: - tcell += " " - - trow += Html("td", - self.place_link( - place.get_handle(), - plc_title, place.get_gramps_id()), - class_="ColumnName") - - trow.extend( - Html("td", data or " ", class_=colclass, - inline=True) - for (colclass, data) in [ - ["ColumnState", - main_location.get(PlaceType.STATE, '')], - ["ColumnCountry", - main_location.get(PlaceType.COUNTRY, '')] - ] - ) - - if self.display_coordinates: - tcell1 = Html("td", class_="ColumnLatitude", - inline=True) - tcell2 = Html("td", class_="ColumnLongitude", - inline=True) - trow += (tcell1, tcell2) - - if place.lat and place.long: - latitude, longitude = conv_lat_lon(place.lat, - place.long, - "DEG") - tcell1 += latitude - tcell2 += longitude - else: - tcell1 += ' ' - 
tcell2 += ' ' + handle_list = sort_places(self.r_db, + place_dict, + self.rlocale) + first_place = True + for (pname, place_handle) in handle_list: + (ldatec, first_place) \ + = self.__output_place(ldatec, + trow, first_place, pname, + place_handle, bucket_letter, + bucket_link) # add clearline for proper styling # add footer section @@ -545,29 +565,3 @@ class PlacePages(BasePage): if place_name == apname: # store only the primary named page self.xhtml_writer(placepage, output_file, sio, ldatec) -def get_first_letters(place_list, rlocale=glocale): - """ - get the first letters of the place name list - - @param: handle_list -- The place name list - - The first letter (or letters if there is a contraction) are extracted from - """ - index_list = [] - for place in place_list: - ltr = first_letter(place) - index_list.append(ltr) - - # Now remove letters where there is not a primary difference - index_list.sort(key=rlocale.sort_key) - first = True - prev_index = None - for nkey in index_list[:]: #iterate over a slice copy of the list - if first or primary_difference(prev_index, nkey, rlocale): - first = False - prev_index = nkey - else: - index_list.remove(nkey) - - # return menu set letters for alphabet_navigation - return index_list diff --git a/gramps/plugins/webreport/surnamelist.py b/gramps/plugins/webreport/surnamelist.py index bf485e925..02b5c3246 100644 --- a/gramps/plugins/webreport/surnamelist.py +++ b/gramps/plugins/webreport/surnamelist.py @@ -43,6 +43,8 @@ Classe: #------------------------------------------------ from decimal import getcontext import logging +from collections import defaultdict +from unicodedata import name as uniname #------------------------------------------------ # Gramps module @@ -55,11 +57,10 @@ from gramps.plugins.lib.libhtml import Html #------------------------------------------------ from gramps.plugins.webreport.basepage import BasePage from gramps.gen.display.name import displayer as _nd -from gramps.plugins.webreport.common 
import (get_first_letters, _KEYPERSON, - alphabet_navigation, html_escape, - sort_people, name_to_md5, - first_letter, get_index_letter, - primary_difference, FULLCLEAR) +from gramps.plugins.webreport.common import (alphabet_navigation, html_escape, + name_to_md5, FULLCLEAR, + get_surname_from_person, + AlphabeticIndex) _ = glocale.translation.sgettext LOG = logging.getLogger(".NarrativeWeb") @@ -91,8 +92,6 @@ class SurnameListPage(BasePage): @param: filename -- The name to use for the Surnames page """ BasePage.__init__(self, report, the_lang, the_title) - prev_surname = "" - prev_letter = " " if order_by == self.ORDER_BY_NAME: output_file, sio = self.report.create_file(filename) @@ -113,11 +112,30 @@ class SurnameListPage(BasePage): 'database with this same surname.') surnamelist += Html("p", msg, id="description") + # Assemble all the handles for each surname into a dictionary + # We don't call sort_people because we don't care about sorting + # individuals, only surnames + surname_handle_dict = defaultdict(list) + for person_handle in ppl_handle_list: + person = self.r_db.get_person_from_handle(person_handle) + surname = get_surname_from_person(self.r_db, person) + surname_handle_dict[surname].append(person_handle) + # add alphabet navigation... 
# only if surname list not surname count if order_by == self.ORDER_BY_NAME: - index_list = get_first_letters(self.r_db, ppl_handle_list, - _KEYPERSON, rlocale=self.rlocale) + # Assemble the AlphabeticIndex + index = AlphabeticIndex(self.rlocale) + for surname, handle_list in surname_handle_dict.items(): + index.addRecord(surname, handle_list) + + # Extract the buckets from the index + index_list = [] + index.resetBucketIterator() + while index.nextBucket(): + if index.bucketRecordCount != 0: + index_list.append(index.bucketLabel) + # Output the navigation alpha_nav = alphabet_navigation(index_list, self.rlocale) if alpha_nav is not None: surnamelist += alpha_nav @@ -162,74 +180,36 @@ class SurnameListPage(BasePage): with Html("tbody") as tbody: table += tbody - ppl_handle_list = sort_people(self.r_db, ppl_handle_list, - self.rlocale) if order_by == self.ORDER_BY_COUNT: - temp_list = {} - for (surname, data_list) in ppl_handle_list: - index_val = "%90d_%s" % (999999999-len(data_list), - surname) - temp_list[index_val] = (surname, data_list) + # construct a dictionary of counts, for example + # {3: ["brown", [handle1, handle2,handle3], + # "smith", [handle4, handle5, handle6] + # ], + # 2: ["Jones", [handle7] + # + # }] + count_ppl_handle_dict = defaultdict(list) + for surname, data_list in surname_handle_dict.items(): + count_ppl_handle_dict[len(data_list)].append \ + ((surname,data_list)) + # For each count, we construct and output a separate + # AlphabeticIndex for all surnames with that count + for (dummy_count, ppl_handles) in \ + sorted(count_ppl_handle_dict.items(), reverse=True): + # Construct the AplhabeticIndex for that count + index = AlphabeticIndex(self.rlocale) + for (surname, handle_list) in ppl_handles: + index.addRecord(surname, handle_list) + # Output the AlphabeticIndex for that count + self.output_surname_records(index, tbody, + name_format) - lkey = self.rlocale.sort_key - ppl_handle_list = (temp_list[key] - for key in sorted(temp_list, - 
key=lkey)) + else: # order_by == self.ORDER_BY_NAME + # The AlphabeticIndex has already been constructed + # Output the AlphabeticIndex + self.output_surname_records(index, tbody, + name_format) - first = True - first_surname = True - - for (surname, data_list) in ppl_handle_list: - - if surname and not surname.isspace(): - letter = first_letter(surname) - if order_by == self.ORDER_BY_NAME: - # There will only be an alphabetic index list if - # the ORDER_BY_NAME page is being generated - letter = get_index_letter(letter, index_list, - self.rlocale) - else: - letter = ' ' - surname = self._("") - - trow = Html("tr") - tbody += trow - - tcell = Html("td", class_="ColumnLetter", inline=True) - trow += tcell - - if first or primary_difference(letter, prev_letter, - self.rlocale): - first = False - prev_letter = letter - trow.attr = 'class = "BeginLetter"' - ttle = self._("Surnames beginning with " - "letter %s") % letter - hyper = Html("a", letter, name=letter, - title=ttle, inline=True) - tcell += hyper - elif first_surname or surname != prev_surname: - first_surname = False - tcell += " " - prev_surname = surname - - # In case the user choose a format name like "*SURNAME*" - # We must display this field in upper case. So we use - # the english format of format_name to find if this is - # the case. - # name_format = self.report.options['name_format'] - nme_format = _nd.name_formats[name_format][1] - if "SURNAME" in nme_format: - surnamed = surname.upper() - else: - surnamed = surname - trow += Html("td", - self.surname_link(name_to_md5(surname), - surnamed), - class_="ColumnSurname", inline=True) - - trow += Html("td", len(data_list), - class_="ColumnQuantity", inline=True) # create footer section # add clearline for proper styling @@ -252,10 +232,83 @@ class SurnameListPage(BasePage): the result. 
""" url = self.report.build_url_fname_html(fname, "srn", uplink) + try: # some characters don't have a unicode name + char = uniname(name[0]) + except (ValueError, TypeError) as dummy_err: + char = " " hyper = Html("a", html_escape(name), href=url, - title=name, inline=True) + title="%s starting with %s" % (name, char), + inline=True) if opt_val is not None: hyper += opt_val # return hyperlink to its caller return hyper + + def output_surname_records(self, index, tbody, name_format): + """ + Output all the surnames in the index. + + @param: index -- An ICU AlphabeticIndex where the names are surnames + and the data is a list of people handles with that + surname + @param: tbody -- The HTML body to which the lines are added + @param: name_format -- The name format from the report options + """ + index.resetBucketIterator() + output = [] + dup_index = 0 + while index.nextBucket(): + if index.bucketRecordCount != 0: + trow = Html("tr") + tbody += trow + + tcell = Html("td", class_="ColumnLetter", inline=True) + trow += tcell + trow.attr = 'class = "BeginLetter"' + bucket_letter = index.bucketLabel + bucket_link = bucket_letter + if bucket_letter in output: + bucket_link = "%s (%i)" % (bucket_letter, dup_index) + dup_index += 1 + output.append(bucket_letter) + try: # some characters don't have a unicode name + char = uniname(bucket_letter) + except (ValueError, TypeError) as dummy_err: + char = " " + ttle = self._("Surnames beginning with " + "letter '%s' %s") % \ + (bucket_letter, char) + hyper = Html("a", index.bucketLabel, name=index.bucketLabel, + id_=bucket_link, title=ttle) + tcell += hyper + + first = True + while index.nextRecord(): + surname = index.recordName + if not first: + trow = Html("tr") + tbody += trow + + tcell = Html("td", class_="ColumnLetter", inline=True) + trow += tcell + first = False + if not surname or surname.isspace(): + surname = self._("") + # In case the user choose a format name like "*SURNAME*" + # We must display this field in upper 
case. So we use + # the english format of format_name to find if this is + # the case. + # name_format = self.report.options['name_format'] + nme_format = _nd.name_formats[name_format][1] + if "SURNAME" in nme_format: + surnamed = surname.upper() + else: + surnamed = surname + trow += Html("td", + self.surname_link(name_to_md5(surname), + surnamed), + class_="ColumnSurname", inline=True) + + trow += Html("td", len(index.recordData), + class_="ColumnQuantity", inline=True)