From 66da4a7f96ed03851740a9419cdfaeb2f22cee9f Mon Sep 17 00:00:00 2001 From: Benny Malengier Date: Wed, 24 Mar 2010 16:05:14 +0000 Subject: [PATCH] Solving connected date bugs 3390: Date module that reads calendar on Gedcom file format, cannot read the @ 3763: dates parsed wrong 3754: Active markup for invalid date on Event View Some other bugs might be fixed by these changes svn: r14921 --- src/DateHandler/_DateParser.py | 47 +++++-- src/gui/views/treemodels/eventmodel.py | 19 ++- src/plugins/lib/libgedcom.py | 167 ++++++++++++++++--------- src/plugins/view/eventview.py | 5 +- 4 files changed, 166 insertions(+), 72 deletions(-) diff --git a/src/DateHandler/_DateParser.py b/src/DateHandler/_DateParser.py index 6c78742f8..1698544cd 100644 --- a/src/DateHandler/_DateParser.py +++ b/src/DateHandler/_DateParser.py @@ -154,11 +154,17 @@ class DateParser(object): modifier_after_to_int = {} hebrew_to_int = { - "tishri" : 1, "heshvan" : 2, "kislev" : 3, - "tevet" : 4, "shevat" : 5, "adari" : 6, - "adarii" : 7, "nisan" : 8, "iyyar" : 9, - "sivan" : 10, "tammuz" : 11, "av" : 12, - "elul" : 13, + u"tishri" : 1, u"heshvan" : 2, u"kislev" : 3, + u"tevet" : 4, u"shevat" : 5, u"adari" : 6, + u"adarii" : 7, u"nisan" : 8, u"iyyar" : 9, + u"sivan" : 10, u"tammuz" : 11, u"av" : 12, + u"elul" : 13, + #alternative spelling + u"cheshvan": 2, u"adar sheni": 7, u"iyar" : 9, + #GEDCOM months + u"tsh" : 1, u"csh": 2, u"ksl": 3, u"tvt": 4, u"shv": 5, u"adr": 6, + u"ads" : 7, u"nsn": 8, u"iyr": 9, u"svn":10, u"tmz":11, u"aav":12, + u"ell":13, } french_to_int = { @@ -168,7 +174,15 @@ class DateParser(object): u'germinal' : 7, u'floréal' : 8, u'prairial' : 9, u'messidor' : 10, u'thermidor' : 11, u'fructidor' : 12, - u'extra' : 13 + u'extra' : 13, + #GEDCOM months + u'vend' : 1, u'brum' : 2, + u'frim' : 3, u'nivo' : 4, + u'pluv' : 5, u'vent' : 6, + u'germ' : 7, u'flor' : 8, + u'prai' : 9, u'mess' : 10, + u'ther' : 11, u'fruc' : 12, + u'comp' : 13, } islamic_to_int = { @@ -430,7 +444,6 @@ class 
DateParser(object): m = 0 else: m = mmap[groups[1].lower()] - d = self._get_int(groups[0]) if groups[2] is None: @@ -586,11 +599,15 @@ class DateParser(object): text_parser = self.parser[cal] (text1, bc1) = self.match_bce(match.group('start')) start = self._parse_subdate(text1, text_parser) + if start == Date.EMPTY and text1 != "": + return 0 if bc1: start = self.invert_year(start) (text2, bc2) = self.match_bce(match.group('stop')) stop = self._parse_subdate(text2, text_parser) + if stop == Date.EMPTY and text2 != "": + return 0 if bc2: stop = self.invert_year(stop) @@ -609,11 +626,15 @@ class DateParser(object): text_parser = self.parser[cal] (text1, bc1) = self.match_bce(match.group('start')) start = self._parse_subdate(text1, text_parser) + if start == Date.EMPTY and text1 != "": + return 0 if bc1: start = self.invert_year(start) (text2, bc2) = self.match_bce(match.group('stop')) stop = self._parse_subdate(text2, text_parser) + if stop == Date.EMPTY and text2 != "": + return 0 if bc2: stop = self.invert_year(stop) @@ -666,7 +687,10 @@ class DateParser(object): start = self._parse_subdate(grps[0], self.parser[cal]) mod = self.modifier_after_to_int.get(grps[1].lower(), Date.MOD_NONE) - if bc: + if start == Date.EMPTY: + date.set_modifier(Date.MOD_TEXTONLY) + date.set_text_value(text) + elif bc: date.set(qual, mod, cal, self.invert_year(start), newyear=ny) else: date.set(qual, mod, cal, start, newyear=ny) @@ -676,7 +700,10 @@ class DateParser(object): grps = match.groups() start = self._parse_subdate(grps[0]) mod = Date.MOD_ABOUT - if bc: + if start == Date.EMPTY: + date.set_modifier(Date.MOD_TEXTONLY) + date.set_text_value(text) + elif bc: date.set(qual, mod, cal, self.invert_year(start), newyear=ny) else: date.set(qual, mod, cal, start, newyear=ny) @@ -712,6 +739,8 @@ class DateParser(object): if subdate == Date.EMPTY and text != "": date.set_as_text(text) return + #else: + # print 'valid subdate', text, subdate except: date.set_as_text(text) return diff --git 
a/src/gui/views/treemodels/eventmodel.py b/src/gui/views/treemodels/eventmodel.py index 0b401d42c..e98e6d52a 100644 --- a/src/gui/views/treemodels/eventmodel.py +++ b/src/gui/views/treemodels/eventmodel.py @@ -25,6 +25,7 @@ # #------------------------------------------------------------------------- import time +import cgi import logging log = logging.getLogger(".") @@ -45,6 +46,7 @@ import GrampsLocale import DateHandler import gen.lib import Utils +import config from gui.views.treemodels.flatbasemodel import FlatBaseModel #------------------------------------------------------------------------- @@ -60,6 +62,8 @@ COLUMN_DESCRIPTION = 4 COLUMN_PLACE = 5 COLUMN_CHANGE = 10 +INVALID_DATE_FORMAT = config.get('preferences.invalid-date-format') + #------------------------------------------------------------------------- # # EventModel @@ -122,14 +126,25 @@ class EventModel(FlatBaseModel): if data[COLUMN_DATE]: event = gen.lib.Event() event.unserialize(data) - return DateHandler.get_date(event) + date_str = DateHandler.get_date(event) + if date_str != "": + retval = cgi.escape(date_str) + if not DateHandler.get_date_valid(event): + return INVALID_DATE_FORMAT % retval + else: + return retval return u'' def sort_date(self,data): if data[COLUMN_DATE]: event = gen.lib.Event() event.unserialize(data) - return "%09d" % event.get_date_object().get_sort_value() + retval = "%09d" % event.get_date_object().get_sort_value() + if not DateHandler.get_date_valid(event): + return INVALID_DATE_FORMAT % retval + else: + return retval + return u'' def column_handle(self,data): diff --git a/src/plugins/lib/libgedcom.py b/src/plugins/lib/libgedcom.py index 41fad7ecd..a517e30b6 100644 --- a/src/plugins/lib/libgedcom.py +++ b/src/plugins/lib/libgedcom.py @@ -87,7 +87,7 @@ all lines until the next level 2 token is found (in this case, skipping the # standard python modules # #------------------------------------------------------------------------- -import os +import os, sys import re import 
time import codecs @@ -428,10 +428,10 @@ MEDIA_MAP = { # Integer to GEDCOM tag mappings for constants # #------------------------------------------------------------------------- -CALENDAR_MAP = { - "FRENCH R" : gen.lib.Date.CAL_FRENCH, - "JULIAN" : gen.lib.Date.CAL_JULIAN, - "HEBREW" : gen.lib.Date.CAL_HEBREW, +CALENDAR_MAP_GEDCOM2XML = { + u"FRENCH R" : gen.lib.Date.CAL_FRENCH, + u"JULIAN" : gen.lib.Date.CAL_JULIAN, + u"HEBREW" : gen.lib.Date.CAL_HEBREW, } QUALITY_MAP = { @@ -574,6 +574,21 @@ CALENDAR_MAP = { gen.lib.Date.CAL_SWEDISH : (MONTH, '@#DUNKNOWN@'), } +CALENDAR_MAP_PARSESTRING = { + gen.lib.Date.CAL_HEBREW : ' (h)', + gen.lib.Date.CAL_FRENCH : ' (f)', + gen.lib.Date.CAL_JULIAN : ' (j)', + gen.lib.Date.CAL_SWEDISH : ' (s)', + } + +#how wrong calendar use is shown +CALENDAR_MAP_WRONGSTRING = { + gen.lib.Date.CAL_HEBREW : ' ', + gen.lib.Date.CAL_FRENCH : ' ', + gen.lib.Date.CAL_JULIAN : ' ', + gen.lib.Date.CAL_SWEDISH : ' ', + } + DATE_MODIFIER = { gen.lib.Date.MOD_ABOUT : "ABT", gen.lib.Date.MOD_BEFORE : "BEF", @@ -601,7 +616,11 @@ PERSON_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$") MOD = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$") CAL = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D?([^@]+)@\s*(.*)$") RANGE = re.compile(r"\s*BET\s+@#D?([^@]+)@\s*(.*)\s+AND\s+@#D?([^@]+)@\s*(.*)$") +RANGE1 = re.compile(r"\s*BET\s+\s*(.*)\s+AND\s+@#D?([^@]+)@\s*(.*)$") +RANGE2 = re.compile(r"\s*BET\s+@#D?([^@]+)@\s*(.*)\s+AND\s+\s*(.*)$") SPAN = re.compile(r"\s*FROM\s+@#D?([^@]+)@\s*(.*)\s+TO\s+@#D?([^@]+)@\s*(.*)$") +SPAN1 = re.compile(r"\s*FROM\s+\s*(.*)\s+TO\s+@#D?([^@]+)@\s*(.*)$") +SPAN2 = re.compile(r"\s*FROM\s+@#D?([^@]+)@\s*(.*)\s+TO\s+\s*(.*)$") NAME_RE = re.compile(r"/?([^/]*)(/([^/]*)(/([^/]*))?)?") SURNAME_RE = re.compile(r"/([^/]*)/([^/]*)") @@ -643,6 +662,7 @@ class Lexer(object): try: return GedLine(self.current_list.pop()) except: + LOG.debug('Error in reading Gedcom line', exc_info=True) return None def __fix_token_cont(self, data): @@ -734,72 +754,99 @@ class 
GedLine(object): Converts the specified text to a gen.lib.Date object. """ dateobj = gen.lib.Date() - text = text.replace('BET ABT','EST BET') # Horrible hack for importing # illegal GEDCOM from # Apple Macintosh Classic # 'Gene' program - - try: - # extract out the MOD line - match = MOD.match(text) - if match: - (mod, text) = match.groups() - qual = QUALITY_MAP.get(mod, gen.lib.Date.QUAL_NONE) - else: - qual = gen.lib.Date.QUAL_NONE - - # parse the range if we match, if so, return - match = RANGE.match(text) + + # extract out the MOD line + match = MOD.match(text) + mod = '' + if match: + (mod, text) = match.groups() + qual = QUALITY_MAP.get(mod, gen.lib.Date.QUAL_NONE) + mod += ' ' + else: + qual = gen.lib.Date.QUAL_NONE + + # parse the range if we match, if so, return + match = RANGE.match(text) + match1 = RANGE1.match(text) + match2 = RANGE2.match(text) + if match or match1 or match2: if match: (cal1, data1, cal2, data2) = match.groups() - - cal = CALENDAR_MAP.get(cal1, gen.lib.Date.CAL_GREGORIAN) - - start = GedLine.__DATE_CNV.parse(data1) - stop = GedLine.__DATE_CNV.parse(data2) - dateobj.set(gen.lib.Date.QUAL_NONE, gen.lib.Date.MOD_RANGE, cal, - start.get_start_date() + stop.get_start_date()) - dateobj.set_quality(qual) - return dateobj - - # parse a span if we match - match = SPAN.match(text) - if match: - (cal1, data1, cal2, data2) = match.groups() - - cal = CALENDAR_MAP.get(cal1, gen.lib.Date.CAL_GREGORIAN) - - start = GedLine.__DATE_CNV.parse(data1) - stop = GedLine.__DATE_CNV.parse(data2) - dateobj.set(gen.lib.Date.QUAL_NONE, gen.lib.Date.MOD_SPAN, cal, - start.get_start_date() + stop.get_start_date()) - dateobj.set_quality(qual) - return dateobj + elif match1: + cal1 = gen.lib.Date.CAL_GREGORIAN + (data1, cal2, data2) = match1.groups() + elif match2: + cal2 = gen.lib.Date.CAL_GREGORIAN + (cal1, data1, data2) = match2.groups() + cal1 = CALENDAR_MAP_GEDCOM2XML.get(cal1, gen.lib.Date.CAL_GREGORIAN) + cal2 = CALENDAR_MAP_GEDCOM2XML.get(cal2, 
gen.lib.Date.CAL_GREGORIAN) + if cal1 != cal2: + #not supported by GRAMPS, import as text, we construct a string + # that the parser will not parse as a correct date + return GedLine.__DATE_CNV.parse('%sbetween %s%s and %s%s' % + (mod, data1, CALENDAR_MAP_WRONGSTRING.get(cal1, ''), + CALENDAR_MAP_WRONGSTRING.get(cal2, ''), data2)) - match = CAL.match(text) + #add hebrew, ... calendar so that months are recognized + data1 += CALENDAR_MAP_PARSESTRING.get(cal1, '') + data2 += CALENDAR_MAP_PARSESTRING.get(cal2, '') + start = GedLine.__DATE_CNV.parse(data1) + stop = GedLine.__DATE_CNV.parse(data2) + dateobj.set(gen.lib.Date.QUAL_NONE, gen.lib.Date.MOD_RANGE, cal1, + start.get_start_date() + stop.get_start_date()) + dateobj.set_quality(qual) + return dateobj + + # parse a span if we match + match = SPAN.match(text) + match1 = SPAN1.match(text) + match2 = SPAN2.match(text) + if match or match1 or match2: if match: - (abt, cal, data) = match.groups() - if abt: - dateobj = GedLine.__DATE_CNV.parse("%s %s" % (abt, data)) - else: - dateobj = GedLine.__DATE_CNV.parse(data) - dateobj.set_calendar( - CALENDAR_MAP.get(cal, gen.lib.Date.CAL_GREGORIAN)) - dateobj.set_quality(qual) - return dateobj - - dateobj = GedLine.__DATE_CNV.parse(text) + (cal1, data1, cal2, data2) = match.groups() + elif match1: + cal1 = gen.lib.Date.CAL_GREGORIAN + (data1, cal2, data2) = match1.groups() + elif match2: + cal2 = gen.lib.Date.CAL_GREGORIAN + (cal1, data1, data2) = match2.groups() + cal1 = CALENDAR_MAP_GEDCOM2XML.get(cal1, gen.lib.Date.CAL_GREGORIAN) + cal2 = CALENDAR_MAP_GEDCOM2XML.get(cal2, gen.lib.Date.CAL_GREGORIAN) + if cal1 != cal2: + #not supported by GRAMPS, import as text, we construct a string + # that the parser will not parse as a correct date + return GedLine.__DATE_CNV.parse('%sfrom %s%s to %s%s' % + (mod, data1, CALENDAR_MAP_WRONGSTRING.get(cal1, ''), + CALENDAR_MAP_WRONGSTRING.get(cal2, ''), data2)) + #add hebrew, ... 
calendar so that months are recognized + data1 += CALENDAR_MAP_PARSESTRING.get(cal1, '') + data2 += CALENDAR_MAP_PARSESTRING.get(cal2, '') + start = GedLine.__DATE_CNV.parse(data1) + stop = GedLine.__DATE_CNV.parse(data2) + dateobj.set(gen.lib.Date.QUAL_NONE, gen.lib.Date.MOD_SPAN, cal1, + start.get_start_date() + stop.get_start_date()) dateobj.set_quality(qual) return dateobj - # FIXME: explain where/why an IOError might arise - # and also: is such a long try-clause needed - # having this fallback invites "what about other exceptions?" - except IOError: - # fallback strategy (evidently) - return GedLine.__DATE_CNV.set_text(text) - + match = CAL.match(text) + if match: + (abt, call, data) = match.groups() + call = CALENDAR_MAP_GEDCOM2XML.get(call, gen.lib.Date.CAL_GREGORIAN) + data += CALENDAR_MAP_PARSESTRING.get(call, '') + if abt: + dateobj = GedLine.__DATE_CNV.parse("%s %s" % (abt, data)) + else: + dateobj = GedLine.__DATE_CNV.parse(data) + dateobj.set_quality(qual) + return dateobj + dateobj = GedLine.__DATE_CNV.parse(text) + dateobj.set_quality(qual) + return dateobj + def __init__(self, data): """ If the level is 0, then this is a top level instance. In this case, diff --git a/src/plugins/view/eventview.py b/src/plugins/view/eventview.py index 549f219a8..65dd3a062 100644 --- a/src/plugins/view/eventview.py +++ b/src/plugins/view/eventview.py @@ -84,6 +84,8 @@ class EventView(ListView): _('Last Changed'), _('Main Participants'), ] + # columns that contain markup + MARKUP_COLS = [COL_DATE] # default setting with visible columns, order of the col, and their size CONFIGSETTINGS = ( ('columns.visible', [COL_DESCR, COL_ID, COL_TYPE, COL_DATE, COL_PLACE]), @@ -115,7 +117,8 @@ class EventView(ListView): signal_map, dbstate.db.get_event_bookmarks(), Bookmarks.EventBookmarks, nav_group, multiple=True, - filter_class=EventSidebarFilter) + filter_class=EventSidebarFilter, + markup = EventView.MARKUP_COLS) self.func_list = { 'J' : self.jump,