From cca72aa5795cd09f1ff60ec97e4762a492f2772c Mon Sep 17 00:00:00 2001
From: Vassilii Khachaturov
Date: Sun, 17 Nov 2013 16:12:43 +0200
Subject: [PATCH] 7212: convert invalid dates to text on import

If the date is invalid, it will be converted to text mode, and the text
field will be set to the problematic XML. A warning will be printed on
the console as well.

The log message will use the invalid date attached as DateError.date,
see [8e1659].
---
 gramps/plugins/importer/importxml.py | 41 +++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/gramps/plugins/importer/importxml.py b/gramps/plugins/importer/importxml.py
index ede091250..5d451da4c 100644
--- a/gramps/plugins/importer/importxml.py
+++ b/gramps/plugins/importer/importxml.py
@@ -29,10 +29,12 @@
 # Standard Python Modules
 #
 #-------------------------------------------------------------------------
+from __future__ import unicode_literals
 import os
 import sys
 import time
 from xml.parsers.expat import ExpatError, ParserCreate
+from xml.sax.saxutils import escape
 from gramps.gen.const import GRAMPS_LOCALE as glocale
 _ = glocale.translation.gettext
 import re
@@ -47,7 +49,7 @@ LOG = logging.getLogger(".ImportXML")
 #-------------------------------------------------------------------------
 from gramps.gen.mime import get_type
 from gramps.gen.lib import (Address, Attribute, AttributeType, ChildRef,
-                            ChildRefType, Citation, Date, Event, EventRef,
+                            ChildRefType, Citation, Date, DateError, Event, EventRef,
                             EventRoleType, EventType, Family, LdsOrd, Location,
                             MediaObject, MediaRef, Name, NameOriginType,
                             NameType, Note, NoteType, Person, PersonRef,
@@ -2294,10 +2296,16 @@ class GrampsParser(UpdateCallback):
         else:
             newyear = Date.newyear_to_code(newyear)
 
-        date_value.set(qual, mode, cal,
-                       (day, month, year, dualdated,
-                        rng_day, rng_month, rng_year, dualdated),
-                       newyear=newyear)
+        try:
+            date_value.set(qual, mode, cal,
+                           (day, month, year, dualdated,
+                            rng_day, rng_month, rng_year, dualdated),
+                           newyear=newyear)
+        except DateError as e:
+            self._set_date_to_xml_text(date_value, e,
+                    xml_element_name = ("datespan" if mode == Date.MOD_SPAN
+                        else "daterange"),
+                    xml_attrs = attrs)
 
     def start_dateval(self, attrs):
         if self.citation:
@@ -2375,8 +2383,27 @@ class GrampsParser(UpdateCallback):
         else:
             newyear = Date.newyear_to_code(newyear)
 
-        date_value.set(qual, mod, cal, (day, month, year, dualdated),
-                       newyear=newyear)
+        try:
+            date_value.set(qual, mod, cal, (day, month, year, dualdated),
+                           newyear=newyear)
+        except DateError as e:
+            self._set_date_to_xml_text(date_value, e, 'dateval', attrs)
+
+    def _set_date_to_xml_text(self, date_value, date_error, xml_element_name, xml_attrs):
+        """
+        Common handling of invalid dates for the date... element handlers.
+
+        Prints warning on console and sets date_value to a text-only date
+        with the problematic XML inside.
+        """
+        xml = "<{element_name} {attrs}/>".format(
+                element_name = xml_element_name,
+                attrs = " ".join(
+                    ['{}="{}"'.format(k,escape(v, entities={'"' : "&quot;"}))
+                        for k,v in xml_attrs.items()]))  # items(): py2 and py3
+        LOG.warning(_("Invalid date {} in XML {}, preserving XML as text"
+            ).format(date_error.date.to_struct(), xml))
+        date_value.set(modifier=Date.MOD_TEXTONLY, text=xml)
 
     def start_datestr(self, attrs):
         if self.citation: