From 57e7b3419a507c3b64eab129170e6acc88a2b3ba Mon Sep 17 00:00:00 2001 From: Don Allingham Date: Fri, 23 Feb 2007 22:56:41 +0000 Subject: [PATCH] 2007-02-23 Don Allingham * src/GrampsDbUtils/_GedcomParse.py: handle error cases * src/GrampsDbUtils/_GedcomLex.py: handle error cases * src/GrampsDbUtils/_GedcomUtils.py: handle error cases svn: r8219 --- ChangeLog | 5 +++++ src/GrampsDbUtils/_GedcomLex.py | 4 ++-- src/GrampsDbUtils/_GedcomParse.py | 15 +++++++++++++-- src/GrampsDbUtils/_GedcomUtils.py | 2 +- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5b370a10d..8d227ee6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2007-02-23 Don Allingham + * src/GrampsDbUtils/_GedcomParse.py: handle error cases + * src/GrampsDbUtils/_GedcomLex.py: handle error cases + * src/GrampsDbUtils/_GedcomUtils.py: handle error cases + 2007-02-23 Alex Roitman * src/GrampsDb/_GrampsBSDDB.py (convert_notes_13): Skip empty notes. diff --git a/src/GrampsDbUtils/_GedcomLex.py b/src/GrampsDbUtils/_GedcomLex.py index 01a76eda5..6c8ff988a 100644 --- a/src/GrampsDbUtils/_GedcomLex.py +++ b/src/GrampsDbUtils/_GedcomLex.py @@ -68,7 +68,7 @@ def latin_to_utf8(s): return unicode(s,'iso-8859-1') def nocnv(s): - return unicode(s) + return unicode(s,errors='replace') #------------------------------------------------------------------------- # @@ -400,7 +400,7 @@ class Reader: except: line = self.cnv(line.translate(_transtable2)) else: - line = unicode(line) + line = unicode(line,errors='replace') line = line.split(None,2) + [''] diff --git a/src/GrampsDbUtils/_GedcomParse.py b/src/GrampsDbUtils/_GedcomParse.py index e740100ad..15a302a29 100644 --- a/src/GrampsDbUtils/_GedcomParse.py +++ b/src/GrampsDbUtils/_GedcomParse.py @@ -281,7 +281,15 @@ class StageOne: try: (level, key, value) = data[:3] value = value.strip() - level = int(level) + # convert the first value to an integer. 
We have to be a bit + # careful here, since some GEDCOM files have garbage characters + # at the front of the first line if they are unicode encoded. + # So, if we have a failure to convert, check the last character + # of the string, which should be a '0' + try: + level = int(level) + except: + level = int(level[-1]) key = key.strip() except: raise Errors.GedcomError("Corrupted file at line %d" % self.lcnt) @@ -362,7 +370,10 @@ class GedcomParser(UpdateCallback): if self.use_def_src: self.def_src = RelLib.Source() fname = os.path.basename(filename).split('\\')[-1] - self.def_src.set_title(_("Import from GEDCOM") % unicode(fname)) + self.def_src.set_title(_("Import from GEDCOM (%s)") % + unicode(fname, + encoding=sys.getfilesystemencoding(), + errors='replace')) self.dir_path = os.path.dirname(filename) self.is_ftw = False self.is_ancestry_com = False diff --git a/src/GrampsDbUtils/_GedcomUtils.py b/src/GrampsDbUtils/_GedcomUtils.py index 4e671d0e4..bde7bec61 100644 --- a/src/GrampsDbUtils/_GedcomUtils.py +++ b/src/GrampsDbUtils/_GedcomUtils.py @@ -150,7 +150,7 @@ class IdMapper: def no_translate(self, gid): return self.clean(gid) - def get_translate(self, id): + def get_translate(self, gid): gid = self.clean(gid) new_id = self.swap.has_key(gid) if new_id: