From 6c4a6f4d05cc1fcad9de9bb3281e359fefc99f32 Mon Sep 17 00:00:00 2001
From: Peter Landgren
Date: Wed, 5 Jan 2011 18:11:50 +0000
Subject: [PATCH] Issue 4515, allow only 09, 0A, 0D in GEDCOM input.

svn: r16340
---
Review note: compared with the change as originally recorded, readline()
no longer calls strip(). strip() removed leading/trailing TAB, LF and CR --
the very characters this change is meant to allow -- and turned a blank
input line into u'', the value file.readline() uses to signal end of file.
The post-image blob id on the "index" line below was not recomputed, and
the whitespace of the context lines was reconstructed from a flattened copy
of this patch (verify against the target file before applying).

 src/plugins/lib/libgedcom.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/plugins/lib/libgedcom.py b/src/plugins/lib/libgedcom.py
index 7e40a1ccb..add02d902 100644
--- a/src/plugins/lib/libgedcom.py
+++ b/src/plugins/lib/libgedcom.py
@@ -535,6 +535,9 @@ lds_status = {
     "UNCLEARED": gen.lib.LdsOrd.STATUS_UNCLEARED,
     }
 
+# Translation table that deletes illegal control chars on GEDCOM import.
+# Only 0x09 (TAB), 0x0A (LF) and 0x0D (CR) are kept; None-valued keys are dropped.
+strip_dict = dict.fromkeys(range(9)+range(11,13)+range(14, 32))
 
 #-------------------------------------------------------------------------
 #
@@ -1168,9 +1171,10 @@ class BaseReader(object):
         self.ifile.seek(0)
 
     def readline(self):
-        return unicode(self.ifile.readline(),
+        line = unicode(self.ifile.readline(),
                        encoding=self.enc,
                        errors='replace')
+        return line.translate(strip_dict)  # keep EOL: u'' must mean EOF only
 
 class UTF8Reader(BaseReader):
 
@@ -1185,9 +1189,10 @@ class UTF8Reader(BaseReader):
             self.ifile.seek(0)
 
     def readline(self):
-        return unicode(self.ifile.readline(),
+        line = unicode(self.ifile.readline(),
                        encoding=self.enc,
                        errors='replace')
+        return line.translate(strip_dict)  # keep EOL: u'' must mean EOF only
 
 class UTF16Reader(BaseReader):
 