From 76e66f6104f3ea5a2263a29dd30021935e02b92f Mon Sep 17 00:00:00 2001
From: Peter Landgren
Date: Wed, 5 Jan 2011 18:12:15 +0000
Subject: [PATCH] Issue 4515, allow only 09, 0A, 0D in GEDCOM input.

svn: r16341
---
 src/plugins/lib/libgedcom.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/plugins/lib/libgedcom.py b/src/plugins/lib/libgedcom.py
index 4c56c241e..f1b416fbe 100644
--- a/src/plugins/lib/libgedcom.py
+++ b/src/plugins/lib/libgedcom.py
@@ -531,6 +531,9 @@ lds_status = {
     "UNCLEARED": gen.lib.LdsOrd.STATUS_UNCLEARED,
     }
 
+# table for skipping illegal control chars in GEDCOM import
+# Only 09, 0A, 0D are allowed.
+strip_dict = dict.fromkeys(range(9)+range(11,13)+range(14, 32))
 
 #-------------------------------------------------------------------------
 #
@@ -1164,9 +1167,10 @@ class BaseReader(object):
         self.ifile.seek(0)
 
     def readline(self):
-        return unicode(self.ifile.readline(),
+        line = unicode(self.ifile.readline(),
                        encoding=self.enc,
                        errors='replace')
+        return line.strip().translate(strip_dict)
 
 class UTF8Reader(BaseReader):
 
@@ -1181,9 +1185,10 @@ class UTF8Reader(BaseReader):
         self.ifile.seek(0)
 
     def readline(self):
-        return unicode(self.ifile.readline(),
+        line = unicode(self.ifile.readline(),
                        encoding=self.enc,
                        errors='replace')
+        return line.strip().translate(strip_dict)
 
 class UTF16Reader(BaseReader):
 