From 1b81ba78dee04addf9bd61c79175fc1a39879d6b Mon Sep 17 00:00:00 2001 From: Martin Hawlisch Date: Tue, 10 Apr 2007 19:22:44 +0000 Subject: [PATCH] * src/plugins/ImportGeneWeb.py: Some parsing enhancements svn: r8375 --- gramps2/ChangeLog | 3 +++ gramps2/src/plugins/ImportGeneWeb.py | 34 ++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/gramps2/ChangeLog b/gramps2/ChangeLog index 0dfb4e61b..29b0ab6c6 100644 --- a/gramps2/ChangeLog +++ b/gramps2/ChangeLog @@ -1,3 +1,6 @@ +2007-04-10 Martin Hawlisch + * src/plugins/ImportGeneWeb.py: Some parsing enhancements + 2007-04-09 Don Allingham * src/plugins/Check.py (CheckIntegrity.check_events): don't require same gender for civil unions diff --git a/gramps2/src/plugins/ImportGeneWeb.py b/gramps2/src/plugins/ImportGeneWeb.py index e5869c70f..40be67e5c 100644 --- a/gramps2/src/plugins/ImportGeneWeb.py +++ b/gramps2/src/plugins/ImportGeneWeb.py @@ -108,6 +108,7 @@ class GeneWebParser: self.db = dbase self.f = open(file,"rU") self.filename = file + self.encoding = 'iso-8859-1' def get_next_line(self): self.lineno += 1 @@ -116,7 +117,7 @@ class GeneWebParser: try: line = unicode(line.strip()) except UnicodeDecodeError: - line = unicode(line.strip(),'iso-8859-1') + line = unicode(line.strip(),self.encoding) else: line = None return line @@ -159,7 +160,7 @@ class GeneWebParser: self.read_relationship_person(line,fields) elif fields[0] == "src": self.read_source_line(line,fields) - elif fields[0] == "wit": + elif fields[0] in ("wit", "wit:"): self.read_witness_line(line,fields) elif fields[0] == "cbp": self.read_children_birthplace_line(line,fields) @@ -172,9 +173,13 @@ class GeneWebParser: elif fields[0] == "comm": self.read_family_comment(line,fields) elif fields[0] == "notes": - self.read_notes_lines(line,fields) + self.read_person_notes_lines(line,fields) + elif fields[0] == "notes-db": + self.read_database_notes_lines(line,fields) elif fields[0] == "end": self.current_mode = None + elif fields[0] == "encoding:": + self.encoding = fields[1] else: print "parse_geneweb_file(): Token >%s< unknown. line %d skipped: %s" % (fields[0],self.lineno,line) except Errors.GedcomError, err: @@ -373,8 +378,7 @@ class GeneWebParser: self.db.commit_family(self.current_family,self.trans) return None - def read_notes_lines(self,line,fields): - (idx,person) = self.parse_person(fields,1,None,None) + def _read_notes_lines(self,note_tag): note_txt = "" while True: line = self.get_next_line() @@ -382,7 +386,7 @@ class GeneWebParser: break fields = line.split(" ") - if fields[0] == "end" and fields[1] == "notes": + if fields[0] == "end" and fields[1] == note_tag: break elif fields[0] == "beg": continue @@ -391,11 +395,19 @@ class GeneWebParser: note_txt = note_txt + "\n" + line else: note_txt = note_txt + line + return note_txt + + def read_person_notes_lines(self,line,fields): + (idx,person) = self.parse_person(fields,1,None,None) + note_txt = self._read_notes_lines( fields[0]) if note_txt: person.set_note(note_txt) self.db.commit_person(person,self.trans) - return None - + + def read_database_notes_lines(self,line,fields): + note_txt = self._read_notes_lines( fields[0]) + # currently does nothing. Could probably be added to a common source + def parse_marriage(self,fields,idx): mariageDataRe = re.compile("^[+#-0-9].*$") @@ -419,7 +431,7 @@ class GeneWebParser: while idx < len(fields) and mariageDataRe.match(fields[idx]): if fields[idx][0] == "+": - mar_date = self.parse_date(self.decode(fields[idx])) + mar_date = self.parse_date(self.decode(fields[idx][1:])) self.debug(" Married at: %s" % fields[idx]) idx = idx + 1 elif fields[idx][0] == "-": @@ -687,7 +699,9 @@ class GeneWebParser: else: self.debug("Death Date: %s" % fields[idx]) death_date = self.parse_date(self.decode(fields[idx])) - if fields[idx][0] == "k": + if fields[idx] == "mj": + death_cause = "Died joung" + elif fields[idx][0] == "k": death_cause = "Killed" elif fields[idx][0] == "m": death_cause = "Murdered"