* src/plugins/ImportGeneWeb.py: Some parsing enhancements

svn: r8375
This commit is contained in:
Martin Hawlisch 2007-04-10 19:22:44 +00:00
parent 26d85d8ef2
commit 1b81ba78de
2 changed files with 27 additions and 10 deletions

View File

@ -1,3 +1,6 @@
2007-04-10 Martin Hawlisch <Martin.Hawlisch@gmx.de>
* src/plugins/ImportGeneWeb.py: Some parsing enhancements
2007-04-09 Don Allingham <don@gramps-project.org> 2007-04-09 Don Allingham <don@gramps-project.org>
* src/plugins/Check.py (CheckIntegrity.check_events): don't require same gender for * src/plugins/Check.py (CheckIntegrity.check_events): don't require same gender for
civil unions civil unions

View File

@ -108,6 +108,7 @@ class GeneWebParser:
self.db = dbase self.db = dbase
self.f = open(file,"rU") self.f = open(file,"rU")
self.filename = file self.filename = file
self.encoding = 'iso-8859-1'
def get_next_line(self): def get_next_line(self):
self.lineno += 1 self.lineno += 1
@ -116,7 +117,7 @@ class GeneWebParser:
try: try:
line = unicode(line.strip()) line = unicode(line.strip())
except UnicodeDecodeError: except UnicodeDecodeError:
line = unicode(line.strip(),'iso-8859-1') line = unicode(line.strip(),self.encoding)
else: else:
line = None line = None
return line return line
@ -159,7 +160,7 @@ class GeneWebParser:
self.read_relationship_person(line,fields) self.read_relationship_person(line,fields)
elif fields[0] == "src": elif fields[0] == "src":
self.read_source_line(line,fields) self.read_source_line(line,fields)
elif fields[0] == "wit": elif fields[0] in ("wit", "wit:"):
self.read_witness_line(line,fields) self.read_witness_line(line,fields)
elif fields[0] == "cbp": elif fields[0] == "cbp":
self.read_children_birthplace_line(line,fields) self.read_children_birthplace_line(line,fields)
@ -172,9 +173,13 @@ class GeneWebParser:
elif fields[0] == "comm": elif fields[0] == "comm":
self.read_family_comment(line,fields) self.read_family_comment(line,fields)
elif fields[0] == "notes": elif fields[0] == "notes":
self.read_notes_lines(line,fields) self.read_person_notes_lines(line,fields)
elif fields[0] == "notes-db":
self.read_database_notes_lines(line,fields)
elif fields[0] == "end": elif fields[0] == "end":
self.current_mode = None self.current_mode = None
elif fields[0] == "encoding:":
self.encoding = fields[1]
else: else:
print "parse_geneweb_file(): Token >%s< unknown. line %d skipped: %s" % (fields[0],self.lineno,line) print "parse_geneweb_file(): Token >%s< unknown. line %d skipped: %s" % (fields[0],self.lineno,line)
except Errors.GedcomError, err: except Errors.GedcomError, err:
@ -373,8 +378,7 @@ class GeneWebParser:
self.db.commit_family(self.current_family,self.trans) self.db.commit_family(self.current_family,self.trans)
return None return None
def read_notes_lines(self,line,fields): def _read_notes_lines(self,note_tag):
(idx,person) = self.parse_person(fields,1,None,None)
note_txt = "" note_txt = ""
while True: while True:
line = self.get_next_line() line = self.get_next_line()
@ -382,7 +386,7 @@ class GeneWebParser:
break break
fields = line.split(" ") fields = line.split(" ")
if fields[0] == "end" and fields[1] == "notes": if fields[0] == "end" and fields[1] == note_tag:
break break
elif fields[0] == "beg": elif fields[0] == "beg":
continue continue
@ -391,11 +395,19 @@ class GeneWebParser:
note_txt = note_txt + "\n" + line note_txt = note_txt + "\n" + line
else: else:
note_txt = note_txt + line note_txt = note_txt + line
return note_txt
def read_person_notes_lines(self,line,fields):
(idx,person) = self.parse_person(fields,1,None,None)
note_txt = self._read_notes_lines( fields[0])
if note_txt: if note_txt:
person.set_note(note_txt) person.set_note(note_txt)
self.db.commit_person(person,self.trans) self.db.commit_person(person,self.trans)
return None
def read_database_notes_lines(self,line,fields):
note_txt = self._read_notes_lines( fields[0])
# Currently the note text is read and then discarded. It could probably be attached to a common source.
def parse_marriage(self,fields,idx): def parse_marriage(self,fields,idx):
mariageDataRe = re.compile("^[+#-0-9].*$") mariageDataRe = re.compile("^[+#-0-9].*$")
@ -419,7 +431,7 @@ class GeneWebParser:
while idx < len(fields) and mariageDataRe.match(fields[idx]): while idx < len(fields) and mariageDataRe.match(fields[idx]):
if fields[idx][0] == "+": if fields[idx][0] == "+":
mar_date = self.parse_date(self.decode(fields[idx])) mar_date = self.parse_date(self.decode(fields[idx][1:]))
self.debug(" Married at: %s" % fields[idx]) self.debug(" Married at: %s" % fields[idx])
idx = idx + 1 idx = idx + 1
elif fields[idx][0] == "-": elif fields[idx][0] == "-":
@ -687,7 +699,9 @@ class GeneWebParser:
else: else:
self.debug("Death Date: %s" % fields[idx]) self.debug("Death Date: %s" % fields[idx])
death_date = self.parse_date(self.decode(fields[idx])) death_date = self.parse_date(self.decode(fields[idx]))
if fields[idx][0] == "k": if fields[idx] == "mj":
death_cause = "Died joung"
elif fields[idx][0] == "k":
death_cause = "Killed" death_cause = "Killed"
elif fields[idx][0] == "m": elif fields[idx][0] == "m":
death_cause = "Murdered" death_cause = "Murdered"