2007-02-15 Don Allingham <don@gramps-project.org>

* src/GrampsDbUtils/_GedcomParse.py: refactoring
	* src/GrampsDbUtils/_GedcomUtils.py: refactoring



svn: r8123
This commit is contained in:
Don Allingham 2007-02-16 00:07:24 +00:00
parent 7d2522f3b6
commit 91b7257b57
3 changed files with 99 additions and 82 deletions

View File

@ -1,3 +1,7 @@
2007-02-15 Don Allingham <don@gramps-project.org>
* src/GrampsDbUtils/_GedcomParse.py: refactoring
* src/GrampsDbUtils/_GedcomUtils.py: refactoring
2007-02-14 Brian Matherly <brian@gramps-project.org> 2007-02-14 Brian Matherly <brian@gramps-project.org>
* src/docgen/PSDrawDoc.py: remove draw_wedge from BaseDoc * src/docgen/PSDrawDoc.py: remove draw_wedge from BaseDoc
* src/ReportBase/_ReportUtils.py: remove draw_wedge from BaseDoc * src/ReportBase/_ReportUtils.py: remove draw_wedge from BaseDoc

View File

@ -122,7 +122,8 @@ from ansel_utf8 import ansel_to_utf8
from _GedcomInfo import * from _GedcomInfo import *
from _GedcomTokens import * from _GedcomTokens import *
from _GedcomLex import Reader from _GedcomLex import Reader
from _GedcomUtils import PlaceParser, IdFinder
import _GedcomUtils as GedcomUtils
from GrampsDb._GrampsDbConst import EVENT_KEY from GrampsDb._GrampsDbConst import EVENT_KEY
from BasicUtils import UpdateCallback from BasicUtils import UpdateCallback
@ -326,6 +327,8 @@ class NoteParser:
self.count = 0 self.count = 0
self.person_count = 0 self.person_count = 0
self.trans = None
self.groups = None
ifile.seek(0) ifile.seek(0)
innote = False innote = False
@ -397,10 +400,15 @@ class GedcomParser(UpdateCallback):
self.repo2id = {} self.repo2id = {}
self.maxpeople = people self.maxpeople = people
self.db = dbase self.db = dbase
self.emapper = IdFinder(dbase.get_gramps_ids(EVENT_KEY), self.emapper = GedcomUtils.IdFinder(dbase.get_gramps_ids(EVENT_KEY),
dbase.eprefix) dbase.eprefix)
self.place_parser = PlaceParser() self.fam_count = 0
self.indi_count = 0
self.repo_count = 0
self.source_count = 0
self.place_parser = GedcomUtils.PlaceParser()
self.debug = False self.debug = False
self.person = None self.person = None
self.inline_srcs = {} self.inline_srcs = {}
@ -829,24 +837,6 @@ class GedcomParser(UpdateCallback):
self.backup() self.backup()
return done return done
def parse_name_personal(self, text):
name = RelLib.Name()
m = SURNAME_RE.match(text)
if m:
names = m.groups()
name.set_first_name(names[1].strip())
name.set_surname(names[0].strip())
else:
try:
names = NAME_RE.match(text).groups()
name.set_first_name(names[0].strip())
name.set_surname(names[2].strip())
name.set_suffix(names[4].strip())
except:
name.set_first_name(text.strip())
return name
def get_next(self): def get_next(self):
if not self.backoff: if not self.backoff:
self.groups = self.lexer.readline() self.groups = self.lexer.readline()
@ -890,10 +880,6 @@ class GedcomParser(UpdateCallback):
self.trans = self.db.transaction_begin("", not use_trans, no_magic) self.trans = self.db.transaction_begin("", not use_trans, no_magic)
self.db.disable_signals() self.db.disable_signals()
self.fam_count = 0
self.indi_count = 0
self.repo_count = 0
self.source_count = 0
self.parse_header() self.parse_header()
self.parse_submitter() self.parse_submitter()
if self.use_def_src: if self.use_def_src:
@ -1102,7 +1088,7 @@ class GedcomParser(UpdateCallback):
# build a RelLib.Name structure from the text # build a RelLib.Name structure from the text
name = self.parse_name_personal(line.data) name = GedcomUtils.parse_name_personal(line.data)
# Add the name as the primary name if this is the first one that # Add the name as the primary name if this is the first one that
# we have encountered for this person. Assume that if this is the # we have encountered for this person. Assume that if this is the
@ -1469,7 +1455,7 @@ class GedcomParser(UpdateCallback):
sub_state.lds_ord = RelLib.LdsOrd() sub_state.lds_ord = RelLib.LdsOrd()
sub_state.lds_ord.set_type(lds_type) sub_state.lds_ord.set_type(lds_type)
sub_state.place = None sub_state.place = None
sub_state.place_fields = PlaceParser() sub_state.place_fields = GedcomUtils.PlaceParser()
state.person.lds_ord_list.append(sub_state.lds_ord) state.person.lds_ord_list.append(sub_state.lds_ord)
self.parse_level(sub_state, self.lds_parse_tbl, self.func_ignore) self.parse_level(sub_state, self.lds_parse_tbl, self.func_ignore)
@ -1511,7 +1497,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state @param state: The current state
@type state: CurrentState @type state: CurrentState
""" """
gid = self.extract_gramps_id(line.data) gid = GedcomUtils.extract_id(line.data)
state.lds_ord.set_family_handle(self.find_family_handle(gid)) state.lds_ord.set_family_handle(self.find_family_handle(gid))
def func_lds_form(self, line, state): def func_lds_form(self, line, state):
@ -1524,7 +1510,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state @param state: The current state
@type state: CurrentState @type state: CurrentState
""" """
state.pf = PlaceParser(line) state.pf = GedcomUtils.PlaceParser(line)
def func_lds_plac(self, line, state): def func_lds_plac(self, line, state):
""" """
@ -1541,7 +1527,7 @@ class GedcomParser(UpdateCallback):
state.place.set_title(line.data) state.place.set_title(line.data)
state.lds_ord.set_place_handle(state.place.handle) state.lds_ord.set_place_handle(state.place.handle)
except NameError: except NameError:
pass return
def func_lds_sour(self, line, state): def func_lds_sour(self, line, state):
""" """
@ -1564,7 +1550,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state @param state: The current state
@type state: CurrentState @type state: CurrentState
""" """
note = self.parse_note(line, state.lds_ord, state.level+1, '') self.parse_note(line, state.lds_ord, state.level+1, '')
def func_lds_stat(self, line, state): def func_lds_stat(self, line, state):
""" """
@ -1600,7 +1586,7 @@ class GedcomParser(UpdateCallback):
sub_state.primary = False sub_state.primary = False
notelist = [] notelist = []
gid = self.extract_gramps_id(line.data) gid = GedcomUtils.extract_id(line.data)
handle = self.find_family_handle(gid) handle = self.find_family_handle(gid)
self.parse_level(sub_state, self.famc_parse_tbl, self.parse_level(sub_state, self.famc_parse_tbl,
@ -1704,7 +1690,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state @param state: The current state
@type state: CurrentState @type state: CurrentState
""" """
handle = self.find_family_handle(self.extract_gramps_id(line.data)) handle = self.find_family_handle(GedcomUtils.extract_id(line.data))
state.person.add_family_handle(handle) state.person.add_family_handle(handle)
state.add_to_note(self.parse_optional_note(2)) state.add_to_note(self.parse_optional_note(2))
@ -1734,7 +1720,7 @@ class GedcomParser(UpdateCallback):
""" """
# find the id and person that we are referencing # find the id and person that we are referencing
gid = self.extract_gramps_id(line.data.strip()) gid = GedcomUtils.extract_id(line.data)
handle = self.find_person_handle(self.map_gid(gid)) handle = self.find_person_handle(self.map_gid(gid))
# create a new PersonRef, and assign the handle, add the # create a new PersonRef, and assign the handle, add the
@ -1883,7 +1869,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state @param state: The current state
@type state: CurrentState @type state: CurrentState
""" """
gid = self.extract_gramps_id(line.data.strip()) gid = GedcomUtils.extract_id(line.data)
handle = self.find_person_handle(self.map_gid(gid)) handle = self.find_person_handle(self.map_gid(gid))
state.family.set_father_handle(handle) state.family.set_father_handle(handle)
@ -1898,7 +1884,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state @param state: The current state
@type state: CurrentState @type state: CurrentState
""" """
gid = self.extract_gramps_id(line.data.strip()) gid = GedcomUtils.extract_id(line.data)
handle = self.find_person_handle(self.map_gid(gid)) handle = self.find_person_handle(self.map_gid(gid))
state.family.set_mother_handle(handle) state.family.set_mother_handle(handle)
@ -1976,7 +1962,7 @@ class GedcomParser(UpdateCallback):
""" """
mrel, frel = self.parse_ftw_relations(state.level+1) mrel, frel = self.parse_ftw_relations(state.level+1)
gid = self.extract_gramps_id(line.data.strip()) gid = GedcomUtils.extract_id(line.data)
child = self.find_or_create_person(self.map_gid(gid)) child = self.find_or_create_person(self.map_gid(gid))
reflist = [ ref for ref in state.family.get_child_ref_list() \ reflist = [ ref for ref in state.family.get_child_ref_list() \
@ -2014,7 +2000,7 @@ class GedcomParser(UpdateCallback):
sub_state.lds_ord = RelLib.LdsOrd() sub_state.lds_ord = RelLib.LdsOrd()
sub_state.lds_ord.set_type(RelLib.LdsOrd.SEAL_TO_SPOUSE) sub_state.lds_ord.set_type(RelLib.LdsOrd.SEAL_TO_SPOUSE)
sub_state.place = None sub_state.place = None
sub_state.place_fields = PlaceParser() sub_state.place_fields = GedcomUtils.PlaceParser()
state.family.lds_ord_list.append(sub_state.lds_ord) state.family.lds_ord_list.append(sub_state.lds_ord)
self.parse_level(sub_state, self.lds_parse_tbl, self.func_ignore) self.parse_level(sub_state, self.lds_parse_tbl, self.func_ignore)
@ -2382,7 +2368,7 @@ class GedcomParser(UpdateCallback):
TOKEN_IGNORE): TOKEN_IGNORE):
self.skip_subordinate_levels(level+1) self.skip_subordinate_levels(level+1)
elif line.token == TOKEN_RIN: elif line.token == TOKEN_RIN:
pass continue
else: else:
self.not_recognized(level+1) self.not_recognized(level+1)
@ -2409,7 +2395,7 @@ class GedcomParser(UpdateCallback):
frel = TYPE_BIRTH frel = TYPE_BIRTH
# Legacy _PREF # Legacy _PREF
elif line.token == TOKEN__PRIMARY: elif line.token == TOKEN__PRIMARY:
pass continue
else: else:
self.not_recognized(level+1) self.not_recognized(level+1)
return (mrel, frel) return (mrel, frel)
@ -2469,14 +2455,6 @@ class GedcomParser(UpdateCallback):
def parse_comment(self, line, obj, level, old_note): def parse_comment(self, line, obj, level, old_note):
return self.parse_note_base(line, obj, level, old_note, obj.set_note) return self.parse_note_base(line, obj, level, old_note, obj.set_note)
def extract_gramps_id(self, value):
"""
Extracts a value to use for the GRAMPS ID value from the GEDCOM
reference token. The value should be in the form of @XXX@, and the
returned value will be XXX
"""
return value.strip()[1:-1]
def parse_obje(self, line): def parse_obje(self, line):
""" """
n @XREF:OBJE@ OBJE {1:1} n @XREF:OBJE@ OBJE {1:1}
@ -2491,8 +2469,8 @@ class GedcomParser(UpdateCallback):
+1 RIN <AUTOMATED_RECORD_ID> {0:1} p.* +1 RIN <AUTOMATED_RECORD_ID> {0:1} p.*
+1 <<CHANGE_DATE>> {0:1} p.* +1 <<CHANGE_DATE>> {0:1} p.*
""" """
gid = self.extract_gramps_id(line.data.strip()) gid = GedcomUtils.extract_id(line.data)
self.media = self.find_or_create_object(self.map_gid(gid[1:-1])) self.media = self.find_or_create_object(self.map_gid(gid))
while True: while True:
line = self.get_next() line = self.get_next()
@ -2584,7 +2562,7 @@ class GedcomParser(UpdateCallback):
elif line.token == TOKEN_NOTE: elif line.token == TOKEN_NOTE:
note = self.parse_note(line, address, level+1, '') note = self.parse_note(line, address, level+1, '')
elif line.token in (TOKEN__LOC, TOKEN__NAME): elif line.token in (TOKEN__LOC, TOKEN__NAME):
pass # ignore unsupported extended location syntax continue # ignore unsupported extended location syntax
elif line.token in (TOKEN_IGNORE, TOKEN_TYPE, TOKEN_CAUS): elif line.token in (TOKEN_IGNORE, TOKEN_TYPE, TOKEN_CAUS):
self.skip_subordinate_levels(level+1) self.skip_subordinate_levels(level+1)
else: else:
@ -2719,7 +2697,7 @@ class GedcomParser(UpdateCallback):
note = self.parse_note_simple(line, level+1) note = self.parse_note_simple(line, level+1)
added = True added = True
elif line.token in (TOKEN__LOC, TOKEN__NAME, TOKEN_PHON): elif line.token in (TOKEN__LOC, TOKEN__NAME, TOKEN_PHON):
pass # ignore unsupported extended location syntax continue # ignore unsupported extended location syntax
else: else:
self.not_recognized(level+1) self.not_recognized(level+1)
if added: if added:
@ -2902,7 +2880,7 @@ class GedcomParser(UpdateCallback):
note = self.parse_note(line, place, level+1, '') note = self.parse_note(line, place, level+1, '')
place.set_note(note) place.set_note(note)
elif line.token == TOKEN_FORM: elif line.token == TOKEN_FORM:
pf = PlaceParser(line) pf = GedcomUtils.PlaceParser(line)
elif line.token == TOKEN_OBJE: elif line.token == TOKEN_OBJE:
self.func_place_object(line, place, level+1) self.func_place_object(line, place, level+1)
elif line.token == TOKEN_SOUR: elif line.token == TOKEN_SOUR:
@ -3275,7 +3253,7 @@ class GedcomParser(UpdateCallback):
if self.refn.has_key(pid): if self.refn.has_key(pid):
val = self.refn[pid] val = self.refn[pid]
new_key = prefix % val new_key = prefix % val
new_pmax = max(new_pmax,val) new_pmax = max(new_pmax, val)
person = self.db.get_person_from_handle(pid, self.trans) person = self.db.get_person_from_handle(pid, self.trans)
@ -3369,7 +3347,7 @@ class GedcomParser(UpdateCallback):
multiple NAME indicators, which is the correct way of handling multiple NAME indicators, which is the correct way of handling
multiple names. multiple names.
""" """
name = self.parse_name_personal(line.data) name = GedcomUtils.parse_name_personal(line.data)
name.set_type(RelLib.NameType.AKA) name.set_type(RelLib.NameType.AKA)
state.person.add_alternate_name(name) state.person.add_alternate_name(name)
@ -3583,7 +3561,7 @@ class GedcomParser(UpdateCallback):
name.set_type(RelLib.NameType.MARRIED) name.set_type(RelLib.NameType.MARRIED)
state.person.add_alternate_name(name) state.person.add_alternate_name(name)
elif len(data) > 1: elif len(data) > 1:
name = self.parse_name_personal(text) name = GedcomUtils.parse_name_personal(text)
name.set_type(RelLib.NameType.MARRIED) name.set_type(RelLib.NameType.MARRIED)
state.person.add_alternate_name(name) state.person.add_alternate_name(name)
@ -3752,10 +3730,10 @@ class GedcomParser(UpdateCallback):
source.set_abbreviation(line.data) source.set_abbreviation(line.data)
def func_source_agnc(self, line, source, level): def func_source_agnc(self, line, source, level):
a = RelLib.Attribute() attr = RelLib.Attribute()
a.set_type(RelLib.AttributeType.AGENCY) attr.set_type(RelLib.AttributeType.AGENCY)
a.set_value(line.data) attr.set_value(line.data)
source.add_attribute(a) source.add_attribute(attr)
def func_source_text(self, line, source, level): def func_source_text(self, line, source, level):
source.set_note(line.data) source.set_note(line.data)
@ -3783,8 +3761,8 @@ class GedcomParser(UpdateCallback):
self.skip_subordinate_levels(level+1) self.skip_subordinate_levels(level+1)
def func_obje_file(self, line, media, level): def func_obje_file(self, line, media, level):
(ok, filename) = self.find_file(line.data, self.dir_path) (file_ok, filename) = self.find_file(line.data, self.dir_path)
if not ok: if not file_ok:
self.warn(_("Could not import %s") % filename[0]) self.warn(_("Could not import %s") % filename[0])
path = filename[0].replace('\\', os.path.sep) path = filename[0].replace('\\', os.path.sep)
media.set_path(path) media.set_path(path)
@ -3827,12 +3805,14 @@ class GedcomParser(UpdateCallback):
elif LdsUtils.temple_codes.has_key(code): elif LdsUtils.temple_codes.has_key(code):
return LdsUtils.temple_codes[code] return LdsUtils.temple_codes[code]
c = get_code(line.data) code = get_code(line.data)
if c: return c if code:
return code
## Not sure why we do this. Kind of ugly. ## Not sure why we do this. Kind of ugly.
c = get_code(line.data.split()[0]) code = get_code(line.data.split()[0])
if c: return c if code:
return code
## Okay we have no clue which temple this is. ## Okay we have no clue which temple this is.
## We should tell the user and store it anyway. ## We should tell the user and store it anyway.

View File

@ -18,6 +18,8 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# #
import re
import RelLib import RelLib
class PlaceParser: class PlaceParser:
@ -90,6 +92,37 @@ class IdFinder:
self.index += 1 self.index += 1
return index return index
#------------------------------------------------------------------------
#
# Support functions
#
#------------------------------------------------------------------------
# "Given /Surname/ Suffix" layout; every part after the given text is optional.
#   group 1 - text before the first slash
#   group 3 - text between the first and second slash
#   group 5 - text after the second slash
# Groups 3 and 5 are None when their optional section is absent.
NAME_RE = re.compile(r"/?([^/]*)(/([^/]*)(/([^/]*))?)?")
# "/Surname/ Given" layout; the text must begin with a slash.
#   group 1 - text between the two slashes
#   group 2 - text after the closing slash
SURNAME_RE = re.compile(r"/([^/]*)/([^/]*)")
def parse_name_personal(text):
    """
    Builds a RelLib.Name from the text of a GEDCOM personal name.

    This is a module level function, so it takes no instance argument;
    callers invoke it as parse_name_personal(line.data).

    Two layouts are tried in order:
      - SURNAME_RE: the text starts with a slash delimited surname, and
        whatever follows the closing slash is used as the first name.
      - NAME_RE: first name, then a slash delimited surname, then a
        suffix.

    If the NAME_RE layout is missing its surname or suffix section, the
    whole stripped text is stored as the first name instead.

    @param text: The name text taken from the GEDCOM line
    @type text: str
    @return: The newly created name
    @rtype: RelLib.Name
    """
    name = RelLib.Name()
    match = SURNAME_RE.match(text)
    if match:
        names = match.groups()
        name.set_first_name(names[1].strip())
        name.set_surname(names[0].strip())
    else:
        try:
            names = NAME_RE.match(text).groups()
            name.set_first_name(names[0].strip())
            name.set_surname(names[2].strip())
            name.set_suffix(names[4].strip())
        except AttributeError:
            # An optional group that took no part in the match is None,
            # so calling strip() on it raises AttributeError. Fall back to
            # using the complete text as the first name; a surname that
            # was already assigned above is left in place.
            name.set_first_name(text.strip())
    return name
def extract_id(value):
    """
    Extracts a value to use for the GRAMPS ID value from the GEDCOM
    reference token. The value should be in the form of @XXX@, and the
    returned value will be XXX

    Surrounding whitespace is removed first, then the first and last
    remaining characters are dropped without checking that they are
    actually '@'.

    @param value: The GEDCOM reference token, such as "@I0001@"
    @type value: str
    @return: The token without its leading and trailing delimiter
    @rtype: str
    """
    return value.strip()[1:-1]