2007-02-15 Don Allingham <don@gramps-project.org>

* src/GrampsDbUtils/_GedcomParse.py: refactoring
	* src/GrampsDbUtils/_GedcomUtils.py: refactoring



svn: r8123
This commit is contained in:
Don Allingham 2007-02-16 00:07:24 +00:00
parent 7d2522f3b6
commit 91b7257b57
3 changed files with 99 additions and 82 deletions

View File

@ -1,3 +1,7 @@
2007-02-15 Don Allingham <don@gramps-project.org>
* src/GrampsDbUtils/_GedcomParse.py: refactoring
* src/GrampsDbUtils/_GedcomUtils.py: refactoring
2007-02-14 Brian Matherly <brian@gramps-project.org> 2007-02-14 Brian Matherly <brian@gramps-project.org>
* src/docgen/PSDrawDoc.py: remove draw_wedge from BaseDoc * src/docgen/PSDrawDoc.py: remove draw_wedge from BaseDoc
* src/ReportBase/_ReportUtils.py: remove draw_wedge from BaseDoc * src/ReportBase/_ReportUtils.py: remove draw_wedge from BaseDoc

View File

@ -122,7 +122,8 @@ from ansel_utf8 import ansel_to_utf8
from _GedcomInfo import * from _GedcomInfo import *
from _GedcomTokens import * from _GedcomTokens import *
from _GedcomLex import Reader from _GedcomLex import Reader
from _GedcomUtils import PlaceParser, IdFinder
import _GedcomUtils as GedcomUtils
from GrampsDb._GrampsDbConst import EVENT_KEY from GrampsDb._GrampsDbConst import EVENT_KEY
from BasicUtils import UpdateCallback from BasicUtils import UpdateCallback
@ -326,6 +327,8 @@ class NoteParser:
self.count = 0 self.count = 0
self.person_count = 0 self.person_count = 0
self.trans = None
self.groups = None
ifile.seek(0) ifile.seek(0)
innote = False innote = False
@ -397,10 +400,15 @@ class GedcomParser(UpdateCallback):
self.repo2id = {} self.repo2id = {}
self.maxpeople = people self.maxpeople = people
self.db = dbase self.db = dbase
self.emapper = IdFinder(dbase.get_gramps_ids(EVENT_KEY), self.emapper = GedcomUtils.IdFinder(dbase.get_gramps_ids(EVENT_KEY),
dbase.eprefix) dbase.eprefix)
self.place_parser = PlaceParser() self.fam_count = 0
self.indi_count = 0
self.repo_count = 0
self.source_count = 0
self.place_parser = GedcomUtils.PlaceParser()
self.debug = False self.debug = False
self.person = None self.person = None
self.inline_srcs = {} self.inline_srcs = {}
@ -829,24 +837,6 @@ class GedcomParser(UpdateCallback):
self.backup() self.backup()
return done return done
def parse_name_personal(self, text):
name = RelLib.Name()
m = SURNAME_RE.match(text)
if m:
names = m.groups()
name.set_first_name(names[1].strip())
name.set_surname(names[0].strip())
else:
try:
names = NAME_RE.match(text).groups()
name.set_first_name(names[0].strip())
name.set_surname(names[2].strip())
name.set_suffix(names[4].strip())
except:
name.set_first_name(text.strip())
return name
def get_next(self): def get_next(self):
if not self.backoff: if not self.backoff:
self.groups = self.lexer.readline() self.groups = self.lexer.readline()
@ -890,10 +880,6 @@ class GedcomParser(UpdateCallback):
self.trans = self.db.transaction_begin("", not use_trans, no_magic) self.trans = self.db.transaction_begin("", not use_trans, no_magic)
self.db.disable_signals() self.db.disable_signals()
self.fam_count = 0
self.indi_count = 0
self.repo_count = 0
self.source_count = 0
self.parse_header() self.parse_header()
self.parse_submitter() self.parse_submitter()
if self.use_def_src: if self.use_def_src:
@ -1102,7 +1088,7 @@ class GedcomParser(UpdateCallback):
# build a RelLib.Name structure from the text # build a RelLib.Name structure from the text
name = self.parse_name_personal(line.data) name = GedcomUtils.parse_name_personal(line.data)
# Add the name as the primary name if this is the first one that # Add the name as the primary name if this is the first one that
# we have encountered for this person. Assume that if this is the # we have encountered for this person. Assume that if this is the
@ -1296,7 +1282,7 @@ class GedcomParser(UpdateCallback):
self.parse_level(sub_state, self.resi_parse_tbl, self.parse_level(sub_state, self.resi_parse_tbl,
self.func_person_unknown) self.func_person_unknown)
def func_person_resi_date(self, line, state): def func_person_resi_date(self, line, state):
""" """
Sets the date on the address associated with and Address. Sets the date on the address associated with and Address.
@ -1347,7 +1333,7 @@ class GedcomParser(UpdateCallback):
self.parse_level(state, self.parse_addr_tbl, self.func_ignore) self.parse_level(state, self.parse_addr_tbl, self.func_ignore)
#self.parse_address(state.addr, state.level+1) #self.parse_address(state.addr, state.level+1)
def func_person_resi_phon(self, line, state): def func_person_resi_phon(self, line, state):
""" """
Parses the source connected to a PHON tag Parses the source connected to a PHON tag
@ -1469,7 +1455,7 @@ class GedcomParser(UpdateCallback):
sub_state.lds_ord = RelLib.LdsOrd() sub_state.lds_ord = RelLib.LdsOrd()
sub_state.lds_ord.set_type(lds_type) sub_state.lds_ord.set_type(lds_type)
sub_state.place = None sub_state.place = None
sub_state.place_fields = PlaceParser() sub_state.place_fields = GedcomUtils.PlaceParser()
state.person.lds_ord_list.append(sub_state.lds_ord) state.person.lds_ord_list.append(sub_state.lds_ord)
self.parse_level(sub_state, self.lds_parse_tbl, self.func_ignore) self.parse_level(sub_state, self.lds_parse_tbl, self.func_ignore)
@ -1491,7 +1477,7 @@ class GedcomParser(UpdateCallback):
if value: if value:
state.lds_ord.set_temple(value) state.lds_ord.set_temple(value)
def func_lds_date(self, line, state): def func_lds_date(self, line, state):
""" """
Parses the DATE tag for the LdsOrd Parses the DATE tag for the LdsOrd
@ -1511,10 +1497,10 @@ class GedcomParser(UpdateCallback):
@param state: The current state @param state: The current state
@type state: CurrentState @type state: CurrentState
""" """
gid = self.extract_gramps_id(line.data) gid = GedcomUtils.extract_id(line.data)
state.lds_ord.set_family_handle(self.find_family_handle(gid)) state.lds_ord.set_family_handle(self.find_family_handle(gid))
def func_lds_form(self, line, state): def func_lds_form(self, line, state):
""" """
Parses the FORM tag thate defines the place structure for a place. This Parses the FORM tag thate defines the place structure for a place. This
tag, if found, will override any global place structure. tag, if found, will override any global place structure.
@ -1524,7 +1510,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state @param state: The current state
@type state: CurrentState @type state: CurrentState
""" """
state.pf = PlaceParser(line) state.pf = GedcomUtils.PlaceParser(line)
def func_lds_plac(self, line, state): def func_lds_plac(self, line, state):
""" """
@ -1541,7 +1527,7 @@ class GedcomParser(UpdateCallback):
state.place.set_title(line.data) state.place.set_title(line.data)
state.lds_ord.set_place_handle(state.place.handle) state.lds_ord.set_place_handle(state.place.handle)
except NameError: except NameError:
pass return
def func_lds_sour(self, line, state): def func_lds_sour(self, line, state):
""" """
@ -1564,9 +1550,9 @@ class GedcomParser(UpdateCallback):
@param state: The current state @param state: The current state
@type state: CurrentState @type state: CurrentState
""" """
note = self.parse_note(line, state.lds_ord, state.level+1, '') self.parse_note(line, state.lds_ord, state.level+1, '')
def func_lds_stat(self, line, state): def func_lds_stat(self, line, state):
""" """
Parses the STAT (status) tag attached to the LdsOrd. Parses the STAT (status) tag attached to the LdsOrd.
@ -1600,7 +1586,7 @@ class GedcomParser(UpdateCallback):
sub_state.primary = False sub_state.primary = False
notelist = [] notelist = []
gid = self.extract_gramps_id(line.data) gid = GedcomUtils.extract_id(line.data)
handle = self.find_family_handle(gid) handle = self.find_family_handle(gid)
self.parse_level(sub_state, self.famc_parse_tbl, self.parse_level(sub_state, self.famc_parse_tbl,
@ -1635,7 +1621,7 @@ class GedcomParser(UpdateCallback):
family.add_child_ref(ref) family.add_child_ref(ref)
self.db.commit_family(family, self.trans) self.db.commit_family(family, self.trans)
def func_person_famc_pedi(self, line, state): def func_person_famc_pedi(self, line, state):
""" """
Parses the PEDI tag attached to a INDI.FAMC record. No values are set Parses the PEDI tag attached to a INDI.FAMC record. No values are set
at this point, because we have to do some post processing. Instead, we at this point, because we have to do some post processing. Instead, we
@ -1665,7 +1651,7 @@ class GedcomParser(UpdateCallback):
else: else:
self.skip_subordinate_levels(state.level+1) self.skip_subordinate_levels(state.level+1)
def func_person_famc_primary(self, line, state): def func_person_famc_primary(self, line, state):
""" """
Parses the _PRIM tag on an INDI.FAMC tag. This value is stored in Parses the _PRIM tag on an INDI.FAMC tag. This value is stored in
the state record to be used later. the state record to be used later.
@ -1704,7 +1690,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state @param state: The current state
@type state: CurrentState @type state: CurrentState
""" """
handle = self.find_family_handle(self.extract_gramps_id(line.data)) handle = self.find_family_handle(GedcomUtils.extract_id(line.data))
state.person.add_family_handle(handle) state.person.add_family_handle(handle)
state.add_to_note(self.parse_optional_note(2)) state.add_to_note(self.parse_optional_note(2))
@ -1734,7 +1720,7 @@ class GedcomParser(UpdateCallback):
""" """
# find the id and person that we are referencing # find the id and person that we are referencing
gid = self.extract_gramps_id(line.data.strip()) gid = GedcomUtils.extract_id(line.data)
handle = self.find_person_handle(self.map_gid(gid)) handle = self.find_person_handle(self.map_gid(gid))
# create a new PersonRef, and assign the handle, add the # create a new PersonRef, and assign the handle, add the
@ -1751,7 +1737,7 @@ class GedcomParser(UpdateCallback):
if not sub_state.ignore: if not sub_state.ignore:
state.person.add_person_ref(sub_state.ref) state.person.add_person_ref(sub_state.ref)
def func_person_asso_type(self, line, state): def func_person_asso_type(self, line, state):
""" """
Parses the INDI.ASSO.TYPE tag. GRAMPS only supports the ASSO tag when Parses the INDI.ASSO.TYPE tag. GRAMPS only supports the ASSO tag when
the tag represents an INDI. So if the data is not INDI, we set the ignore the tag represents an INDI. So if the data is not INDI, we set the ignore
@ -1765,7 +1751,7 @@ class GedcomParser(UpdateCallback):
if line.data != "INDI": if line.data != "INDI":
state.ignore = True state.ignore = True
def func_person_asso_rela(self, line, state): def func_person_asso_rela(self, line, state):
""" """
Parses the INDI.ASSO.RELA tag. Parses the INDI.ASSO.RELA tag.
@ -1883,7 +1869,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state @param state: The current state
@type state: CurrentState @type state: CurrentState
""" """
gid = self.extract_gramps_id(line.data.strip()) gid = GedcomUtils.extract_id(line.data)
handle = self.find_person_handle(self.map_gid(gid)) handle = self.find_person_handle(self.map_gid(gid))
state.family.set_father_handle(handle) state.family.set_father_handle(handle)
@ -1898,7 +1884,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state @param state: The current state
@type state: CurrentState @type state: CurrentState
""" """
gid = self.extract_gramps_id(line.data.strip()) gid = GedcomUtils.extract_id(line.data)
handle = self.find_person_handle(self.map_gid(gid)) handle = self.find_person_handle(self.map_gid(gid))
state.family.set_mother_handle(handle) state.family.set_mother_handle(handle)
@ -1976,7 +1962,7 @@ class GedcomParser(UpdateCallback):
""" """
mrel, frel = self.parse_ftw_relations(state.level+1) mrel, frel = self.parse_ftw_relations(state.level+1)
gid = self.extract_gramps_id(line.data.strip()) gid = GedcomUtils.extract_id(line.data)
child = self.find_or_create_person(self.map_gid(gid)) child = self.find_or_create_person(self.map_gid(gid))
reflist = [ ref for ref in state.family.get_child_ref_list() \ reflist = [ ref for ref in state.family.get_child_ref_list() \
@ -2014,7 +2000,7 @@ class GedcomParser(UpdateCallback):
sub_state.lds_ord = RelLib.LdsOrd() sub_state.lds_ord = RelLib.LdsOrd()
sub_state.lds_ord.set_type(RelLib.LdsOrd.SEAL_TO_SPOUSE) sub_state.lds_ord.set_type(RelLib.LdsOrd.SEAL_TO_SPOUSE)
sub_state.place = None sub_state.place = None
sub_state.place_fields = PlaceParser() sub_state.place_fields = GedcomUtils.PlaceParser()
state.family.lds_ord_list.append(sub_state.lds_ord) state.family.lds_ord_list.append(sub_state.lds_ord)
self.parse_level(sub_state, self.lds_parse_tbl, self.func_ignore) self.parse_level(sub_state, self.lds_parse_tbl, self.func_ignore)
@ -2107,7 +2093,7 @@ class GedcomParser(UpdateCallback):
self.parse_level(state, self.parse_addr_tbl, self.func_ignore) self.parse_level(state, self.parse_addr_tbl, self.func_ignore)
#self.parse_address(state.addr, state.level) #self.parse_address(state.addr, state.level)
def func_family_attr(self, line, state): def func_family_attr(self, line, state):
""" """
@param line: The current line in GedLine format @param line: The current line in GedLine format
@type line: GedLine @type line: GedLine
@ -2140,7 +2126,7 @@ class GedcomParser(UpdateCallback):
return (sub_state.form, sub_state.filename, sub_state.title, return (sub_state.form, sub_state.filename, sub_state.title,
sub_state.note) sub_state.note)
def func_object_ref_form(self, line, state): def func_object_ref_form(self, line, state):
""" """
+1 FORM <MULTIMEDIA_FORMAT> {1:1} +1 FORM <MULTIMEDIA_FORMAT> {1:1}
@ -2151,7 +2137,7 @@ class GedcomParser(UpdateCallback):
""" """
state.form = line.data state.form = line.data
def func_object_ref_titl(self, line, state): def func_object_ref_titl(self, line, state):
""" """
+1 TITL <DESCRIPTIVE_TITLE> {0:1} +1 TITL <DESCRIPTIVE_TITLE> {0:1}
@ -2162,7 +2148,7 @@ class GedcomParser(UpdateCallback):
""" """
state.title = line.data state.title = line.data
def func_object_ref_file(self, line, state): def func_object_ref_file(self, line, state):
""" """
+1 FILE <MULTIMEDIA_FILE_REFERENCE> {1:1} +1 FILE <MULTIMEDIA_FILE_REFERENCE> {1:1}
@ -2173,7 +2159,7 @@ class GedcomParser(UpdateCallback):
""" """
state.filename = line.data state.filename = line.data
def func_object_ref_note(self, line, state): def func_object_ref_note(self, line, state):
""" """
+1 <<NOTE_STRUCTURE>> {0:M} +1 <<NOTE_STRUCTURE>> {0:M}
@ -2382,7 +2368,7 @@ class GedcomParser(UpdateCallback):
TOKEN_IGNORE): TOKEN_IGNORE):
self.skip_subordinate_levels(level+1) self.skip_subordinate_levels(level+1)
elif line.token == TOKEN_RIN: elif line.token == TOKEN_RIN:
pass continue
else: else:
self.not_recognized(level+1) self.not_recognized(level+1)
@ -2409,7 +2395,7 @@ class GedcomParser(UpdateCallback):
frel = TYPE_BIRTH frel = TYPE_BIRTH
# Legacy _PREF # Legacy _PREF
elif line.token == TOKEN__PRIMARY: elif line.token == TOKEN__PRIMARY:
pass continue
else: else:
self.not_recognized(level+1) self.not_recognized(level+1)
return (mrel, frel) return (mrel, frel)
@ -2469,14 +2455,6 @@ class GedcomParser(UpdateCallback):
def parse_comment(self, line, obj, level, old_note): def parse_comment(self, line, obj, level, old_note):
return self.parse_note_base(line, obj, level, old_note, obj.set_note) return self.parse_note_base(line, obj, level, old_note, obj.set_note)
def extract_gramps_id(self, value):
"""
Extracts a value to use for the GRAMPS ID value from the GEDCOM
reference token. The value should be in the form of @XXX@, and the
returned value will be XXX
"""
return value.strip()[1:-1]
def parse_obje(self, line): def parse_obje(self, line):
""" """
n @XREF:OBJE@ OBJE {1:1} n @XREF:OBJE@ OBJE {1:1}
@ -2491,8 +2469,8 @@ class GedcomParser(UpdateCallback):
+1 RIN <AUTOMATED_RECORD_ID> {0:1} p.* +1 RIN <AUTOMATED_RECORD_ID> {0:1} p.*
+1 <<CHANGE_DATE>> {0:1} p.* +1 <<CHANGE_DATE>> {0:1} p.*
""" """
gid = self.extract_gramps_id(line.data.strip()) gid = GedcomUtils.extract_id(line.data)
self.media = self.find_or_create_object(self.map_gid(gid[1:-1])) self.media = self.find_or_create_object(self.map_gid(gid))
while True: while True:
line = self.get_next() line = self.get_next()
@ -2584,7 +2562,7 @@ class GedcomParser(UpdateCallback):
elif line.token == TOKEN_NOTE: elif line.token == TOKEN_NOTE:
note = self.parse_note(line, address, level+1, '') note = self.parse_note(line, address, level+1, '')
elif line.token in (TOKEN__LOC, TOKEN__NAME): elif line.token in (TOKEN__LOC, TOKEN__NAME):
pass # ignore unsupported extended location syntax continue # ignore unsupported extended location syntax
elif line.token in (TOKEN_IGNORE, TOKEN_TYPE, TOKEN_CAUS): elif line.token in (TOKEN_IGNORE, TOKEN_TYPE, TOKEN_CAUS):
self.skip_subordinate_levels(level+1) self.skip_subordinate_levels(level+1)
else: else:
@ -2719,7 +2697,7 @@ class GedcomParser(UpdateCallback):
note = self.parse_note_simple(line, level+1) note = self.parse_note_simple(line, level+1)
added = True added = True
elif line.token in (TOKEN__LOC, TOKEN__NAME, TOKEN_PHON): elif line.token in (TOKEN__LOC, TOKEN__NAME, TOKEN_PHON):
pass # ignore unsupported extended location syntax continue # ignore unsupported extended location syntax
else: else:
self.not_recognized(level+1) self.not_recognized(level+1)
if added: if added:
@ -2902,7 +2880,7 @@ class GedcomParser(UpdateCallback):
note = self.parse_note(line, place, level+1, '') note = self.parse_note(line, place, level+1, '')
place.set_note(note) place.set_note(note)
elif line.token == TOKEN_FORM: elif line.token == TOKEN_FORM:
pf = PlaceParser(line) pf = GedcomUtils.PlaceParser(line)
elif line.token == TOKEN_OBJE: elif line.token == TOKEN_OBJE:
self.func_place_object(line, place, level+1) self.func_place_object(line, place, level+1)
elif line.token == TOKEN_SOUR: elif line.token == TOKEN_SOUR:
@ -3275,7 +3253,7 @@ class GedcomParser(UpdateCallback):
if self.refn.has_key(pid): if self.refn.has_key(pid):
val = self.refn[pid] val = self.refn[pid]
new_key = prefix % val new_key = prefix % val
new_pmax = max(new_pmax,val) new_pmax = max(new_pmax, val)
person = self.db.get_person_from_handle(pid, self.trans) person = self.db.get_person_from_handle(pid, self.trans)
@ -3369,7 +3347,7 @@ class GedcomParser(UpdateCallback):
multiple NAME indicators, which is the correct way of handling multiple NAME indicators, which is the correct way of handling
multiple names. multiple names.
""" """
name = self.parse_name_personal(line.data) name = GedcomUtils.parse_name_personal(line.data)
name.set_type(RelLib.NameType.AKA) name.set_type(RelLib.NameType.AKA)
state.person.add_alternate_name(name) state.person.add_alternate_name(name)
@ -3583,7 +3561,7 @@ class GedcomParser(UpdateCallback):
name.set_type(RelLib.NameType.MARRIED) name.set_type(RelLib.NameType.MARRIED)
state.person.add_alternate_name(name) state.person.add_alternate_name(name)
elif len(data) > 1: elif len(data) > 1:
name = self.parse_name_personal(text) name = GedcomUtils.parse_name_personal(text)
name.set_type(RelLib.NameType.MARRIED) name.set_type(RelLib.NameType.MARRIED)
state.person.add_alternate_name(name) state.person.add_alternate_name(name)
@ -3752,10 +3730,10 @@ class GedcomParser(UpdateCallback):
source.set_abbreviation(line.data) source.set_abbreviation(line.data)
def func_source_agnc(self, line, source, level): def func_source_agnc(self, line, source, level):
a = RelLib.Attribute() attr = RelLib.Attribute()
a.set_type(RelLib.AttributeType.AGENCY) attr.set_type(RelLib.AttributeType.AGENCY)
a.set_value(line.data) attr.set_value(line.data)
source.add_attribute(a) source.add_attribute(attr)
def func_source_text(self, line, source, level): def func_source_text(self, line, source, level):
source.set_note(line.data) source.set_note(line.data)
@ -3783,8 +3761,8 @@ class GedcomParser(UpdateCallback):
self.skip_subordinate_levels(level+1) self.skip_subordinate_levels(level+1)
def func_obje_file(self, line, media, level): def func_obje_file(self, line, media, level):
(ok, filename) = self.find_file(line.data, self.dir_path) (file_ok, filename) = self.find_file(line.data, self.dir_path)
if not ok: if not file_ok:
self.warn(_("Could not import %s") % filename[0]) self.warn(_("Could not import %s") % filename[0])
path = filename[0].replace('\\', os.path.sep) path = filename[0].replace('\\', os.path.sep)
media.set_path(path) media.set_path(path)
@ -3827,12 +3805,14 @@ class GedcomParser(UpdateCallback):
elif LdsUtils.temple_codes.has_key(code): elif LdsUtils.temple_codes.has_key(code):
return LdsUtils.temple_codes[code] return LdsUtils.temple_codes[code]
c = get_code(line.data) code = get_code(line.data)
if c: return c if code:
return code
## Not sure why we do this. Kind of ugly. ## Not sure why we do this. Kind of ugly.
c = get_code(line.data.split()[0]) code = get_code(line.data.split()[0])
if c: return c if code:
return code
## Okay we have no clue which temple this is. ## Okay we have no clue which temple this is.
## We should tell the user and store it anyway. ## We should tell the user and store it anyway.

View File

@ -18,6 +18,8 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# #
import re
import RelLib import RelLib
class PlaceParser: class PlaceParser:
@ -90,6 +92,37 @@ class IdFinder:
self.index += 1 self.index += 1
return index return index
#------------------------------------------------------------------------
#
# Support functions
#
#------------------------------------------------------------------------
NAME_RE = re.compile(r"/?([^/]*)(/([^/]*)(/([^/]*))?)?")
SURNAME_RE = re.compile(r"/([^/]*)/([^/]*)")
def parse_name_personal(self, text):
name = RelLib.Name()
m = SURNAME_RE.match(text)
if m:
names = m.groups()
name.set_first_name(names[1].strip())
name.set_surname(names[0].strip())
else:
try:
names = NAME_RE.match(text).groups()
name.set_first_name(names[0].strip())
name.set_surname(names[2].strip())
name.set_suffix(names[4].strip())
except:
name.set_first_name(text.strip())
return name
def extract_id(self):
"""
Extracts a value to use for the GRAMPS ID value from the GEDCOM
reference token. The value should be in the form of @XXX@, and the
returned value will be XXX
"""
return value.strip()[1:-1]