gramps/src/GrampsDb/_ReadGedcom.py
2006-04-17 22:33:13 +00:00

2513 lines
88 KiB
Python

#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2000-2006 Donald N. Allingham
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id$
"Import from GEDCOM"
#-------------------------------------------------------------------------
#
# standard python modules
#
#-------------------------------------------------------------------------
import os
import re
import string
import const
import lds
import time
from gettext import gettext as _
# fall back to the sets module on Pythons that predate the builtin set type
try:
set()
except NameError:
from sets import Set as set
#------------------------------------------------------------------------
#
# Set up logging
#
#------------------------------------------------------------------------
import logging
log = logging.getLogger(".GedcomImport")
#-------------------------------------------------------------------------
#
# GTK/GNOME Modules
#
#-------------------------------------------------------------------------
import gtk
import gtk.glade
#-------------------------------------------------------------------------
#
# GRAMPS modules
#
#-------------------------------------------------------------------------
import Errors
import RelLib
from DateHandler import DateParser
import NameDisplay
import Utils
import Mime
import _ConstXML
from ansel_utf8 import ansel_to_utf8
from bsddb import db
from _GedcomInfo import *
from _GedTokens import *
from QuestionDialog import ErrorDialog, WarningDialog
from _GrampsDbBase import EVENT_KEY
addr_re = re.compile('(.+)([\n\r]+)(.+)\s*,(.+)\s+(\d+)\s*(.*)')
addr2_re = re.compile('(.+)([\n\r]+)(.+)\s*,(.+)\s+(\d+)')
addr3_re = re.compile('(.+)([\n\r]+)(.+)\s*,(.+)')
#-------------------------------------------------------------------------
#
# place-part setters and latin/utf8 conversions
#
#-------------------------------------------------------------------------
# List of place-part names; it starts empty and nothing in the visible part
# of the file fills it.
_place_field = []
# Maps a lower-case place-part name to the unbound RelLib.Location setter
# that stores that part.
_place_match = {
'city' : RelLib.Location.set_city,
'county' : RelLib.Location.set_county,
'country': RelLib.Location.set_country,
'state' : RelLib.Location.set_state,
}
def _empty_func(a,b):
    """Accept two arguments, do nothing with them and return None."""
    return None
def utf8_to_latin(s):
    """
    Encode s as ISO-8859-1, substituting a replacement character for
    anything that cannot be represented in that encoding.
    """
    encoded = s.encode('iso-8859-1', 'replace')
    return encoded
def latin_to_utf8(s):
    """
    Return s as a unicode object.  A value that already is unicode is
    returned untouched; anything else is decoded as ISO-8859-1.
    """
    if type(s) != unicode:
        return unicode(s, 'iso-8859-1')
    return s
def nocnv(s):
    """Return unicode(s), i.e. convert using the interpreter's default codec."""
    converted = unicode(s)
    return converted
#-------------------------------------------------------------------------
#
# constants
#
#-------------------------------------------------------------------------
# Numeric identifiers and an update interval.
# NOTE(review): none of these three names is referenced in the visible part
# of the file - confirm they are still used further down.
ANSEL = 1
UNICODE = 2
UPDATE = 25
# Shared child-reference type instances used as relation values and defaults
# by the family/child parsing code.
_TYPE_BIRTH = RelLib.ChildRefType()
_TYPE_ADOPT = RelLib.ChildRefType(RelLib.ChildRefType.ADOPTED)
_TYPE_FOSTER = RelLib.ChildRefType(RelLib.ChildRefType.FOSTER)
# Upper-case file system type name -> translated description.  The parser
# constructor matches the keys against the third field of /proc/mounts lines.
file_systems = {
'VFAT' : _('Windows 9x file system'),
'FAT' : _('Windows 9x file system'),
"NTFS" : _('Windows NT file system'),
"ISO9660" : _('CD ROM'),
"SMBFS" : _('Networked Windows file system')
}
# Tuple of ChildRefType constants (BIRTH, UNKNOWN, NONE).
rel_types = (RelLib.ChildRefType.BIRTH,
RelLib.ChildRefType.UNKNOWN,
RelLib.ChildRefType.NONE,
)
# Lower-case pedigree/relation keyword -> ChildRefType instance.
# NOTE(review): 'step' maps to the adopted type - confirm this is intended.
pedi_type = {
'birth' : RelLib.ChildRefType(),
'natural': RelLib.ChildRefType(),
'step' : _TYPE_ADOPT,
'adopted': _TYPE_ADOPT,
'foster' : _TYPE_FOSTER,
}
# Status keyword as written in the import file -> RelLib.LdsOrd status constant.
lds_status = {
"BIC" : RelLib.LdsOrd.STATUS_BIC,
"CANCELED" : RelLib.LdsOrd.STATUS_CANCELED,
"CHILD" : RelLib.LdsOrd.STATUS_CHILD,
"CLEARED" : RelLib.LdsOrd.STATUS_CLEARED,
"COMPLETED": RelLib.LdsOrd.STATUS_COMPLETED,
"DNS" : RelLib.LdsOrd.STATUS_DNS,
"INFANT" : RelLib.LdsOrd.STATUS_INFANT,
"PRE-1970" : RelLib.LdsOrd.STATUS_PRE_1970,
"QUALIFIED": RelLib.LdsOrd.STATUS_QUALIFIED,
"DNS/CAN" : RelLib.LdsOrd.STATUS_DNS_CAN,
"STILLBORN": RelLib.LdsOrd.STATUS_STILLBORN,
"SUBMITTED": RelLib.LdsOrd.STATUS_SUBMITTED,
"UNCLEARED": RelLib.LdsOrd.STATUS_UNCLEARED,
}
# Translated templates for the default description of an event that has none.
_event_family_str = _("%(event_name)s of %(family)s")
_event_person_str = _("%(event_name)s of %(person)s")
#-------------------------------------------------------------------------
#
# GEDCOM events to GRAMPS events conversion
#
#-------------------------------------------------------------------------
# Invert the Utils event tables: each non-empty table value becomes a key
# that maps back to the table key it came from.
ged2gramps = {}
for _val, _key in Utils.personalConstantEvents.items():
    if _key != "":
        ged2gramps[_key] = _val

ged2fam = {}
for _val, _key in Utils.familyConstantEvents.items():
    if _key != "":
        ged2fam[_key] = _val

# Custom family event names; starts out empty.
ged2fam_custom = {}
#-------------------------------------------------------------------------
#
# regular expressions
#
#-------------------------------------------------------------------------
# A run of digits with optional whitespace before it and up to end of string.
intRE = re.compile(r"\s*(\d+)\s*$")
# Up to three '/'-separated parts of a name; groups 1, 3 and 5 hold the parts.
nameRegexp= re.compile(r"/?([^/]*)(/([^/]*)(/([^/]*))?)?")
# A name that starts with '/': group 1 is the text between the slashes,
# group 2 is whatever follows the closing slash.
snameRegexp= re.compile(r"/([^/]*)/([^/]*)")
# Optional ABT/BEF/AFT qualifier, an "@#D...@" escape (group 2) and the
# remaining text (group 3).
calRegexp = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D([^@]+)@\s*(.*)$")
# "BET <escape> <text> AND <escape> <text>".
rangeRegexp = re.compile(r"\s*BET\s+@#D([^@]+)@\s*(.*)\s+AND\s+@#D([^@]+)@\s*(.*)$")
# "FROM <escape> <text> TO <escape> <text>".
spanRegexp = re.compile(r"\s*FROM\s+@#D([^@]+)@\s*(.*)\s+TO\s+@#D([^@]+)@\s*(.*)$")
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
def importData(database, filename, callback=None, use_trans=False):
    """
    Import the GEDCOM file filename into database.

    The first lines of the file (at most 50, stopping at the first blank
    line) are scanned for a CHAR line whose value is ANSEL and a SOUR line
    whose value is GRAMPS.  If the file claims ANSEL but was not written by
    GRAMPS, the 'encoding' dialog from gedcomimport.glade is shown and the
    index of the chosen entry is used as the character-set override;
    otherwise the override is None.  The real work is delegated to import2().

    database  -- database to import into
    filename  -- path of the GEDCOM file
    callback  -- optional progress callback, passed through to the parser
    use_trans -- passed through to import2()
    """
    ansel = False
    gramps = False
    f = open(filename,"r")
    try:
        # try/finally so the handle is released even if reading fails
        for index in range(50):
            line = f.readline().split()
            if len(line) == 0:
                break
            if len(line) > 2 and line[1][0:4] == 'CHAR' and line[2] == "ANSEL":
                ansel = True
            if len(line) > 2 and line[1][0:4] == 'SOUR' and line[2] == "GRAMPS":
                gramps = True
    finally:
        f.close()

    if not gramps and ansel:
        glade_file = "%s/gedcomimport.glade" % os.path.dirname(__file__)
        top = gtk.glade.XML(glade_file,'encoding','gramps')
        code = top.get_widget('codeset')
        code.set_active(0)
        dialog = top.get_widget('encoding')
        dialog.run()
        codeset = code.get_active()
        dialog.destroy()
    else:
        codeset = None
    import2(database, filename, callback, codeset, use_trans)
def import2(database, filename, callback, codeset, use_trans):
# add some checking here
try:
np = NoteParser(filename, False)
g = GedcomParser(database,filename, callback, codeset, np.get_map(),
np.get_lines(),np.get_persons())
except IOError,msg:
ErrorDialog(_("%s could not be opened\n") % filename,str(msg))
return
if database.get_number_of_people() == 0:
use_trans = False
try:
close = g.parse_gedcom_file(use_trans)
except IOError,msg:
errmsg = _("%s could not be opened\n") % filename
ErrorDialog(errmsg,str(msg))
return
except Errors.GedcomError, val:
(m1,m2) = val.messages()
ErrorDialog(m1,m2)
return
except db.DBSecondaryBadError, msg:
WarningDialog(_('Database corruption detected'),
_('A problem was detected with the database. Please '
'run the Check and Repair Database tool to fix the '
'problem.'))
return
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
class DateStruct:
    """Plain holder for a date string and a time string, both initially empty."""

    def __init__(self):
        self.date = self.time = ""
class GedcomDateParser(DateParser):
    """
    DateParser whose month table holds only the lower-case three-letter
    English month abbreviations, numbered 1 through 12.
    """
    month_to_int = dict(zip(
        ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
         'jul', 'aug', 'sep', 'oct', 'nov', 'dec'),
        range(1, 13)))
class IdFinder:
    """
    Generates IDs from a format prefix and a running counter, skipping any
    ID that is already known.
    """

    def __init__(self,keys,prefix):
        """
        keys   -- iterable of IDs that are already taken
        prefix -- format string applied to the counter (prefix % counter)
        """
        self.prefix = prefix
        self.index = 0
        self.ids = set(keys)

    def find_next(self):
        """
        Return the next ID that is not in the known set, remember it as
        taken and advance the counter past it.
        """
        while True:
            candidate = self.prefix % self.index
            self.index += 1
            if str(candidate) not in self.ids:
                break
        self.ids.add(candidate)
        return candidate
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
# "<level> @<id>@ NOTE<rest>": group 1 is the id, group 2 the text after NOTE.
noteRE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+NOTE(.*)$")
# "<level> CONT <text>": group 1 is the text after the single separator character.
contRE = re.compile(r"\s*\d+\s+CONT\s(.*)$")
# "<level> CONC <text>": group 1 is the text after the single separator character.
concRE = re.compile(r"\s*\d+\s+CONC\s(.*)$")
# "<level> @<id>@ INDI<rest>": group 1 is the id.
personRE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$")
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
class CurrentState:
    """
    Per-person parsing state: the person being built, a name counter and
    the note text accumulated so far.
    """

    def __init__(self):
        self.person = None
        self.name_cnt = 0
        self.note = ""

    def add_to_note(self,text):
        """Append text to the accumulated note."""
        self.note = self.note + text

    def get_text(self):
        """Return the accumulated note text."""
        return self.note
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
class NoteParser:
    """
    Pre-scans a GEDCOM file, collecting every "@id@ NOTE" record into a
    RelLib.Note and counting the lines and INDI records in the file.
    """

    def __init__(self, filename,broken):
        """
        filename -- path of the file to scan
        broken   -- if true, a space is inserted in front of each CONC
                    continuation before it is appended to the note
        """
        self.name_map = {}
        self.count = 0
        self.person_count = 0
        f = open(filename,"rU")
        try:
            innote = False
            noteobj = None
            for line in f:
                self.count += 1
                if innote:
                    match = contRE.match(line)
                    if match:
                        noteobj.append("\n" + match.groups()[0])
                        continue
                    match = concRE.match(line)
                    if match:
                        if broken:
                            noteobj.append(" " + match.groups()[0])
                        else:
                            noteobj.append(match.groups()[0])
                        continue
                    # The note's CONT/CONC run is over.  Fall through so the
                    # line that ended it is still examined; otherwise a NOTE
                    # or INDI record directly after a note would be missed.
                    innote = False
                match = noteRE.match(line)
                if match:
                    data = match.groups()[0]
                    noteobj = RelLib.Note()
                    self.name_map["@%s@" % data] = noteobj
                    noteobj.append(match.groups()[1])
                    innote = True
                elif personRE.match(line):
                    self.person_count += 1
        finally:
            # release the handle even when reading fails
            f.close()

    def get_map(self):
        """Return the dict mapping "@id@" to its RelLib.Note."""
        return self.name_map

    def get_lines(self):
        """Return the number of lines read."""
        return self.count

    def get_persons(self):
        """Return the number of INDI records counted."""
        return self.person_count
#-------------------------------------------------------------------------
#
# Reader - serves as the lexical analysis engine
#
#-------------------------------------------------------------------------
class Reader:
    """
    Lexical analyser for the import file.  Lines are split into
    (level, token, value, tag text, line number) tuples, and CONT/CONC
    lines are merged into the value of the line they continue.
    """

    def __init__(self,name):
        """Open name for reading and set up the translation tables."""
        self.f = open(name,'rU')
        self.current_list = []
        self.eof = False
        self.transtable = string.maketrans('','')
        # characters 0..30 are deleted from every value
        self.delc = self.transtable[0:31]
        # fallback table: every byte >= 128 becomes '?'
        self.transtable2 = self.transtable[0:128] + ('?' * 128)
        self.cnv = lambda s: unicode(s)
        self.broken_conc = False
        self.cnt = 0
        self.index = 0

    def set_charset_fn(self,cnv):
        """Install cnv as the function that converts raw text to unicode."""
        self.cnv = cnv

    def set_broken_conc(self,broken):
        """If broken is true, CONC continuations are joined with a space."""
        self.broken_conc = broken

    def read(self):
        """
        Return the oldest buffered tuple, refilling the buffer first when at
        most one entry is left.  Returns None once the buffer is exhausted.
        """
        if len(self.current_list) <= 1 and not self.eof:
            self.readahead()
        try:
            self.current = self.current_list.pop()
            return self.current
        except IndexError:
            return None

    def readahead(self):
        """
        Read lines until the buffer holds five entries or the file ends.
        Newest entries sit at index 0, so continuation lines are folded into
        self.current_list[0].
        """
        while len(self.current_list) < 5:
            line = self.f.readline()
            self.index += 1
            if line == "":
                # readline() returns "" only at the real end of file; a blank
                # line still carries its line terminator
                self.f.close()
                self.eof = True
                break
            line = line.strip('\r\n').split(None,2)
            if not line:
                # blank or whitespace-only line: nothing to tokenize
                continue
            # pad so that the tag and value fields always exist
            line = line + [''] * (3 - len(line))
            val = line[2].translate(self.transtable,self.delc)
            try:
                val = self.cnv(val)
            except:
                # conversion failed: retry with non-ASCII bytes replaced
                val = self.cnv(val.translate(self.transtable2))
            try:
                level = int(line[0])
            except:
                level = 0
            data = (level,tokens.get(line[1],TOKEN_UNKNOWN),val,
                    self.cnv(line[1]),self.index)
            if data[1] in (TOKEN_CONT,TOKEN_CONC) and not self.current_list:
                # continuation with nothing to continue: keep it as a line
                # of its own rather than failing on an empty buffer
                self.current_list.insert(0,data)
            elif data[1] == TOKEN_CONT:
                l = self.current_list[0]
                self.current_list[0] = (l[0],l[1],l[2]+'\n'+data[2],l[3],l[4])
            elif data[1] == TOKEN_CONC:
                l = self.current_list[0]
                if self.broken_conc:
                    new_value = u"%s %s" % (l[2],data[2])
                else:
                    new_value = l[2] + data[2]
                self.current_list[0] = (l[0],l[1],new_value,l[3],l[4])
            else:
                self.current_list.insert(0,data)
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
class GedcomParser:
SyntaxError = "Syntax Error"
BadFile = "Not a GEDCOM file"
def __init__(self,dbase,filename,callback,codeset,note_map,lines,people):
    """
    Set up parser state for importing filename into dbase.

    dbase    -- database receiving the imported objects
    filename -- path of the file to import
    callback -- progress callback taking a percentage, or a false value
    codeset  -- character-set override: 1 installs ansel_to_utf8 on the
                lexer, 2 installs latin_to_utf8, anything else leaves the
                lexer's default conversion in place
    note_map -- dict of "@id@" -> note object from the pre-scan
    lines    -- number of lines in the file
    people   -- number of person records in the file
    """
    self.maxlines = lines
    self.maxpeople = people
    self.interval = lines/100
    self.percent = 0
    self.callback = callback
    self.dp = GedcomDateParser()
    self.db = dbase
    self.emapper = IdFinder(dbase.get_gramps_ids(EVENT_KEY),
                            dbase.eprefix)
    self.person = None
    self.inline_srcs = {}
    self.media_map = {}
    self.fmap = {}
    self.smap = {}
    self.note_map = note_map
    self.refn = {}
    self.added = set()
    self.gedmap = GedcomInfoDB()
    self.gedsource = self.gedmap.get_from_source_tag('GEDCOM 5.5')
    self.def_src = RelLib.Source()
    fname = os.path.basename(filename).split('\\')[-1]
    self.def_src.set_title(_("Import from %s") % unicode(fname))
    self.dir_path = os.path.dirname(filename)
    self.localref = 0
    self.placemap = {}
    self.broken_conc_list = [ 'FamilyOrigins', 'FTW' ]
    self.is_ftw = 0
    self.idswap = {}
    # import-file ID -> database handle, one map per object kind
    self.gid2id = {}
    self.sid2id = {}
    self.lid2id = {}
    self.fid2id = {}
    self.rid2id = {}

    # token -> handler dispatch tables
    self.repo_func = {
        TOKEN_NAME : self.func_repo_name,
        TOKEN_ADDR : self.func_repo_addr,
        }
    self.name_func = {
        TOKEN_ALIA : self.func_name_alia,
        TOKEN_NPFX : self.func_name_npfx,
        TOKEN_GIVN : self.func_name_givn,
        TOKEN_SPFX : self.func_name_spfx,
        TOKEN_SURN : self.func_name_surn,
        TOKEN__MARNM : self.func_name_marnm,
        TOKEN_TITL : self.func_name_titl,
        TOKEN_NSFX : self.func_name_nsfx,
        TOKEN_NICK : self.func_name_nick,
        TOKEN__AKA : self.func_name_aka,
        TOKEN_SOUR : self.func_name_sour,
        TOKEN_NOTE : self.func_name_note,
        }
    self.person_func = {
        TOKEN_NAME : self.func_person_name,
        TOKEN_ALIA : self.func_person_alt_name,
        TOKEN_OBJE : self.func_person_object,
        TOKEN_NOTE : self.func_person_note,
        TOKEN__COMM : self.func_person_note,
        TOKEN_SEX : self.func_person_sex,
        TOKEN_BAPL : self.func_person_bapl,
        TOKEN_ENDL : self.func_person_endl,
        TOKEN_SLGC : self.func_person_slgc,
        TOKEN_FAMS : self.func_person_fams,
        TOKEN_FAMC : self.func_person_famc,
        TOKEN_RESI : self.func_person_resi,
        TOKEN_ADDR : self.func_person_addr,
        TOKEN_PHON : self.func_person_phon,
        TOKEN_BIRT : self.func_person_birt,
        TOKEN_ADOP : self.func_person_adop,
        TOKEN_DEAT : self.func_person_deat,
        TOKEN_EVEN : self.func_person_even,
        TOKEN_SOUR : self.func_person_sour,
        TOKEN_REFN : self.func_person_refn,
        TOKEN_AFN : self.func_person_attr,
        TOKEN_RFN : self.func_person_attr,
        TOKEN__UID : self.func_person_attr,
        TOKEN_CHAN : self.skip_record,
        TOKEN_ASSO : self.func_person_asso,
        TOKEN_ANCI : self.skip_record,
        TOKEN_DESI : self.skip_record,
        TOKEN_RIN : self.skip_record,
        TOKEN__TODO : self.skip_record,
        }

    # remember field 2 of every place record already in the database;
    # find_or_create_place() compares place titles against this set
    self.place_names = set()
    cursor = dbase.get_place_cursor()
    data = cursor.next()
    while data:
        (handle,val) = data
        self.place_names.add(val[2])
        data = cursor.next()
    cursor.close()

    self.lexer = Reader(filename)
    self.filename = filename
    self.backoff = False
    self.override = codeset
    if self.db.get_number_of_people() == 0:
        self.map_gid = self.map_gid_empty
    else:
        self.map_gid = self.map_gid_not_empty
    if self.override != 0:
        if self.override == 1:
            self.lexer.set_charset_fn(ansel_to_utf8)
        elif self.override == 2:
            self.lexer.set_charset_fn(latin_to_utf8)
    self.geddir = os.path.dirname(os.path.normpath(os.path.abspath(filename)))
    self.error_count = 0
    amap = Utils.personalConstantAttributes
    self.current = self.interval
    self.attrs = amap.values()
    self.gedattr = {}
    for val in amap.keys():
        self.gedattr[amap[val]] = val

    # Best effort: collect mount points of the file system types listed in
    # file_systems so find_file() can look for media below them.  A missing
    # or unreadable /proc/mounts simply leaves the list short.
    self.search_paths = []
    try:
        f = open("/proc/mounts","r")
        try:
            for line in f:
                paths = line.split()
                if len(paths) < 3:
                    # malformed entry: skip it, keep scanning
                    continue
                if paths[2].upper() in file_systems:
                    self.search_paths.append(paths[1])
        finally:
            f.close()
    except (IOError, OSError):
        pass
def errmsg(self,msg):
    """Write msg to the import log at warning level."""
    log.warning(msg)
def infomsg(self,msg):
    """Write msg to the import log; like errmsg() this uses warning level."""
    log.warning(msg)
def find_file(self,fullname,altpath):
tries = []
fullname = fullname.replace('\\','/')
tries.append(fullname)
if os.path.isfile(fullname):
return (1,fullname)
other = os.path.join(altpath,fullname)
tries.append(other)
if os.path.isfile(other):
return (1,other)
other = os.path.join(altpath,os.path.basename(fullname))
tries.append(other)
if os.path.isfile(other):
return (1,other)
if len(fullname) > 3:
if fullname[1] == ':':
fullname = fullname[2:]
for path in self.search_paths:
other = os.path.normpath("%s/%s" % (path,fullname))
tries.append(other)
if os.path.isfile(other):
return (1,other)
return (0,tries)
else:
return (0,tries)
def track_lines(self):
if self.current == 1:
self.current = self.interval
self.percent += 1
if self.callback:
self.callback(self.percent)
else:
self.current -= 1
def get_next(self):
    """
    Return the next line tuple (level, token, value, tag text, line number).

    After a call to backup() the previously returned tuple is handed out
    again.  At end of file an error is logged and counted, and a sentinel
    tuple with level -1 and token TOKEN_UNKNOWN is returned.
    """
    if not self.backoff:
        self.groups = self.lexer.read()
        self.track_lines()
        # EOF ?
        if not self.groups:
            self.text = ""
            self.backoff = False
            # the lexer returned nothing, so the line number has to come
            # from the lexer's own counter rather than from the tuple
            line_no = self.lexer.index
            msg = _("Premature end of file at line %d.\n") % line_no
            self.errmsg(msg)
            self.error_count += 1
            # five fields like every real tuple: barf() reads index 4
            self.groups = (-1, TOKEN_UNKNOWN, "", "", line_no)
            return self.groups
    self.backoff = False
    return self.groups
def barf(self,level):
    """
    Report the current line as not understood, count the error and skip
    the line's sub-records via ignore_sub_junk(level).
    """
    line_no = self.groups[4]
    self.errmsg(_("Line %d was not understood, so it was ignored.") % line_no)
    self.error_count += 1
    self.ignore_sub_junk(level)
def warn(self,msg):
self.errmsg(msg)
self.error_count += 1
def backup(self):
self.backoff = True
def parse_gedcom_file(self,use_trans=False):
if self.maxpeople < 1000:
no_magic = True
else:
no_magic = False
self.trans = self.db.transaction_begin("",not use_trans,no_magic)
self.db.disable_signals()
t = time.time()
self.fam_count = 0
self.indi_count = 0
self.repo_count = 0
self.source_count = 0
try:
self.parse_header()
self.parse_submitter()
self.db.add_source(self.def_src,self.trans)
self.parse_record()
self.parse_trailer()
except Errors.GedcomError, err:
self.errmsg(str(err))
for value in self.inline_srcs.keys():
title,note = value
handle = self.inline_srcs[value]
src = RelLib.Source()
src.set_handle(handle)
src.set_title(title)
if note:
src.set_note(note)
self.db.add_source(src,self.trans)
t = time.time() - t
msg = _('Import Complete: %d seconds') % t
if self.callback:
self.callback(100)
self.db.transaction_commit(self.trans,_("GEDCOM import"))
self.db.enable_signals()
self.db.request_rebuild()
def parse_trailer(self):
matches = self.get_next()
if matches[0] >= 0 and matches[1] != TOKEN_TRLR:
self.barf(0)
def parse_header(self):
self.parse_header_head()
self.parse_header_source()
def parse_submitter(self):
matches = self.get_next()
if matches[2] != "SUBM":
self.backup()
return
else:
self.parse_submitter_data(1)
def parse_submitter_data(self,level):
    """
    Consume the submitter's sub-records down to level: NAME becomes the
    author of the default source, ADDR sub-records are skipped and every
    other line is ignored.  The first line below level is pushed back.
    """
    matches = self.get_next()
    while int(matches[0]) >= level:
        token = matches[1]
        if token == TOKEN_NAME:
            self.def_src.set_author(matches[2])
        elif token == TOKEN_ADDR:
            self.ignore_sub_junk(level+1)
        matches = self.get_next()
    self.backup()
def parse_source(self,name,level):
    """
    Parse a source record into self.source and commit it.

    name  -- the record's cross-reference id including the surrounding
             '@' characters (they are stripped here)
    level -- level of the record's sub-records

    TITL (or, when no title is set yet, TAXT/PERI) gives the title; AUTH,
    PUBL and ABBR fill the matching fields; NOTE is handled by
    parse_note(); REPO adds a repository reference.  TEXT and any tag not
    handled explicitly are appended to the source note as "<tag> <value>".
    A source that ends up without a title gets "No title - ID <id>".
    """
    self.source = self.find_or_create_source(name[1:-1])
    note = ""
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            if not self.source.get_title():
                self.source.set_title("No title - ID %s" % self.source.get_gramps_id())
            self.db.commit_source(self.source, self.trans)
            self.backup()
            return
        elif matches[1] == TOKEN_TITL:
            title = matches[2]
            title = title.replace('\n',' ')
            self.source.set_title(title)
        elif matches[1] in (TOKEN_TAXT,TOKEN_PERI): # EasyTree Sierra On-Line
            if self.source.get_title() == "":
                title = matches[2]
                title = title.replace('\n',' ')
                self.source.set_title(title)
        elif matches[1] == TOKEN_AUTH:
            self.source.set_author(matches[2])
        elif matches[1] == TOKEN_PUBL:
            self.source.set_publication_info(matches[2])
        elif matches[1] == TOKEN_NOTE:
            note = self.parse_note(matches,self.source,level+1,note)
            self.source.set_note(note)
        elif matches[1] == TOKEN_ABBR:
            self.source.set_abbreviation(matches[2])
        elif matches[1] == TOKEN_REPO:
            repo_ref = RelLib.RepoRef()
            repo = self.find_or_create_repository(matches[2][1:-1])
            repo_ref.set_reference_handle(repo.handle)
            self.parse_repo_ref(matches,repo_ref,level+1)
            self.source.add_repo_reference(repo_ref)
        elif matches[1] in (TOKEN_OBJE,TOKEN_CHAN,TOKEN_IGNORE):
            # skip relative to the level we were called with
            self.ignore_sub_junk(level+1)
        else:
            # TEXT ends up here too: previously its branch re-saved the
            # existing note and silently dropped the TEXT value
            note = self.source.get_note()
            if note:
                note = "%s\n%s %s" % (note,matches[3],matches[2])
            else:
                note = "%s %s" % (matches[3],matches[2])
            self.source.set_note(note.strip())
def parse_record(self):
    """
    Main record loop: dispatch on each top-level line until the trailer
    or the end-of-file sentinel is seen (that line is pushed back).

    FAM, INDI and REPO records are parsed and committed; families and
    persons without a source reference get one to the default source.  An
    address found on a family is copied to father, mother and children.
    SOUR records are parsed, either in full or from the single-line form.
    SUBM/SUBN, OBJE, ignored tokens, NOTE and _LOC records are skipped.
    Anything else is reported through barf().
    """
    while True:
        matches = self.get_next()
        token = matches[1]
        value = matches[2]
        if value == "FAM":
            self.fam_count += 1
            self.family = self.find_or_create_family(matches[3][1:-1])
            self.parse_family()
            if self.addr != None:
                # parents first, then the children in list order
                members = [self.family.get_father_handle(),
                           self.family.get_mother_handle()]
                for child_ref in self.family.get_child_ref_list():
                    members.append(child_ref.ref)
                for member_handle in members:
                    member = self.db.get_person_from_handle(member_handle)
                    if member:
                        member.add_address(self.addr)
                        self.db.commit_person(member, self.trans)
            if not self.family.get_source_references():
                sref = RelLib.SourceRef()
                sref.set_base_handle(self.def_src.handle)
                self.family.add_source_reference(sref)
            self.db.commit_family(self.family, self.trans)
            del self.family
        elif value == "INDI":
            self.indi_count += 1
            gid = matches[3][1:-1]
            self.person = self.find_or_create_person(self.map_gid(gid))
            self.added.add(self.person.handle)
            self.parse_individual(self.person)
            if not self.person.get_source_references():
                sref = RelLib.SourceRef()
                sref.set_base_handle(self.def_src.handle)
                self.person.add_source_reference(sref)
            self.db.commit_person(self.person, self.trans)
            del self.person
        elif value == "REPO":
            self.repo_count += 1
            self.repo = self.find_or_create_repository(matches[3][1:-1])
            self.repo.set_type((RelLib.Repository.UNKNOWN,""))
            self.added.add(self.repo.handle)
            self.parse_repository(self.repo)
            self.db.commit_repository(self.repo, self.trans)
            del self.repo
        elif value in ("SUBM","SUBN"):
            self.ignore_sub_junk(1)
        elif token in (TOKEN_SUBM,TOKEN_SUBN,TOKEN_OBJE,TOKEN_IGNORE):
            self.ignore_sub_junk(1)
        elif value == "SOUR":
            self.parse_source(matches[3],1)
        elif value.startswith("SOUR "):
            # A source formatted in a single line, for example:
            # 0 @S62@ SOUR This is the title of the source
            source = self.find_or_create_source(matches[3][1:-1])
            source.set_title(value[5:])
            self.db.commit_source(source, self.trans)
        elif value[0:4] == "NOTE":
            self.ignore_sub_junk(1)
        elif value == "_LOC":
            # TODO: Add support for extended Locations.
            # See: http://en.wiki.genealogy.net/index.php/Gedcom_5.5EL
            self.ignore_sub_junk(1)
        elif matches[0] < 0 or token == TOKEN_TRLR:
            self.backup()
            return
        else:
            self.barf(1)
def map_gid_empty(self,gid):
    """
    ID mapping installed when the database holds no people: the ID from
    the import file is used unchanged.
    """
    return gid
def map_gid_not_empty(self,gid):
if self.idswap.get(gid):
return self.idswap[gid]
else:
if self.db.id_trans.get(str(gid)):
self.idswap[gid] = self.db.find_next_person_gramps_id()
else:
self.idswap[gid] = gid
return self.idswap[gid]
def find_or_create_person(self,gramps_id):
    """
    Return a RelLib.Person for gramps_id: loaded from the database when
    the handle mapped to that ID is already stored there, otherwise a
    fresh object carrying the (possibly newly created) handle and the ID.
    """
    person = RelLib.Person()
    known_handle = self.gid2id.get(gramps_id)
    if self.db.has_person_handle(known_handle):
        person.unserialize(self.db.get_raw_person_data(known_handle))
        return person
    person.set_handle(self.find_person_handle(gramps_id))
    person.set_gramps_id(gramps_id)
    return person
def find_person_handle(self,gramps_id):
intid = self.gid2id.get(gramps_id)
if not intid:
intid = create_id()
self.gid2id[gramps_id] = intid
return intid
def find_or_create_family(self,gramps_id):
    """
    Return a RelLib.Family for gramps_id: loaded from the database when
    the handle mapped to that ID is already stored there, otherwise a
    fresh object carrying the (possibly newly created) handle and the ID.
    """
    family = RelLib.Family()
    known_handle = self.fid2id.get(gramps_id)
    if self.db.has_family_handle(known_handle):
        family.unserialize(self.db.get_raw_family_data(known_handle))
        return family
    family.set_handle(self.find_family_handle(gramps_id))
    family.set_gramps_id(gramps_id)
    return family
def find_or_create_repository(self,gramps_id):
    """
    Return a RelLib.Repository for gramps_id: loaded from the database
    when the handle mapped to that ID is already stored there, otherwise a
    fresh object carrying the (possibly newly created) handle and the ID.
    """
    repository = RelLib.Repository()
    known_handle = self.rid2id.get(gramps_id)
    if self.db.has_repository_handle(known_handle):
        repository.unserialize(self.db.get_raw_repository_data(known_handle))
        return repository
    repository.set_handle(self.find_repository_handle(gramps_id))
    repository.set_gramps_id(gramps_id)
    return repository
def find_repository_handle(self,gramps_id):
intid = self.rid2id.get(gramps_id)
if not intid:
intid = create_id()
self.rid2id[gramps_id] = intid
return intid
def find_family_handle(self,gramps_id):
intid = self.fid2id.get(gramps_id)
if not intid:
intid = create_id()
self.fid2id[gramps_id] = intid
return intid
def find_or_create_source(self,gramps_id):
    """
    Return a RelLib.Source for gramps_id.  When the handle mapped to that
    ID is already stored in the database the source is loaded from there;
    otherwise a new source with a new handle is created, added to the
    database and remembered in self.sid2id.
    """
    source = RelLib.Source()
    known_handle = self.sid2id.get(gramps_id)
    if self.db.has_source_handle(known_handle):
        source.unserialize(self.db.get_raw_source_data(known_handle))
        return source
    new_handle = create_id()
    source.set_handle(new_handle)
    source.set_gramps_id(gramps_id)
    self.db.add_source(source,self.trans)
    self.sid2id[gramps_id] = new_handle
    return source
def find_or_create_place(self,title):
    """
    Return a RelLib.Place for the place name title.

    When the handle remembered for title is stored in the database the
    place is loaded from there.  Otherwise a new place is created, added to
    the database and remembered; if title is one of the names collected in
    self.place_names, the new place's title gets " [<gramps id>]" appended.
    """
    place = RelLib.Place()

    # A title we have not met before needs a new GRAMPS ID.
    known_handle = self.lid2id.get(title)
    new_id = None
    if known_handle == None:
        new_id = self.db.find_next_place_gramps_id()

    # A title that is already in self.place_names is made distinct by
    # appending the GRAMPS ID, which is generated now if still missing.
    pname = title
    if title in self.place_names:
        if not new_id:
            new_id = self.db.find_next_place_gramps_id()
        pname = "%s [%s]" % (title,new_id)

    if self.db.has_place_handle(known_handle):
        place.unserialize(self.db.get_raw_place_data(known_handle))
        return place

    new_handle = create_id()
    place.set_handle(new_handle)
    place.set_title(pname)
    load_place_values(place,pname)
    place.set_gramps_id(new_id)
    self.db.add_place(place,self.trans)
    self.lid2id[title] = new_handle
    return place
def parse_cause(self,event,level):
    """
    Consume the sub-records of a cause down to level: SOUR lines become
    source references on event, anything else is reported through
    barf(1).  The first line below level is pushed back.
    """
    matches = self.get_next()
    while int(matches[0]) >= level:
        if matches[1] == TOKEN_SOUR:
            event.add_source_reference(self.handle_source(matches,level+1))
        else:
            self.barf(1)
        matches = self.get_next()
    self.backup()
def parse_repo_caln(self, matches, repo, level):
    """
    Consume sub-records down to level: CALN sets the call number on repo,
    NOTE sets its note, anything else is reported through barf(1).  The
    first line below level is pushed back.  The matches argument is not
    read; it is replaced by the first line fetched.
    """
    matches = self.get_next()
    while int(matches[0]) >= level:
        token = matches[1]
        if token == TOKEN_CALN:
            repo.set_call_number(matches[2])
            #self.parse_repo_caln(matches, repo. level+1)
        elif token == TOKEN_NOTE:
            repo.set_note(matches[2])
        else:
            self.barf(1)
        matches = self.get_next()
    self.backup()
def parse_repo_ref(self, matches, repo_ref, level):
    """
    Consume the sub-records of a repository reference down to level: CALN
    sets the call number and has its own sub-records parsed, NOTE is read
    through parse_note() and stored, anything else is reported through
    barf(1).  The first line below level is pushed back.  The matches
    argument is not read; it is replaced by the first line fetched.
    """
    matches = self.get_next()
    while int(matches[0]) >= level:
        token = matches[1]
        if token == TOKEN_CALN:
            repo_ref.set_call_number(matches[2])
            self.parse_repo_ref_caln(repo_ref, level+1)
        elif token == TOKEN_NOTE:
            repo_ref.set_note(self.parse_note(matches,repo_ref,level+1,""))
        else:
            self.barf(1)
        matches = self.get_next()
    self.backup()
def parse_repo_ref_caln(self, reporef, level):
    """
    Consume the sub-records of a call number down to level: MEDI is
    converted with _ConstXML.tuple_from_xml() and stored as the media type
    of reporef, anything else is reported through barf(1).  The first line
    below level is pushed back.
    """
    matches = self.get_next()
    while int(matches[0]) >= level:
        if matches[1] == TOKEN_MEDI:
            reporef.set_media_type(_ConstXML.tuple_from_xml(
                _ConstXML.source_media_types,matches[2]))
        else:
            self.barf(1)
        matches = self.get_next()
    self.backup()
def parse_note_data(self,level):
    """
    Consume the sub-records of a note down to level: SOUR, CHAN and REFN
    are skipped together with their sub-records, RIN is ignored, anything
    else is reported through barf(level+1).  The first line below level is
    pushed back.
    """
    matches = self.get_next()
    while int(matches[0]) >= level:
        token = matches[1]
        if token in (TOKEN_SOUR,TOKEN_CHAN,TOKEN_REFN):
            self.ignore_sub_junk(level+1)
        elif token != TOKEN_RIN:
            self.barf(level+1)
        matches = self.get_next()
    self.backup()
def parse_ftw_relations(self,level):
    """
    Read the relation sub-records of a child line down to level and return
    the tuple (mother relation, father relation).

    _FREL and _MREL are looked up in pedi_type (case-insensitively,
    defaulting to birth), ADOP marks both relations as adopted and _STAT
    marks both as birth.  Lines with an unrecognised tag are ignored;
    other recognised tags are reported through barf(level+1).
    """
    mrel = RelLib.ChildRefType()
    frel = RelLib.ChildRefType()
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            self.backup()
            return (mrel,frel)
        # FTW
        elif matches[1] == TOKEN__FREL:
            frel = pedi_type.get(matches[2].lower(),_TYPE_BIRTH)
        # FTW
        elif matches[1] == TOKEN__MREL:
            mrel = pedi_type.get(matches[2].lower(),_TYPE_BIRTH)
        elif matches[1] == TOKEN_ADOP:
            mrel = _TYPE_ADOPT
            frel = _TYPE_ADOPT
        # Legacy
        elif matches[1] == TOKEN__STAT:
            mrel = _TYPE_BIRTH
            frel = _TYPE_BIRTH
        # Legacy _PREF
        # matches[1] is the token itself, compared whole everywhere else in
        # this class; it must not be subscripted
        elif matches[1] == TOKEN_UNKNOWN:
            pass
        else:
            self.barf(level+1)
def parse_family(self):
    """
    Parse the sub-records of the family held in self.family until a line
    with a level below 1 is met (that line is pushed back).

    HUSB/WIFE set the parent handles, CHIL adds a child reference with the
    relations read by parse_ftw_relations(), SLGS adds an LDS ordinance,
    ADDR is kept in self.addr, NCHI becomes an attribute, SOUR a source
    reference, _COMM and NOTE feed the family note, OBJE is either
    reported (when its value starts with '@') or parsed as a media object,
    RIN/SUBM/REFN/CHAN are skipped.  Every other line is treated as a
    family event and added to the database.
    """
    self.addr = None
    note = ""
    while True:
        matches = self.get_next()
        if int(matches[0]) < 1:
            self.backup()
            return
        token = matches[1]
        value = matches[2]
        if token in (TOKEN_HUSB, TOKEN_WIFE):
            handle = self.find_person_handle(self.map_gid(value[1:-1]))
            if token == TOKEN_HUSB:
                self.family.set_father_handle(handle)
            else:
                self.family.set_mother_handle(handle)
            self.ignore_sub_junk(2)
        elif token == TOKEN_SLGS:
            lds_ord = RelLib.LdsOrd()
            lds_ord.set_type(RelLib.LdsOrd.SEAL_TO_SPOUSE)
            self.family.lds_ord_list.append(lds_ord)
            self.parse_ord(lds_ord,2)
        elif token == TOKEN_ADDR:
            self.addr = RelLib.Address()
            self.addr.set_street(value)
            self.parse_address(self.addr,2)
        elif token == TOKEN_CHIL:
            mrel,frel = self.parse_ftw_relations(2)
            child = self.find_or_create_person(self.map_gid(value[1:-1]))
            ref = RelLib.ChildRef()
            ref.ref = child.handle
            ref.set_father_relation(frel)
            ref.set_mother_relation(mrel)
            self.family.add_child_ref(ref)
        elif token == TOKEN_NCHI:
            attr = RelLib.Attribute()
            attr.set_type((RelLib.Attribute.NUM_CHILD,''))
            attr.set_value(value)
            self.family.add_attribute(attr)
        elif token == TOKEN_SOUR:
            self.family.add_source_reference(self.handle_source(matches,2))
        elif token in (TOKEN_RIN, TOKEN_SUBM, TOKEN_REFN,TOKEN_CHAN):
            self.ignore_sub_junk(2)
        elif token == TOKEN_OBJE:
            if value and value[0] == '@':
                self.barf(2)
            else:
                self.parse_family_object(2)
        elif token == TOKEN__COMM:
            note = value
            self.family.set_note(note)
            self.ignore_sub_junk(2)
        elif token == TOKEN_NOTE:
            note = self.parse_note(matches,self.family,1,note)
        else:
            # anything else is taken to be a family event
            tag = matches[3]
            event = RelLib.Event()
            event.set_gramps_id(self.emapper.find_next())
            try:
                event.set_type((ged2fam[tag],''))
            except:
                if tag in ged2fam_custom:
                    event.set_type((RelLib.Event.CUSTOM,ged2fam_custom[tag]))
                elif tag:
                    event.set_type((RelLib.Event.CUSTOM,tag))
                else:
                    event.set_type((RelLib.Event.UNKNOWN,''))
            if value and not event.get_description():
                event.set_description(value)
            self.parse_family_event(event,2)

            if event.get_type()[0] == RelLib.Event.MARRIAGE:
                self.family.set_relationship((RelLib.Family.MARRIED,''))
            # non-custom events without a description get a generated one
            if (event.get_type()[0] != RelLib.Event.CUSTOM
                    and not event.get_description()):
                text = _event_family_str % {
                    'event_name' : Utils.family_events[event.get_type()[0]],
                    'family' : Utils.family_name(self.family,self.db),
                    }
                event.set_description(text)
            self.db.add_event(event,self.trans)

            event_ref = RelLib.EventRef()
            event_ref.set_reference_handle(event.handle)
            event_ref.set_role((RelLib.EventRef.PRIMARY,''))
            self.family.add_event_ref(event_ref)
            del event
def parse_note_base(self,matches,obj,level,old_note,task):
    """
    Work out the note text for a NOTE-like line and return it.

    matches  -- the line tuple; matches[2] is the note text or a reference
    obj      -- not used here
    level    -- level of the line; its sub-records are skipped
    old_note -- text accumulated so far, or an empty value
    task     -- callable that stores the resulting text

    A value starting with '@' refers to a note collected by the pre-scan:
    its text is returned (u"" when the id is unknown) without calling task
    or skipping sub-records.  Otherwise the value is appended to old_note
    on a new line, stored through task and returned.
    """
    text = matches[2]
    if text and text[0] == "@": # reference to a named note defined elsewhere
        note_obj = self.note_map.get(text)
        if note_obj:
            return note_obj.get()
        return u""

    if old_note:
        note = u"%s\n%s" % (old_note,text)
    else:
        note = text
    if type(note) != unicode:
        # diagnostic only: report through the import log instead of
        # printing to stdout
        self.errmsg("Note text is %s (line value is %s), expected unicode"
                    % (type(note),type(text)))
    task(note)
    self.ignore_sub_junk(level+1)
    return note
def parse_note(self,matches,obj,level,old_note):
return self.parse_note_base(matches,obj,level,old_note,obj.set_note)
def parse_comment(self,matches,obj,level,old_note):
return self.parse_note_base(matches,obj,level,old_note,obj.set_note)
def parse_individual(self,person):
    """
    Parse the sub-records of person.  Each line is handed to the handler
    registered for its token in self.person_func, falling back to
    func_person_event.  When a line with a level below 1 is met it is
    pushed back and any note text gathered in the state is stored on the
    person.
    """
    state = CurrentState()
    state.person = person
    matches = self.get_next()
    while int(matches[0]) >= 1:
        handler = self.person_func.get(matches[1],self.func_person_event)
        handler(matches,state)
        matches = self.get_next()
    self.backup()
    if state.get_text():
        state.person.set_note(state.get_text())
def parse_optional_note(self,level):
    """
    Read sub-records down to level and return the text of the last inline
    NOTE found ("" when there is none).

    A NOTE whose value is blank or does not start with '@' is taken as
    inline text and its sub-records are parsed with parse_note_data(); a
    NOTE that references another record is skipped.  Any other line is
    reported through barf(level+1).
    """
    note = ""
    matches = self.get_next()
    while int(matches[0]) >= level:
        if matches[1] != TOKEN_NOTE:
            self.barf(level+1)
        else:
            text = matches[2]
            if not text.strip() or (text and text[0] != "@"):
                note = text
                self.parse_note_data(level+1)
            else:
                self.ignore_sub_junk(level+1)
        matches = self.get_next()
    self.backup()
    return note
def parse_famc_type(self,level,person):
    """
    Read the sub-records of a child-to-family link down to level and
    return the tuple (relation type, note text).

    PEDI is looked up in pedi_type; SOUR adds a source reference to the
    person's primary name; _PRIMARY is ignored; NOTE is handled as in
    parse_optional_note(); anything else is reported through
    barf(level+1).
    """
    ftype = _TYPE_BIRTH
    note = ""
    matches = self.get_next()
    while int(matches[0]) >= level:
        token = matches[1]
        if token == TOKEN_PEDI:
            # NOTE(review): the fallback is RelLib.Person.UNKNOWN rather
            # than a ChildRefType value - confirm this is intended
            ftype = pedi_type.get(matches[2],RelLib.Person.UNKNOWN)
        elif token == TOKEN_SOUR:
            person.primary_name.add_source_reference(
                self.handle_source(matches,level+1))
        elif token == TOKEN__PRIMARY:
            pass #type = matches[1]
        elif token == TOKEN_NOTE:
            text = matches[2]
            if not text.strip() or (text and text[0] != "@"):
                note = text
                self.parse_note_data(level+1)
            else:
                self.ignore_sub_junk(level+1)
        else:
            self.barf(level+1)
        matches = self.get_next()
    self.backup()
    return (ftype,note)
def parse_person_object(self,level,state):
    """
    Parse an inline OBJE structure attached to a person.

    FORM, TITL, FILE and NOTE values are collected first.  A form of
    "url" adds a Url to state.person; anything else is treated as a
    media file: it is located with find_file, registered once per path
    in self.media_map / the database, and referenced from the person.
    """
    form = ""
    filename = ""
    title = "no title"
    note = ""
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            self.backup()
            break
        token = matches[1]
        if token == TOKEN_FORM:
            form = matches[2].lower()
        elif token == TOKEN_TITL:
            title = matches[2]
        elif token == TOKEN_FILE:
            filename = matches[2]
        elif token == TOKEN_NOTE:
            note = matches[2]
        elif token == TOKEN_UNKNOWN:
            self.ignore_sub_junk(level+1)
        else:
            self.barf(level+1)

    if form == "url":
        link = RelLib.Url()
        link.set_path(filename)
        link.set_description(title)
        state.person.add_url(link)
        return

    (found, path) = self.find_file(filename, self.dir_path)
    if not found:
        # keep a reference to the missing file, with normalised separators
        self.warn(_("Could not import %s") % filename)
        path = filename.replace('\\', '/')

    handle = self.media_map.get(path)
    if handle is None:
        media = RelLib.MediaObject()
        media.set_path(path)
        media.set_description(title)
        media.set_mime_type(Mime.get_type(os.path.abspath(path)))
        self.db.add_object(media, self.trans)
        self.media_map[path] = media.handle
    else:
        media = self.db.get_object_from_handle(handle)

    media_ref = RelLib.MediaRef()
    media_ref.set_reference_handle(media.handle)
    media_ref.set_note(note)
    state.person.add_media_reference(media_ref)
def parse_family_object(self,level):
    """
    Parse an inline OBJE structure attached to the current family.

    FORM, TITL, FILE and NOTE values are collected from the sub-lines.
    When a FORM was given, the file is located with find_file,
    registered once per path in self.media_map / the database, and a
    media reference is added to self.family.

    The level test is performed before any tag test, so that a FORM,
    TITL, FILE or NOTE line that belongs to the enclosing record (a
    lower level) ends the structure instead of being consumed by it.
    """
    form = ""
    filename = ""
    title = ""
    note = ""
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            self.backup()
            break
        elif matches[1] == TOKEN_FORM:
            form = matches[2].lower()
        elif matches[1] == TOKEN_TITL:
            title = matches[2]
        elif matches[1] == TOKEN_FILE:
            filename = matches[2]
        elif matches[1] == TOKEN_NOTE:
            note = matches[2]
        else:
            self.barf(level+1)

    if not form:
        # without a FORM line nothing is imported
        return

    (ok,path) = self.find_file(filename,self.dir_path)
    if not ok:
        # keep a reference to the missing file, with normalised separators
        self.warn(_("Could not import %s") % filename)
        path = filename.replace('\\','/')
    photo_handle = self.media_map.get(path)
    if photo_handle is None:
        photo = RelLib.MediaObject()
        photo.set_path(path)
        photo.set_description(title)
        photo.set_mime_type(Mime.get_type(os.path.abspath(path)))
        self.db.add_object(photo, self.trans)
        self.media_map[path] = photo.handle
    else:
        photo = self.db.get_object_from_handle(photo_handle)
    oref = RelLib.MediaRef()
    oref.set_reference_handle(photo.handle)
    oref.set_note(note)
    self.family.add_media_reference(oref)
def parse_residence(self,address,level):
    """
    Fill `address` from the sub-lines of a RESI structure.

    DATE, ADDR, PLAC, PHON, SOUR and NOTE lines update the address;
    several tags are skipped together with their sub-lines; anything
    else is reported through barf.  The first line below `level` is
    pushed back and ends the structure.
    """
    note = ""
    skipped = (TOKEN_IGNORE,TOKEN_CAUS,TOKEN_STAT,
               TOKEN_TEMP,TOKEN_OBJE,TOKEN_TYPE)
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            self.backup()
            return

        token = matches[1]
        value = matches[2]
        if token == TOKEN_DATE:
            address.set_date_object(self.extract_date(value))
        elif token == TOKEN_ADDR:
            address.set_street(value)
            self.parse_address(address, level+1)
        elif token in skipped:
            self.ignore_sub_junk(level+1)
        elif token == TOKEN_SOUR:
            address.add_source_reference(self.handle_source(matches, level+1))
        elif token == TOKEN_PLAC:
            # a place is stored as the street and parsed like an address
            address.set_street(value)
            self.parse_address(address, level+1)
        elif token == TOKEN_PHON:
            address.set_street("Unknown")
            address.set_phone(value)
        elif token == TOKEN_NOTE:
            note = self.parse_note(matches, address, level+1, note)
        else:
            self.barf(level+1)
def parse_address(self,address,level):
    """
    Fill `address` from the sub-lines of an ADDR structure.

    The first ADDR/ADR1/ADR2 line replaces the street; later ones are
    appended to it separated by a comma.  CITY, STAE, POST, CTRY, PHON
    and NOTE update the matching fields.  A line below `level` ends the
    structure: a PHON line is consumed as the phone number, any other
    line is pushed back.
    """
    street_started = False
    note = ""
    street_tokens = (TOKEN_ADDR, TOKEN_ADR1, TOKEN_ADR2)
    while True:
        matches = self.get_next()
        token = matches[1]
        value = matches[2]

        if int(matches[0]) < level:
            if token == TOKEN_PHON:
                address.set_phone(value)
            else:
                self.backup()
            return

        if token in street_tokens:
            current = address.get_street()
            if street_started:
                street = "%s,%s" % (current, value)
            else:
                street = value
                street_started = True
            address.set_street(street)
        elif token == TOKEN_CITY:
            address.set_city(value)
        elif token == TOKEN_STAE:
            address.set_state(value)
        elif token == TOKEN_POST:
            address.set_postal_code(value)
        elif token == TOKEN_CTRY:
            address.set_country(value)
        elif token == TOKEN_PHON:
            address.set_phone(value)
        elif token == TOKEN_NOTE:
            note = self.parse_note(matches, address, level+1, note)
        elif token == TOKEN__LOC:
            pass    # ignore unsupported extended location syntax
        elif token == TOKEN__NAME:
            pass    # ignore
        else:
            self.barf(level+1)
def parse_ord(self,lds_ord,level):
    """
    Fill an LDS ordinance from its sub-lines.

    TEMP, DATE, FAMC, PLAC, SOUR, NOTE and STAT lines update lds_ord;
    other tags are reported through barf.  The first line below `level`
    is pushed back and ends the structure.
    """
    note = ""
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            self.backup()
            break

        token = matches[1]
        value = matches[2]
        if token == TOKEN_TEMP:
            temple = self.extract_temple(matches)
            if temple:
                lds_ord.set_temple(temple)
        elif token == TOKEN_DATE:
            lds_ord.set_date_object(self.extract_date(value))
        elif token == TOKEN_FAMC:
            # strip the surrounding "@" characters from the pointer
            lds_ord.set_family_handle(self.find_family_handle(value[1:-1]))
        elif token == TOKEN_PLAC:
            try:
                place = self.find_or_create_place(value)
                place.set_title(value)
                load_place_values(place, value)
                lds_ord.set_place_handle(place.handle)
                self.ignore_sub_junk(level+1)
            except NameError:
                # NOTE(review): a NameError here is silently ignored in the
                # original code; the reason is not visible from this block
                pass
        elif token == TOKEN_SOUR:
            lds_ord.add_source_reference(self.handle_source(matches, level+1))
        elif token == TOKEN_NOTE:
            note = self.parse_note(matches, lds_ord, level+1, note)
        elif token == TOKEN_STAT:
            lds_ord.set_status(lds_status.get(value, RelLib.LdsOrd.STATUS_NONE))
        else:
            self.barf(level+1)
def parse_person_event(self,event,level):
    """
    Fill a personal event from its sub-lines.

    Handles TYPE, _PRIV, DATE, SOUR, PLAC, CAUS, NOTE/OFFI and witness
    tags; a number of other tags are skipped with their sub-lines and
    anything else is reported through barf.  The first line below
    `level` is pushed back; the accumulated note text, if any, is stored
    on the event at that point.

    Fixes relative to the previous revision:
      * several NOTE/OFFI lines are now joined with newlines instead of
        each one discarding the text collected so far;
      * the fallback name of a custom event type is the TYPE value
        (matches[2]) rather than matches[3], which other handlers in
        this file use as the tag name of the line.
    """
    note = ""
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            if note:
                event.set_note(note)
            self.backup()
            break
        elif matches[1] == TOKEN_TYPE:
            if event.get_type() == (RelLib.Event.CUSTOM,""):
                # untyped custom event: derive the type from the TYPE value
                if matches[2] in ged2gramps:
                    name = (ged2gramps[matches[2]],'')
                else:
                    val = self.gedsource.tag2gramps(matches[2])
                    if val:
                        name = (RelLib.Event.CUSTOM,val)
                    else:
                        name = (RelLib.Event.CUSTOM,matches[2])
                event.set_type(name)
            else:
                event.set_description(matches[2])
        elif matches[1] == TOKEN__PRIV and matches[2] == "Y":
            event.set_privacy(True)
        elif matches[1] == TOKEN_DATE:
            event.set_date_object(self.extract_date(matches[2]))
        elif matches[1] == TOKEN_SOUR:
            event.add_source_reference(self.handle_source(matches,level+1))
        elif matches[1] == TOKEN_PLAC:
            val = matches[2]
            n = event.get_type()
            if self.is_ftw and n[0] in [RelLib.Event.OCCUPATION,RelLib.Event.DEGREE]:
                # for these event types in FTW files the PLAC value is
                # stored as the description instead of a place
                event.set_description(val)
                self.ignore_sub_junk(level+1)
            else:
                place = self.find_or_create_place(val)
                place_handle = place.handle
                place.set_title(matches[2])
                load_place_values(place,matches[2])
                event.set_place_handle(place_handle)
                self.ignore_sub_junk(level+1)
        elif matches[1] == TOKEN_CAUS:
            info = matches[2]
            event.set_cause(info)
            self.parse_cause(event,level+1)
        elif matches[1] in (TOKEN_NOTE,TOKEN_OFFI):
            info = matches[2]
            if note == "":
                note = info
            else:
                note = "%s\n%s" % (note,info)
        elif matches[1] in (TOKEN__GODP, TOKEN__WITN, TOKEN__WTN):
            if matches[2][0] == "@":
                witness_handle = self.find_person_handle(self.map_gid(matches[2][1:-1]))
                witness = RelLib.Witness(RelLib.Event.ID,witness_handle)
            else:
                witness = RelLib.Witness(RelLib.Event.NAME,matches[2])
            event.add_witness(witness)
            self.ignore_sub_junk(level+1)
        elif matches[1] in (TOKEN_RELI, TOKEN_TIME, TOKEN_ADDR,TOKEN_IGNORE,
                            TOKEN_STAT,TOKEN_TEMP,TOKEN_OBJE):
            self.ignore_sub_junk(level+1)
        else:
            self.barf(level+1)
def parse_adopt_event(self,event,level):
    """
    Fill an adoption event from its sub-lines.

    Handles DATE, SOUR, FAMC, PLAC, CAUS and NOTE; TYPE is accepted but
    not used; several other tags are skipped with their sub-lines and
    anything else is reported through barf.  The first line below
    `level` is pushed back; accumulated note text is stored on the event
    at that point.

    Several NOTE lines are joined with newlines (previously each new
    NOTE discarded the text collected so far).
    """
    note = ""
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            if note != "":
                event.set_note(note)
            self.backup()
            break
        elif matches[1] == TOKEN_DATE:
            event.set_date_object(self.extract_date(matches[2]))
        elif matches[1] in (TOKEN_TIME,TOKEN_ADDR,TOKEN_IGNORE,
                            TOKEN_STAT,TOKEN_TEMP,TOKEN_OBJE):
            self.ignore_sub_junk(level+1)
        elif matches[1] == TOKEN_SOUR:
            event.add_source_reference(self.handle_source(matches,level+1))
        elif matches[1] == TOKEN_FAMC:
            handle = self.find_family_handle(matches[2][1:-1])
            mrel,frel = self.parse_adopt_famc(level+1)
            if self.person.get_main_parents_family_handle() == handle:
                self.person.set_main_parent_family_handle(None)
            self.person.add_parent_family_handle(handle)
            if mrel != RelLib.ChildRefType.BIRTH or frel != RelLib.ChildRefType.BIRTH:
                # the relationship types are parsed but not stored yet
                print("NOT FIXED YET")
        elif matches[1] == TOKEN_PLAC:
            val = matches[2]
            place = self.find_or_create_place(val)
            place_handle = place.handle
            place.set_title(matches[2])
            load_place_values(place,matches[2])
            event.set_place_handle(place_handle)
            self.ignore_sub_junk(level+1)
        elif matches[1] == TOKEN_TYPE:
            # eventually do something intelligent here
            pass
        elif matches[1] == TOKEN_CAUS:
            info = matches[2]
            event.set_cause(info)
            self.parse_cause(event,level+1)
        elif matches[1] == TOKEN_NOTE:
            info = matches[2]
            if note == "":
                note = info
            else:
                note = "%s\n%s" % (note,info)
        else:
            self.barf(level+1)
def parse_adopt_famc(self,level):
    """
    Read the ADOP sub-line of an adoption FAMC.

    Returns (mrel, frel).  Both start as _TYPE_ADOPT; an ADOP value of
    "HUSB" switches mrel to _TYPE_BIRTH and "WIFE" switches frel to
    _TYPE_BIRTH.  Tags other than ADOP are reported through barf.
    """
    mrel = _TYPE_ADOPT
    frel = _TYPE_ADOPT
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            break
        if matches[1] != TOKEN_ADOP:
            self.barf(level+1)
        elif matches[2] == "HUSB":
            mrel = _TYPE_BIRTH
        elif matches[2] == "WIFE":
            frel = _TYPE_BIRTH
    self.backup()
    return (mrel, frel)
def parse_person_attr(self,attr,level):
    """
    Fill a person attribute from its sub-lines.

    TYPE may name the attribute, PLAC supplies the value when none is
    set yet, SOUR adds a source reference and NOTE lines are joined with
    blank lines and stored on the attribute at the end.  CAUS, DATE,
    TIME, ADDR, STAT, TEMP, OBJE and ignorable tags are skipped with
    their sub-lines.
    """
    skipped = (TOKEN_CAUS,TOKEN_DATE,TOKEN_TIME,TOKEN_ADDR,
               TOKEN_IGNORE,TOKEN_STAT,TOKEN_TEMP,TOKEN_OBJE)
    note = ""
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            self.backup()
            break

        token = matches[1]
        value = matches[2]
        if token == TOKEN_TYPE:
            if attr.get_type() == "":
                if value in ged2gramps:
                    name = ged2gramps[value]
                else:
                    name = self.gedsource.tag2gramps(value) or value
                attr.set_name(name)
        elif token in skipped:
            # DATE is part of this group, so dates are never recorded here
            self.ignore_sub_junk(level+1)
        elif token == TOKEN_SOUR:
            attr.add_source_reference(self.handle_source(matches, level+1))
        elif token == TOKEN_PLAC:
            if attr.get_value() == "":
                attr.set_value(value)
            self.ignore_sub_junk(level+1)
        elif token == TOKEN_NOTE:
            if note == "":
                note = value
            else:
                note = "%s\n\n%s" % (note, value)
        else:
            self.barf(level+1)

    if note != "":
        attr.set_note(note)
def parse_family_event(self,event,level):
    """
    Fill a family event from its sub-lines.

    Handles TYPE, DATE, CAUS, SOUR, PLAC, OFFI, NOTE and witness tags;
    several other tags are skipped with their sub-lines and anything
    else is reported through barf.  The first line below `level` is
    pushed back; accumulated note text is stored on the event then.
    """
    skipped = (TOKEN_TIME,TOKEN_IGNORE,TOKEN_ADDR,TOKEN_STAT,
               TOKEN_TEMP,TOKEN_HUSB,TOKEN_WIFE,TOKEN_OBJE)
    note = ""
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            if note:
                event.set_note(note)
            self.backup()
            break

        token = matches[1]
        value = matches[2]
        if token == TOKEN_TYPE:
            if event.get_type()[0] == RelLib.Event.CUSTOM:
                event.set_type((RelLib.Event.CUSTOM, value))
            else:
                # for a known event type the TYPE value goes into the note
                note = 'Status = %s\n' % value
        elif token == TOKEN_DATE:
            event.set_date_object(self.extract_date(value))
        elif token == TOKEN_CAUS:
            event.set_cause(value)
            self.parse_cause(event, level+1)
        elif token in skipped:
            self.ignore_sub_junk(level+1)
        elif token == TOKEN_SOUR:
            event.add_source_reference(self.handle_source(matches, level+1))
        elif token == TOKEN_PLAC:
            place = self.find_or_create_place(value)
            place_handle = place.handle
            place.set_title(value)
            load_place_values(place, value)
            event.set_place_handle(place_handle)
            self.ignore_sub_junk(level+1)
        elif token == TOKEN_OFFI:
            if note == "":
                note = value
            else:
                note = note + "\n" + value
        elif token == TOKEN_NOTE:
            note = self.parse_note(matches, event, level+1, note)
        elif token in (TOKEN__WITN, TOKEN__WTN):
            if value[0] == "@":
                # pointer to a person record
                person_handle = self.find_person_handle(self.map_gid(value[1:-1]))
                witness = RelLib.Witness(RelLib.Event.ID, person_handle)
            else:
                witness = RelLib.Witness(RelLib.Event.NAME, value)
            event.add_witness(witness)
            self.ignore_sub_junk(level+1)
        else:
            self.barf(level+1)
def parse_source_reference(self,source,level):
    """
    Reads the data associated with a SOUR reference.

    PAGE, DATE, DATA, QUAY and NOTE/TEXT lines update `source`; OBJE and
    REFN are skipped with their sub-lines; anything else is reported
    through barf.  The first line below `level` is pushed back and the
    collected note text is stored on the source reference.

    A QUAY value that is not an integer is ignored.  Previously it made
    the method return in the middle of the structure, without storing
    the note and leaving the remaining sub-lines to the caller.
    """
    note = ""
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            source.set_note(note)
            self.backup()
            return
        elif matches[1] == TOKEN_PAGE:
            source.set_page(matches[2])
        elif matches[1] == TOKEN_DATE:
            source.set_date_object(self.extract_date(matches[2]))
        elif matches[1] == TOKEN_DATA:
            date,text = self.parse_source_data(level+1)
            if date:
                d = self.dp.parse(date)
                source.set_date_object(d)
            source.set_text(text)
        elif matches[1] in (TOKEN_OBJE,TOKEN_REFN):
            self.ignore_sub_junk(level+1)
        elif matches[1] == TOKEN_QUAY:
            try:
                val = int(matches[2])
            except ValueError:
                # unusable confidence value: skip this line only
                continue
            # values above 1 are shifted up by one for the stored level
            if val > 1:
                source.set_confidence_level(val+1)
            else:
                source.set_confidence_level(val)
        elif matches[1] in (TOKEN_NOTE,TOKEN_TEXT):
            note = self.parse_comment(matches,source,level+1,note)
        else:
            self.barf(level+1)
def parse_source_data(self,level):
    """
    Parses the source data.

    Returns a (date, text) tuple holding the values of the last DATE and
    TEXT sub-lines ("" when absent).  Other tags go through barf.
    """
    date = ""
    text = ""
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            break
        token = matches[1]
        if token == TOKEN_DATE:
            date = matches[2]
        elif token == TOKEN_TEXT:
            text = matches[2]
        else:
            self.barf(level+1)
    self.backup()
    return (date, text)
def parse_marnm(self,person,text):
    """
    Add a married name, built from `text`, as an alternate name.

    A single word is used as the surname of a copy of the primary name;
    with several words the last one is the surname and the rest the
    first name.  Blank text adds nothing.
    """
    words = text.split()
    if not words:
        return

    if len(words) == 1:
        name = RelLib.Name(person.primary_name)
        name.set_surname(words[0])
    else:
        name = RelLib.Name()
        name.set_surname(words[-1])
        name.set_first_name(' '.join(words[0:-1]))
    name.set_type((RelLib.Name.MARRIED,''))
    person.add_alternate_name(name)
def parse_header_head(self):
    """
    Validate that this is a GEDCOM file.

    Reads one line from the lexer and raises Errors.GedcomError unless
    its token is HEAD.
    """
    first = self.lexer.read()
    if first[1] == TOKEN_HEAD:
        return
    raise Errors.GedcomError("%s is not a GEDCOM file" % self.filename)
def parse_header_source(self):
    """
    Parse the level-1 lines of the GEDCOM header.

    Records the generating program, configures the lexer (broken CONC
    handling, character set conversion) and fills in the default source
    (title, author, publication info, data items).  The first level-0
    line is pushed back and ends the header.

    The submitter's sub-lines are parsed at level 2.  Parsing them at
    level 1, as before, made parse_subm consume every remaining level-1
    header line (FILE, CHAR, ...) after SUBM.
    """
    genby = ""
    while True:
        matches = self.get_next()
        if int(matches[0]) < 1:
            self.backup()
            return
        elif matches[1] == TOKEN_SOUR:
            self.gedsource = self.gedmap.get_from_source_tag(matches[2])
            self.lexer.set_broken_conc(self.gedsource.get_conc())
            if matches[2] == "FTW":
                self.is_ftw = 1
            genby = matches[2]
        elif matches[1] == TOKEN_NAME:
            pass
        elif matches[1] == TOKEN_VERS:
            self.def_src.set_data_item('Generated by',"%s %s" %
                                       (genby,matches[2]))
        elif matches[1] == TOKEN_FILE:
            # keep only the file name, whichever path separator was used
            filename = os.path.basename(matches[2]).split('\\')[-1]
            self.def_src.set_title(_("Import from %s") % filename)
        elif matches[1] == TOKEN_COPR:
            self.def_src.set_publication_info(matches[2])
        elif matches[1] == TOKEN_SUBM:
            self.parse_subm(2)
        elif matches[1] in (TOKEN_CORP,TOKEN_DATA,TOKEN_SUBN,TOKEN_LANG):
            self.ignore_sub_junk(2)
        elif matches[1] == TOKEN_DEST:
            # files written by GRAMPS take their dialect from DEST
            if genby == "GRAMPS":
                self.gedsource = self.gedmap.get_from_source_tag(matches[2])
                self.lexer.set_broken_conc(self.gedsource.get_conc())
        elif matches[1] == TOKEN_CHAR and not self.override:
            if matches[2] == "ANSEL":
                self.lexer.set_charset_fn(ansel_to_utf8)
            elif matches[2] not in ("UNICODE","UTF-8","UTF8"):
                self.lexer.set_charset_fn(latin_to_utf8)
            self.ignore_sub_junk(2)
        elif matches[1] == TOKEN_GEDC:
            self.ignore_sub_junk(2)
        elif matches[1] == TOKEN__SCHEMA:
            self.parse_ftw_schema(2)
        elif matches[1] == TOKEN_PLAC:
            self.parse_place_form(2)
        elif matches[1] == TOKEN_DATE:
            # parse_date consumes the sub-lines (TIME) of the DATE line
            date = self.parse_date(2)
            date.date = matches[2]
            self.def_src.set_data_item('Creation date',matches[2])
        elif matches[1] == TOKEN_NOTE:
            # the header note is read but not stored
            pass
        elif matches[1] == TOKEN_UNKNOWN:
            self.ignore_sub_junk(2)
        else:
            self.barf(2)
def parse_subm(self, level):
    """
    Parse submitter sub-lines, using NAME as the default source's author.

    Lines are read until one below `level` is seen (it is pushed back).
    Every tag other than NAME is skipped together with its sub-lines,
    i.e. the lines at `level` + 1 and deeper.  The skip depth used to be
    the constant 2, which is only right when `level` is 1.
    """
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            self.backup()
            return
        elif matches[1] == TOKEN_NAME:
            self.def_src.set_author(matches[2])
        else:
            self.ignore_sub_junk(level+1)
def parse_ftw_schema(self,level):
    """
    Parse the sub-lines of a _SCHEMA header structure.

    INDI and FAM sub-structures are passed to their schema parsers.
    Any other tag is reported through barf with `level` + 1, matching
    the other sub-structure parsers in this file (the constant 2 was
    used before, which does not follow the level this method is
    called with).
    """
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            self.backup()
            return
        elif matches[1] == TOKEN_INDI:
            self.parse_ftw_indi_schema(level+1)
        elif matches[1] == TOKEN_FAM:
            self.parse_ftw_fam_schema(level+1)
        else:
            self.barf(level+1)
def parse_ftw_indi_schema(self,level):
    """
    Record the labels of the individual section of an FTW schema.

    For each line at `level` or deeper the label found by parse_label is
    stored in ged2gramps under matches[1].
    """
    # NOTE(review): the family schema parser keys its table on matches[3]
    # while this one uses matches[1] -- confirm which is intended
    matches = self.get_next()
    while int(matches[0]) >= level:
        ged2gramps[matches[1]] = self.parse_label(level+1)
        matches = self.get_next()
    self.backup()
def parse_label(self,level):
    """
    Return the value of the first LABL sub-line.

    Returns None when a line below `level` is reached first (that line
    is pushed back).  Tags other than LABL are reported through barf
    with `level` + 1, matching the other sub-structure parsers in this
    file (the constant 2 was used before, which does not follow the
    level this method is called with).
    """
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            self.backup()
            return None
        elif matches[1] == TOKEN_LABL:
            return matches[2]
        else:
            self.barf(level+1)
def parse_ftw_fam_schema(self,level):
    """
    Record the labels of the family section of an FTW schema.

    For each line at `level` or deeper the label found by parse_label is
    stored in ged2fam_custom under matches[3].
    """
    matches = self.get_next()
    while int(matches[0]) >= level:
        ged2fam_custom[matches[3]] = self.parse_label(level+1)
        matches = self.get_next()
    self.backup()
def ignore_sub_junk(self,level):
    """
    Discard lines until one below `level` is found.

    That line is pushed back so the caller can read it again.
    """
    matches = self.get_next()
    while int(matches[0]) >= level:
        matches = self.get_next()
    self.backup()
def ignore_change_data(self,level):
    """
    Skip a CHAN structure when the next line starts one.

    Any other line is pushed back untouched.
    """
    matches = self.get_next()
    if matches[1] != TOKEN_CHAN:
        self.backup()
        return
    self.ignore_sub_junk(level+1)
def parse_place_form(self,level):
    """
    Read the FORM sub-line of the header PLAC structure.

    Each comma separated item of the FORM value is lower-cased, stripped
    and looked up in _place_match; the function found (or _empty_func)
    is appended to the module level _place_field list.
    """
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            self.backup()
            return
        if matches[1] != TOKEN_FORM:
            self.barf(level+1)
            continue
        for raw in matches[2].split(','):
            key = raw.lower().strip()
            _place_field.append(_place_match.get(key, _empty_func))
def parse_date(self,level):
    """
    Read the sub-lines of a DATE line into a new DateStruct.

    A TIME sub-line sets the `time` attribute; other tags go through
    barf.  The DateStruct is returned once a line below `level` is seen.
    """
    date = DateStruct()
    while True:
        matches = self.get_next()
        if int(matches[0]) < level:
            break
        if matches[1] == TOKEN_TIME:
            date.time = matches[2]
        else:
            self.barf(level+1)
    self.backup()
    return date
def extract_date(self,text):
    """
    Convert GEDCOM date text into a date object.

    The text is tried, in order, against rangeRegexp, spanRegexp and
    calRegexp.  Ranges and spans are built from the two parsed dates,
    using the calendar named for the first one.  A single date with a
    calendar prefix is parsed and then given that calendar.  Anything
    else is handed to the date parser unchanged.
    """
    dateobj = RelLib.Date()
    calendars = {
        "FRENCH R" : RelLib.Date.CAL_FRENCH,
        "JULIAN"   : RelLib.Date.CAL_JULIAN,
        "HEBREW"   : RelLib.Date.CAL_HEBREW,
        }
    try:
        # compound dates: a range first, then a span
        compound = ((rangeRegexp, RelLib.Date.MOD_RANGE),
                    (spanRegexp, RelLib.Date.MOD_SPAN))
        for pattern, modifier in compound:
            found = pattern.match(text)
            if not found:
                continue
            (cal1, data1, cal2, data2) = found.groups()
            # only the first calendar name is used
            cal = calendars.get(cal1, RelLib.Date.CAL_GREGORIAN)
            start = self.dp.parse(data1)
            stop = self.dp.parse(data2)
            dateobj.set(RelLib.Date.QUAL_NONE, modifier, cal,
                        start.get_start_date() + stop.get_start_date())
            return dateobj

        found = calRegexp.match(text)
        if not found:
            return self.dp.parse(text)

        (abt, cal, data) = found.groups()
        dateobj = self.dp.parse("%s %s" % (abt, data))
        if cal in calendars:
            dateobj.set_calendar(calendars[cal])
        return dateobj
    except IOError:
        return self.dp.set_text(text)
def handle_source(self,matches,level):
    """
    Build a SourceRef for a SOUR line and return it.

    A value that does not start with "@" is an inline source: a handle
    is kept per (title, note) pair in self.inline_srcs and the sub-lines
    are skipped.  Otherwise the value is a pointer: the source is found
    or created and the sub-lines are read by parse_source_reference.
    """
    source_ref = RelLib.SourceRef()
    value = matches[2]
    if value and value[0] != "@":
        key = (value, '')
        handle = self.inline_srcs.get(key, Utils.create_id())
        self.inline_srcs[key] = handle
        self.ignore_sub_junk(level+1)
    else:
        # strip the surrounding "@" characters from the pointer
        handle = self.find_or_create_source(value[1:-1]).handle
        self.parse_source_reference(source_ref, level)
    source_ref.set_base_handle(handle)
    return source_ref
def resolve_refns(self):
    """
    Currently a no-op: the method returns immediately.

    The statements after the return are unreachable.  They re-key
    imported people by their REFN value and are kept for reference.
    """
    return

    # ---- disabled code below ----
    prefix = self.db.iprefix
    index = 0
    new_pmax = self.db.pmap_index
    for pid in self.added:
        index += 1
        if pid not in self.refn:
            continue
        val = self.refn[pid]
        new_key = prefix % val
        new_pmax = max(new_pmax, val)

        person = self.db.get_person_from_handle(pid, self.trans)

        if not self.db.has_person_handle(new_key):
            # new ID is not used
            rekey = True
        else:
            # same person, just change it; otherwise give up trying to
            # use the refn as a key
            tp = self.db.get_person_from_handle(new_key, self.trans)
            rekey = (person == tp)

        if rekey:
            self.db.remove_person(pid, self.trans)
            person.set_handle(new_key)
            person.set_gramps_id(new_key)
            self.db.add_person(person, self.trans)

    self.db.pmap_index = new_pmax
def invert_year(self,subdate):
    """Return the first four items of subdate as a tuple with the third negated."""
    first, second, year, last = subdate[0], subdate[1], subdate[2], subdate[3]
    return (first, second, -year, last)
#--------------------------------------------------------------------
#
#
#
#--------------------------------------------------------------------
def func_person_name(self,matches,state):
    """
    Handle a NAME line of an individual.

    The value is split with snameRegexp (surname given first) or, when
    that does not match, with nameRegexp; when neither matches the whole
    value becomes the first name.  The first name seen for a person
    becomes the primary name, later ones alternate names.  The sub-lines
    are then read by parse_name.

    A failed nameRegexp match is detected explicitly; the previous bare
    except also hid unrelated errors.
    """
    value = matches[2]
    name = RelLib.Name()
    m = snameRegexp.match(value)
    if m:
        (n,n2) = m.groups()
        names = (n2,'',n,'','')
    else:
        full = nameRegexp.match(value)
        if full:
            names = full.groups()
        else:
            names = (value,"","","","")
    if names[0]:
        name.set_first_name(names[0].strip())
    if names[2]:
        name.set_surname(names[2].strip())
    if names[4]:
        name.set_suffix(names[4].strip())
    if state.name_cnt == 0:
        state.person.set_primary_name(name)
    else:
        state.person.add_alternate_name(name)
    state.name_cnt += 1
    self.parse_name(name,2,state)
def func_person_asso(self, matches, state):
    """
    Handle an ASSO line: add a reference to another person.

    The pointer in matches[2] is resolved to a person handle and a
    PersonRef is added to self.person.  The sub-lines (level 2) may give
    the relationship (RELA), sources (SOUR) and a note (NOTE).

    Changes relative to the previous revision:
      * leftover debugging prints were removed;
      * sources and unknown tags of the level-2 sub-lines are handled
        with level 3, so that a SOUR no longer consumes the RELA/NOTE
        lines that follow it.
    """
    gid = matches[2]
    # strip the surrounding "@" characters from the pointer
    handle = self.find_person_handle(self.map_gid(gid[1:-1]))
    ref = RelLib.PersonRef()
    ref.ref = handle
    self.person.add_person_ref(ref)
    while True:
        matches = self.get_next()
        if int(matches[0]) < 2:
            self.backup()
            return
        elif matches[1] == TOKEN_RELA:
            ref.rel = matches[2]
        elif matches[1] == TOKEN_SOUR:
            ref.add_source_reference(self.handle_source(matches,3))
        elif matches[1] == TOKEN_NOTE:
            note = self.parse_note(matches,ref,2,"")
            ref.set_note(note)
        else:
            self.barf(3)
def func_person_alt_name(self,matches,state):
    """
    Add the value of the line as an alternate name of the person.

    The value is split with nameRegexp; when it does not match, the
    whole value becomes the first name.  A failed match is detected
    explicitly; the previous bare except also hid unrelated errors.
    """
    aka = RelLib.Name()
    found = nameRegexp.match(matches[2])
    if found:
        names = found.groups()
    else:
        names = (matches[2],"","","","")
    if names[0]:
        aka.set_first_name(names[0])
    if names[2]:
        aka.set_surname(names[2])
    if names[4]:
        aka.set_suffix(names[4])
    state.person.add_alternate_name(aka)
def func_person_object(self,matches,state):
    """
    Handle an OBJE line of an individual.

    Pointers to media records (values starting with "@") are reported
    through barf; inline objects are read by parse_person_object.
    """
    value = matches[2]
    is_pointer = value and value[0] == '@'
    if not is_pointer:
        self.parse_person_object(2, state)
    else:
        self.barf(2)
def func_person_note(self,matches,state):
    """Parse a NOTE line of the person; the result is kept in self.note."""
    text = self.parse_note(matches, self.person, 1, state.note)
    self.note = text
def func_person_sex(self,matches,state):
    """
    Set the person's gender from a SEX line.

    A value starting with "M" is male, one starting with "F" is female;
    an empty or any other value is unknown.
    """
    initial = matches[2][:1]
    if initial == "M":
        gender = RelLib.Person.MALE
    elif initial == "F":
        gender = RelLib.Person.FEMALE
    else:
        gender = RelLib.Person.UNKNOWN
    state.person.set_gender(gender)
def func_person_bapl(self,matches,state):
    """Add an LDS baptism ordinance to the person and parse its sub-lines."""
    ordinance = RelLib.LdsOrd()
    ordinance.set_type(RelLib.LdsOrd.BAPTISM)
    person = state.person
    person.lds_ord_list.append(ordinance)
    self.parse_ord(ordinance, 2)
def func_person_endl(self,matches,state):
    """Add an LDS endowment ordinance to the person and parse its sub-lines."""
    ordinance = RelLib.LdsOrd()
    ordinance.set_type(RelLib.LdsOrd.ENDOWMENT)
    person = state.person
    person.lds_ord_list.append(ordinance)
    self.parse_ord(ordinance, 2)
def func_person_slgc(self,matches,state):
    """Add an LDS sealing-to-parents ordinance to the person and parse its sub-lines."""
    ordinance = RelLib.LdsOrd()
    ordinance.set_type(RelLib.LdsOrd.SEAL_TO_PARENTS)
    person = state.person
    person.lds_ord_list.append(ordinance)
    self.parse_ord(ordinance, 2)
def func_person_fams(self,matches,state):
    """
    Handle a FAMS line: link the person to a family.

    Any inline note found among the sub-lines is added to the state's
    note text.
    """
    pointer = matches[2][1:-1]      # drop the surrounding "@" characters
    family_handle = self.find_family_handle(pointer)
    state.person.add_family_handle(family_handle)
    note = self.parse_optional_note(2)
    state.add_to_note(note)
def func_person_famc(self,matches,state):
    """
    Handle a FAMC line: link the person to a parent family.

    Nothing is added when the family is already in the parent family
    list of self.person.  When the relationship type is not one of
    rel_types, the family is also cleared as the main parent family (if
    it was) and a diagnostic line is printed.
    """
    ftype, note = self.parse_famc_type(2, state.person)
    handle = self.find_family_handle(matches[2][1:-1])

    already_linked = False
    for entry in self.person.get_parent_family_handle_list():
        if entry[0] == handle:
            already_linked = True
            break
    if already_linked:
        return

    person = state.person
    if int(ftype) in rel_types:
        person.add_parent_family_handle(handle)
        return

    if person.get_main_parents_family_handle() == handle:
        person.set_main_parent_family_handle(None)
    person.add_parent_family_handle(handle)
    print("NEED TO CHANGE CHILDREF TO %s %s" % (ftype, ftype))
def func_person_resi(self,matches,state):
    """Handle a RESI line: add a new address and fill it from the sub-lines."""
    residence = RelLib.Address()
    state.person.add_address(residence)
    self.parse_residence(residence, 2)
def func_person_addr(self,matches,state):
    """Handle an ADDR line: build an address from the value and sub-lines."""
    address = RelLib.Address()
    address.set_street(matches[2])
    self.parse_address(address, 2)
    state.person.add_address(address)
def func_person_phon(self,matches,state):
    """Handle a PHON line: add an address that only carries the phone number."""
    phone_only = RelLib.Address()
    phone_only.set_street("Unknown")
    phone_only.set_phone(matches[2])
    state.person.add_address(phone_only)
def func_person_birt(self,matches,state):
    """
    Handle a BIRT line: create a birth event for the person.

    The first birth becomes the person's birth reference; further ones
    are added as ordinary event references.
    """
    event = RelLib.Event()
    event.set_gramps_id(self.emapper.find_next())
    description = matches[2]
    if description:
        event.set_description(description)
    event.set_type((RelLib.Event.BIRTH,""))
    self.parse_person_event(event, 2)
    person_event_name(event, state.person)
    self.db.add_event(event, self.trans)

    event_ref = RelLib.EventRef()
    event_ref.set_reference_handle(event.handle)
    event_ref.set_role((RelLib.EventRef.PRIMARY,''))

    person = state.person
    if not person.get_birth_ref():
        person.set_birth_ref(event_ref)
    else:
        person.add_event_ref(event_ref)
def func_person_adop(self,matches,state):
    """Handle an ADOP line: create an adoption event and reference it from the person."""
    adoption = RelLib.Event()
    adoption.set_gramps_id(self.emapper.find_next())
    adoption.set_type((RelLib.Event.ADOPT,''))
    self.parse_adopt_event(adoption, 2)
    person_event_name(adoption, state.person)
    self.db.add_event(adoption, self.trans)

    ref = RelLib.EventRef()
    ref.set_reference_handle(adoption.handle)
    ref.set_role((RelLib.EventRef.PRIMARY,''))
    state.person.add_event_ref(ref)
def func_person_deat(self,matches,state):
    """
    Handle a DEAT line: create a death event for the person.

    The first death becomes the person's death reference; further ones
    are added as ordinary event references.
    """
    event = RelLib.Event()
    event.set_gramps_id(self.emapper.find_next())
    description = matches[2]
    if description:
        event.set_description(description)
    event.set_type((RelLib.Event.DEATH,""))
    self.parse_person_event(event, 2)
    person_event_name(event, state.person)
    self.db.add_event(event, self.trans)

    event_ref = RelLib.EventRef()
    event_ref.set_reference_handle(event.handle)
    event_ref.set_role((RelLib.EventRef.PRIMARY,''))

    person = state.person
    if not person.get_death_ref():
        person.set_death_ref(event_ref)
    else:
        person.add_event_ref(event_ref)
def func_person_even(self,matches,state):
    """
    Handle a generic EVEN line.

    The event is parsed first.  A custom event whose name is listed in
    self.attrs is stored as an attribute (with the event description as
    its value); every other event is added to the database and
    referenced from the person.
    """
    event = RelLib.Event()
    event.set_gramps_id(self.emapper.find_next())
    description = matches[2]
    if description:
        event.set_description(description)
    self.parse_person_event(event, 2)

    (kind, label) = event.get_type()
    if kind == RelLib.Event.CUSTOM and label in self.attrs:
        attr = RelLib.Attribute()
        attr.set_type((RelLib.Event.CUSTOM, self.gedattr[label]))
        attr.set_value(event.get_description())
        state.person.add_attribute(attr)
        return

    self.db.add_event(event, self.trans)
    event_ref = RelLib.EventRef()
    event_ref.set_reference_handle(event.handle)
    event_ref.set_role((RelLib.EventRef.PRIMARY,''))
    state.person.add_event_ref(event_ref)
def func_person_sour(self,matches,state):
    """Handle a SOUR line of an individual: attach the source reference to the person."""
    reference = self.handle_source(matches, 2)
    person = state.person
    person.add_source_reference(reference)
def func_person_refn(self,matches,state):
    """
    Remember an integer REFN value for the current person.

    Values not matched by intRE are ignored; so is any error raised
    while storing the number in self.refn.
    """
    value = matches[2]
    if not intRE.match(value):
        return
    try:
        self.refn[self.person.handle] = int(value)
    except:
        # best effort: failures are deliberately ignored
        pass
def func_person_attr(self,matches,state):
    """
    Store the line as an attribute of the person.

    matches[3] is looked up in self.gedattr.  Unknown names become a
    custom attribute carrying that name, known ones use the mapped type
    with an empty name.  matches[2] is the attribute value.
    """
    tag = matches[3]
    atype = self.gedattr.get(tag, RelLib.Attribute.CUSTOM)
    if atype == RelLib.Attribute.CUSTOM:
        label = tag
    else:
        label = ''

    attr = RelLib.Attribute()
    attr.set_type((atype, label))
    attr.set_value(matches[2])
    state.person.add_attribute(attr)
def func_person_event(self,matches,state):
    """
    Default handler for level-1 lines of an individual.

    A tag (matches[3]) listed in self.gedattr is stored as an attribute.
    Any other tag becomes an event: its type comes from ged2gramps when
    the tag is listed there, otherwise it is a custom event named by
    gedsource.tag2gramps or, failing that, by the tag itself.
    """
    tag = matches[3].strip()

    if tag in self.gedattr:
        attr = RelLib.Attribute()
        attr.set_type((self.gedattr[tag],''))
        attr.set_value(matches[2])
        state.person.add_attribute(attr)
        self.parse_person_attr(attr, 2)
        return

    event = RelLib.Event()
    event.set_gramps_id(self.emapper.find_next())
    if tag in ged2gramps:
        event.set_type((ged2gramps[tag],''))
    else:
        custom_name = self.gedsource.tag2gramps(tag) or tag
        event.set_type((RelLib.Event.CUSTOM, custom_name))

    self.parse_person_event(event, 2)
    # the value of the line overrides a description set while parsing
    if matches[2]:
        event.set_description(matches[2])
    person_event_name(event, state.person)
    self.db.add_event(event, self.trans)

    event_ref = RelLib.EventRef()
    event_ref.set_reference_handle(event.handle)
    event_ref.set_role((RelLib.EventRef.PRIMARY,''))
    state.person.add_event_ref(event_ref)
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
def parse_name(self,name,level,state):
    """
    Parses the person's name information.

    Each sub-line is dispatched to the handler registered in
    self.name_func for its token (func_name_undefined otherwise).  When
    a line below `level` is reached, the text collected in the sub-state
    is stored as the name's note and the line is pushed back.
    """
    sub_state = CurrentState()
    sub_state.person = state.person
    sub_state.name = name
    sub_state.level = level

    matches = self.get_next()
    while int(matches[0]) >= level:
        handler = self.name_func.get(matches[1], self.func_name_undefined)
        handler(matches, sub_state)
        matches = self.get_next()

    name.set_note(sub_state.get_text())
    self.backup()
def func_name_undefined(self,matches,state):
    """Report a name sub-line that has no registered handler."""
    sub_level = state.level + 1
    self.barf(sub_level)
def func_name_note(self,matches,state):
    """Parse a NOTE sub-line of a name and add the text to the state's note."""
    text = self.parse_note(matches, state.name, state.level+1, state.note)
    state.add_to_note(text)
def func_name_alia(self,matches,state):
    """
    Add the value of an ALIA sub-line as an alternate name of the person.

    The value is split with nameRegexp; when it does not match, the
    whole value becomes the first name.  A failed match is detected
    explicitly; the previous bare except also hid unrelated errors.
    """
    aka = RelLib.Name()
    found = nameRegexp.match(matches[2])
    if found:
        names = found.groups()
    else:
        names = (matches[2],"","","","")
    if names[0]:
        aka.set_first_name(names[0])
    if names[2]:
        aka.set_surname(names[2])
    if names[4]:
        aka.set_suffix(names[4])
    state.person.add_alternate_name(aka)
def func_name_npfx(self,matches,state):
    """Store the NPFX value as the title of the name."""
    prefix = matches[2]
    state.name.set_title(prefix)
def func_name_givn(self,matches,state):
    """Store the GIVN value as the first name."""
    given = matches[2]
    state.name.set_first_name(given)
def func_name_spfx(self,matches,state):
    """Store the SPFX value as the surname prefix."""
    prefix = matches[2]
    state.name.set_surname_prefix(prefix)
def func_name_surn(self,matches,state):
    """Store the SURN value as the surname."""
    surname = matches[2]
    state.name.set_surname(surname)
def func_name_marnm(self,matches,state):
    """Pass the stripped married-name value to parse_marnm for the person."""
    married = matches[2].strip()
    self.parse_marnm(state.person, married)
def func_name_titl(self,matches,state):
    """Store the TITL value as the suffix of the name."""
    title = matches[2]
    state.name.set_suffix(title)
def func_name_nsfx(self,matches,state):
    """Store the NSFX value as the suffix, unless a suffix is already set."""
    name = state.name
    if name.get_suffix() != "":
        return
    name.set_suffix(matches[2])
def func_name_nick(self,matches,state):
    """Store the NICK value as the person's nick name."""
    nick = matches[2]
    state.person.set_nick_name(nick)
def func_name_aka(self,matches,state):
    """
    Handle an AKA value.

    A single word becomes the person's nick name.  Otherwise an
    alternate name is added whose surname is the last word and whose
    first name is the remaining words.
    """
    words = matches[2].split()
    if len(words) == 1:
        state.person.set_nick_name(matches[2])
        return

    alternate = RelLib.Name()
    alternate.set_surname(words[-1])
    alternate.set_first_name(' '.join(words[:len(words)-1]))
    state.person.add_alternate_name(alternate)
def func_name_sour(self,matches,state):
    """Attach the source reference of a SOUR sub-line to the name."""
    reference = self.handle_source(matches, state.level+1)
    name = state.name
    name.add_source_reference(reference)
def parse_repository(self,repo):
    """
    Read the sub-lines of a repository record and dispatch each one.

    Every line at level 1 or deeper goes to the handler registered in
    self.repo_func for its token (skip_record when none is registered).
    The first level-0 line is pushed back, and any text collected in the
    parse state is stored as the repository's note.
    """
    state = CurrentState()
    state.repo = repo

    matches = self.get_next()
    while int(matches[0]) >= 1:
        handler = self.repo_func.get(matches[1], self.skip_record)
        handler(matches, state)
        matches = self.get_next()

    self.backup()
    if state.get_text():
        state.repo.set_note(state.get_text())
def func_repo_name(self,matches,state):
    """Store the NAME value as the repository name."""
    repo_name = matches[2]
    state.repo.set_name(repo_name)
def func_repo_addr(self,matches,state):
    """
    Add an address, built from an ADDR value, to the repository.

    The value is tried against addr_re, addr2_re and addr3_re in turn;
    every pattern that matches sets the fields it provides (a later
    match overwrites an earlier one).  When no pattern matches, the
    whole value is used as the street.
    """
    text = matches[2]
    addr = RelLib.Address()

    # groups 0, 2, 3, 4 and 5 hold street, city, state, postal code and
    # country; the shorter layouts simply provide fewer of them
    group_index = (0, 2, 3, 4, 5)
    all_setters = (addr.set_street, addr.set_city, addr.set_state,
                   addr.set_postal_code, addr.set_country)
    layouts = ((addr_re, all_setters),
               (addr2_re, all_setters[:4]),
               (addr3_re, all_setters[:3]))

    matched = False
    for pattern, setters in layouts:
        found = pattern.match(text)
        if not found:
            continue
        groups = found.groups()
        for setter, index in zip(setters, group_index):
            setter(groups[index].strip())
        matched = True

    if not matched:
        addr.set_street(text)
    state.repo.add_address(addr)
def skip_record(self,matches,state):
    """Ignore the current line by skipping everything at level 2 or deeper."""
    sub_level = 2
    self.ignore_sub_junk(sub_level)
def extract_temple(self, matches):
    """
    Return the temple code for the value of a TEMP line.

    The whole value is tried first, then its first word: a key of
    lds.temple_to_abrev is returned as is, a key of lds.temple_codes is
    translated through that table.  When neither lookup succeeds a
    warning is issued and the value is returned unchanged.

    A blank value no longer raises IndexError when its (missing) first
    word is looked up.
    """
    def get_code(code):
        if code in lds.temple_to_abrev:
            return code
        elif code in lds.temple_codes:
            return lds.temple_codes[code]
        return None

    value = matches[2]
    c = get_code(value)
    if c:
        return c

    # fall back to the first word of the value
    words = value.split()
    if words:
        c = get_code(words[0])
        if c:
            return c

    ## Okay we have no clue which temple this is.
    ## We should tell the user and store it anyway.
    self.warn("Invalid temple code '%s'" % (value,))
    return value
def person_event_name(event,person):
    """
    Give a non-custom event without a description a default one.

    The description is built from _event_person_str with the event's
    name (from Utils.personal_events) and the person's display name.
    """
    kind = event.get_type()[0]
    if kind == RelLib.Event.CUSTOM:
        return
    if event.get_description():
        return

    values = {
        'event_name' : Utils.personal_events[event.get_type()[0]],
        'person' : NameDisplay.displayer.display(person),
        }
    event.set_description(_event_person_str % values)
def load_place_values(place,text):
    """
    Spread a comma separated place text over the place's main location.

    Nothing is done unless the number of items equals the number of
    functions in _place_field.  Each function is called with the
    location and the stripped item at the same position.
    """
    items = text.split(',')
    if len(items) != len(_place_field):
        return
    location = place.get_main_location()
    for setter, item in zip(_place_field, items):
        setter(location, item.strip())
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
def create_id():
    """Return a new identifier obtained from Utils.create_id()."""
    new_id = Utils.create_id()
    return new_id
if __name__ == "__main__":
    # Command line test driver: imports the GEDCOM file named by the
    # first argument into "test.grdb" and prints the elapsed time.
    import sys
    import hotshot#, hotshot.stats
    from GrampsDb import gramps_db_factory, gramps_db_reader_factory

    def callback(val):
        # progress callback: just echo the value
        print(val)

    codeset = None
    profile_run = False     # set to True to profile the import with hotshot

    db_class = gramps_db_factory(const.app_gramps)
    database = db_class()
    database.load("test.grdb",lambda x: None, mode="w")

    gedcom_file = sys.argv[1]
    np = NoteParser(gedcom_file,False)
    g = GedcomParser(database,gedcom_file,callback, codeset,
                     np.get_map(),np.get_lines(),np.get_persons())

    if profile_run:
        pr = hotshot.Profile('mystats.profile')
        print("Start")
        pr.runcall(g.parse_gedcom_file,False)
        print("Finished")
        pr.close()
##         print "Loading profile"
##         stats = hotshot.stats.load('mystats.profile')
##         print "done"
##         stats.strip_dirs()
##         stats.sort_stats('time','calls')
##         stats.print_stats(100)
    else:
        started = time.time()
        g.parse_gedcom_file(False)
        print(time.time() - started)