2007-02-08 11:39:46 +05:30
|
|
|
#
|
|
|
|
# Gramps - a GTK+/GNOME based genealogy program
|
|
|
|
#
|
|
|
|
# Copyright (C) 2000-2006 Donald N. Allingham
|
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program; if not, write to the Free Software
|
|
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
#
|
|
|
|
|
|
|
|
# $Id: _ReadGedcom.py 8032 2007-02-03 17:11:05Z hippy $
|
|
|
|
|
|
|
|
"Import from GEDCOM"
|
|
|
|
|
2007-02-25 10:56:32 +05:30
|
|
|
__revision__ = "$Revision: $"
|
|
|
|
__author__ = "Don Allingham"
|
|
|
|
|
2007-02-08 11:39:46 +05:30
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# standard python modules
|
|
|
|
#
|
|
|
|
#-------------------------------------------------------------------------
|
2007-02-25 10:56:32 +05:30
|
|
|
|
2007-02-08 11:39:46 +05:30
|
|
|
import re
|
|
|
|
|
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# GRAMPS modules
|
|
|
|
#
|
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
|
|
|
|
from _GedcomInfo import *
|
2007-02-11 11:33:29 +05:30
|
|
|
from _GedcomTokens import *
|
2007-02-09 11:10:49 +05:30
|
|
|
import RelLib
|
|
|
|
from DateHandler._DateParser import DateParser
|
2007-02-08 11:39:46 +05:30
|
|
|
|
2007-02-28 10:50:30 +05:30
|
|
|
#------------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# Set up logging
|
|
|
|
#
|
|
|
|
#------------------------------------------------------------------------
|
|
|
|
import logging
|
|
|
|
LOG = logging.getLogger(".GedcomImport")
|
|
|
|
|
2007-02-08 11:39:46 +05:30
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
#
|
2007-02-25 10:56:32 +05:30
|
|
|
# constants #
|
2007-02-08 11:39:46 +05:30
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
|
2007-02-25 10:56:32 +05:30
|
|
|
# Reverse lookup built from the personal and family event tables: the
# non-empty strings stored as values there become the keys here.  Family
# entries are added second, so they win if both tables share a string.
GED2GRAMPS = {}
for _val, _key in personalConstantEvents.items():
    if _key != "":
        GED2GRAMPS[_key] = _val

for _val, _key in familyConstantEvents.items():
    if _key != "":
        GED2GRAMPS[_key] = _val
|
2007-02-08 11:39:46 +05:30
|
|
|
|
2007-02-25 10:56:32 +05:30
|
|
|
# Reverse lookup built from the personal attribute table: each non-empty
# string stored as a value there becomes a key here.
GED2ATTR = {}
for _val, _key in personalConstantAttributes.items():
    if _key != "":
        GED2ATTR[_key] = _val
|
2007-02-10 04:46:57 +05:30
|
|
|
|
2007-02-08 11:39:46 +05:30
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# Regular expressions and lookup tables used to decode GEDCOM values
|
|
|
|
#
|
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
|
2007-02-25 10:56:32 +05:30
|
|
|
# Leading quality keyword (INT, EST or CAL) followed by the rest of the date.
MOD = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$")

# Optional ABT/BEF/AFT keyword, a "@#D<calendar>@" escape, then the date text.
# The keyword group is None when it is absent.
CAL = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D([^@]+)@\s*(.*)$")

# "BET <date> AND <date>", each date introduced by a calendar escape.
RANGE = re.compile(r"\s*BET\s+@#D([^@]+)@\s*(.*)\s+AND\s+@#D([^@]+)@\s*(.*)$")

# "FROM <date> TO <date>", each date introduced by a calendar escape.
SPAN = re.compile(r"\s*FROM\s+@#D([^@]+)@\s*(.*)\s+TO\s+@#D([^@]+)@\s*(.*)$")
|
2007-02-08 11:39:46 +05:30
|
|
|
|
2007-02-25 10:56:32 +05:30
|
|
|
# Calendar name found inside a "@#D...@" escape -> RelLib calendar constant.
# Names missing from this table are treated as Gregorian by extract_date.
CALENDAR_MAP = {
    "FRENCH R": RelLib.Date.CAL_FRENCH,
    "JULIAN": RelLib.Date.CAL_JULIAN,
    "HEBREW": RelLib.Date.CAL_HEBREW,
}
|
|
|
|
|
2007-02-25 10:56:32 +05:30
|
|
|
# Quality keyword captured by MOD -> RelLib date quality constant.
# Note that both CAL and INT map to QUAL_CALCULATED.
QUALITY_MAP = {
    'CAL': RelLib.Date.QUAL_CALCULATED,
    'INT': RelLib.Date.QUAL_CALCULATED,
    'EST': RelLib.Date.QUAL_ESTIMATED,
}
|
|
|
|
|
2007-02-25 10:56:32 +05:30
|
|
|
# GEDCOM SEX value -> RelLib gender constant.  Any other value is mapped
# to RelLib.Person.UNKNOWN by GedLine.calc_sex.
SEX_MAP = {
    'F': RelLib.Person.FEMALE,
    'M': RelLib.Person.MALE,
}
|
|
|
|
|
2007-02-09 11:10:49 +05:30
|
|
|
#-----------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# GedcomDateParser - DateParser using the English three-letter month names
|
|
|
|
#
|
|
|
|
#-----------------------------------------------------------------------
|
|
|
|
class GedcomDateParser(DateParser):
    """
    DateParser subclass whose month table holds only the English
    three-letter month abbreviations, numbered 1 (jan) to 12 (dec).
    """

    month_to_int = {
        'jan': 1,
        'feb': 2,
        'mar': 3,
        'apr': 4,
        'may': 5,
        'jun': 6,
        'jul': 7,
        'aug': 8,
        'sep': 9,
        'oct': 10,
        'nov': 11,
        'dec': 12,
    }
|
|
|
|
|
|
|
|
#-----------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# GedLine - represents a tokenized version of a GEDCOM line
|
|
|
|
#
|
|
|
|
#-----------------------------------------------------------------------
|
2007-02-08 11:39:46 +05:30
|
|
|
class GedLine:
    """
    GedLine is a class that represents a GEDCOM line. The form of a GEDCOM
    line is:

    <LEVEL> <TOKEN> <TEXT>

    This gets parsed into

    Line Number, Level, Token Value, Token Text, and Data

    Data is dependent on the context of the Token Value. For most tokens,
    this is just a text string. However, for certain tokens where we know
    the context, we can provide some value. The current parsed tokens are:

    TOKEN_DATE    - RelLib.Date
    TOKEN_SEX     - RelLib.Person gender item
    TOKEN_UNKNOWN - Check to see if this is a known event or attribute
    TOKEN_NOTE    - Check to see if the text is a reference to a note
    TOKEN_NCHI, TOKEN__STAT, TOKEN__UID, TOKEN_AFN - RelLib.Attribute
    """

    def __init__(self, data):
        """
        Builds the line from a 5-item sequence laid out as
        (level, token value, text, token text, line number).

        If the level is 0, then this is a top level instance. In this case,
        we may find items in the form of:

        <LEVEL> @ID@ <ITEM>

        If this is not the top level, we check the MAP_DATA array to see if
        there is a conversion function for the data.
        """
        self.line = data[4]
        self.level = data[0]
        self.token = data[1]
        self.token_text = data[3].strip()
        self.data = data[2]

        if self.level == 0:
            text = self.token_text
            # A cross reference needs at least one character between the
            # two '@' signs; a bare "@" or "@@" would otherwise produce an
            # empty ID.  This is the same test calc_note applies.
            if len(text) > 2 and text[0] == '@' and text[-1] == '@':
                self.token = TOKEN_ID
                self.token_text = text[1:-1]
                self.data = self.data.strip()
        else:
            func = MAP_DATA.get(self.token)
            if func:
                func(self)

    def calc_sex(self):
        """
        Converts the data field to a RelLib token indicating the gender.
        Values not found in SEX_MAP become RelLib.Person.UNKNOWN.
        """
        self.data = SEX_MAP.get(self.data.strip(), RelLib.Person.UNKNOWN)

    def calc_date(self):
        """
        Converts the data field to a RelLib.Date object
        """
        self.data = extract_date(self.data)

    def calc_unknown(self):
        """
        Checks to see if the token maps a known GEDCOM event. If so, we
        change the type from UNKNOWN to TOKEN_GEVENT (gedcom event), and
        the data is replaced by a RelLib.Event of the associated type whose
        description is the original text.

        Otherwise the token is looked up as a known attribute. If found,
        the type becomes TOKEN_ATTR and the data is replaced by a
        RelLib.Attribute holding the original text as its value.

        If neither lookup succeeds the line is left unchanged.
        """
        token = GED2GRAMPS.get(self.token_text)
        if token:
            event = RelLib.Event()
            event.set_description(self.data)
            event.set_type(token)
            self.token = TOKEN_GEVENT
            self.data = event
        else:
            token = GED2ATTR.get(self.token_text)
            if token:
                attr = RelLib.Attribute()
                attr.set_value(self.data)
                attr.set_type(token)
                self.token = TOKEN_ATTR
                self.data = attr

    def calc_note(self):
        """
        Checks whether the text has the form @ID@. If so, the token becomes
        TOKEN_RNOTE and the data becomes the ID without the '@' signs.
        Otherwise the line is left unchanged.
        """
        gid = self.data.strip()
        if len(gid) > 2 and gid[0] == '@' and gid[-1] == '@':
            self.token = TOKEN_RNOTE
            self.data = gid[1:-1]

    def calc_nchi(self):
        """
        Converts the data field to a RelLib.Attribute of type NUM_CHILD
        and changes the token to TOKEN_ATTR.
        """
        attr = RelLib.Attribute()
        attr.set_value(self.data)
        attr.set_type(RelLib.AttributeType.NUM_CHILD)
        self.data = attr
        self.token = TOKEN_ATTR

    def calc_attr(self):
        """
        Converts the data field to a custom RelLib.Attribute named after
        the token text and changes the token to TOKEN_ATTR.
        """
        attr = RelLib.Attribute()
        attr.set_value(self.data)
        attr.set_type((RelLib.AttributeType.CUSTOM, self.token_text))
        self.data = attr
        self.token = TOKEN_ATTR

    def __repr__(self):
        """
        Returns "<line>: <level> (<token>:<token text>) <data>"
        """
        return "%d: %d (%d:%s) %s" % (self.line, self.level, self.token,
                                      self.token_text, self.data)
|
|
|
|
|
2007-02-09 11:10:49 +05:30
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# MAP_DATA - kept as a separate table, so that it is static, and does not
|
|
|
|
# have to be initialized every time in the GedLine constructor
|
|
|
|
#
|
|
|
|
#-------------------------------------------------------------------------
|
2007-02-08 11:39:46 +05:30
|
|
|
# Token value -> GedLine method that converts the raw text of a line below
# level 0.  GedLine.__init__ looks the token up here and, when an entry
# exists, calls it with the new instance.
MAP_DATA = {
    TOKEN_UNKNOWN: GedLine.calc_unknown,
    TOKEN_DATE: GedLine.calc_date,
    TOKEN_SEX: GedLine.calc_sex,
    TOKEN_NOTE: GedLine.calc_note,
    TOKEN_NCHI: GedLine.calc_nchi,
    TOKEN__STAT: GedLine.calc_attr,
    TOKEN__UID: GedLine.calc_attr,
    TOKEN_AFN: GedLine.calc_attr,
}
|
|
|
|
|
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# extract_date
|
|
|
|
#
|
|
|
|
#-------------------------------------------------------------------------
|
2007-02-09 11:10:49 +05:30
|
|
|
|
2007-02-25 10:56:32 +05:30
|
|
|
# Shared parser instance used by extract_date
DATE_CNV = GedcomDateParser()


def extract_date(text):
    """
    Converts the specified text to a RelLib.Date object.

    The text is examined in this order:

    1. A leading INT/EST/CAL keyword is stripped and turned into the date
       quality (see QUALITY_MAP); without one the quality is QUAL_NONE.
    2. "BET @#Dcal@ date AND @#Dcal@ date" becomes a range.
    3. "FROM @#Dcal@ date TO @#Dcal@ date" becomes a span.
    4. "[ABT|BEF|AFT] @#Dcal@ date" is parsed and given that calendar.
    5. Anything else is handed to the date parser unchanged.

    Calendar names missing from CALENDAR_MAP are treated as Gregorian.
    """
    dateobj = RelLib.Date()
    try:
        # extract out the MOD line
        match = MOD.match(text)
        if match:
            (mod, text) = match.groups()
            qual = QUALITY_MAP.get(mod, RelLib.Date.QUAL_NONE)
        else:
            qual = RelLib.Date.QUAL_NONE

        # parse a range or a span if we match, if so, return
        for (pattern, modifier) in ((RANGE, RelLib.Date.MOD_RANGE),
                                    (SPAN, RelLib.Date.MOD_SPAN)):
            match = pattern.match(text)
            if match:
                # Only the calendar of the first date is used; the second
                # calendar escape (cal2) is matched but ignored.
                (cal1, data1, cal2, data2) = match.groups()
                cal = CALENDAR_MAP.get(cal1, RelLib.Date.CAL_GREGORIAN)
                start = DATE_CNV.parse(data1)
                stop = DATE_CNV.parse(data2)
                dateobj.set(RelLib.Date.QUAL_NONE, modifier, cal,
                            start.get_start_date() + stop.get_start_date())
                dateobj.set_quality(qual)
                return dateobj

        match = CAL.match(text)
        if match:
            (abt, cal, data) = match.groups()
            # The ABT/BEF/AFT group is optional and is None when missing.
            # Only prepend it when present, so that the parser never sees
            # the literal text "None" in front of the date.
            if abt:
                data = "%s %s" % (abt, data)
            dateobj = DATE_CNV.parse(data)
            dateobj.set_calendar(CALENDAR_MAP.get(cal,
                                                  RelLib.Date.CAL_GREGORIAN))
            dateobj.set_quality(qual)
            return dateobj

        dateobj = DATE_CNV.parse(text)
        dateobj.set_quality(qual)
        return dateobj
    except IOError:
        # NOTE(review): set_text is called on the parser rather than on a
        # Date object -- confirm that DateParser provides this method.
        return DATE_CNV.set_text(text)
|
2007-02-08 11:39:46 +05:30
|
|
|
|
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# Reader - serves as the lexical analysis engine
|
|
|
|
#
|
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
class Reader:
    """
    Lexical analysis engine. Reads raw lines from a file-like object,
    merges CONT and CONC lines into the line they continue, and hands the
    result out one GedLine at a time.

    Each buffered line is a tuple laid out as
    (level, token value, text, token text, line number).
    New lines are inserted at the front of current_list and consumed from
    the end, so current_list[0] is always the most recently read line.
    """

    def __init__(self, ifile):
        """
        ifile is the object lines are read from; only its readline()
        method is used.
        """
        self.ifile = ifile
        self.current_list = []
        self.eof = False
        self.cnv = None
        self.cnt = 0
        self.index = 0
        self.func_map = {
            TOKEN_CONT : self._fix_token_cont,
            TOKEN_CONC : self._fix_token_conc,
            }

    def set_broken_conc(self, broken):
        """
        Selects how CONC lines are merged. If broken is true, a space is
        inserted between the two pieces of text; otherwise they are joined
        directly.
        """
        if broken:
            conc_func = self._fix_token_broken_conc
        else:
            conc_func = self._fix_token_conc
        self.func_map = {
            TOKEN_CONT : self._fix_token_cont,
            TOKEN_CONC : conc_func,
            }

    def readline(self):
        """
        Returns the next line as a GedLine, or None when no line is left.

        None is also returned if the line cannot be converted to a GedLine;
        in that case the failure is logged instead of being silently
        discarded.
        """
        # Keep at least one line of lookahead, so that continuation lines
        # are merged into a line before it is handed out.
        if len(self.current_list) <= 1 and not self.eof:
            self.readahead()
        if not self.current_list:
            return None
        data = self.current_list.pop()
        try:
            return GedLine(data)
        except Exception:
            logging.getLogger(".GedcomImport").warning(
                "Failed to convert GEDCOM line %s", data[4], exc_info=True)
            return None

    def _fix_token_cont(self, data):
        """
        Appends the text of a CONT line to the most recently read line,
        separated by a newline.
        """
        line = self.current_list[0]
        new_value = line[2]+'\n'+data[2]
        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])

    def _fix_token_conc(self, data):
        """
        Appends the text of a CONC line to the most recently read line,
        with nothing in between.
        """
        line = self.current_list[0]
        new_value = line[2] + data[2]
        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])

    def _fix_token_broken_conc(self, data):
        """
        Appends the text of a CONC line to the most recently read line,
        separated by a single space.
        """
        line = self.current_list[0]
        new_value = u"%s %s" % (line[2], data[2])
        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])

    def readahead(self):
        """
        Reads lines until five are buffered or the end of the input is
        reached (which sets self.eof). Lines that do not start with an
        integer level followed by a tag are skipped.
        """
        while len(self.current_list) < 5:
            line = self.ifile.readline()
            self.index += 1
            if not line:
                self.eof = True
                return

            fields = line.strip('\n\r').split(None, 2)
            # A usable line has at least a level and a tag.  Blank lines and
            # lines holding nothing but a level are skipped, just like
            # lines whose level is not a number.
            if len(fields) < 2:
                continue
            try:
                level = int(fields[0])
            except ValueError:
                continue
            if len(fields) == 2:
                # tag without any text
                fields.append('')

            data = (level, tokens.get(fields[1], TOKEN_UNKNOWN), fields[2],
                    fields[1], self.index)

            func = self.func_map.get(data[1])
            # A continuation line can only be merged when there is a line
            # to merge it into; otherwise it is buffered like any other.
            if func and self.current_list:
                func(data)
            else:
                self.current_list.insert(0, data)
|
2007-02-08 11:39:46 +05:30
|
|
|
|