2007-02-08 06:09:46 +00:00
# Gramps - a GTK+/GNOME based genealogy program
# Copyright (C) 2000-2006 Donald N. Allingham
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# $Id: _ReadGedcom.py 8032 2007-02-03 17:11:05Z hippy $
"Import from GEDCOM"
2007-02-25 05:26:32 +00:00
__revision__ = "$Revision: $"
__author__ = "Don Allingham"
2007-02-08 06:09:46 +00:00
# standard python modules
2007-02-25 05:26:32 +00:00
2007-02-08 06:09:46 +00:00
import re
# GRAMPS modules
from _GedcomInfo import *
2007-02-11 06:03:29 +00:00
from _GedcomTokens import *
2007-02-09 05:40:49 +00:00
import RelLib
from DateHandler._DateParser import DateParser
2007-02-08 06:09:46 +00:00
2007-02-25 05:26:32 +00:00
# constants #
2007-02-08 06:09:46 +00:00
2007-02-25 05:26:32 +00:00
2007-02-08 06:09:46 +00:00
# Reverse lookup tables, built by inverting the tables star-imported from
# _GedcomInfo: each non-empty value of the source table becomes a key that
# maps back to the source table's key. Entries whose value is the empty
# string are skipped.
#
# NOTE(review): the initialisation of the two target dictionaries is not
# visible in the file as it stands; they are created here so the loops have
# something to fill - confirm they are not expected to come from elsewhere.

# Personal events first, then family events, so that a family entry wins if
# the same key appears in both tables (same order as the original loops).
GED2GRAMPS = {}
for _table in (personalConstantEvents, familyConstantEvents):
    for (_val, _key) in _table.items():
        if _key != "":
            GED2GRAMPS[_key] = _val

GED2ATTR = {}
for (_val, _key) in personalConstantAttributes.items():
    if _key != "":
        GED2ATTR[_key] = _val
2007-02-09 23:16:57 +00:00
2007-02-08 06:09:46 +00:00
# GedLine
2007-02-25 05:26:32 +00:00
# Patterns used by extract_date() to pick apart a GEDCOM date string.
# All of them tolerate leading whitespace and are anchored at the start of
# the text because they are applied with .match().

# Quality prefix: one of INT, EST or CAL followed by whitespace.
# Groups: (prefix, remainder of the text).
MOD = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$")
# Single date carrying a "@#D<name>@" calendar escape, optionally preceded
# by ABT, BEF or AFT. Groups: (prefix or None, calendar name, date text).
CAL = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D([^@]+)@\s*(.*)$")
# "BET <date> AND <date>" where BOTH dates carry a calendar escape; the
# escape is not optional in this pattern.
# Groups: (calendar 1, date text 1, calendar 2, date text 2).
RANGE = re.compile(r"\s*BET\s+@#D([^@]+)@\s*(.*)\s+AND\s+@#D([^@]+)@\s*(.*)$")
# "FROM <date> TO <date>" where BOTH dates carry a calendar escape; the
# escape is not optional in this pattern.
# Groups: (calendar 1, date text 1, calendar 2, date text 2).
SPAN = re.compile(r"\s*FROM\s+@#D([^@]+)@\s*(.*)\s+TO\s+@#D([^@]+)@\s*(.*)$")
2007-02-08 06:09:46 +00:00
2007-02-25 05:26:32 +00:00
2007-02-08 06:09:46 +00:00
2007-02-25 05:26:32 +00:00
2007-02-08 06:09:46 +00:00
2007-02-25 05:26:32 +00:00
2007-02-08 06:09:46 +00:00
# Lookup from the text of a SEX line to the RelLib.Person gender constant.
# GedLine.calc_sex() falls back to RelLib.Person.UNKNOWN for any other text.
SEX_MAP = {
    'F' : RelLib.Person.FEMALE,
    'M' : RelLib.Person.MALE,
    }
2007-02-09 05:40:49 +00:00
# GedLine - represents a tokenized version of a GEDCOM line
class GedcomDateParser(DateParser):
    """
    DateParser subclass used for GEDCOM dates.

    It supplies a month_to_int table keyed by the lower case three letter
    English month abbreviations (presumably replacing a table of the same
    name on DateParser - confirm against the base class).
    """

    month_to_int = {
        'jan' : 1,  'feb' : 2,  'mar' : 3,  'apr' : 4,
        'may' : 5,  'jun' : 6,  'jul' : 7,  'aug' : 8,
        'sep' : 9,  'oct' : 10, 'nov' : 11, 'dec' : 12,
        }
# GedLine - represents a tokenized version of a GEDCOM line
2007-02-08 06:09:46 +00:00
class GedLine:
    """
    GedLine is a class that represents a tokenized GEDCOM line.

    A line is held as:

      Line Number, Level, Token Value, Token Text, and Data

    Data depends on the context of the Token Value. For most tokens it is
    just a text string. For tokens that have an entry in the module level
    MAP_DATA table, the matching calc_* method replaces the data with a
    richer value while the line is being constructed, for example:

      TOKEN_DATE    - the result of extract_date()
      TOKEN_SEX     - RelLib.Person gender item
      TOKEN_UNKNOWN - checked to see if this is a known event or attribute
    """

    def __init__(self, data):
        """
        Builds the line from an indexable record laid out as
        (level, token, data text, token text, line number).

        If the level is 0, then this is a top level instance. In this case
        the token text may be an identifier wrapped in '@' characters. When
        it is, the token becomes TOKEN_ID, the token text is reduced to the
        bare identifier and the data is stripped.

        If this is not the top level, we check the MAP_DATA table to see if
        there is a conversion function for the token, and run it.
        """
        self.line = data[4]
        self.level = data[0]
        self.token = data[1]
        self.token_text = data[3].strip()
        self.data = data[2]

        if self.level == 0:
            text = self.token_text
            # startswith/endswith are False on an empty string, so no
            # separate emptiness test is needed
            if text.startswith('@') and text.endswith('@'):
                self.token = TOKEN_ID
                self.token_text = text[1:-1]
                self.data = self.data.strip()
        else:
            func = MAP_DATA.get(self.token)
            if func:
                func(self)

    def calc_sex(self):
        """
        Converts the data field to a RelLib token indicating the gender.
        Text that is not in SEX_MAP becomes RelLib.Person.UNKNOWN.
        """
        self.data = SEX_MAP.get(self.data.strip(), RelLib.Person.UNKNOWN)

    def calc_date(self):
        """
        Converts the data field to a RelLib.Date object
        """
        self.data = extract_date(self.data)

    def calc_unknown(self):
        """
        Checks to see if the token text maps to a known GEDCOM event. If
        so, the token changes to TOKEN_GEVENT (gedcom event) and the data
        becomes a new RelLib.Event.

        Otherwise the token text is checked against the known GEDCOM
        attributes; on a match the token changes to TOKEN_ATTR and the data
        becomes a new RelLib.Attribute.

        If neither table knows the token text, the line is left untouched.
        """
        # NOTE(review): in the code as it stands the looked-up type and the
        # original text of the line are not stored on the new Event or
        # Attribute - confirm whether lines that did so went missing.
        token = GED2GRAMPS.get(self.token_text)
        if token:
            event = RelLib.Event()
            self.token = TOKEN_GEVENT
            self.data = event
        else:
            token = GED2ATTR.get(self.token_text)
            if token:
                attr = RelLib.Attribute()
                self.token = TOKEN_ATTR
                self.data = attr

    def calc_note(self):
        """
        Detects a note that is a reference to another record. If the
        stripped data is longer than two characters and is wrapped in '@'
        characters, the token changes to TOKEN_RNOTE and the data becomes
        the bare identifier. Any other data is left untouched.
        """
        gid = self.data.strip()
        if len(gid) > 2 and gid[0] == '@' and gid[-1] == '@':
            self.token = TOKEN_RNOTE
            self.data = gid[1:-1]

    def calc_nchi(self):
        """
        Replaces the data with a new RelLib.Attribute and changes the token
        to TOKEN_ATTR.
        """
        # NOTE(review): the attribute is given neither a type nor a value in
        # the code as it stands - confirm whether that is intended.
        attr = RelLib.Attribute()
        self.data = attr
        self.token = TOKEN_ATTR

    def calc_attr(self):
        """
        Replaces the data with a new RelLib.Attribute whose type is a
        custom attribute type named after the token text, and changes the
        token to TOKEN_ATTR.
        """
        attr = RelLib.Attribute()
        attr.set_type((RelLib.AttributeType.CUSTOM, self.token_text))
        self.data = attr
        self.token = TOKEN_ATTR

    def __repr__(self):
        """
        Returns "<line>: <level> (<token>:<token text>) <data>".
        """
        return "%d: %d (%d:%s) %s" % (self.line, self.level, self.token,
                                      self.token_text, self.data)
2007-02-09 05:40:49 +00:00
# MAP_DATA - kept as a separate table, so that it is static, and does not
# have to be initialized every time in the GedLine constructor
2007-02-08 06:09:46 +00:00
# Token value -> GedLine method that converts the data of a line carrying
# that token. GedLine.__init__ looks the token up here for every line that
# is not at level 0 and calls the method with the new instance.
MAP_DATA = {
    TOKEN_UNKNOWN : GedLine.calc_unknown,
    TOKEN_DATE    : GedLine.calc_date,
    TOKEN_SEX     : GedLine.calc_sex,
    TOKEN_NOTE    : GedLine.calc_note,
    TOKEN_NCHI    : GedLine.calc_nchi,
    TOKEN__STAT   : GedLine.calc_attr,
    TOKEN__UID    : GedLine.calc_attr,
    TOKEN_AFN     : GedLine.calc_attr,
    }
2007-02-08 06:09:46 +00:00
# extract_date
2007-02-09 05:40:49 +00:00
2007-02-25 05:26:32 +00:00
# Single shared parser instance, created once at import time and used by
# extract_date() for every date it converts.
DATE_CNV = GedcomDateParser()
2007-02-08 06:09:46 +00:00
def extract_date(text):
    """
    Converts the specified text to a RelLib.Date object.

    The text is examined in this order:

      1. A leading INT/EST/CAL prefix (MOD pattern) is removed and looked
         up in QUALITY_MAP to obtain the quality of the date.
      2. "BET ... AND ..." (RANGE pattern) gives a MOD_RANGE date.
      3. "FROM ... TO ..." (SPAN pattern) gives a MOD_SPAN date.
      4. A single date with a calendar escape (CAL pattern).
      5. Anything else is handed to the date parser unchanged.

    For ranges and spans the calendar of the first date is used for the
    whole value; the calendar of the second date is ignored.

    If an IOError is raised along the way, the result of
    DATE_CNV.set_text(text) is returned instead.
    """
    dateobj = RelLib.Date()
    try:
        # extract out the MOD line
        match = MOD.match(text)
        if match:
            (mod, text) = match.groups()
            qual = QUALITY_MAP.get(mod, RelLib.Date.QUAL_NONE)
        else:
            qual = RelLib.Date.QUAL_NONE

        # parse a range or a span if we match, if so, return. The two
        # forms only differ in the pattern and in the resulting modifier.
        for (pattern, modifier) in ((RANGE, RelLib.Date.MOD_RANGE),
                                    (SPAN, RelLib.Date.MOD_SPAN)):
            match = pattern.match(text)
            if match:
                (cal1, data1, cal2, data2) = match.groups()
                cal = CALENDAR_MAP.get(cal1, RelLib.Date.CAL_GREGORIAN)
                start = DATE_CNV.parse(data1)
                stop = DATE_CNV.parse(data2)
                # the quality taken from the prefix is passed on instead of
                # being discarded in favour of a fixed QUAL_NONE
                dateobj.set(qual, modifier, cal,
                            start.get_start_date() + stop.get_start_date())
                return dateobj

        match = CAL.match(text)
        if match:
            (abt, cal, data) = match.groups()
            # the ABT/BEF/AFT group is optional; formatting it in blindly
            # would hand the parser the literal text "None <date>"
            if abt:
                data = "%s %s" % (abt, data)
            dateobj = DATE_CNV.parse(data)
            dateobj.set_calendar(
                CALENDAR_MAP.get(cal, RelLib.Date.CAL_GREGORIAN))
            dateobj.set_quality(qual)
            return dateobj

        dateobj = DATE_CNV.parse(text)
        dateobj.set_quality(qual)
        return dateobj
    except IOError:
        return DATE_CNV.set_text(text)
2007-02-08 06:09:46 +00:00
# Reader - serves as the lexical analysis engine
class Reader:
    """
    Reader - serves as the lexical analysis engine.

    Lines are read from the input file a few at a time and kept as records
    of the form (level, token, data text, token text, line number) in
    current_list. New records are inserted at the front of the list and
    handed out from the back, so the front entry is always the most recent
    one. Continuation lines (CONT and CONC) are not queued; their text is
    merged into the front entry instead.
    """

    def __init__(self, ifile):
        """
        ifile is the object the GEDCOM text is read from; only its
        readline() method is used.
        """
        self.ifile = ifile
        self.current_list = []
        self.eof = False
        self.cnv = None
        self.cnt = 0
        self.index = 0
        self.func_map = {
            TOKEN_CONT : self._fix_token_cont,
            TOKEN_CONC : self._fix_token_conc,
            }

    def set_broken_conc(self, broken):
        """
        Selects how CONC lines are merged. A true value picks the variant
        that puts a space between the two pieces of text; a false value
        picks the plain concatenation that the constructor installs.
        """
        if broken:
            conc_handler = self._fix_token_broken_conc
        else:
            conc_handler = self._fix_token_conc
        self.func_map = {
            TOKEN_CONT : self._fix_token_cont,
            TOKEN_CONC : conc_handler,
            }

    def readline(self):
        """
        Returns the next line as a GedLine, or None when there is nothing
        left to hand out.
        """
        # Refill while one record is still pending, so that continuation
        # lines following it are merged in before it is handed out.
        if len(self.current_list) <= 1 and not self.eof:
            self.readahead()
        if not self.current_list:
            return None
        return GedLine(self.current_list.pop())

    def _fix_token_cont(self, data):
        """
        Merges a CONT line: its text is appended to the most recent record
        after a newline.
        """
        line = self.current_list[0]
        new_value = line[2] + '\n' + data[2]
        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])

    def _fix_token_conc(self, data):
        """
        Merges a CONC line: its text is appended directly to the most
        recent record.
        """
        line = self.current_list[0]
        new_value = line[2] + data[2]
        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])

    def _fix_token_broken_conc(self, data):
        """
        Merges a CONC line for the "broken" variant: its text is appended
        to the most recent record after a single space.
        """
        line = self.current_list[0]
        new_value = u"%s %s" % (line[2], data[2])
        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])

    def readahead(self):
        """
        Reads lines from the input until five records are queued or the
        input is exhausted, in which case self.eof is set.

        self.index counts every readline() call, including the one that
        detects the end of the input.
        """
        while len(self.current_list) < 5:
            line = self.ifile.readline()
            self.index += 1
            if not line:
                self.eof = True
                return

            # level, tag, and the rest of the line; the appended '' stands
            # in for the text of a line that consists of level and tag only
            fields = line.split(None, 2) + ['']
            if len(fields) < 3:
                # blank line, or a level with no tag: nothing to tokenize
                continue

            val = fields[2]
            try:
                level = int(fields[0])
            except ValueError:
                level = 0

            data = (level, tokens.get(fields[1], TOKEN_UNKNOWN), val,
                    fields[1], self.index)

            func = self.func_map.get(data[1])
            if func:
                # continuation line: merge into the most recent record
                func(data)
            else:
                self.current_list.insert(0, data)
2007-02-08 06:09:46 +00:00