2007-09-07 Don Allingham <don@gramps-project.org>

* src/GrampsDb/_DbBase.py: pylint
	* src/GrampsDbUtils/_GedcomStageOne.py: added
	* src/GrampsDbUtils/_GedcomParse.py: pylint
	* src/GrampsDbUtils/_ReadGedcom.py: pylint
	* src/GrampsDbUtils/_ReadXML.py: pylint
	* src/GrampsDbUtils/_GedcomLex.py: pylint
	* src/GrampsDbUtils/_WriteXML.py: pylint
	* src/GrampsDbUtils/_GedcomUtils.py: pylint
	* src/GrampsDbUtils/_ProxyDbBase.py: pylint
	* src/GrampsDbUtils/Makefile.am: added _GedcomStageOne.py
	* po/POTFILES.in: added _GedcomStageOne.py



svn: r8943
This commit is contained in:
Don Allingham 2007-09-07 21:24:01 +00:00
parent 4f7e87cc99
commit 3ffc1a8af9
12 changed files with 938 additions and 886 deletions

View File

@ -1,3 +1,16 @@
2007-09-07 Don Allingham <don@gramps-project.org>
* src/GrampsDb/_DbBase.py: pylint
* src/GrampsDbUtils/_GedcomStageOne.py: added
* src/GrampsDbUtils/_GedcomParse.py: pylint
* src/GrampsDbUtils/_ReadGedcom.py: pylint
* src/GrampsDbUtils/_ReadXML.py: pylint
* src/GrampsDbUtils/_GedcomLex.py: pylint
* src/GrampsDbUtils/_WriteXML.py: pylint
* src/GrampsDbUtils/_GedcomUtils.py: pylint
* src/GrampsDbUtils/_ProxyDbBase.py: pylint
* src/GrampsDbUtils/Makefile.am: added _GedcomStageOne.py
* po/POTFILES.in: added _GedcomStageOne.py
2007-09-07 Zsolt Foldvari <zfoldvar@users.sourceforge.net>
* src/docgen/GtkPrint.py: Small fixes. Enable graphical reports.

View File

@ -209,6 +209,7 @@ src/GrampsDb/__init__.py
# GrampsDbUtils package
src/GrampsDbUtils/_Backup.py
src/GrampsDbUtils/_GedcomInfo.py
src/GrampsDbUtils/_GedcomStageOne.py
#src/GrampsDbUtils/_GedcomLex.py
src/GrampsDbUtils/_GedcomParse.py
src/GrampsDbUtils/_GedcomTokens.py

View File

@ -40,12 +40,6 @@ class DbBase:
but all data marked private will be hidden from the user.
"""
def __init__(self,db):
"""
Creates a new PrivateProxyDb instance.
"""
raise NotImplementedError
def set_prefixes(self, person, media, family, source, place, event,
repository, note):
raise NotImplementedError

View File

@ -11,6 +11,7 @@ pkgdata_PYTHON = \
_GedcomInfo.py\
_GedcomLex.py\
_GedcomParse.py\
_GedcomStageOne.py\
_GedcomTokens.py\
_GedcomUtils.py\
_GrampsDbWRFactories.py\

View File

@ -38,7 +38,7 @@ import re
#-------------------------------------------------------------------------
from _GedcomInfo import *
from _GedcomTokens import *
import _GedcomTokens as GedcomTokens
import RelLib
from DateHandler._DateParser import DateParser
@ -74,7 +74,7 @@ for __val in personalConstantAttributes.keys():
#-------------------------------------------------------------------------
#
# GedLine
#
#
#-------------------------------------------------------------------------
@ -102,7 +102,7 @@ SEX_MAP = {
#-----------------------------------------------------------------------
#
# GedLine - represents a tokenized version of a GEDCOM line
#
#
#-----------------------------------------------------------------------
class GedcomDateParser(DateParser):
@ -157,7 +157,7 @@ class GedLine:
if self.level == 0:
if self.token_text and self.token_text[0] == '@' \
and self.token_text[-1] == '@':
self.token = TOKEN_ID
self.token = GedcomTokens.TOKEN_ID
self.token_text = self.token_text[1:-1]
self.data = self.data.strip()
else:
@ -188,24 +188,24 @@ class GedLine:
"""
token = GED2GRAMPS.get(self.token_text)
if token:
event = RelLib.Event()
event.set_description(self.data)
event.set_type(token)
self.token = TOKEN_GEVENT
self.data = event
event = RelLib.Event()
event.set_description(self.data)
event.set_type(token)
self.token = GedcomTokens.TOKEN_GEVENT
self.data = event
else:
token = GED2ATTR.get(self.token_text)
if token:
attr = RelLib.Attribute()
attr.set_value(self.data)
attr.set_type(token)
self.token = TOKEN_ATTR
self.token = GedcomTokens.TOKEN_ATTR
self.data = attr
def calc_note(self):
gid = self.data.strip()
if len(gid) > 2 and gid[0] == '@' and gid[-1] == '@':
self.token = TOKEN_RNOTE
self.token = GedcomTokens.TOKEN_RNOTE
self.data = gid[1:-1]
def calc_nchi(self):
@ -213,14 +213,14 @@ class GedLine:
attr.set_value(self.data)
attr.set_type(RelLib.AttributeType.NUM_CHILD)
self.data = attr
self.token = TOKEN_ATTR
self.token = GedcomTokens.TOKEN_ATTR
def calc_attr(self):
attr = RelLib.Attribute()
attr.set_value(self.data)
attr.set_type((RelLib.AttributeType.CUSTOM, self.token_text))
self.data = attr
self.token = TOKEN_ATTR
self.token = GedcomTokens.TOKEN_ATTR
def __repr__(self):
return "%d: %d (%d:%s) %s" % (self.line, self.level, self.token,
@ -233,14 +233,14 @@ class GedLine:
#
#-------------------------------------------------------------------------
MAP_DATA = {
TOKEN_UNKNOWN : GedLine.calc_unknown,
TOKEN_DATE : GedLine.calc_date,
TOKEN_SEX : GedLine.calc_sex,
TOKEN_NOTE : GedLine.calc_note,
TOKEN_NCHI : GedLine.calc_nchi,
TOKEN__STAT : GedLine.calc_attr,
TOKEN__UID : GedLine.calc_attr,
TOKEN_AFN : GedLine.calc_attr,
GedcomTokens.TOKEN_UNKNOWN : GedLine.calc_unknown,
GedcomTokens.TOKEN_DATE : GedLine.calc_date,
GedcomTokens.TOKEN_SEX : GedLine.calc_sex,
GedcomTokens.TOKEN_NOTE : GedLine.calc_note,
GedcomTokens.TOKEN_NCHI : GedLine.calc_nchi,
GedcomTokens.TOKEN__STAT : GedLine.calc_attr,
GedcomTokens.TOKEN__UID : GedLine.calc_attr,
GedcomTokens.TOKEN_AFN : GedLine.calc_attr,
}
#-------------------------------------------------------------------------
@ -329,8 +329,8 @@ class Reader:
self.cnt = 0
self.index = 0
self.func_map = {
TOKEN_CONT : self.__fix_token_cont,
TOKEN_CONC : self.__fix_token_conc,
GedcomTokens.TOKEN_CONT : self.__fix_token_cont,
GedcomTokens.TOKEN_CONC : self.__fix_token_conc,
}
def readline(self):
@ -343,7 +343,7 @@ class Reader:
def __fix_token_cont(self, data):
line = self.current_list[0]
new_value = line[2]+'\n'+data[2]
new_value = line[2] + '\n' + data[2]
self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])
def __fix_token_conc(self, data):
@ -365,7 +365,7 @@ class Reader:
except:
continue
token = tokens.get(line[1], TOKEN_UNKNOWN)
token = tokens.get(line[1], GedcomTokens.TOKEN_UNKNOWN)
data = (level, token, line[2], line[1], self.index)
func = self.func_map.get(data[1])

View File

@ -93,7 +93,6 @@ import os
import sys
import re
import time
import codecs
from gettext import gettext as _
#------------------------------------------------------------------------
@ -112,27 +111,19 @@ LOG = logging.getLogger(".GedcomImport")
import Errors
import RelLib
from BasicUtils import name_displayer, UpdateCallback
import Utils
import Mime
import LdsUtils
import Utils
from _GedcomInfo import *
from _GedcomTokens import *
from _GedcomLex import Reader
from _GedcomChar import *
import _GedcomInfo as GedcomInfo
import _GedcomUtils as GedcomUtils
import _GedcomLex as GedcomLex
import _GedcomChar as GedcomChar
from GrampsDb._GrampsDbConst import EVENT_KEY
try:
import Config
DEFAULT_SOURCE = Config.get(Config.DEFAULT_SOURCE)
except:
LOG.warn("No Config module available using defaults.")
DEFAULT_SOURCE = False
#-------------------------------------------------------------------------
#
# Address/Place constants
@ -144,9 +135,6 @@ ADDR3_RE = re.compile('(.+)([\n\r]+)(.+)\s*, (.+)')
TRUNC_MSG = _("Your GEDCOM file is corrupted. "
"It appears to have been truncated.")
BAD_UTF16 = _("Your GEDCOM file is corrupted. "
"The file appears to be encoded using the UTF16 "
"character set, but is missing the BOM marker.")
#-------------------------------------------------------------------------
#
@ -216,14 +204,14 @@ MEDIA_MAP = {
#
#-------------------------------------------------------------------------
GED_2_GRAMPS = {}
for _val in personalConstantEvents.keys():
_key = personalConstantEvents[_val]
for _val in GedcomInfo.personalConstantEvents.keys():
_key = GedcomInfo.personalConstantEvents[_val]
if _key != "":
GED_2_GRAMPS[_key] = _val
GED_2_FAMILY = {}
for _val in familyConstantEvents.keys():
_key = familyConstantEvents[_val]
for _val in GedcomInfo.familyConstantEvents.keys():
_key = GedcomInfo.familyConstantEvents[_val]
if _key != "":
GED_2_FAMILY[_key] = _val
@ -239,142 +227,20 @@ PERSON_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$")
#-------------------------------------------------------------------------
#
# is_xref_value
# find_from_handle
#
#-------------------------------------------------------------------------
def is_xref_value(value):
def find_from_handle(gramps_id, table):
"""
Returns True if value is in the form of a XREF value. We assume that
if we have a leading '@' character, then we are okay.
Finds a handle corresponding to the specified GRAMPS ID. The passed
table contains the mapping. If the value is found, we return it,
otherwise we create a new handle, store it, and return it.
"""
return value and value[0] == '@'
#-------------------------------------------------------------------------
#
# StageOne
#
#-------------------------------------------------------------------------
class StageOne:
"""
The StageOne parser scans the file quickly, looking for a few things. This
includes:
1. Character set encoding
2. Number of people and families in the list
3. Child to family references, since Ancestry.com creates GEDCOM files
without the FAMC references.
"""
def __init__(self, ifile):
self.ifile = ifile
self.famc = {}
self.fams = {}
self.enc = ""
self.pcnt = 0
self.lcnt = 0
def __detect_file_decoder(self, input_file):
"""
Detects the file encoding of the file by looking for a BOM
(byte order marker) in the GEDCOM file. If we detect a UTF-16
encoded file, we must connect to a wrapper using the codecs
package.
"""
line = input_file.read(2)
if line == "\xef\xbb":
input_file.read(1)
self.enc = "UTF8"
return input_file
elif line == "\xff\xfe":
self.enc = "UTF16"
input_file.seek(0)
return codecs.EncodedFile(input_file, 'utf8', 'utf16')
elif line[0] == "\x00" or line[1] == "\x00":
raise Errors.GedcomError(BAD_UTF16)
else:
input_file.seek(0)
return input_file
def parse(self):
"""
Parse the input file.
"""
current = ""
reader = self.__detect_file_decoder(self.ifile)
for line in reader:
line = line.strip()
if not line:
continue
self.lcnt += 1
data = line.split(None, 2) + ['']
try:
(level, key, value) = data[:3]
value = value.strip()
level = int(level)
key = key.strip()
except:
LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt)
continue
if level == 0 and key[0] == '@':
if value == ("FAM", "FAMILY") :
current = key.strip()[1:-1]
elif value == ("INDI", "INDIVIDUAL"):
self.pcnt += 1
elif key in ("HUSB", "HUSBAND", "WIFE") and is_xref_value(value):
value = value[1:-1]
if self.fams.has_key(value):
self.fams[value].append(current)
else:
self.fams[value] = [current]
elif key in ("CHIL", "CHILD") and is_xref_value(value):
value = value[1:-1]
if self.famc.has_key(value):
self.famc[value].append(current)
else:
self.famc[value] = [current]
elif key == 'CHAR' and not self.enc:
assert(type(value) == str or type(value) == unicode)
self.enc = value
def get_famc_map(self):
"""
Returns the Person to Child Family map
"""
return self.famc
def get_fams_map(self):
"""
Returns the Person to Family map (where the person is a spouse)
"""
return self.fams
def get_encoding(self):
"""
Returns the detected encoding
"""
return self.enc.upper()
def set_encoding(self, enc):
"""
Forces the encoding
"""
assert(type(enc) == str or type(enc) == unicode)
self.enc = enc
def get_person_count(self):
"""
Returns the number of INDI records found
"""
return self.pcnt
def get_line_count(self):
"""
Returns the number of lines in the file
"""
return self.lcnt
intid = table.get(gramps_id)
if not intid:
intid = Utils.create_id()
table[gramps_id] = intid
return intid
#-------------------------------------------------------------------------
#
@ -390,11 +256,12 @@ class GedcomParser(UpdateCallback):
SyntaxError = "Syntax Error"
BadFile = "Not a GEDCOM file"
def __init__(self, dbase, ifile, filename, callback, stage_one):
def __init__(self, dbase, ifile, filename, callback, stage_one, default_source):
UpdateCallback.__init__(self, callback)
self.set_total(stage_one.get_line_count())
self.repo2id = {}
self.trans = None
self.maxpeople = stage_one.get_person_count()
self.dbase = dbase
self.emapper = GedcomUtils.IdFinder(dbase.get_gramps_ids(EVENT_KEY),
@ -405,9 +272,9 @@ class GedcomParser(UpdateCallback):
self.place_parser = GedcomUtils.PlaceParser()
self.inline_srcs = {}
self.media_map = {}
self.gedmap = GedcomInfoDB()
self.gedmap = GedcomInfo.GedcomInfoDB()
self.gedsource = self.gedmap.get_from_source_tag('GEDCOM 5.5')
self.use_def_src = DEFAULT_SOURCE
self.use_def_src = default_source
if self.use_def_src:
self.def_src = RelLib.Source()
fname = os.path.basename(filename).split('\\')[-1]
@ -416,6 +283,7 @@ class GedcomParser(UpdateCallback):
self.dir_path = os.path.dirname(filename)
self.is_ftw = False
self.is_ancestry_com = False
self.groups = None
self.pid_map = GedcomUtils.IdMapper(
self.dbase.id_trans,
@ -920,15 +788,15 @@ class GedcomParser(UpdateCallback):
enc = stage_one.get_encoding()
if enc == "ANSEL":
rdr = AnselReader(ifile)
rdr = GedcomChar.AnselReader(ifile)
elif enc in ("UTF-8", "UTF8"):
rdr = UTF8Reader(ifile)
rdr = GedcomChar.UTF8Reader(ifile)
elif enc in ("UTF-16", "UTF16", "UNICODE"):
rdr = UTF16Reader(ifile)
rdr = GedcomChar.UTF16Reader(ifile)
else:
rdr = AnsiReader(ifile)
rdr = GedcomChar.AnsiReader(ifile)
self.lexer = Reader(rdr)
self.lexer = GedcomLex.Reader(rdr)
self.filename = filename
self.backoff = False
@ -936,7 +804,7 @@ class GedcomParser(UpdateCallback):
self.geddir = os.path.dirname(fullpath)
self.error_count = 0
amap = personalConstantAttributes
amap = GedcomInfo.personalConstantAttributes
self.attrs = amap.values()
self.gedattr = {}
@ -951,7 +819,6 @@ class GedcomParser(UpdateCallback):
no_magic = self.maxpeople < 1000
self.trans = self.dbase.transaction_begin("", not use_trans, no_magic)
self.debug = False
self.dbase.disable_signals()
self.__parse_header_head()
self.__parse_header_source()
@ -972,41 +839,29 @@ class GedcomParser(UpdateCallback):
self.dbase.enable_signals()
self.dbase.request_rebuild()
def __find_from_handle(self, gramps_id, table):
"""
Finds a handle corresponding the the specified GRAMPS ID. The passed
table contains the mapping. If the value is found, we return it,
otherwise we create a new handle, store it, and return it.
"""
intid = table.get(gramps_id)
if not intid:
intid = Utils.create_id()
table[gramps_id] = intid
return intid
def __find_person_handle(self, gramps_id):
"""
Returns the database handle associated with the person's GRAMPS ID
"""
return self.__find_from_handle(gramps_id, self.gid2id)
return find_from_handle(gramps_id, self.gid2id)
def __find_family_handle(self, gramps_id):
"""
Returns the database handle associated with the family's GRAMPS ID
"""
return self.__find_from_handle(gramps_id, self.fid2id)
return find_from_handle(gramps_id, self.fid2id)
def __find_object_handle(self, gramps_id):
"""
Returns the database handle associated with the media object's GRAMPS ID
"""
return self.__find_from_handle(gramps_id, self.oid2id)
return find_from_handle(gramps_id, self.oid2id)
def __find_note_handle(self, gramps_id):
"""
Returns the database handle associated with the note's GRAMPS ID
"""
return self.__find_from_handle(gramps_id, self.nid2id)
return find_from_handle(gramps_id, self.nid2id)
def __find_or_create_person(self, gramps_id):
"""
@ -1019,7 +874,7 @@ class GedcomParser(UpdateCallback):
if self.dbase.has_person_handle(intid):
person.unserialize(self.dbase.get_raw_person_data(intid))
else:
intid = self.__find_from_handle(gramps_id, self.gid2id)
intid = find_from_handle(gramps_id, self.gid2id)
person.set_handle(intid)
person.set_gramps_id(gramps_id)
return person
@ -1035,7 +890,7 @@ class GedcomParser(UpdateCallback):
if self.dbase.has_family_handle(intid):
family.unserialize(self.dbase.get_raw_family_data(intid))
else:
intid = self.__find_from_handle(gramps_id, self.fid2id)
intid = find_from_handle(gramps_id, self.fid2id)
family.set_handle(intid)
family.set_gramps_id(gramps_id)
return family
@ -1051,7 +906,7 @@ class GedcomParser(UpdateCallback):
if self.dbase.has_object_handle(intid):
obj.unserialize(self.dbase.get_raw_object_data(intid))
else:
intid = self.__find_from_handle(gramps_id, self.oid2id)
intid = find_from_handle(gramps_id, self.oid2id)
obj.set_handle(intid)
obj.set_gramps_id(gramps_id)
return obj
@ -1067,7 +922,7 @@ class GedcomParser(UpdateCallback):
if self.dbase.has_source_handle(intid):
obj.unserialize(self.dbase.get_raw_source_data(intid))
else:
intid = self.__find_from_handle(gramps_id, self.sid2id)
intid = find_from_handle(gramps_id, self.sid2id)
obj.set_handle(intid)
obj.set_gramps_id(gramps_id)
return obj
@ -1092,7 +947,7 @@ class GedcomParser(UpdateCallback):
if self.dbase.has_repository_handle(intid):
repository.unserialize(self.dbase.get_raw_repository_data(intid))
else:
intid = self.__find_from_handle(gramps_id, self.rid2id)
intid = find_from_handle(gramps_id, self.rid2id)
repository.set_handle(intid)
repository.set_gramps_id(gramps_id)
if need_commit:
@ -1119,7 +974,7 @@ class GedcomParser(UpdateCallback):
if self.dbase.has_note_handle(intid):
note.unserialize(self.dbase.get_raw_note_data(intid))
else:
intid = self.__find_from_handle(gramps_id, self.nid2id)
intid = find_from_handle(gramps_id, self.nid2id)
note.set_handle(intid)
note.set_gramps_id(gramps_id)
if need_commit:
@ -1225,7 +1080,7 @@ class GedcomParser(UpdateCallback):
self.backoff = False
return self.groups
def __not_recognized(self, level):
def __not_recognized(self, line, level):
"""
Prints a message when an undefined token is found. All subordinate items
to the current item are ignored.
@ -1233,8 +1088,7 @@ class GedcomParser(UpdateCallback):
@param level: Current level in the file
@type level: int
"""
text = self.groups.line
msg = _("Line %d was not understood, so it was ignored.") % text
msg = _("Line %d was not understood, so it was ignored.") % line.line
self.__warn(msg)
self.error_count += 1
self.__skip_subordinate_levels(level)
@ -1260,7 +1114,7 @@ class GedcomParser(UpdateCallback):
try:
line = self.__get_next_line()
if line and line.token != TOKEN_TRLR:
self.__not_recognized(0)
self.__not_recognized(line, 0)
except TypeError:
return
@ -1325,7 +1179,7 @@ class GedcomParser(UpdateCallback):
pass
self.__parse_inline_note(line, 1)
else:
self.__not_recognized(1)
self.__not_recognized(line, 1)
def __parse_level(self, state, __map, default):
"""
@ -1340,8 +1194,6 @@ class GedcomParser(UpdateCallback):
return
else:
func = __map.get(line.token, default)
if self.debug:
print line, func
func(line, state)
def __undefined(self, line, state):
@ -1351,7 +1203,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state
@type state: CurrentState
"""
self.__not_recognized(state.level+1)
self.__not_recognized(line, state.level+1)
#----------------------------------------------------------------------
#
@ -1390,29 +1242,28 @@ class GedcomParser(UpdateCallback):
# find the person
real_id = self.pid_map[line.token_text]
self.person = self.__find_or_create_person(real_id)
person = self.__find_or_create_person(real_id)
# set up the state for the parsing
state = GedcomUtils.CurrentState(person=self.person, level=1)
state = GedcomUtils.CurrentState(person=person, level=1)
# Ancestry.com GEDCOM files are massively broken, not providing
# the FAMC and FAMS values for a person
if self.is_ancestry_com:
self.map_ancestry_com(line.token_text.strip())
self.map_ancestry_com(person, line.token_text.strip())
# do the actual parsing
self.__parse_level(state, self.indi_parse_tbl, self.__person_event)
# Add the default reference if no source has found
self.__add_default_source(self.person)
self.__add_default_source(person)
# commit the person to the database
if self.person.change:
self.dbase.commit_person(self.person, self.trans,
change_time=state.person.change)
if person.change:
self.dbase.commit_person(person, self.trans,
change_time=state.person.change)
else:
self.dbase.commit_person(self.person, self.trans)
del self.person
self.dbase.commit_person(person, self.trans)
def __person_sour(self, line, state):
"""
@ -2208,7 +2059,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state
@type state: CurrentState
"""
value = self.extract_temple(line)
value = self.__extract_temple(line)
if value:
state.lds_ord.set_temple(value)
@ -2296,7 +2147,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state
@type state: CurrentState
"""
status = lds_status.get(line.data, RelLib.LdsOrd.STATUS_NONE)
status = GedcomInfo.lds_status.get(line.data, RelLib.LdsOrd.STATUS_NONE)
state.lds_ord.set_status(status)
def __person_famc(self, line, state):
@ -2788,7 +2639,7 @@ class GedcomParser(UpdateCallback):
@type state: CurrentState
"""
if line.data and line.data[0] == '@':
self.__not_recognized(state.level)
self.__not_recognized(line, state.level)
else:
(form, filename, title, note) = self.__obje(state.level)
self.build_media_object(state.family, form, filename, title, note)
@ -2986,7 +2837,7 @@ class GedcomParser(UpdateCallback):
@type state: CurrentState
"""
if line.data and line.data[0] == '@':
self.__not_recognized(state.level)
self.__not_recognized(line, state.level)
else:
(form, filename, title, note) = self.__obje(state.level)
self.build_media_object(state.event, form, filename, title, note)
@ -3093,7 +2944,7 @@ class GedcomParser(UpdateCallback):
@type state: CurrentState
"""
if line.data and line.data[0] == '@':
self.__not_recognized(state.level)
self.__not_recognized(line, state.level)
else:
(form, filename, title, note) = self.__obje(state.level)
self.build_media_object(state.place, form, filename, title, note)
@ -3323,10 +3174,8 @@ class GedcomParser(UpdateCallback):
@type state: CurrentState
"""
if line.data and line.data[0] == "@":
"""
n _WITN @<XREF:INDI>@
+1 TYPE <TYPE_OF_RELATION>
"""
# n _WITN @<XREF:INDI>@
# +1 TYPE <TYPE_OF_RELATION>
assert( state.event.handle) # event handle is required to be set
wit = self.__find_or_create_person(self.pid_map[line.data])
event_ref = RelLib.EventRef()
@ -3337,17 +3186,16 @@ class GedcomParser(UpdateCallback):
break
elif line.token == TOKEN_TYPE:
if line.data in ("WITNESS_OF_MARRIAGE"):
r = RelLib.EventRoleType(RelLib.EventRoleType.WITNESS)
role = RelLib.EventRoleType(
RelLib.EventRoleType.WITNESS)
else:
r = RelLib.EventRoleType((RelLib.EventRoleType.CUSTOM,
line.data))
event_ref.set_role(r)
role = RelLib.EventRoleType(
(RelLib.EventRoleType.CUSTOM, line.data))
event_ref.set_role(role)
wit.add_event_ref(event_ref)
self.dbase.commit_person(wit, self.trans)
else:
"""
n _WITN <TEXTUAL_LIST_OF_NAMES>
"""
# n _WITN <TEXTUAL_LIST_OF_NAMES>
attr = RelLib.Attribute()
attr.set_type(RelLib.AttributeType.WITNESS)
attr.set_value(line.data)
@ -3598,7 +3446,7 @@ class GedcomParser(UpdateCallback):
@type state: CurrentState
"""
if line.data and line.data[0] == '@':
self.__not_recognized(state.level)
self.__not_recognized(line, state.level)
else:
src = self.dbase.get_source_from_handle(state.handle)
(form, filename, title, note) = self.__obje(state.level)
@ -3696,7 +3544,7 @@ class GedcomParser(UpdateCallback):
@type state: CurrentState
"""
if line.data and line.data[0] == '@':
self.__not_recognized(state.level)
self.__not_recognized(line, state.level)
else:
(form, filename, title, note) = self.__obje(state.level+1)
self.build_media_object(state.source, form, filename, title, note)
@ -3717,7 +3565,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state
@type state: CurrentState
"""
self.__not_recognized(state.level+1)
self.__not_recognized(line, state.level+1)
def __source_repo(self, line, state):
"""
@ -4183,7 +4031,7 @@ class GedcomParser(UpdateCallback):
state.location = RelLib.Location()
self.__parse_note(line, state.event, state.level+1)
def map_ancestry_com(self, original_gid):
def map_ancestry_com(self, person, original_gid):
"""
GEDCOM files created by Ancestry.com for some reason do not include
the FAMC and FAMS mappings in the INDI record. If we don't fix this,
@ -4200,12 +4048,12 @@ class GedcomParser(UpdateCallback):
for fams_id in self.fams_map.get(original_gid, []):
mapped_id = self.fid_map[fams_id]
fams_handle = self.__find_family_handle(mapped_id)
self.person.add_family_handle(fams_handle)
person.add_family_handle(fams_handle)
for famc_id in self.famc_map.get(original_gid, []):
mapped_id = self.fid_map[famc_id]
famc_handle = self.__find_family_handle(mapped_id)
self.person.add_parent_family_handle(famc_handle)
person.add_parent_family_handle(famc_handle)
def __optional_note(self, line, state):
"""
@ -4432,7 +4280,7 @@ class GedcomParser(UpdateCallback):
elif line.token == TOKEN_NOTE:
self.__skip_subordinate_levels(level+1)
else:
self.__not_recognized(level+1)
self.__not_recognized(line, level+1)
# Attempt to convert the values to a valid change time
if tstr:
@ -4542,7 +4390,7 @@ class GedcomParser(UpdateCallback):
event_ref.set_reference_handle(event.handle)
return event_ref
def extract_temple(self, line):
def __extract_temple(self, line):
def get_code(code):
if LdsUtils.Temples.is_valid_code(code):
return code

View File

@ -0,0 +1,206 @@
#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2000-2007 Donald N. Allingham
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id: _ReadGedcom.py 8032 2007-02-03 17:11:05Z hippy $
"""
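First-stage GEDCOM scan: detects the character encoding and collects the
record counts and family references needed before the full import.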
"""
__revision__ = "$Revision: $"
__author__ = "Don Allingham"
#-------------------------------------------------------------------------
#
# standard python modules
#
#-------------------------------------------------------------------------
import codecs
from gettext import gettext as _
#-------------------------------------------------------------------------
#
# GRAMPS modules
#
#-------------------------------------------------------------------------
import Errors
#------------------------------------------------------------------------
#
# Set up logging
#
#------------------------------------------------------------------------
import logging
LOG = logging.getLogger(".GedcomImport")
#-------------------------------------------------------------------------
#
# Constants
#
#-------------------------------------------------------------------------
# Translated error text used for the Errors.GedcomError raised when the
# first two bytes read from the file contain a NUL byte but are not a
# recognized byte order mark.
BAD_UTF16 = _("Your GEDCOM file is corrupted. "
"The file appears to be encoded using the UTF16 "
"character set, but is missing the BOM marker.")
#-------------------------------------------------------------------------
#
# is_xref_value
#
#-------------------------------------------------------------------------
def is_xref_value(value):
    """
    Tells whether value looks like a GEDCOM cross reference (XREF).

    A leading '@' character is taken as sufficient evidence. An empty or
    None value is handed back unchanged (it is falsy); otherwise the
    result is the boolean outcome of the leading character test.
    """
    if not value:
        # Nothing to inspect: return the falsy value itself
        return value
    return value[0] == '@'
#-------------------------------------------------------------------------
#
# add_to_list
#
#-------------------------------------------------------------------------
def add_to_list(table, key, value):
    """
    Appends value to the list stored in table under key.

    If table has no entry for key yet, a new single-element list holding
    value is stored under key. The table is modified in place and nothing
    is returned.

    @param table: mapping of keys to lists of values
    @param key: key whose list should receive the value
    @param value: value to append
    """
    # A single lookup replaces the deprecated dict.has_key() test, which
    # no longer exists in Python 3.
    try:
        entries = table[key]
    except KeyError:
        table[key] = [value]
    else:
        entries.append(value)
#-------------------------------------------------------------------------
#
# StageOne
#
#-------------------------------------------------------------------------
class StageOne:
    """
    The StageOne parser scans the file quickly, looking for a few things.
    This includes:

    1. Character set encoding (from a byte order mark or the CHAR tag)
    2. Number of individual records and of non-blank lines in the file
    3. Spouse and child to family references, since Ancestry.com creates
       GEDCOM files without the FAMC references.
    """

    def __init__(self, ifile):
        """
        Stores the input file object and resets all collected data.

        @param ifile: open file object positioned at the start of the
            GEDCOM data
        """
        self.ifile = ifile
        self.famc = {}   # person id -> ids of families where a child
        self.fams = {}   # person id -> ids of families where a spouse
        self.enc = ""    # detected or forced encoding name
        self.pcnt = 0    # number of individual records seen
        self.lcnt = 0    # number of non-blank lines seen

    def __detect_file_decoder(self, input_file):
        """
        Detects the file encoding of the file by looking for a BOM
        (byte order marker) in the GEDCOM file. If we detect a UTF-16
        encoded file, we must connect to a wrapper using the codecs
        package.

        Returns the object that parse() should iterate over. Raises
        Errors.GedcomError if the leading bytes contain a NUL character
        without a recognized BOM.
        """
        bom = input_file.read(2)
        if bom == "\xef\xbb":
            # UTF-8 BOM is three bytes long; consume the last one
            input_file.read(1)
            self.enc = "UTF8"
            return input_file
        if bom == "\xff\xfe":
            self.enc = "UTF16"
            input_file.seek(0)
            return codecs.EncodedFile(input_file, 'utf8', 'utf16')
        # Membership test instead of indexing, so that an empty or
        # one-byte file does not raise an IndexError here.
        if "\x00" in bom:
            raise Errors.GedcomError(BAD_UTF16)
        input_file.seek(0)
        return input_file

    def parse(self):
        """
        Parse the input file, filling in the encoding, the person and
        line counts, and the spouse/child to family maps.
        """
        current_family_id = ""

        reader = self.__detect_file_decoder(self.ifile)

        for line in reader:
            line = line.strip()
            if not line:
                continue
            self.lcnt += 1

            # The trailing '' guarantees a value field for two-token lines
            data = line.split(None, 2) + ['']
            try:
                (level, key, value) = data[:3]
                value = value.strip()
                level = int(level)
                key = key.strip()
            except ValueError:
                # Either too few fields to unpack or a non-numeric level
                LOG.warning(_("Invalid line %d in GEDCOM file.") % self.lcnt)
                continue

            if level == 0 and key[0] == '@':
                # Record header: "0 @XREF@ TYPE". The record type must be
                # tested with 'in'; comparing the string to the tuple with
                # '==' can never succeed.
                if value in ("FAM", "FAMILY"):
                    current_family_id = key.strip()[1:-1]
                elif value in ("INDI", "INDIVIDUAL"):
                    self.pcnt += 1
            elif key in ("HUSB", "HUSBAND", "WIFE") and is_xref_value(value):
                add_to_list(self.fams, value[1:-1], current_family_id)
            elif key in ("CHIL", "CHILD") and is_xref_value(value):
                add_to_list(self.famc, value[1:-1], current_family_id)
            elif key == 'CHAR' and not self.enc:
                # An encoding found via the BOM takes precedence
                self.enc = value

    def get_famc_map(self):
        """
        Returns the Person to Child Family map
        """
        return self.famc

    def get_fams_map(self):
        """
        Returns the Person to Family map (where the person is a spouse)
        """
        return self.fams

    def get_encoding(self):
        """
        Returns the detected encoding, converted to upper case
        """
        return self.enc.upper()

    def set_encoding(self, enc):
        """
        Forces the encoding
        """
        assert isinstance(enc, str) or isinstance(enc, unicode)
        self.enc = enc

    def get_person_count(self):
        """
        Returns the number of INDI records found
        """
        return self.pcnt

    def get_line_count(self):
        """
        Returns the number of non-blank lines in the file
        """
        return self.lcnt

View File

@ -18,10 +18,16 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
import re
"""
Support classes to simplify GEDCOM importing
"""
import re
import RelLib
NAME_RE = re.compile(r"/?([^/]*)(/([^/]*)(/([^/]*))?)?")
SURNAME_RE = re.compile(r"/([^/]*)/([^/]*)")
#-------------------------------------------------------------------------
#
# CurrentState
@ -35,7 +41,6 @@ class CurrentState:
"""
Initializes the object
"""
self.note = ""
self.name_cnt = 0
self.person = person
self.level = level
@ -55,15 +60,19 @@ class CurrentState:
"""
self.__dict__[name] = value
def add_to_note(self, text):
self.note += text
def get_text(self):
return self.note
#-------------------------------------------------------------------------
#
# PlaceParser
#
#-------------------------------------------------------------------------
class PlaceParser:
"""
Provides the ability to parse GEDCOM FORM statements for places, and
then parse the line of text, mapping the text components to Location
values based on the FORM statement.
"""
field_map = {
__field_map = {
'addr' : RelLib.Location.set_street,
'subdivision' : RelLib.Location.set_street,
'addr1' : RelLib.Location.set_street,
@ -81,27 +90,44 @@ class PlaceParser:
}
def __init__(self, line=None):
self.pf = []
self.parse_function = []
if line:
self.parse_form(line)
if line:
self.parse_form(line)
def parse_form(self, line):
"""
Parses the GEDCOM PLAC.FORM into a list of function
pointers (if possible). It does this by mapping the text strings
(separated by commas) to the corresponding RelLib.Location
method via the __field_map variable
"""
for item in line.data.split(','):
item = item.lower().strip()
fcn = self.field_map.get(item, lambda x, y: None)
self.pf.append(fcn)
fcn = self.__field_map.get(item, lambda x, y: None)
self.parse_function.append(fcn)
def load_place(self, place, text):
items = [item.strip() for item in text.split(',')]
if len(items) != len(self.pf):
return
loc = place.get_main_location()
index = 0
for item in items:
self.pf[index](loc, item)
index += 1
"""
Takes the text string representing a place, splits it into
its subcomponents (comma separated), and calls the appropriate
function based on its position, depending on the parsed value
from the FORM statement.
"""
items = [item.strip() for item in text.split(',')]
if len(items) != len(self.parse_function):
return
loc = place.get_main_location()
index = 0
for item in items:
self.parse_function[index](loc, item)
index += 1
#-------------------------------------------------------------------------
#
# IdFinder
#
#-------------------------------------------------------------------------
class IdFinder:
"""
Provides method of finding the next available ID.
@ -130,6 +156,11 @@ class IdFinder:
self.index += 1
return index
#-------------------------------------------------------------------------
#
# IdMapper
#
#-------------------------------------------------------------------------
class IdMapper:
def __init__(self, trans, find_next, translate):
@ -168,16 +199,15 @@ class IdMapper:
# Support functions
#
#------------------------------------------------------------------------
NAME_RE = re.compile(r"/?([^/]*)(/([^/]*)(/([^/]*))?)?")
SURNAME_RE = re.compile(r"/([^/]*)/([^/]*)")
def parse_name_personal(text):
"""
Parses a GEDCOM NAME value into an Name structure
"""
name = RelLib.Name()
m = SURNAME_RE.match(text)
if m:
names = m.groups()
match = SURNAME_RE.match(text)
if match:
names = match.groups()
name.set_first_name(names[1].strip())
name.set_surname(names[0].strip())
else:
@ -193,7 +223,7 @@ def parse_name_personal(text):
def extract_id(value):
    """
    Extracts a value to use for the GRAMPS ID value from the GEDCOM
    reference token. The value should be in the form of @XYZ@, and the
    returned value will be XYZ.

    Surrounding whitespace is stripped first; the first and last remaining
    characters are then dropped without checking that they are '@'.
    """
    return value.strip()[1:-1]

View File

@ -32,7 +32,6 @@ __revision__ = "$Revision: 8864 $"
# GRAMPS libraries
#
#-------------------------------------------------------------------------
from RelLib import *
from GrampsDb import DbBase
class ProxyDbBase(DbBase):
@ -41,7 +40,7 @@ class ProxyDbBase(DbBase):
but all data marked private will be hidden from the user.
"""
def __init__(self,db):
def __init__(self, db):
"""
Creates a new PrivateProxyDb instance.
"""

View File

@ -22,20 +22,49 @@
"Import from GEDCOM"
#------------------------------------------------------------------------
#
# python modules
#
#------------------------------------------------------------------------
import os
from gettext import gettext as _
import gtk
#------------------------------------------------------------------------
#
# Set up logging
#
#------------------------------------------------------------------------
import logging
LOG = logging.getLogger(".GedcomImport")
#------------------------------------------------------------------------
#
# GRAMPS modules
#
#------------------------------------------------------------------------
import Errors
from _GedcomParse import GedcomParser, StageOne
from _GedcomParse import GedcomParser
from _GedcomStageOne import StageOne
from QuestionDialog import ErrorDialog, DBErrorDialog
try:
import Config
DEFAULT_SOURCE = Config.get(Config.DEFAULT_SOURCE)
except ImportError:
LOG.warn("No Config module available using defaults.")
DEFAULT_SOURCE = False
#-------------------------------------------------------------------------
#
#
# importData
#
#-------------------------------------------------------------------------
def importData(database, filename, callback=None, use_trans=False):
"""
Try to handle ANSEL encoded files that are not really ANSEL encoded
"""
try:
ifile = open(filename, "r")
except IOError:
@ -65,23 +94,30 @@ def importData(database, filename, callback=None, use_trans=False):
dialog.destroy()
else:
code_set = ""
import2(database, filename, callback, code_set, use_trans)
#-------------------------------------------------------------------------
#
# import2
#
#-------------------------------------------------------------------------
def import2(database, filename, callback, code_set, use_trans):
# add some checking here
"""
Do the actual import of data
"""
assert(type(code_set) == str or type(code_set) == unicode)
try:
ifile = open(filename,"rU")
np = StageOne(ifile)
np.parse()
stage_one = StageOne(ifile)
stage_one.parse()
if code_set:
np.set_encoding(code_set)
ifile.seek(0)
gedparse = GedcomParser(database, ifile, filename, callback, np)
stage_one.set_encoding(code_set)
ifile.seek(0)
gedparse = GedcomParser(database, ifile, filename, callback,
stage_one, DEFAULT_SOURCE)
except IOError, msg:
ErrorDialog(_("%s could not be opened\n") % filename, str(msg))
return
@ -96,7 +132,7 @@ def import2(database, filename, callback, code_set, use_trans):
try:
read_only = database.readonly
database.readonly = False
close = gedparse.parse_gedcom_file(use_trans)
gedparse.parse_gedcom_file(use_trans)
database.readonly = read_only
ifile.close()
except IOError, msg:
@ -110,43 +146,3 @@ def import2(database, filename, callback, code_set, use_trans):
ErrorDialog(_('Error reading GEDCOM file'), str(msg))
return
def import_from_string(database, text, callback, code_set, use_trans):
# add some checking here
from cStringIO import StringIO
ifile = StringIO(text)
try:
np = NoteParser(ifile, False, code_set)
ifile.seek(0)
gedparse = GedcomParser(database, ifile, "inline-string", callback,
code_set, np.get_map(), np.get_lines(),
np.get_persons())
except IOError, msg:
ErrorDialog(_("%s could not be opened\n") % "inline-string", str(msg))
return
if database.get_number_of_people() == 0:
use_trans = False
try:
read_only = database.readonly
database.readonly = False
gedparse.parse_gedcom_file(use_trans)
database.readonly = read_only
ifile.close()
except IOError, msg:
msg = _("%s could not be opened\n") % 'inline-string'
ErrorDialog(msg, str(msg))
return
except Errors.DbError, msg:
WarningDialog(_('Database corruption detected'),
_('A problem was detected with the database. Please '
'run the Check and Repair Database tool to fix the '
'problem.'))
return
except Errors.GedcomError, msg:
ErrorDialog(_('Error reading GEDCOM file'), str(msg))
return

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
@ -45,10 +45,13 @@ import ExportOptions
#-------------------------------------------------------------------------
#
#
# export_data
#
#-------------------------------------------------------------------------
def exportData(database, filename, person, option_box, callback=None):
def export_data(database, filename, person, option_box, callback=None):
"""
Calls the XML writer with the syntax expected by the export plugin
"""
return GrampsDb.exportData(database, filename, person, option_box,
callback, const.version)
@ -62,19 +65,18 @@ class XmlWriter(GrampsDb.GrampsDbXmlWriter):
Writes a database to the XML file.
"""
def __init__(self, db, callback, strip_photos, compress=1):
GrampsDb.GrampsDbXmlWriter.__init__(self, db, strip_photos, compress,
const.version, callback)
def __init__(self, dbase, callback, strip_photos, compress=1):
GrampsDb.GrampsDbXmlWriter.__init__(
self, dbase, strip_photos, compress, const.version, callback)
def write(self,filename):
def write(self, filename):
"""
Write the database to the specified file.
"""
try:
ret = GramspDb.GrampsDbXmlWriter.write(self, filename)
ret = GrampsDb.GrampsDbXmlWriter.write(self, filename)
except GrampsDb.GrampsDbWriteFailure, val:
ErrorDialog(val[0],val[1])
ErrorDialog(val[0], val[1])
return ret
#-------------------------------------------------------------------------
@ -82,12 +84,12 @@ class XmlWriter(GrampsDb.GrampsDbXmlWriter):
#
#
#-------------------------------------------------------------------------
_title = _('GRAMPS _XML database')
_description = _('The GRAMPS XML database is a format used by older '
TITLE = _('GRAMPS _XML database')
DESCRIPTION = _('The GRAMPS XML database is a format used by older '
'versions of GRAMPS. It is read-write compatible with '
'the present GRAMPS database format.')
_config = (_('GRAMPS XML export options'), ExportOptions.WriterOptionBox)
_filename = 'gramps'
CONFIG = (_('GRAMPS XML export options'), ExportOptions.WriterOptionBox)
FILENAME = 'gramps'
from PluginUtils import register_export
register_export(exportData,_title,_description,_config,_filename)
register_export(export_data, TITLE, DESCRIPTION, CONFIG, FILENAME)