From c7b595210f9b6a643556af55afb2ca9f5bf60a80 Mon Sep 17 00:00:00 2001 From: Brian Matherly Date: Tue, 29 Dec 2009 05:37:20 +0000 Subject: [PATCH] Move GedcomStageOne.py into GedcomParse.py. svn: r13936 --- po/POTFILES.in | 1 - src/GrampsDbUtils/Makefile.am | 1 - src/GrampsDbUtils/_GedcomParse.py | 148 ++++++++++++++++++- src/GrampsDbUtils/_GedcomStageOne.py | 206 --------------------------- src/plugins/import/ImportGedcom.py | 5 +- 5 files changed, 148 insertions(+), 213 deletions(-) delete mode 100644 src/GrampsDbUtils/_GedcomStageOne.py diff --git a/po/POTFILES.in b/po/POTFILES.in index 70f7ef356..f85d9e179 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -342,7 +342,6 @@ src/docgen/SpreadSheetDoc.py src/docgen/TextBufDoc.py # GrampsDbUtils package -src/GrampsDbUtils/_GedcomStageOne.py src/GrampsDbUtils/_GedcomParse.py src/GrampsDbUtils/_GedcomTokens.py src/GrampsDbUtils/__init__.py diff --git a/src/GrampsDbUtils/Makefile.am b/src/GrampsDbUtils/Makefile.am index 3f7d589cb..c42694576 100644 --- a/src/GrampsDbUtils/Makefile.am +++ b/src/GrampsDbUtils/Makefile.am @@ -7,7 +7,6 @@ pkgdatadir = $(datadir)/@PACKAGE@/GrampsDbUtils pkgdata_PYTHON = \ _GedcomParse.py\ - _GedcomStageOne.py\ _GedcomTokens.py\ _GedcomUtils.py\ __init__.py diff --git a/src/GrampsDbUtils/_GedcomParse.py b/src/GrampsDbUtils/_GedcomParse.py index 3b3b63c51..8c47bebbe 100644 --- a/src/GrampsDbUtils/_GedcomParse.py +++ b/src/GrampsDbUtils/_GedcomParse.py @@ -100,7 +100,7 @@ from xml.parsers.expat import ParserCreate # #------------------------------------------------------------------------ import logging -LOG = logging.getLogger(".GedcomImport") +LOG = logging.getLogger(".libgedcom") #------------------------------------------------------------------------- # @@ -5051,4 +5051,148 @@ class GedcomParser(UpdateCallback): """ state.res.set_phone(line.data) -#===eof=== +#------------------------------------------------------------------------- +# +# GedcomStageOne +# 
#-------------------------------------------------------------------------
class GedcomStageOne(object):
    """
    Quick first pass over a GEDCOM file, run before the real import.

    The scan collects:

    1. The character set encoding (from a byte order marker or, failing
       that, from the first CHAR line).
    2. The number of INDI records and the number of non-blank lines.
    3. Person to family references, rebuilt from the HUSB/WIFE/CHIL lines
       of each FAM record, since Ancestry.com creates GEDCOM files
       without the FAMC references.
    """
    __BAD_UTF16 = _("Your GEDCOM file is corrupted. "
                    "The file appears to be encoded using the UTF16 "
                    "character set, but is missing the BOM marker.")
    __EMPTY_GED = _("Your GEDCOM file is empty.")

    @staticmethod
    def __is_xref_value(value):
        """
        Return True if value is in the form of an XREF value. We assume
        that if we have a leading '@' character, then we are okay.
        """
        return value.startswith('@')

    @staticmethod
    def __add_to_list(table, key, value):
        """
        Append value to the list stored in table under key, creating the
        list if the key is not present yet.
        """
        table.setdefault(key, []).append(value)

    def __init__(self, ifile):
        """
        Store the open file object to scan; nothing is read until parse()
        is called.
        """
        self.ifile = ifile
        self.famc = {}   # person id -> ids of families listing them as child
        self.fams = {}   # person id -> ids of families listing them as spouse
        self.enc = ""    # encoding name; empty until detected or forced
        self.pcnt = 0    # number of INDI records seen
        self.lcnt = 0    # number of non-blank lines seen

    def __detect_file_decoder(self, input_file):
        """
        Detect the file encoding by looking for a BOM (byte order marker)
        at the start of the GEDCOM file, and return the object the lines
        should be read from. If we detect a UTF-16 encoded file, the file
        is wrapped using the codecs package so that it is read as UTF-8.

        Raises Errors.GedcomError if the file is empty, or if it contains
        a NUL in its first two bytes without a BOM (UTF-16 without BOM).
        """
        line = input_file.read(2)
        if not line:
            raise Errors.GedcomError(self.__EMPTY_GED)
        if line == "\xef\xbb":
            # Swallow the third byte of the UTF-8 BOM.
            input_file.read(1)
            self.enc = "UTF8"
            return input_file
        if line == "\xff\xfe":
            self.enc = "UTF16"
            # Rewind so that the codec sees the BOM itself.
            input_file.seek(0)
            return codecs.EncodedFile(input_file, 'utf8', 'utf16')
        # A membership test rather than line[0]/line[1], so that a file
        # holding a single byte does not raise IndexError.
        if "\x00" in line:
            raise Errors.GedcomError(self.__BAD_UTF16)
        input_file.seek(0)
        return input_file

    def parse(self):
        """
        Scan the input file line by line, updating the counters, the
        encoding and the famc/fams maps. Lines that do not start with an
        integer level followed by a tag are logged and skipped.

        Raises Errors.GedcomError (from the encoding detection) if the
        file is empty or looks like UTF-16 without a BOM.
        """
        current_family_id = ""

        reader = self.__detect_file_decoder(self.ifile)

        for line in reader:
            line = line.strip()
            if not line:
                continue
            self.lcnt += 1

            # The padding guarantees a (possibly empty) value for lines
            # that consist of a level and a tag only.
            data = line.split(None, 2) + ['']
            try:
                (level, key, value) = data[:3]
                level = int(level)
            except ValueError:
                # Either fewer than two tokens or a non-numeric level.
                LOG.warning(_("Invalid line %d in GEDCOM file.") % self.lcnt)
                continue
            key = key.strip()
            value = value.strip()

            if level == 0 and key[0] == '@':
                # Record header "0 @XREF@ TAG": here key holds the xref
                # and value holds the tag. The tag has to be tested with
                # "in"; comparing it to the tuple with "==" never matches.
                if value in ("FAM", "FAMILY"):
                    current_family_id = key[1:-1]
                elif value in ("INDI", "INDIVIDUAL"):
                    self.pcnt += 1
            elif key in ("HUSB", "HUSBAND", "WIFE") and \
                    self.__is_xref_value(value):
                self.__add_to_list(self.fams, value[1:-1], current_family_id)
            elif key in ("CHIL", "CHILD") and self.__is_xref_value(value):
                self.__add_to_list(self.famc, value[1:-1], current_family_id)
            elif key == 'CHAR' and not self.enc:
                # Only the first CHAR line counts, and only when no BOM
                # (or set_encoding call) has fixed the encoding already.
                self.enc = value

    def get_famc_map(self):
        """
        Return the Person to Child Family map
        """
        return self.famc

    def get_fams_map(self):
        """
        Return the Person to Family map (where the person is a spouse)
        """
        return self.fams

    def get_encoding(self):
        """
        Return the detected (or forced) encoding, in upper case
        """
        return self.enc.upper()

    def set_encoding(self, enc):
        """
        Forces the encoding
        """
        assert(isinstance(enc, basestring))
        self.enc = enc

    def get_person_count(self):
        """
        Return the number of INDI records found
        """
        return self.pcnt

    def get_line_count(self):
        """
        Return the number of non-blank lines in the file
        """
        return self.lcnt
Allingham -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -# $Id$ - -""" -Import from GEDCOM -""" - -#------------------------------------------------------------------------- -# -# standard python modules -# -#------------------------------------------------------------------------- -import codecs -from gettext import gettext as _ - -#------------------------------------------------------------------------- -# -# GRAMPS modules -# -#------------------------------------------------------------------------- -import Errors - -#------------------------------------------------------------------------ -# -# Set up logging -# -#------------------------------------------------------------------------ -import logging -LOG = logging.getLogger(".GedcomImport") - -#------------------------------------------------------------------------- -# -# Constants -# -#------------------------------------------------------------------------- -BAD_UTF16 = _("Your GEDCOM file is corrupted. 
" - "The file appears to be encoded using the UTF16 " - "character set, but is missing the BOM marker.") -EMPTY_GED = _("Your GEDCOM file is empty.") - -#------------------------------------------------------------------------- -# -# is_xref_value -# -#------------------------------------------------------------------------- -def is_xref_value(value): - """ - Return True if value is in the form of a XREF value. We assume that - if we have a leading '@' character, then we are okay. - """ - return value and value[0] == '@' - -#------------------------------------------------------------------------- -# -# add_to_list -# -#------------------------------------------------------------------------- -def add_to_list(table, key, value): - """ - Add the value to the table entry associated with key. If the entry - does not exist, it is added. - """ - if key in table: - table[key].append(value) - else: - table[key] = [value] - -#------------------------------------------------------------------------- -# -# StageOne -# -#------------------------------------------------------------------------- -class StageOne(object): - """ - The StageOne parser scans the file quickly, looking for a few things. This - includes: - - 1. Character set encoding - 2. Number of people and families in the list - 3. Child to family references, since Ancestry.com creates GEDCOM files - without the FAMC references. - """ - def __init__(self, ifile): - self.ifile = ifile - self.famc = {} - self.fams = {} - self.enc = "" - self.pcnt = 0 - self.lcnt = 0 - - def __detect_file_decoder(self, input_file): - """ - Detects the file encoding of the file by looking for a BOM - (byte order marker) in the GEDCOM file. If we detect a UTF-16 - encoded file, we must connect to a wrapper using the codecs - package. 
- """ - line = input_file.read(2) - if line == "\xef\xbb": - input_file.read(1) - self.enc = "UTF8" - return input_file - elif line == "\xff\xfe": - self.enc = "UTF16" - input_file.seek(0) - return codecs.EncodedFile(input_file, 'utf8', 'utf16') - elif not line : - raise Errors.GedcomError(EMPTY_GED) - elif line[0] == "\x00" or line[1] == "\x00": - raise Errors.GedcomError(BAD_UTF16) - else: - input_file.seek(0) - return input_file - - def parse(self): - """ - Parse the input file. - """ - current_family_id = "" - - reader = self.__detect_file_decoder(self.ifile) - - for line in reader: - line = line.strip() - if not line: - continue - self.lcnt += 1 - - data = line.split(None, 2) + [''] - try: - (level, key, value) = data[:3] - value = value.strip() - level = int(level) - key = key.strip() - except: - LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt) - continue - - if level == 0 and key[0] == '@': - if value == ("FAM", "FAMILY") : - current_family_id = key.strip()[1:-1] - elif value == ("INDI", "INDIVIDUAL"): - self.pcnt += 1 - elif key in ("HUSB", "HUSBAND", "WIFE") and is_xref_value(value): - add_to_list(self.fams, value[1:-1], current_family_id) - elif key in ("CHIL", "CHILD") and is_xref_value(value): - add_to_list(self.famc, value[1:-1], current_family_id) - elif key == 'CHAR' and not self.enc: - assert(isinstance(value, basestring)) - self.enc = value - - def get_famc_map(self): - """ - Return the Person to Child Family map - """ - return self.famc - - def get_fams_map(self): - """ - Return the Person to Family map (where the person is a spouse) - """ - return self.fams - - def get_encoding(self): - """ - Return the detected encoding - """ - return self.enc.upper() - - def set_encoding(self, enc): - """ - Forces the encoding - """ - assert(isinstance(enc, basestring)) - self.enc = enc - - def get_person_count(self): - """ - Return the number of INDI records found - """ - return self.pcnt - - def get_line_count(self): - """ - Return the number of 
lines in the file - """ - return self.lcnt diff --git a/src/plugins/import/ImportGedcom.py b/src/plugins/import/ImportGedcom.py index f3f46f602..5a47b3019 100644 --- a/src/plugins/import/ImportGedcom.py +++ b/src/plugins/import/ImportGedcom.py @@ -43,8 +43,7 @@ LOG = logging.getLogger(".GedcomImport") # #------------------------------------------------------------------------ import Errors -from GrampsDbUtils._GedcomParse import GedcomParser -from GrampsDbUtils._GedcomStageOne import StageOne +from GrampsDbUtils._GedcomParse import GedcomParser, GedcomStageOne from QuestionDialog import ErrorDialog, DBErrorDialog from glade import Glade from libmixin import DbMixin @@ -103,7 +102,7 @@ def importData(database, filename, callback=None): try: ifile = open(filename, "rU") - stage_one = StageOne(ifile) + stage_one = GedcomStageOne(ifile) stage_one.parse() if code_set: