Move _GedcomChar.py into _GedcomParse.py

svn: r13915
This commit is contained in:
Brian Matherly 2009-12-25 21:06:36 +00:00
parent 8b2da469cf
commit 229c4287d2
3 changed files with 69 additions and 87 deletions

View File

@ -6,7 +6,6 @@
pkgdatadir = $(datadir)/@PACKAGE@/GrampsDbUtils pkgdatadir = $(datadir)/@PACKAGE@/GrampsDbUtils
pkgdata_PYTHON = \ pkgdata_PYTHON = \
_GedcomChar.py\
_GedcomInfo.py\ _GedcomInfo.py\
_GedcomLex.py\ _GedcomLex.py\
_GedcomParse.py\ _GedcomParse.py\

View File

@ -1,79 +0,0 @@
#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2000-2005 Donald N. Allingham
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
from ansel_utf8 import ansel_to_utf8
import codecs
class BaseReader(object):
    """
    Wrap a byte-oriented file object and return its lines as decoded text.

    ifile    -- file-like object providing seek() and readline()
    encoding -- codec name that readline() uses to decode each line
    """

    def __init__(self, ifile, encoding):
        self.ifile = ifile
        self.enc = encoding

    def reset(self):
        """Rewind the underlying file to its beginning."""
        self.ifile.seek(0)

    def readline(self):
        """
        Return the next line of the file decoded with self.enc.

        Bytes that cannot be decoded are substituted with the Unicode
        replacement character instead of raising an error.
        """
        # .decode() exists on Python 2 str and Python 3 bytes alike, whereas
        # the unicode() builtin is Python 2 only. Positional arguments are
        # used because older str.decode() does not accept keywords.
        return self.ifile.readline().decode(self.enc, 'replace')
class UTF8Reader(BaseReader):
    """
    Reader for UTF-8 encoded files that skips a leading byte order mark.

    readline() is inherited from BaseReader, which decodes each line with
    self.enc ('utf8' here) and replaces undecodable bytes.
    """

    def __init__(self, ifile):
        BaseReader.__init__(self, ifile, 'utf8')
        # Position the file just past the BOM (if any) before the first read.
        self.reset()

    def reset(self):
        """Rewind the file, leaving it positioned after a UTF-8 BOM if present."""
        self.ifile.seek(0)
        # codecs.BOM_UTF8 is the byte sequence EF BB BF; comparing against the
        # stdlib constant keeps the check valid for byte reads.
        if self.ifile.read(len(codecs.BOM_UTF8)) != codecs.BOM_UTF8:
            # No BOM: the bytes just consumed are real data, so rewind again.
            self.ifile.seek(0)
class UTF16Reader(BaseReader):
    """
    Reader for UTF-16 encoded files.

    The input file is wrapped in codecs.EncodedFile so that reads are
    transcoded from UTF-16 to UTF-8 before they reach readline().
    """

    def __init__(self, ifile):
        transcoded = codecs.EncodedFile(ifile, 'utf8', 'utf16')
        # NOTE(review): self.enc is recorded as 'utf16' although the wrapped
        # stream yields UTF-8 data, and readline() below never consults
        # self.enc -- confirm whether callers expect decoded text here.
        BaseReader.__init__(self, transcoded, 'utf16')
        self.reset()

    def readline(self):
        """
        Return the next line from the transcoding wrapper, undecoded.

        If the line read is empty or whitespace-only, it is discarded and
        the following line is returned instead (at most one line is skipped).
        """
        line = self.ifile.readline()
        if not line.strip():
            line = self.ifile.readline()
        return line
class AnsiReader(BaseReader):
    """Reader that decodes each line of the file as 'latin1'."""

    def __init__(self, ifile):
        super(AnsiReader, self).__init__(ifile, 'latin1')
class AnselReader(BaseReader):
    """
    Reader that converts each line with ansel_to_utf8().

    No codec name is stored (self.enc is the empty string) because
    readline() is overridden and does not use it.
    """

    def __init__(self, ifile):
        super(AnselReader, self).__init__(ifile, "")

    def readline(self):
        """Return the next raw line passed through ansel_to_utf8()."""
        raw_line = self.ifile.readline()
        return ansel_to_utf8(raw_line)

View File

@ -88,9 +88,9 @@ all lines until the next level 2 token is found (in this case, skipping the
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
import os import os
import sys
import re import re
import time import time
import codecs
from gettext import gettext as _ from gettext import gettext as _
#------------------------------------------------------------------------ #------------------------------------------------------------------------
@ -108,17 +108,17 @@ LOG = logging.getLogger(".GedcomImport")
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
import Errors import Errors
import gen.lib import gen.lib
from BasicUtils import name_displayer, UpdateCallback from BasicUtils import UpdateCallback
import Mime import Mime
import LdsUtils import LdsUtils
import Utils import Utils
from ansel_utf8 import ansel_to_utf8
from _GedcomTokens import * from _GedcomTokens import *
import _GedcomInfo as GedcomInfo import _GedcomInfo as GedcomInfo
import _GedcomUtils as GedcomUtils import _GedcomUtils as GedcomUtils
import _GedcomLex as GedcomLex import _GedcomLex as GedcomLex
import _GedcomChar as GedcomChar
from gen.db.dbconst import EVENT_KEY from gen.db.dbconst import EVENT_KEY
@ -238,6 +238,68 @@ def find_from_handle(gramps_id, table):
table[gramps_id] = intid table[gramps_id] = intid
return intid return intid
#-------------------------------------------------------------------------
#
# File Readers
#
#-------------------------------------------------------------------------
class BaseReader(object):
    """
    Wrap a byte-oriented file object and return its lines as decoded text.

    ifile    -- file-like object providing seek() and readline()
    encoding -- codec name that readline() uses to decode each line
    """

    def __init__(self, ifile, encoding):
        self.ifile = ifile
        self.enc = encoding

    def reset(self):
        """Rewind the underlying file to its beginning."""
        self.ifile.seek(0)

    def readline(self):
        """
        Return the next line of the file decoded with self.enc.

        Bytes that cannot be decoded are substituted with the Unicode
        replacement character instead of raising an error.
        """
        # .decode() exists on Python 2 str and Python 3 bytes alike, whereas
        # the unicode() builtin is Python 2 only. Positional arguments are
        # used because older str.decode() does not accept keywords.
        return self.ifile.readline().decode(self.enc, 'replace')
class UTF8Reader(BaseReader):
    """
    Reader for UTF-8 encoded files that skips a leading byte order mark.

    readline() is inherited from BaseReader, which decodes each line with
    self.enc ('utf8' here) and replaces undecodable bytes.
    """

    def __init__(self, ifile):
        BaseReader.__init__(self, ifile, 'utf8')
        # Position the file just past the BOM (if any) before the first read.
        self.reset()

    def reset(self):
        """Rewind the file, leaving it positioned after a UTF-8 BOM if present."""
        self.ifile.seek(0)
        # codecs.BOM_UTF8 is the byte sequence EF BB BF; comparing against the
        # stdlib constant keeps the check valid for byte reads.
        if self.ifile.read(len(codecs.BOM_UTF8)) != codecs.BOM_UTF8:
            # No BOM: the bytes just consumed are real data, so rewind again.
            self.ifile.seek(0)
class UTF16Reader(BaseReader):
    """
    Reader for UTF-16 encoded files.

    The input file is wrapped in codecs.EncodedFile so that reads are
    transcoded from UTF-16 to UTF-8 before they reach readline().
    """

    def __init__(self, ifile):
        transcoded = codecs.EncodedFile(ifile, 'utf8', 'utf16')
        # NOTE(review): self.enc is recorded as 'utf16' although the wrapped
        # stream yields UTF-8 data, and readline() below never consults
        # self.enc -- confirm whether callers expect decoded text here.
        BaseReader.__init__(self, transcoded, 'utf16')
        self.reset()

    def readline(self):
        """
        Return the next line from the transcoding wrapper, undecoded.

        If the line read is empty or whitespace-only, it is discarded and
        the following line is returned instead (at most one line is skipped).
        """
        line = self.ifile.readline()
        if not line.strip():
            line = self.ifile.readline()
        return line
class AnsiReader(BaseReader):
    """Reader that decodes each line of the file as 'latin1'."""

    def __init__(self, ifile):
        super(AnsiReader, self).__init__(ifile, 'latin1')
class AnselReader(BaseReader):
    """
    Reader that converts each line with ansel_to_utf8().

    No codec name is stored (self.enc is the empty string) because
    readline() is overridden and does not use it.
    """

    def __init__(self, ifile):
        super(AnselReader, self).__init__(ifile, "")

    def readline(self):
        """Return the next raw line passed through ansel_to_utf8()."""
        raw_line = self.ifile.readline()
        return ansel_to_utf8(raw_line)
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
# #
# GedcomParser # GedcomParser
@ -783,13 +845,13 @@ class GedcomParser(UpdateCallback):
enc = stage_one.get_encoding() enc = stage_one.get_encoding()
if enc == "ANSEL": if enc == "ANSEL":
rdr = GedcomChar.AnselReader(ifile) rdr = AnselReader(ifile)
elif enc in ("UTF-8", "UTF8"): elif enc in ("UTF-8", "UTF8"):
rdr = GedcomChar.UTF8Reader(ifile) rdr = UTF8Reader(ifile)
elif enc in ("UTF-16", "UTF16", "UNICODE"): elif enc in ("UTF-16", "UTF16", "UNICODE"):
rdr = GedcomChar.UTF16Reader(ifile) rdr = UTF16Reader(ifile)
else: else:
rdr = GedcomChar.AnsiReader(ifile) rdr = AnsiReader(ifile)
self.lexer = GedcomLex.Reader(rdr) self.lexer = GedcomLex.Reader(rdr)
self.filename = filename self.filename = filename