From 706916af15b3fdc7d83f595848389663e8e2cb3c Mon Sep 17 00:00:00 2001
From: Don Allingham <don@gramps-project.org>
Date: Sun, 25 Feb 2007 05:26:32 +0000
Subject: [PATCH] 2007-02-24  Don Allingham  <don@gramps-project.org> 	*
 src/DisplayTabs/_NoteModel.py: added 	* src/DisplayTabs/_NoteTab.py: support
 new list 	* src/GrampsDbUtils/_GedcomParse.py: enhancements to parsing 
 * src/GrampsDbUtils/_ReadGedcom.py: handle encoding properly 	*
 src/GrampsDbUtils/_GedcomChar.py: new encoding interface 	*
 src/GrampsDbUtils/_GedcomLex.py: cleanup

svn: r8231
---
 ChangeLog                         |   8 ++
 src/DisplayTabs/_NoteModel.py     |  46 +++++++
 src/DisplayTabs/_NoteTab.py       | 173 +++++--------------------
 src/GrampsDbUtils/_GedcomChar.py  |  76 +++++++++++
 src/GrampsDbUtils/_GedcomLex.py   | 201 ++++++++++--------------------
 src/GrampsDbUtils/_GedcomParse.py | 116 ++++++-----------
 src/GrampsDbUtils/_ReadGedcom.py  |   7 +-
 7 files changed, 268 insertions(+), 359 deletions(-)
 create mode 100644 src/DisplayTabs/_NoteModel.py
 create mode 100644 src/GrampsDbUtils/_GedcomChar.py

diff --git a/ChangeLog b/ChangeLog
index 145bc9a6c..6d7561a1b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2007-02-24  Don Allingham  <don@gramps-project.org>
+	* src/DisplayTabs/_NoteModel.py: added
+	* src/DisplayTabs/_NoteTab.py: support new list
+	* src/GrampsDbUtils/_GedcomParse.py: enhancements to parsing
+	* src/GrampsDbUtils/_ReadGedcom.py: handle encoding properly
+	* src/GrampsDbUtils/_GedcomChar.py: new encoding interface
+	* src/GrampsDbUtils/_GedcomLex.py: cleanup
+
 2007-02-24  Brian Matherly  <brian@gramps-project.org>
 	* src/docgen/SvgDrawDoc.py.py: Fix XML error in draw_text.
 
diff --git a/src/DisplayTabs/_NoteModel.py b/src/DisplayTabs/_NoteModel.py
new file mode 100644
index 000000000..f4a128c5e
--- /dev/null
+++ b/src/DisplayTabs/_NoteModel.py
@@ -0,0 +1,46 @@
+#
+# Gramps - a GTK+/GNOME based genealogy program
+#
+# Copyright (C) 2000-2006  Donald N. Allingham
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+# $Id$
+
+#-------------------------------------------------------------------------
+#
+# GTK libraries
+#
+#-------------------------------------------------------------------------
+import gtk
+
+#-------------------------------------------------------------------------
+#
+# NoteModel
+#
+#-------------------------------------------------------------------------
+class NoteModel(gtk.ListStore):
+    """Rows of (note type, 80-character one-line preview, note handle)."""
+    def __init__(self, note_list, db):
+        gtk.ListStore.__init__(self, str, str, object)
+        self.db = db
+        for handle in note_list:
+            note = self.db.get_note_from_handle(handle)
+            self.append(row=[
+                str(note.get_type()), 
+                note.get().replace('\n', ' ')[:80], 
+                handle, 
+                ])
diff --git a/src/DisplayTabs/_NoteTab.py b/src/DisplayTabs/_NoteTab.py
index e5824a4bc..37422e87c 100644
--- a/src/DisplayTabs/_NoteTab.py
+++ b/src/DisplayTabs/_NoteTab.py
@@ -27,169 +27,60 @@
 #-------------------------------------------------------------------------
 from gettext import gettext as _
 
-#-------------------------------------------------------------------------
-#
-# GTK libraries
-#
-#-------------------------------------------------------------------------
-import gtk
-import pango
-
 #-------------------------------------------------------------------------
 #
 # GRAMPS classes
 #
 #-------------------------------------------------------------------------
 import Spell
-from _GrampsTab import GrampsTab
 from DisplayTabs import log
-from MarkupText import EditorBuffer
+from _NoteModel import NoteModel
+from _EmbeddedList import EmbeddedList
 
 #-------------------------------------------------------------------------
 #
 # NoteTab
 #
 #-------------------------------------------------------------------------
-class NoteTab(GrampsTab):
+class NoteTab(EmbeddedList):
 
-    def __init__(self, dbstate, uistate, track, note_list, title=_('Note')):
-        self.note_list = note_list        
-        self.original = note_list[:]
+    _HANDLE_COL = 2
 
-        GrampsTab.__init__(self, dbstate, uistate, track, title)
-        self.show_all()
+    _column_names = [
+        (_('Type'), 0, 100), 
+        (_('Preview'), 1, 200), 
+        ]
 
-    def get_icon_name(self):
-        return 'gramps-notes'
+    def __init__(self, dbstate, uistate, track, data):
+        self.data = data
+        EmbeddedList.__init__(self, dbstate, uistate, track, 
+                              _("Notes"), NoteModel)
 
-    def _update_label(self, *obj):
-        cc = self.buf.get_char_count()
-        if cc == 0 and not self.empty:
-            self.empty = True
-            self._set_label()
-        elif cc != 0 and self.empty:
-            self.empty = False
-            self._set_label()
+    def get_editor(self):
+        pass
 
-    def is_empty(self):
-        """
-        Indicates if the tab contains any data. This is used to determine
-        how the label should be displayed.
-        """
-        return self.buf.get_char_count() == 0
+    def get_user_values(self):
+        return []
 
-    def build_interface(self):
-        BUTTON = [(_('Italic'),gtk.STOCK_ITALIC,'<i>i</i>','<Control>I'),
-                  (_('Bold'),gtk.STOCK_BOLD,'<b>b</b>','<Control>B'),
-                  (_('Underline'),gtk.STOCK_UNDERLINE,'<u>u</u>','<Control>U'),
-                  #('Separator', None, None, None),
-              ]
+    def get_data(self):
+        return self.data
 
-        vbox = gtk.VBox()
+    def column_order(self):
+        return ((1, 0), (1, 1))
 
-        self.text = gtk.TextView()
-        self.text.set_accepts_tab(True)
-        # Accelerator dictionary used for formatting shortcuts
-        #  key: tuple(key, modifier)
-        #  value: widget, to emit 'activate' signal on
-        self.accelerator = {}
-        self.text.connect('key-press-event', self._on_key_press_event)
+    def add_button_clicked(self, obj):
+        pass
 
-        self.flowed = gtk.RadioButton(None, _('Flowed'))
-        self.format = gtk.RadioButton(self.flowed, _('Formatted'))
-
-#        if self.note_obj and self.note_obj.get_format():
-#            self.format.set_active(True)
-#            self.text.set_wrap_mode(gtk.WRAP_NONE)
-#        else:
-#            self.flowed.set_active(True)
-#            self.text.set_wrap_mode(gtk.WRAP_WORD)
-        self.spellcheck = Spell.Spell(self.text)
-
-        self.flowed.connect('toggled', self.flow_changed)
-
-        scroll = gtk.ScrolledWindow()
-        scroll.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
-        scroll.add(self.text)
-        # FIXME: is this signal called at all
-        scroll.connect('focus-out-event', self.update)
-
-        vbox.pack_start(scroll, True)
-        vbox.set_spacing(6)
-        vbox.set_border_width(6)
-
-        hbox = gtk.HBox()
-        hbox.set_spacing(12)
-        hbox.set_border_width(6)
-        hbox.pack_start(self.flowed, False)
-        hbox.pack_start(self.format, False)
-        vbox.pack_start(hbox, False)
-        self.pack_start(vbox, True)
-
-        self.buf = EditorBuffer()
-        self.text.set_buffer(self.buf)
-        tooltips = gtk.Tooltips()
-        for tip, stock, markup, accel in BUTTON:
-            if markup:
-                button = gtk.ToggleButton()
-                image = gtk.Image()
-                image.set_from_stock(stock, gtk.ICON_SIZE_MENU)
-                button.set_image(image)
-                button.set_relief(gtk.RELIEF_NONE)
-                tooltips.set_tip(button, tip)
-                self.buf.setup_widget_from_xml(button, markup)
-                key, mod = gtk.accelerator_parse(accel)
-                self.accelerator[(key, mod)] = button
-                hbox.pack_start(button, False)
-            else:
-                hbox.pack_start(gtk.VSeparator(), False)
-	hbox.pack_start(gtk.Label(_('Additional Notes:')),False)
-	self.menu = gtk.ComboBox()
-	hbox.pack_start(self.menu, True)
-
-#        if self.note_obj:
-#            self.empty = False
-#            self.buf.set_text(self.note_obj.get(markup=True))
-#            log.debug("Text: %s" % self.buf.get_text())
-#        else:
-#            self.empty = True
-            
-        self.buf.connect('changed', self.update)
-        self.buf.connect_after('apply-tag', self.update)
-        self.buf.connect_after('remove-tag', self.update)
+    def add_callback(self, name):
+        self.get_data().append(name)
+        self.changed = True
         self.rebuild()
 
-    def _on_key_press_event(self, widget, event):
-        log.debug("Key %s (%d) was pressed on %s" %
-                  (gtk.gdk.keyval_name(event.keyval), event.keyval, widget))
-        key = event.keyval
-        mod = event.state
-        if self.accelerator.has_key((key, mod)):
-            self.accelerator[(key, mod)].emit('activate')
-            return True
+    def edit_button_clicked(self, obj):
+        note = self.get_selected()
+        if note:
+            print note
 
-    def update(self, obj, *args):
-#        if self.note_obj:
-#            start = self.buf.get_start_iter()
-#            stop = self.buf.get_end_iter()
-#            text = self.buf.get_text(start, stop)
-#            self.note_obj.set(text)
-#        else:
-#            print "NOTE OBJ DOES NOT EXIST"
-        self._update_label(obj)
-        return False
-
-    def flow_changed(self, obj):
-        if obj.get_active():
-            self.text.set_wrap_mode(gtk.WRAP_WORD)
-#            self.note_obj.set_format(0)
-        else:
-            self.text.set_wrap_mode(gtk.WRAP_NONE)
-#            self.note_obj.set_format(1)
-
-    def rebuild(self):
-        self._set_label()
-
-    def cancel(self):
-        pass
-#        self.note_obj.unserialize(self.original)
+    def edit_callback(self, name):
+        self.changed = True
+        self.rebuild()
diff --git a/src/GrampsDbUtils/_GedcomChar.py b/src/GrampsDbUtils/_GedcomChar.py
new file mode 100644
index 000000000..1e68410da
--- /dev/null
+++ b/src/GrampsDbUtils/_GedcomChar.py
@@ -0,0 +1,76 @@
+#
+# Gramps - a GTK+/GNOME based genealogy program
+#
+# Copyright (C) 2000-2005  Donald N. Allingham
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+from ansel_utf8 import ansel_to_utf8
+
+class BaseReader:
+    def __init__(self, ifile, encoding):
+        self.ifile = ifile
+        self.enc = encoding
+
+    def reset(self):
+        self.ifile.seek(0)
+
+    def readline(self):
+        return unicode(self.ifile.readline(), 
+                       encoding=self.enc,
+                       errors='replace').strip('\n\r')
+
+class UTF8Reader(BaseReader):
+    """Decodes UTF-8 lines; reset() also skips a leading byte order mark."""
+    def __init__(self, ifile):
+        BaseReader.__init__(self, ifile, 'utf8')
+
+    def reset(self):
+        self.ifile.seek(0)
+        data = self.ifile.read(3)
+        if data != "\xef\xbb\xbf":
+            self.ifile.seek(0)
+
+    def readline(self):
+        return unicode(self.ifile.readline(),
+                       encoding=self.enc,
+                       errors='replace').strip('\n\r')
+
+class UTF16Reader(BaseReader):
+    # NOTE(review): lines are split on bytes before UTF-16 decoding - verify.
+    def __init__(self, ifile):
+        BaseReader.__init__(self, ifile, 'utf16')
+
+    def reset(self):
+        self.ifile.seek(0)
+        data = self.ifile.read(2)
+        if data != "\xff\xfe":
+            self.ifile.seek(0)
+
+class AnsiReader(BaseReader):
+
+    def __init__(self, ifile):
+        BaseReader.__init__(self, ifile, 'latin1')
+    
+class AnselReader(BaseReader):
+
+    def __init__(self, ifile):
+        BaseReader.__init__(self, ifile, "")
+
+    def readline(self):
+        return ansel_to_utf8(self.ifile.readline().strip('\n\r'))
+
+        
diff --git a/src/GrampsDbUtils/_GedcomLex.py b/src/GrampsDbUtils/_GedcomLex.py
index 6c8ff988a..dc7a75d51 100644
--- a/src/GrampsDbUtils/_GedcomLex.py
+++ b/src/GrampsDbUtils/_GedcomLex.py
@@ -22,21 +22,22 @@
 
 "Import from GEDCOM"
 
+__revision__ = "$Revision: $"
+__author__ = "Don Allingham"
+
 #-------------------------------------------------------------------------
 #
 # standard python modules
 #
 #-------------------------------------------------------------------------
+
 import re
-import string
-from gettext import gettext as _
 
 #-------------------------------------------------------------------------
 #
 # GRAMPS modules
 #
 #-------------------------------------------------------------------------
-from ansel_utf8 import ansel_to_utf8
 
 from _GedcomInfo import *
 from _GedcomTokens import *
@@ -45,60 +46,25 @@ from DateHandler._DateParser import DateParser
 
 #-------------------------------------------------------------------------
 #
-# latin/utf8 conversions
-#
+# constants
 #-------------------------------------------------------------------------
 
-def utf8_to_latin(msg):
-    """
-    Converts a string from unicode to iso-8859-1. If any illegal characters 
-    are found, they are converted to ?
-
-    @param msg: unicode string to convert
-    @type level: unicode
-    @return: Returns the string, converted to a ISO-8859-1 object
-    @rtype: str
-    """
-    return msg.encode('iso-8859-1', 'replace')
-
-def latin_to_utf8(s):
-    if type(s) == unicode:
-        return s
-    else:
-        return unicode(s,'iso-8859-1')
-
-def nocnv(s):
-    return unicode(s,errors='replace')
-
-#-------------------------------------------------------------------------
-#
-# constants
-#
-#-------------------------------------------------------------------------
-ANSEL = 1
-UNICODE = 2
-UPDATE = 25
-
-_transtable = string.maketrans('','')
-_delc = _transtable[0:8] + _transtable[10:31]
-_transtable2 = _transtable[0:128] + ('?' * 128)
-
-ged2gramps = {}
+GED2GRAMPS = {}
 for _val in personalConstantEvents.keys():
     _key = personalConstantEvents[_val]
     if _key != "":
-        ged2gramps[_key] = _val
+        GED2GRAMPS[_key] = _val
 
 for _val in familyConstantEvents.keys():
     _key = familyConstantEvents[_val]
     if _key != "":
-        ged2gramps[_key] = _val
+        GED2GRAMPS[_key] = _val
 
-ged2attr = {}
+GED2ATTR = {}
 for _val in personalConstantAttributes.keys():
     _key = personalConstantAttributes[_val]
     if _key != "":
-        ged2attr[_key] = _val
+        GED2ATTR[_key] = _val
     
 #-------------------------------------------------------------------------
 #
@@ -106,26 +72,24 @@ for _val in personalConstantAttributes.keys():
 #
 #-------------------------------------------------------------------------
 
-intRE       = re.compile(r"\s*(\d+)\s*$")
-modRegexp   = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$")
-calRegexp   = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D([^@]+)@\s*(.*)$")
-rangeRegexp = re.compile(r"\s*BET\s+@#D([^@]+)@\s*(.*)\s+AND\s+@#D([^@]+)@\s*(.*)$")
-spanRegexp  = re.compile(r"\s*FROM\s+@#D([^@]+)@\s*(.*)\s+TO\s+@#D([^@]+)@\s*(.*)$")
-intRegexp   = re.compile(r"\s*INT\s+([^(]+)\((.*)\)$")
+MOD   = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$")
+CAL   = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D([^@]+)@\s*(.*)$")
+RANGE = re.compile(r"\s*BET\s+@#D([^@]+)@\s*(.*)\s+AND\s+@#D([^@]+)@\s*(.*)$")
+SPAN  = re.compile(r"\s*FROM\s+@#D([^@]+)@\s*(.*)\s+TO\s+@#D([^@]+)@\s*(.*)$")
 
-_calendar_map = {
+CALENDAR_MAP = {
     "FRENCH R" : RelLib.Date.CAL_FRENCH,
     "JULIAN"   : RelLib.Date.CAL_JULIAN,
     "HEBREW"   : RelLib.Date.CAL_HEBREW,
 }
 
-_quality_map = {
+QUALITY_MAP = {
     'CAL' : RelLib.Date.QUAL_CALCULATED,
     'INT' : RelLib.Date.QUAL_CALCULATED,
     'EST' : RelLib.Date.QUAL_ESTIMATED,
 }
 
-_sex_map = {
+SEX_MAP = {
     'F' : RelLib.Person.FEMALE,
     'M' : RelLib.Person.MALE,
 }
@@ -185,20 +149,21 @@ class GedLine:
         self.data = data[2]
 
         if self.level == 0:
-            if self.token_text and self.token_text[0] == '@' and self.token_text[-1] == '@':
+            if self.token_text and self.token_text[0] == '@' \
+                    and self.token_text[-1] == '@':
                 self.token = TOKEN_ID
                 self.token_text = self.token_text[1:-1]
                 self.data = self.data.strip()
         else:
-            f = MAP_DATA.get(self.token)
-            if f:
-                f(self)
+            func = MAP_DATA.get(self.token)
+            if func:
+                func(self)
 
     def calc_sex(self):
         """
         Converts the data field to a RelLib token indicating the gender
         """
-        self.data = _sex_map.get(self.data.strip(),RelLib.Person.UNKNOWN)
+        self.data = SEX_MAP.get(self.data.strip(), RelLib.Person.UNKNOWN)
 
     def calc_date(self):
         """
@@ -212,12 +177,12 @@ class GedLine:
         change the type from UNKNOWN to TOKEN_GEVENT (gedcom event), and
         the data is assigned to the associated GRAMPS EventType
         """
-        token = ged2gramps.get(self.token_text)
+        token = GED2GRAMPS.get(self.token_text)
         if token:
             self.token = TOKEN_GEVENT
             self.data = token
         else:
-            token = ged2attr.get(self.token_text)
+            token = GED2ATTR.get(self.token_text)
             if token:
                 attr = RelLib.Attribute()
                 attr.set_value(self.data)
@@ -226,10 +191,10 @@ class GedLine:
                 self.data = attr
 
     def calc_note(self):
-        d = self.data.strip()
-        if len(d) > 2 and d[0] == '@' and d[-1] == '@':
+        gid = self.data.strip()
+        if len(gid) > 2 and gid[0] == '@' and gid[-1] == '@':
             self.token = TOKEN_RNOTE
-            self.data = d[1:-1]
+            self.data = gid[1:-1]
 
     def calc_nchi(self):
         attr = RelLib.Attribute()
@@ -245,10 +210,6 @@ class GedLine:
         self.data = attr
         self.token = TOKEN_ATTR
 
-    def calc_lds(self):
-        self.data = _
-        self.token = TOKEN_ATTR
-
     def __repr__(self):
         return "%d: %d (%d:%s) %s" % (self.line, self.level, self.token, 
                                       self.token_text, self.data)
@@ -276,7 +237,7 @@ MAP_DATA = {
 #
 #-------------------------------------------------------------------------
 
-_dp = GedcomDateParser()
+DATE_CNV = GedcomDateParser()
 
 def extract_date(text):
     """
@@ -285,54 +246,55 @@ def extract_date(text):
     dateobj = RelLib.Date()
     try:
         # extract out the MOD line
-        match = modRegexp.match(text)
+        match = MOD.match(text)
         if match:
             (mod, text) = match.groups()
-            qual = _quality_map.get(mod, RelLib.Date.QUAL_NONE)
+            qual = QUALITY_MAP.get(mod, RelLib.Date.QUAL_NONE)
         else:
             qual = RelLib.Date.QUAL_NONE
 
         # parse the range if we match, if so, return
-        match = rangeRegexp.match(text)
+        match = RANGE.match(text)
         if match:
-            (cal1,data1,cal2,data2) = match.groups()
+            (cal1, data1, cal2, data2) = match.groups()
 
-            cal = _calendar_map.get(cal1, RelLib.Date.CAL_GREGORIAN)
+            cal = CALENDAR_MAP.get(cal1, RelLib.Date.CAL_GREGORIAN)
                     
-            start = _dp.parse(data1)
-            stop =  _dp.parse(data2)
+            start = DATE_CNV.parse(data1)
+            stop =  DATE_CNV.parse(data2)
             dateobj.set(RelLib.Date.QUAL_NONE, RelLib.Date.MOD_RANGE, cal,
                         start.get_start_date() + stop.get_start_date())
             dateobj.set_quality(qual)
             return dateobj
 
         # parse a span if we match
-        match = spanRegexp.match(text)
+        match = SPAN.match(text)
         if match:
-            (cal1,data1,cal2,data2) = match.groups()
+            (cal1, data1, cal2, data2) = match.groups()
 
-            cal = _calendar_map.get(cal1, RelLib.Date.CAL_GREGORIAN)
+            cal = CALENDAR_MAP.get(cal1, RelLib.Date.CAL_GREGORIAN)
                     
-            start = _dp.parse(data1)
-            stop =  _dp.parse(data2)
+            start = DATE_CNV.parse(data1)
+            stop =  DATE_CNV.parse(data2)
             dateobj.set(RelLib.Date.QUAL_NONE, RelLib.Date.MOD_SPAN, cal,
                         start.get_start_date() + stop.get_start_date())
             dateobj.set_quality(qual)
             return dateobj
         
-        match = calRegexp.match(text)
+        match = CAL.match(text)
         if match:
-            (abt,cal,data) = match.groups()
-            dateobj = _dp.parse("%s %s" % (abt, data))
-            dateobj.set_calendar(_calendar_map.get(cal, RelLib.Date.CAL_GREGORIAN))
+            (abt, cal, data) = match.groups()
+            dateobj = DATE_CNV.parse(("%s %s" % (abt or "", data)).lstrip())
+            dateobj.set_calendar(CALENDAR_MAP.get(cal, 
+                                                  RelLib.Date.CAL_GREGORIAN))
             dateobj.set_quality(qual)
             return dateobj
 
-        dateobj = _dp.parse(text)
+        dateobj = DATE_CNV.parse(text)
         dateobj.set_quality(qual)
         return dateobj
     except IOError:
-        return self.dp.set_text(text)
+        return DATE_CNV.set_text(text)
 
 #-------------------------------------------------------------------------
 #
@@ -341,8 +303,8 @@ def extract_date(text):
 #-------------------------------------------------------------------------
 class Reader:
 
-    def __init__(self, f):
-        self.f = f
+    def __init__(self, ifile):
+        self.ifile = ifile
         self.current_list = []
         self.eof = False
         self.cnv = None
@@ -353,11 +315,7 @@ class Reader:
             TOKEN_CONC : self._fix_token_conc,
             }
 
-    def set_charset_fn(self,cnv):
-        print "Character set changed", cnv
-        self.cnv = cnv
-
-    def set_broken_conc(self,broken):
+    def set_broken_conc(self, broken):
         self.func_map = {
             TOKEN_CONT : self._fix_token_cont,
             TOKEN_CONC : self._fix_token_broken_conc,
@@ -372,46 +330,39 @@ class Reader:
             return None
 
     def _fix_token_cont(self, data):
-        l = self.current_list[0]
-        new_value = l[2]+'\n'+data[2]
-        self.current_list[0] = (l[0], l[1], new_value, l[3], l[4])
+        line = self.current_list[0]
+        new_value = line[2]+'\n'+data[2]
+        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])
 
     def _fix_token_conc(self, data):
-        l = self.current_list[0]
-        new_value = l[2] + data[2]
-        self.current_list[0] = (l[0], l[1], new_value, l[3], l[4])
+        line = self.current_list[0]
+        new_value = line[2] + data[2]
+        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])
 
     def _fix_token_broken_conc(self, data):
-        l = self.current_list[0]
-        new_value = u"%s %s" % (l[2], data[2])
-        self.current_list[0] = (l[0], l[1], new_value, l[3], l[4])
+        line = self.current_list[0]
+        new_value = u"%s %s" % (line[2], data[2])
+        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])
 
     def readahead(self):
         while len(self.current_list) < 5:
-            line = self.f.readline()
+            line = self.ifile.readline()
             self.index += 1
             if not line:
                 self.eof = True
                 return
 
-            if self.cnv:
-                try:
-                    line = self.cnv(line)
-                except:
-                    line = self.cnv(line.translate(_transtable2))
-            else:
-                line = unicode(line,errors='replace')
+            line = line.split(None, 2) + ['']
 
-            line = line.split(None,2) + ['']
-
-            val = line[2].rstrip('\r\n')
+            val = line[2]
                 
             try:
                 level = int(line[0])
             except:
                 level = 0
 
-            data = (level, tokens.get(line[1], TOKEN_UNKNOWN), val, line[1], self.index)
+            data = (level, tokens.get(line[1], TOKEN_UNKNOWN), val, line[1], 
+                    self.index)
 
             func = self.func_map.get(data[1])
             if func:
@@ -419,25 +370,3 @@ class Reader:
             else:
                 self.current_list.insert(0, data)
 
-if __name__ == "__main__":
-    import sys
-
-    def run():
-        print "Reading", sys.argv[1]
-        a = Reader(sys.argv[1])
-        while True:
-            line = a.readline()
-            print line
-            if not line: break
-
-#    import Utils
-#    Utils.profile(run)
-    run()
-
-    print extract_date("20 JAN 2000")
-    print extract_date("EST 20 JAN 2000")
-    print extract_date("CAL 20 JAN 2000")
-    print extract_date("ABT 20 JAN 2000")
-    print extract_date("INT 20 JAN 2000")
-    print extract_date("BET 20 JAN 2000 AND FEB 2000")
-    print extract_date("FROM 20 JAN 2000 TO FEB 2000")
diff --git a/src/GrampsDbUtils/_GedcomParse.py b/src/GrampsDbUtils/_GedcomParse.py
index 127e85674..d7a02be2b 100644
--- a/src/GrampsDbUtils/_GedcomParse.py
+++ b/src/GrampsDbUtils/_GedcomParse.py
@@ -64,13 +64,11 @@ all tokens at the lower level.
 
 For example:
 
-
 1 BIRT
   2 DATE 1 JAN 2000
   2 UKNOWN TAG
     3 NOTE DATA
 
-
 The function parsing the individual at level 1, would encounter the BIRT tag.
 It would look up the BIRT token in the table to see if a function as defined 
 for this TOKEN, and pass control to this function. This function would then
@@ -81,7 +79,6 @@ the level 2 parser, which would then encounter the "UKNOWN" tag. Since this is
 not a valid token, it would not be in the table, and a function that would skip
 all lines until the next level 2 token is found (in this case, skipping the 
 "3 NOTE DATA" line.
-
 """
 
 __revision__ = "$Revision: $"
@@ -94,10 +91,8 @@ __author__   = "Don Allingham"
 #-------------------------------------------------------------------------
 import os
 import re
-import string
 import time
 from gettext import gettext as _
-import copy 
 
 #------------------------------------------------------------------------
 #
@@ -114,20 +109,19 @@ LOG = logging.getLogger(".GedcomImport")
 #-------------------------------------------------------------------------
 import Errors
 import RelLib
-from BasicUtils import NameDisplay
+from BasicUtils import NameDisplay, UpdateCallback
 import Utils
 import Mime
 import LdsUtils
-from ansel_utf8 import ansel_to_utf8
 
 from _GedcomInfo import *
 from _GedcomTokens import *
 from _GedcomLex import Reader
+from _GedcomChar import *
 
 import _GedcomUtils as GedcomUtils 
 
 from GrampsDb._GrampsDbConst  import EVENT_KEY
-from BasicUtils import UpdateCallback
 
 try:
     import Config
@@ -145,53 +139,14 @@ ADDR_RE  = re.compile('(.+)([\n\r]+)(.+)\s*,(.+)\s+(\d+)\s*(.*)')
 ADDR2_RE = re.compile('(.+)([\n\r]+)(.+)\s*,(.+)\s+(\d+)')
 ADDR3_RE = re.compile('(.+)([\n\r]+)(.+)\s*,(.+)')
 
-
 TRUNC_MSG = _("Your GEDCOM file is corrupted. "
               "It appears to have been truncated.")
 
-#-------------------------------------------------------------------------
-#
-# latin/utf8 conversions
-#
-#-------------------------------------------------------------------------
-
-
-def latin_to_utf8(msg):
-    """
-    Converts a string from iso-8859-1 to unicode. If the string is already
-    unicode, we do nothing.
-
-    @param msg: string to convert
-    @type level: str
-    @return: Returns the string, converted to a unicode object
-    @rtype: unicode
-    """
-    if type(msg) == unicode:
-        return msg
-    else:
-        return unicode(msg, 'iso-8859-1')
-
-def nocnv(msg):
-    """
-    Null operation that makes sure that a unicode string remains a unicode 
-    string
-
-    @param msg: unicode to convert
-    @type level: unicode
-    @return: Returns the string, converted to a unicode object
-    @rtype: unicode
-    """
-    return unicode(msg)
-
 #-------------------------------------------------------------------------
 #
 # constants
 #
 #-------------------------------------------------------------------------
-ANSEL = 1
-UNICODE = 2
-UPDATE = 25
-
 TYPE_BIRTH  = RelLib.ChildRefType()
 TYPE_ADOPT  = RelLib.ChildRefType(RelLib.ChildRefType.ADOPTED)
 TYPE_FOSTER = RelLib.ChildRefType(RelLib.ChildRefType.FOSTER)
@@ -224,10 +179,6 @@ MIME_MAP = {
 EVENT_FAMILY_STR = _("%(event_name)s of %(family)s")
 EVENT_PERSON_STR = _("%(event_name)s of %(person)s")
 
-TRANS_TABLE = string.maketrans('', '')
-DEL_CHARS = TRANS_TABLE[0:8] + TRANS_TABLE[10:31]
-TRANS_TABLE2 = TRANS_TABLE[0:128] + ('?' * 128)
-
 FTW_BAD_PLACE = [
     RelLib.EventType.OCCUPATION, 
     RelLib.EventType.RELIGION,
@@ -265,6 +216,7 @@ CONC_RE    = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
 PERSON_RE  = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$")
 
 class StageOne:
+
     def __init__(self, ifile):
 	self.ifile = ifile
 	self.famc = {}
@@ -275,44 +227,47 @@ class StageOne:
 
     def parse(self):
 	current = ""
+
+        line = self.ifile.read(3)
+        if line == "\xef\xbb\xbf":
+            # UTF-8 byte order mark: all three bytes are already consumed
+            self.enc = "UTF8"
+        else:
+            self.ifile.seek(0)
+
 	for line in self.ifile:
 	    self.lcnt +=1
+            line = line.strip()
             data = line.split(None,2) + ['']
             try:
                 (level, key, value) = data[:3]
-                value = value.strip()
-                # convert the first value to an integer. We have to be a bit
-                # careful here, since some GEDCOM files have garbage characters
-                # at the front of the first file if they are unicode encoded.
-                # So, if we have a failure to convert, check the last character
-                # of the string, which shoul de a '0'
                 try:
                     level = int(level)
                 except:
-                    level = int(level[-1])
+                    level = 0
                 key = key.strip()
             except:
                 raise Errors.GedcomError("Corrupted file at line %d" % self.lcnt)
 
 	    if level == 0 and key[0] == '@':
-                if value == "FAM":
+                if value.strip() in ("FAM", "FAMILY"):
                     current = key.strip()
                     current = current[1:-1]
-                elif value == "INDI":
+                elif value.strip() in ("INDI", "INDIVIDUAL"):
                     self.pcnt += 1
-	    elif key in ("HUSB", "WIFE") and value and value[0] == '@':
+	    elif key in ("HUSB", "HUSBAND", "WIFE") and value and value[0] == '@':
 		value = value[1:-1]
 		if self.fams.has_key(value):
 		    self.fams[value].append(current)
 		else:
 		    self.fams[value] = [current]
-	    elif key == "CHIL" and value and value[0] == '@':
+	    elif key in ("CHIL", "CHILD") and value and value[0] == '@':
 		value = value[1:-1]
 		if self.famc.has_key(value):
 		    self.famc[value].append(current)
 		else:
 		    self.famc[value] = [current]
-	    elif key == 'CHAR':
+	    elif key == 'CHAR' and not self.enc:
 		self.enc = value
 
     def get_famc_map(self):
@@ -322,7 +277,10 @@ class StageOne:
 	return self.fams
 
     def get_encoding(self):
-	return self.enc
+	return self.enc.upper()
+
+    def set_encoding(self, enc):
+	self.enc = enc
 
     def get_person_count(self):
 	return self.pcnt
@@ -806,16 +764,20 @@ class GedcomParser(UpdateCallback):
             data = cursor.next()
         cursor.close()
 
-        self.lexer = Reader(ifile)
+        enc = stage_one.get_encoding()
+
+        if enc == "ANSEL":
+            rdr = AnselReader(ifile)
+        elif enc in ("UTF-8", "UTF8"):
+            rdr = UTF8Reader(ifile)
+        elif enc in ("UTF-16", "UTF16", "UNICODE"):
+            rdr = UTF16Reader(ifile)
+        else:
+            rdr = AnsiReader(ifile)
+
+        self.lexer = Reader(rdr)
         self.filename = filename
         self.backoff = False
-        self.override = False
-#
-#        if self.override != 0:
-#            if self.override == 1:
-#                self.lexer.set_charset_fn(ansel_to_utf8)
-#            elif self.override == 2:
-#                self.lexer.set_charset_fn(latin_to_utf8)
 
         fullpath = os.path.normpath(os.path.abspath(filename))
         self.geddir = os.path.dirname(fullpath)
@@ -1064,9 +1026,6 @@ class GedcomParser(UpdateCallback):
         """
         text = self.groups.line
         msg = _("Line %d was not understood, so it was ignored.") % text
-        import traceback
-        traceback.print_stack()
-        print self.groups
         self.warn(msg)
         self.error_count += 1
         self.skip_subordinate_levels(level)
@@ -4039,11 +3998,8 @@ class GedcomParser(UpdateCallback):
                 if genby == "GRAMPS":
                     self.gedsource = self.gedmap.get_from_source_tag(line.data)
                     self.lexer.set_broken_conc(self.gedsource.get_conc())
-            elif line.token == TOKEN_CHAR and not self.override:
-                if line.data == "ANSEL":
-                    self.lexer.set_charset_fn(ansel_to_utf8)
-                elif line.data not in ("UNICODE","UTF-8","UTF8"):
-                    self.lexer.set_charset_fn(latin_to_utf8)
+            elif line.token == TOKEN_CHAR:
+                # the character set was already determined by StageOne
                 self.skip_subordinate_levels(2)
             elif line.token == TOKEN_GEDC:
                 self.skip_subordinate_levels(2)
diff --git a/src/GrampsDbUtils/_ReadGedcom.py b/src/GrampsDbUtils/_ReadGedcom.py
index d2b767ab7..51f092ab5 100644
--- a/src/GrampsDbUtils/_ReadGedcom.py
+++ b/src/GrampsDbUtils/_ReadGedcom.py
@@ -66,6 +66,7 @@ def importData(database, filename, callback=None, use_trans=False):
         dialog.destroy()
     else:
         code_set = None
+
     import2(database, filename, callback, code_set, use_trans)
 
 def import2(database, filename, callback, code_set, use_trans):
@@ -74,7 +75,10 @@ def import2(database, filename, callback, code_set, use_trans):
         ifile = open(filename,"rU")
         np = StageOne(ifile)
 	np.parse()
-	print np.get_encoding()
+
+        if code_set:
+            np.set_encoding(code_set)
+
 	ifile.seek(0)
         gedparse = GedcomParser(database, ifile, filename, callback, np)
     except IOError, msg:
@@ -85,7 +89,6 @@ def import2(database, filename, callback, code_set, use_trans):
                     _("%s could not be imported") % filename + "\n" + str(msg))
         return
 
-
     if database.get_number_of_people() == 0:
         use_trans = False