9499: Fix CSV import of utf-8 encoded files exported by Gramps

This commit is contained in:
prculley 2016-06-15 11:20:30 -05:00 committed by Nick Hall
parent d77f0562e8
commit 495e7801d7
9 changed files with 1625 additions and 73 deletions

2
.gitattributes vendored
View File

@ -11,3 +11,5 @@
# don't mess with line endings for Gedcom files
*.ged binary
*.GED binary
*.csv -text

View File

@ -0,0 +1,122 @@
Person,Surname,Given,Call,Suffix,Prefix,Title,Gender,Birth date,Birth place,Birth source,Baptism date,Baptism place,Baptism source,Death date,Death place,Death source,Burial date,Burial place,Burial source,Note
[I0030],Adams,Janice Ann,,,,,female,1965-08-26,"Fremont, Alameda Co., CA",,,,,,,,,,,
[I0016],Anderson,Jennifer,,,,,female,1907-11-05,"Rønne, Bornholm, Denmark",,,,,1985-05-29,"San Francisco, San Francisco Co., CA",,,,,
[I0025],Ericsdotter,Marta,,,,,female,about 1775,Sweden,,,,,,,,,,,
[I0041],Green,Janis Elaine,,,,,female,1935-12-02,,,,,,,,,,,,
[I0000],Hansdotter,Anna,,,,,female,1864-10-02,"Löderup, Malmöhus Län, Sweden",,,,,1945-09-29,"Sparks, Washoe Co., NV",,,,,
[I0038],Hansdotter,Kerstina,,,,,female,1832-11-29,"Smestorp, Kristianstad Län, Sweden",,,,,before 1908,Sweden,,,,,
[I0032],Horne,Darcy,,,,,female,1966-07-02,"Sacramento, Sacramento Co., CA",,,,,,,,,,,
[I0036],Jefferson,Elna,,,,,female,1800-09-14,"Gladsax, Kristianstad Län, Sweden",,,,,,Sweden,,,,,
[I0017],Jones,Lillie Harriet,,,,,female,1910-05-02,"Rønne, Bornholm, Denmark",,,,,1990-06-26,,,,,,
[I0042],Ke 柯,,,,,,male,,,,,,,,,,,,,
[I0013],Michaels,Evelyn,,,,,female,about 1897,,,,,,,,,,,,
[I0012],Nielsen,Herman Julius,,,,,male,1889-08-31,"Rønne, Bornholm, Denmark",,,,,1945,,,,,,
[I0031],Ohman,Marjorie,,,,,female,1903-06-03,"Denver, Denver Co., CO, Denver Co., Colorado, USA",,,,,1980-06-22,"Reno, Washoe Co., NV",,,,,
[I0034],Perkins,Alice Paula,,,,,female,1933-11-22,"Sparks, Washoe Co., NV",,,,,,,,,,,
[I0002],Smith,Amber Marie,,,,,female,1998-04-12,"Hayward, Alameda Co., CA",,,,,,,,,,,
[I0023],Smith,Astrid Shermanna Augusta,,,,,female,1889-01-31,"Rønne, Bornholm, Denmark",,,,,1963-12-21,"San Francisco, San Francisco Co., CA",,,,,
[I0020],Smith,Carl Emil,,,,,male,1899-12-20,"Rønne, Bornholm, Denmark",,,,,1959-01-28,"Reno, Washoe Co., NV",,,,,
[I0029],Smith,Craig Peter,,,,,male,after 1966,"San Francisco, San Francisco Co., CA",,,,,,,,,,,
[I0037],Smith,Edwin Michael,,,,,male,1961-05-24,"San Jose, Santa Clara Co., CA","Birth, Death and Marriage Records",,,,,,,,,,
[I0009],Smith,Emil,,,,,male,1860-09-27,"Simrishamn, Kristianstad Län, Sweden",,,,,,,,,,,
[I0019],Smith,Eric Lloyd,,,,Dr.,male,1963-08-28,"San Francisco, San Francisco Co., CA",,,,,,,,,,,
[I0015],Smith,Gus,,,,,male,1897-09-11,"Rønne, Bornholm, Denmark",,,,,1963-10-21,"San Francisco, San Francisco Co., CA",,,,,
[I0024],Smith,Gustaf,,Sr.,,,male,1862-11-28,"Grostorp, Kristianstad Län, Sweden",,,,,before 1930-07-23,"Sparks, Washoe Co., NV",,,,,
[I0011],Smith,Hanna,,,,,female,1821-01-29,"Gladsax, Kristianstad Län, Sweden",,,,,,,,,,,
[I0010],Smith,Hans Peter,,,,,male,1904-04-17,"Rønne, Bornholm, Denmark",Birth Records,,,,1977-01-29,"San Francisco, San Francisco Co., CA",,1977-02-05,"San Francisco, San Francisco Co., CA",findagrave.com,
[I0021],Smith,Hjalmar,,,,,male,1893-01-31,"Rønne, Bornholm, Denmark",,,,,1894-09-25,"Rønne, Bornholm, Denmark",,,,,
[I0008],Smith,Hjalmar,,,,,male,1895-04-07,"Rønne, Bornholm, Denmark",,1895-06-03,"Rønne Bornholm, Denmark",,1975-06-26,"Reno, Washoe Co., NV",,,,,
[I0007],Smith,Ingar,,,,,female,after 1823,"Gladsax, Kristianstad Län, Sweden",,,,,,,,,,,
[I0027],Smith,Ingeman,,,,,male,about 1770,Sweden,,,,,,,,,,,
[I0004],Smith,Ingeman,,,,,male,1826-01-29,"Gladsax, Kristianstad Län, Sweden",,,,,,,,,,,
[I0018],Smith,John Hjalmar,,,,,male,1932-01-30,"San Francisco, San Francisco Co., CA",,,,,,,,,,,
[I0001],Smith,Keith Lloyd,,,,,male,1966-08-11,"San Francisco, San Francisco Co., CA",,,,,,,,,,,
[I0026],Smith,Kirsti Marie,,,,,female,1886-12-15,"Rønne, Bornholm, Denmark",,,,,1966-07-18,"San Francisco, San Francisco Co., CA",,,,,
[I0035],Smith,Lars Peter,,,,,male,1991-09-16,"Santa Rosa, Sonoma Co., CA",,,,,,,,,,,
[I0033],Smith,Lloyd,,,,,male,1935-03-13,"San Francisco, San Francisco Co., CA",,,,,,,,,,,
[I0003],Smith,Magnes,,,,,male,1858-10-06,"Simrishamn, Kristianstad Län, Sweden",,,,,1910-02-20,"Rønne, Bornholm, Denmark",,,,,
[I0040],Smith,Marjorie Alice,,,,,female,1960-02-05,"San Jose, Santa Clara Co., CA",,,,,,,,,,,
[I0014],Smith,Marjorie Lee,,,,,female,1934-11-04,"Reno, Washoe Co., NV",,,,,,,,,,,
[I0022],Smith,Martin,,,,,male,1830-11-19,"Gladsax, Kristianstad Län, Sweden",,1830-11-23,"Gladsax, Kristianstad Län, Sweden",,between 1899 and 1905,Sweden,,,,,
[I0039],Smith,Martin,,,,,male,between 1794 and 1796,"Tommarp, Kristianstad Län, Sweden",,,,,,Sweden,,,,,
[I0005],Smith,Mason Michael,,,,,male,1996-06-26,"Hayward, Alameda Co., CA",,,,,,,,,,,
[I0028],Streiffert,Anna,,,,,female,1860-09-23,"Hoya/Jona/Hoia, Sweden",,,,,1927-02-02,"Rønne, Bornholm, Denmark",,,,,
[I0006],Willard,Edwin,,,,,male,about 1886,,,,,,,,,,,,
Marriage,Husband,Wife,Date,Place,Source,Note
[F0000],[I0039],[I0036],about 1816,"Gladsax, Kristianstad Län, Sweden",,
[F0001],[I0027],[I0025],about 1790,Sweden,,
[F0002],[I0022],[I0038],about 1856,,,
[F0003],[I0024],[I0000],1885-11-27,"Rønne, Bornholm, Denmark",,
[F0004],[I0006],[I0026],about 1910,,,
[F0005],[I0012],[I0023],1912-11-30,"Rønne, Bornholm, Denmark",,
[F0006],[I0008],[I0031],1927-10-31,"Reno, Washoe Co., NV",,
[F0007],[I0015],[I0013],about 1920,,,
[F0008],[I0033],[I0041],1958-08-10,"San Francisco, San Francisco Co., CA",,
[F0009],[I0010],[I0017],,,,
[F0010],[I0019],[I0032],1986-07-12,"Woodland, Yolo Co., CA",,
[F0011],[I0003],[I0028],1884-08-24,"Rønne, Bornholm, Denmark",,
[F0012],[I0018],[I0034],1954-06-04,"Sparks, Washoe Co., NV",Marriage Certificae,
[F0013],[I0037],[I0030],1995-05-27,"San Ramon, Conta Costa Co., CA",,
[F0014],[I0010],[I0016],,,,
Family,Child
[F0000],[I0011]
[F0000],[I0007]
[F0000],[I0004]
[F0000],[I0022]
[F0001],[I0039]
[F0002],[I0003]
[F0002],[I0009]
[F0002],[I0024]
[F0003],[I0026]
[F0003],[I0023]
[F0003],[I0021]
[F0003],[I0008]
[F0003],[I0015]
[F0003],[I0020]
[F0003],[I0010]
[F0005],[I0042]
[F0006],[I0018]
[F0006],[I0014]
[F0008],[I0019]
[F0008],[I0001]
[F0008],[I0029]
[F0009],[I0033]
[F0010],[I0035]
[F0012],[I0040]
[F0012],[I0037]
[F0013],[I0005]
[F0013],[I0002]
Place,Title,Name,Type,Latitude,Longitude,Code,Enclosed_by,Date
[P0000],"Löderup, Malmöhus Län, Sweden","Löderup, Malmöhus Län, Sweden",Unknown,,,,,
[P0001],"Sparks, Washoe Co., NV","Sparks, Washoe Co., NV",Unknown,,,,,
[P0002],"San Francisco, San Francisco Co., CA","San Francisco, San Francisco Co., CA",Unknown,,,,,
[P0003],"Rønne, Bornholm, Denmark","Rønne, Bornholm, Denmark",Unknown,,,,,
[P0004],"Gladsax, Kristianstad Län, Sweden","Gladsax, Kristianstad Län, Sweden",Unknown,,,,,
[P0005],"Reno, Washoe Co., NV","Reno, Washoe Co., NV",Unknown,,,,,
[P0006],"Hayward, Alameda Co., CA","Hayward, Alameda Co., CA",Unknown,,,,,
[P0007],"Community Presbyterian Church, Danville, CA","Community Presbyterian Church, Danville, CA",Unknown,,,,,
[P0008],Sweden,Sweden,Unknown,,,,,
[P0009],"Grostorp, Kristianstad Län, Sweden","Grostorp, Kristianstad Län, Sweden",Unknown,,,,,
[P0010],"Copenhagen, Denmark","Copenhagen, Denmark",Unknown,,,,,
[P0011],"Hoya/Jona/Hoia, Sweden","Hoya/Jona/Hoia, Sweden",Unknown,,,,,
[P0012],"Simrishamn, Kristianstad Län, Sweden","Simrishamn, Kristianstad Län, Sweden",Unknown,,,,,
[P0013],"Fremont, Alameda Co., CA","Fremont, Alameda Co., CA",Unknown,,,,,
[P0016],"Santa Rosa, Sonoma Co., CA","Santa Rosa, Sonoma Co., CA",Unknown,,,,,
[P0017],"San Jose, Santa Clara Co., CA","San Jose, Santa Clara Co., CA",Unknown,,,,,
[P0018],UC Berkeley,UC Berkeley,Unknown,,,,,
[P0019],"Smestorp, Kristianstad Län, Sweden","Smestorp, Kristianstad Län, Sweden",Unknown,,,,,
[P0020],"Tommarp, Kristianstad Län, Sweden","Tommarp, Kristianstad Län, Sweden",Unknown,,,,,
[P0021],"Rønne Bornholm, Denmark","Rønne Bornholm, Denmark",Unknown,,,,,
[P0022],"Woodland, Yolo Co., CA","Woodland, Yolo Co., CA",Unknown,,,,,
[P0023],"San Ramon, Conta Costa Co., CA","San Ramon, Conta Costa Co., CA",Unknown,,,,,
[P0026],United States of America,USA,Country,,,,,
[P0028],"California, USA",California,State,,,,[P0026],2016-06-04
[P0025],"Colorado, USA",Colorado,State,,,,[P0026],
[P0027],"Sacramento Co., California, USA",Sacramento Co.,County,,,,[P0028],2016-06-01
[P0015],"Sacramento, Sacramento Co., CA","Sacramento, Sacramento Co., CA",City,,,,[P0027],2016-06-04
[P0024],"Denver Co., Colorado, USA",Denver Co.,County,,,,[P0025],
[P0014],"Denver, Denver Co., CO","Denver, Denver Co., CO",City,39.7392,104.9903 W,,[P0024],
Can't render this file because it has a wrong number of fields in line 46.

File diff suppressed because it is too large Load Diff

View File

@ -58,6 +58,7 @@ from gramps.gen.utils.string import gender as gender_map
from gramps.gen.datehandler import get_date
from gramps.gen.display.place import displayer as _pd
from gramps.gui.glade import Glade
from gramps.gen.constfunc import win
#-------------------------------------------------------------------------
#
@ -102,72 +103,6 @@ def get_primary_source_title(db, obj):
return source.get_title()
return ""
#-------------------------------------------------------------------------
#
# Encoding support for CSV, from http://docs.python.org/lib/csv-examples.html
#
#-------------------------------------------------------------------------
class UTF8Recoder:
    """Iterator that reads an encoded stream and re-encodes the input to UTF-8.

    ``f`` is a binary stream whose content is text in ``encoding``.
    Iterating yields one line at a time as UTF-8 encoded ``bytes``.
    """

    def __init__(self, f, encoding):
        # Wrap the byte stream in a decoding reader for the source encoding.
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def __next__(self):
        # Use the next() builtin: on Python 3 the codecs stream reader is
        # advanced through __next__ and has no Python 2 style .next() method.
        return next(self.reader).encode("utf-8")
class UnicodeReader:
    """
    A CSV reader which will iterate over lines in the CSV file "f", which is
    encoded in the given encoding.

    ``f`` is a binary stream; each iteration step returns one CSV row as a
    list of ``str``.  Extra keyword arguments are passed to ``csv.reader``.
    """

    def __init__(self, f, encoding="utf-8", **kwds):
        # csv.reader on Python 3 consumes text lines, so decode the byte
        # stream once here instead of re-encoding it to UTF-8 bytes and
        # decoding every cell again afterwards.
        text_lines = codecs.getreader(encoding)(f)
        self.reader = csv.reader(text_lines, **kwds)

    def __next__(self):
        # Rows coming out of csv.reader are already lists of str.
        return next(self.reader)

    def __iter__(self):
        return self
class UnicodeWriter:
    """
    CSV writer that emits rows to the binary stream "f" in a chosen encoding.

    Each row is first formatted by ``csv.writer`` into an in-memory text
    buffer, then encoded and written to ``f``.  Extra keyword arguments are
    passed to ``csv.writer``.
    """

    def __init__(self, f, encoding="utf-8", **kwds):
        self.stream = f
        # Rows are staged in a text buffer before being encoded.
        self.queue = StringIO()
        self.writer = csv.writer(self.queue, **kwds)
        self.encoder = codecs.getencoder(encoding)

    def writerow(self, row):
        """Format one row, encode it and write it to the target stream."""
        self.writer.writerow(row)
        # The buffer now holds the formatted row as text; encode it for
        # the target stream.
        encoded, _consumed = self.encoder(self.queue.getvalue())
        self.stream.write(encoded)
        # Reset the staging buffer so the next row starts from empty.
        self.queue.seek(0)
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write every row of ``rows`` in order."""
        for row in rows:
            self.writerow(row)

    def close(self):
        """Close the underlying target stream."""
        self.stream.close()
#-------------------------------------------------------------------------
#
# CSVWriter Options
@ -270,7 +205,7 @@ class CSVWriter:
# make place list so that dependencies are first:
self.place_list = []
place_list = [x for x in self.db.iter_place_handles()]
place_list = sorted([x for x in self.db.iter_place_handles()])
while place_list:
handle = place_list[0]
place = self.db.get_place_from_handle(handle)
@ -317,9 +252,10 @@ class CSVWriter:
def export_data(self):
self.dirname = os.path.dirname (self.filename)
try:
self.g = open(self.filename,"w")
self.fp = open(self.filename, "wb")
self.g = UnicodeWriter(self.fp)
self.fp = open(self.filename, "w",
encoding='utf_8_sig' if win() else 'utf_8',
newline='')
self.g = csv.writer(self.fp)
except IOError as msg:
msg2 = _("Could not create %s") % self.filename
self.user.notify_error(msg2,str(msg))
@ -594,7 +530,7 @@ class CSVWriter:
place_latitude, place_longitude, place_code, "",
"")
self.writeln()
self.g.close()
self.fp.close()
return True
def format_date(self, date):

View File

@ -32,6 +32,7 @@
import time
import csv
import codecs
from io import TextIOWrapper
#------------------------------------------------------------------------
#
@ -106,7 +107,16 @@ def importData(dbase, filename, user):
parser = CSVParser(dbase, user, (config.get('preferences.tag-on-import-format') if
config.get('preferences.tag-on-import') else None))
try:
with open(filename, 'r') as filehandle:
with open(filename, 'rb') as filehandle:
line = filehandle.read(3)
if line == codecs.BOM_UTF8:
filehandle.seek(0)
filehandle = TextIOWrapper(filehandle, encoding='utf_8_sig',
errors='replace', newline='')
else: # just open with OS encoding
filehandle.seek(0)
filehandle = TextIOWrapper(filehandle,
errors='replace', newline='')
parser.parse(filehandle)
except EnvironmentError as err:
user.notify_error(_("%s could not be opened\n") % filename, str(err))

View File

@ -0,0 +1,111 @@
#! /usr/bin/env python3
"""
Gramps - a GTK+/GNOME based genealogy program
Copyright (c) 2016 Gramps Development Team
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""
import unittest
import os
import difflib
from gramps.test.test_util import Gramps
from gramps.gen.const import TEMP_DIR, DATA_DIR
from gramps.gen.datehandler import set_format
from gramps.cli.user import User
# Name of the family tree these tests hand to Gramps (used with the "-C"
# and "--remove" command line options below).
TREE_NAME = "Test_exporttest"
# Absolute path of the "tests" directory under DATA_DIR; the .gramps inputs
# and the expected export files are looked up here.
TEST_DIR = os.path.abspath(os.path.join(DATA_DIR, "tests"))
def call(*args):
    """Run Gramps with the given command line arguments.

    Prints the arguments and the resulting output for the test log and
    returns the ``(out, err)`` pair produced by ``Gramps.run``.
    """
    print("call:", args)
    set_format(0)  # Use ISO date for test
    user = User(auto_accept=True, quiet=True)
    out, err = Gramps(user=user).run(*args)
    print("out:", out, "err:", err)
    return out, err
def do_it(tstfile):
    """Export the tree matching ``tstfile`` and compare it with the expected file.

    The source tree is the ``.gramps`` file in TEST_DIR sharing the base name
    of ``tstfile``; the expected export is ``tstfile`` itself in TEST_DIR and
    the fresh export is written under TEMP_DIR.

    Returns a message describing the failure, or None when the export
    matches the expected file.
    """
    base_name = os.path.splitext(os.path.basename(tstfile))[0]
    source_path = os.path.join(TEST_DIR, base_name + ".gramps")
    expected_path = os.path.join(TEST_DIR, tstfile)
    output_path = os.path.join(TEMP_DIR, tstfile)

    run_result = call("-C", TREE_NAME, "-q",
                      "--import", source_path,
                      "--export", output_path)
    err = run_result[1]
    if "Cleaning up." not in err:
        return "Export failed, no 'Cleaning up.'"

    msg = compare(expected_path, output_path)
    if msg:
        # Leave the exported file in place so the difference can be inspected.
        return msg

    # Files match: the export is no longer needed; a failed removal is ignored.
    try:
        os.remove(output_path)
    except OSError:
        pass
    return None
def compare(expect_file, result_file):
    """Return a unified diff of the two files as one string.

    Both files are read as UTF-8 with an optional leading BOM.  The
    "---"/"+++" header lines of the diff are dropped, so the result is an
    empty string when the files have identical content.
    """
    with open(expect_file, encoding='utf-8_sig') as exp_f, \
            open(result_file, encoding='utf-8_sig') as res_f:
        expected_lines = exp_f.readlines()
        actual_lines = res_f.readlines()

    header_lines = ("--- \n", "+++ \n")
    delta = difflib.unified_diff(expected_lines, actual_lines,
                                 n=2, lineterm='\n')
    return "".join(line for line in delta if line not in header_lines)
class ExportControl(unittest.TestCase):
    """Compare exported files against expected files.

    Each test exports the '.gramps' test file matching the expected file
    and fails when the two differ.
    As more types of exports are tested, we will need to provide some
    filters for the differences; some types of exports have Gramps versions,
    export dates, file names etc. that don't count as differences.
    """

    def setUp(self):
        # Start from a clean slate: removes the tree if it existed.
        self.tearDown()

    def tearDown(self):
        call("-y -q", "--remove", TREE_NAME)

    def test_csv(self):
        """Run a csv export test."""
        tst_file = 'exp_sample_csv.csv'
        msg = do_it(tst_file)
        if not msg:
            return
        self.fail(tst_file + ': ' + msg)
if __name__ == "__main__":
    # Run the tests in this module when it is executed as a script.
    unittest.main()

View File

@ -243,7 +243,8 @@ else:
_tstfiles = []
for _tstfile in os.listdir(TEST_DIR):
(fname, ext) = os.path.splitext(os.path.basename(_tstfile))
if ext == ".gramps" or ext == ".difs" or ext == ".bak":
if ext == ".gramps" or ext == ".difs" or ext == ".bak" \
or fname.startswith("exp_"):
continue
test_func = make_tst_function(_tstfile, fname)
clname = 'Import_{0}'.format(_tstfile)