gramps/src/plugins/ImportCSV.py
Don Allingham d58641415d 2007-10-08 Don Allingham <don@gramps-project.org>
* various: gen.lib integration. probably touches almost every file

	* various: gen.utils integration


svn: r9101
2007-10-08 16:41:39 +00:00

670 lines
28 KiB
Python

#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2007 Douglas S. Blank
# Copyright (C) 2000-2007 Donald N. Allingham
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id$
"Import from CSV Spreadsheet"
#-------------------------------------------------------------------------
#
# Standard Python Modules
#
#-------------------------------------------------------------------------
import time
from gettext import gettext as _
import csv
import string
import codecs
import cStringIO
#------------------------------------------------------------------------
#
# Set up logging
#
#------------------------------------------------------------------------
import logging
log = logging.getLogger(".ImportCSV")
#-------------------------------------------------------------------------
#
# GTK/GNOME Modules
#
#-------------------------------------------------------------------------
import gtk
#-------------------------------------------------------------------------
#
# GRAMPS modules
#
#-------------------------------------------------------------------------
import Errors
import gen.lib
import const
from QuestionDialog import ErrorDialog
from DateHandler import parser as _dp
from PluginUtils import register_import
from Utils import gender as gender_map
from htmlentitydefs import name2codepoint
#-------------------------------------------------------------------------
#
# Encoding support for CSV, from http://docs.python.org/lib/csv-examples.html
#
#-------------------------------------------------------------------------
class UTF8Recoder:
    """
    Iterator adapter: decodes lines of stream "f" from the given encoding
    and yields each of them re-encoded as a UTF-8 byte string.
    """
    def __init__(self, f, encoding):
        # codecs.getreader() returns a stream-reader factory for the encoding
        reader_factory = codecs.getreader(encoding)
        self.reader = reader_factory(f)

    def __iter__(self):
        return self

    def next(self):
        # StopIteration from the wrapped reader propagates to the caller
        decoded_line = next(self.reader)
        return decoded_line.encode("utf-8")
class UnicodeReader:
    """
    CSV reader for a file "f" stored in the given encoding.

    Iterating yields one list of unicode strings per CSV row.  Extra
    keyword arguments are handed to csv.reader unchanged.
    """
    def __init__(self, f, encoding="utf-8", **kwds):
        # csv.reader is fed UTF-8 byte strings, whatever the file encoding is
        utf8_lines = UTF8Recoder(f, encoding)
        self.reader = csv.reader(utf8_lines, **kwds)

    def __iter__(self):
        return self

    def next(self):
        cells = next(self.reader)
        # cells arrive as UTF-8 byte strings; turn them back into unicode
        return [cell.decode("utf-8") for cell in cells]
class UnicodeWriter:
"""
A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, **kwds)
self.stream = f
self.encoder = codecs.getencoder(encoding)
def writerow(self, row):
self.writer.writerow([s.encode('utf-8') for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode('utf-8')
# ... and reencode it into the target encoding
data, length = self.encoder(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
for row in rows:
self.writerow(row)
def close(self):
self.stream.close()
#-------------------------------------------------------------------------
#
# Support and main functions
#
#-------------------------------------------------------------------------
def rd(line_number, row, col, key, default = None):
    """
    Return the stripped cell of "row" that belongs to column name "key".

    "col" maps column names to positions in "row".  "default" is returned
    when the column is unknown, when the row is too short to contain it
    (a message naming "line_number" is printed in that case), or when the
    cell is empty after stripping.
    """
    if key not in col:
        return default
    position = col[key]
    if position >= len(row):
        print(" ".join(("Error: invalid column reference on line",
                        str(line_number))))
        return default
    text = row[position].strip()
    if text == "":
        return default
    return text
def cleanup_column_name(column):
    """
    Normalise a header cell: lower-case it and map the known aliases onto
    the column name used by the importer.  Other names are only lower-cased.
    """
    aliases = {
        "lastname": "surname",
        "mother": "wife",
        "father": "husband",
        "parent1": "husband",
        "parent2": "wife",
    }
    lowered = column.lower()
    return aliases.get(lowered, lowered)
def importData(db, filename, callback=None):
    """
    Import function handed to register_import: build a CSVParser for
    "filename" and run it against database "db".
    """
    CSVParser(db, filename, callback).process()
#-------------------------------------------------------------------------
#
# CSV Parser
#
#-------------------------------------------------------------------------
class CSVParser:
    """
    Import people, marriages and child/family links from a CSV spreadsheet.

    The sheet is read as a sequence of "tables" separated by blank rows.
    The first row of each table is a header of column names (see
    cleanup_column_name for the accepted aliases); the header decides how
    the remaining rows of that table are interpreted:

      - a "marriage", "husband" or "wife" column: one family per row
      - otherwise a "family" column: one child-in-family link per row
      - otherwise a "surname" column: one person per row

    A reference written in square brackets ("[I0001]") is treated as a
    Gramps id and looked up in the database; references are also remembered
    in tables that are local to the sheet being imported.
    """

    def __init__(self, db, filename, callback):
        self.db = db
        self.filename = filename
        self.callback = callback  # stored only; nothing in this class calls it
        self.debug = 0            # set to a true value for trace output

    #---------------------------------------------------------------------
    # small helpers
    #---------------------------------------------------------------------
    @staticmethod
    def _bracketed_id(ref):
        """ Return the text inside "[...]", or None if ref is not bracketed """
        if ref.startswith("[") and ref.endswith("]"):
            return ref[1:-1]
        return None

    @staticmethod
    def _merge_note(previous_notes, note):
        """
        Return the note text to store, given the text already stored.

        The new note is appended on its own line unless it is already part
        of the previous text, in which case the previous text is kept
        as it is (it used to be overwritten by the new note alone).
        """
        if not previous_notes:
            return note
        if note in previous_notes:
            return previous_notes
        return previous_notes + "\n" + note

    @staticmethod
    def _parse_gender(text):
        """ Map a gender cell onto a gen.lib.Person gender constant """
        text = text.lower()
        if text == gender_map[gen.lib.Person.MALE]:
            return gen.lib.Person.MALE
        if text == gender_map[gen.lib.Person.FEMALE]:
            return gen.lib.Person.FEMALE
        return gen.lib.Person.UNKNOWN

    def _add_source_ref(self, obj, source, context):
        """ Reference source from obj unless obj already references it """
        handle = source.get_handle()
        for ref in obj.get_source_references():
            if self.debug:
                print("%s looking for ref: %s %s" % (context, ref.ref, handle))
            if ref.ref == handle:
                return
        sref = gen.lib.SourceRef()
        sref.set_reference_handle(handle)
        obj.add_source_reference(sref)

    #---------------------------------------------------------------------
    # reading
    #---------------------------------------------------------------------
    def readCSV(self):
        """
        Read the whole file and return its rows as lists of stripped
        unicode strings.

        Returns None after showing an ErrorDialog when the file cannot be
        opened or is not well-formed CSV.  The file is closed in all cases.
        """
        try:
            fp = open(self.filename, "rb")
        except IOError as msg:
            errmsg = _("%s could not be opened\n") % self.filename
            ErrorDialog(errmsg, str(msg))
            return None
        try:
            reader = UnicodeReader(fp)
            data = []
            try:
                for row in reader:
                    data.append([cell.strip() for cell in row])
            except csv.Error as e:
                # the line counter is kept by the csv.reader that
                # UnicodeReader wraps, not by UnicodeReader itself
                ErrorDialog(_('format error: file %s, line %d: %s') %
                            (self.filename, reader.reader.line_num, e))
                return None
            return data
        finally:
            fp.close()

    #---------------------------------------------------------------------
    # references local to the sheet
    #---------------------------------------------------------------------
    def lookup(self, type, id):
        """
        Find the "person" or "family" that reference id stands for.

        A bracketed id is first tried as a Gramps id in the database; if
        the database has no such object, the id without brackets is looked
        up again.  Any other id is searched (case-insensitively) in the
        references stored by storeup().  Returns None when id is None,
        when nothing matches, or when type is invalid.
        """
        if id is None:
            return None
        if type == "family":
            known = self.fref
        elif type == "person":
            known = self.pref
        else:
            print("error: invalid lookup type in CSV import: '%s'" % type)
            return None
        gramps_id = self._bracketed_id(id)
        if gramps_id is None:
            return known.get(id.lower())
        if type == "family":
            db_lookup = self.db.get_family_from_gramps_id(gramps_id)
        else:
            db_lookup = self.db.get_person_from_gramps_id(gramps_id)
        if db_lookup is None:
            return self.lookup(type, gramps_id)
        return db_lookup

    def storeup(self, type, id, object):
        """
        Remember object under reference id (brackets removed, lower-cased)
        so that later rows of the sheet can find it with lookup().
        """
        gramps_id = self._bracketed_id(id)
        if gramps_id is not None:
            id = gramps_id
        if type == "person":
            self.pref[id.lower()] = object
        elif type == "family":
            self.fref[id.lower()] = object
        else:
            print("error: invalid storeup type in CSV import: '%s'" % type)

    #---------------------------------------------------------------------
    # main loop
    #---------------------------------------------------------------------
    def process(self):
        """
        Run the import inside one batch transaction.

        Nothing is touched in the database when the file could not be
        read.  Always returns None.
        """
        data = self.readCSV()
        if data is None:
            # readCSV has already reported the problem; do not open a
            # transaction or disable signals for nothing
            return None
        self.trans = self.db.transaction_begin("", batch=True)
        self.db.disable_signals()
        started = time.time()
        self.lineno = 0
        self.index = 0
        self.fam_count = 0
        self.indi_count = 0
        self.pref = {} # person ref, internal to this sheet
        self.fref = {} # family ref, internal to this sheet
        header = None
        col = {}
        for index, row in enumerate(data):
            line_number = index + 1
            if "".join(row) == "": # no blanks are allowed inside a table
                header = None # clear headers, ready for next "table"
                continue
            if header is None:
                # first row of a table: remember where each column is
                header = [cleanup_column_name(title) for title in row]
                col = dict((key, position)
                           for position, key in enumerate(header))
                continue
            # three different kinds of data: marriage, family, and person
            if (("marriage" in header) or
                ("husband" in header) or
                ("wife" in header)):
                self._process_marriage_row(line_number, row, col)
            elif "family" in header:
                self._process_family_row(line_number, row, col)
            elif "surname" in header:
                self._process_person_row(line_number, row, col)
            else:
                print("Warning: ignoring line %d" % line_number)
        elapsed = time.time() - started
        msg = _('Import Complete: %d seconds') % elapsed
        self.db.transaction_commit(self.trans, _("CSV import"))
        self.db.enable_signals()
        self.db.request_rebuild()
        print(msg)
        print("New Families: %d" % self.fam_count)
        print("New Individuals: %d" % self.indi_count)
        return None

    def _process_marriage_row(self, line_number, row, col):
        """ Handle one row of a marriage table: marriage, husband, wife """
        marriage_ref = rd(line_number, row, col, "marriage")
        husband = rd(line_number, row, col, "husband")
        wife = rd(line_number, row, col, "wife")
        marriagedate = rd(line_number, row, col, "date")
        marriageplace = rd(line_number, row, col, "place")
        marriagesource = rd(line_number, row, col, "source")
        note = rd(line_number, row, col, "note")
        wife = self.lookup("person", wife)
        husband = self.lookup("person", husband)
        if husband is None and wife is None:
            # might have children, so go ahead and add
            print("Warning: no parents on line %d; adding family anyway"
                  % line_number)
        family = self.get_or_create_family(marriage_ref, husband, wife)
        # adjust gender, if not already provided; this is just a guess
        if husband:
            if husband.get_gender() == gen.lib.Person.UNKNOWN:
                husband.set_gender(gen.lib.Person.MALE)
                self.db.commit_person(husband, self.trans)
        if wife:
            if wife.get_gender() == gen.lib.Person.UNKNOWN:
                wife.set_gender(gen.lib.Person.FEMALE)
                self.db.commit_person(wife, self.trans)
        if marriage_ref:
            self.storeup("family", marriage_ref.lower(), family)
        if marriagesource:
            # add, if new
            new, marriagesource = self.get_or_create_source(marriagesource)
        if marriageplace:
            # add, if new
            new, marriageplace = self.get_or_create_place(marriageplace)
        if marriagedate:
            marriagedate = _dp.parse(marriagedate)
        if marriagedate or marriageplace or marriagesource:
            # add, if new; replace, if different
            new, marriage = self.get_or_create_event(
                family, gen.lib.EventType.MARRIAGE,
                marriagedate, marriageplace, marriagesource)
            if new:
                mar_ref = gen.lib.EventRef()
                mar_ref.set_reference_handle(marriage.get_handle())
                family.add_event_ref(mar_ref)
                self.db.commit_family(family, self.trans)
            # only add note to event:
            if note:
                marriage.set_note(self._merge_note(marriage.get_note(), note))
                self.db.commit_event(marriage, self.trans)
        elif note:
            # the note belongs on the marriage event, and this row has none
            print("Warning: note on line %d ignored; no marriage date, "
                  "place or source to attach it to" % line_number)

    def _process_family_row(self, line_number, row, col):
        """ Handle one row of a family table: family, child """
        family_ref = rd(line_number, row, col, "family")
        if family_ref is None:
            print("Error: no family reference found for family on line %d"
                  % line_number)
            return # required
        child = rd(line_number, row, col, "child")
        source = rd(line_number, row, col, "source")
        note = rd(line_number, row, col, "note")
        gender = rd(line_number, row, col, "gender")
        child = self.lookup("person", child)
        family = self.lookup("family", family_ref)
        if family is None:
            print("Error: no matching family reference found for family "
                  "on line %d" % line_number)
            return
        if child is None:
            print("Error: no matching child reference found for family "
                  "on line %d" % line_number)
            return
        # is this child already in this family? If so, don't add
        child_handles = [ref.ref for ref in family.get_child_ref_list()]
        if self.debug:
            print("children: %s" % (child_handles,))
            print("looking for: %s" % child.get_handle())
        if child.get_handle() not in child_handles:
            # add child to family
            if self.debug:
                print(" adding child to family %s %s"
                      % (child.get_gramps_id(), family.get_gramps_id()))
            childref = gen.lib.ChildRef()
            childref.set_reference_handle(child.get_handle())
            family.add_child_ref(childref)
            self.db.commit_family(family, self.trans)
            child.add_parent_family_handle(family.get_handle())
        if gender:
            # replace
            child.set_gender(self._parse_gender(gender))
        if source:
            # add, if new
            new, source = self.get_or_create_source(source)
            self._add_source_ref(child, source, "child:")
        # put note on child
        if note:
            previous_notes = child.get_note()
            if self.debug:
                print(" previous note: %s" % previous_notes)
            child.set_note(self._merge_note(previous_notes, note))
        self.db.commit_person(child, self.trans)

    def _process_person_row(self, line_number, row, col):
        """ Handle one row of a person table: surname plus optional data """
        surname = rd(line_number, row, col, "surname")
        firstname = rd(line_number, row, col, "firstname", "")
        callname = rd(line_number, row, col, "callname")
        title = rd(line_number, row, col, "title")
        prefix = rd(line_number, row, col, "prefix")
        suffix = rd(line_number, row, col, "suffix")
        gender = rd(line_number, row, col, "gender")
        source = rd(line_number, row, col, "source")
        note = rd(line_number, row, col, "note")
        birthplace = rd(line_number, row, col, "birthplace")
        birthdate = rd(line_number, row, col, "birthdate")
        birthsource = rd(line_number, row, col, "birthsource")
        deathplace = rd(line_number, row, col, "deathplace")
        deathdate = rd(line_number, row, col, "deathdate")
        deathsource = rd(line_number, row, col, "deathsource")
        deathcause = rd(line_number, row, col, "deathcause")
        grampsid = rd(line_number, row, col, "grampsid")
        person_ref = rd(line_number, row, col, "person")
        # if this person already exists, don't create them
        person = self.lookup("person", person_ref)
        if person is None:
            if surname is None and firstname == "":
                # need a name if it is a new person
                print("Error: need a firstname or surname for new person "
                      "on line %d" % line_number)
                return
            if surname is None:
                # first name only: store an empty surname, not None
                surname = ""
            person = self.create_person(firstname, surname)
            name = gen.lib.Name()
            name.set_type(gen.lib.NameType(gen.lib.NameType.BIRTH))
            name.set_first_name(firstname)
            name.set_surname(surname)
            person.set_primary_name(name)
        else:
            name = person.get_primary_name()
        if person_ref is not None:
            self.storeup("person", person_ref, person)
        # replace
        if callname is not None:
            name.set_call_name(callname)
        if title is not None:
            name.set_title(title)
        if prefix is not None:
            name.prefix = prefix
            name.group_as = '' # HELP? what should I do here?
        if suffix is not None:
            name.set_suffix(suffix)
        if note is not None:
            person.set_note(self._merge_note(person.get_note(), note))
        if grampsid is not None:
            person.gramps_id = grampsid
        elif person_ref is not None:
            ref_id = self._bracketed_id(person_ref)
            if ref_id is not None:
                person.gramps_id = ref_id
        if (person.get_gender() == gen.lib.Person.UNKNOWN
            and gender is not None):
            person.set_gender(self._parse_gender(gender))
        # birth: add if new, replace if different
        if birthdate is not None:
            birthdate = _dp.parse(birthdate)
        if birthplace is not None:
            new, birthplace = self.get_or_create_place(birthplace)
        if birthsource is not None:
            new, birthsource = self.get_or_create_source(birthsource)
        if birthdate or birthplace or birthsource:
            new, birth = self.get_or_create_event(
                person, gen.lib.EventType.BIRTH,
                birthdate, birthplace, birthsource)
            birth_ref = person.get_birth_ref()
            if birth_ref is None:
                # new
                birth_ref = gen.lib.EventRef()
            birth_ref.set_reference_handle(birth.get_handle())
            person.set_birth_ref(birth_ref)
        # death: add if new, replace if different
        if deathdate is not None:
            deathdate = _dp.parse(deathdate)
        if deathplace is not None:
            new, deathplace = self.get_or_create_place(deathplace)
        if deathsource is not None:
            new, deathsource = self.get_or_create_source(deathsource)
        if deathdate or deathplace or deathsource or deathcause:
            new, death = self.get_or_create_event(
                person, gen.lib.EventType.DEATH,
                deathdate, deathplace, deathsource)
            if deathcause:
                death.set_description(deathcause)
                self.db.commit_event(death, self.trans)
            death_ref = person.get_death_ref()
            if death_ref is None:
                # new
                death_ref = gen.lib.EventRef()
            death_ref.set_reference_handle(death.get_handle())
            person.set_death_ref(death_ref)
        if source:
            # add, if new
            new, source = self.get_or_create_source(source)
            self._add_source_ref(person, source, "person:")
        self.db.commit_person(person, self.trans)

    #---------------------------------------------------------------------
    # database objects: find an existing one, or create it
    #---------------------------------------------------------------------
    def get_or_create_family(self, family_ref, husband, wife):
        """
        Return the family for family_ref with husband/wife as parents.

        When family_ref is a bracketed Gramps id of an existing family,
        that family is returned after setting any parent that differs.
        Otherwise a new family is added to the database (using the
        bracketed id as its Gramps id, if one was given).  family_ref may
        be None when the table has no "marriage" column.
        """
        if self.debug:
            print("get_or_create_family")
        gramps_id = None
        if family_ref is not None:
            gramps_id = self._bracketed_id(family_ref)
        if gramps_id is not None:
            family = self.db.get_family_from_gramps_id(gramps_id)
            if family:
                # don't delete, only add
                changed = False
                if husband:
                    if husband.get_handle() != family.get_father_handle():
                        # this husband is not the same old one! Add him!
                        family.set_father_handle(husband.get_handle())
                        changed = True
                if wife:
                    if wife.get_handle() != family.get_mother_handle():
                        # this wife is not the same old one! Add her!
                        family.set_mother_handle(wife.get_handle())
                        changed = True
                if changed:
                    # store the new parents, as is done for a new family
                    self.db.commit_family(family, self.trans)
                if self.debug:
                    print(" returning existing family")
                return family
        # if not, create one:
        family = gen.lib.Family()
        # was marked with a gramps_id, but didn't exist, so we'll use it:
        if gramps_id is not None:
            family.set_gramps_id(gramps_id)
        # add it:
        self.db.add_family(family, self.trans)
        if husband:
            family.set_father_handle(husband.get_handle())
            husband.add_family_handle(family.get_handle())
        if wife:
            family.set_mother_handle(wife.get_handle())
            wife.add_family_handle(family.get_handle())
        self.db.commit_family(family, self.trans)
        if husband:
            self.db.commit_person(husband, self.trans)
        if wife:
            self.db.commit_person(wife, self.trans)
        self.fam_count += 1
        return family

    def get_or_create_event(self, object, type, date=None, place=None, source=None):
        """
        Add or find an event of the given type on object.

        The first event of that type referenced by object is updated with
        whichever of date, place and source were given; if there is none,
        a new event is added to the database.  Returns (0, event) for an
        existing event and (1, event) for a new one.  Linking a new event
        to object is left to the caller.
        """
        if self.debug:
            print("get_or_create_event")
        ref_list = object.get_event_ref_list()
        if self.debug:
            print("refs: %s" % (ref_list,))
        # look for a match, and possible correction
        for event_ref in ref_list:
            event = self.db.get_event_from_handle(event_ref.ref)
            if self.debug:
                print(" compare event type %s %s"
                      % (int(event.get_type()), type))
            if int(event.get_type()) == type:
                # Match! Let's update
                if date:
                    event.set_date_object(date)
                if place:
                    event.set_place_handle(place.get_handle())
                if source:
                    self._add_source_ref(event, source,
                                         "get_or_create_event:")
                self.db.commit_event(event, self.trans)
                if self.debug:
                    print(" returning existing event")
                return (0, event)
        # else create it:
        if self.debug:
            print(" creating event")
        event = gen.lib.Event()
        if type:
            event.set_type(gen.lib.EventType(type))
        if date:
            event.set_date_object(date)
        if place:
            event.set_place_handle(place.get_handle())
        if source:
            self._add_source_ref(event, source, "get_or_create_event:")
        self.db.add_event(event, self.trans)
        self.db.commit_event(event, self.trans)
        return (1, event)

    def create_person(self, firstname, lastname):
        """
        Used to create a new person we know doesn't exist.

        The person is added to the database without a name; the arguments
        are not used here and the caller sets the primary name.
        """
        person = gen.lib.Person()
        self.db.add_person(person, self.trans)
        self.db.commit_person(person, self.trans)
        self.indi_count += 1
        return person

    def get_or_create_place(self, place_name):
        """
        Return (0, place) for the first place in the database titled
        place_name, or add such a place and return (1, place).
        """
        place_list = self.db.get_place_handles()
        if self.debug:
            print("get_or_create_place: list: %s" % (place_list,))
            print("get_or_create_place: looking for: %s" % place_name)
        for place_handle in place_list:
            place = self.db.get_place_from_handle(place_handle)
            if place.get_title() == place_name:
                return (0, place)
        place = gen.lib.Place()
        place.set_title(place_name)
        self.db.add_place(place, self.trans)
        self.db.commit_place(place, self.trans)
        return (1, place)

    def get_or_create_source(self, source_text):
        """
        Return (0, source) for the first source in the database titled
        source_text, or add such a source and return (1, source).
        """
        source_list = self.db.get_source_handles()
        if self.debug:
            print("get_or_create_source: list: %s" % (source_list,))
            print("get_or_create_source: looking for: %s" % source_text)
        for source_handle in source_list:
            source = self.db.get_source_from_handle(source_handle)
            if source.get_title() == source_text:
                return (0, source)
        source = gen.lib.Source()
        source.set_title(source_text)
        self.db.add_source(source, self.trans)
        self.db.commit_source(source, self.trans)
        return (1, source)
#-------------------------------------------------------------------------
#
# Register the plugin
#
#-------------------------------------------------------------------------
_mime_type = "text/x-comma-separated-values" # CSV Document
# file chooser filter that only shows files of the CSV mime type
_filter = gtk.FileFilter()
_filter.set_name(_('CSV spreadsheet files'))
_filter.add_mime_type(_mime_type)
# user-visible format name (was misspelled "Spreadheet"); the translatable
# message id changes with it, so existing translations need the same fix
_format_name = _('CSV Spreadsheet')
register_import(importData,_filter,_mime_type,0,_format_name)