gramps/src/plugins/import/ImportSql.py
2009-08-09 04:14:33 +00:00

829 lines
32 KiB
Python

#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2009 Douglas S. Blank <doug.blank@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id$
"Import from SQL Database"
#-------------------------------------------------------------------------
#
# Standard Python Modules
#
#-------------------------------------------------------------------------
from gettext import gettext as _
from gettext import ngettext
import sqlite3 as sqlite
import re
import time
#------------------------------------------------------------------------
#
# Set up logging
#
#------------------------------------------------------------------------
import logging
log = logging.getLogger(".ImportSql")
#-------------------------------------------------------------------------
#
# GRAMPS modules
#
#-------------------------------------------------------------------------
import gen.lib
from QuestionDialog import ErrorDialog
from DateHandler import parser as _dp
from gen.plug import PluginManager, ImportPlugin
from Utils import gender as gender_map
from gui.utils import ProgressMeter
from Utils import create_id
def lookup(handle, event_ref_list):
"""
Find the handle in a unserialized event_ref_list and return code.
"""
if handle is None:
return -1
else:
count = 0
for event_ref in event_ref_list:
(private, note_list, attribute_list, ref, role) = event_ref
if handle == ref:
return count
count += 1
return -1
#-------------------------------------------------------------------------
#
# SQL Reader
#
#-------------------------------------------------------------------------
class Database(object):
"""
The db connection.
"""
def __init__(self, database):
self.database = database
self.db = sqlite.connect(self.database)
self.cursor = self.db.cursor()
def query(self, q, *args):
if q.strip().upper().startswith("DROP"):
try:
self.cursor.execute(q, args)
self.db.commit()
except:
"WARN: no such table to drop: '%s'" % q
else:
try:
self.cursor.execute(q, args)
self.db.commit()
except:
print "ERROR: query :", q
print "ERROR: values:", args
raise
return self.cursor.fetchall()
def close(self):
""" Closes and writes out tables """
self.cursor.close()
self.db.close()
class SQLReader(object):
def __init__(self, db, filename, callback):
if not callable(callback):
callback = lambda (percent): None # dummy
self.db = db
self.filename = filename
self.callback = callback
self.debug = 0
def openSQL(self):
sql = None
try:
sql = Database(self.filename)
except IOError, msg:
errmsg = _("%s could not be opened\n") % self.filename
ErrorDialog(errmsg,str(msg))
return None
return sql
# -----------------------------------------------
# Get methods to retrieve data from the tables
# -----------------------------------------------
def get_address_list(self, sql, from_type, from_handle):
results = self.get_links(sql, from_type, from_handle, "address")
retval = []
for handle in results:
result = sql.query("select * from address where handle = ?;",
handle)
retval.append(self.pack_address(sql, result[0]))
return retval
def get_attribute_list(self, sql, from_type, from_handle):
handles = self.get_links(sql, from_type, from_handle, "attribute")
retval = []
for handle in handles:
rows = sql.query("select * from attribute where handle = ?;",
handle)
for row in rows:
(handle,
the_type0,
the_type1,
value,
private) = row
source_list = self.get_source_ref_list(sql, "attribute", handle)
note_list = self.get_note_list(sql, "attribute", handle)
retval.append((private, source_list, note_list,
(the_type0, the_type1), value))
return retval
def get_child_ref_list(self, sql, from_type, from_handle):
results = self.get_links(sql, from_type, from_handle, "child_ref")
retval = []
for handle in results:
rows = sql.query("select * from child_ref where handle = ?;",
handle)
for row in rows:
(handle, ref, frel0, frel1, mrel0, mrel1, private) = row
source_list = self.get_source_ref_list(sql, "child_ref", handle)
note_list = self.get_note_list(sql, "child_ref", handle)
retval.append((private, source_list, note_list, ref,
(frel0, frel1), (mrel0, mrel1)))
return retval
def get_datamap(self, sql, from_type, from_handle):
# FIXME:
return {}
def get_event_ref_list(self, sql, from_type, from_handle):
results = self.get_links(sql, from_type, from_handle, "event_ref")
retval = []
for handle in results:
result = sql.query("select * from event_ref where handle = ?;",
handle)
retval.append(self.pack_event_ref(sql, result[0]))
return retval
def get_family_list(self, sql, from_type, from_handle):
return self.get_links(sql, from_type, from_handle, "family")
def get_parent_family_list(self, sql, from_type, from_handle):
return self.get_links(sql, from_type, from_handle, "parent_family")
def get_person_ref_list(self, sql, from_type, from_handle):
handles = self.get_links(sql, from_type, from_handle, "person_ref")
retval = []
for ref_handle in handles:
rows = sql.query("select * from person_ref where handle = ?;",
ref_handle)
for row in rows:
(handle,
description,
private) = row
source_list = self.get_source_ref_list(sql, "person_ref", handle)
note_list = self.get_note_list(sql, "person_ref", handle)
retval.append((private,
source_list,
note_list,
handle,
description))
return retval
def get_location_list(self, sql, from_type, from_handle):
handles = self.get_links(sql, from_type, from_handle, "location")
results = []
for handle in handles:
results += sql.query("""select * from location where handle = ?;""",
handle)
return [self.pack_location(sql, result, with_parish=True) for result in results]
def get_lds_list(self, sql, from_type, from_handle):
handles = self.get_links(sql, from_type, from_handle, "lds")
results = []
for handle in handles:
results += sql.query("""select * from lds where handle = ?;""",
handle)
return [self.pack_lds(sql, result) for result in results]
def get_media_list(self, sql, from_type, from_handle):
handles = self.get_links(sql, from_type, from_handle, "media_ref")
results = []
for handle in handles:
results += sql.query("""select * from media_ref where handle = ?;""",
handle)
return [self.pack_media_ref(sql, result) for result in results]
def get_note_list(self, sql, from_type, from_handle):
return self.get_links(sql, from_type, from_handle, "note")
def get_repository_list(self, sql, from_type, from_handle):
handles = self.get_links(sql, from_type, from_handle, "repository")
results = []
for handle in handles:
results += sql.query("""select * from repository where handle = ?;""",
handle)
return [self.pack_repository(sql, result) for result in results]
def get_source_ref_list(self, sql, from_type, from_handle):
handles = self.get_links(sql, from_type, from_handle, "source_ref")
results = []
for handle in handles:
results += sql.query("""select * from source_ref where handle = ?;""",
handle)
return [self.pack_source_ref(sql, result) for result in results]
def get_url_list(self, sql, from_type, from_handle):
handles = self.get_links(sql, from_type, from_handle, "url")
results = []
for handle in handles:
results += sql.query("""select * from url where handle = ?;""",
handle)
return [self.pack_url(sql, result) for result in results]
# ---------------------------------
# Helpers
# ---------------------------------
def pack_address(self, sql, data):
(handle, private) = data
source_list = self.get_source_ref_list(sql, "address", handle)
date_handle = self.get_link(sql, "address", handle, "date")
date = self.get_date(sql, date_handle)
note_list = self.get_note_list(sql, "address", handle)
location = self.get_location(sql, "address", handle)
return (private, source_list, note_list, date, location)
def pack_lds(self, sql, data):
(handle, type, place, famc, temple, status, private) = data
source_list = self.get_source_ref_list(sql, "lds", handle)
note_list = self.get_note_list(sql, "lds", handle)
date_handle = self.get_link(sql, "lds", handle, "date")
date = self.get_date(sql, date_handle)
return (source_list, note_list, date, type, place,
famc, temple, status, private)
def pack_media_ref(self, sql, data):
(handle,
ref,
role0,
role1,
private) = data
source_list = self.get_source_ref_list(sql, "media_ref", handle)
note_list = self.get_note_list(sql, "media_ref", handle)
attribute_list = self.get_attribute_list(sql, "event_ref", handle)
return (private, source_list, note_list, attribute_list, ref, (role0, role1))
def pack_repository(self, sql, data):
(handle,
gid,
the_type0,
the_type1,
name,
change,
marker0,
marker1,
private) = data
note_list = self.get_note_list(sql, "repository", handle)
address_list = self.get_address_list(sql, "repository", handle)
urls = self.get_url_list(sql, "repository", handle)
return (handle, gid, (the_type0, the_type1), name, note_list,
address_list, urls, change, (marker0, marker1), private)
def pack_url(self, sql, data):
(handle,
path,
desc,
type0,
type1,
private) = data
return (private, path, desc, (type0, type1))
def pack_event_ref(self, sql, data):
(handle,
ref,
role0,
role1,
private) = data
note_list = self.get_note_list(sql, "event_ref", handle)
attribute_list = self.get_attribute_list(sql, "event_ref", handle)
return (private, note_list, attribute_list, ref, (role0, role1))
def pack_source_ref(self, sql, data):
(handle,
ref,
confidence,
page,
private) = data
date_handle = self.get_link(sql, "source_ref", handle, "date")
date = self.get_date(sql, date_handle)
note_list = self.get_note_list(sql, "source_ref", handle)
return (date, private, note_list, confidence, ref, page)
def pack_source(self, sql, data):
(handle,
gid,
title,
author,
pubinfo,
abbrev,
change,
marker0,
marker1,
private) = data
note_list = self.get_note_list(sql, "source", handle)
media_list = self.get_media_list(sql, "source", handle)
reporef_list = self.get_repository_list(sql, "source", handle)
datamap = {}
return (handle, gid, title,
author, pubinfo,
note_list,
media_list,
abbrev,
change, datamap,
reporef_list,
(marker0, marker1), private)
def get_location(self, sql, from_type, from_handle):
handle = self.get_link(sql, from_type, from_handle, "location")
if handle:
results = sql.query("""select * from location where handle = ?;""",
handle)
if len(results) == 1:
return self.pack_location(sql, results[0], with_parish=True)
def get_names(self, sql, from_type, from_handle, primary):
handles = self.get_links(sql, from_type, from_handle, "name")
names = []
for handle in handles:
results = sql.query("""select * from name where handle = ? and primary_name = ?;""",
handle, primary)
if len(results) > 0:
names += results
result = [self.pack_name(sql, name) for name in names]
if primary:
if len(result) == 1:
return result[0]
elif len(result) == 0:
return gen.lib.Name().serialize()
else:
raise Exception("too many primary names")
else:
return result
def pack_name(self, sql, data):
# unpack name from SQL table:
(handle,
primary_name,
private,
first_name,
surname,
suffix,
title,
name_type0,
name_type1,
prefix,
patronymic,
group_as,
sort_as,
display_as,
call) = data
source_list = self.get_source_ref_list(sql, "name", handle)
note_list = self.get_note_list(sql, "name", handle)
date_handle = self.get_link(sql, "name", handle, "date")
date = self.get_date(sql, date_handle)
return (private, source_list, note_list, date,
first_name, surname, suffix, title,
(name_type0, name_type1), prefix, patronymic,
group_as, sort_as, display_as, call)
def pack_location(self, sql, data, with_parish):
(handle,
street,
city,
county,
state,
country,
postal,
phone,
parish) = data
if with_parish:
return ((street, city, county, state, country, postal, phone), parish)
else:
return (street, city, county, state, country, postal, phone)
def get_place_from_handle(self, sql, ref_handle):
if ref_handle:
place_row = sql.query("select * from place where handle = ?;",
ref_handle)
if len(place_row) == 1:
# return raw DB data here:
return place_row[0]
elif len(place_row) == 0:
print "ERROR: get_place_from_handle('%s'), no such handle." % (ref_handle, )
else:
print "ERROR: get_place_from_handle('%s') should be unique; returned %d records." % (ref_handle, len(place_row))
return None
def get_location_from_handle(self, sql, ref_handle, with_parish=False):
if ref_handle:
place_row = sql.query("select * from location where handle = ?;",
ref_handle)
if len(place_row) == 1:
return self.pack_location(sql, place_row[0], with_parish)
elif len(place_row) == 0:
print "ERROR: get_location_from_handle('%s'), no such handle." % (ref_handle, )
else:
print "ERROR: get_location_from_handle('%s') should be unique; returned %d records." % (ref_handle, len(place_row))
return gen.lib.Location().serialize()
def get_link(self, sql, from_type, from_handle, to_link):
"""
Return a link, and return handle.
"""
assert type(from_handle) in [unicode, str], "from_handle is wrong type: %s is %s" % (from_handle, type(from_handle))
rows = self.get_links(sql, from_type, from_handle, to_link)
if len(rows) == 1:
return rows[0]
elif len(rows) > 1:
print "ERROR: too many links %s:%s -> %s (%d)" % (from_type, from_handle, to_link, len(rows))
return None
def get_links(self, sql, from_type, from_handle, to_link):
"""
Return a list of handles (possibly none).
"""
results = sql.query("""select to_handle from link where from_type = ? and from_handle = ? and to_type = ?;""",
from_type, from_handle, to_link)
return [result[0] for result in results]
def get_date(self, sql, handle):
assert type(handle) in [unicode, str, type(None)], "handle is wrong type: %s" % handle
if handle:
rows = sql.query("select * from date where handle = ?;", handle)
if len(rows) == 1:
(handle,
calendar,
modifier,
quality,
day1,
month1,
year1,
slash1,
day2,
month2,
year2,
slash2,
text,
sortval,
newyear) = rows[0]
dateval = day1, month1, year1, slash1, day2, month2, year2, slash2
return (calendar, modifier, quality, dateval, text, sortval, newyear)
elif len(rows) == 0:
return None
else:
print Exception("ERROR, wrong number of dates: %s" % rows)
def process(self):
sql = self.openSQL()
total = (sql.query("select count(*) from note;")[0][0] +
sql.query("select count(*) from person;")[0][0] +
sql.query("select count(*) from event;")[0][0] +
sql.query("select count(*) from family;")[0][0] +
sql.query("select count(*) from repository;")[0][0] +
sql.query("select count(*) from place;")[0][0] +
sql.query("select count(*) from media;")[0][0] +
sql.query("select count(*) from source;")[0][0])
self.trans = self.db.transaction_begin("",batch=True)
self.db.disable_signals()
count = 0.0
self.t = time.time()
# ---------------------------------
# Process note
# ---------------------------------
notes = sql.query("""select * from note;""")
for note in notes:
(handle,
gid,
text,
format,
note_type1,
note_type2,
change,
marker0,
marker1,
private) = note
styled_text = [text, []]
# direct connection with note handle
markups = sql.query("""select * from markup where handle = ?""", handle)
for markup in markups:
(mhandle,
markup0,
markup1,
value,
start_stop_list) = markup
ss_list = eval(start_stop_list)
styled_text[1] += [((markup0, markup1), value, ss_list)]
self.db.note_map[str(handle)] = (str(handle), gid, styled_text,
format, (note_type1, note_type2), change,
(marker0, marker1), private)
count += 1
self.callback(100 * count/total)
# ---------------------------------
# Process event
# ---------------------------------
events = sql.query("""select * from event;""")
for event in events:
(handle,
gid,
the_type0,
the_type1,
description,
change,
marker0,
marker1,
private) = event
note_list = self.get_note_list(sql, "event", handle)
source_list = self.get_source_ref_list(sql, "event", handle)
media_list = self.get_media_list(sql, "event", handle)
attribute_list = self.get_attribute_list(sql, "event", handle)
date_handle = self.get_link(sql, "event", handle, "date")
date = self.get_date(sql, date_handle)
place_handle = self.get_link(sql, "event", handle, "place")
place = self.get_place_from_handle(sql, place_handle)
data = (str(handle), gid, (the_type0, the_type1), date, description, place,
source_list, note_list, media_list, attribute_list,
change, (marker0, marker1), private)
self.db.event_map[str(handle)] = data
count += 1
self.callback(100 * count/total)
# ---------------------------------
# Process person
# ---------------------------------
people = sql.query("""select * from person;""")
for person in people:
if person is None:
continue
(handle, # 0
gid, # 1
gender, # 2
death_ref_handle, # 5
birth_ref_handle, # 6
change, # 17
marker0, # 18
marker1, # 18
private, # 19
) = person
primary_name = self.get_names(sql, "person", handle, True) # one
alternate_names = self.get_names(sql, "person", handle, False) # list
event_ref_list = self.get_event_ref_list(sql, "person", handle)
family_list = self.get_family_list(sql, "person", handle)
parent_family_list = self.get_parent_family_list(sql, "person", handle)
media_list = self.get_media_list(sql, "person", handle)
address_list = self.get_address_list(sql, "person", handle)
attribute_list = self.get_attribute_list(sql, "person", handle)
urls = self.get_url_list(sql, "person", handle)
lds_ord_list = self.get_lds_list(sql, "person", handle)
psource_list = self.get_source_ref_list(sql, "person", handle)
pnote_list = self.get_note_list(sql, "person", handle)
person_ref_list = self.get_person_ref_list(sql, "person", handle)
death_ref_index = lookup(death_ref_handle, event_ref_list)
birth_ref_index = lookup(birth_ref_handle, event_ref_list)
self.db.person_map[str(handle)] = (str(handle), # 0
gid, # 1
gender, # 2
primary_name, # 3
alternate_names, # 4
death_ref_index, # 5
birth_ref_index, # 6
event_ref_list, # 7
family_list, # 8
parent_family_list, # 9
media_list, # 10
address_list, # 11
attribute_list, # 12
urls, # 13
lds_ord_list, # 14
psource_list, # 15
pnote_list, # 16
change, # 17
(marker0, marker1), # 18
private, # 19
person_ref_list, # 20
)
count += 1
self.callback(100 * count/total)
# ---------------------------------
# Process family
# ---------------------------------
families = sql.query("""select * from family;""")
for family in families:
(handle,
gid,
father_handle,
mother_handle,
the_type0,
the_type1,
change,
marker0,
marker1,
private) = family
child_ref_list = self.get_child_ref_list(sql, "family", handle)
event_ref_list = self.get_event_ref_list(sql, "family", handle)
media_list = self.get_media_list(sql, "family", handle)
attribute_list = self.get_attribute_list(sql, "family", handle)
lds_seal_list = self.get_lds_list(sql, "family", handle)
source_list = self.get_source_ref_list(sql, "family", handle)
note_list = self.get_note_list(sql, "family", handle)
self.db.family_map[str(handle)] = (str(handle), gid,
father_handle, mother_handle,
child_ref_list, (the_type0, the_type1),
event_ref_list, media_list,
attribute_list, lds_seal_list,
source_list, note_list,
change, (marker0, marker1), private)
count += 1
self.callback(100 * count/total)
# ---------------------------------
# Process repository
# ---------------------------------
repositories = sql.query("""select * from repository;""")
for repo in repositories:
(handle,
gid,
the_type0,
the_type1,
name,
change,
marker0,
marker1,
private) = repo
note_list = self.get_note_list(sql, "repository", handle)
address_list = self.get_address_list(sql, "repository", handle)
urls = self.get_url_list(sql, "repository", handle)
self.db.repository_map[str(handle)] = (str(handle), gid,
(the_type0, the_type1),
name, note_list,
address_list, urls, change,
(marker0, marker1), private)
count += 1
self.callback(100 * count/total)
# ---------------------------------
# Process place
# ---------------------------------
places = sql.query("""select * from place;""")
for place in places:
count += 1
(handle,
gid,
title,
main_loc,
long,
lat,
change,
marker0,
marker1,
private) = place
# We could look this up by "place_main", but we have the handle:
main_loc = self.get_location_from_handle(sql, main_loc,
with_parish=True)
alt_location_list = self.get_location_list(sql, "place_alt", handle)
urls = self.get_url_list(sql, "place", handle)
media_list = self.get_media_list(sql, "place", handle)
source_list = self.get_source_ref_list(sql, "place", handle)
note_list = self.get_note_list(sql, "place", handle)
self.db.place_map[str(handle)] = (str(handle), gid, title, long, lat,
main_loc, alt_location_list,
urls,
media_list,
source_list,
note_list,
change, (marker0, marker1),
private)
self.callback(100 * count/total)
# ---------------------------------
# Process source
# ---------------------------------
sources = sql.query("""select * from source;""")
for source in sources:
(handle,
gid,
title,
author,
pubinfo,
abbrev,
change,
marker0,
marker1,
private) = source
note_list = self.get_note_list(sql, "source", handle)
media_list = self.get_media_list(sql, "source", handle)
datamap = self.get_datamap(sql, "source", handle)
reporef_list = self.get_repository_list(sql, "source", handle)
self.db.source_map[str(handle)] = (str(handle), gid, title,
author, pubinfo,
note_list,
media_list,
abbrev,
change, datamap,
reporef_list,
(marker0, marker1), private)
count += 1
self.callback(100 * count/total)
# ---------------------------------
# Process media
# ---------------------------------
media = sql.query("""select * from media;""")
for med in media:
(handle,
gid,
path,
mime,
desc,
change,
marker0,
marker1,
private) = med
attribute_list = self.get_attribute_list(sql, "media", handle)
source_list = self.get_source_ref_list(sql, "media", handle)
note_list = self.get_note_list(sql, "media", handle)
date_handle = self.get_link(sql, "media", handle, "date")
date = self.get_date(sql, date_handle)
self.db.media_map[str(handle)] = (str(handle), gid, path, mime, desc,
attribute_list,
source_list,
note_list,
change,
date,
(marker0, marker1),
private)
count += 1
self.callback(100 * count/total)
return None
def cleanup(self):
self.t = time.time() - self.t
msg = ngettext('Import Complete: %d second','Import Complete: %d seconds', self.t ) % self.t
self.db.transaction_commit(self.trans,_("SQL import"))
self.db.enable_signals()
self.db.request_rebuild()
print msg
def importData(db, filename, callback=None):
g = SQLReader(db, filename, callback)
g.process()
g.cleanup()
#-------------------------------------------------------------------------
#
# Register the plugin
#
#-------------------------------------------------------------------------
_name = _('SQLite Import')
_description = _('SQLite is a common local database format')
pmgr = PluginManager.get_instance()
plugin = ImportPlugin(name = _('SQLite Database'),
description = _("Import data from SQLite database"),
import_function = importData,
extension = "sql")
pmgr.register_plugin(plugin)