gramps/src/plugins/tool/PatchNames.py

557 lines
22 KiB
Python
Raw Normal View History

2002-10-20 19:55:16 +05:30
#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2000-2006 Donald N. Allingham
# Copyright (C) 2008 Brian G. Matherly
# Copyright (C) 2010 Jakim Friant
2002-10-20 19:55:16 +05:30
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id$
"""Tools/Database Processing/Extract Information from Names"""
2002-10-20 19:55:16 +05:30
#-------------------------------------------------------------------------
#
# python modules
#
#-------------------------------------------------------------------------
2002-10-20 19:55:16 +05:30
import re
#-------------------------------------------------------------------------
#
# gnome/gtk
#
#-------------------------------------------------------------------------
2002-10-20 19:55:16 +05:30
import gtk
import gobject
2002-10-20 19:55:16 +05:30
#-------------------------------------------------------------------------
#
# gramps modules
#
#-------------------------------------------------------------------------
import const
from gui.utils import ProgressMeter
from gui.plug import tool
from QuestionDialog import OkDialog
2006-04-27 09:29:47 +05:30
import ManagedWindow
import GrampsDisplay
import gen.lib
from gen.ggettext import sgettext as _
from glade import Glade
2008-04-19 13:59:43 +05:30
#-------------------------------------------------------------------------
#
# Constants
#
#-------------------------------------------------------------------------
WIKI_HELP_PAGE = '%s_-_Tools' % const.URL_MANUAL_PAGE
2008-04-19 13:59:43 +05:30
WIKI_HELP_SEC = _('manual|Extract_Information_from_Names')
#-------------------------------------------------------------------------
#
# constants
#
#-------------------------------------------------------------------------
# List of possible surname prefixes. Notice that you must run the tool
# multiple times for prefixes such as "van der".
PREFIX_LIST = [
"de", "van", "von", "di", "le", "du", "dela", "della",
"des", "vande", "ten", "da", "af", "den", "das", "dello",
"del", "en", "ein", "el" "et", "les", "lo", "los", "un",
"um", "una", "uno", "der", "ter", "te", "die",
]
CONNECTOR_LIST = ['e', 'y', ]
CONNECTOR_LIST_NONSPLIT = ['de', 'van']
_title_re = re.compile(r"^ ([A-Za-z][A-Za-z]+\.) \s+ (.+) $", re.VERBOSE)
_nick_re = re.compile(r"(.+) \s* [(\"] (.+) [)\"]", re.VERBOSE)
2002-10-20 19:55:16 +05:30
#-------------------------------------------------------------------------
#
# Search each name in the database, and compare the firstname against the
# form of "Name (Nickname)". If it matches, change the first name entry
# to "Name" and add "Nickname" into the nickname field. Also, search for
# surname prefixes. If found, change the name entry and put the prefix in
# the name prefix field.
2002-10-20 19:55:16 +05:30
#
#-------------------------------------------------------------------------
class PatchNames(tool.BatchTool, ManagedWindow.ManagedWindow):
titleid = 1
nickid = 2
pref1id = 3
compid = 4
def __init__(self, dbstate, uistate, options_class, name, callback=None):
2006-04-27 09:29:47 +05:30
self.label = _('Name and title extraction tool')
ManagedWindow.ManagedWindow.__init__(self, uistate, [], self.__class__)
self.set_window(gtk.Window(), gtk.Label(), '')
tool.BatchTool.__init__(self, dbstate, options_class, name)
if self.fail:
return
winprefix = gtk.Dialog("Default prefix and connector settings",
self.uistate.window,
gtk.DIALOG_MODAL|gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_ACCEPT))
winprefix.set_has_separator(False)
winprefix.vbox.set_spacing(5)
hboxpref = gtk.HBox()
hboxpref.pack_start(gtk.Label(_('Prefixes to search for:')),
expand=False, padding=5)
self.prefixbox = gtk.Entry()
self.prefixbox.set_text(', '.join(PREFIX_LIST))
hboxpref.pack_start(self.prefixbox)
winprefix.vbox.pack_start(hboxpref)
hboxcon = gtk.HBox()
hboxcon.pack_start(gtk.Label(_('Connectors splitting surnames:')),
expand=False, padding=5)
self.conbox = gtk.Entry()
self.conbox.set_text(', '.join(CONNECTOR_LIST))
hboxcon.pack_start(self.conbox)
winprefix.vbox.pack_start(hboxcon)
hboxconns = gtk.HBox()
2010-10-24 18:36:55 +05:30
hboxconns.pack_start(gtk.Label(_('Connectors not splitting surnames:')),
expand=False, padding=5)
self.connsbox = gtk.Entry()
self.connsbox.set_text(', '.join(CONNECTOR_LIST_NONSPLIT))
hboxconns.pack_start(self.connsbox)
winprefix.vbox.pack_start(hboxconns)
winprefix.show_all()
winprefix.resize(700, 100)
response = winprefix.run()
self.prefix_list = self.prefixbox.get_text().split(',')
self.prefix_list = map(strip, self.prefix_list)
self.prefixbox = None
self.connector_list = self.conbox.get_text().split(',')
self.connector_list = map(strip, self.connector_list)
self.conbox = None
self.connector_list_nonsplit = self.connsbox.get_text().split(',')
self.connector_list_nonsplit = map(strip, self.connector_list_nonsplit)
self.connsbox = None
# Find a prefix in the first_name
self._fn_prefix_re = re.compile("(\S+)\s+(%s)\s*$" % '|'.join(self.prefix_list),
re.IGNORECASE)
# Find a prefix in the surname
self._sn_prefix_re = re.compile("^\s*(%s)\s+(.+)" % '|'.join(self.prefix_list),
re.IGNORECASE)
# Find a connector in the surname
self._sn_con_re = re.compile("^\s*(.+)\s+(%s)\s+(.+)" % '|'.join(self.connector_list),
re.IGNORECASE)
winprefix.destroy()
2002-10-20 19:55:16 +05:30
self.cb = callback
self.handle_to_action = {}
2002-11-04 09:35:47 +05:30
self.progress = ProgressMeter(
_('Extracting Information from Names'), '')
self.progress.set_pass(_('Analyzing names'),
self.db.get_number_of_people())
for person in self.db.iter_people():
key = person.handle
name = person.get_primary_name()
first = name.get_first_name()
sname = name.get_surname()
old_prefix = []
old_surn = []
old_con = []
old_prim = []
old_orig = []
for surn in name.get_surname_list():
old_prefix.append(surn.get_prefix())
old_surn.append(surn.get_surname())
old_con.append(surn.get_connector())
old_prim.append(surn.get_primary())
old_orig.append(surn.get_origintype())
if name.get_title():
old_title = [name.get_title()]
else:
old_title = []
new_title = []
match = _title_re.match(first)
while match:
2002-10-20 19:55:16 +05:30
groups = match.groups()
first = groups[1]
new_title.append(groups[0])
match = _title_re.match(first)
matchnick = _nick_re.match(first)
if new_title:
titleval = (" ".join(old_title+new_title), first)
if key in self.handle_to_action:
self.handle_to_action[key][self.titleid] = titleval
else:
self.handle_to_action[key] = {self.titleid: titleval}
elif matchnick:
# we check for nick, which changes given name like title
groups = matchnick.groups()
nickval = (groups[0], groups[1])
if key in self.handle_to_action:
self.handle_to_action[key][self.nickid] = nickval
else:
self.handle_to_action[key] = {self.nickid: nickval}
else:
# Try to find the name prefix in the given name, also this
# changes given name
match = self._fn_prefix_re.match(first)
if match:
groups = match.groups()
if old_prefix[0]:
# Put the found prefix before the old prefix
new_prefix = " ".join([groups[1], old_prefix[0]])
else:
new_prefix = groups[1]
pref1val = (groups[0], new_prefix, groups[1])
if key in self.handle_to_action:
self.handle_to_action[key][self.pref1id] = pref1val
else:
self.handle_to_action[key] = {self.pref1id: pref1val}
#check for Gedcom import of compound surnames
if len(old_surn) == 1 and old_con[0] == '':
prefixes = old_prefix[0].split(',')
surnames = old_surn[0].split(',')
if len(prefixes) > 1 and len(prefixes) == len(surnames):
#assume a list of prefix and a list of surnames
prefixes = map(strip, prefixes)
surnames = map(strip, surnames)
primaries = [False] * len(prefixes)
primaries[0] = True
origs = []
for ind in range(len(prefixes)):
origs.append(gen.lib.NameOriginType())
origs[0] = old_orig[0]
compoundval = (surnames, prefixes, ['']*len(prefixes),
primaries, origs)
if key in self.handle_to_action:
self.handle_to_action[key][self.compid] = compoundval
else:
self.handle_to_action[key] = {self.compid: compoundval}
#we cannot check compound surnames, so continue the loop
continue
# Next, try to split surname in compounds: prefix surname connector
found = False
new_prefix_list = []
new_surname_list = []
new_connector_list = []
new_prim_list = []
new_orig_list = []
ind = 0
cont = True
for pref, surn, con, prim, orig in zip(old_prefix, old_surn,
old_con, old_prim, old_orig):
2010-10-24 18:36:55 +05:30
surnval = surn.split()
if surnval == []:
new_prefix_list.append(pref)
new_surname_list.append('')
new_connector_list.append(con)
new_prim_list.append(prim)
new_orig_list.append(orig)
cont = False
continue
val = surnval.pop(0)
while cont:
new_prefix_list.append(pref)
new_surname_list.append('')
new_connector_list.append(con)
new_prim_list.append(prim)
new_orig_list.append(orig)
2010-10-24 18:36:55 +05:30
while cont and (val.lower() in self.prefix_list):
found = True
if new_prefix_list[-1]:
new_prefix_list[-1] += ' ' + val
else:
new_prefix_list[-1] = val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
#after prefix we have a surname
if cont:
new_surname_list[-1] = val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
#if value after surname indicates continue, then continue
2010-10-24 18:36:55 +05:30
while cont and (val.lower() in self.connector_list_nonsplit):
#add this val to the current surname
new_surname_list[-1] += ' ' + val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
# if previous is non-splitting connector, then add new val to
# current surname
2010-10-24 18:36:55 +05:30
if cont and (new_surname_list[-1].split()[-1].lower() \
in self.connector_list_nonsplit):
new_surname_list[-1] += ' ' + val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
#if next is a connector, add it to the surname
if cont and val.lower() in self.connector_list:
found = True
if new_connector_list[-1]:
new_connector_list[-1] = ' ' + val
else:
2010-10-24 18:36:55 +05:30
new_connector_list[-1] = val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
#initialize for a next surname in case there are still
#val
if cont:
found = True # we split surname
pref=''
con = ''
prim = False
orig = gen.lib.NameOriginType()
ind += 1
if found:
compoundval = (new_surname_list, new_prefix_list,
new_connector_list, new_prim_list, new_orig_list)
if key in self.handle_to_action:
self.handle_to_action[key][self.compid] = compoundval
else:
self.handle_to_action[key] = {self.compid: compoundval}
self.progress.step()
2002-10-20 19:55:16 +05:30
if self.handle_to_action:
self.display()
2002-10-20 19:55:16 +05:30
else:
self.progress.close()
self.close()
2003-03-19 09:57:34 +05:30
OkDialog(_('No modifications made'),
_("No titles, nicknames or prefixes were found"))
2002-10-20 19:55:16 +05:30
def build_menu_names(self, obj):
return (self.label, None)
def toggled(self, cell, path_string):
path = tuple(map(int, path_string.split(':')))
row = self.model[path]
row[0] = not row[0]
self.model.row_changed(path, row.iter)
def display(self):
self.top = Glade()
window = self.top.toplevel
self.top.connect_signals({
"destroy_passed_object" : self.close,
"on_ok_clicked" : self.on_ok_clicked,
"on_help_clicked" : self.on_help_clicked,
"on_delete_event" : self.close,
})
self.list = self.top.get_object("list")
self.set_window(window, self.top.get_object('title'), self.label)
self.model = gtk.ListStore(gobject.TYPE_BOOLEAN, gobject.TYPE_STRING,
gobject.TYPE_STRING, gobject.TYPE_STRING,
gobject.TYPE_STRING)
r = gtk.CellRendererToggle()
r.connect('toggled', self.toggled)
c = gtk.TreeViewColumn(_('Select'), r, active=0)
self.list.append_column(c)
c = gtk.TreeViewColumn(_('ID'), gtk.CellRendererText(), text=1)
self.list.append_column(c)
c = gtk.TreeViewColumn(_('Type'), gtk.CellRendererText(), text=2)
self.list.append_column(c)
c = gtk.TreeViewColumn(_('Value'), gtk.CellRendererText(), text=3)
self.list.append_column(c)
c = gtk.TreeViewColumn(_('Current Name'), gtk.CellRendererText(), text=4)
self.list.append_column(c)
self.list.set_model(self.model)
self.nick_hash = {}
self.title_hash = {}
self.prefix1_hash = {}
self.compound_hash = {}
self.progress.set_pass(_('Building display'),
len(self.handle_to_action.keys()))
for key, data in self.handle_to_action.items():
p = self.db.get_person_from_handle(key)
gid = p.get_gramps_id()
if self.nickid in data:
given, nick = data[self.nickid]
handle = self.model.append()
self.model.set_value(handle, 0, 1)
self.model.set_value(handle, 1, gid)
self.model.set_value(handle, 2, _('Nickname'))
self.model.set_value(handle, 3, nick)
self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.nick_hash[key] = handle
if self.titleid in data:
title, given = data[self.titleid]
handle = self.model.append()
self.model.set_value(handle, 0, 1)
self.model.set_value(handle, 1, gid)
self.model.set_value(handle, 2, _('Person|Title'))
self.model.set_value(handle, 3, title)
self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.title_hash[key] = handle
if self.pref1id in data:
given, prefixtotal, new_prefix = data[self.pref1id]
handle = self.model.append()
self.model.set_value(handle, 0, 1)
self.model.set_value(handle, 1, gid)
self.model.set_value(handle, 2, _('Prefix in given name'))
self.model.set_value(handle, 3, prefixtotal)
self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.prefix1_hash[key] = handle
if self.compid in data:
surn_list, pref_list, con_list, prims, origs = data[self.compid]
handle = self.model.append()
self.model.set_value(handle, 0, 1)
self.model.set_value(handle, 1, gid)
self.model.set_value(handle, 2, _('Compound surname'))
newval = ''
for sur, pre, con in zip(surn_list, pref_list, con_list):
if newval:
newval += '-['
else:
newval = '['
newval += pre + ',' + sur
if con:
newval += ',' + con + ']'
else:
newval += ']'
self.model.set_value(handle, 3, newval)
self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.compound_hash[key] = handle
self.progress.step()
self.progress.close()
self.show()
def on_help_clicked(self, obj):
"""Display the relevant portion of GRAMPS manual"""
2008-04-19 13:59:43 +05:30
GrampsDisplay.help(webpage=WIKI_HELP_PAGE, section=WIKI_HELP_SEC)
def on_ok_clicked(self, obj):
trans = self.db.transaction_begin("", batch=True)
self.db.disable_signals()
for key, data in self.handle_to_action.items():
p = self.db.get_person_from_handle(key)
if self.nickid in data:
modelhandle = self.nick_hash[key]
val = self.model.get_value(modelhandle, 0)
if val:
given, nick = data[self.nickid]
name = p.get_primary_name()
name.set_first_name(given.strip())
name.set_nick_name(nick.strip())
if self.titleid in data:
modelhandle = self.title_hash[key]
val = self.model.get_value(modelhandle, 0)
if val:
title, given = data[self.titleid]
name = p.get_primary_name()
name.set_first_name(given.strip())
name.set_title(title.strip())
if self.pref1id in data:
modelhandle = self.prefix1_hash[key]
val = self.model.get_value(modelhandle, 0)
if val:
given, prefixtotal, prefix = data[self.pref1id]
name = p.get_primary_name()
name.set_first_name(given.strip())
oldpref = name.get_surname_list()[0].get_prefix().strip()
if oldpref == '' or oldpref == prefix.strip():
name.get_surname_list()[0].set_prefix(prefix)
else:
name.get_surname_list()[0].set_prefix('%s %s' % (prefix, oldpref))
if self.compid in data:
modelhandle = self.compound_hash[key]
val = self.model.get_value(modelhandle, 0)
if val:
surns, prefs, cons, prims, origs = data[self.compid]
name = p.get_primary_name()
new_surn_list = []
for surn, pref, con, prim, orig in zip(surns, prefs, cons,
prims, origs):
new_surn_list.append(gen.lib.Surname())
new_surn_list[-1].set_surname(surn.strip())
new_surn_list[-1].set_prefix(pref.strip())
new_surn_list[-1].set_connector(con.strip())
new_surn_list[-1].set_primary(prim)
new_surn_list[-1].set_origintype(orig)
name.set_surname_list(new_surn_list)
self.db.commit_person(p, trans)
self.db.transaction_commit(trans,
_("Extract information from names"))
self.db.enable_signals()
self.db.request_rebuild()
self.close()
self.cb()
class PatchNamesOptions(tool.ToolOptions):
2005-12-06 12:08:09 +05:30
"""
Defines options and provides handling interface.
"""
def __init__(self, name, person_id=None):
tool.ToolOptions.__init__(self, name, person_id)
def strip(arg):
return arg.strip()