d0c255cb53
Fall back to not using soundex if the charset can't be latin-1 encoded. svn: r2778
567 lines
17 KiB
Python
567 lines
17 KiB
Python
#
|
|
# Gramps - a GTK+/GNOME based genealogy program
|
|
#
|
|
# Copyright (C) 2000-2004 Donald N. Allingham
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
#
|
|
|
|
# $Id$
|
|
|
|
"Database Processing/Merge people"
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
# GRAMPS modules
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
import RelLib
|
|
import Utils
|
|
import soundex
|
|
import GrampsCfg
|
|
import ListModel
|
|
import MergeData
|
|
from gettext import gettext as _
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
# standard python modules
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
import string
|
|
import os
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
# GNOME libraries
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
from gnome.ui import *
|
|
import gtk
|
|
import gtk.glade
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
#
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
def is_initial(name):
    """Tell whether a name token looks like an initial.

    A token counts as an initial when it is a single character that is
    unchanged by upper() ("J"), or such a character followed by a period
    ("J.").  Anything longer than two characters is never an initial.

    name -- the token to test (a string)

    Returns 1 if the token is an initial and 0 otherwise.  An empty string
    is reported as 0 instead of raising IndexError, and a two character
    token that is not of the "X." form now yields an explicit 0 rather
    than an implicit None.
    """
    if not name or len(name) > 2:
        return 0
    if name[0] != name[0].upper():
        return 0
    if len(name) == 2 and name[1] != '.':
        return 0
    return 1
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
#
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
def ancestors_of(p1,list):
    """Collect p1 and all of p1's ancestors into list.

    p1   -- starting person (may be None, in which case nothing is added)
    list -- list that receives the people found; it is modified in place
            and people already present in it are not visited again

    The walk is depth first: a person is appended, then the father's line
    is exhausted before the mother's line is started.  Parents come from
    the family returned by getMainParents().
    """
    pending = [p1]
    while pending:
        person = pending.pop()
        if person == None or person in list:
            continue
        list.append(person)
        family = person.getMainParents()
        if family != None:
            # The stack is last-in first-out, so the mother is pushed first
            # to have the father (and his ancestors) handled before her.
            pending.append(family.getMother())
            pending.append(family.getFather())
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
#
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
class Merge:
    """Search the database for people who may be duplicates of each other.

    Creating an instance loads the "dialog" widget tree from merge.glade
    (looked up next to this file).  When its OK handler fires, the people
    are scanned by find_potentials() and the candidate pairs are listed by
    show().  Merging a listed pair is delegated to MergeData.MergePeople.

    All scoring helpers use the same convention: a return value of -1 means
    "these cannot be the same", 0 means "no information", and a positive
    number is added to the pair's overall score.
    """

    def __init__(self,database,callback):
        """Store the database, snapshot its people and build the dialog.

        database -- object providing getFamilyMap(), getPersonMap() and
                    getPerson()
        callback -- kept as self.update; update_and_destroy() calls it
                    with the argument 1
        """
        self.db = database
        # person id -> (candidate Person, score); filled by find_potentials
        self.map = {}
        self.list = []
        self.index = 0
        self.merger = None
        self.mergee = None
        self.removed = {}
        self.update = callback
        self.use_soundex = 1

        # list() takes a copy, as the original slice did, and also works
        # where values() does not return a real list.
        self.family_list = list(database.getFamilyMap().values())
        self.person_list = list(database.getPersonMap().values())

        # os.path.join avoids producing "/merge.glade" when dirname() is
        # empty.
        base = os.path.dirname(__file__)
        self.glade_file = os.path.join(base, "merge.glade")
        top = gtk.glade.XML(self.glade_file, "dialog", "gramps")

        # Each menu entry carries, under the data key "v", the threshold
        # later handed to find_potentials().
        my_menu = gtk.Menu()
        for (label, threshold) in ((_("Low"), 0.25),
                                   (_("Medium"), 1.0),
                                   (_("High"), 2.0)):
            item = gtk.MenuItem(label)
            item.set_data("v", threshold)
            item.show()
            my_menu.append(item)

        self.soundex_obj = top.get_widget("soundex")
        self.menu = top.get_widget("menu")
        self.menu.set_menu(my_menu)

        Utils.set_titles(top.get_widget('dialog'), top.get_widget('title'),
                         _('Merge people'))

        top.signal_autoconnect({
            "on_merge_ok_clicked" : self.on_merge_ok_clicked,
            "destroy_passed_object" : Utils.destroy_passed_object
            })

    def on_merge_ok_clicked(self,obj):
        """Read the dialog settings, run the scan and show the results.

        obj -- the object passed by the signal; it is handed to
               Utils.destroy_passed_object before the scan starts
        """
        active = self.menu.get_menu().get_active().get_data("v")
        self.use_soundex = self.soundex_obj.get_active()
        Utils.destroy_passed_object(obj)
        self.find_potentials(active)
        self.show()

    def progress_update(self,val):
        """Set the progress bar to val percent and drain pending GTK events.

        val -- progress as a number on a 0-100 scale
        """
        self.progress.set_fraction(val / 100.0)
        while gtk.events_pending():
            gtk.mainiteration()

    def find_potentials(self,thresh):
        """Fill self.map with the best duplicate candidate for each person.

        thresh -- minimum score from compare_people() for a pair to be
                  recorded

        People are only compared with others that share the same gender
        bucket (male / everything else) and the same surname key from
        gen_key().  A progress window from the "message" glade tree is
        shown during the scan and destroyed afterwards, even on error.
        On success self.list holds the sorted ids that have a candidate,
        self.length their count, and self.dellist is reset to {}.
        """
        top = gtk.glade.XML(self.glade_file, "message", "gramps")
        self.topWin = top.get_widget("message")
        self.progress = top.get_widget("progressbar1")

        Utils.set_titles(self.topWin, top.get_widget('title'),
                         _('Determining possible merges'))

        try:
            # Bucket person ids by gender and surname key so that the
            # pairwise comparison below only looks inside one bucket.
            males = {}
            females = {}
            for person in self.person_list:
                key = self.gen_key(person.getPrimaryName().getSurname())
                if person.getGender() == RelLib.Person.male:
                    bucket = males
                else:
                    bucket = females
                bucket.setdefault(key, []).append(person.getId())

            length = len(self.person_list)

            for (num, p1) in enumerate(self.person_list):
                p1key = p1.getId()
                # Refreshing the bar for every person would be wasteful.
                if num % 25 == 0:
                    self.progress_update((float(num) / float(length)) * 100)

                key = self.gen_key(p1.getPrimaryName().getSurname())
                if p1.getGender() == RelLib.Person.male:
                    remaining = males[key]
                else:
                    remaining = females[key]

                for p2key in remaining:
                    if p1key == p2key:
                        continue
                    p2 = self.db.getPerson(p2key)
                    # Skip the pair if it is already recorded the other
                    # way round.
                    if p2key in self.map and self.map[p2key][0] == p1:
                        continue

                    chance = self.compare_people(p1, p2)
                    if chance < thresh:
                        continue
                    # Keep the highest scoring candidate.  The original
                    # test was inverted and kept the lowest one.
                    if p1key not in self.map or self.map[p1key][1] < chance:
                        self.map[p1key] = (p2, chance)

            self.list = list(self.map.keys())
            self.list.sort()
            self.length = len(self.list)
        finally:
            # Never leave the progress window behind.
            self.topWin.destroy()
        self.dellist = {}

    def show(self):
        """Build the "mergelist" window and populate it via redraw()."""
        top = gtk.glade.XML(self.glade_file, "mergelist", "gramps")
        self.window = top.get_widget("mergelist")

        Utils.set_titles(self.window, top.get_widget('title'),
                         _('Potential Merges'))

        self.mlist = top.get_widget("mlist")
        top.signal_autoconnect({
            "destroy_passed_object" : Utils.destroy_passed_object,
            "on_do_merge_clicked" : self.on_do_merge_clicked,
            })

        mtitles = [(_('Rating'), 3, 75), (_('First Person'), 1, 200),
                   (_('Second Person'), 2, 200), ('', -1, 0)]
        # From here on self.list is the ListModel, not the id list that
        # find_potentials() stored.
        self.list = ListModel.ListModel(self.mlist, mtitles,
                                        event_func=self.on_do_merge_clicked)

        self.redraw()

    def redraw(self):
        """Rebuild the candidate list from self.map and self.dellist.

        Pairs whose first person has been merged away (its id is a key of
        self.dellist) are dropped.  If the second person has been merged
        away, the id it was merged into is shown instead; a pair that
        collapses onto a single person is dropped.

        Each row is [rating, first name, second name, 100 - rating] with
        both numbers formatted as "%5.2f"; the row's attached object is
        the (first id, second id) tuple read back by on_do_merge_clicked().
        """
        rows = []
        for p1key in self.map.keys():
            if p1key in self.dellist:
                continue
            (p2, chance) = self.map[p1key]
            # Work with ids only: dellist maps id -> id, so the original
            # code ended up calling getId() on a string after a merge.
            p2key = p2.getId()
            if p2key in self.dellist:
                p2key = self.dellist[p2key]
            if p1key == p2key:
                continue
            rows.append((chance, p1key, p2key))

        self.list.clear()
        for (chance, p1key, p2key) in rows:
            rating = "%5.2f" % chance
            # show() names column 3 in the 'Rating' title tuple; presumably
            # it is the sort column, making higher ratings sort first.
            inverse = "%5.2f" % (100 - chance)
            pn1 = self.db.getPerson(p1key).getPrimaryName().getName()
            pn2 = self.db.getPerson(p2key).getPrimaryName().getName()
            # Show the formatted rating; the original passed the raw float
            # and left the formatted string unused.
            self.list.add([rating, pn1, pn2, inverse], (p1key, p2key))

    def on_do_merge_clicked(self,obj):
        """Open the merge dialog for the currently selected pair.

        obj -- the object passed by the signal (unused)

        Does nothing when no row is selected.
        """
        (store, selected) = self.list.selection.get_selected()
        if not selected:
            return

        (p1, p2) = self.list.get_object(selected)
        pn1 = self.db.getPerson(p1)
        pn2 = self.db.getPerson(p2)
        MergeData.MergePeople(self.db, pn1, pn2, self.on_update)

    def on_update(self,p1,p2,old_id):
        """Record that p2 was merged into p1 and refresh the list.

        p1     -- the person that was kept
        p2     -- the person that was merged away
        old_id -- accepted for the callback signature; unused here

        Earlier entries that pointed at p2 are redirected to p1 so that
        chains of merges resolve to the surviving person.
        """
        kept_id = p1.getId()
        gone_id = p2.getId()
        self.dellist[gone_id] = kept_id
        for key in list(self.dellist.keys()):
            if self.dellist[key] == gone_id:
                self.dellist[key] = kept_id
        self.redraw()

    def update_and_destroy(self,obj):
        """Call the stored callback with 1, then destroy obj."""
        self.update(1)
        Utils.destroy_passed_object(obj)

    def list_reduce(self,list1,list2):
        """Score how well two lists of given-name tokens agree.

        list1 -- tokens of one first name (name_match passes the shorter)
        list2 -- tokens of the other first name

        For every token of list1 the first token of list2 that matches
        contributes: 0.25 if either is an initial with the same first
        letter, 0.5 if they are equal, 0.25 if they share a first letter
        and name_compare() accepts them.

        Returns -1 when nothing matched, otherwise the total capped at 1.
        """
        value = 0
        for name in list1:
            for name2 in list2:
                same_letter = name[0] == name2[0]
                if is_initial(name) and same_letter:
                    value = value + 0.25
                    break
                if is_initial(name2) and same_letter:
                    value = value + 0.25
                    break
                if name == name2:
                    value = value + 0.5
                    break
                if same_letter and self.name_compare(name, name2):
                    value = value + 0.25
                    break
        if value == 0:
            return -1
        return min(value, 1)

    def gen_key(self,val):
        """Return the key used to bucket the surname val.

        With soundex enabled this is soundex.soundex(val); if that raises
        UnicodeEncodeError, or soundex is disabled, val itself is used.
        """
        if not self.use_soundex:
            return val
        try:
            return soundex.soundex(val)
        except UnicodeEncodeError:
            return val

    def name_compare(self,s1,s2):
        """Tell whether two name strings should be treated as the same.

        With soundex enabled this is soundex.compare(s1,s2); if that
        raises UnicodeEncodeError, or soundex is disabled, plain equality
        is used.
        """
        if not self.use_soundex:
            return s1 == s2
        try:
            return soundex.compare(s1, s2)
        except UnicodeEncodeError:
            return s1 == s2

    def date_match(self,date1,date2):
        """Score the agreement of two date objects.

        Returns 0 if either getDate() is empty, 1 if both getDate() values
        are equal, the result of range_compare() if either is a range,
        0.75 if the years agree and the months agree or at least one month
        is not valid, and -1 otherwise.
        """
        if date1.getDate() == "" or date2.getDate() == "":
            return 0
        if date1.getDate() == date2.getDate():
            return 1

        if date1.isRange() or date2.isRange():
            return self.range_compare(date1, date2)

        start1 = date1.get_start_date()
        start2 = date2.get_start_date()

        if start1.getYear() != start2.getYear():
            return -1
        if start1.getMonth() == start2.getMonth():
            return 0.75
        if not start1.getMonthValid() or not start2.getMonthValid():
            return 0.75
        return -1

    def _within(self, point, start, stop):
        """Tell whether start <= point <= stop."""
        return point >= start and point <= stop

    def range_compare(self,date1,date2):
        """Score two dates of which at least one is expected to be a range.

        Two ranges score 0.5 when an end point of either lies inside the
        other.  A single date scores 0.5 when its start date lies inside
        the other date's range.  Everything else scores -1.
        """
        if date1.isRange() and date2.isRange():
            start1 = date1.get_start_date()
            stop1 = date1.get_stop_date()
            start2 = date2.get_start_date()
            stop2 = date2.get_stop_date()
            overlap = (self._within(start1, start2, stop2) or
                       self._within(start2, start1, stop1) or
                       self._within(stop1, start2, stop2) or
                       self._within(stop2, start1, stop1))
        elif date2.isRange():
            overlap = self._within(date1.get_start_date(),
                                   date2.get_start_date(),
                                   date2.get_stop_date())
        else:
            overlap = self._within(date2.get_start_date(),
                                   date1.get_start_date(),
                                   date1.get_stop_date())
        if overlap:
            return 0.5
        return -1

    def name_match(self,name,name1):
        """Score the agreement of two name objects.

        Returns 0 if either name is missing, -1 if the surnames fail
        name_compare() or both have different non-empty suffixes, 1 if the
        first names are equal, and otherwise the list_reduce() score of
        the first names split on whitespace (shorter list first).
        """
        if not name1 or not name:
            return 0

        if not self.name_compare(name.getSurname(), name1.getSurname()):
            return -1

        sfx1 = name.getSuffix()
        sfx2 = name1.getSuffix()
        # A missing suffix on one side is not treated as a conflict.
        if sfx1 != sfx2 and sfx1 != "" and sfx2 != "":
            return -1

        first1 = name.getFirstName()
        first2 = name1.getFirstName()
        if first1 == first2:
            return 1

        list1 = first1.split()
        list2 = first2.split()
        if len(list1) < len(list2):
            return self.list_reduce(list1, list2)
        return self.list_reduce(list2, list1)

    def place_match(self,p1,p2):
        """Score the agreement of two places (objects with get_title()).

        Returns 1 if the places compare equal or have equal titles, 0 if
        either title is empty or the place is None, and otherwise compares
        the titles word by word (commas count as spaces): 0.5 for an equal
        word, 0.25 for words sharing a first letter that name_compare()
        accepts.  The word score is capped at 1; -1 means no word matched.

        NOTE(review): two None places compare equal and so score 1, unlike
        date_match() which scores missing data as 0 -- confirm that this
        is intended.
        """
        if p1 == p2:
            return 1

        if p1 is None:
            title1 = ""
        else:
            title1 = p1.get_title()

        if p2 is None:
            title2 = ""
        else:
            title2 = p2.get_title()

        if title1 == "" or title2 == "":
            return 0
        if title1 == title2:
            return 1

        words1 = title1.replace(",", " ").split()
        words2 = title2.replace(",", " ").split()

        value = 0
        for word1 in words1:
            for word2 in words2:
                if word1 == word2:
                    value = value + 0.5
                    break
                if word1[0] == word2[0] and self.name_compare(word1, word2):
                    value = value + 0.25
                    break
        if value == 0:
            return -1
        return min(value, 1)

    def _partner_score(self, partner1, partner2):
        """Score two partners taken from a family of each compared person.

        Returns 0 if either partner is missing or their names conflict,
        1 if they are the same person, otherwise the name_match() score.
        """
        if not (partner1 and partner2):
            return 0
        if partner1 == partner2:
            return 1
        value = self.name_match(get_name_obj(partner1),
                                get_name_obj(partner2))
        if value == -1:
            return 0
        return value

    def compare_people(self,p1,p2):
        """Return the likelihood score that p1 and p2 are the same person.

        The primary names, birth and death dates, and birth and death
        places are scored in that order; any -1 rejects the pair at once.
        The pair is also rejected when one person is an ancestor of the
        other, or when both have main parents whose father or mother names
        conflict.  Partners found in the two people's family lists can
        only add to the score.

        Returns -1 for a rejected pair, otherwise the accumulated score.
        """
        chance = self.name_match(p1.getPrimaryName(), p2.getPrimaryName())
        if chance == -1:
            return -1

        birth1 = p1.getBirth()
        death1 = p1.getDeath()
        birth2 = p2.getBirth()
        death2 = p2.getDeath()

        value = self.date_match(birth1.getDateObj(), birth2.getDateObj())
        if value == -1:
            return -1
        chance = chance + value

        value = self.date_match(death1.getDateObj(), death2.getDateObj())
        if value == -1:
            return -1
        chance = chance + value

        value = self.place_match(birth1.getPlace(), birth2.getPlace())
        if value == -1:
            return -1
        chance = chance + value

        value = self.place_match(death1.getPlace(), death2.getPlace())
        if value == -1:
            return -1
        chance = chance + value

        # Nobody can be a duplicate of one of their own ancestors.
        ancestors = []
        ancestors_of(p1, ancestors)
        if p2 in ancestors:
            return -1

        ancestors = []
        ancestors_of(p2, ancestors)
        if p1 in ancestors:
            return -1

        parents1 = p1.getMainParents()
        parents2 = p2.getMainParents()

        if parents1 and parents2:
            value = self.name_match(get_name_obj(parents1.getFather()),
                                    get_name_obj(parents2.getFather()))
            if value == -1:
                return -1
            chance = chance + value

            value = self.name_match(get_name_obj(parents1.getMother()),
                                    get_name_obj(parents2.getMother()))
            if value == -1:
                return -1
            chance = chance + value

        # For a female p1 the partner is the family's father, otherwise
        # it is the family's mother.
        is_female = p1.getGender() == RelLib.Person.female
        for fam1 in p1.getFamilyList():
            for fam2 in p2.getFamilyList():
                if is_female:
                    value = self._partner_score(fam1.getFather(),
                                                fam2.getFather())
                else:
                    value = self._partner_score(fam1.getMother(),
                                                fam2.getMother())
                chance = chance + value

        return chance
|
|
|
|
|
|
def name_of(p):
    """Return a display string "<name> (<id>)" for person p.

    The name part comes from GrampsCfg.nameof(p) and the id from
    p.getId().  A false value for p (such as None) yields "".
    """
    if p:
        return "%s (%s)" % ( GrampsCfg.nameof(p),p.getId())
    return ""
|
|
|
|
def get_name_obj(person):
    """Return person.getPrimaryName(), or None when person is false."""
    if not person:
        return None
    return person.getPrimaryName()
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
#
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
def runTool(database,active_person,callback):
    """Entry point handed to register_tool(): start the merge tool.

    database      -- passed to Merge as the database to scan
    active_person -- accepted for the tool signature; not used here
    callback      -- passed to Merge as its update callback

    Any exception raised while constructing Merge is reported through
    DisplayTrace.DisplayTrace() rather than propagated.
    """
    try:
        Merge(database,callback)
    except:
        # DisplayTrace is imported only when something actually failed.
        import DisplayTrace
        DisplayTrace.DisplayTrace()
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
#
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
def by_id(p1,p2):
    """Three-way comparison of two people by their getId() values.

    Returns -1, 0 or 1 when p1's id is less than, equal to or greater
    than p2's id.  The result is computed from the comparison operators,
    so the function does not depend on the cmp() builtin, which newer
    Python versions no longer provide.
    """
    id1 = p1.getId()
    id2 = p2.getId()
    return (id1 > id2) - (id1 < id2)
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
#
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
from Plugins import register_tool

# Register runTool with the plugin system under the
# "Database Processing" category.
register_tool(
    runTool,
    _("Find possible duplicate people"),
    description=_("Searches the entire database, looking for "
                  "individual entries that may represent the same person."),
    category=_("Database Processing")
    )
|
|
|