gramps/gramps2/src/plugins/Merge.py

612 lines
19 KiB
Python
Raw Normal View History

2002-10-20 19:55:16 +05:30
#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2000-2004 Donald N. Allingham
2002-10-20 19:55:16 +05:30
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id$
2002-10-20 19:55:16 +05:30
"Database Processing/Merge people"
#-------------------------------------------------------------------------
#
# GRAMPS modules
#
#-------------------------------------------------------------------------
2002-10-20 19:55:16 +05:30
import RelLib
import Utils
import soundex
import GrampsCfg
import ListModel
import MergeData
2003-08-17 07:44:33 +05:30
from gettext import gettext as _
2002-10-20 19:55:16 +05:30
#-------------------------------------------------------------------------
#
# standard python modules
#
#-------------------------------------------------------------------------
2002-10-20 19:55:16 +05:30
import string
import os
#-------------------------------------------------------------------------
#
# GNOME libraries
#
#-------------------------------------------------------------------------
2002-10-20 19:55:16 +05:30
from gnome.ui import *
import gtk
import gtk.glade
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
def is_initial(name):
    """Tell whether *name* looks like an initial such as "J" or "J.".

    A true value is returned for a single character, or a character
    followed by a period, when that first character is unchanged by
    upper() (so it is not a lower-case letter).  Anything longer than two
    characters, a two-character word not ending in '.', and the empty
    string are not initials.
    """
    # An empty string used to fall through to name[0] and raise IndexError.
    if not name or len(name) > 2:
        return False
    if len(name) == 2 and name[1] != '.':
        return False
    return name[0] == name[0].upper()
2002-10-20 19:55:16 +05:30
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
2002-10-20 19:55:16 +05:30
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
class Merge:
    """Find people in the database who may be duplicates of one another.

    Creating an instance loads the "dialog" widget from merge.glade, where
    the user picks a match threshold and whether SoundEx is used.  Pressing
    OK runs find_potentials() and then show().

    Scoring convention shared by the *_match methods: -1 is a definite
    mismatch, 0 means nothing could be compared, and positive values count
    as evidence that both records describe the same person.
    """

    def __init__(self, database, callback):
        """Store the database and callback, then build the options dialog.

        database -- object providing the find_*_from_id and get_*_keys
                    calls used throughout this class
        callback -- stored as self.update; invoked by update_and_destroy()
        """
        self.db = database
        self.map = {}
        self.list = []
        self.index = 0
        self.merger = None
        self.mergee = None
        self.removed = {}
        self.update = callback
        self.use_soundex = 1
        # Work on copies so later changes to the key lists do not affect us.
        self.family_list = database.get_family_keys()[:]
        self.person_list = database.get_person_keys()[:]

        base = os.path.dirname(__file__)
        self.glade_file = "%s/%s" % (base, "merge.glade")
        top = gtk.glade.XML(self.glade_file, "dialog", "gramps")

        # Each menu entry carries its numeric threshold under the key "v";
        # on_merge_ok_clicked() reads it back from the active entry.
        my_menu = gtk.Menu()
        thresholds = ((_("Low"), 0.25), (_("Medium"), 1.0), (_("High"), 2.0))
        for label, value in thresholds:
            item = gtk.MenuItem(label)
            item.set_data("v", value)
            item.show()
            my_menu.append(item)

        self.soundex_obj = top.get_widget("soundex")
        self.menu = top.get_widget("menu")
        self.menu.set_menu(my_menu)

        Utils.set_titles(top.get_widget('dialog'), top.get_widget('title'),
                         _('Merge people'))

        top.signal_autoconnect({
            "on_merge_ok_clicked" : self.on_merge_ok_clicked,
            "destroy_passed_object" : Utils.destroy_passed_object
            })

    def ancestors_of(self, p1_id, id_list):
        """Append p1_id and the ids of all its ancestors to id_list.

        Follows the main parents family of each person recursively.  Ids
        already present in id_list are skipped, which also stops the
        recursion if the data contains a loop.  The list is modified in
        place; nothing is returned.
        """
        if (not p1_id) or (p1_id in id_list):
            return
        id_list.append(p1_id)
        p1 = self.db.find_person_from_id(p1_id)
        f1_id = p1.get_main_parents_family_id()
        if f1_id:
            f1 = self.db.find_family_from_id(f1_id)
            self.ancestors_of(f1.get_father_id(), id_list)
            self.ancestors_of(f1.get_mother_id(), id_list)

    def on_merge_ok_clicked(self, obj):
        """Read the dialog settings, close it, then search and show results."""
        # The widgets must be read before the dialog is destroyed.
        active = self.menu.get_menu().get_active().get_data("v")
        self.use_soundex = self.soundex_obj.get_active()
        Utils.destroy_passed_object(obj)
        self.find_potentials(active)
        self.show()

    def progress_update(self, val):
        """Set the progress bar to val percent and let GTK process events."""
        self.progress.set_fraction(val/100.0)
        while gtk.events_pending():
            gtk.main_iteration()

    def _record_match(self, p1key, p2key, chance):
        """Remember p2key as the candidate for p1key if it scores best so far.

        The original code replaced a stored candidate only when the new
        score was LOWER, so the weakest match survived; the comparison is
        corrected here so the strongest one is kept.
        """
        if p1key not in self.map or self.map[p1key][1] < chance:
            self.map[p1key] = (p2key, chance)

    def find_potentials(self, thresh):
        """Fill self.map with {person id: (candidate id, score)}.

        People are first grouped by gender and surname key (see gen_key),
        so only people sharing a group are compared with compare_people().
        A pair is recorded when its score is at least thresh.  A progress
        window from merge.glade is shown while the search runs.
        """
        top = gtk.glade.XML(self.glade_file, "message", "gramps")
        self.topWin = top.get_widget("message")
        self.progress = top.get_widget("progressbar1")
        Utils.set_titles(self.topWin, top.get_widget('title'),
                         _('Determining possible merges'))

        # Anyone whose gender is not male lands in the second group.
        males = {}
        females = {}
        for p1_id in self.person_list:
            p1 = self.db.find_person_from_id(p1_id)
            key = self.gen_key(p1.get_primary_name().get_surname())
            if p1.get_gender() == RelLib.Person.male:
                males.setdefault(key, []).append(p1_id)
            else:
                females.setdefault(key, []).append(p1_id)

        length = len(self.person_list)
        num = 0
        for p1key in self.person_list:
            p1 = self.db.find_person_from_id(p1key)
            # Refresh the progress bar only every 25th person.
            if num % 25 == 0:
                self.progress_update((float(num)/float(length))*100)
            num = num + 1

            key = self.gen_key(p1.get_primary_name().get_surname())
            if p1.get_gender() == RelLib.Person.male:
                remaining = males[key]
            else:
                remaining = females[key]

            for p2key in remaining:
                if p1key == p2key:
                    continue
                p2 = self.db.find_person_from_id(p2key)
                # Skip the pair if it was already recorded the other way.
                if p2key in self.map and self.map[p2key][0] == p1key:
                    continue
                chance = self.compare_people(p1, p2)
                if chance >= thresh:
                    self._record_match(p1key, p2key, chance)

        # list(...) keeps this working where keys() is not a plain list.
        self.list = list(self.map.keys())
        self.list.sort()
        self.length = len(self.list)
        self.topWin.destroy()
        self.dellist = {}

    def show(self):
        """Open the "mergelist" window and fill it with the found pairs."""
        top = gtk.glade.XML(self.glade_file, "mergelist", "gramps")
        self.window = top.get_widget("mergelist")

        Utils.set_titles(self.window, top.get_widget('title'),
                         _('Potential Merges'))

        self.mlist = top.get_widget("mlist")
        top.signal_autoconnect({
            "destroy_passed_object" : Utils.destroy_passed_object,
            "on_do_merge_clicked" : self.on_do_merge_clicked,
            })

        # NOTE(review): the tuples look like (title, sort column, width),
        # with the untitled last column acting as a hidden sort key for
        # 'Rating' -- confirm against ListModel.
        mtitles = [(_('Rating'),3,75),(_('First Person'),1,200),
                   (_('Second Person'),2,200),('',-1,0)]
        # From here on self.list is the list model, not the key list.
        self.list = ListModel.ListModel(self.mlist, mtitles,
                                        event_func=self.on_do_merge_clicked)
        self.redraw()

    def redraw(self):
        """Rebuild the list of candidate pairs shown in the window.

        Pairs are left out when either person has already been merged away
        (is a key of self.dellist) or when both ids are the same.
        """
        pairs = []
        for p1key in self.map.keys():
            if p1key in self.dellist:
                continue
            (p2key, c) = self.map[p1key]
            # The second person may be gone as well; looking it up would fail.
            if p2key in self.dellist:
                continue
            if p1key == p2key:
                continue
            pairs.append((c, p1key, p2key))

        self.list.clear()
        for (c, p1key, p2key) in pairs:
            c1 = "%5.2f" % c
            c2 = "%5.2f" % (100-c)
            pn1 = self.db.find_person_from_id(p1key).get_primary_name().get_name()
            pn2 = self.db.find_person_from_id(p2key).get_primary_name().get_name()
            # Pass the formatted rating; the raw float used to be passed
            # while the formatted value was computed and then ignored.
            self.list.add([c1, pn1, pn2, c2], (p1key, p2key))

    def on_do_merge_clicked(self, obj):
        """Open the merge dialog for the pair selected in the list."""
        selection = self.list.selection.get_selected()
        node = selection[1]
        if not node:
            return
        (p1, p2) = self.list.get_object(node)
        pn1 = self.db.find_person_from_id(p1)
        pn2 = self.db.find_person_from_id(p2)
        MergeData.MergePeople(self.db, pn1, pn2, self.on_update)

    def on_update(self, p1_id, p2_id, old_id):
        """Note that p2_id now stands for p1_id, then refresh the list.

        Entries that pointed at p2_id are redirected to p1_id as well.
        old_id is accepted for the callback signature but not used here.
        """
        self.dellist[p2_id] = p1_id
        for key in list(self.dellist.keys()):
            if self.dellist[key] == p2_id:
                self.dellist[key] = p1_id
        self.redraw()

    def update_and_destroy(self, obj):
        """Call the stored callback with 1, then destroy obj."""
        self.update(1)
        Utils.destroy_passed_object(obj)

    def list_reduce(self, list1, list2):
        """Score how well the words of list1 are matched by words of list2.

        For each word of list1 the first word of list2 that matches decides
        the contribution: 0.25 when the first letters agree and either word
        is an initial, 0.5 for identical words, 0.25 when the first letters
        agree and name_compare() accepts the pair.  Returns -1 if nothing
        matched, otherwise the total capped at 1.
        """
        value = 0
        for name in list1:
            for name2 in list2:
                if is_initial(name) and name[0] == name2[0]:
                    value = value + 0.25
                    break
                if is_initial(name2) and name2[0] == name[0]:
                    value = value + 0.25
                    break
                if name == name2:
                    value = value + 0.5
                    break
                if name[0] == name2[0] and self.name_compare(name, name2):
                    value = value + 0.25
                    break
        if value == 0:
            return -1
        return min(value, 1)

    def gen_key(self, val):
        """Return the grouping key for surname val.

        This is the SoundEx code when SoundEx is enabled and val can be
        encoded for it; otherwise val itself.
        """
        if not self.use_soundex:
            return val
        try:
            return soundex.soundex(val)
        except UnicodeEncodeError:
            return val

    def name_compare(self, s1, s2):
        """Compare two words, by SoundEx if enabled, else by equality.

        Falls back to plain equality when SoundEx raises
        UnicodeEncodeError.
        """
        if not self.use_soundex:
            return s1 == s2
        try:
            return soundex.compare(s1, s2)
        except UnicodeEncodeError:
            return s1 == s2

    def date_match(self, date1, date2):
        """Score two date objects.

        Returns 0 if either date text is empty, 1 if the texts are equal,
        the result of range_compare() if either is a range, 0.75 if the
        years agree and the months agree or one month is not valid, and -1
        otherwise.
        """
        text1 = date1.get_date()
        text2 = date2.get_date()
        if text1 == "" or text2 == "":
            return 0
        if text1 == text2:
            return 1
        if date1.is_range() or date2.is_range():
            return self.range_compare(date1, date2)

        start1 = date1.get_start_date()
        start2 = date2.get_start_date()
        if start1.get_year() != start2.get_year():
            return -1
        if start1.get_month() == start2.get_month():
            return 0.75
        if not start1.get_month_valid() or not start2.get_month_valid():
            return 0.75
        return -1

    def _within(self, point, low, high):
        """Return true if low <= point <= high."""
        return point >= low and point <= high

    def range_compare(self, date1, date2):
        """Score two dates of which at least one is a range.

        Returns 0.5 when two ranges share an endpoint interval, or when the
        single date's start falls inside the other's range; -1 otherwise.
        """
        start1 = date1.get_start_date()
        start2 = date2.get_start_date()
        if date1.is_range() and date2.is_range():
            stop1 = date1.get_stop_date()
            stop2 = date2.get_stop_date()
            overlap = (self._within(start1, start2, stop2) or
                       self._within(start2, start1, stop1) or
                       self._within(stop1, start2, stop2) or
                       self._within(stop2, start1, stop1))
        elif date2.is_range():
            overlap = self._within(start1, start2, date2.get_stop_date())
        else:
            overlap = self._within(start2, start1, date1.get_stop_date())

        if overlap:
            return 0.5
        return -1

    def name_match(self, name, name1):
        """Score two name objects.

        Returns 0 if either name is missing, -1 if the surnames do not
        compare equal or both have different non-empty suffixes, 1 if the
        first names are identical, and otherwise the list_reduce() score of
        the first-name words (shorter word list first).
        """
        if not name1 or not name:
            return 0

        if not self.name_compare(name.get_surname(), name1.get_surname()):
            return -1

        sfx1 = name.get_suffix()
        sfx2 = name1.get_suffix()
        # A missing suffix on one side is tolerated; two different ones are not.
        if sfx1 != sfx2 and sfx1 != "" and sfx2 != "":
            return -1

        first1 = name.get_first_name()
        first2 = name1.get_first_name()
        if first1 == first2:
            return 1

        # str methods replace the deprecated string-module functions.
        list1 = first1.split()
        list2 = first2.split()
        if len(list1) < len(list2):
            return self.list_reduce(list1, list2)
        return self.list_reduce(list2, list1)

    def place_match(self, p1_id, p2_id):
        """Score two places given by id.

        Returns 0 if either id or either title is missing, 1 if the ids or
        titles are equal, and otherwise a word-by-word score of the titles
        (commas treated as spaces): 0.5 per identical word, 0.25 per word
        pair sharing a first letter that name_compare() accepts, capped at
        1, or -1 if no word matched.
        """
        # Two unknown places used to compare equal and score 1; missing
        # data is treated as "no information", as date_match() does.
        if not p1_id or not p2_id:
            return 0
        if p1_id == p2_id:
            return 1

        title1 = self.db.find_place_from_id(p1_id).get_title()
        title2 = self.db.find_place_from_id(p2_id).get_title()
        if not (title1 and title2):
            return 0
        if title1 == title2:
            return 1

        words1 = title1.replace(",", " ").split()
        words2 = title2.replace(",", " ").split()

        value = 0
        for word1 in words1:
            for word2 in words2:
                if word1 == word2:
                    value = value + 0.5
                    break
                if word1[0] == word2[0] and self.name_compare(word1, word2):
                    value = value + 0.25
                    break
        if value == 0:
            return -1
        return min(value, 1)

    def _event_or_blank(self, event_id):
        """Return the event for event_id, or a new empty Event if unset."""
        if event_id:
            return self.db.find_event_from_id(event_id)
        return RelLib.Event()

    def _primary_name_of(self, person_id):
        """Return the primary name of the person with person_id, or None."""
        if person_id:
            return get_name_obj(self.db.find_person_from_id(person_id))
        return None

    def _spouse_score(self, id1, id2):
        """Score two spouse ids: 1 if identical, else their name score.

        A missing id or a name mismatch contributes 0; spouses never veto
        the overall comparison.
        """
        if not (id1 and id2):
            return 0
        if id1 == id2:
            return 1
        value = self.name_match(self._primary_name_of(id1),
                                self._primary_name_of(id2))
        if value == -1:
            return 0
        return value

    def compare_people(self, p1, p2):
        """Return the likelihood score that p1 and p2 are the same person.

        The score is the sum of the primary-name, birth/death date,
        birth/death place, parents' name and spouse scores.  -1 is returned
        as soon as the names, dates, places or parents' names definitely
        mismatch, or when one person is an ancestor of the other.
        """
        chance = self.name_match(p1.get_primary_name(), p2.get_primary_name())
        if chance == -1:
            return -1

        birth1 = self._event_or_blank(p1.get_birth_id())
        death1 = self._event_or_blank(p1.get_death_id())
        birth2 = self._event_or_blank(p2.get_birth_id())
        death2 = self._event_or_blank(p2.get_death_id())

        value = self.date_match(birth1.get_date_object(),
                                birth2.get_date_object())
        if value == -1:
            return -1
        chance = chance + value

        value = self.date_match(death1.get_date_object(),
                                death2.get_date_object())
        if value == -1:
            return -1
        chance = chance + value

        value = self.place_match(birth1.get_place_id(), birth2.get_place_id())
        if value == -1:
            return -1
        chance = chance + value

        value = self.place_match(death1.get_place_id(), death2.get_place_id())
        if value == -1:
            return -1
        chance = chance + value

        # Nobody can be a duplicate of their own ancestor or descendant.
        ancestors = []
        self.ancestors_of(p1.get_id(), ancestors)
        if p2.get_id() in ancestors:
            return -1

        ancestors = []
        self.ancestors_of(p2.get_id(), ancestors)
        if p1.get_id() in ancestors:
            return -1

        f1_id = p1.get_main_parents_family_id()
        f2_id = p2.get_main_parents_family_id()
        if f1_id and f2_id:
            f1 = self.db.find_family_from_id(f1_id)
            f2 = self.db.find_family_from_id(f2_id)

            dad1 = self._primary_name_of(f1.get_father_id())
            dad2 = self._primary_name_of(f2.get_father_id())
            value = self.name_match(dad1, dad2)
            if value == -1:
                return -1
            chance = chance + value

            mom1 = self._primary_name_of(f1.get_mother_id())
            mom2 = self._primary_name_of(f2.get_mother_id())
            value = self.name_match(mom1, mom2)
            if value == -1:
                return -1
            chance = chance + value

        # Compare the partners in every pair of families of the two people:
        # the fathers if p1 is female, the mothers otherwise.
        for f1_id in p1.get_family_id_list():
            f1 = self.db.find_family_from_id(f1_id)
            for f2_id in p2.get_family_id_list():
                f2 = self.db.find_family_from_id(f2_id)
                if p1.get_gender() == RelLib.Person.female:
                    value = self._spouse_score(f1.get_father_id(),
                                               f2.get_father_id())
                else:
                    value = self._spouse_score(f1.get_mother_id(),
                                               f2.get_mother_id())
                chance = chance + value
        return chance
def name_of(p):
    """Return "<display name> (<id>)" for person p, or "" if p is false."""
    if p:
        return "%s (%s)" % (GrampsCfg.nameof(p), p.get_id())
    return ""
2002-10-20 19:55:16 +05:30
def get_name_obj(person):
    """Return the primary name of person, or None if person is false."""
    if not person:
        return None
    return person.get_primary_name()
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
def runTool(database,active_person,callback):
    """Entry point handed to register_tool: start the Merge tool.

    active_person is accepted but not used here.  Any exception raised
    while the tool is being constructed is reported through DisplayTrace
    rather than propagated to the caller.
    """
    try:
        Merge(database,callback)
    except:
        # DisplayTrace is imported only when something actually went wrong.
        import DisplayTrace
        DisplayTrace.DisplayTrace()
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
def by_id(p1,p2):
    """Comparison function ordering two objects by their get_id() value.

    Returns -1, 0 or 1, like the cmp() builtin the original relied on.
    The result is computed without cmp() so the function also works on
    interpreters that no longer provide it.
    """
    id1 = p1.get_id()
    id2 = p2.get_id()
    return (id1 > id2) - (id1 < id2)
2002-10-20 19:55:16 +05:30
#-------------------------------------------------------------------------
#
#
#
#-------------------------------------------------------------------------
# Hand runTool to Plugins.register_tool together with its translated
# title, category and description.
from Plugins import register_tool

register_tool(
    runTool,
    _("Find possible duplicate people"),
    category=_("Database Processing"),
    description=_("Searches the entire database, looking for "
                  "individual entries that may represent the same person.")
    )