Merge pull request #251 from prculley/dupid

Update Tool 'Check & Repair' and 'TestCaseGenerator' to detect and repair duplicate Gramps IDs
This commit is contained in:
Sam Manzi 2016-10-27 19:20:58 +11:00 committed by GitHub
commit e6088271bd
3 changed files with 216 additions and 6 deletions

View File

@ -139,15 +139,16 @@ class ToolControl(unittest.TestCase):
"-y", "-a", "tool", "-p", "name=check") "-y", "-a", "tool", "-p", "name=check")
expect = ["7 broken child/family links were fixed", expect = ["7 broken child/family links were fixed",
"4 broken spouse/family links were fixed", "4 broken spouse/family links were fixed",
"1 place alternate names fixed", "1 place alternate name fixed",
"10 media objects were referenced, but not found", "10 media objects were referenced, but not found",
"References to 10 media objects were kept", "References to 10 media objects were kept",
"3 events were referenced, but not found", "3 events were referenced, but not found",
"1 invalid birth event name was fixed", "1 invalid birth event name was fixed",
"1 invalid death event name was fixed", "1 invalid death event name was fixed",
"2 places were referenced, but not found", "2 places were referenced, but not found",
"11 citations were referenced, but not found", "14 citations were referenced, but not found",
"14 sources were referenced, but not found", "17 sources were referenced, but not found",
"9 Duplicated Gramps IDs fixed",
"7 empty objects removed", "7 empty objects removed",
"1 person objects", "1 person objects",
"1 family objects", "1 family objects",

View File

@ -211,6 +211,7 @@ class Check(tool.BatchTool):
total = checker.family_errors() total = checker.family_errors()
checker.fix_duplicated_grampsid()
checker.check_events() checker.check_events()
checker.check_person_references() checker.check_person_references()
checker.check_family_references() checker.check_family_references()
@ -271,6 +272,7 @@ class CheckIntegrity:
self.empty_objects = defaultdict(list) self.empty_objects = defaultdict(list)
self.replaced_sourceref = [] self.replaced_sourceref = []
self.place_errors = 0 self.place_errors = 0
self.duplicated_gramps_ids = 0
self.text = StringIO() self.text = StringIO()
self.last_img_dir = config.get('behavior.addmedia-image-dir') self.last_img_dir = config.get('behavior.addmedia-image-dir')
self.progress = ProgressMeter(_('Checking Database'), '', self.progress = ProgressMeter(_('Checking Database'), '',
@ -1999,6 +2001,135 @@ class CheckIntegrity:
logging.info(' OK: no broken source citations on mediarefs ' logging.info(' OK: no broken source citations on mediarefs '
'found') 'found')
def fix_duplicated_grampsid(self):
    """
    Search for duplicated Gramps IDs within each of the major object
    classes and repair them.

    Citations, events, families, media, notes, people, places,
    repositories and sources are each scanned separately; IDs are not
    compared across classes. The first object seen with a given ID keeps
    it. Every later object in the same class carrying that ID is given
    the value returned by the matching ``find_next_*_gramps_id()`` call,
    is committed under ``self.trans``, is logged as a warning and is
    counted in ``self.duplicated_gramps_ids``.
    """
    dbase = self.db
    # One row per object class, in processing order:
    # (object iterator, supplier of a replacement ID, commit function).
    object_classes = (
        (dbase.iter_citations, dbase.find_next_citation_gramps_id,
         dbase.commit_citation),
        (dbase.iter_events, dbase.find_next_event_gramps_id,
         dbase.commit_event),
        (dbase.iter_families, dbase.find_next_family_gramps_id,
         dbase.commit_family),
        (dbase.iter_media, dbase.find_next_media_gramps_id,
         dbase.commit_media),
        (dbase.iter_notes, dbase.find_next_note_gramps_id,
         dbase.commit_note),
        (dbase.iter_people, dbase.find_next_person_gramps_id,
         dbase.commit_person),
        (dbase.iter_places, dbase.find_next_place_gramps_id,
         dbase.commit_place),
        (dbase.iter_repositories, dbase.find_next_repository_gramps_id,
         dbase.commit_repository),
        (dbase.iter_sources, dbase.find_next_source_gramps_id,
         dbase.commit_source),
    )
    total = (
        dbase.get_number_of_citations() +
        dbase.get_number_of_events() +
        dbase.get_number_of_families() +
        dbase.get_number_of_media() +
        dbase.get_number_of_notes() +
        dbase.get_number_of_people() +
        dbase.get_number_of_places() +
        dbase.get_number_of_repositories() +
        dbase.get_number_of_sources()
    )
    self.progress.set_pass(_('Looking for Duplicated Gramps ID '
                             'problems'), total)
    logging.info('Looking for Duplicated Gramps ID problems')
    for iter_objects, find_next_gid, commit in object_classes:
        # IDs already seen in this class only; a set keeps the membership
        # test constant-time however many objects the class holds.
        seen_gids = set()
        for obj in iter_objects():
            # Every object counted in ``total`` advances the meter,
            # notes included.
            self.progress.step()
            ogid = gid = obj.get_gramps_id()
            if gid in seen_gids:
                gid = find_next_gid()
                obj.set_gramps_id(gid)
                commit(obj, self.trans)
                logging.warning('    FAIL: Duplicated Gramps ID found, '
                                'Original: "%s" changed to: "%s"',
                                ogid, gid)
                self.duplicated_gramps_ids += 1
            # Record the ID the object ends up with, so a later object
            # carrying the same value is detected as well.
            seen_gids.add(gid)
def class_person(self, handle): def class_person(self, handle):
person = Person() person = Person()
person.set_handle(handle) person.set_handle(handle)
@ -2107,6 +2238,7 @@ class CheckIntegrity:
tag_references = len(self.invalid_tag_references) tag_references = len(self.invalid_tag_references)
name_format = len(self.removed_name_format) name_format = len(self.removed_name_format)
replaced_sourcerefs = len(self.replaced_sourceref) replaced_sourcerefs = len(self.replaced_sourceref)
dup_gramps_ids = self.duplicated_gramps_ids
empty_objs = sum(len(obj) for obj in self.empty_objects.values()) empty_objs = sum(len(obj) for obj in self.empty_objects.values())
errors = (photos + efam + blink + plink + slink + rel + errors = (photos + efam + blink + plink + slink + rel +
@ -2114,7 +2246,7 @@ class CheckIntegrity:
person_references + family_references + place_references + person_references + family_references + place_references +
citation_references + repo_references + media_references + citation_references + repo_references + media_references +
note_references + tag_references + name_format + empty_objs + note_references + tag_references + name_format + empty_objs +
invalid_dates + source_references) invalid_dates + source_references + dup_gramps_ids)
if errors == 0: if errors == 0:
if uistate: if uistate:
@ -2231,7 +2363,7 @@ class CheckIntegrity:
# translators: leave all/any {...} untranslated # translators: leave all/any {...} untranslated
ngettext("{quantity} place alternate name fixed\n", ngettext("{quantity} place alternate name fixed\n",
"{quantity} place alternate names fixed\n", "{quantity} place alternate names fixed\n",
rel).format(quantity=self.place_errors) self.place_errors).format(quantity=self.place_errors)
) )
if person_references: if person_references:
@ -2418,6 +2550,14 @@ class CheckIntegrity:
).format(quantity=replaced_sourcerefs) ).format(quantity=replaced_sourcerefs)
) )
if dup_gramps_ids > 0:
self.text.write(
# translators: leave all/any {...} untranslated
ngettext("{quantity} Duplicated Gramps ID fixed\n",
"{quantity} Duplicated Gramps IDs fixed\n",
dup_gramps_ids).format(quantity=dup_gramps_ids)
)
if empty_objs > 0: if empty_objs > 0:
self.text.write(_( self.text.write(_(
"%(empty_obj)d empty objects removed:\n" "%(empty_obj)d empty objects removed:\n"

View File

@ -384,7 +384,7 @@ class TestcaseGenerator(tool.BatchTool):
if self.options_dict['bugs']: if self.options_dict['bugs']:
with self.progress(_('Generating testcases'), with self.progress(_('Generating testcases'),
_('Generating database errors'), _('Generating database errors'),
19) as step: 20) as step:
self.generate_data_errors(step) self.generate_data_errors(step)
if self.options_dict['persons']: if self.options_dict['persons']:
@ -431,6 +431,8 @@ class TestcaseGenerator(tool.BatchTool):
step() step()
self.test_fix_alt_place_names() self.test_fix_alt_place_names()
step() step()
self.test_fix_duplicated_grampsid()
step()
self.test_clean_deleted_name_format() self.test_clean_deleted_name_format()
step() step()
self.test_cleanup_empty_objects() self.test_cleanup_empty_objects()
@ -586,6 +588,73 @@ class TestcaseGenerator(tool.BatchTool):
plac.set_alternative_names(alt_names) plac.set_alternative_names(alt_names)
self.db.add_place(plac, self.trans) self.db.add_place(plac, self.trans)
def test_fix_duplicated_grampsid(self):
    """
    Populate the database with pairs of objects sharing one Gramps ID.

    Two passes create one object of each major type per pass, always with
    the same fixed ID, so every type ends up with one duplicated ID.
    This is the input data for Check.fix_duplicated_grampsid().
    """
    def add_with_id(obj, gramps_id, add_func):
        # Fill the object, force the fixed Gramps ID, then store it.
        self.fill_object(obj)
        obj.set_gramps_id(gramps_id)
        add_func(obj, self.trans)

    with DbTxn(_("Testcase generator step %d") % self.transaction_count,
               self.db) as self.trans:
        self.transaction_count += 1
        for _pass in range(2):
            add_with_id(Citation(), "C1001", self.db.add_citation)
            add_with_id(Event(), "E1001", self.db.add_event)

            # A married couple whose family carries the duplicated ID.
            father_h = self.generate_person(
                Person.MALE, "Smith",
                "Dup Gramps ID test F1001")
            mother_h = self.generate_person(Person.FEMALE, "Jones", None)
            family = Family()
            family.set_father_handle(father_h)
            family.set_mother_handle(mother_h)
            family.set_relationship((FamilyRelType.MARRIED, ''))
            family.set_gramps_id("F1001")
            family_h = self.db.add_family(family, self.trans)
            # Link both spouses back to the new family, father first.
            for spouse_h in (father_h, mother_h):
                spouse = self.db.get_person_from_handle(spouse_h)
                spouse.add_family_handle(family_h)
                self.db.commit_person(spouse, self.trans)

            add_with_id(Media(), "O1001", self.db.add_media)
            add_with_id(Note(), "N1001", self.db.add_note)

            # generate_person() stores the person itself, so the fixed ID
            # is applied afterwards and committed.
            dup_person_h = self.generate_person(Person.MALE, "Smith",
                                                "Dup GID test GID I1001")
            dup_person = self.db.get_person_from_handle(dup_person_h)
            dup_person.set_gramps_id("I1001")
            self.db.commit_person(dup_person, self.trans)

            add_with_id(Place(), "P1001", self.db.add_place)
            add_with_id(Repository(), "R1001", self.db.add_repository)
            add_with_id(Source(), "S1001", self.db.add_source)
def test_cleanup_missing_photos(self): def test_cleanup_missing_photos(self):
pass pass