Speed improvement for the "has common ancestor" filter rule. For a

database with 2300 persons the time to filter goes from 500 seconds
to 80. I believe that the difference must be even bigger for larger
databases. The new algorithm is more or less linear. Each person is
only visited once to compute its ancestors, and the traversal is depth-first.
	* src/Filters/Rules/Person/_HasCommonAncestorWith.py


svn: r11451
This commit is contained in:
Kees Bakker 2008-12-11 10:06:16 +00:00
parent 13ad0f858e
commit a2ea6b1e51

View File

@ -51,33 +51,39 @@ class HasCommonAncestorWith(Rule):
def prepare(self, db):
    """Build the ancestor cache for the reference (root) person.

    ``self.list[0]`` holds the gramps id of the reference person that
    every filtered person is compared against.  For each(!) person we
    keep a set() of their ancestors in ``self.ancestor_cache``, so a
    person's ancestry is computed only once.
    """
    self.db = db
    self.ancestor_cache = {}
    self.root_person = db.get_person_from_gramps_id(self.list[0])
    # An unknown gramps id yields None; without this guard add_ancs
    # would crash on it (the pre-rewrite code caught this case with
    # a try/except around the cache initialisation).
    if self.root_person is not None:
        self.add_ancs(db, self.root_person)
def add_ancs(self, db, person):
    """Depth-first fill of ``self.ancestor_cache[person.handle]``.

    The cached value is a set of ancestor *handles* (not Person
    objects): if Person lacks value-based __eq__/__hash__, sets of
    objects fetched separately from the db would compare by identity
    and the later set intersection would silently never match —
    handles are plain strings and compare reliably, and are cheaper
    to keep around.
    """
    # Already computed — this is what makes the filter linear: each
    # person's ancestry is built exactly once.
    if person.handle in self.ancestor_cache:
        return
    ancestors = self.ancestor_cache[person.handle] = set()

    for fam_handle in person.get_parent_family_handle_list():
        fam = db.get_family_from_handle(fam_handle)
        if not fam:
            # presumably a dangling family handle — skip; TODO confirm
            # whether the db can actually return None here.
            continue
        for par_handle in (fam.get_father_handle(), fam.get_mother_handle()):
            if not par_handle:
                continue
            par = db.get_person_from_handle(par_handle)
            if not par:
                continue
            # Recurse first so the parent's set is complete before we
            # merge it into ours.
            if par.handle not in self.ancestor_cache:
                self.add_ancs(db, par)
            ancestors.add(par.handle)
            ancestors |= self.ancestor_cache[par.handle]
def reset(self):
    """Throw away every cached ancestor set between filter runs."""
    self.ancestor_cache = {}
def has_common_ancestor(self, other):
    """Return True when *other* shares an ancestor with the root person.

    Both ancestor sets are expected in ``self.ancestor_cache``
    (``prepare`` fills the root person's entry, ``apply`` fills the
    entry for *other*).  ``dict.get`` with an empty-set default keeps
    a missing entry — e.g. after ``reset()`` — from raising KeyError,
    which direct indexing would do.
    """
    root_ancs = self.ancestor_cache.get(self.root_person.handle, set())
    other_ancs = self.ancestor_cache.get(other.handle, set())
    return bool(root_ancs & other_ancs)
def apply(self, db, person):
    """Test whether *person* has an ancestor in common with the root.

    The person's ancestor set is computed on first sight and cached,
    so repeated runs of the filter over the same people stay cheap.
    """
    already_cached = person.handle in self.ancestor_cache
    if not already_cached:
        self.add_ancs(db, person)
    return self.has_common_ancestor(person)