Exportgedcom and importgedcom. 0006382: ADDR tag in GEDCOM export does not contain a full address. On export, all the elements of the structured address are output as ADDR/CONT. On import the structured address takes precedence (as at present), but a warning is only given if an element in the free-form address is missing from the structured address.

svn: r21608
This commit is contained in:
Tim G L Lyons 2013-03-11 18:44:01 +00:00
parent ce1c01f963
commit d2dbe4427d
2 changed files with 188 additions and 156 deletions

View File

@ -364,39 +364,19 @@ class GedcomWriter(UpdateCallback):
"""
owner = self.dbase.get_researcher()
name = owner.get_name()
addr = owner.get_address()
adr2 = owner.get_locality()
city = owner.get_city()
state = owner.get_state()
ctry = owner.get_country()
post = owner.get_postal_code()
phon = owner.get_phone()
mail = owner.get_email()
if not name :
name = u'Not Provided'
if not addr :
addr = u'Not Provided'
self.__writeln(0, "@SUBM@", "SUBM")
self.__writeln(1, "NAME", name)
self.__writeln(1, "ADDR", addr)
if city and state and post:
self.__writeln(2, "CONT", "%s, %s %s" % (city, state, post))
else:
self.__writeln(2, "CONT", u"Not Provided")
if addr:
self.__writeln(2, "ADR1", addr)
if adr2:
self.__writeln(2, "ADR2", adr2)
if city:
self.__writeln(2, "CITY", city)
if state:
self.__writeln(2, "STAE", state)
if post:
self.__writeln(2, "POST", post)
if ctry:
self.__writeln(2, "CTRY", ctry)
# Researcher is a sub-type of LocationBase, so get_city etc. which are
# used in __write_addr work fine. However, the database owner street is
# stored in address, so we need to temporarily copy it into street so
# __write_addr works properly
owner.set_street(owner.get_address())
self.__write_addr(1, owner)
if phon:
self.__writeln(1, "PHON", phon)
if mail:
@ -687,19 +667,7 @@ class GedcomWriter(UpdateCallback):
for addr in person.get_address_list():
self.__writeln(1, 'RESI')
self.__date(2, addr.get_date_object())
self.__writeln(2, "ADDR", addr.get_street())
if addr.get_street():
self.__writeln(3, 'ADR1', addr.get_street())
if addr.get_locality():
self.__writeln(3, 'ADR2', addr.get_locality())
if addr.get_city():
self.__writeln(3, 'CITY', addr.get_city())
if addr.get_state():
self.__writeln(3, 'STAE', addr.get_state())
if addr.get_postal_code():
self.__writeln(3, 'POST', addr.get_postal_code())
if addr.get_country():
self.__writeln(3, 'CTRY', addr.get_country())
self.__write_addr(2, addr)
if addr.get_phone():
self.__writeln(2, 'PHON', addr.get_phone())
@ -1017,19 +985,7 @@ class GedcomWriter(UpdateCallback):
if repo.get_name():
self.__writeln(1, 'NAME', repo.get_name())
for addr in repo.get_address_list():
self.__writeln(1, "ADDR", addr.get_street())
if addr.get_street():
self.__writeln(2, 'ADR1', addr.get_street())
if addr.get_locality():
self.__writeln(2, 'ADR2', addr.get_locality())
if addr.get_city():
self.__writeln(2, 'CITY', addr.get_city())
if addr.get_state():
self.__writeln(2, 'STAE', addr.get_state())
if addr.get_postal_code():
self.__writeln(2, 'POST', addr.get_postal_code())
if addr.get_country():
self.__writeln(2, 'CTRY', addr.get_country())
self.__write_addr(1, addr)
if addr.get_phone():
self.__writeln(1, 'PHON', addr.get_phone())
for url in repo.get_url_list():
@ -1412,24 +1368,64 @@ class GedcomWriter(UpdateCallback):
# http://homepages.rootsweb.com/~pmcbride/gedcom/55gcch2.htm#EVENT_DETAIL
location = place.get_main_location()
if location and not location.is_empty():
self.__writeln(level, "ADDR", location.get_street())
if location.get_street():
self.__writeln(level + 1, 'ADR1', location.get_street())
if location.get_locality():
self.__writeln(level + 1, 'ADR2', location.get_locality())
if location.get_city():
self.__writeln(level + 1, 'CITY', location.get_city())
if location.get_state():
self.__writeln(level + 1, 'STAE', location.get_state())
if location.get_postal_code():
self.__writeln(level + 1, 'POST', location.get_postal_code())
if location.get_country():
self.__writeln(level + 1, 'CTRY', location.get_country())
self.__write_addr(level, location)
if location.get_phone():
self.__writeln(level, 'PHON', location.get_phone())
self.__note_references(place.get_note_list(), level+1)
def __write_addr(self, level, addr):
    """
    Write an address in both free-form and structured GEDCOM format.

    n ADDR <ADDRESS_LINE> {0:1}
    +1 CONT <ADDRESS_LINE> {0:M}
    +1 ADR1 <ADDRESS_LINE1> {0:1} (Street)
    +1 ADR2 <ADDRESS_LINE2> {0:1} (Locality)
    +1 CITY <ADDRESS_CITY> {0:1}
    +1 STAE <ADDRESS_STATE> {0:1}
    +1 POST <ADDRESS_POSTAL_CODE> {0:1}
    +1 CTRY <ADDRESS_COUNTRY> {0:1}

    This is done along the lines suggested by Tamura Jones in
    http://www.tamurajones.net/GEDCOMADDR.xhtml as a result of bug 6382.
    "GEDCOM writers should always use the structured address format,
    and use it for all addresses, including the submitter address and
    their own corporate address." "Vendors that want their product to pass
    even the strictest GEDCOM validation, should include export to the old
    free-form format..." [This goes on to say the free-form should be an
    option, but we have not made it an option in Gramps].

    Nothing at all is written when every element of the address is empty.
    Otherwise the street is written on the ADDR line (even if the street
    itself is empty), every other non-empty element follows as a CONT
    line, and finally each non-empty element is written again under its
    structured tag.

    @param level: The level number for the ADDR tag
    @type level: Integer
    @param addr: The location or address
    @type addr: [a super-type of] LocationBase
    """
    street = addr.get_street()
    locality = addr.get_locality()
    city = addr.get_city()
    state = addr.get_state()
    # The getter must be *called*: testing the bound method itself is
    # always true and made this guard useless for empty addresses.
    postal_code = addr.get_postal_code()
    country = addr.get_country()

    # Elements that follow the street in the free-form address, in order.
    continuation = (locality, city, state, postal_code, country)
    if not (street or any(continuation)):
        return

    # Free-form address: street on the ADDR line, the rest as CONT lines.
    self.__writeln(level, 'ADDR', street)
    for value in continuation:
        if value:
            self.__writeln(level + 1, 'CONT', value)

    # Structured address: one tag per non-empty element.
    structured = (
        ('ADR1', street),
        ('ADR2', locality),
        ('CITY', city),
        ('STAE', state),
        ('POST', postal_code),
        ('CTRY', country),
    )
    for tag, value in structured:
        if value:
            self.__writeln(level + 1, tag, value)
#-------------------------------------------------------------------------
#
#

View File

@ -98,6 +98,7 @@ from xml.parsers.expat import ParserCreate
from collections import defaultdict
import cStringIO
from urlparse import urlparse
import string
#------------------------------------------------------------------------
#
@ -629,9 +630,6 @@ DATE_QUALITY = {
# regular expressions
#
#-------------------------------------------------------------------------
ADDR_RE = re.compile('(.+)([\n\r]+)(.+)\s*, (.+)\s+(\d+)\s*(.*)')
ADDR2_RE = re.compile('(.+)([\n\r]+)(.+)\s*, (.+)\s+(\d+)')
ADDR3_RE = re.compile('(.+)([\n\r]+)(.+)\s*, (.+)')
NOTE_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+NOTE(.*)$")
CONT_RE = re.compile(r"\s*\d+\s+CONT\s?(.*)$")
CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
@ -2087,6 +2085,7 @@ class GedcomParser(UpdateCallback):
TOKEN_SOUR : self.__event_source,
TOKEN_PLAC : self.__event_place,
TOKEN_ADDR : self.__event_addr,
TOKEN_PHON : self.__event_phon,
TOKEN_CAUS : self.__event_cause,
TOKEN_AGNC : self.__event_agnc,
TOKEN_AGE : self.__event_age,
@ -2198,14 +2197,15 @@ class GedcomParser(UpdateCallback):
self.func_list.append(self.object_parse_tbl)
self.parse_loc_tbl = {
TOKEN_ADDR : self.__location_addr,
TOKEN_ADR1 : self.__location_adr1,
TOKEN_ADR2 : self.__location_adr2,
TOKEN_DATE : self.__location_date,
TOKEN_CITY : self.__location_city,
TOKEN_STAE : self.__location_stae,
TOKEN_POST : self.__location_post,
TOKEN_CTRY : self.__location_ctry,
# Not legal GEDCOM - not clear why these are included at this level
TOKEN_ADDR : self.__ignore,
TOKEN_DATE : self.__location_date,
TOKEN_NOTE : self.__location_note,
TOKEN_RNOTE : self.__location_note,
TOKEN__LOC : self.__ignore,
@ -2263,8 +2263,7 @@ class GedcomParser(UpdateCallback):
# +1 <<CHANGE_DATE>> {0:1}
TOKEN_CHAN : self.__family_chan,
TOKEN_ENDL : self.__ignore,
TOKEN_ADDR : self.__family_addr,
TOKEN_ADDR : self.__ignore,
TOKEN_RIN : self.__family_cust_attr,
TOKEN_SUBM : self.__ignore,
TOKEN_ATTR : self.__family_attr,
@ -3120,6 +3119,69 @@ class GedcomParser(UpdateCallback):
self.__add_msg(txt)
self.number_of_errors -= 1
def __merge_address(self, free_form_address, addr, line, state):
    """
    Merge freeform and structured addresses.

    n ADDR <ADDRESS_LINE> {0:1}
    +1 CONT <ADDRESS_LINE> {0:M}
    +1 ADR1 <ADDRESS_LINE1> {0:1} (Street)
    +1 ADR2 <ADDRESS_LINE2> {0:1} (Locality)
    +1 CITY <ADDRESS_CITY> {0:1}
    +1 STAE <ADDRESS_STATE> {0:1}
    +1 POST <ADDRESS_POSTAL_CODE> {0:1}
    +1 CTRY <ADDRESS_COUNTRY> {0:1}

    This is done along the lines suggested by Tamura Jones in
    http://www.tamurajones.net/GEDCOMADDR.xhtml as a result of bug 6382.
    "When a GEDCOM reader encounters a double address, it should read the
    structured address. ... A GEDCOM reader that does verify that the
    addresses are the same should issue an error if they are not".

    This is called for SUBMitter addresses (__subm_addr), INDIvidual
    addresses (__person_addr), REPO addresses and HEADer corp address
    (__repo_addr) and EVENt addresses (__event_addr).

    The structured address (if any) will have been accumulated into an
    object of type LocationBase, which will either be a Location, or an
    Address object.

    If ADDR is provided, but none of ADR1, ADR2, CITY, STAE, or POST (not
    CTRY), then Street is set to the freeform address. N.B. this is a change
    for Repository addresses and HEADer Corp address where previously the
    free-form address was deconstructed into different structured
    components. N.B. PAF provides a free-form address and a country, so this
    allows for that case.

    If both forms of address are provided, then the structured address is
    used, and if the ADDR/CONT contains anything not in the structured
    address, a warning is issued.

    If just ADR1, ADR2, CITY, STAE, POST or CTRY are provided (this is not
    actually legal GEDCOM syntax, but may be possible by GEDCOM extensions)
    then just the structured address is used.

    @param free_form_address: The text of the ADDR line and its CONT
                              lines, separated by newlines
    @type free_form_address: str
    @param addr: The object holding the structured address; its street
                 is overwritten when no structured address was given
    @type addr: [a super-type of] LocationBase
    @param line: The current line, handed on to __add_msg with any warning
    @type line: GedLine
    @param state: The current state, handed on to __add_msg with any
                  warning
    @type state: CurrentState
    """
    if not (addr.get_street() or addr.get_locality() or
            addr.get_city() or addr.get_state() or
            addr.get_postal_code()):
        # No structured address (a lone country does not count): keep the
        # whole free-form text as the street.
        addr.set_street(free_form_address)
        return

    # A structured address was provided. Collect every line of every
    # structured element, ignoring surrounding commas and whitespace, so
    # the free-form lines can be checked against them.
    strip_chars = ',' + string.whitespace
    structured = set()
    for value in (addr.get_street(), addr.get_locality(),
                  addr.get_city(), addr.get_state(),
                  addr.get_postal_code(), addr.get_country()):
        structured.update(part.strip(strip_chars)
                          for part in value.split("\n"))

    for elmn in free_form_address.split("\n"):
        stripped = elmn.strip(strip_chars)
        if not stripped:
            # A blank free-form line carries no information to lose.
            continue
        if stripped not in structured:
            # message means that the element %s was ignored, but
            # expressed the wrong way round because the message is
            # truncated for output.
            # Translate the template first and interpolate afterwards,
            # otherwise the catalogue lookup can never match.
            self.__add_msg(_("ADDR element ignored '%s'") % elmn,
                           line, state)
    # The free-form address ADDR is discarded
def __parse_trailer(self):
"""
Looks for the expected TRLR token
@ -3756,7 +3818,7 @@ class GedcomParser(UpdateCallback):
def __person_addr(self, line, state):
"""
Parses the Address structure
Parses the INDIvidual <ADDRESS_STRUCTURE>
n ADDR <ADDRESS_LINE> {0:1}
+1 CONT <ADDRESS_LINE> {0:M}
@ -3766,21 +3828,23 @@ class GedcomParser(UpdateCallback):
+1 STAE <ADDRESS_STATE> {0:1}
+1 POST <ADDRESS_POSTAL_CODE> {0:1}
+1 CTRY <ADDRESS_COUNTRY> {0:1}
n PHON <PHONE_NUMBER> {0:3}
@param line: The current line in GedLine format
@type line: GedLine
@param state: The current state
@type state: CurrentState
"""
sub_state = CurrentState()
sub_state.level = state.level+1
free_form = line.data
sub_state = CurrentState(level=state.level + 1)
sub_state.addr = gen.lib.Address()
sub_state.addr.set_street(line.data)
state.person.add_address(sub_state.addr)
self.__parse_level(sub_state, self.parse_addr_tbl, self.__ignore)
state.msg += sub_state.msg
self.__merge_address(free_form, sub_state.addr, line, state)
state.person.add_address(sub_state.addr)
def __person_phon(self, line, state):
"""
n PHON <PHONE_NUMBER> {0:3}
@ -4871,17 +4935,6 @@ class GedcomParser(UpdateCallback):
"""
self.__parse_change(line, state.family, state.level+1, state)
def __family_addr(self, line, state):
    """
    Start a new Address on the current state from an ADDR line.

    The data of the line becomes the street of the new Address, and the
    lower-level lines are then parsed using the address parse table;
    tokens not in that table are ignored.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    address = gen.lib.Address()
    address.set_street(line.data)
    state.addr = address
    self.__parse_level(state, self.parse_addr_tbl, self.__ignore)
def __family_attr(self, line, state):
"""
@param line: The current line in GedLine format
@ -5232,20 +5285,34 @@ class GedcomParser(UpdateCallback):
def __event_addr(self, line, state):
"""
Parses the EVENt <ADDRESS_STRUCTURE>
n ADDR <ADDRESS_LINE> {0:1}
+1 CONT <ADDRESS_LINE> {0:M}
+1 ADR1 <ADDRESS_LINE1> {0:1} (Street)
+1 ADR2 <ADDRESS_LINE2> {0:1} (Locality)
+1 CITY <ADDRESS_CITY> {0:1}
+1 STAE <ADDRESS_STATE> {0:1}
+1 POST <ADDRESS_POSTAL_CODE> {0:1}
+1 CTRY <ADDRESS_COUNTRY> {0:1}
@param line: The current line in GedLine format
@type line: GedLine
@param state: The current state
@type state: CurrentState
"""
free_form = line.data
sub_state = CurrentState(level=state.level+1)
sub_state.location = gen.lib.Location()
sub_state.location.set_street(line.data)
sub_state.note = []
sub_state.event = state.event
self.__parse_level(sub_state, self.parse_loc_tbl, self.__undefined)
state.msg += sub_state.msg
self.__merge_address(free_form, sub_state.location, line, state)
location = sub_state.location
note_list = sub_state.note
@ -5646,6 +5713,7 @@ class GedcomParser(UpdateCallback):
@type state: CurrentState
"""
# The ADDR may already have been parsed by the level above
assert state.addr.get_street() == ""
if state.addr.get_street() != "":
self.__add_msg(_("Warn: ADDR overwritten"), line, state)
state.addr.set_street(line.data)
@ -5916,6 +5984,7 @@ class GedcomParser(UpdateCallback):
state = CurrentState()
state.source = self.__find_or_create_source(self.sid_map[name])
# SOURce with the given gramps_id had no title
state.source.set_title(_("No title - ID %s") %
state.source.get_gramps_id())
state.level = level
@ -6376,6 +6445,8 @@ class GedcomParser(UpdateCallback):
def __repo_addr(self, line, state):
"""
Parses the REPOsitory and HEADer CORP <ADDRESS_STRUCTURE>
n ADDR <ADDRESS_LINE> {0:1}
+1 CONT <ADDRESS_LINE> {0:M}
+1 ADR1 <ADDRESS_LINE1> {0:1} (Street)
@ -6384,52 +6455,22 @@ class GedcomParser(UpdateCallback):
+1 STAE <ADDRESS_STATE> {0:1}
+1 POST <ADDRESS_POSTAL_CODE> {0:1}
+1 CTRY <ADDRESS_COUNTRY> {0:1}
n PHON <PHONE_NUMBER> {0:3}
Some repositories do not try to break up the address,
instead they put everything on a single line. Try to determine
if this happened, and try to fix it.
@param line: The current line in GedLine format
@type line: GedLine
@param state: The current state
@type state: CurrentState
"""
free_form = line.data
addr = gen.lib.Address()
addr.set_street(line.data)
sub_state = CurrentState()
sub_state.level = state.level+1
sub_state.addr = addr
sub_state = CurrentState(level=state.level + 1)
sub_state.addr = gen.lib.Address()
self.__parse_level(sub_state, self.parse_addr_tbl, self.__ignore)
state.msg += sub_state.msg
text = addr.get_street()
if not (addr.get_city() or addr.get_state() or
addr.get_postal_code() or addr.get_country()):
match = ADDR_RE.match(text)
if match:
groups = match.groups()
addr.set_street(groups[0].strip())
addr.set_city(groups[2].strip())
addr.set_state(groups[3].strip())
addr.set_postal_code(groups[4].strip())
addr.set_country(groups[5].strip())
match = ADDR2_RE.match(text)
if match:
groups = match.groups()
addr.set_street(groups[0].strip())
addr.set_city(groups[2].strip())
addr.set_state(groups[3].strip())
addr.set_postal_code(groups[4].strip())
match = ADDR3_RE.match(text)
if match:
groups = match.groups()
addr.set_street(groups[0].strip())
addr.set_city(groups[2].strip())
addr.set_state(groups[3].strip())
state.repo.add_address(addr)
self.__merge_address(free_form, sub_state.addr, line, state)
state.repo.add_address(sub_state.addr)
def __repo_phon(self, line, state):
"""
@ -6466,22 +6507,6 @@ class GedcomParser(UpdateCallback):
url.set_type(gen.lib.UrlType(gen.lib.UrlType.EMAIL))
state.repo.add_url(url)
def __location_addr(self, line, state):
    """
    Append the data of an ADDR line to the street of the state's location.

    A Location is created on the state if it does not have one yet. The
    stripped line data becomes the street, or is appended to an existing
    street after ", ". Newlines in the result are replaced by spaces.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    location = state.location
    if not location:
        location = gen.lib.Location()
        state.location = location

    addition = line.data.strip()
    existing = location.get_street()
    if existing:
        street = "%s, %s" % (existing, addition)
    else:
        street = addition
    location.set_street(street.replace('\n', ' '))
def __location_date(self, line, state):
"""
@param line: The current line in GedLine format
@ -7319,25 +7344,36 @@ class GedcomParser(UpdateCallback):
def __subm_addr(self, line, state):
    """
    Parses the SUBMitter address structure

    n ADDR <ADDRESS_LINE> {0:1}
    +1 CONT <ADDRESS_LINE> {0:M}
    +1 ADR1 <ADDRESS_LINE1> {0:1} (Street)
    +1 ADR2 <ADDRESS_LINE2> {0:1} (Locality)
    +1 CITY <ADDRESS_CITY> {0:1}
    +1 STAE <ADDRESS_STATE> {0:1}
    +1 POST <ADDRESS_POSTAL_CODE> {0:1}
    +1 CTRY <ADDRESS_COUNTRY> {0:1}

    The structured elements are accumulated directly into the researcher
    object (state.res), which is then merged with the free-form address.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    free_form = line.data

    # Parse the lower-level lines straight into the researcher object, so
    # no temporary Location has to be created and copied back field by
    # field afterwards.
    sub_state = CurrentState(level=state.level + 1)
    sub_state.location = state.res
    self.__parse_level(sub_state, self.parse_loc_tbl, self.__undefined)
    state.msg += sub_state.msg

    self.__merge_address(free_form, state.res, line, state)
    # Researcher is a sub-type of LocationBase, so get_street and set_street
    # which are used in routines called from self.parse_loc_tbl work fine.
    # Unfortunately, Researcher also has get_address and set_address, so we
    # need to copy the street into that.
    state.res.set_address(state.res.get_street())
def __subm_phon(self, line, state):
"""