diff --git a/data/tests/exp_sample_ged.ged b/data/tests/exp_sample_ged.ged index cce2ab1bc..3d8e2b10e 100644 --- a/data/tests/exp_sample_ged.ged +++ b/data/tests/exp_sample_ged.ged @@ -1,12 +1,12 @@ 0 HEAD 1 SOUR Gramps -2 VERS 5.0.1 +2 VERS 5.0.2 2 NAME Gramps -1 DATE 7 NOV 2018 -2 TIME 16:03:33 +1 DATE 5 MAR 2019 +2 TIME 09:11:15 1 SUBM @SUBM@ 1 FILE C:\Users\prc\AppData\Roaming\gramps\temp\exp_sample_ged.ged -1 COPR Copyright (c) 2018 Alex Roitman,,,. +1 COPR Copyright (c) 2019 Alex Roitman,,,. 1 GEDC 2 VERS 5.5.1 2 FORM LINEAGE-LINKED @@ -1420,8 +1420,8 @@ 0 @N0018@ NOTE Another Citation Note 0 @N0019@ NOTE A bad photo for sure 0 @O0000@ OBJE -1 FILE c:\users\prc\workspace\grampsm\main\data\tests\O0.jpg -2 FORM jpeg +1 FILE c:\msys64\mingw64\share\gramps\tests\O0.jpg +2 FORM jpg 2 TITL Michael O'Toole 2015-11 1 NOTE @N0019@ 1 CHAN diff --git a/data/tests/imp_FTM_16dec2015a-mod1.gramps b/data/tests/imp_FTM_16dec2015a-mod1.gramps index 7ae656e37..93f391ed7 100644 --- a/data/tests/imp_FTM_16dec2015a-mod1.gramps +++ b/data/tests/imp_FTM_16dec2015a-mod1.gramps @@ -3,41 +3,41 @@ "http://gramps-project.org/xml/1.7.1/grampsxml.dtd">
- +
- + Birth - + Residence - + Death - + Marriage - + Marriage - + M Andrew @@ -54,13 +54,13 @@ - + - + @@ -68,19 +68,19 @@ - + Year: 1850; Census Place: District 14, Cape Girardeau, Missouri; Roll: M432_394; Page: 435B; Image: 248 2 - + Year: 1850; Census Place: District 14, Cape Girardeau, Missouri; Roll: M432_394; Page: 435B; Image: 248 2 - + Year: 1850; Census Place: District 14, Cape Girardeau, Missouri; Roll: M432_394; Page: 435B; Image: 248 2 @@ -88,7 +88,7 @@ - + 1850 United States Federal Census Ancestry.com Name: Ancestry.com Operations, Inc.; Location: Provo, UT, USA; Date: 2009; @@ -96,42 +96,43 @@ - + Tennessee, USA - + District 14, Cape Girardeau, Missouri, USA - + Bollinger Co. MO - + Union Co.?, IL - + Wayne, Missouri, United States - + - - + + + - + - + Ancestry.com Library
@@ -140,16 +141,26 @@ - - Year: 1850; Census Place: District 14, Cape Girardeau, Missouri; Roll: M432_394; Page: 435B; Image: 248 + + Year: 1850; Census Place: District 14, Cape Girardeau, Missouri; Roll: M432_394; Page: 435B; Image: 248 - + Records not imported into OBJE (multi-media object) Gramps ID M159: -Could not import 1850 United States Federal Census(11)-1.jpg Line 70: 1 FILE 1850 United States Federal Census(11)-1.jpg +Could not import 1850 United States Federal Census(11)-1.jpg Line 70: 1 FILE 1850 United States Federal Census(11)-1.jpg + + + Records not imported into OBJE (multi-media object) Gramps ID M158: + +Could not import D:\Users\PRC\Downloads\1850 United States Federa Line 75: 1 FILE D:\Users\PRC\Downloads\1850 United States Federal Census(11)-1.jpg + + + diff --git a/data/tests/imp_MediaTest.gramps b/data/tests/imp_MediaTest.gramps index 0b58cc6f3..86decb356 100644 --- a/data/tests/imp_MediaTest.gramps +++ b/data/tests/imp_MediaTest.gramps @@ -3,7 +3,7 @@ "http://gramps-project.org/xml/1.7.1/grampsxml.dtd">
- +
@@ -69,7 +69,7 @@
- + @@ -118,7 +118,7 @@ - + @@ -130,7 +130,7 @@ - + diff --git a/data/tests/imp_sample.gramps b/data/tests/imp_sample.gramps index db2cc534f..abc9822a3 100644 --- a/data/tests/imp_sample.gramps +++ b/data/tests/imp_sample.gramps @@ -3,7 +3,7 @@ "http://gramps-project.org/xml/1.7.1/grampsxml.dtd">
- + Alex Roitman,,, Not Provided @@ -1456,13 +1456,13 @@
- + - + - + @@ -1506,7 +1506,8 @@ Records not imported into HEAD (header): -GEDCOM FORM not supported Line 14: 2 FORM NOT LINEAGE-LINKED +GEDCOM FORM not supported Line 14: 2 FORM NOT LINEAGE-LINKED + @@ -1514,7 +1515,8 @@ GEDCOM FORM not supported Line 14: Records not imported into SUBM (Submitter): (@SUBM@) Alex Roitman,,,: -Line ignored as not understood Line 23: 2 NOTE No address provided (note not supported) +Line ignored as not understood Line 23: 2 NOTE No address provided (note not supported) + @@ -1523,7 +1525,8 @@ Line ignored as not understood Line 23: Records not imported into FAM (family) Gramps ID F0003: Line ignored as not understood Line 46: 2 SOUR Not really allowed here -Filename omitted Line 48: 1 OBJE +Filename omitted Line 48: 1 OBJE + @@ -1562,7 +1565,8 @@ Filename omitted Line 48: Records not imported into INDI (individual) Gramps ID I0016: Warn: ADDR overwritten Line 204: 3 ADR1 456 Main St again -ADDR element ignored '459 Main St.' Line 202: 2 ADDR 459 Main St., The Village, San Francisco, CA, USA +ADDR element ignored '459 Main St.' Line 202: 2 ADDR 459 Main St., The Village, San Francisco, CA, USA + @@ -1573,7 +1577,8 @@ ADDR element ignored '459 Main St.' Line 202: Records not imported into INDI (individual) Gramps ID I0018: -Tag recognized but not supported Line 245: 2 TYPE first generaton +Tag recognized but not supported Line 245: 2 TYPE first generaton + @@ -1604,7 +1609,8 @@ Company. He enlisted in the army at Sparks 7 December 1917 and served as a Corpo Records not imported into FAM (family) Gramps ID F0010: -Tag recognized but not supported Line 863: 2 _STAT +Tag recognized but not supported Line 863: 2 _STAT + @@ -1613,7 +1619,8 @@ Tag recognized but not supported Line 863: Records not imported into FAM (family) Gramps ID F0011: Could not import Magnes&Anna_smiths_marr_cert.jpg Line 878: 3 OBJE -Could not import Magnes&Anna_smiths_marr_cert.jpg Line 881: 2 OBJE +Could not import Magnes&Anna_smiths_marr_cert.jpg Line 881: 2 OBJE + @@ -1621,7 +1628,8 @@ Could not import Magnes&Anna_smiths_marr_cert.jpg Line 8 Records not imported into FAM (family) Gramps ID F0012: -Could not import John&Alice_smiths_marr_cert.jpg Line 905: 1 OBJE +Could not import John&Alice_smiths_marr_cert.jpg Line 905: 1 OBJE + @@ -1629,7 +1637,8 @@ Could not import John&Alice_smiths_marr_cert.jpg Line 9 Records not imported into FAM (family) Gramps ID F0008: -Tag recognized but not supported Line 1005: 1 ADDR 123 Main st, Grantville, Virginia, USA +Tag recognized but not supported Line 1005: 1 ADDR 123 Main st, Grantville, Virginia, USA + @@ -1653,7 +1662,8 @@ Tag recognized but not supported Line 1005: Records not imported into SOUR (source) Gramps ID S0003: Tag recognized but not supported Line 1045: 1 DATA -Skipped subordinate line Line 1046: 2 AGNC NYC Public Library +Skipped subordinate line Line 1046: 2 AGNC NYC Public Library + @@ -1669,7 +1679,8 @@ Skipped subordinate line Line 1046: REFN ignored Line 1075: 3 REFN blah blah Skipped subordinate line Line 1076: 4 TYPE who knows -Could not import Attic_photo.jpg Line 1079: 3 OBJE +Could not import Attic_photo.jpg Line 1079: 3 OBJE + @@ -1677,7 +1688,8 @@ Could not import Attic_photo.jpg Line 1079: Records not imported into Top Level: -Unknown tag Line 1106: 0 XXX an unknown token at level 0 +Unknown tag Line 1106: 0 XXX an unknown token at level 0 + @@ -1685,12 +1697,13 @@ Unknown tag Line 1106: Records not imported into Top Level: -Unknown tag Line 1109: 1 @X1@ XXX and unknown token xref definition +Unknown tag Line 1109: 1 @X1@ XXX and unknown token xref definition + - + Objects referenced by this note were missing in a file imported on 12/25/1999 12:00:00 AM. diff --git a/gramps/plugins/export/exportgedcom.py b/gramps/plugins/export/exportgedcom.py index 9ce4cc206..e928f5eaf 100644 --- a/gramps/plugins/export/exportgedcom.py +++ b/gramps/plugins/export/exportgedcom.py @@ -104,15 +104,6 @@ LANGUAGES = { # #------------------------------------------------------------------------- -MIME2GED = { - "image/bmp" : "bmp", - "image/gif" : "gif", - "image/jpeg" : "jpeg", - "image/x-pcx" : "pcx", - "image/tiff" : "tiff", - "audio/x-wav" : "wav" -} - QUALITY_MAP = { Citation.CONF_VERY_HIGH : "3", Citation.CONF_HIGH : "2", @@ -1467,8 +1458,7 @@ class GedcomWriter(UpdateCallback): gramps_id = media.get_gramps_id() self._writeln(0, '@%s@' % gramps_id, 'OBJE') - mime = media.get_mime_type() - form = MIME2GED.get(mime, mime) + form = os.path.splitext(media.get_path())[1][1:] path = media_path_full(self.dbase, media.get_path()) self._writeln(1, 'FILE', path, limit=255) if form: diff --git a/gramps/plugins/lib/libgedcom.py b/gramps/plugins/lib/libgedcom.py index cdfd42ad8..49900f30d 100644 --- a/gramps/plugins/lib/libgedcom.py +++ b/gramps/plugins/lib/libgedcom.py @@ -94,6 +94,7 @@ import time # from xml.parsers.expat import ParserCreate from collections import defaultdict, OrderedDict import string +import mimetypes from io import StringIO, TextIOWrapper from urllib.parse import urlparse @@ -123,7 +124,7 @@ from gramps.gen.lib import ( Surname, Tag, Url, UrlType, PlaceType, PlaceRef, PlaceName) from gramps.gen.db import DbTxn from gramps.gen.updatecallback import UpdateCallback -from gramps.gen.mime import get_type +from gramps.gen.utils.file import media_path from gramps.gen.utils.id import create_id from gramps.gen.utils.lds import TEMPLES from gramps.gen.utils.unknown import make_unknown, create_explanation_note @@ -531,15 +532,6 @@ PEDIGREE_TYPES = { 'adopted': TYPE_ADOPT, 'foster' : TYPE_FOSTER, } -MIME_MAP = { - 'jpeg' : 'image/jpeg', 'rtf' : 'text/rtf', - 'jpg' : 'image/jpeg', 'pdf' : 'application/pdf', - 'mpeg' : 'video/mpeg', 'gif' : 'image/gif', - 'mpg' : 'video/mpeg', 'bmp' : 'image/x-ms-bmp', - 'tiff' : 'image/tiff', 'aif' : 'audio/x-aiff', - 'text' : 'text/plain', 'w8bn' : 'application/msword', - 'wav' : 'audio/x-wav', 'mov' : 'video/quicktime', } - FTW_BAD_PLACE = [ EventType.OCCUPATION, EventType.RELIGION, @@ -2728,7 +2720,6 @@ class GedcomParser(UpdateCallback): self.attrs = list(amap.values()) self.gedattr = dict([key, val] for val, key in amap.items()) - self.search_paths = [] def parse_gedcom_file(self, use_trans=False): """ @@ -3021,6 +3012,9 @@ class GedcomParser(UpdateCallback): # FIXME: problem possibly caused by umlaut/accented character # in filename return (0, fullname) + # strip off Windows drive letter, if present + if len(fullname) > 3 and fullname[1] == ':': + fullname = fullname[2:] # look where we found the '.ged', using the full path in fullname other = os.path.join(altpath, fullname) if os.path.isfile(other): @@ -3029,15 +3023,15 @@ class GedcomParser(UpdateCallback): other = os.path.join(altpath, os.path.basename(fullname)) if os.path.isfile(other): return (1, other) - # I don't think the following code does anything because search_paths - # is never initialized... - if len(fullname) > 3: - if fullname[1] == ':': - fullname = fullname[2:] - for path in self.search_paths: - other = os.path.normpath("%s/%s" % (path, fullname)) - if os.path.isfile(other): - return (1, other) + # lets try using the base path for relative media paths + other = os.path.join(media_path(self.dbase), fullname) + if os.path.isfile(other): + return (1, fullname) + # lets try using the base path for relative media paths with base name + other = os.path.join(media_path(self.dbase), + os.path.basename(fullname)) + if os.path.isfile(other): + return (1, os.path.basename(fullname)) return (0, fullname) def __get_next_line(self): @@ -5366,8 +5360,9 @@ class GedcomParser(UpdateCallback): # to allow import of references to URLs (especially for import from # geni.com), do not try to find the file if it is blatently a URL res = urlparse(sub_state.filename) - if sub_state.filename != '' and \ - (res.scheme == '' or res.scheme == 'file'): + if sub_state.filename != '' and (res.scheme == '' or + len(res.scheme) == 1 or + res.scheme == 'file'): (valid, path) = self.__find_file(sub_state.filename, self.dir_path) if not valid: @@ -5384,13 +5379,19 @@ class GedcomParser(UpdateCallback): if sub_state.title: photo.set_description(sub_state.title) else: - photo.set_description(path) + photo.set_description(path.replace('\\', '/')) full_path = os.path.abspath(path) - if os.path.isfile(full_path): - photo.set_mime_type(get_type(full_path)) - else: - photo.set_mime_type(MIME_MAP.get(sub_state.form, - 'unknown')) + # deal with mime types + value = mimetypes.guess_type(full_path) + if value and value[0]: # found from filename + photo.set_mime_type(value[0]) + else: # get from OBJE.FILE.FORM + if '/' in sub_state.form: # already has expanded mime type + photo.set_mime_type(sub_state.form) + else: + value = mimetypes.types_map.get('.' + sub_state.form, + _('unknown')) + photo.set_mime_type(value) if sub_state.attr: photo.attribute_list.append(sub_state.attr) self.dbase.add_media(photo, self.trans) @@ -6691,6 +6692,17 @@ class GedcomParser(UpdateCallback): if state.media.get_path() == "": self.__add_msg(_("Filename omitted"), line, state) + # deal with mime types + value = mimetypes.guess_type(state.media.get_path()) + if value and value[0]: # found from filename + state.media.set_mime_type(value[0]) + else: # get from OBJE.FILE.FORM + if '/' in state.form: # already has expanded mime type + state.media.set_mime_type(state.form) + else: + value = mimetypes.types_map.get('.' + state.form, + _('unknown')) + state.media.set_mime_type(value) # Add the default reference if no source has found self.__add_default_source(media) @@ -6726,21 +6738,21 @@ class GedcomParser(UpdateCallback): self.__skip_subordinate_levels(state.level + 1, state) return res = urlparse(line.data) - if line.data != '' and (res.scheme == '' or res.scheme == 'file'): + if line.data != '' and (res.scheme == '' or + len(res.scheme) == 1 or res.scheme == 'file'): (file_ok, filename) = self.__find_file(line.data, self.dir_path) if state.form != "url": # Might not work if FORM doesn't precede FILE if not file_ok: - self.__add_msg(_("Could not import %s") % filename, line, + self.__add_msg(_("Could not import %s") % line.data, line, state) path = filename else: path = line.data state.media.set_path(path) - state.media.set_mime_type(get_type(path)) if not state.media.get_description(): - state.media.set_description(path) + state.media.set_description(path.replace('\\', '/')) def __obje_title(self, line, state): """