gramps/data/evidencestyle/evidencefieldgenerator.py

#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2013  Benny Malengier
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#

# $Id$

"""
This module parses the evidence csv file and generates the code we need in
Gramps to use the evidence style.
"""

#-------------------------------------------------------------------------
#
# Standard Python modules
#
#-------------------------------------------------------------------------
import csv


#-------------------------------------------------------------------------
#
# Code
#
#-------------------------------------------------------------------------
csvfilename = "evidence_style.csv"
NRCOL = 0
CATCOL = 1
CATTYPECOL = 2
TYPECOL = 3
DESCRCOL= 4
CITETYPECOL = 5
IDENTCOL = 6
LDELCOL = 7  # left delimiter
FIELDCOL = 8
LABELCOL = 9
RDELCOL = 10 # right delimiter
GEDCOMCOL = 11
SHORTERCOL = 12
STYLECOL = 13
PRIVACYCOL = 14
OPTCOL = 15
HINTCOL = 16
TOOLTIPCOL = 17

CITE_TYPES = {'F': 'REF_TYPE_F', 'L': 'REF_TYPE_L', 'S': 'REF_TYPE_S'}
GEDCOMFIELDS = {'A': 'GED_AUTHOR', 'T': 'GED_TITLE', 
                'P': 'GED_PUBINF', 'D': 'GED_DATE'}
SHORTERALG = {'LOC': 'SHORTERALG_LOC', 'YEAR': 'SHORTERALG_YEAR',
              'ETAL': 'SHORTERALG_ETAL', 'REV.': 'SHORTERALG_REVERT_TO_DOT'}
STYLES = {'Quoted': 'STYLE_QUOTE', 'Italics': 'STYLE_EMPH',
          'QuotedCont': 'STYLE_QUOTECONT', 'Bold': 'STYLE_BOLD'}

DEFAULT_HINTS = {
    "ACT": "Public Law 12-98",
    "ADDRESS": "Broadway Avenue, New York",
    "AFFILIATION": "Agent of Gramps Software",
    "AUTHOR": "Doe, D.P. & Cameron, E.",
    "AUTHOR_LOCATION": "Chicago",
    "BOOK": "The big example Gramps manual",
    "CASE": "B. Malengier versus N. Hall",
    "CEMETERY": "Greenwich Cemetery Office",
    "CHAPTER": "The first office of T. Rooseveld",
    "CHAPTER_PAGES": "24-55",
    "COLLECTION": "Bruges Lace Collection",
    "COLUMN": "col. 3",
    "COMPILER": "T. Da Silva",
    "CREATION_DATE": "13 Aug 1965",
    "CREATOR": "P. Picasso",
    "CREDIT_LINE": "Based on unnamed document lost in fire",
    "DATE": "17 Sep 1745",
    "DATE_ACCESSED": "18 Jun 2013",
    "DATE_RANGE": "2003-6",
    "DESCRIPTION": "The lace has inscriptions with names of nobility",
    "DISTRICT": "Enumeration district (ED) 14",
    "DIVISION": "Peterburg Post Office, or Portland, ward 4",
    "EDITION": "Second Edition",
    "EDITOR": "Hoover, J.E.",
    "FILE": "Membership application J. Rapinat",
    "FILE_DATE": "15 Jan 1870",
    "FILE_LOCATION": "Accession 7, Box 3",
    "FILE_NO": "1243-EB-98",
    "FILE_UNIT": "Letters to George Washington",
    "FILM_ID": "T345",
    "FILM_PUBLICATION_PLACE": "Kansas City",
    "FILM_PUBLISHER": "NY Genealogy Association",
    "FILM_TYPE": "FHL microfilm",
    "FORMAT": "Digital Images, or Database, or Cards, ...",
    "FRAME": "frames 387-432",
    "GROUP": "Miami Patent Office",
    "HOUSEHOLD": "dwelling 345, family 654",
    "ID": "I50-68, or 1910 U.S. census, or ...",
    "INSTITUTION": "Sorbonne University",
    "INTERVIEWER": "Materley, B.",
    "ISSUE_DATE": "Jun 2004",
    "ISSUE_RANGE": "145-394, scattered issues",
    "ITEM_OF_INTEREST": "entry for G. Galileo, or Doe Household, or A. Einstein Grave ...",
    "JURISDICTION": "Jackson County, Alabama",
    "LOCATION": "Istanbul",
    "NUMBER": "2, or Record Group 34, or ...",
    "NUMBER_6TOTAL9": "5",
    "ORIGINAL_REPOSITORY": "National Archives",
    "ORIGINAL_REPOSITORY_LOCATION": "Washington, D.C.",
    "ORIGINAL_YEAR": "1966",
    "PAGE": "5; or 4,6-8, ...",
    "PAGE_RANGE": "1-13",
    "PART": "Part 3",
    "PLACE_CREATED": "London",
    "POSITION": "written in the left margin, or second row, 3th line",
    "POSTING_DATE": "5 Jul 1799",
    "PROFESSIONAL_CREDENTIALS": "Prof.; or Dr. ...",
    "PROVENANCE": "add provenance of the material",
    "PUBLICATION_FORMAT": "CD-ROM or eprint or ...",
    "PUBLICATION_PLACE": "Berlin",
    "PUBLICATION_TITLE": "Title of Blog, Newsletter, DVD, ...",
    "PUBLICATION_YEAR": "2014",
    "PUBLISHER": "Springer",
    "PUB_INFO": "Springer, Berlin, 2014",
    "RECIPIENT": "J. Ralls",
    "RELATIONSHIP": "Paul's uncle and brother of Erik",
    "REPORT_DATE": "3 May 1999",
    "RESEARCH_COMMENT": "Descriptive detail or provenance or research analysis conclusion, ...",
    "RESEARCH_PROJECT": "Tahiti Natives",
    "ROLL": "176, or rolls 145-160",
    "SCHEDULE": "population schedule or slave schedule or ...",
    "SECTION": "1890 section or ER patients or ...",
    "SERIES": "Carnival County Records",
    "SERIES_NO": "series 34-38",
    "SESSION": "2nd session",
    "SHEET_NO": "sheet 13-C",
    "SUBGROUP": "Diplomatic correspondance, 1798-1810",
    "SUBJECT": "D. Copernicus and close family",
    "SUBSERIES": "",
    "SUBTITLE": "Subtitle of article or magazine ...",
    "TERM": "June Term 1934 or 13th Congress or Reagan Office or ...",
    "TIMESTAMP": "min. 34-36",
    "TITLE": "Diary Title, Message Title, Bible Name, Article Title, ...",
    "TRANSLATION": "A translated version, typically of the title",
    "TYPE": "Letter",
    "URL_6DIGITAL_LOCATION9": "http://gramps-project.org/blog",
    "VOLUME": "4",
    "VOLUME_INFO": "5 volumes",
    "WEBSITE": "gramps-project.org",
    "WEBSITE_CREATOR_OR_OWNER": "Family Historians Inc",
    "YEAR": "1888",
    "YEAR_ACCESSED": "2013",
}

DEFAULT_TOOLTIPS = {
    "ACT": "A statute or law name passed by a legislature",
    "ADDRESS": "Store address information. Set Private if needed! Give"
        " information from lowest to highest level separated by comma's",
    "AFFILIATION": "A relevant affiliation that might influence data in the source",
    "AUTHOR": "Give names in following form: 'FirstAuthorSurname, Given Names & SecondAuthorSurname, Given Names'."
            " Like this Gramps can parse the name and shorten as needed.",
    "AUTHOR_LOCATION": "City where author resides or wrote.",
    "BOOK": "Title of the Book",
    "CASE": "Dispute between opposing parties in a court of law.",
    "CEMETERY": "Name of cemetery or cemetery office with sources.",
    "CHAPTER": "",
    "CHAPTER_PAGES": "The pages in the chapter.",
    "COLLECTION": "",
    "COLUMN": "",
    "COMPILER": "The name of the person who compiled the source.",
    "CREATION_DATE": "",
    "CREATOR": "The name of the creator of the artifact.",
    "CREDIT_LINE": "Acknowledgement of writers and contributors",
    "DATE": "",
    "DATE_ACCESSED": "",
    "DATE_RANGE": "The range of years which are present in the source.",
    "DESCRIPTION": "Some important detail of the source.",
    "DISTRICT": "District as handled by Census",
    "DIVISION": "The subdivision of a larger group that is handled in the source.",
    "EDITION": "",
    "EDITOR": "The Editor of a multi-author book.",
    "FILE": "The title of a specific file in a source.",
    "FILE_DATE": "Date of submitting the document to a clerk or court.",
    "FILE_LOCATION": "Accession method to the file",
    "FILE_NO": "Number to indicate a file",
    "FILE_UNIT": "A grouping unit for a number of files in a source.",
    "FILM_ID": "ID of a Microfilm.",
    "FILM_PUBLICATION_PLACE": "",
    "FILM_PUBLISHER": "",
    "FILM_TYPE": "The type of the microfilm.",
    "FORMAT": "The format of the source.",
    "FRAME": "What frames in the source are relevant.",
    "GROUP": "A larger grouping to which the source belongs.",
    "HOUSEHOLD": "Household of interest on a census.",
    "ID": "ID to identify the source or citation part",
    "INSTITUTION": "Institution that issued the source.",
    "INTERVIEWER": "",
    "ISSUE_DATE": "Date the source was issued.",
    "ISSUE_RANGE": "A range of magazine, journal, ... issues covered in the source",
    "ITEM_OF_INTEREST": "Specific part, item, or person of interest in the source",
    "JURISDICTION": "Area with a set of laws under the control of a system of courts or government entity."
            " Enter this from lowest to highest relevant jurisdiction, separated by comma's.",
    "LOCATION": "City that is relevant.",
    "NUMBER": "A number.",
    "NUMBER_6TOTAL9": "The maximum of entities available.",
    "ORIGINAL_REPOSITORY": "Name of the repository where the original is stored.",
    "ORIGINAL_REPOSITORY_LOCATION": "Address or only city of the repository where the original is stored.",
    "ORIGINAL_YEAR": "Year the original source was published/created",
    "PAGE": "The page or page(s) relevant for the citation",
    "PAGE_RANGE": "The range of the pages in the source. The page given for"
            " a citation must be in this range.",
    "PART": "",
    "PLACE_CREATED": "",
    "POSITION": "Where in or on the source the citation piece can be found.",
    "POSTING_DATE": "",
    "PROFESSIONAL_CREDENTIALS": "",
    "PROVENANCE": "Where the material originated from.",
    "PUBLICATION_FORMAT": "",
    "PUBLICATION_PLACE": "",
    "PUBLICATION_TITLE": "",
    "PUBLICATION_YEAR": "",
    "PUBLISHER": "",
    "PUB_INFO": "Publication Information, such as city and year of publication, name of publisher, ...",
    "RECIPIENT": "The person to who the letter is addressed.",
    "RELATIONSHIP": "The relationship of the author to the person of interest that is the subject.",
    "REPORT_DATE": "Date the report was written/submitted.",
    "RESEARCH_COMMENT": "Descriptive detail or provenance or research analysis conclusion, ...",
    "RESEARCH_PROJECT": "The genealogical or scientific research project.",
    "ROLL": "The Microfilm role.",
    "SCHEDULE": "The census schedule (the type of census table) used, eg population schedule or slave schedule. or ...",
    "SECTION": "",
    "SERIES": "",
    "SERIES_NO": "",
    "SESSION": "The number of the meeting or series of connected meetings devoted "
        "by a legislature to a single order of business, program, agenda, or announced purpose.",
    "SHEET_NO": "Number of a census sheet.",
    "SUBGROUP": "",
    "SUBJECT": "",
    "SUBSERIES": "",
    "SUBTITLE": "",
    "TERM": "Reference to the time a person/group/parliament is in office or session.",
    "TIMESTAMP": "Indication of the time in audio or video where the relevant fragment can be found.",
    "TITLE": "",
    "TRANSLATION": "A translated version, typically of the title",
    "TYPE": "",
    "URL_6DIGITAL_LOCATION9": "Detailed internet address of the content",
    "VOLUME": "",
    "VOLUME_INFO": "Information about the volumes, eg the amount of volumes.",
    "WEBSITE": "The main internet address.",
    "WEBSITE_CREATOR_OR_OWNER": "Organization or person behind a website.",
    "YEAR": "",
    "YEAR_ACCESSED": "",
}


nr = -1
cat = ''
cattype = ''
type = ''
descr = ''
cite_type = ''
ident = ''

TYPE2CITEMAP = {}
FIELDTYPEMAP = {}
FIELDS_SHORT = []
index = 100
indexval = 10
first = True

with open(csvfilename, 'rb') as csvfile:
   reader = csv.reader(csvfile, delimiter=';')
   for row in reader:
        if first:
            #skip first row with headers
            first=False
            continue

        if row[CATCOL]:
            cat = row[CATCOL].strip()
            if cat.startswith('EE '):
                cat = cat[3:]
                EE = True
            else:
                EE = False
            cattype = row[CATTYPECOL].strip()
            types = row[TYPECOL].strip()
            descr = row[DESCRCOL].strip()
            source_type = row[IDENTCOL].strip()
            if descr:
                source_descr = '%s - %s - %s (%s)' % (cat, cattype, types, descr)
                if not EE:
                    source_descr_code = "_('%(first)s - %(sec)s - %(third)s (%(fourth)s)') % { "\
                        " 'first': _('" + cat + "'),"\
                        " 'sec': _('" + cattype + "'),"\
                        " 'third': _('" + types + "'),"\
                        " 'fourth': _('" + descr + "')}"
                else:
                    source_descr_code = "_('EE %(first)s - %(sec)s - %(third)s (%(fourth)s)') % { "\
                        " 'first': _('" + cat + "'),"\
                        " 'sec': _('" + cattype + "'),"\
                        " 'third': _('" + types + "'),"\
                        " 'fourth': _('" + descr + "')}"
            else:
                source_descr = '%s - %s - %s' % (cat, cattype, types)
                if not EE:
                    source_descr_code = "_('%(first)s - %(sec)s - %(third)s') % { "\
                        " 'first': _('" + cat + "'),"\
                        " 'sec': _('" + cattype + "'),"\
                        " 'third': _('" + types + "')}"
                else:
                    source_descr_code = "_('EE %(first)s - %(sec)s - %(third)s') % { "\
                        " 'first': _('" + cat + "'),"\
                        " 'sec': _('" + cattype + "'),"\
                        " 'third': _('" + types + "')}"
            if source_type in TYPE2CITEMAP:
                assert TYPE2CITEMAP[source_type] ['descr'] == source_descr, source_type + ' ' + TYPE2CITEMAP[source_type] ['descr'] + ' NOT ' + source_descr
            else:
                TYPE2CITEMAP[source_type] = {'REF_TYPE_L': [], 'REF_TYPE_F': [],
                            'REF_TYPE_S': [], 
                            'i': indexval, 'descr': source_descr,
                            'descrcode': source_descr_code}
                indexval += 1
            
        if row[CITETYPECOL]:
            #new citation type,
            cite_type = row[CITETYPECOL].strip()
            if cite_type == 'S':
                shortcite = True
            else:
                shortcite = False
            cite_type = CITE_TYPES[cite_type]
        #add field for template to evidence style
        field = row[FIELDCOL].strip()
        field_type = field.replace(' ', '_').replace("'","")\
                     .replace('&','AND').replace('(', '6').replace(')','9')\
                     .replace('[', '').replace(']','').replace('/', '_OR_')\
                     .replace(',', '').replace('.', '').replace(':', '')\
                     .replace('-', '_')
        field_descr =  field.replace('[', '').replace(']','').lower().capitalize()
        field_label = row[LABELCOL].strip()
        field_label = field_label.replace("'", "\\'")
        if field_type in FIELDTYPEMAP:
            assert field_descr == FIELDTYPEMAP[field_type][1], 'Problem %s %s %s' % (field_type, field_descr, FIELDTYPEMAP[field_type][1])
        else:
            FIELDTYPEMAP[field_type] = (index, field_descr)
            index += 1
        fielddata = []
        private = 'False'
        if  row[PRIVACYCOL].strip():
            private = 'True'
        optional = 'False'
        if  row[OPTCOL].strip():
            optional = 'True'
        shorteralg = SHORTERALG.get(row[SHORTERCOL].strip()) or 'EMPTY'
        gedcommap = GEDCOMFIELDS.get(row[GEDCOMCOL].strip()) or 'EMPTY'
        style = STYLES.get(row[STYLECOL].strip()) or 'EMPTY'
        hint = row[HINTCOL]
        tooltip = row[TOOLTIPCOL]
        
        TYPE2CITEMAP[source_type][cite_type] += [(row[LDELCOL], field_type, 
                        row[RDELCOL], field_label, style, private, optional, 
                        shorteralg, gedcommap, hint, tooltip)]
        #if shorttype, we store a type for the short version so user can store
        #this
        if shortcite and shorteralg == 'EMPTY':
            field_type_short = field_type + '_SHORT_VERSION'
            if field_type_short in FIELDTYPEMAP:
                pass
            else:
                FIELDTYPEMAP[field_type_short] = (index, field_descr + ' (Short)')
                FIELDS_SHORT.append(field_type_short)
                index += 1

#now generate the python code we need in source attr types
code = "    #following fields are generated with evidencefieldgenerator.py\n" \
       "    #the index starts at 100!\n"
datamap = "\n    _DATAMAP += [\n"
allkeys = sorted(FIELDTYPEMAP.keys())
for field_type in allkeys:
    code += "    " + field_type + ' = %d\n' % FIELDTYPEMAP[field_type][0]
    datamap += '        (' + field_type + ', _("' + FIELDTYPEMAP[field_type][1] \
            +'"), "' + FIELDTYPEMAP[field_type][1] + '"),\n'
code += "\n    _DATAMAPIGNORE = [\n" 
for field in FIELDS_SHORT:
    code += "        " + field + ',\n'
code += '    ]\n\n' + datamap + '        ]\n'

#now add default hints
code += "\n    _DEFAULT_HINTS = {\n"
for key in sorted(DEFAULT_HINTS.keys()):
    if DEFAULT_HINTS[key]:
        code += "        " + key +': _("' +  DEFAULT_HINTS[key] + '"),\n'
code += "    }\n"
#now add default tooltips
code += "\n    _DEFAULT_TOOLTIPS = {\n"
for key in sorted(DEFAULT_TOOLTIPS.keys()):
    if DEFAULT_TOOLTIPS[key]:
        code += "        " + key + ': _("' +  DEFAULT_TOOLTIPS[key] + '"),\n'
code += "    }\n"

with open('srcattrtype_extra.py', 'wb') as srcattrfile:
    srcattrfile.write(code)

#now generate the python code we need in evidencestyle
# we have predefined sourcetypes, and these have a template for formatting
# 

#first an English to internationalized map
code = "    #SRCTEMPLATE has some predefined values which map to citation styles\n"

datamap = "    _SRCTEMPLATEVAL_MAP = [\n"\
        "        (UNKNOWN, _('Unknown'), 'Unknown'),\n"
allkeys = sorted(TYPE2CITEMAP.keys())
for source_type in allkeys:
    code += "    " + source_type + ' = %d\n' % TYPE2CITEMAP[source_type]['i']
    # we use descrcode in to translate string to reduce work for translators
    datamap += "        (" + source_type + ", " + TYPE2CITEMAP[source_type]['descrcode'] \
            +", '" + source_type+ "'),\n"

code += '\n    # Localization of the different source types\n'\
            + datamap + '        ]\n'

code += "\n    #templates for the source types defined\n"\
        "    # F: Full reference\n"\
        "    # S: Short reference (if F used once, use S afterwards)\n" \
        "    # L: List reference (for in bibliography list)\n"
code += '    EVIDENCETEMPLATES = {\n'
for source_type in allkeys:
    code += "        " + source_type + ": {\n"
    for val in ['REF_TYPE_L', 'REF_TYPE_F', 'REF_TYPE_S']:
        code += "            " + val + ": [\n"
        for field in TYPE2CITEMAP[source_type][val]:
            # field is tuple (row[LDELCOL], field_type, row[RDELCOL], 
            # field_label, row[STYLECOL]
            #    , private, optional, shorteralg, gedcommap, hint, tooltip)
            if field[9]:
                hint = '_("' + field[9] + '")'
            else:
                hint = 'None'
            if field[10]:
                tooltip = '_("' + field[10] + '")'
            else:
                tooltip = 'None'
            code += "                ('"+ field[0] + "', " + field[1]  + ", _('"\
                    +field[3] + "'), '"+ field[2] + "', " + field[4] + ", " + field[5] + ", "\
                    + field[6] + ", " + field[7] + ", " + field[8] + ",\n" \
                    + "                " +hint + ", " + tooltip + "),\n"
        code += "                ],\n"
    code += "        },\n"
code += "    }\n"

with open('srcattrtype_extraevidence.py', 'wb') as srcattrfile:
    srcattrfile.write(code)