#! /usr/bin/env python
#
# check_po - a gramps tool to check validity of po files
#
# Copyright (C) 2006-2006  Kees Bakker
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

#
# TODO
#
# * Check for HTML text in msgstr when there is none in msgid
# * Check for matching HTML tag/endtag in msgstr
#

import sys
import re
import os
from argparse import ArgumentParser

# Module-level dictionaries, each created empty and independent of the others.
# NOTE(review): none of them is read or written by the code visible in this
# file; presumably they were meant to collect per-file totals -- confirm
# before relying on or removing them.
all_total = dict()
all_fuzzy = dict()
all_untranslated = dict()
all_percent_s = dict()
all_named_s = dict()
all_bnamed_s = dict()
all_context = dict()
all_coverage = dict()
all_template_coverage = dict()

def strip_quotes(st):
        """Return *st* without its enclosing pair of double quotes.

        The quotes are removed only when *st* is at least two characters long
        and both its first and its last character are ``"``. Every other
        input is handed back unchanged.
        """
        enclosed = len(st) >= 2 and st[0] == '"' and st[-1] == '"'
        if not enclosed:
                return st
        # Both ends are quote characters, so there is no outer whitespace
        # left to trim before slicing them off.
        return st[1:-1]

class CheckException( Exception ):
        """Error raised by the PO parser for input it cannot handle."""

# This is a base class for all checks
class Check:
        """Common behaviour of all checks.

        A subclass is expected to set ``diag_header`` and ``summary_text`` and
        to append every offending message to ``self.msgs``. Each collected
        message must offer a ``diag()`` method of its own.
        """

        def __init__( self ):
                # Messages that failed this check, in the order they were found
                self.msgs = []

        def diag( self ):
                """Print the header followed by every offending message.

                Nothing at all is printed when no message was collected.
                """
                if not self.msgs:
                        return
                # A bare ``print`` is a no-op expression in Python 3; it has to
                # be called for the separating blank line to be emitted.
                print()
                print(self.diag_header)
                for m in self.msgs:
                        m.diag()

        def summary( self ):
                """Print one line: the check's label and its number of hits."""
                print("%-20s%d" % ( self.summary_text, len(self.msgs) ))

class Check_fmt( Check ):
        """Check that a format token occurs equally often in msgid and msgstr.

        The token (for example ``%s``) is given to the constructor and is
        counted as a plain substring. The message is collected once for every
        compared pair whose counts differ, so a plural message can be listed
        twice.
        """

        def __init__( self, fmt ):
                Check.__init__( self )
                self.fmt = fmt
                self.summary_text = "%s mismatches:" % fmt
                self.diag_header = "-------- %s mismatches --------------" % fmt

        def __process( self, msg, msgid, msgstr ):
                # Collect the message when the two token counts disagree
                if msgid.count( self.fmt ) != msgstr.count( self.fmt ):
                        self.msgs.append( msg )

        def process( self, msg ):
                """Compare the singular pair and, when present, the plural pair."""
                pairs = [ ( msg.msgid, msg.msgstr[0] ) ]
                # The plural text is only compared when the message has one and
                # at least two translations are available.
                if msg.msgidp and len(msg.msgstr) >= 2:
                        pairs.append( ( msg.msgidp, msg.msgstr[1] ) )
                for original, translated in pairs:
                        self.__process( msg, original, translated )

class Check_named_fmt( Check ):
        """Check that msgid and msgstr use the same named ``%(name)x`` formats.

        Every match consists of ``%(name)``, optional digits and the single
        non-digit character that follows them. The two sides must contain the
        same matches, regardless of order. The message is collected once for
        every compared pair that differs, so a plural message can be listed
        twice.
        """

        # A pattern to find all %()
        # Written as a raw string: the backslash sequences belong to the
        # regular expression, they are not Python string escapes.
        find_named_fmt_pat = re.compile(r'% \( \w+ \) \d* \D', re.VERBOSE)

        def __init__( self ):
                Check.__init__( self )
                self.diag_header = "-------- %() name mismatches --------------"
                self.summary_text = "%() name mismatches:"

        def __process( self, msg, msgid, msgstr ):
                fmts1 = self.find_named_fmt_pat.findall( msgid )
                fmts2 = self.find_named_fmt_pat.findall( msgstr )
                # Comparing the sorted lists covers both questions at once: a
                # different number of formats and different format names.
                if sorted( fmts1 ) != sorted( fmts2 ):
                        self.msgs.append( msg )

        def process( self, msg ):
                """Compare the singular pair and, when present, the plural pair."""
                self.__process( msg, msg.msgid, msg.msgstr[0] )

                # The plural text is only compared when the message has one and
                # at least two translations are available.
                if msg.msgidp and len(msg.msgstr) >= 2:
                        self.__process( msg, msg.msgidp, msg.msgstr[1] )

class Check_missing_sd( Check ):
        """Find ``%(name)`` formats in a translation not followed by s or d.

        Only the translations are inspected. The message is collected once for
        every msgstr form that contains such a format, so a plural message can
        be listed more than once.
        """

        # A pattern to find %() without s or d
        # The only group captures the character after the optional digits, or
        # the empty string when the text ends there.
        # Written as a raw string: the backslash sequences belong to the
        # regular expression, they are not Python string escapes.
        # Here is a command to use for testing
        # print(re.compile(r'% \( \w+ \) \d* (\D|$)', re.VERBOSE).findall( '%(event_name)s: %(place)s%(endnotes)s. ' ))
        find_named_fmt_pat2 = re.compile(r'% \( \w+ \) \d* (\D|$)', re.VERBOSE)

        def __init__( self ):
                Check.__init__( self )
                self.diag_header = "-------- %() without 's' or 'd' mismatches --------------"
                self.summary_text = "%() missing s/d:"

        def process( self, msg ):
                """Collect *msg* for each msgstr form with a bad conversion."""
                for msgstr in msg.msgstr:
                        conversions = self.find_named_fmt_pat2.findall( msgstr )
                        # One report per form is enough, however many of its
                        # formats are wrong.
                        if any( conv not in ('s', 'd') for conv in conversions ):
                                self.msgs.append( msg )

class Check_runaway( Check ):
        """Find translations that still carry the ``context|`` prefix.

        A pair is reported when both texts contain a ``|`` and the texts are
        not identical. The message is collected once for every reported pair.
        """

        def __init__( self ):
                Check.__init__( self )
                self.summary_text = "Runaway context:"
                self.diag_header = "-------- Runaway context in translation ---------"

        def __process( self, msg, msgid, msgstr ):
                # Runaway context. In the translated part we only want to see
                # the translation of the word after the |
                both_have_bar = '|' in msgid and '|' in msgstr
                if both_have_bar and msgid != msgstr:
                        self.msgs.append( msg )

        def process( self, msg ):
                """Check the singular pair and, when present, the plural pair."""
                self.__process( msg, msg.msgid, msg.msgstr[0] )

                # The plural text is only checked when the message has one and
                # at least two translations are available.
                if msg.msgidp and len(msg.msgstr) >= 2:
                        self.__process( msg, msg.msgidp, msg.msgstr[1] )

class Check_xml_chars( Check ):
        """Find unescaped XML special characters in tips.xml translations.

        Only the first translation of a message flagged ``is_tips_xml`` is
        inspected; all other messages are ignored.
        """

        # Special XML characters
        # It is not allowed to have a quote, an ampersand or an angle bracket
        # The four alternatives are:
        #   * a '<' that does not open <b>, </b>, <i>, </i> or <br/>
        #   * a '>' that does not close one of those tags; this needs the
        #     negative lookbehind (?<!...), the former (?<=!...) only matched
        #     a '>' preceded by a literal '!' plus the tag text
        #   * any double quote
        #   * a '&' that does not start &quot; &nbsp; &gt; or &amp;
        xml_chars_pat = re.compile( r'<(?!(b>|/b>|i>|/i>|br/>)) | (?<!(<b|/b|<i|/i|r/))> | " | & (?!(quot|nbsp|gt|amp);)', re.VERBOSE )

        def __init__( self ):
                Check.__init__( self )
                self.diag_header = "-------- unescaped XML special characters ---------"
                self.summary_text = "XML special chars:"

        def process( self, msg ):
                """Collect *msg* when its first translation has a forbidden character."""
                # XML errors
                # Only look at messages in the tips.xml
                if not msg.is_tips_xml:
                        return
                if self.xml_chars_pat.search( msg.msgstr[0] ):
                        self.msgs.append( msg )

class Check_last_char( Check ):
        """Check that msgid and msgstr end alike.

        Two endings are compared: whether the last character is white space
        and whether it is a period. Fuzzy messages are not checked. The
        message is collected once for every compared pair that differs.
        """

        def __init__( self ):
                Check.__init__( self )
                self.summary_text = "Last character:"
                self.diag_header = "-------- last character not identical ---------"

        def __process( self, msg, msgid, msgstr ):
                # Slicing yields '' for an empty text instead of failing
                tail_id = msgid[-1:]
                tail_str = msgstr[-1:]
                space_differs = tail_id.isspace() != tail_str.isspace()
                period_differs = (tail_id == '.') != (tail_str == '.')
                if space_differs or period_differs:
                        self.msgs.append( msg )

        def process( self, msg ):
                """Compare the endings of the singular and, if any, plural pair."""
                # Last character of msgid? White space? Period?
                if msg.is_fuzzy:
                        return

                pairs = [ ( msg.msgid, msg.msgstr[0] ) ]
                # The plural text is only compared when the message has one and
                # at least two translations are available.
                if msg.msgidp and len(msg.msgstr) >= 2:
                        pairs.append( ( msg.msgidp, msg.msgstr[1] ) )
                for original, translated in pairs:
                        self.__process( msg, original, translated )

class Check_shortcut_trans( Check ):
        """Find translations with an underscore where the msgid has none.

        The message is collected once for every compared pair in which only
        the translation contains ``_``.
        """

        def __init__( self ):
                Check.__init__( self )
                self.summary_text = "Shortcut in msgstr:"
                self.diag_header = "-------- shortcut key in translation ---------"

        def __process( self, msg, msgid, msgstr ):
                # An underscore that exists only on the translated side
                if '_' not in msgid and '_' in msgstr:
                        self.msgs.append( msg )

        def process( self, msg ):
                """Check the singular pair and, when present, the plural pair."""
                self.__process( msg, msg.msgid, msg.msgstr[0] )

                # The plural text is only checked when the message has one and
                # at least two translations are available.
                if msg.msgidp and len(msg.msgstr) >= 2:
                        self.__process( msg, msg.msgidp, msg.msgstr[1] )

class Msgid:
        """One entry of a PO file.

        The unquoted texts are accumulated in ``msgid``, ``msgidp`` and
        ``msgstr`` (a list, one item per ``msgstr`` / ``msgstr[n]`` keyword).
        The raw input lines are kept in the underscore-prefixed lists and are
        only used by diag().
        """

        # A comment line containing one of these marks the whole entry
        fuzzy_pat = re.compile( 'fuzzy' )
        tips_xml_pat = re.compile( r'tips\.xml' )

        # Keyword prefixes. They are anchored at the start of the line so that
        # the same word occurring inside the quoted text is left untouched.
        _msgid_prefix_pat = re.compile( r'^\s*msgid\s+' )
        _msgidp_prefix_pat = re.compile( r'^\s*msgid_plural\s+' )
        _msgstr_prefix_pat = re.compile( r'^\s*msgstr(\[\d\])?\s+' )

        def __init__( self, msgnr, lineno ):
                self._msgid = []        # For debugging purpose the original text
                self._msgidp = []       # For debugging purpose the original text
                self._msgstr = []       # For debugging purpose the original text
                self.msgid = ''
                self.msgidp = ''
                self.msgstr = []        # This is a list to support plural
                self._cmnt = []
                self.nr = msgnr
                self.lineno = lineno
                self.is_fuzzy = False
                self.is_tips_xml = False

        def diag( self ):
                """Print the entry number, its line number and its raw lines."""
                # A bare ``print`` is a no-op expression in Python 3; it has to
                # be called for the separating blank line to be emitted.
                print()
                fuzzy_mark = " (fuzzy)" if self.is_fuzzy else ""
                print("msg nr: %d, lineno: %d%s" % ( self.nr, self.lineno, fuzzy_mark ))
                # The raw lines still carry their own line endings
                sys.stdout.write( ''.join( self._msgid ) )
                sys.stdout.write( ''.join( self._msgidp ) )
                sys.stdout.write( ''.join( self._msgstr ) )

        def _unquoted_text( self, prefix_pat, line, lineno ):
                # Drop the leading keyword (if any) and the enclosing quotes of
                # *line*; a missing quote is reported but does not stop parsing.
                text = prefix_pat.sub( '', line, count=1 ).strip()
                # Slices instead of indexes: an empty text must not raise
                if text[:1] != '"' or text[-1:] != '"':
                        print("ERROR at line %d: Missing quote." % lineno)
                return strip_quotes( text )

        def add_msgid( self, line, lineno ):
                """Append a ``msgid`` line, or a continuation string, to msgid."""
                self._msgid.append( line )
                self.msgid += self._unquoted_text( self._msgid_prefix_pat, line, lineno )

        def add_msgidp( self, line, lineno ):
                """Append a ``msgid_plural`` line, or a continuation, to msgidp."""
                self._msgidp.append( line )
                self.msgidp += self._unquoted_text( self._msgidp_prefix_pat, line, lineno )

        def add_new_msgstr( self, line, lineno ):
                """Start a new translation (next plural form) with *line*."""
                self.msgstr.append( '' )        # Start a new msgstr
                self.add_msgstr( line, lineno )

        def add_msgstr( self, line, lineno ):
                """Append *line* to the most recently started translation."""
                self._msgstr.append( line )
                self.msgstr[-1] += self._unquoted_text( self._msgstr_prefix_pat, line, lineno )

        def add_cmnt( self, line ):
                """Store a comment line and update the fuzzy / tips.xml flags."""
                self._cmnt.append( line )
                # The flags are sticky: once set they are never cleared again
                if self.fuzzy_pat.search( line ):
                        self.is_fuzzy = True
                if self.tips_xml_pat.search( line ):
                        self.is_tips_xml = True

def create_new_Msgid( msgs, lineno ):
        """Append a fresh Msgid to *msgs* and return it.

        The new entry is numbered with the length of *msgs* before the
        append and remembers *lineno* as its line number.
        """
        msgs.append( Msgid( len(msgs), lineno ) )
        return msgs[-1]

def read_msgs( fname ):
        """Parse the file *fname* into a list of Msgid objects.

        Every non-blank line is classified (obsolete ``#~`` line, comment,
        msgid, msgid_plural, msgstr or continuation string) and then fed into
        a state machine that groups the lines into entries. Obsolete lines
        are skipped. Recoverable oddities are reported with a WARNING on
        standard output.

        As a side effect the module-level name ``msgs`` is bound to the
        result as well.

        Returns the list of entries; entries that have neither a msgid nor
        any msgstr are left out.

        Raises CheckException for a keyword in a place where it cannot be
        handled and for a line that cannot be classified.
        """
        empty_pat   = re.compile( r'^ \s* $',      re.VERBOSE )
        comment_pat = re.compile( r'\#',           re.VERBOSE )
        msgid_pat   = re.compile( r'msgid \s+ "',  re.VERBOSE )
        msgid_plural_pat = re.compile( r'msgid_plural \s+ "', re.VERBOSE )
        msgstr_pat  = re.compile( r'msgstr (\[\d\])? \s+ "', re.VERBOSE )
        str_pat     = re.compile( r'"',            re.VERBOSE )
        old_pat     = re.compile( r'\#~ \s+ ',     re.VERBOSE )

        # Read everything at once; the ``with`` block closes the file even
        # when reading fails.
        with open( fname ) as f:
                lines = f.readlines()

        # parse it like a statemachine
        NONE   = 'NONE'                 # Nothing detected, yet
        CMNT   = 'CMNT'                 # Inside comment part
        MSGID  = 'msgid'                # Inside msgid part
        MSGIDP = 'msgid_plural'         # Inside msgid_plural part
        MSGSTR = 'msgstr'               # Inside msgstr part
        STR    = 'STR'                  # A continuation string
        OLD    = 'OLD'                  # An old pattern with #~

        global msgs
        state = NONE
        msg = None

        # NOTE(review): an obsolete line met in state MSGID, MSGIDP or MSGSTR
        # resets msg to None without leaving that state, so a keyword or
        # continuation string that follows directly would fail with an
        # AttributeError -- confirm whether such input has to be supported.

        # The messages below are formatted with vars(), which is why the
        # locals fname, lineno, state and next_state must keep their names.
        msgs = []
        for ix, line in enumerate( lines ):     # Use line numbers for messages
                lineno = ix + 1

                if empty_pat.match( line ):
                        continue        # Empty lines are not interesting

                # What's the next state?
                # The obsolete pattern is tested first, it would match the
                # comment pattern as well.
                if  old_pat.match( line ):
                        next_state = OLD
                elif comment_pat.match( line ):
                        next_state = CMNT
                elif msgid_pat.match( line ):
                        next_state = MSGID
                elif msgid_plural_pat.match( line ):
                        next_state = MSGIDP
                elif msgstr_pat.match( line ):
                        next_state = MSGSTR
                elif str_pat.match( line ):
                        next_state = STR
                else:
                        print('WARNING: Unexpected input at %(fname)s:%(lineno)d' % vars())
                        next_state = NONE

                if state == NONE:
                        # expect msgid or comment or old stuff
                        if next_state == CMNT:
                                state = CMNT
                                msg = create_new_Msgid( msgs, lineno ) # Start with an empty new item
                                msg.add_cmnt( line )

                        elif next_state == MSGID:
                                state = MSGID
                                msg = create_new_Msgid( msgs, lineno ) # Start with an empty new item
                                msg.add_msgid( line, lineno )

                        elif next_state == MSGIDP:
                                raise CheckException( 'Unexpected %(next_state)s at %(fname)s:%(lineno)d' % vars() )

                        elif next_state == MSGSTR:
                                print('WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars())
                                state = MSGSTR
                                msg = create_new_Msgid( msgs, lineno ) # Start with an empty new item
                                msg.add_new_msgstr( line, lineno )

                        elif next_state == STR:
                                print('WARNING: Wild string at %(fname)s:%(lineno)d' % vars())

                        elif next_state == OLD:
                                pass    # Just skip

                        else:
                                raise CheckException( 'Unexpected state in po parsing (state = %(state)s)' % vars() )

                elif state == CMNT:
                        # Expect more comment, or msgid. If msgstr or string it is flagged as error.
                        if next_state == CMNT:
                                if msg:
                                        msg.add_cmnt( line )
                                else:
                                        # Note. We may need to do something about these comments
                                        # Skip for now
                                        pass

                        elif next_state == MSGID:
                                state = MSGID
                                if not msg:
                                        msg = create_new_Msgid( msgs, lineno ) # Start with an empty new item
                                msg.add_msgid( line, lineno )

                        elif next_state == MSGIDP:
                                raise CheckException( 'Unexpected %(next_state)s at %(fname)s:%(lineno)d' % vars() )

                        elif next_state == MSGSTR:
                                print('WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars())
                                state = MSGSTR
                                msg = create_new_Msgid( msgs, lineno ) # Start with an empty new item
                                msg.add_new_msgstr( line, lineno )

                        elif next_state == STR:
                                print('WARNING: Wild string at %(fname)s:%(lineno)d' % vars())

                        elif next_state == OLD:
                                msg = None
                                pass    # Just skip

                        else:
                                raise CheckException( 'Unexpected state in po parsing (state = %(state)s)' % vars() )

                elif state == MSGID:
                        # Expect msgstr or msgid_plural or string
                        if next_state == CMNT:
                                # Hmmm. A comment here?
                                print('WARNING: Unexpted comment at %(fname)s:%(lineno)d' % vars())

                        elif next_state == MSGID:
                                raise CheckException( 'Unexpected %(next_state)s at %(fname)s:%(lineno)d' % vars() )

                        elif next_state == MSGIDP:
                                state = MSGIDP
                                msg.add_msgidp( line, lineno )

                        elif next_state == MSGSTR:
                                state = MSGSTR
                                msg.add_new_msgstr( line, lineno )

                        elif next_state == STR:
                                # Continuation of msgid, stay in state MSGID
                                msg.add_msgid( line, lineno )

                        elif next_state == OLD:
                                msg = None
                                pass    # Just skip

                        else:
                                raise CheckException( 'Unexpected state in po parsing (state = %(state)s)' % vars() )

                elif state == MSGIDP:
                        # Expect msgstr or string or comment
                        if next_state == CMNT:
                                # Hmmm. A comment here?
                                print('WARNING: Unexpted comment at %(fname)s:%(lineno)d' % vars())

                        elif next_state == MSGID:
                                raise CheckException( 'Unexpected %(next_state)s at %(fname)s:%(lineno)d' % vars() )

                        elif next_state == MSGIDP:
                                raise CheckException( 'Unexpected %(next_state)s at %(fname)s:%(lineno)d' % vars() )

                        elif next_state == MSGSTR:
                                state = MSGSTR
                                msg.add_new_msgstr( line, lineno )

                        elif next_state == STR:
                                # Continuation of msgid_plural, stay in state MSGIDP
                                msg.add_msgidp( line, lineno )

                        elif next_state == OLD:
                                msg = None
                                pass    # Just skip

                        else:
                                raise CheckException( 'Unexpected state in po parsing (state = %(state)s)' % vars() )

                elif state == MSGSTR:
                        # Expect comment, or msgid, or string.
                        if next_state == CMNT:
                                # A comment probably starts a new item
                                state = CMNT
                                msg = create_new_Msgid( msgs, lineno )
                                msg.add_cmnt( line )

                        elif next_state == MSGID:
                                state = MSGID
                                msg = create_new_Msgid( msgs, lineno )
                                msg.add_msgid( line, lineno )

                        elif next_state == MSGIDP:
                                raise CheckException( 'Unexpected %(next_state)s at %(fname)s:%(lineno)d' % vars() )

                        elif next_state == MSGSTR:
                                # New msgstr, probably for plural form
                                # Stay in MSGSTR state
                                msg.add_new_msgstr( line, lineno )

                        elif next_state == STR:
                                msg.add_msgstr( line, lineno )

                        elif next_state == OLD:
                                msg = None
                                pass    # Just skip

                        else:
                                raise CheckException( 'Unexpected state in po parsing (state = %(state)s)' % vars() )

                else:
                        raise CheckException( 'Unexpected state in po parsing (state = %(state)s)' % vars() )

        # Strip items with just comments. (Can this happen?)
        msgs = [ entry for entry in msgs if entry.msgid or entry.msgstr ]
        return msgs

def analyze_msgs( args, fname, msgs, nr_templates = None, nth = 0 ):
        """Run all checks on *msgs* and print the report for one PO file.

        args         -- not used by this function
        fname        -- file name shown in the report
        msgs         -- the entries of the PO file
        nr_templates -- number of entries in the template; when it is None
                        the number of entries in *msgs* is used instead
        nth          -- index of this file in the run; every file after the
                        first gets a blank line and a separator line on top

        Entries whose translations are all empty count as untranslated and
        fuzzy entries count as fuzzy; neither kind is passed to the checks.
        A percentage whose denominator is zero is reported as 0.00%.
        """
        checks = [
                Check_fmt( '%s' ),
                Check_fmt( '%d' ),
                Check_named_fmt(),
                Check_missing_sd(),
                Check_runaway(),
                Check_xml_chars(),
                Check_last_char(),
                Check_shortcut_trans(),
        ]

        nr_fuzzy = 0
        nr_untranslated = 0
        for msg in msgs:
                if ''.join(msg.msgstr) == '':
                        nr_untranslated += 1
                        continue

                if msg.is_fuzzy:
                        nr_fuzzy += 1
                        continue

                for c in checks:
                        c.process( msg )

        nr_msgs = len(msgs)
        if nr_templates is None:
                # Without a template the PO file is its own reference
                nr_templates = nr_msgs

        if nth > 0:
                # A bare ``print`` is a no-op expression in Python 3; it has to
                # be called for the separating blank line to be emitted.
                print()
                print("=====================================")
        print("%-20s%s"     % ( "File:",              fname ))
        print("%-20s%d"     % ( "Template total:",    nr_templates ))
        print("%-20s%d"     % ( "PO total:",          nr_msgs ))
        print("%-20s%d"     % ( "Fuzzy:",             nr_fuzzy ))
        print("%-20s%d"     % ( "Untranslated:",      nr_untranslated ))

        for c in checks:
                c.summary()

        # An empty PO file would otherwise divide by zero
        if nr_msgs:
                po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100
        else:
                po_coverage = 0.0
        print("%-20s%5.2f%%" % ( "PO Coverage:",       po_coverage ))

        # Untranslated and fuzzy entries are both not shown to the user
        not_displayed = nr_untranslated + nr_fuzzy
        # The same guard for an empty template
        if nr_templates:
                template_coverage = po_coverage * float(nr_msgs) / float(nr_templates)
                translation = (1.0 - (float(not_displayed) / float(nr_templates))) * 100
        else:
                template_coverage = 0.0
                translation = 0.0
        print("%-20s%5.2f%%" % ( "Template Coverage:", template_coverage ))

        text = "%-20s%5.2f%%" % ( "Localized at:",     translation)

        # The two coverages only differ when the PO file and the template do
        # not hold the same number of entries.
        if int(template_coverage*1000) == int(po_coverage*1000):
                print(text)
        else:
                print(text + ' (previous gramps.pot)')

        for c in checks:
                c.diag()

def main():
    """Parse the command line and report on the selected PO file.

    The only option is ``-s FILE``; FILE has to be one of the ``.po`` files
    in the current directory. The template ``gramps.pot`` is read from the
    current directory as well. Without ``-s`` the usage text is printed.

    A CheckException raised while parsing is reported on standard output
    and ends the run.
    """
    parser = ArgumentParser( description='This program validates a PO file for GRAMPS.')

    parser.add_argument("-s", dest="summary",
              choices=[name for name in os.listdir('.') if name.endswith('.po')],
              default=False, help="the summary of check, and if need, it gives details")

    args = parser.parse_args()

    if not args.summary:
        # Nothing was selected: show the usage instead of exiting silently
        parser.print_help()
        return

    # Take the file from the parsed option. Slicing sys.argv gave an empty
    # list for the attached form (-sfr.po) and so analysed nothing.
    files = [args.summary]

    try:
        pot_msgs = read_msgs( 'gramps.pot' )
        nr_templates = len( pot_msgs )
        for nth, fname in enumerate( files ):
            msgs = read_msgs( fname )
            analyze_msgs( files, fname, msgs, nr_templates, nth )

    except CheckException as e:
        print('Oops.', e)
        print('Bailing out')

# Run the checker only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
        main()