This is a rewrite of the check_po script.
A few checks have been improved and a new check was added. The new check is for XML special characters. svn: r7583
This commit is contained in:
parent
d29b55723a
commit
0d321299c9
536
po/check_po
536
po/check_po
@ -1,8 +1,8 @@
|
||||
#! /usr/bin/env python
|
||||
#
|
||||
# Gramps - a GTK+/GNOME based genealogy program
|
||||
# check_po - a gramps tool to check validity of po files
|
||||
#
|
||||
# Copyright (C) 2000-2006 Donald N. Allingham
|
||||
# Copyright (C) 2006-2006 Kees Bakker
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -18,25 +18,11 @@
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
# $Id: check_po,v 1.1.2.6 2006/04/22 18:30:33 rshura Exp $
|
||||
# $Id:$
|
||||
|
||||
import sys
|
||||
import re
|
||||
|
||||
f = open('gramps.pot')
|
||||
template_total = 0
|
||||
for line in f.xreadlines():
|
||||
try:
|
||||
if (line.split()[0] == 'msgid'):
|
||||
template_total += 1
|
||||
except:
|
||||
pass
|
||||
f.close()
|
||||
|
||||
NONE = 0
|
||||
MSGID = 1
|
||||
MSGSTR = 2
|
||||
|
||||
all_total = {}
|
||||
all_fuzzy = {}
|
||||
all_untranslated = {}
|
||||
@ -47,170 +33,400 @@ all_context = {}
|
||||
all_coverage = {}
|
||||
all_template_coverage = {}
|
||||
|
||||
|
||||
def strip_quotes(st):
    """Return *st* without surrounding whitespace and enclosing double quotes.

    The text is first stripped of leading/trailing whitespace (including the
    newline of a line read from a file).  Only when the remaining text both
    starts and ends with a double quote is that pair of quotes removed;
    anything else is returned as-is, so unquoted text never loses its first
    and last characters.

    st -- one piece of text, typically (part of) a line of a po file.
    """
    st = st.strip()
    # len >= 2 guards against a lone '"', which is both first and last char.
    if len(st) >= 2 and st.startswith('"') and st.endswith('"'):
        return st[1:-1]
    return st
|
||||
|
||||
args = sys.argv
|
||||
while len(args) > 1:
|
||||
args = args[1:]
|
||||
class Msgid:
    """One item of a po file: its comment, msgid and msgstr lines plus flags.

    The raw lines are stored exactly as they are added.  msgid() and
    msgstr() turn them into plain text on demand.  The has_* attributes
    start at 0 and are switched to 1 through the set_* methods; is_fuzzy is
    switched to 1 as soon as a comment line containing 'fuzzy' is added.
    """

    fuzzy_pat = re.compile( 'fuzzy' )
    tips_xml_pat = re.compile( r'tips\.xml' )

    # The keyword is only removed at the very start of a line.  An
    # unanchored substitution would also delete the words "msgid " or
    # "msgstr " when they occur inside the message text itself.
    _msgid_kw_pat = re.compile( r'^\s*msgid\s+' )
    _msgstr_kw_pat = re.compile( r'^\s*msgstr\s+' )

    def __init__( self, lineno ):
        """Create an empty item; lineno is the line number it starts at."""
        self._msgid = []
        self._msgstr = []
        self._cmnt = []
        self.lineno = lineno
        self.is_fuzzy = 0
        self.has_sfmt_mismatch = 0
        # Not set by any method of this class; kept so that code reading
        # the attribute keeps working.
        self.has_named_sfmt_mismatch = 0
        self.has_fmt_missing_sd = 0
        self.has_context_error = 0
        self.has_named_fmt_mismatch = 0
        self.has_xml_error = 0

    def diag( self, raw = 0 ):
        """Print this item on stdout.

        raw -- when true, print the line number followed by the unmodified
               msgid and msgstr lines.  The default prints one line in the
               format of the old check_po.
        """
        if raw:
            print("lineno: %d" % self.lineno)
            sys.stdout.write( ''.join( self._msgid ) )
            sys.stdout.write( ''.join( self._msgstr ) )
        else:
            # Compatible with the old check_po
            print("%d '%s' : '%s'" % ( self.lineno, self.msgid(), self.msgstr() ))

    def _joined_text( self, lines, keyword_pat ):
        """Return the text of lines without keyword and quotes, concatenated."""
        return ''.join( [ strip_quotes( keyword_pat.sub( '', l, 1 ) )
                          for l in lines ] )

    def msgid( self ):
        """Return the msgid text, or None when no msgid line was added."""
        if not self._msgid:
            return None
        return self._joined_text( self._msgid, self._msgid_kw_pat )

    def add_msgid( self, line ):
        """Store a msgid line (the keyword line or a continuation string)."""
        self._msgid.append( line )

    def msgstr( self ):
        """Return the msgstr text, or None when no msgstr line was added."""
        if not self._msgstr:
            return None
        return self._joined_text( self._msgstr, self._msgstr_kw_pat )

    def add_msgstr( self, line ):
        """Store a msgstr line (the keyword line or a continuation string)."""
        self._msgstr.append( line )

    def add_cmnt( self, line ):
        """Store a comment line and mark the item fuzzy if the line says so."""
        self._cmnt.append( line )
        if not self.is_fuzzy and self.fuzzy_pat.search( line ):
            self.is_fuzzy = 1

    def is_tips_xml( self ):
        """Return 1 when any comment line mentions tips.xml, otherwise 0."""
        for c in self._cmnt:
            if self.tips_xml_pat.search( c ):
                return 1
        return 0

    def set_sfmt_mismatch( self ):
        """Flag a mismatch in the number of formats."""
        self.has_sfmt_mismatch = 1

    def set_named_fmt_mismatch( self ):
        """Flag a mismatch in the names of the %() formats."""
        self.has_named_fmt_mismatch = 1

    def set_fmt_missing_sd( self ):
        """Flag a %() format that is not followed by 's' or 'd'."""
        self.has_fmt_missing_sd = 1

    def set_context_error( self ):
        """Flag a runaway context in the translation."""
        self.has_context_error = 1

    def set_xml_error( self ):
        """Flag an unescaped XML special character."""
        self.has_xml_error = 1
|
||||
|
||||
coverage = (1.0 - (float(untranslated)/float(total))) * 100
|
||||
template_coverage = coverage * float(total) / float(template_total)
|
||||
def read_msgs( fname ):
    """Parse the po (or pot) file fname and return a list of Msgid items.

    The file is read completely and then walked line by line with a small
    state machine.  Every line is classified (obsolete '#~' line, comment,
    msgid, msgstr, continuation string or something else) and, depending on
    the part of the item we are currently in, added to the current Msgid or
    used to start a new one.  Empty lines are ignored.  Lines that show up
    where they are not expected produce a WARNING on stdout.

    Items that have neither msgid text nor msgstr text are left out of the
    result.

    Raises Exception when a msgid directly follows a msgid part or a msgstr
    directly follows a msgstr part.  Errors from opening or reading the file
    are not caught here.
    """
    empty_pat   = re.compile( r'^ \s* $',      re.VERBOSE )
    comment_pat = re.compile( r'\#',           re.VERBOSE )
    msgid_pat   = re.compile( r'msgid \s+ "',  re.VERBOSE )
    msgstr_pat  = re.compile( r'msgstr \s+ "', re.VERBOSE )
    str_pat     = re.compile( r'"',            re.VERBOSE )
    old_pat     = re.compile( r'\#~ \s+ ',     re.VERBOSE )

    # Make sure the file is closed again, also when reading fails.
    f = open( fname )
    try:
        lines = f.readlines()
    finally:
        f.close()

    # parse it like a statemachine
    NONE   = 0         # Nothing detected, yet
    CMNT   = 1         # Inside comment part
    MSGID  = 2         # Inside msgid part
    MSGSTR = 3         # Inside msgstr part
    STR    = 4         # A continuation string
    OLD    = 5         # An old pattern with #~

    state = NONE
    msg = None
    msgs = []

    for ix, line in enumerate( lines ):    # Use line numbers for messages
        lineno = ix + 1
        where = { 'fname': fname, 'lineno': lineno }

        if empty_pat.match( line ):
            continue                       # Empty lines are not interesting

        # What's the next state?  The order matters: '#~' lines would
        # otherwise be taken for ordinary comments.
        if old_pat.match( line ):
            next_state = OLD
        elif comment_pat.match( line ):
            next_state = CMNT
        elif msgid_pat.match( line ):
            next_state = MSGID
        elif msgstr_pat.match( line ):
            next_state = MSGSTR
        elif str_pat.match( line ):
            next_state = STR
        else:
            next_state = NONE

        if state == NONE:
            # expect msgid or comment or old stuff
            if next_state == CMNT:
                state = CMNT
                msg = Msgid( lineno )      # Start with an empty new item
                msgs.append( msg )
                msg.add_cmnt( line )

            elif next_state == MSGID:
                state = MSGID
                msg = Msgid( lineno )      # Start with an empty new item
                msgs.append( msg )
                msg.add_msgid( line )

            elif next_state == MSGSTR:
                print('WARNING: Wild msgstr at %(fname)s:%(lineno)d' % where)
                state = MSGSTR
                msg = Msgid( lineno )      # Start with an empty new item
                msgs.append( msg )
                msg.add_msgstr( line )

            elif next_state == STR:
                print('WARNING: Wild string at %(fname)s:%(lineno)d' % where)

            elif next_state == OLD:
                pass                       # Just skip

        elif state == CMNT:
            if next_state == CMNT:
                if msg:
                    msg.add_cmnt( line )
                else:
                    # Note. We may need to do something about these comments
                    # Skip for now
                    pass

            elif next_state == MSGID:
                state = MSGID
                if not msg:
                    # The comments before this msgid were dropped because
                    # of an old (#~) line; start a fresh item.
                    msg = Msgid( lineno )
                    msgs.append( msg )
                msg.add_msgid( line )

            elif next_state == MSGSTR:
                print('WARNING: Wild msgstr at %(fname)s:%(lineno)d' % where)
                state = MSGSTR
                msg = Msgid( lineno )      # Start with an empty new item
                msgs.append( msg )
                msg.add_msgstr( line )

            elif next_state == STR:
                print('WARNING: Wild string at %(fname)s:%(lineno)d' % where)

            elif next_state == OLD:
                msg = None                 # Just skip

        elif state == MSGID:
            if next_state == CMNT:
                # Hmmm. A comment here?
                print('WARNING: Unexpted comment at %(fname)s:%(lineno)d' % where)

            elif next_state == MSGID:
                raise Exception( 'Unexpected msgid at %(fname)s:%(lineno)d' % where )

            elif next_state == MSGSTR:
                state = MSGSTR
                if msg is None:
                    # An old (#~) line dropped the item this msgstr would
                    # belong to.  Treat it like any other wild msgstr
                    # instead of failing on None.
                    print('WARNING: Wild msgstr at %(fname)s:%(lineno)d' % where)
                    msg = Msgid( lineno )
                    msgs.append( msg )
                msg.add_msgstr( line )

            elif next_state == STR:
                if msg is None:
                    # Same situation as above, for a continuation string.
                    print('WARNING: Wild string at %(fname)s:%(lineno)d' % where)
                else:
                    msg.add_msgid( line )

            elif next_state == OLD:
                msg = None                 # Just skip

        elif state == MSGSTR:
            if next_state == CMNT:
                # A comment probably starts a new item
                state = CMNT
                msg = Msgid( lineno )
                msgs.append( msg )
                msg.add_cmnt( line )

            elif next_state == MSGID:
                state = MSGID
                msg = Msgid( lineno )
                msgs.append( msg )
                msg.add_msgid( line )

            elif next_state == MSGSTR:
                raise Exception( 'Unexpected msgstr at %(fname)s:%(lineno)d' % where )

            elif next_state == STR:
                if msg is None:
                    # The item was dropped by an old (#~) line.
                    print('WARNING: Wild string at %(fname)s:%(lineno)d' % where)
                else:
                    msg.add_msgstr( line )

            elif next_state == OLD:
                msg = None                 # Just skip

        else:
            raise Exception( 'Unexpected state in po parsing (state = %d)' % state )

    # Strip items with just comments. (Can this happen?)
    return [ m for m in msgs if m.msgid() or m.msgstr() ]
|
||||
|
||||
def analyze_msgs( fname, msgs, nr_templates = None, nth = 0 ):
    """Check the items of one po file and print a report on stdout.

    fname        -- name of the po file, only used in the report.
    msgs         -- the items of that file.  Each item must offer msgid(),
                    msgstr(), is_tips_xml(), diag(), the set_* methods and
                    the is_fuzzy / has_* attributes used below.
    nr_templates -- number of items in the template.  When None, the
                    template total is left out of the report; when None or
                    0, the template coverage is left out as well.
    nth          -- index of this file in the run; a separator is printed
                    in front of every report but the first.

    Items without a translation are only counted as untranslated.  All
    other items are checked for: a different number of '%s', a different
    number or different names of %() formats, %() formats in the translation
    that are not followed by 's' or 'd', a context (the part up to '|') that
    ended up in the translation and, for items from tips.xml only,
    unescaped XML special characters.  The result of every check is stored
    on the item through its set_* method and the flagged items are listed
    with diag() below the totals.
    """
    nr_fuzzy = 0
    nr_untranslated = 0
    nr_sfmt_mismatches = 0
    nr_named_fmt_mismatches = 0
    nr_fmt_missing_sd = 0
    nr_context_errors = 0
    nr_xml_errors = 0

    # A pattern to find %() without s or d.  findall() returns the single
    # character (or the empty string at the end of the text) that follows.
    # Here is a command to use for testing
    # print re.compile(r'% \( \w+ \) \d* (\D|$)', re.VERBOSE).findall( '%(event_name)s: %(place)s%(endnotes)s. ' )
    find_named_fmt_pat2 = re.compile(r'% \( \w+ \) \d* (\D|$)', re.VERBOSE)

    # A pattern to find all %()
    find_named_fmt_pat = re.compile(r'% \( \w+ \) \d* \D', re.VERBOSE)

    # Special XML characters
    # It is not allowed to have a quote, an ampersand or an angle bracket
    xml_chars_pat = re.compile( r'(?<=\W) > | " | & (?!(quot|nbsp|gt|amp);)', re.VERBOSE )

    for msg in msgs:
        # An item that starts with a stray msgstr has no msgid at all;
        # treat that as an empty msgid so the checks below keep working.
        msgid = msg.msgid() or ''
        msgstr = msg.msgstr()

        if not msgstr:
            nr_untranslated += 1
            continue

        if msg.is_fuzzy:
            nr_fuzzy += 1
            # Fuzzy items are still checked.

        if msgid.count('%s') != msgstr.count('%s'):
            nr_sfmt_mismatches += 1
            msg.set_sfmt_mismatch()

        # Same number of named formats?
        fmts1 = sorted( find_named_fmt_pat.findall( msgid ) )
        fmts2 = sorted( find_named_fmt_pat.findall( msgstr ) )
        if len( fmts1 ) != len( fmts2 ):
            # Count an item only once as a format mismatch.
            if not msg.has_sfmt_mismatch:
                nr_sfmt_mismatches += 1
                msg.set_sfmt_mismatch()

        # Do we have the same named formats?
        if fmts1 != fmts2:
            nr_named_fmt_mismatches += 1
            msg.set_named_fmt_mismatch()

        # Any formats missing format letter?
        for letter in find_named_fmt_pat2.findall( msgstr ):
            if letter not in ('s', 'd'):
                nr_fmt_missing_sd += 1
                msg.set_fmt_missing_sd()
                break

        # Runaway context. In the translated part we only want to see
        # the translation of the word after the |
        if msgid.count('|') > 0 and msgstr.count('|') > 0 and msgid != msgstr:
            nr_context_errors += 1
            msg.set_context_error()

        # XML errors
        # Only look at messages in the tips.xml
        if msg.is_tips_xml():
            if xml_chars_pat.search( msgstr ):
                nr_xml_errors += 1
                msg.set_xml_error()

    nr_msgs = len(msgs)
    if nth > 0:
        print("")
        print("=====================================")
    print("%-20s%s" % ( "File:", fname ))
    if nr_templates is not None:
        print("%-20s%d" % ( "Template total:", nr_templates ))
    print("%-20s%d" % ( "PO total:", nr_msgs ))
    print("%-20s%d" % ( "Fuzzy:", nr_fuzzy ))
    print("%-20s%d" % ( "Untranslated:", nr_untranslated ))
    print("%-20s%d" % ( "%s mismatches:", nr_sfmt_mismatches ))
    print("%-20s%d" % ( "%() name mismatches:", nr_named_fmt_mismatches ))
    print("%-20s%d" % ( "%() missing s/d:", nr_fmt_missing_sd ))
    print("%-20s%d" % ( "Runaway context:", nr_context_errors ))
    print("%-20s%d" % ( "XML special chars:", nr_xml_errors ))

    # A file without any items has nothing translated: report 0% instead
    # of dividing by zero.
    if nr_msgs:
        po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100
    else:
        po_coverage = 0.0
    print("%-20s%5.2f%%" % ( "PO Coverage:", po_coverage ))

    # Without a (non-empty) template there is nothing to compare against.
    if nr_templates:
        template_coverage = po_coverage * float(nr_msgs) / float(nr_templates)
        print("%-20s%5.2f%%" % ( "Template Coverage:", template_coverage ))

    # One section per check: the number of hits, the heading and the name
    # of the attribute that marks the offending items.
    sections = (
        ( nr_sfmt_mismatches,
          "-------- %s mismatches --------------",
          'has_sfmt_mismatch' ),
        ( nr_named_fmt_mismatches,
          "-------- %() name mismatches --------------",
          'has_named_fmt_mismatch' ),
        ( nr_fmt_missing_sd,
          "-------- %() without 's' or 'd' mismatches --------------",
          'has_fmt_missing_sd' ),
        ( nr_context_errors,
          "-------- Runaway context in translation ---------",
          'has_context_error' ),
        ( nr_xml_errors,
          "-------- unescaped XML special characters ---------",
          'has_xml_error' ),
    )
    for count, heading, flag in sections:
        if not count:
            continue
        print("")
        print(heading)
        for m in msgs:
            if getattr( m, flag ):
                m.diag()
|
||||
|
||||
def main( argv = None ):
    """Check every po file named on the command line against gramps.pot.

    argv -- the argument list, program name first; defaults to sys.argv.

    The template 'gramps.pot' is read from the current directory to get the
    number of template items.  Each po file is then read and analyzed in
    the order given.  The first error stops the run: its message is written
    to stderr, so it does not end up in the middle of the report on stdout.

    Returns 0 when all files were processed and 1 after an error.
    """
    if argv is None:
        argv = sys.argv

    try:
        pot_msgs = read_msgs( 'gramps.pot' )
        nr_templates = len( pot_msgs )

        for nth, fname in enumerate( argv[1:] ):
            msgs = read_msgs( fname )
            analyze_msgs( fname, msgs, nr_templates, nth )

    except Exception:
        # sys.exc_info() instead of "except Exception, e" / "as e": this
        # form is accepted by old and new Python versions alike.
        sys.stderr.write( "%s\n" % ( sys.exc_info()[1], ) )
        return 1

    return 0


if __name__ == "__main__":
    # Pass the result on as exit status so callers can detect a failure.
    sys.exit( main() )
|
||||
|
Loading…
x
Reference in New Issue
Block a user