Revert to using the good old postats.sh instead of the brand new postats.py.

José, for your information, the problems are:

* postats.py does not mangle e-mail addresses

* postats.py does not work in non-"C" locale

Another difference (is it a problem?) is that postats.py uses explicit 
codepoints instead of HTML entities for accented characters.


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/branches/BRANCH_1_5_X@20686 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Jean-Marc Lasgouttes 2007-10-03 08:36:50 +00:00
parent 8ed6e1178a
commit afb0a48dc1
3 changed files with 207 additions and 139 deletions

View File

@ -53,7 +53,7 @@ DISTFILES = $(DISTFILES.common) Makevars POTFILES.in \
$(POFILES) $(GMOFILES) \
$(DISTFILES.extra1) $(DISTFILES.extra2) $(DISTFILES.extra3)
DISTFILES.extra1 = lyx_pot.py postats.py pocheck.pl
DISTFILES.extra1 = lyx_pot.py postats.sh pocheck.pl
POTFILE_IN_DEPS = $(shell find $(top_srcdir)/src -name Makefile.am)
@ -413,8 +413,8 @@ ui_l10n.pot: $(top_srcdir)/lib/ui/*.ui $(top_srcdir)/lib/ui/*.inc
LC_ALL=C ; export LC_ALL ; \
python $(srcdir)/lyx_pot.py -b $(top_srcdir) -o $@ -t ui ${top_srcdir}/lib/ui/*.ui ${top_srcdir}/lib/ui/*.inc
i18n.inc: $(POFILES) postats.py
(cd $(srcdir) ; python postats.py $(POFILES)) >$@
i18n.inc: $(POFILES) postats.sh
(cd $(srcdir) ; ./postats.sh $(POFILES)) >$@
external_l10n.pot: $(top_srcdir)/lib/external_templates
python $(srcdir)/lyx_pot.py -b $(top_srcdir) -o $@ -t external ${top_srcdir}/lib/external_templates

View File

@ -1,136 +0,0 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2007 Michael Gerz <michael.gerz@teststep.org>
# Copyright (C) 2007 José Matos <jamatos@lyx.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""
This script extracts some information from the po file headers (last
translator, revision date), generates the corresponding gmo files
to retrieve the number of translated/fuzzy/untranslated messages,
and generates a PHP web page.
Invocation:
postats.py po_files > "pathToWebPages"/i18n.inc
"""
# modify this when you change version
# Note that an empty lyx_branch variable (ie svn trunk)
# will "do the right thing".
# Both values are written into the generated PHP page by the main block:
# lyx_version becomes $lyx_version, and lyx_branch becomes $branch_tag
# ("branches/<lyx_branch>", or "trunk" when lyx_branch is empty).
lyx_version="1.5.2svn"
lyx_branch="BRANCH_1_5_X"
import os
import sys
def extract_number(line, issues, prop):
    """
    Extract the message counts from one line of msgfmt statistics.

    line is a string like
    '588 translated messages, 1248 fuzzy translations, 2 untranslated messages.'
    Any one of these substrings may not appear if the associated number is 0.

    issues is the set of words following the number to be extracted,
    ie, 'translated', 'fuzzy', or 'untranslated'.

    prop is a dictionary that is updated in place: for every word in
    issues, prop[word] is set to the number that precedes the word in line,
    or to zero if the word is not found in the string.
    Nothing is returned.
    """
    # Work on whole words.  A plain substring search finds 'translated'
    # inside 'untranslated', which picks up the wrong token (and makes int()
    # fail) whenever the line has no 'translated' part of its own.
    words = line.replace(',', ' ').replace('.', ' ').split()
    for issue in issues:
        prop[issue] = 0
        for position in range(1, len(words)):
            if words[position] == issue and words[position - 1].isdigit():
                prop[issue] = int(words[position - 1])
                break
def read_pofile(pofile):
    """ Read the header of the pofile and return it as a dictionary.

    The header is the block of quoted 'Key: value' lines that follows the
    first msgid of the file; reading stops at the second msgid.
    Keys are the header field names (e.g. 'Last-Translator'); values are
    the field contents with the surrounding quotes and the trailing
    backslash-n escape removed.
    """
    header = {}
    read_header = False
    po = open(pofile)
    # try/finally makes sure the file is closed even when we leave the loop
    # early or an exception is raised.
    try:
        for line in po:
            # Drop the line terminator only; a last line without a newline
            # must not lose its final character.
            line = line.rstrip('\r\n')
            if line[:5] == 'msgid':
                if read_header:
                    break
                read_header = True
                continue
            if not line or line[0] == '#' or line == 'msgstr ""' or not read_header:
                continue
            line = line.strip('"')
            # Split on the first ': ' only, so that values which themselves
            # contain ': ' are kept whole.
            args = line.split(': ', 1)
            if len(args) == 1:
                continue
            value = args[1].strip()
            # Remove the literal backslash-n that ends a header line, but
            # only when it is really there.
            if value.endswith('\\n'):
                value = value[:-2]
            header[args[0]] = value
    finally:
        po.close()
    return header
def run_msgfmt(pofile):
    """ pofile is the name of the po file.
    The function runs msgfmt on it and returns corresponding php code.

    The returned string is one PHP 'array ( ... )' entry holding the
    language code, the revision date, the numbers of translated, fuzzy and
    untranslated messages, the last translator and the (mangled) e-mail
    address.  The script exits with status 1 if pofile does not end
    in '.po'.
    """
    import subprocess

    if not pofile.endswith('.po'):
        sys.stderr.write("%s is not a po file\n" % pofile)
        sys.exit(1)

    # Replace the extension only; str.replace() would also rewrite any
    # other '.po' that happens to occur in the path.
    gmofile = pofile[:-len('.po')] + '.gmo'

    header = read_pofile(pofile)
    charset= header['Content-Type'].split('charset=')[1]

    # po file properties
    prop = {}
    prop["langcode"] = os.path.basename(pofile)[:-3]
    prop["date"] = header['PO-Revision-Date'].split()[0]

    # 'Name <address>': tolerate a header without an address part.
    last_translator = header['Last-Translator'].split('<', 1)
    translator = last_translator[0].strip()
    email = ''
    if len(last_translator) > 1:
        email = last_translator[1].split('>')[0]
    # Do not publish plain addresses on the web page: mangle them the same
    # way postats.sh does ('@' -> ' () ', '.' -> ' ! ').
    prop["email"] = email.replace('@', ' () ', 1).replace('.', ' ! ')

    try:
        prop["translator"] = translator.decode(charset).encode('ascii','xmlcharrefreplace')
    except LookupError:
        # Unknown charset name: keep the raw bytes.
        prop["translator"] = translator

    # Force the C locale so that the statistics line is in English whatever
    # the user's environment is; extract_number looks for English words.
    env = dict(os.environ)
    env['LC_ALL'] = 'C'
    # An argument list (no shell) keeps odd characters in file names from
    # being interpreted.  stderr is merged into stdout and the first output
    # line is parsed, as before.
    msgfmt = subprocess.Popen(['msgfmt', '--statistics', '-o', gmofile, pofile],
                              stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                              env=env, universal_newlines=True)
    stats = msgfmt.communicate()[0]
    extract_number(stats.split('\n')[0],
                   ('translated', 'fuzzy', 'untranslated'),
                   prop)
    return """
array ( 'langcode' => '%(langcode)s', "date" => "%(date)s",
"msg_tr" => %(translated)d, "msg_fu" => %(fuzzy)d, "msg_nt" => %(untranslated)d,
"translator" => "%(translator)s", "email" => "%(email)s")""" % prop
if __name__ == "__main__":
    # Script entry point: write the PHP page for all po files given on the
    # command line to standard output.
    if lyx_branch:
        branch_tag = "branches/%s" % lyx_branch
    else:
        branch_tag = "trunk"

    # Python does not treat '$' specially, so the PHP variable names are
    # written without a backslash (a '\$' would end up verbatim in the
    # generated page and break the PHP code).
    page = """<?php
// The current version
$lyx_version = "%s";
// The branch tag
$branch_tag = "%s";
// The data itself
$podata = array (%s
)?>
""" % (lyx_version, branch_tag, ",".join([run_msgfmt(po) for po in sys.argv[1:]]))
    # The extra newline is the one the print statement used to append.
    sys.stdout.write(page + "\n")

204
po/postats.sh Executable file
View File

@ -0,0 +1,204 @@
#! /bin/sh
# file postats.sh
#
# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.
#
# author: Michael Gerz, michael.gerz@teststep.org
#
# This script extracts some information from the po file headers (last
# translator, revision date), generates the corresponding gmo files
# to retrieve the number of translated/fuzzy/untranslated messages,
# and generates a PHP web page.
#
# Invocation:
# postats.sh po_files > "pathToWebPages"/i18n.inc
# modify this when you change version
# Note that an empty lyx_branch variable (ie svn trunk)
# will "do the right thing".
lyx_version=1.5.2svn
lyx_branch=BRANCH_1_5_X

# GNU sed and grep have real problems dealing with 8-bit characters
# in UTF-8 encoded environments.
# Unsetting LANG alone is not enough: LC_ALL or LC_CTYPE may still select
# a UTF-8 locale, and a variable that is not exported is not seen by the
# child processes (sed, grep, msgfmt).  Forcing LC_ALL=C also guarantees
# that msgfmt prints the English statistics line that is parsed below.
unset LANG
LC_ALL=C
LANGUAGE=C
export LC_ALL LANGUAGE
# Print all arguments, joined by single spaces, on stderr.
# The quotes keep the message intact: without them the shell would
# collapse runs of blanks and expand glob characters such as '*'.
warning () {
	echo "$*" 1>&2
}
# Print all arguments on stderr (through warning) and terminate the
# script with exit status 1.
error () {
	# "$@" forwards the arguments unchanged (no word splitting, no globbing).
	warning "$@"
	exit 1
}
# $1 is a string like
# '588 translated messages, 1248 fuzzy translations, 2 untranslated messages.'
# Any one of these substrings may not appear if the associated number is 0.
#
# $2 is the word following the number to be extracted,
# ie, 'translated', 'fuzzy', or 'untranslated'.
#
# extract_number fills var $number with this number, or sets it to zero if the
# word is not found in the string.
extract_number () {
	test $# -eq 2 || error 'extract_number expects 2 args'

	# Require "<blank><digits><blank><word>" followed by a blank, '.' or ','.
	# This way 'translated' is not found inside 'untranslated', and the
	# number can never come out empty.  The blanks added around $1 let the
	# pattern also match at the very start and end of the string.
	number=`echo " $1 " | sed -n "s/.* \([0-9][0-9]*\) $2[ .,].*/\1/p"`
	test -n "$number" || number=0
}
# $template is used by run_msgfmt, below, to fill $output. The function extracts
# the appropriate values from the data.
# The upper-case words are placeholders that run_msgfmt replaces with sed:
# LC (language code), TR / FU / NT (numbers of translated, fuzzy and
# untranslated messages), AUTHOR (last translator), EMAIL (mangled e-mail
# address) and DATE (revision date).
template="array ( 'langcode' => 'LC',
\"msg_tr\" => TR, \"msg_fu\" => FU, \"msg_nt\" => NT,
\"translator\" => \"AUTHOR\", \"email\" => \"EMAIL\",
\"date\" => \"DATE\" )"
# Protect the template against accidental modification.
readonly template
# $1 is the name of the po file.
#
# The function runs msgfmt on it and fills var $output.
# $output is left empty when the file does not exist, is not a po file,
# or when msgfmt does not produce a statistics line.
# All other variables created in the function are unset on exit.
run_msgfmt () {
	test $# -eq 1 || error 'run_msgfmt expects 1 arg'

	output=
	test -f "$1" || {
		warning "File $1 does not exist"
		return
	}

	origdir=`pwd`
	dir=`dirname "$1"`
	pofile=`basename "$1"`
	# Only a real '.po' extension is turned into '.gmo'.
	gmofile=`echo "$pofile" | sed 's/\.po$/.gmo/'`
	if test -z "$pofile" || test "$pofile" = "$gmofile"; then
		warning "File $1 is not a po file"
		unset origdir dir pofile gmofile
		return
	fi

	cd "$dir"
	unset dir

	langcode=`echo "$pofile" | sed 's/\.po$//'`

	# Searching for a string of the form
	# '"PO-Revision-Date: 2003-01-18 03:00+0100\n"'
	# ('  *' = one or more blanks; ' *' would also match the empty string
	# and insert a blank between all characters.)
	date=`grep 'Revision-Date' "$pofile" | sed 's/  */ /g' | cut -d ' ' -f 2`

	# Searching for a string of the form
	# '"Last-Translator: Michael Gerz <Michael.Gerz@teststep.org>\n"'
	translator=
	email=
	input=`grep "Last-Translator" "$pofile"` && {
		# $input is deliberately left unquoted: this joins several matching
		# lines into a single one, as the cut commands below expect.
		input=`echo $input | sed 's/  */ /g' | cut -d ' ' -f 2-`
		translator=`echo $input | cut -d '<' -f 1 | sed 's/ *$//'`
		# The address is mangled ('@' -> ' () ', '.' -> ' ! ') before it
		# goes into the web page.
		email=`echo $input | cut -d '<' -f 2 | cut -d '>' -f 1 | sed -e 's/@/ () /' -e 's/\./ ! /g'`
	}
	unset input

	# Does $translator contain 8-bit characters?
	# (anything that is neither a tab nor printable ASCII)
	TAB=`printf '\t'`
	echo "$translator" | grep "[^${TAB} -~]" >/dev/null && {
		# If so, grab the encoding from the po file.
		charset=`sed -n '/Content-Type/{s/.*charset=//;s/\\\\n" *$//p;q}' "$pofile"`
		# Use recode to generate HTML character codes for the 8-bit
		# characters.
		translator=`echo "$translator" | recode "${charset}..h4"` || exit 1
		# The ampersands in the $translator entries will mess things
		# up unless we escape 'em.
		translator=`echo "$translator" | sed 's/&/\\\&/g'`
	}
	unset TAB charset

	# Run msgfmt on the pofile, filling $message with the raw info.
	# The statistics line starts with the number of translated messages,
	# which may be 0, so any leading digit is accepted.
	message=`"$msgfmt" --statistics -o "$gmofile" "$pofile" 2>&1 | grep "^[0-9]"` || {
		warning "Unable to run msgfmt successfully on file $1"
		# Go back to where we started before giving up on this file.
		cd "$origdir"
		unset origdir pofile gmofile langcode date translator email
		return
	}
	unset pofile gmofile

	extract_number "$message" 'translated'
	translated=$number
	extract_number "$message" 'fuzzy'
	fuzzy=$number
	extract_number "$message" 'untranslated'
	untranslated=$number
	unset message number

	# Fill the placeholders of $template, in the same order as before.
	output=`echo "$template" | sed -e "s/LC/$langcode/" \
		-e "s/TR/$translated/" -e "s/FU/$fuzzy/" -e "s/NT/$untranslated/" \
		-e "s/AUTHOR/$translator/" -e "s/EMAIL/$email/" -e "s/DATE/$date/"`
	unset langcode date translator email untranslated fuzzy translated

	cd "$origdir"
	unset origdir
}
# Write the opening part of the generated php file to stdout: the version,
# the branch tag ("trunk" when $lyx_branch is empty, "branches/$lyx_branch"
# otherwise) and the start of the $podata array.
dump_head () {
	case $lyx_branch in
	'')
		branch_tag="trunk"
		;;
	*)
		branch_tag="branches/$lyx_branch"
		;;
	esac

	cat <<EOF
<?php
// The current version
\$lyx_version = "$lyx_version";
// The branch tag
\$branch_tag = "$branch_tag";
// The data itself
\$podata = array (
EOF
}
# Write the closing part of the generated php file (the PHP end tag)
# to stdout.
dump_tail () {
	printf '%s\n' '?>'
}
# The main body of the script:
# locate msgfmt, then print the php head, one array entry per po file
# given on the command line, and the php tail.

# 'command -v' is the POSIX way to look up a program; it prints nothing
# when the program is missing.
msgfmt=`command -v msgfmt 2>/dev/null`
test -n "$msgfmt" || error "Unable to find 'msgfmt'. Cannot proceed."

dump_head
# Without any po file the array opened by dump_head still has to be closed.
test $# -ne 0 || echo ");"
while [ $# -ne 0 ]
do
	run_msgfmt "$1"
	shift
	if [ $# -eq 0 ]; then
		# Last entry: close the array.
		echo "${output});"
	else
		echo "${output},"
		echo
	fi
done
dump_tail
# The end