csv2lyx.py: new csv2lyx version by Hartmut and José

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@24818 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Uwe Stöhr 2008-05-18 19:08:23 +00:00
parent 69dd2ac3a7
commit 58220e118d

View File

@ -6,70 +6,100 @@
# Licence details can be found in the file COPYING.
# author Hartmut Haase
# author José Matos
# Full author contact details are available in file CREDITS
# This script reads a csv-table (file name.csv) and converts it into
# a LyX-table for versions 1.5.0 and higher (LyX table format 276).
# It uses Python's csv module for parsing.
# The original csv2lyx was written by Antonio Gulino <antonio.gulino@tin.it>
# in Perl for LyX 1.x and modified for LyX table format 276 by the author.
#
import os, re, string, sys, unicodedata
import csv, unicodedata
import os, sys
import optparse
def error(message):
    """Report a fatal error and terminate the script.

    Writes ``message`` followed by a newline to standard error and then
    exits the interpreter with status 1.  This function never returns.
    """
    sys.stderr.write(message + '\n')
    sys.exit(1)
# processing command line options
if len(sys.argv) == 1 or sys.argv[1] == '--help':
print '''Usage:
csv2lyx [options] mycsvfile mytmptable.lyx
# delegate this to standard module optparse
args = {}
args["usage"] = "Usage: csv2lyx [options] mycsvfile mytmptable.lyx"
This script creates a LyX document containing a table
args["description"] = """This script creates a LyX document containing a table
from a comma-separated-value file. The LyX file has format 276
and can be opened with LyX 1.5.0 and newer.
"""
parser = optparse.OptionParser(**args)
Options:
-s separator column separator, default is Tab
--help usage instructions
# Register the command line switches.  Each option carries its own default:
# 'n' for --excel selects no predefined dialect, and 'n' for --separator is
# the "no separator given" marker tested later in the script.
parser.add_option("-e", "--excel", dest="excel", default='n',
                  help="""'character' Excel type, default is 'n'
'e': Excel-generated CSV file
't': Excel-generated TAB-delimited CSV file""")
parser.add_option("-s", "--separator", dest="column_sep", default='n',
                  help="column separator, default is ','")
parser.add_option("-g", "--guess-sep", dest="guess_sep", default=False,
                  action="store_true",
                  help="guess the columns separator")
Remarks:
If your .csv file contains special characters (e. g. umlauts,
group = optparse.OptionGroup(parser, "Remarks", """If your .csv file contains special characters (e. g. umlauts,
accented letters, etc.) make sure it is coded in UTF-8 (unicode).
Else LyX will loose some cell contents.'''
sys.exit(0)
Else LyX will loose some cell contents. If your .csv file was not written according to the "Common Format and MIME Type for Comma-Separated Values (CSV) Files" (http://tools.ietf.org/html/rfc4180) there may be unexpected results.""")
parser.add_option_group(group)
# print len(sys.argv), sys.argv
separator = '\t'
infile = ""
if len(sys.argv) == 3:
infile = sys.argv[1]
outfile = sys.argv[2]
elif len(sys.argv) == 5:
infile = sys.argv[3]
outfile = sys.argv[4]
if sys.argv[1] == '-s':
separator = sys.argv[2]
(options, args) = parser.parse_args()

# validate input
# Exactly one (input only) or two (input and output) positional arguments
# are accepted; anything else prints the usage text and exits with status 1.
if len(args) not in (1, 2):
    parser.print_help()
    sys.exit(1)

infile = args[0]
if not os.path.exists(infile):
    error('File "%s" not found.' % infile)

# Open the output only after the input has been validated, so that a
# mistyped input name does not create or truncate the output file.
if len(args) == 2:
    fout = open(args[1], 'w')
else:
    # no output file given: write the LyX document to standard output
    fout = sys.stdout
# read input
finput = open(infile, 'r')
rowcontent = finput.readlines()
finput.close()
num_rows = len(rowcontent) # number of lines
# print 'num_rows ', num_rows
i = 0
num_cols = 1 # max columns
while i < num_rows:
# print len(rowcontent[i]), ' ', rowcontent[i]
num_cols = max(num_cols, rowcontent[i].count(separator) + 1)
i += 1
# print num_cols
fout = open(outfile, 'w')
# Map the value of the --excel option onto a csv dialect name
# (None means: no predefined dialect).
dialects = {'n': None, 'e': 'excel', 't': 'excel-tab'}
if options.excel not in dialects:
    parser.print_help()
    sys.exit(1)
dialect = dialects[options.excel]

# when no special column separator is given, try to detect it:
if options.column_sep == 'n':
    options.guess_sep = True

# NOTE: diagnostics below go to stderr, never stdout, because stdout may be
# the destination of the generated LyX document.
if options.guess_sep:
    # Read the whole file once as the sample for the sniffer and release the
    # handle immediately.
    sample_file = open(infile, 'rb')
    try:
        sample = sample_file.read()
    finally:
        sample_file.close()
    try:
        dialect = csv.Sniffer().sniff(sample)
    except csv.Error:
        # The delimiter could not be determined: keep the dialect chosen
        # above and fall back to a comma.
        sys.stderr.write('error, using ,\n')
        reader = csv.reader(open(infile, "rb"), dialect=dialect, delimiter=',')
    else:
        sys.stderr.write('found: %s\n' % dialect.delimiter)
        reader = csv.reader(open(infile, "rb"), dialect=dialect)
else:
    reader = csv.reader(open(infile, "rb"), dialect=dialect,
                        delimiter=options.column_sep)
# read input
# Materialise every record so the table dimensions are known before any
# output is written.
rows = list(reader)

# max columns: the widest record decides the table width (at least 1)
num_cols = 1
for row in rows:
    num_cols = max(num_cols, len(row))

# Number of table rows.  This must be the number of records, not
# reader.line_num: line_num counts physical lines, which is larger than the
# number of records whenever a quoted field contains a line break, and would
# make later indexing into rows run past the end.
num_rows = len(rows)
# create a LyX file
#####################
# write first part
####################
@ -125,21 +155,31 @@ while i < num_cols:
# Markup written before and after the text of every table cell.
cell_open = """<cell alignment="left" valignment="top" usebox="none">
\\begin_inset Text
\\begin_layout Standard\n"""
cell_close = '\n\\end_layout\n\n\\end_inset\n</cell>\n'

############################
# write contents of one line
############################
# One <row> element per CSV record.  Iterating over rows directly (instead
# of indexing up to a separately computed count) cannot run past the end of
# the list.
for row in rows:
    fout.write('<row>\n')
    # If row has less columns than num_cols, pad it with cells holding a
    # single blank so that every row has exactly num_cols cells.
    cells = list(row) + [' '] * (num_cols - len(row))
    for cell in cells:
        fout.write(cell_open)
        fout.write(cell)
        fout.write(cell_close)
    fout.write('</row>\n')
#####################
@ -154,4 +194,5 @@ fout.write("""</lyxtabular>
\\end_body
\\end_document\n""")
# close the LyX file
fout.close()