2008-05-07 16:43:29 +00:00
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# file csv2lyx.py
# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.
# author Hartmut Haase
2008-05-18 19:15:20 +00:00
# author José Matos
2008-05-07 16:43:29 +00:00
# Full author contact details are available in file CREDITS
# This script reads a csv-table (file name.csv) and converts it into
# a LyX-table for versions 1.5.0 and higher (LyX table format 276).
2008-05-18 19:15:20 +00:00
# It uses Python's csv module for parsing.
2008-05-07 16:43:29 +00:00
# The original csv2lyx was written by Antonio Gulino <antonio.gulino@tin.it>
# in Perl for LyX 1.x and modified for LyX table format 276 by the author.
#
2008-05-18 19:15:20 +00:00
import csv , unicodedata
import os , sys
import optparse
2008-05-07 16:43:29 +00:00
def error(message):
    """Report a fatal problem and abort the script.

    Writes *message* followed by a single newline to standard error and
    then calls ``sys.exit(1)``, so the process ends with exit status 1
    (``SystemExit`` is raised; this function never returns normally).
    """
    sys.stderr.write(message + '\n')
    sys.exit(1)
2008-05-18 20:43:57 +00:00
# Opening part of the generated document: the LyX header (file format 276)
# followed by the start of the table inset.  The two %d placeholders receive
# the number of rows and the number of columns, in that order.
header = """#csv2lyx created this file
\\lyxformat 276
\\begin_document
\\begin_header
\\textclass article
\\inputencoding auto
\\font_roman default
\\font_sans default
\\font_typewriter default
\\font_default_family default
\\font_sc false
\\font_osf false
\\font_sf_scale 100
\\font_tt_scale 100
\\graphics default
\\paperfontsize default
\\papersize default
\\use_geometry false
\\use_amsmath 1
\\use_esint 0
\\cite_engine basic
\\use_bibtopic false
\\paperorientation portrait
\\secnumdepth 3
\\tocdepth 3
\\paragraph_separation indent
\\defskip medskip
\\papercolumns 1
\\papersides 1
\\paperpagestyle default
\\tracking_changes false
\\output_changes false
\\end_header
\\begin_body
\\begin_layout Standard
\\align left
\\begin_inset Tabular
<lyxtabular version="3" rows="%d" columns="%d">
<features>
"""

# A single table cell; the %s placeholder receives the cell text.
cell = """<cell alignment="left" valignment="top" usebox="none">
\\begin_inset Text
\\begin_layout Standard
%s
\\end_layout
\\end_inset
</cell>"""

# Closing part of the document: ends the table, the inset, the layout, the
# body and the document opened by `header`.
footer = """</lyxtabular>
\\end_inset
\\end_layout
\\end_body
\\end_document
"""
2008-05-07 16:43:29 +00:00
# processing command line options
# delegate this to standard module optparse
# `args` holds the keyword arguments handed to OptionParser, so its keys must
# be exactly the parameter names "usage" and "description".
args = {}
args["usage"] = "Usage: csv2lyx [options] csvfile [file.lyx]"

args["description"] = """This script creates a LyX document containing a table created from a
comma-separated-value (CSV) file. The resulting LyX file can be opened
with LyX 1.5.0 or any later version.
If no options are given csv2lyx will try to infer the CSV type of the csvfile,
"""
parser = optparse.OptionParser(**args)

# An empty string means "option not given" for both options.
parser.set_defaults(excel='', column_sep='')
parser.add_option("-e", "--excel", metavar="CHAR",
                  help="""CHAR corresponds to a CSV type:
'e': Excel-generated CSV file
't': Excel-generated TAB-delimited CSV file""")
parser.add_option("-s", "--separator", dest="column_sep",
                  help="""column separator
't' means Tab""")

# Informational group only: it carries remarks for the help output and
# defines no options of its own.
group = optparse.OptionGroup(parser, "Remarks", """If your CSV file contains special characters (e. g. umlauts,
accented letters, etc.) make sure it is coded in UTF-8 (unicode).
Else LyX will loose some cell contents. If your CSV file was not written according to the "Common Format and MIME Type for Comma-Separated Values (CSV) Files" (http://tools.ietf.org/html/rfc4180) there may be unexpected results.""")
parser.add_option_group(group)
(options, args) = parser.parse_args()

# validate input
# One positional argument: read it and write the result to standard output.
# Two positional arguments: the second one names the LyX file to write.
if len(args) == 1:
    infile = args[0]
    outfile = None
elif len(args) == 2:
    infile, outfile = args
else:
    parser.print_help()
    sys.exit(1)

if not os.path.exists(infile):
    error('File "%s" not found.' % infile)

# Map the -e/--excel value onto the name of a csv dialect; the empty default
# maps to None, i.e. no dialect was requested.
dialects = {'': None, 'e': 'excel', 't': 'excel-tab'}
if options.excel not in dialects:
    parser.print_help()
    sys.exit(1)
dialect = dialects[options.excel]

# Set Tab, if necessary
if options.column_sep == 't':
    options.column_sep = "\t"

# Open the output only once every check above has passed, so that a bad
# invocation does not create or truncate the output file.
if outfile is None:
    fout = sys.stdout
else:
    fout = open(outfile, 'w')
2008-05-18 19:15:20 +00:00
# Build the csv reader.  Options given on the command line are honoured as
# they are; only when neither a separator nor a CSV type was given is the
# format detected from the file contents.

# A single handle serves both the detection and the parsing.  csv.reader is
# lazy, so the handle has to stay open until the rows have been consumed
# later in the script.
if sys.version_info[0] >= 3:
    # Python 3: the csv module needs text, opened with newline=''
    csv_file = open(infile, "r", newline="", encoding="utf-8")
else:
    # Python 2: the csv module needs a file opened in binary mode
    csv_file = open(infile, "rb")

if options.column_sep and dialect:
    reader = csv.reader(csv_file, dialect=dialect,
                        delimiter=options.column_sep)
elif options.column_sep:
    # separator only: default dialect with the requested delimiter
    reader = csv.reader(csv_file, delimiter=options.column_sep)
elif dialect:
    # CSV type only: use the requested dialect unchanged
    reader = csv.reader(csv_file, dialect=dialect)
else:
    # when no special column separator is given, try to detect it:
    sample = csv_file.read()
    csv_file.seek(0)  # rewind so that the reader starts at the first record
    try:
        dialect = csv.Sniffer().sniff(sample)
        reader = csv.reader(csv_file, dialect=dialect)
    except Exception:
        # older versions (python < 2.5) of csv have problems (bugs)
        # that is why we try harder to get a result, this should work on most cases
        # as it assumes that the separator is a comma (the c in csv :-) )
        # Exception rather than a bare except, so that SystemExit and
        # KeyboardInterrupt are not swallowed by this best-effort fallback.
        try:
            reader = csv.reader(csv_file, dialect=dialect, delimiter=',')
        except Exception:
            reader = csv.reader(csv_file, delimiter=',')
2008-05-18 19:15:20 +00:00
2008-05-07 16:43:29 +00:00
# read input
# Pull every record into memory: the table dimensions have to be known
# before anything is written.
rows = list(reader)
num_rows = len(rows)  # number of lines
# max columns: the longest record decides, with a minimum of one column
num_cols = max([1] + [len(record) for record in rows])
2008-05-07 16:43:29 +00:00
2008-05-18 19:15:20 +00:00
# create a LyX file

#####################
# write first part
####################
fout.write(header % (num_rows, num_cols))

#####################
# write table
####################
# one <column> declaration per table column
for _ in range(num_cols):
    fout.write('<column alignment="left" valignment="top" width="0pt">\n')

for record in rows:
    row = ['<row>']

    ############################
    # write contents of one line
    ############################
    for text in record:
        # every backslash in the cell text is replaced by the text
        # "\backslash" followed by a newline
        row.append(cell % text.replace('\\', '\\backslash\n'))

    # If row has less columns than num_cols fill with blank entries
    for _ in range(len(record), num_cols):
        row.append(cell % " ")

    fout.write("\n".join(row) + '\n</row>\n')

#####################
# write last part
####################
fout.write(footer)

# close the LyX file; standard output is only flushed, because this script
# did not open it and therefore should not close it
if fout is sys.stdout:
    fout.flush()
else:
    fout.close()