#!/usr/bin/python3

# file spreadsheet_to_docbook.py
# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.

# author Thibaut Cuvelier & Kornel Benko

# Full author contact details are available in file CREDITS.

"""Reformat the ssconvert output of a single spreadsheet as a DocBook 5 table.

With two arguments, the spreadsheet named by sys.argv[1] is converted with
ssconvert and the result is written to the file named by sys.argv[2].
Without arguments, already-converted XHTML is read from stdin and the
result is written to stdout.
"""

import re
import sys
import subprocess

# NOTE(review): the markup literals in this file were restored from the
# surrounding comments after a tag stripper had emptied them; confirm them
# against the upstream LyX script before merging.

# Simple one-to-one tag substitutions from XHTML to DocBook.
_TAG_REPLACEMENTS = (
    ("<p></p>", ""),
    ("<i>", "<emphasis>"),
    ("</i>", "</emphasis>"),
    ("<b>", "<emphasis role='bold'>"),
    ("</b>", "</emphasis>"),
    ("<u>", "<emphasis role='underline'>"),
    ("</u>", "</emphasis>"),
)

# Opening font tag carrying a colour. The value is matched with [^"]* so
# that two font tags on the same line are not merged into one match.
_FONT_COLOR_RE = re.compile(r'<font color="([^"]*)">')


def process_file(contents):
    """Convert the XHTML produced by ssconvert into a DocBook table.

    Only the first table found in the body of the document is kept.

    :param contents: complete XHTML document, as a string.
    :return: the DocBook markup of the first table, as a string.
    :raises IndexError: if ``contents`` has no ``<body>`` tag.
    :raises AssertionError: if a font tag without a colour attribute is
        left over, as the output would then be invalid XML.
    """
    # Scrap the header and the footer.
    contents = contents.split("<body>")[1]
    contents = contents.split("</body>")[0]

    # Gnumeric may generate more than one table, just take the first one.
    contents = contents.split("</table>")[0] + "</table>\n"

    # Convert the rest of the table to DocBook.
    for html_tag, docbook_tag in _TAG_REPLACEMENTS:
        contents = contents.replace(html_tag, docbook_tag)
    contents = _FONT_COLOR_RE.sub(r"<phrase role='color \1'>", contents)

    # Generates invalid XML if there are still font tags left. Raised
    # explicitly (same exception type as the former assert) so that the
    # check is not dropped when running with "python -O".
    if '<font' in contents:
        raise AssertionError("unsupported <font> tag left in the table")
    contents = contents.replace("</font>", "</phrase>")

    # If the table has a caption, then the right tag is <table>.
    # Otherwise, it's <informaltable>.
    if '<caption>' not in contents:
        contents = contents.replace("<table", "<informaltable")
        contents = contents.replace("</table>", "</informaltable>")

    # Return the processed string.
    contents = contents.replace("\n\n", "\n")
    return contents


def main(argv):
    """Run the conversion for the given command line; return the exit code.

    :param argv: full argument vector, script name included.
    """
    if len(argv) == 1:
        # Read from stdin, output to stdout.
        print(process_file(sys.stdin.read()))
        return 0

    # Script name, file to process, output file.
    if len(argv) != 3:
        # A string argument makes sys.exit print it to stderr and exit
        # with status 1, the status a failed assert used to produce.
        sys.exit("usage: %s [spreadsheet output.xml]" % argv[0])

    # Read from the output of ssconvert. The output is decoded so that
    # process_file receives str rather than bytes, and a failing ssconvert
    # raises CalledProcessError instead of being silently ignored.
    # NOTE(review): assumes ssconvert emits UTF-8 XHTML -- confirm.
    proc = subprocess.run(
        ["ssconvert", "--export-type=Gnumeric_html:xhtml", argv[1], "fd://1"],
        stdout=subprocess.PIPE,
        check=True,
        encoding="utf-8",
    )
    converted = process_file(proc.stdout)

    # The output file is only created once the conversion has succeeded.
    with open(argv[2], 'w', encoding="utf-8") as f:
        print(converted, file=f)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))