Enable handling of spreadsheets in export to docbook5 format.

In cooperation with Thibaut Cuvelier. lib/scripts/spreadsheet_to_docbook.py: Strip the document header and convert some tags; lib/xtemplates/gnumeric.xtemplate: use this output to be inserted in docbook5; lib/configure.py: Add needed conversion entries.
2024-11-21 17:51:03 +00:00 · 2020-10-04 01:43:44 +02:00 · 2020-10-04 01:43:44 +02:00 · 31d64c7395
commit 31d64c7395
parent 14b0da28c2
4 changed files with 82 additions and 4 deletions
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@ -2495,6 +2495,7 @@ dist_scripts_DATA = \
 	scripts/lyxknitr.R \
 	scripts/lyxstangle.R \
 	scripts/lyxsweave.R
 # We use DATA now instead of PYTHON because automake 1.11.2 complains.
 # Note that we "chmod 755" manually these files in install-data-hook.
 dist_scripts_DATA += \
@ -2524,6 +2525,7 @@ dist_scripts_DATA += \
 	scripts/prefs2prefs_lfuns.py \
 	scripts/prefs2prefs_prefs.py \
 	scripts/prefTest.pl.in \
 	scripts/spreadsheet_to_docbook.py \
 	scripts/tex_copy.py \
 	scripts/TeXFiles.py
--- a/lib/configure.py
+++ b/lib/configure.py
@ -751,7 +751,7 @@ def checkFormatEntries(dtl_tools):
 \Format pdf6       pdf    "PDF (graphics)"        "" "%%"	""	"vector"	"application/pdf"
 \Format pdf7       pdf    "PDF (cropped)"         "" "%%"	""	"document,vector"	""
 \Format pdf8       pdf    "PDF (lower resolution)"         "" "%%"	""	"document,vector"	""
-\Format pdf9       pdf    "PDF (docbook)"         "" "%%"       ""      "document,vector,menu=export"   ""'''])
+\Format pdf9       pdf    "PDF (docbook)"         "" "%%"	""	"document,vector,menu=export"	""'''])
    #
    checkViewer('a DVI previewer', ['xdvi', 'kdvi', 'okular',
                                    'evince', 'xreader',
@ -932,8 +932,8 @@ def checkConverterEntries():
    checkProg('an Open Document (Pandoc) -> LaTeX converter', ['pandoc -s -f odt -o $$o -t latex $$i'],
        rc_entry = [ r'\converter odt3        latex      "%%"	""' ])
    #
-    checkProg('DocBook converter -> PDF (docbook)', ['pandoc -f docbook -t latex --latex-engine=lualatex --toc -o $$o $$i'],
+    checkProg('DocBook converter -> PDF (docbook)', ['pandoc -f docbook -t latex --latex-engine=lualatex --toc --template=$$s/xtemplates/lyx.latex -o $$o $$i'],
-        rc_entry = [ r'\converter docbook5      pdf9      "%%" ""' ])
+        rc_entry = [ r'\converter docbook5      pdf9      "%%"	""' ])
    #
    checkProg('a MS Word Office Open XML converter -> LaTeX', ['pandoc -s -f docx -o $$o -t latex $$i'],
        rc_entry = [ r'\converter word2      latex      "%%"	""' ])
@ -1176,6 +1176,10 @@ def checkConverterEntries():
 \converter oocalc html_table "ssconvert --export-type=Gnumeric_html:html40frag $$i $$o" ""
 \converter excel  html_table "ssconvert --export-type=Gnumeric_html:html40frag $$i $$o" ""
 \converter excel2 html_table "ssconvert --export-type=Gnumeric_html:html40frag $$i $$o" ""
 \converter gnumeric xhtml_table "python $$s/scripts/spreadsheet_to_docbook.py $$i $$o" ""
 \converter oocalc xhtml_table "python $$s/scripts/spreadsheet_to_docbook.py $$i $$o" ""
 \converter excel  xhtml_table "python $$s/scripts/spreadsheet_to_docbook.py $$i $$o" ""
 \converter excel2 xhtml_table "python $$s/scripts/spreadsheet_to_docbook.py $$i $$o" ""
 '''])
    path, lilypond = checkProg('a LilyPond -> EPS/PDF/PNG converter', ['lilypond'])
--- a/lib/scripts/spreadsheet_to_docbook.py
+++ b/lib/scripts/spreadsheet_to_docbook.py
@ -0,0 +1,70 @@
 #!/usr/bin/python3
 # file spreadsheet_to_docbook.py
 # This file is part of LyX, the document processor.
 # Licence details can be found in the file COPYING.
 # author Thibaut Cuvelier & Kornel Benko
 # Full author contact details are available in file CREDITS.
 """reformat output of ssconvert of a single spreadsheet to match the needs
 of docbook5 table format.
 Expects to read from file specified by sys.argv[1]
 and output to the file specified by sys.argv[2]
 """
 import re
 import sys
 import subprocess
 def process_file(contents):
    """Convert an XHTML spreadsheet export into a DocBook 5 table fragment.

    Only the first table found inside the <body> element is kept. Inline
    HTML formatting (<i>, <b>, <u>, <font color="...">) is mapped to
    DocBook <emphasis>/<phrase> elements, and the table is renamed to
    <informaltable> when it has no <caption>.

    contents -- the complete XHTML document, as a text string.

    Returns the DocBook fragment as a string.

    Raises IndexError if the document has no literal "<body>" tag, and
    AssertionError if a <font> tag without a plain color="..." attribute
    is left over (no conversion is implemented for it).
    """
    # Strip the header and the footer: keep only what is inside <body>.
    contents = contents.split("<body>")[1]
    contents = contents.split("</body>")[0]

    # Gnumeric may generate more than one table, just take the first one.
    contents = contents.split("</table>")[0] + "\n</table>"

    # Convert the inline markup of the table to DocBook.
    replacements = (
        ("<p></p>", ""),
        ("<i>", "<emphasis>"),
        ("</i>", "</emphasis>"),
        ("<b>", "<emphasis role='bold'>"),
        ("</b>", "</emphasis>"),
        ("<u>", "<emphasis role='underline'>"),
        ("</u>", "</emphasis>"),
    )
    for html, docbook in replacements:
        contents = contents.replace(html, docbook)

    # [^"]* instead of .* so that each tag is matched on its own: a greedy
    # match would swallow everything up to the last '">' of the line when
    # several coloured runs (or other attributes) share that line.
    contents = re.sub(r'<font color="([^"]*)">', r"<phrase role='color \1'>", contents)

    # Any other kind of <font> tag would end up paired with </phrase> below
    # and produce invalid XML. Raise explicitly (a bare assert would be
    # stripped when running with -O).
    if '<font' in contents:
        raise AssertionError("unsupported <font> tag left in table; "
                             "implement something to catch this case")
    contents = contents.replace("</font>", "</phrase>")

    # If the table has a caption, then the right tag is <table>.
    # Otherwise, it's <informaltable>.
    if '<caption>' not in contents:
        contents = contents.replace("<table", "<informaltable")
        contents = contents.replace("</table>", "</informaltable>")

    # Drop the blank lines left behind by the removals above.
    contents = contents.replace("\n\n", "\n")
    return contents
 if __name__ == "__main__":
    # Two ways to run this script:
    #   - without arguments: filter mode, XHTML is read from stdin and the
    #     DocBook fragment is written to stdout;
    #   - with two arguments (spreadsheet file, output file): the
    #     spreadsheet is converted to XHTML by ssconvert first.
    if len(sys.argv) == 1:
        # print() semantics are kept: the result is followed by a newline.
        # stdout is left open; the interpreter flushes it on exit.
        sys.stdout.write(process_file(sys.stdin.read()) + "\n")
        sys.exit(0)

    if len(sys.argv) != 3:
        # Script name, file to process, output file.
        sys.exit("usage: %s [spreadsheet_file output_file]" % sys.argv[0])

    # "fd://1" makes ssconvert write the XHTML to its stdout, which is
    # captured here. check_output waits for the process and raises
    # CalledProcessError on a non-zero exit status, so a failed conversion
    # is not silently turned into an (empty) output file.
    raw = subprocess.check_output(
        ["ssconvert", "--export-type=Gnumeric_html:xhtml", sys.argv[1], "fd://1"])

    # The pipe delivers bytes; process_file() works on text.
    # NOTE(review): assumes ssconvert emits UTF-8 XHTML -- confirm.
    result = process_file(raw.decode("utf-8"))

    # Only create the output file once the conversion has succeeded, and
    # make sure it is closed even if the write fails.
    with open(sys.argv[2], 'w', encoding="utf-8") as f:
        f.write(result + "\n")
    sys.exit(0)
--- a/lib/xtemplates/gnumeric.xtemplate
+++ b/lib/xtemplates/gnumeric.xtemplate
@ -52,7 +52,9 @@ Template GnumericSpreadsheet
 		Product "[Spreadsheet: $$FName]"
 	FormatEnd
 	Format DocBook
-		Product "[Spreadsheet: $$FName]"
+		Product "$$Contents(\"$$AbsPath$$Basename.xhtml\")"
 		UpdateFormat xhtml_table
 		UpdateResult "$$AbsPath$$Basename.xhtml"
 	FormatEnd
 	Format XHTML
 		Product "$$Contents(\"$$AbsPath$$Basename.html\")"