ePub: restructure the script as a class.

This makes it possible to share a little more code and avoids functions with many arguments.
2025-01-26 18:07:18 +00:00 · 2021-02-07 20:46:29 +01:00 · 2021-02-07 20:46:29 +01:00 · d7d31ab512
commit d7d31ab512
parent 9d4ffac7fb
1 changed files with 113 additions and 105 deletions
--- a/lib/scripts/docbook2epub.py
+++ b/lib/scripts/docbook2epub.py
@ -21,139 +21,147 @@ import tempfile
 import zipfile


-def parse_arguments():
-    if len(sys.argv) != 5:
-        print('Five arguments are expected, only %s found.' % len(sys.argv))
-        print(sys.argv)
-        sys.exit(1)
-    own_path, java_path, xsltproc_path, input, output = sys.argv
-    script_folder = os.path.dirname(own_path) + '/../'
+class DocBookToEpub:
+    def __init__(self, args=None):
+        if args is None:
+            args = sys.argv

-    print('Generating ePub with the following parameters:')
-    print(own_path)
-    print(java_path)
-    print(xsltproc_path)
-    print(input)
-    print(output)
+        if len(args) != 5:
+            print('Five arguments are expected, only %s found.' % len(sys.argv))
+            print(args)
+            sys.exit(1)

-    return java_path, xsltproc_path, input, output, script_folder
+        self.own_path = sys.argv[0]
+        self.java_path = sys.argv[1] if sys.argv[1] != '' and sys.argv[1] != 'none' else ''
+        self.xsltproc_path = sys.argv[2] if sys.argv[2] != '' and sys.argv[2] != 'none' else ''
+        self.input = sys.argv[3]
+        self.output = sys.argv[4]
+        self.script_folder = os.path.dirname(self.own_path) + '/../'

+        print('Generating ePub with the following parameters:')
+        print(self.own_path)
+        print(self.java_path)
+        print(self.xsltproc_path)
+        print(self.input)
+        print(self.output)

-def create_temporary_folder():
-    output_dir = tempfile.mkdtemp().replace('\\', '/')
-    print('Temporary output directory:')
-    print(output_dir)
-    return output_dir
+        # Precompute paths that will be used later.
+        self.output_dir = tempfile.mkdtemp().replace('\\', '/')
+        self.package_opf = self.output_dir + '/OEBPS/package.opf'  # Does not exist yet,
+        print('Temporary output directory: %s' % self.output_dir)

+        self.xslt = self.script_folder + 'docbook/epub3/chunk.xsl'
+        print('XSLT style sheet to use:')
+        print(self.xslt)

-def start_xslt_transformation(input, output_dir, script_folder, java_path, xsltproc_path):
-    xslt = script_folder + 'docbook/epub3/chunk.xsl'
-    if xsltproc_path != '' and xsltproc_path != 'none':
-        command = start_xslt_transformation_xsltproc(input, output_dir, script_folder, xslt, xsltproc_path)
-    elif java_path != '' and java_path != 'none':
-        command = start_xslt_transformation_saxon6(input, output_dir, script_folder, xslt, java_path)
-    else:
-        print('docbook2epub fails: no XSLT processor available')
-        shutil.rmtree(output_dir, ignore_errors=True)
+        # These will be filled during the execution of the script.
+        self.renamed = None
+
+    def gracefully_fail(self, reason):
+        print('docbook2epub fails: %s' % reason)
+        shutil.rmtree(self.output_dir, ignore_errors=True)
        sys.exit(1)

-    print('XSLT style sheet to use:')
-    print(xslt)
-    print('Command to execute:')
-    print(command)
+    def start_xslt_transformation(self):
+        command = None
+        if self.xsltproc_path != '':
+            command = self.start_xslt_transformation_xsltproc()
+        elif self.java_path != '':
+            command = self.start_xslt_transformation_saxon6()

-    quoted_command = command
-    if os.name == 'nt':
-        # On Windows, it is typical to have spaces in folder names, and that requires to wrap the whole command
-        # in quotes. On Linux, this might create errors when starting the command.
-        quoted_command = '"' + command + '"'
-    # This could be simplified by using subprocess.run, but this requires Python 3.5.
+        if command is None:
+            self.gracefully_fail('no XSLT processor available')

-    if os.system(quoted_command) != 0:
-        print('docbook2epub fails: error from the XSLT processor')
-        shutil.rmtree(output_dir, ignore_errors=True)
-        sys.exit(1)
+        print('Command to execute:')
+        print(command)

-    print('Generated ePub contents.')
+        quoted_command = command
+        if os.name == 'nt':
+            # On Windows, it is typical to have spaces in folder names, and that requires to wrap the whole command
+            # in quotes. On Linux, this might create errors when starting the command.
+            quoted_command = '"' + command + '"'
+        # This could be simplified by using subprocess.run, but this requires Python 3.5.

+        if os.system(quoted_command) != 0:
+            self.gracefully_fail('error from the XSLT processor')

-def start_xslt_transformation_xsltproc(input, output_dir, _, xslt, xsltproc_path):
-    return '"' + xsltproc_path + '" -stringparam base.dir "' + output_dir + '" "' + xslt + '" "' + input + '"'
+        print('Generated ePub contents.')

+    def start_xslt_transformation_xsltproc(self):
+        params = '-stringparam base.dir "' + self.output_dir + '"'
+        return '"' + self.xsltproc_path + '" ' + params + ' "' + self.xslt + '" "' + self.input + '"'

-def start_xslt_transformation_saxon6(input, output_dir, script_folder, xslt, java_path):
-    saxon_jar = script_folder + 'scripts/saxon6.5.5.jar'
-    params = 'base.dir=%s' % output_dir
-    return '"' + java_path + '" -jar "' + saxon_jar + '" "' + input + '" "' + xslt + '" "' + params + '"'
+    def start_xslt_transformation_saxon6(self):
+        saxon_jar = self.script_folder + 'scripts/saxon6.5.5.jar'
+        params = 'base.dir=%s' % self.output_dir
+        executable = '"' + self.java_path + '" -jar "' + saxon_jar + '"'
+        return executable + ' "' + self.input + '" "' + self.xslt + '" "' + params + '"'

+    def get_images_from_package_opf(self):
+        images = []

-def get_images_from_package_opf(package_opf):
-    images = []
+        # Example in the OPF file:
+        #     <item id="d436e1" href="D:/LyX/lib/images/buffer-view.svgz" media-type="image/SVGZ"/>
+        # The XHTML files are also <item> tags:
+        #     <item id="id-d0e2" href="index.xhtml" media-type="application/xhtml+xml"/>
+        try:
+            with open(self.package_opf, 'r') as f:
+                for line in f.readlines():
+                    if '<item' in line and 'media-type="image' in line:
+                        images.append(line.split('href="')[1].split('"')[0])
+        except FileNotFoundError:
+            print('The package.opf file was not found, probably due to a DocBook error. The ePub file will be corrupt.')

-    # Example in the OPF file:
-    #     <item id="d436e1" href="D:/LyX/lib/images/buffer-view.svgz" media-type="image/SVGZ"/>
-    # The XHTML files are also <item> tags:
-    #     <item id="id-d0e2" href="index.xhtml" media-type="application/xhtml+xml"/>
-    try:
-        with open(package_opf, 'r') as f:
-            for line in f.readlines():
-                if '<item' in line and 'media-type="image' in line:
-                    images.append(line.split('href="')[1].split('"')[0])
-    except FileNotFoundError:
-        print('The package.opf file was not found, probably due to a DocBook error. The ePub file will be corrupt.')
+        return images

-    return images
+    def change_image_paths(self, file):
+        # This could be optimised, as the same operation is performed a zillion times on many files:
+        # https://www.oreilly.com/library/view/python-cookbook/0596001673/ch03s15.html
+        with open(file, 'r', encoding='utf8') as f:
+            contents = list(f)

+        with open(file, 'w', encoding='utf8') as f:
+            for line in contents:
+                for (old, new) in self.renamed.items():
+                    line = line.replace(old, new)
+                f.write(line)

-def change_image_paths(file, renamed):
-    # This could be optimised, as the same operation is performed a zillion times on many files:
-    # https://www.oreilly.com/library/view/python-cookbook/0596001673/ch03s15.html
-    with open(file, 'r', encoding='utf8') as f:
-        contents = list(f)
+    def copy_images(self):
+        # Copy the assets to the OEBPS/images/. All paths are available in OEBPS/package.opf, but they must also be
+        # changed in the XHTML files. Typically, the current paths are absolute.

-    with open(file, 'w', encoding='utf8') as f:
-        for line in contents:
-            for (old, new) in renamed.items():
-                line = line.replace(old, new)
-            f.write(line)
+        # First, get the mapping old file => file in the ePub archive.
+        original_images = self.get_images_from_package_opf()
+        self.renamed = {img: 'images/' + os.path.basename(img) for img in original_images}

+        # Then, transform all paths (both OPF and XHTML files).
+        self.change_image_paths(self.output_dir + '/OEBPS/package.opf')
+        for file in glob.glob(self.output_dir + '/OEBPS/*.xhtml'):
+            self.change_image_paths(file)

-def copy_images(output_dir):
-    # Copy the assets to the OEBPS/images/. All paths are available in OEBPS/package.opf, but they must also be changed
-    # in the XHTML files. Typically, the current paths are absolute.
+        # Ensure that the destination path exists. OEBPS exists due to the DocBook-to-ePub transformation.
+        if not os.path.exists(self.output_dir + '/OEBPS/images/'):
+            os.mkdir(self.output_dir + '/OEBPS/images/')

-    # First, get the mapping old file => file in the ePub archive.
-    original_images = get_images_from_package_opf(output_dir + '/OEBPS/package.opf')
-    renamed = {img: 'images/' + os.path.basename(img) for img in original_images}
+        # Finally, actually copy the image files.
+        for (old, new) in self.renamed.items():
+            shutil.copyfile(old, self.output_dir + '/OEBPS/' + new)

-    # Then, transform all paths (both OPF and XHTML files).
-    change_image_paths(output_dir + '/OEBPS/package.opf', renamed)
-    for file in glob.glob(output_dir + '/OEBPS/*.xhtml'):
-        change_image_paths(file, renamed)
+    def create_zip_archive(self):
+        with zipfile.ZipFile(self.output, 'w', zipfile.ZIP_DEFLATED) as zip:
+            # Python 3.5 brings the `recursive` argument. For older versions, this trick is required...
+            # for file in glob.glob(output_dir + '/**/*', recursive=True):
+            for file in [os.path.join(dp, f) for dp, dn, filenames in os.walk(self.output_dir) for f in filenames]:
+                zip.write(file, os.path.relpath(file, self.output_dir), compress_type=zipfile.ZIP_STORED)

-    # Ensure that the destination path exists.
-    if not os.path.exists(output_dir + '/OEBPS/images/'):
-        os.mkdir(output_dir + '/OEBPS/images/')
+        shutil.rmtree(self.output_dir)
+        print('Generated ePub.')

-    # Finally, actually copy the image files.
-    for (old, new) in renamed.items():
-        shutil.copyfile(old, output_dir + '/OEBPS/' + new)
-
-
-def create_zip_archive(output, output_dir):
-    with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as zip:
-        # Python 3.5 brings the `recursive` argument. For older versions, this trick is required...
-        # for file in glob.glob(output_dir + '/**/*', recursive=True):
-        for file in [os.path.join(dp, f) for dp, dn, filenames in os.walk(output_dir) for f in filenames]:
-            zip.write(file, os.path.relpath(file, output_dir), compress_type=zipfile.ZIP_STORED)
-
-    shutil.rmtree(output_dir)
-    print('Generated ePub.')
+    def transform(self):
+        self.start_xslt_transformation()
+        self.copy_images()
+        self.create_zip_archive()


 if __name__ == '__main__':
-    java_path, xsltproc_path, input, output, script_folder = parse_arguments()
-    output_dir = create_temporary_folder()
-    start_xslt_transformation(input, output_dir, script_folder, java_path, xsltproc_path)
-    copy_images(output_dir)
-    create_zip_archive(output, output_dir)
+    DocBookToEpub(sys.argv).transform()