mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-25 22:06:15 +00:00
ePub: restructure the script as a class.
This makes it possible to share a little more code and to avoid functions with many arguments.
This commit is contained in:
parent
9d4ffac7fb
commit
d7d31ab512
@ -21,139 +21,147 @@ import tempfile
|
||||
import zipfile
|
||||
|
||||
|
||||
def parse_arguments():
|
||||
if len(sys.argv) != 5:
|
||||
print('Five arguments are expected, only %s found.' % len(sys.argv))
|
||||
print(sys.argv)
|
||||
sys.exit(1)
|
||||
own_path, java_path, xsltproc_path, input, output = sys.argv
|
||||
script_folder = os.path.dirname(own_path) + '/../'
|
||||
class DocBookToEpub:
|
||||
def __init__(self, args=None):
|
||||
if args is None:
|
||||
args = sys.argv
|
||||
|
||||
print('Generating ePub with the following parameters:')
|
||||
print(own_path)
|
||||
print(java_path)
|
||||
print(xsltproc_path)
|
||||
print(input)
|
||||
print(output)
|
||||
if len(args) != 5:
|
||||
print('Five arguments are expected, only %s found.' % len(sys.argv))
|
||||
print(args)
|
||||
sys.exit(1)
|
||||
|
||||
return java_path, xsltproc_path, input, output, script_folder
|
||||
self.own_path = sys.argv[0]
|
||||
self.java_path = sys.argv[1] if sys.argv[1] != '' and sys.argv[1] != 'none' else ''
|
||||
self.xsltproc_path = sys.argv[2] if sys.argv[2] != '' and sys.argv[2] != 'none' else ''
|
||||
self.input = sys.argv[3]
|
||||
self.output = sys.argv[4]
|
||||
self.script_folder = os.path.dirname(self.own_path) + '/../'
|
||||
|
||||
print('Generating ePub with the following parameters:')
|
||||
print(self.own_path)
|
||||
print(self.java_path)
|
||||
print(self.xsltproc_path)
|
||||
print(self.input)
|
||||
print(self.output)
|
||||
|
||||
def create_temporary_folder():
    """Create a new temporary directory and return its path.

    The path returned by ``tempfile.mkdtemp`` has every backslash replaced
    by a forward slash. The resulting path is printed before it is returned.
    """
    temporary_path = tempfile.mkdtemp()
    normalised_path = temporary_path.replace('\\', '/')

    print('Temporary output directory:')
    print(normalised_path)
    return normalised_path
|
||||
# Precompute paths that will be used later.
|
||||
self.output_dir = tempfile.mkdtemp().replace('\\', '/')
|
||||
self.package_opf = self.output_dir + '/OEBPS/package.opf' # Does not exist yet,
|
||||
print('Temporary output directory: %s' % self.output_dir)
|
||||
|
||||
self.xslt = self.script_folder + 'docbook/epub3/chunk.xsl'
|
||||
print('XSLT style sheet to use:')
|
||||
print(self.xslt)
|
||||
|
||||
def start_xslt_transformation(input, output_dir, script_folder, java_path, xsltproc_path):
|
||||
xslt = script_folder + 'docbook/epub3/chunk.xsl'
|
||||
if xsltproc_path != '' and xsltproc_path != 'none':
|
||||
command = start_xslt_transformation_xsltproc(input, output_dir, script_folder, xslt, xsltproc_path)
|
||||
elif java_path != '' and java_path != 'none':
|
||||
command = start_xslt_transformation_saxon6(input, output_dir, script_folder, xslt, java_path)
|
||||
else:
|
||||
print('docbook2epub fails: no XSLT processor available')
|
||||
shutil.rmtree(output_dir, ignore_errors=True)
|
||||
# These will be filled during the execution of the script.
|
||||
self.renamed = None
|
||||
|
||||
def gracefully_fail(self, reason):
|
||||
print('docbook2epub fails: %s' % reason)
|
||||
shutil.rmtree(self.output_dir, ignore_errors=True)
|
||||
sys.exit(1)
|
||||
|
||||
print('XSLT style sheet to use:')
|
||||
print(xslt)
|
||||
print('Command to execute:')
|
||||
print(command)
|
||||
def start_xslt_transformation(self):
|
||||
command = None
|
||||
if self.xsltproc_path != '':
|
||||
command = self.start_xslt_transformation_xsltproc()
|
||||
elif self.java_path != '':
|
||||
command = self.start_xslt_transformation_saxon6()
|
||||
|
||||
quoted_command = command
|
||||
if os.name == 'nt':
|
||||
# On Windows, it is typical to have spaces in folder names, and that requires to wrap the whole command
|
||||
# in quotes. On Linux, this might create errors when starting the command.
|
||||
quoted_command = '"' + command + '"'
|
||||
# This could be simplified by using subprocess.run, but this requires Python 3.5.
|
||||
if command is None:
|
||||
self.gracefully_fail('no XSLT processor available')
|
||||
|
||||
if os.system(quoted_command) != 0:
|
||||
print('docbook2epub fails: error from the XSLT processor')
|
||||
shutil.rmtree(output_dir, ignore_errors=True)
|
||||
sys.exit(1)
|
||||
print('Command to execute:')
|
||||
print(command)
|
||||
|
||||
print('Generated ePub contents.')
|
||||
quoted_command = command
|
||||
if os.name == 'nt':
|
||||
# On Windows, it is typical to have spaces in folder names, and that requires to wrap the whole command
|
||||
# in quotes. On Linux, this might create errors when starting the command.
|
||||
quoted_command = '"' + command + '"'
|
||||
# This could be simplified by using subprocess.run, but this requires Python 3.5.
|
||||
|
||||
if os.system(quoted_command) != 0:
|
||||
self.gracefully_fail('error from the XSLT processor')
|
||||
|
||||
def start_xslt_transformation_xsltproc(input, output_dir, _, xslt, xsltproc_path):
    """Build the xsltproc command line as a single string.

    The executable, the ``base.dir`` string parameter (set to *output_dir*),
    the style sheet and the input file are each wrapped in double quotes.
    The third positional argument is accepted but not used.
    """
    pieces = [
        '"', xsltproc_path,
        '" -stringparam base.dir "', output_dir,
        '" "', xslt,
        '" "', input,
        '"',
    ]
    return ''.join(pieces)
|
||||
print('Generated ePub contents.')
|
||||
|
||||
def start_xslt_transformation_xsltproc(self):
|
||||
params = '-stringparam base.dir "' + self.output_dir + '"'
|
||||
return '"' + self.xsltproc_path + '" ' + params + ' "' + self.xslt + '" "' + self.input + '"'
|
||||
|
||||
def start_xslt_transformation_saxon6(input, output_dir, script_folder, xslt, java_path):
    """Build the Saxon 6 command line as a single string.

    The jar is looked up at ``scripts/saxon6.5.5.jar`` below *script_folder*.
    The Java executable, the jar, the input file, the style sheet and the
    ``base.dir=<output_dir>`` parameter are each wrapped in double quotes.
    """
    jar_location = script_folder + 'scripts/saxon6.5.5.jar'
    base_dir_param = 'base.dir=%s' % output_dir

    quoted_tail = '" "'.join([jar_location, input, xslt, base_dir_param])
    return '"' + java_path + '" -jar "' + quoted_tail + '"'
|
||||
def start_xslt_transformation_saxon6(self):
|
||||
saxon_jar = self.script_folder + 'scripts/saxon6.5.5.jar'
|
||||
params = 'base.dir=%s' % self.output_dir
|
||||
executable = '"' + self.java_path + '" -jar "' + saxon_jar + '"'
|
||||
return executable + ' "' + self.input + '" "' + self.xslt + '" "' + params + '"'
|
||||
|
||||
def get_images_from_package_opf(self):
|
||||
images = []
|
||||
|
||||
def get_images_from_package_opf(package_opf):
|
||||
images = []
|
||||
# Example in the OPF file:
|
||||
# <item id="d436e1" href="D:/LyX/lib/images/buffer-view.svgz" media-type="image/SVGZ"/>
|
||||
# The XHTML files are also <item> tags:
|
||||
# <item id="id-d0e2" href="index.xhtml" media-type="application/xhtml+xml"/>
|
||||
try:
|
||||
with open(self.package_opf, 'r') as f:
|
||||
for line in f.readlines():
|
||||
if '<item' in line and 'media-type="image' in line:
|
||||
images.append(line.split('href="')[1].split('"')[0])
|
||||
except FileNotFoundError:
|
||||
print('The package.opf file was not found, probably due to a DocBook error. The ePub file will be corrupt.')
|
||||
|
||||
# Example in the OPF file:
|
||||
# <item id="d436e1" href="D:/LyX/lib/images/buffer-view.svgz" media-type="image/SVGZ"/>
|
||||
# The XHTML files are also <item> tags:
|
||||
# <item id="id-d0e2" href="index.xhtml" media-type="application/xhtml+xml"/>
|
||||
try:
|
||||
with open(package_opf, 'r') as f:
|
||||
for line in f.readlines():
|
||||
if '<item' in line and 'media-type="image' in line:
|
||||
images.append(line.split('href="')[1].split('"')[0])
|
||||
except FileNotFoundError:
|
||||
print('The package.opf file was not found, probably due to a DocBook error. The ePub file will be corrupt.')
|
||||
return images
|
||||
|
||||
return images
|
||||
def change_image_paths(self, file):
|
||||
# This could be optimised, as the same operation is performed a zillion times on many files:
|
||||
# https://www.oreilly.com/library/view/python-cookbook/0596001673/ch03s15.html
|
||||
with open(file, 'r', encoding='utf8') as f:
|
||||
contents = list(f)
|
||||
|
||||
with open(file, 'w', encoding='utf8') as f:
|
||||
for line in contents:
|
||||
for (old, new) in self.renamed.items():
|
||||
line = line.replace(old, new)
|
||||
f.write(line)
|
||||
|
||||
def change_image_paths(file, renamed):
|
||||
# This could be optimised, as the same operation is performed a zillion times on many files:
|
||||
# https://www.oreilly.com/library/view/python-cookbook/0596001673/ch03s15.html
|
||||
with open(file, 'r', encoding='utf8') as f:
|
||||
contents = list(f)
|
||||
def copy_images(self):
|
||||
# Copy the assets to the OEBPS/images/. All paths are available in OEBPS/package.opf, but they must also be
|
||||
# changed in the XHTML files. Typically, the current paths are absolute.
|
||||
|
||||
with open(file, 'w', encoding='utf8') as f:
|
||||
for line in contents:
|
||||
for (old, new) in renamed.items():
|
||||
line = line.replace(old, new)
|
||||
f.write(line)
|
||||
# First, get the mapping old file => file in the ePub archive.
|
||||
original_images = self.get_images_from_package_opf()
|
||||
self.renamed = {img: 'images/' + os.path.basename(img) for img in original_images}
|
||||
|
||||
# Then, transform all paths (both OPF and XHTML files).
|
||||
self.change_image_paths(self.output_dir + '/OEBPS/package.opf')
|
||||
for file in glob.glob(self.output_dir + '/OEBPS/*.xhtml'):
|
||||
self.change_image_paths(file)
|
||||
|
||||
def copy_images(output_dir):
|
||||
# Copy the assets to the OEBPS/images/. All paths are available in OEBPS/package.opf, but they must also be changed
|
||||
# in the XHTML files. Typically, the current paths are absolute.
|
||||
# Ensure that the destination path exists. OEBPS exists due to the DocBook-to-ePub transformation.
|
||||
if not os.path.exists(self.output_dir + '/OEBPS/images/'):
|
||||
os.mkdir(self.output_dir + '/OEBPS/images/')
|
||||
|
||||
# First, get the mapping old file => file in the ePub archive.
|
||||
original_images = get_images_from_package_opf(output_dir + '/OEBPS/package.opf')
|
||||
renamed = {img: 'images/' + os.path.basename(img) for img in original_images}
|
||||
# Finally, actually copy the image files.
|
||||
for (old, new) in self.renamed.items():
|
||||
shutil.copyfile(old, self.output_dir + '/OEBPS/' + new)
|
||||
|
||||
# Then, transform all paths (both OPF and XHTML files).
|
||||
change_image_paths(output_dir + '/OEBPS/package.opf', renamed)
|
||||
for file in glob.glob(output_dir + '/OEBPS/*.xhtml'):
|
||||
change_image_paths(file, renamed)
|
||||
def create_zip_archive(self):
|
||||
with zipfile.ZipFile(self.output, 'w', zipfile.ZIP_DEFLATED) as zip:
|
||||
# Python 3.5 brings the `recursive` argument. For older versions, this trick is required...
|
||||
# for file in glob.glob(output_dir + '/**/*', recursive=True):
|
||||
for file in [os.path.join(dp, f) for dp, dn, filenames in os.walk(self.output_dir) for f in filenames]:
|
||||
zip.write(file, os.path.relpath(file, self.output_dir), compress_type=zipfile.ZIP_STORED)
|
||||
|
||||
# Ensure that the destination path exists.
|
||||
if not os.path.exists(output_dir + '/OEBPS/images/'):
|
||||
os.mkdir(output_dir + '/OEBPS/images/')
|
||||
shutil.rmtree(self.output_dir)
|
||||
print('Generated ePub.')
|
||||
|
||||
# Finally, actually copy the image files.
|
||||
for (old, new) in renamed.items():
|
||||
shutil.copyfile(old, output_dir + '/OEBPS/' + new)
|
||||
|
||||
|
||||
def create_zip_archive(output, output_dir):
    """Pack every file below *output_dir* into the archive *output*.

    Entries are named relative to *output_dir* and are written with
    ``ZIP_STORED``. Once the archive is closed, *output_dir* is deleted
    and a confirmation message is printed.
    """
    with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as archive:
        # Python 3.5 brings the `recursive` argument of glob.glob. For older versions, walking the tree is required.
        files_to_add = []
        for folder, _subfolders, names in os.walk(output_dir):
            for name in names:
                files_to_add.append(os.path.join(folder, name))

        for path in files_to_add:
            entry_name = os.path.relpath(path, output_dir)
            archive.write(path, entry_name, compress_type=zipfile.ZIP_STORED)

    shutil.rmtree(output_dir)
    print('Generated ePub.')
|
||||
def transform(self):
|
||||
self.start_xslt_transformation()
|
||||
self.copy_images()
|
||||
self.create_zip_archive()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
java_path, xsltproc_path, input, output, script_folder = parse_arguments()
|
||||
output_dir = create_temporary_folder()
|
||||
start_xslt_transformation(input, output_dir, script_folder, java_path, xsltproc_path)
|
||||
copy_images(output_dir)
|
||||
create_zip_archive(output, output_dir)
|
||||
DocBookToEpub(sys.argv).transform()
|
||||
|
Loading…
Reference in New Issue
Block a user