lyx_mirror/lib/scripts/lyxpak.py
Enrico Forestieri 556efce34b Account for extensions when creating a LyX archive
LyX archives created after importing a LaTeX document may not
include bibliography style and catalog files. This is because
tex2lyx does not remove extensions, but the lyxpak script
expects files without extensions. So, teach lyxpak to also take
possible extensions into account.

Fixes #13129
2024-12-05 10:05:53 +01:00

405 lines
14 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# file lyxpak.py
# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.
# author Enrico Forestieri
# author Richard Kimberly Heck
# Full author contact details are available in file CREDITS
# This script creates a tar or zip archive with a lyx file and all included
# files (graphics and so on). By default, the created archive is the standard
# type on a given platform, such that a zip archive is created on Windows and
# a gzip compressed tar archive on *nix. This can be controlled by command
# line options, however.
import gzip, os, re, sys
from io import BytesIO
import subprocess
# Interpreter used to launch lyx2lyx. When sys.executable is empty or None
# we fall back to whatever "python" resolves to on the search path.
PYTHON_BIN = sys.executable or "python"

running_on_windows = os.name == 'nt'

# The option parser differs per platform; Windows additionally needs the
# helpers used to run lyx2lyx on a temporary copy of the document.
if not running_on_windows:
    from getopt import getopt
else:
    from shutil import copyfile
    from tempfile import NamedTemporaryFile
    from lyxwin_getopt import getopt
# Pre-compiled regular expressions.
# All of them are bytes patterns, so they must be applied to bytes lines.
# For the patterns consumed through match.group(4), group 4 is the file or
# package/class name; re_options and re_bibfiles carry their value in group 3.

# A trailing ".lyx" extension.
re_lyxfile = re.compile(br"\.lyx$")
# \input{...} or \include{...}, possibly preceded by other text on the line.
re_input = re.compile(b'^(.*)\\\\(input|include){(\\s*)(.+)(\\s*)}.*$')
# Same commands without the leading backslash, at the start of the line;
# tried instead of re_input when the previous line was "\backslash".
re_ertinput = re.compile(b'^(input|include)({)(\\s*)(.+)(\\s*)}.*$')
# \usepackage{...}
re_package = re.compile(b'^(.*)\\\\(usepackage){(\\s*)(.+)(\\s*)}.*$')
# A "\textclass <name>" line.
re_class = re.compile(b'^(\\\\)(textclass)(\\s+)(.+)\\s*$')
# Commands whose argument is a file that is collected but not scanned
# recursively: \verbatiminput, \lstinputlisting and \includegraphics (the
# latter optionally followed by bracketed text before the brace).
re_norecur = re.compile(b'^(.*)\\\\(verbatiminput|lstinputlisting|includegraphics\\[*.*\\]*){(\\s*)(.+)(\\s*)}.*$')
# Backslash-less variant of re_norecur, used after a "\backslash" line.
re_ertnorecur = re.compile(b'^(verbatiminput|lstinputlisting|includegraphics\\[*.*\\]*)({)(\\s*)(.+)(\\s*)}.*$')
# A "filename <value>" line, optionally indented.
re_filename = re.compile(b'^(\\s*)(filename)(\\s+)(.+)\\s*$')
# An "options <value>" line; the value is read as a bibtex style name.
re_options = re.compile(b'^(\\s*)options(\\s+)(.+)\\s*$')
# A "bibfiles <value>" line; the value is a comma-separated list.
re_bibfiles = re.compile(b'^(\\s*)bibfiles(\\s+)(.+)\\s*$')
def usage(prog_name):
    """Return the command-line help text with *prog_name* filled in."""
    template = '''
Usage: %s [-t] [-z] [-l path] [-o output_dir] file.lyx
Options:
-l: Path to lyx2lyx script
-o: Directory for output
-t: Create gzipped tar file
-z: Create zip file
By default, we create file.zip on Windows and file.tar.gz on *nix,
with the file output to where file.lyx is, and we look for lyx2lyx
in the known locations, querying LyX itself if necessary.
'''
    help_text = template % prog_name
    return help_text
def error(message):
    """Write *message* followed by a newline to stderr and exit with status 1."""
    line = message + '\n'
    sys.stderr.write(line)
    raise SystemExit(1)
def tostr(message):
    """Return *message* as text suitable for a diagnostic message.

    Bytes are decoded with the filesystem encoding. Undecodable bytes are
    replaced rather than raising, because this helper is only used while
    building error messages and a decoding failure would mask the error
    actually being reported. A value that is already a str (for instance a
    command-line argument that was never converted to bytes) is returned
    unchanged.
    """
    if isinstance(message, str):
        return message
    return message.decode(sys.getfilesystemencoding(), 'replace')
def gzopen(file):
    """Open *file* for binary reading, transparently handling gzip.

    *file* is a path given either as UTF-8 encoded bytes or as a str.
    The first two bytes are inspected: if they are the gzip magic number
    the file is opened through gzip, otherwise it is opened as is.

    Returns an open binary file object; the caller must close it.
    """
    name = file.decode('utf-8') if isinstance(file, bytes) else file
    # The context manager closes the probe handle even if the read fails.
    with open(name, 'rb') as probe:
        magicnum = probe.read(2)
    if magicnum == b"\x1f\x8b":
        return gzip.open(name)
    return open(name, 'rb')
def find_exe(candidates, extlist, path):
    """Locate the first executable among *candidates* in the *path* list.

    candidates: program base names, tried in order of preference.
    extlist:    extensions appended to each name (use '' for none).
    path:       directories to search, in order.

    Returns (prog, full_path) for the first hit, where prog is the matching
    entry of *candidates*, or (None, None) when nothing is found.
    """
    for prog in candidates:
        for directory in path:
            for ext in extlist:
                full_path = os.path.join(directory, prog + ext)
                # os.access(X_OK) also succeeds for searchable directories,
                # so make sure the candidate is a regular file.
                if os.path.isfile(full_path) and os.access(full_path, os.X_OK):
                    return prog, full_path
    return None, None
def abspath(name):
    """Return the absolute, normalized form of *name*.

    Symbolic links are additionally resolved, except on Windows.
    """
    normalized = os.path.normpath(os.path.abspath(name))
    if running_on_windows:
        return normalized
    return os.path.realpath(normalized)
def _lyx2lyx_lines(curfile, lyx2lyx):
    """Run lyx2lyx on the LyX document *curfile* and return its output lines."""
    if running_on_windows:
        # subprocess cannot cope with unicode arguments and we cannot be
        # sure that curfile can be correctly converted to the current
        # code page. So, we resort to running lyx2lyx on a copy.
        tmp = NamedTemporaryFile(delete=False)
        tmp.close()
        try:
            copyfile(curfile.decode('utf-8'), tmp.name)
            l2l_stdout = subprocess.check_output([PYTHON_BIN, lyx2lyx, tmp.name])
        except subprocess.CalledProcessError:
            error(f'{lyx2lyx} failed to convert "{tostr(curfile)}"')
        finally:
            # error() leaves through SystemExit, so only a finally clause
            # guarantees that the temporary copy is removed.
            os.unlink(tmp.name)
    else:
        try:
            l2l_stdout = subprocess.check_output([PYTHON_BIN, lyx2lyx, curfile])
        except subprocess.CalledProcessError:
            error(f'{lyx2lyx} failed to convert "{tostr(curfile)}"')
    if l2l_stdout.startswith(b"\x1f\x8b"):
        # The output is gzip-compressed: inflate it in memory.
        l2l_stdout = gzip.GzipFile("", "rb", 0, BytesIO(l2l_stdout)).read()
    elif running_on_windows:
        # For some unknown reason, there can be a spurious '\r' in the line
        # separators, causing spurious empty lines when calling splitlines.
        l2l_stdout = l2l_stdout.replace(b'\r\r\n', b'\r\n')
    return l2l_stdout.splitlines()


def _match_file_reference(line, in_ert):
    """Try the file-referencing patterns on *line*, in priority order.

    Returns (match, extlist, recursive): the match object (or None), the
    extensions to try when looking for the file on disk, and whether the
    file found should itself be scanned for further references.
    """
    match = re_filename.match(line)
    if match:
        return match, [b''], True
    match = (re_ertinput if in_ert else re_input).match(line)
    if match:
        return match, [b''], True
    match = re_package.match(line)
    if match:
        return match, [b'.sty'], True
    match = re_class.match(line)
    if match:
        return match, [b'.cls'], True
    match = (re_ertnorecur if in_ert else re_norecur).match(line)
    return match, [b'', b'.eps', b'.pdf', b'.png', b'.jpg'], False


def _add_bib_resource(name, required_ext, curdir, incfiles):
    """Record the bibtex resource *name* if it exists on disk.

    *required_ext* is appended unless *name* already ends with it, so
    names written with or without the extension are both found.
    Relative names are resolved against *curdir*.
    """
    if os.path.splitext(name)[-1] != required_ext:
        name = name + required_ext
    if not os.path.isabs(name):
        name = os.path.join(curdir, name)
    if os.path.exists(name):
        resolved = abspath(name)
        if resolved not in incfiles:
            incfiles.append(resolved)


def gather_files(curfile, incfiles, lyx2lyx):
    """Recursively gather the files referenced by *curfile*.

    curfile:  path (bytes) of the file to scan. A name ending in ".lyx" is
              first passed through the *lyx2lyx* script and its output is
              scanned; any other file is read directly (gzip is handled).
    incfiles: list of absolute paths (bytes) collected so far. It is
              extended in place and also serves as the "already visited"
              set that stops the recursion on circular inclusions.
    lyx2lyx:  path of the lyx2lyx script.

    Relative file names are resolved against the directory of *curfile*.
    Files matched by the graphics/verbatim patterns are collected but not
    scanned. For LyX documents, bibtex styles ("options" lines) and
    databases ("bibfiles" lines) are collected as well.

    Always returns 0. Exits the program through error() if lyx2lyx fails.
    """
    curdir = os.path.dirname(abspath(curfile))
    is_lyxfile = re_lyxfile.search(curfile)

    if is_lyxfile:
        lines = _lyx2lyx_lines(curfile, lyx2lyx)
    else:
        with gzopen(curfile) as stream:
            lines = stream.readlines()

    maybe_in_ert = False
    for line in lines:
        # Gather used files.
        match, extlist, recursive = _match_file_reference(line, maybe_in_ert)
        # A "\backslash" line means the next line may hold a command name
        # without its leading backslash.
        maybe_in_ert = is_lyxfile and line == b"\\backslash"
        if match:
            file = match.group(4).strip(b'"')
            if not os.path.isabs(file):
                file = os.path.join(curdir, file)
            if not os.path.isdir(file):
                for ext in extlist:
                    candidate = file + ext
                    if not os.path.exists(candidate):
                        continue
                    resolved = abspath(candidate)
                    if resolved not in incfiles:
                        incfiles.append(resolved)
                        if recursive:
                            gather_files(candidate, incfiles, lyx2lyx)
                    break
            continue

        if not is_lyxfile:
            continue

        # Gather bibtex *.bst files.
        match = re_options.match(line)
        if match:
            style = match.group(3).strip(b'"')
            if style.startswith(b"bibtotoc,"):
                style = style[len(b"bibtotoc,"):]
            _add_bib_resource(style, b'.bst', curdir, incfiles)
            continue

        # Gather bibtex *.bib files.
        match = re_bibfiles.match(line)
        if match:
            for bibfile in match.group(3).strip(b'"').split(b','):
                _add_bib_resource(bibfile, b'.bib', curdir, incfiles)

    return 0
def find_lyx2lyx(progloc, path):
    """Find a usable version of the lyx2lyx script.

    progloc: location of this script (bytes or str).
    path:    list of directories searched for the LyX executable.

    Two strategies are tried in turn:
    1. assume this script lives in $SOMEDIR/scripts and look for
       $SOMEDIR/lyx2lyx/lyx2lyx;
    2. run "lyx -version" and derive the location from its output.

    Returns the path of the script. Exits the program through error() if
    LyX cannot be found or queried, or if no executable script is found.
    """
    # First we will see if the script is roughly where we are.
    ourpath = os.path.dirname(abspath(progloc))
    upone = os.path.dirname(ourpath)
    # os.path.join() cannot mix bytes and str, so pick the literal that
    # matches the type of the path we were given.
    component = b"lyx2lyx" if isinstance(upone, bytes) else "lyx2lyx"
    tryit = os.path.join(upone, component, component)
    if os.access(tryit, os.X_OK):
        return tryit

    # Now we will try to query LyX itself to find the path.
    extlist = ['']
    if "PATHEXT" in os.environ:
        extlist = extlist + os.environ["PATHEXT"].split(os.pathsep)
    lyx_exe, full_path = find_exe(["lyxc", "lyx"], extlist, path)
    if lyx_exe is None:
        error('Cannot find the LyX executable in the path.')
    try:
        cmd_stdout = subprocess.check_output([lyx_exe, '-version'], stderr=subprocess.STDOUT)
    except (subprocess.CalledProcessError, OSError):
        error('Cannot query LyX about the lyx2lyx script.')

    re_msvc = re.compile(r'^(\s*)(Host type:)(\s+)(win32)$')
    re_sysdir = re.compile(r'^(\s*)(LyX files dir:)(\s+)(\S+)$')
    # check_output() returns bytes while the patterns above are text
    # patterns, so decode first. Undecodable bytes are replaced because
    # only two plain lines of the report are of interest.
    report = cmd_stdout.decode(sys.getfilesystemencoding(), 'replace')
    for line in report.splitlines():
        if re_msvc.match(line):
            # The LyX executable was built with MSVC, so the
            # "LyX files dir:" line is unusable
            basedir = os.path.dirname(os.path.dirname(full_path))
            tryit = os.path.join(basedir, 'Resources', 'lyx2lyx', 'lyx2lyx')
            break
        match = re_sysdir.match(line)
        if match:
            tryit = os.path.join(match.group(4), 'lyx2lyx', 'lyx2lyx')
            break

    # If no line matched, tryit still holds the first (unusable) guess and
    # the check below reports the failure.
    if not os.access(tryit, os.X_OK):
        error('Unable to find the lyx2lyx script.')
    return tryit
def main(args):
    """Create an archive holding a LyX document and all files it uses.

    args: the command line, program name first. Accepted options are
          -h (help), -t (gzipped tar), -z (zip), -l path (lyx2lyx script)
          and -o dir (output directory), followed by exactly one LyX file.

    The archive is written next to the document unless -o is given, and is
    named after the document with ".lyx" replaced by ".zip" or ".tar.gz".
    Member names are relative to the topmost directory common to all
    gathered files.

    Returns 0 on success. Every failure exits through error().
    """
    ourprog = args[0]

    try:
        if running_on_windows:
            (options, argv) = getopt(args[1:], b"htzl:o:")
        else:
            (options, argv) = getopt(args[1:], "htzl:o:")
    except Exception:
        error(usage(ourprog))

    # we expect the filename to be left
    if len(argv) != 1:
        error(usage(ourprog))

    makezip = running_on_windows
    outdir = ""
    lyx2lyx = None

    for (opt, param) in options:
        if opt == "-h":
            print(usage(ourprog))
            sys.exit(0)
        elif opt == "-t":
            makezip = False
        elif opt == "-z":
            makezip = True
        elif opt == "-l":
            lyx2lyx = param
        elif opt == "-o":
            outdir = param
            if not os.path.isdir(outdir):
                error('Error: "%s" is not a directory.' % outdir)

    lyxfile = argv[0]
    if isinstance(lyxfile, str):
        # File names are handled as bytes from here on (the regular
        # expressions and gather_files() work on bytes), so convert
        # arguments that arrive as text.
        lyxfile = os.fsencode(lyxfile)
    if not os.path.exists(lyxfile):
        error('File "%s" not found.' % tostr(lyxfile))

    # Check that it actually is a LyX document
    with gzopen(lyxfile) as document:
        line = document.readline()
    if not (line and line.startswith(b'#LyX')):
        error('File "%s" is not a LyX document.' % tostr(lyxfile))

    if makezip:
        import zipfile
    else:
        import tarfile

    ar_ext = b".zip" if makezip else b".tar.gz"
    lyxpath = abspath(lyxfile)
    ar_bytes = re_lyxfile.sub(ar_ext, lyxpath)
    if ar_bytes == lyxpath:
        # The name does not end in ".lyx": append the extension instead,
        # otherwise the archive would overwrite the document itself.
        ar_bytes = lyxpath + ar_ext
    ar_name = ar_bytes.decode('utf-8')
    if outdir:
        # NOTE(review): outdir may be bytes if the option parser returned
        # bytes (confirm against lyxwin_getopt); ar_name is text and
        # os.path.join() cannot mix the two.
        if isinstance(outdir, bytes):
            outdir = outdir.decode('utf-8')
        ar_name = os.path.join(abspath(outdir), os.path.basename(ar_name))

    # Fall back to the platform default search path if PATH is unset.
    path = os.environ.get("PATH", os.defpath).split(os.pathsep)

    if lyx2lyx is None:
        lyx2lyx = find_lyx2lyx(ourprog, path)

    # Initialize the list with the specified LyX file and recursively
    # gather all required files (also from child documents).
    incfiles = [abspath(lyxfile)]
    gather_files(lyxfile, incfiles, lyx2lyx)

    # Find the topmost dir common to all files
    path_sep = os.path.sep.encode('utf-8')
    if len(incfiles) > 1:
        topdir = os.path.commonprefix(incfiles)
        # As os.path.commonprefix() works on a character by character basis,
        # rather than on path elements, we need to remove any trailing bytes.
        topdir = topdir.rpartition(path_sep)[0] + path_sep
    else:
        topdir = os.path.dirname(incfiles[0])
        if not topdir.endswith(path_sep):
            topdir += path_sep

    # Remove the prefix common to all paths in the list. Names that do not
    # start with topdir (no common directory at all) are left untouched.
    prefix_len = len(topdir)
    members = {
        name[prefix_len:] if name.startswith(topdir) else name
        for name in incfiles
    }
    # Duplicates are gone with the set; sort for a stable member order.
    incfiles = sorted(members)

    # Member names are relative to topdir, so work from there. ar_name is
    # absolute and thus unaffected by the change of directory.
    if topdir:
        os.chdir(topdir)

    # Create the archive
    try:
        if makezip:
            with zipfile.ZipFile(ar_name, "w", zipfile.ZIP_DEFLATED) as archive:
                for file in incfiles:
                    archive.write(file.decode('utf-8'))
        else:
            with tarfile.open(ar_name, "w:gz") as archive:
                for file in incfiles:
                    archive.add(file.decode('utf-8'))
    except Exception:
        error('Failed to create LyX archive "%s"' % ar_name)

    print('LyX archive "%s" created successfully.' % ar_name)
    return 0
if __name__ == "__main__":
    if running_on_windows:
        # Workaround for <http://bugs.python.org/issue2128> (Python 2).
        # The command line is fetched in unicode form straight from the
        # Windows API and every argument is re-encoded as utf-8, so that
        # lyxpak.py copes with any non-ascii name when started from a
        # command prompt. This does not help when LyX itself starts the
        # script: LyX converts converter arguments to the filesystem
        # encoding, which on Windows is the current code page rather than
        # the UTF-16 encoding used by NTFS, so characters that cannot be
        # encoded exactly are transliterated (α may become a, β may become
        # ß, and so on). That only matters when the full path of the LyX
        # document contains such a character, because all other paths are
        # read from the document in utf-8 format.
        from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int
        from ctypes.wintypes import LPWSTR, LPCWSTR

        GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
        CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))

        argc = c_int(0)
        argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
        # Element 0 is the Python interpreter, so start from element 1.
        argv = [argv_unicode[pos].encode('utf-8') for pos in range(1, argc.value)]
        # Drop the leading option arguments meant for the interpreter.
        first_non_option = 0
        while first_non_option < len(argv) and argv[first_non_option].startswith(b"-"):
            first_non_option += 1
        argv = argv[first_non_option:]
        sys.argv = argv

    main(sys.argv)