mirror of
https://git.lyx.org/repos/lyx.git
synced 2025-01-10 10:56:00 +00:00
556efce34b
LyX archives created after importing a LaTeX document may not include bibliography style and catalog files. This is because tex2lyx does not remove extensions, but the lyxpak script expects files without extensions. So, teach lyxpak to also take possible extensions into account. Fixes #13129
405 lines
14 KiB
Python
Executable File
405 lines
14 KiB
Python
Executable File
# file lyxpak.py
|
||
# This file is part of LyX, the document processor.
|
||
# Licence details can be found in the file COPYING.
|
||
|
||
# author Enrico Forestieri
|
||
# author Richard Kimberly Heck
|
||
|
||
# Full author contact details are available in file CREDITS
|
||
|
||
# This script creates a tar or zip archive with a lyx file and all included
|
||
# files (graphics and so on). By default, the created archive is the standard
|
||
# type on a given platform, such that a zip archive is created on Windows and
|
||
# a gzip compressed tar archive on *nix. This can be controlled by command
|
||
# line options, however.
|
||
|
||
import gzip, os, re, sys
|
||
from io import BytesIO
|
||
import subprocess
|
||
|
||
# Interpreter used to launch helper scripts. sys.executable may be empty or
# None, in which case we simply rely on a "python" found through the path.
PYTHON_BIN = sys.executable or "python"

# True when the script runs under native Windows.
running_on_windows = os.name == 'nt'

# Windows needs the helpers for running lyx2lyx on a temporary copy and uses
# the lyxwin_getopt variant of getopt; elsewhere the standard getopt is used.
if not running_on_windows:
    from getopt import getopt
else:
    from shutil import copyfile
    from tempfile import NamedTemporaryFile
    from lyxwin_getopt import getopt
|
||
|
||
# Pre-compiled regular expressions. All of them operate on bytes, because
# documents are read and scanned in binary form.

# A path ending in ".lyx".
re_lyxfile = re.compile(rb'\.lyx$')
# \input{...} and \include{...}; the ERT variants match the text following
# a "\backslash" line, where the leading backslash is not part of the line.
re_input = re.compile(rb'^(.*)\\(input|include){(\s*)(.+)(\s*)}.*$')
re_ertinput = re.compile(rb'^(input|include)({)(\s*)(.+)(\s*)}.*$')
# \usepackage{...} and the "\textclass name" header line.
re_package = re.compile(rb'^(.*)\\(usepackage){(\s*)(.+)(\s*)}.*$')
re_class = re.compile(rb'^(\\)(textclass)(\s+)(.+)\s*$')
# Commands whose file argument is collected but not scanned recursively.
re_norecur = re.compile(
    rb'^(.*)\\(verbatiminput|lstinputlisting|includegraphics\[*.*\]*){(\s*)(.+)(\s*)}.*$')
re_ertnorecur = re.compile(
    rb'^(verbatiminput|lstinputlisting|includegraphics\[*.*\]*)({)(\s*)(.+)(\s*)}.*$')
# "filename ...", "options ..." and "bibfiles ..." parameter lines.
re_filename = re.compile(rb'^(\s*)(filename)(\s+)(.+)\s*$')
re_options = re.compile(rb'^(\s*)options(\s+)(.+)\s*$')
re_bibfiles = re.compile(rb'^(\s*)bibfiles(\s+)(.+)\s*$')
|
||
|
||
|
||
def usage(prog_name):
    """Return the command-line help text for this script.

    prog_name is the program name shown on the "Usage:" line. It may be a
    str or UTF-8 encoded bytes: on Windows the argument vector is converted
    to bytes before main() runs, and formatting bytes with %s would show
    their repr (b'...') instead of the name.
    """
    if isinstance(prog_name, bytes):
        prog_name = prog_name.decode('utf-8', 'replace')
    msg = '''
Usage: %s [-t] [-z] [-l path] [-o output_dir] file.lyx
Options:
-l: Path to lyx2lyx script
-o: Directory for output
-t: Create gzipped tar file
-z: Create zip file
By default, we create file.zip on Windows and file.tar.gz on *nix,
with the file output to where file.lyx is, and we look for lyx2lyx
in the known locations, querying LyX itself if necessary.
'''
    return msg % prog_name
|
||
|
||
|
||
def error(message):
    """Write message plus a newline to standard error and exit with status 1."""
    text = message + '\n'
    sys.stderr.write(text)
    raise SystemExit(1)
|
||
|
||
|
||
def tostr(message):
    """Return message as a str suitable for diagnostics.

    Bytes are decoded with the filesystem encoding; bytes that cannot be
    decoded are replaced rather than raising, since this helper is used while
    reporting errors. A str is returned unchanged, so callers may pass paths
    of either type.
    """
    if isinstance(message, str):
        return message
    return message.decode(sys.getfilesystemencoding(), 'replace')
|
||
|
||
|
||
def gzopen(file):
    """Open file for binary reading, transparently handling gzip compression.

    file is the path, given as UTF-8 encoded bytes (a str is accepted as is).
    The first two bytes are inspected: when they are the gzip magic number a
    decompressing gzip file object is returned, otherwise a plain binary file
    object. The caller is responsible for closing the returned object.
    """
    name = file if isinstance(file, str) else file.decode('utf-8')
    # Probe the signature inside a context manager so that the handle is
    # released even if reading fails.
    with open(name, 'rb') as probe:
        magicnum = probe.read(2)
    if magicnum == b"\x1f\x8b":
        return gzip.open(name)
    return open(name, 'rb')
|
||
|
||
|
||
def find_exe(candidates, extlist, path):
    """Search the directories in path for one of the candidate programs.

    candidates is a sequence of program base names, tried in order; extlist
    holds the file name extensions to append to each name (including '' for
    none) and path is the list of directories to search.

    Returns a tuple (prog, full_path) for the first hit, where prog is the
    matching candidate name, or (None, None) when nothing is found.
    """
    for prog in candidates:
        for directory in path:
            for ext in extlist:
                full_path = os.path.join(directory, prog + ext)
                # os.access(..., X_OK) also succeeds for searchable
                # directories, so insist on a regular file as well.
                if os.path.isfile(full_path) and os.access(full_path, os.X_OK):
                    return prog, full_path
    return None, None
|
||
|
||
|
||
def abspath(name):
    """Return the absolute, normalized form of name.

    Symbolic links are resolved as well, except when running on Windows.
    """
    normalized = os.path.normpath(os.path.abspath(name))
    if running_on_windows:
        return normalized
    return os.path.realpath(normalized)
|
||
|
||
|
||
def _lyx2lyx_lines(curfile, lyx2lyx):
    """Convert curfile with lyx2lyx and return the result as a list of lines.

    curfile is the document path as bytes and lyx2lyx the path of the
    conversion script, which is run with PYTHON_BIN. The standard output of
    the script is taken as the converted document and is gunzipped when it
    starts with the gzip magic number. Exits through error() when lyx2lyx
    reports failure.
    """
    if running_on_windows:
        # subprocess cannot cope with unicode arguments and we cannot be
        # sure that curfile can be correctly converted to the current
        # code page. So, we resort to running lyx2lyx on a copy.
        tmp = NamedTemporaryFile(delete=False)
        tmp.close()
        try:
            copyfile(curfile.decode('utf-8'), tmp.name)
            l2l_stdout = subprocess.check_output([PYTHON_BIN, lyx2lyx, tmp.name])
        except subprocess.CalledProcessError:
            error(f'{lyx2lyx} failed to convert "{tostr(curfile)}"')
        finally:
            # error() terminates the script, so the copy has to be removed
            # here or it would be left behind after a failed conversion.
            os.unlink(tmp.name)
    else:
        try:
            l2l_stdout = subprocess.check_output([PYTHON_BIN, lyx2lyx, curfile])
        except subprocess.CalledProcessError:
            error(f'{lyx2lyx} failed to convert "{tostr(curfile)}"')
    if l2l_stdout.startswith(b"\x1f\x8b"):
        l2l_stdout = gzip.GzipFile("", "rb", 0, BytesIO(l2l_stdout)).read()
    elif running_on_windows:
        # For some unknown reason, there can be a spurious '\r' in the line
        # separators, causing spurious empty lines when calling splitlines.
        l2l_stdout = l2l_stdout.replace(b'\r\r\n', b'\r\n')
    return l2l_stdout.splitlines()


def _add_with_default_ext(name, ext, curdir, incfiles):
    """Record the bibliography file name in incfiles if it exists.

    ext is appended unless name already carries it, relative names are taken
    relative to curdir, and the absolute name is appended to incfiles only
    when the file exists and is not listed yet.
    """
    if os.path.splitext(name)[-1] != ext:
        name = name + ext
    if not os.path.isabs(name):
        name = os.path.join(curdir, name)
    if os.path.exists(name):
        resolved = abspath(name)
        if resolved not in incfiles:
            incfiles.append(resolved)


def gather_files(curfile, incfiles, lyx2lyx):
    """Recursively gather the files referenced by curfile.

    curfile is the path (bytes) of the file to scan. Files whose name ends
    in ".lyx" are first converted by the lyx2lyx script; any other file is
    read as is (gunzipped if necessary). Every referenced file that exists
    is appended to the list incfiles as an absolute path, and included
    documents are scanned in turn. For LyX documents the bibtex style and
    database files are collected too. Always returns 0.
    """
    curdir = os.path.dirname(abspath(curfile))
    is_lyxfile = re_lyxfile.search(curfile)

    if is_lyxfile:
        lines = _lyx2lyx_lines(curfile, lyx2lyx)
    else:
        with gzopen(curfile) as stream:
            lines = stream.readlines()

    # Set when the previous line of a LyX document was "\backslash": the
    # current line may then start with a command name typed in ERT.
    maybe_in_ert = False
    for line in lines:
        # Gather used files. The patterns are tried in a fixed order and the
        # first one that matches decides which extensions are tried and
        # whether the file found is scanned as well.
        recursive = True
        extlist = [b'']
        match = re_filename.match(line)
        if not match:
            match = (re_ertinput if maybe_in_ert else re_input).match(line)
        if not match:
            match = re_package.match(line)
            extlist = [b'.sty']
        if not match:
            match = re_class.match(line)
            extlist = [b'.cls']
        if not match:
            match = (re_ertnorecur if maybe_in_ert else re_norecur).match(line)
            extlist = [b'', b'.eps', b'.pdf', b'.png', b'.jpg']
            recursive = False
        maybe_in_ert = is_lyxfile and line == b"\\backslash"
        if match:
            path = match.group(4).strip(b'"')
            if not os.path.isabs(path):
                path = os.path.join(curdir, path)
            file_exists = False
            if not os.path.isdir(path):
                for ext in extlist:
                    if os.path.exists(path + ext):
                        path = path + ext
                        file_exists = True
                        break
            # Files already listed are skipped, which also keeps mutually
            # including documents from recursing forever.
            if file_exists and abspath(path) not in incfiles:
                incfiles.append(abspath(path))
                if recursive:
                    gather_files(path, incfiles, lyx2lyx)
            continue

        if not is_lyxfile:
            continue

        # Gather bibtex *.bst files.
        match = re_options.match(line)
        if match:
            style = match.group(3).strip(b'"')
            if style.startswith(b"bibtotoc,"):
                style = style[9:]
            _add_with_default_ext(style, b'.bst', curdir, incfiles)
            continue

        # Gather bibtex *.bib files.
        match = re_bibfiles.match(line)
        if match:
            for database in match.group(3).strip(b'"').split(b','):
                _add_with_default_ext(database, b'.bib', curdir, incfiles)

    return 0
|
||
|
||
|
||
def find_lyx2lyx(progloc, path):
    """Find a usable version of the lyx2lyx script.

    progloc is the location of this script (str or bytes) and path the list
    of directories in which to look for the LyX executable. Returns the path
    of the lyx2lyx script; exits through error() when it cannot be found.
    """
    # first we will see if the script is roughly where we are
    # i.e., we will assume we are in $SOMEDIR/scripts and look
    # for $SOMEDIR/lyx2lyx/lyx2lyx.
    ourpath = os.path.dirname(abspath(progloc))
    upone = os.path.dirname(ourpath)
    # The components must have the same type as progloc, which is bytes when
    # the arguments were converted on Windows and str otherwise.
    if isinstance(upone, bytes):
        tryit = os.path.join(upone, b"lyx2lyx", b"lyx2lyx")
    else:
        tryit = os.path.join(upone, "lyx2lyx", "lyx2lyx")
    if os.access(tryit, os.X_OK):
        return tryit

    # now we will try to query LyX itself to find the path.
    extlist = ['']
    if "PATHEXT" in os.environ:
        extlist = extlist + os.environ["PATHEXT"].split(os.pathsep)
    lyx_exe, full_path = find_exe(["lyxc", "lyx"], extlist, path)
    if lyx_exe is None:
        error('Cannot find the LyX executable in the path.')
    try:
        # Run exactly the executable that was located; OSError covers the
        # case in which it cannot be started at all.
        cmd_stdout = subprocess.check_output([full_path, '-version'],
                                             stderr=subprocess.STDOUT)
    except (subprocess.CalledProcessError, OSError):
        error('Cannot query LyX about the lyx2lyx script.')

    # check_output() returns bytes, while the patterns below are str
    # patterns, so decode before matching.
    output = cmd_stdout.decode(sys.getfilesystemencoding(), 'replace')
    re_msvc = re.compile(r'^(\s*)(Host type:)(\s+)(win32)$')
    re_sysdir = re.compile(r'^(\s*)(LyX files dir:)(\s+)(\S+)$')
    for line in output.splitlines():
        if re_msvc.match(line):
            # The LyX executable was built with MSVC, so the
            # "LyX files dir:" line is unusable
            basedir = os.path.dirname(os.path.dirname(full_path))
            tryit = os.path.join(basedir, 'Resources', 'lyx2lyx', 'lyx2lyx')
            break
        match = re_sysdir.match(line)
        if match:
            tryit = os.path.join(match.group(4), 'lyx2lyx', 'lyx2lyx')
            break

    # When no line matched, tryit still names the first candidate, which is
    # known not to be usable, so the check below reports the failure.
    if not os.access(tryit, os.X_OK):
        error('Unable to find the lyx2lyx script.')
    return tryit
|
||
|
||
|
||
def main(args):
    """Create an archive with a LyX document and all the files it uses.

    args is the argument vector: the program name followed by the options
    (-h, -t, -z, -l path, -o output_dir) and the LyX document. The elements
    may be str or, as is the case after the conversion done on Windows,
    UTF-8 encoded bytes. Returns 0 on success; every failure is reported
    through error(), which terminates the script.
    """
    ourprog = args[0]

    try:
        if running_on_windows:
            (options, argv) = getopt(args[1:], b"htzl:o:")
        else:
            (options, argv) = getopt(args[1:], "htzl:o:")
    except Exception:
        error(usage(ourprog))

    # we expect the filename to be left
    if len(argv) != 1:
        error(usage(ourprog))

    makezip = running_on_windows
    outdir = ""
    lyx2lyx = None

    for (opt, param) in options:
        # NOTE(review): on Windows getopt is fed bytes; in case it also
        # returns the option names as bytes, normalize them so that the
        # comparisons below can succeed - confirm against lyxwin_getopt.
        if isinstance(opt, bytes):
            opt = opt.decode('ascii', 'replace')
        if opt == "-h":
            print(usage(ourprog))
            sys.exit(0)
        elif opt == "-t":
            makezip = False
        elif opt == "-z":
            makezip = True
        elif opt == "-l":
            lyx2lyx = param
        elif opt == "-o":
            # The archive name is handled as str, so the output directory
            # must be a str too or joining the two would fail.
            outdir = param.decode('utf-8') if isinstance(param, bytes) else param
            if not os.path.isdir(outdir):
                error('Error: "%s" is not a directory.' % outdir)

    # All paths are handled as bytes from here on (the regular expressions
    # and gzopen() require that), but on *nix the arguments arrive as str.
    lyxfile = argv[0]
    if isinstance(lyxfile, str):
        lyxfile = os.fsencode(lyxfile)
    if not os.path.exists(lyxfile):
        error('File "%s" not found.' % tostr(lyxfile))

    # Check that it actually is a LyX document
    with gzopen(lyxfile) as stream:
        line = stream.readline()
    if not line.startswith(b'#LyX'):
        error('File "%s" is not a LyX document.' % tostr(lyxfile))

    if makezip:
        import zipfile
        ar_ext = b".zip"
    else:
        import tarfile
        ar_ext = b".tar.gz"

    # Replace the .lyx suffix with the archive extension. Without such a
    # suffix the extension is appended instead, otherwise the archive would
    # get the name of the document and overwrite it.
    source = abspath(lyxfile)
    archive, replaced = re_lyxfile.subn(ar_ext, source)
    if not replaced:
        archive = source + ar_ext
    ar_name = archive.decode('utf-8')
    if outdir:
        ar_name = os.path.join(abspath(outdir), os.path.basename(ar_name))

    path = os.environ.get("PATH", os.defpath).split(os.pathsep)

    if lyx2lyx is None:
        lyx2lyx = find_lyx2lyx(ourprog, path)

    # Initialize the list with the specified LyX file and recursively
    # gather all required files (also from child documents).
    incfiles = [abspath(lyxfile)]
    gather_files(lyxfile, incfiles, lyx2lyx)

    # Find the topmost dir common to all files
    path_sep = os.path.sep.encode('utf-8')
    if len(incfiles) > 1:
        topdir = os.path.commonprefix(incfiles)
        # As os.path.commonprefix() works on a character by character basis,
        # rather than on path elements, we need to remove any trailing bytes.
        topdir = topdir.rpartition(path_sep)[0] + path_sep
    else:
        topdir = os.path.dirname(incfiles[0]) + path_sep

    # Remove the prefix common to all paths in the list, drop duplicates
    # and sort the result.
    incfiles = sorted({name.replace(topdir, b'', 1) for name in incfiles})

    # The archive members are added by their names relative to topdir.
    if topdir:
        os.chdir(topdir)

    # Create the archive
    try:
        if makezip:
            with zipfile.ZipFile(ar_name, "w", zipfile.ZIP_DEFLATED) as package:
                for member in incfiles:
                    package.write(member.decode('utf-8'))
        else:
            with tarfile.open(ar_name, "w:gz") as package:
                for member in incfiles:
                    package.add(member.decode('utf-8'))
    except Exception:
        error('Failed to create LyX archive "%s"' % ar_name)

    print('LyX archive "%s" created successfully.' % ar_name)
    return 0
|
||
|
||
|
||
if __name__ == "__main__":
    if running_on_windows:
        # On Windows all arguments are handled as utf-8 encoded bytes.
        # Python 3 already provides sys.argv in unicode format, without the
        # interpreter and its options, so encoding it is all that is needed;
        # parsing the command line again through the Windows API was only
        # required to work around <http://bugs.python.org/issue2128> in
        # Python 2.
        # In this way, when launched from the command line, lyxpak.py can deal
        # with any non-ascii names. Unfortunately, this is not the case when
        # launched by LyX, because LyX converts the arguments of the converters
        # to the filesystem encoding. On Windows this corresponds to the current
        # code page and not to the UTF-16 encoding used by NTFS, such that they
        # are transliterated if not exactly encodable. As an example, α may
        # become a, β may become ß, and so on. However, this is a problem only
        # if the full path of the LyX document contains an unencodable character
        # as all other paths are extracted from the document in utf-8 format.
        sys.argv = [arg.encode('utf-8') for arg in sys.argv]

    main(sys.argv)
|