From e6e3777363bfb4ab11ffe2c4e774e30eb639a4fe Mon Sep 17 00:00:00 2001 From: Juergen Spitzmueller Date: Sat, 7 Jul 2018 15:25:35 +0200 Subject: [PATCH] Add support for bib files encoding File format change. Fixes: #6223 --- development/FORMAT | 7 +++ lib/doc/UserGuide.lyx | 10 +++ lib/lyx2lyx/lyx_2_4.py | 102 ++++++++++++++++++++++++++++++- src/Buffer.cpp | 9 +-- src/Buffer.h | 2 +- src/BufferParams.cpp | 5 ++ src/BufferParams.h | 9 +++ src/frontends/qt4/GuiBibtex.cpp | 25 ++++++++ src/frontends/qt4/ui/BibtexUi.ui | 82 +++++++++++++++++-------- src/insets/InsetBibtex.cpp | 44 +++++++++++-- src/tex2lyx/TODO.txt | 3 + src/version.h | 4 +- 12 files changed, 265 insertions(+), 37 deletions(-) diff --git a/development/FORMAT b/development/FORMAT index ef3d7f87f9..edab9cb82d 100644 --- a/development/FORMAT +++ b/development/FORMAT @@ -7,6 +7,13 @@ changes happened in particular if possible. A good example would be ----------------------- +2018-07-07 Jürgen Spitzmüller + * format incremented to 556: Support for bib file encoding. + - New param "encoding" for CommandInset bibtex + This will be used for biblatex output as bibencoding package option, + for BibTeX, \bibliography will be wrapped into + \bgroup\inputencoding{}...\egroup + 2018-07-01 Jürgen Spitzmüller * format incremented to 555: Support varwidth's V tabular column type. This effectively enables linebreaks, multipars and layout changes in diff --git a/lib/doc/UserGuide.lyx b/lib/doc/UserGuide.lyx index c253e22704..119259dd97 100644 --- a/lib/doc/UserGuide.lyx +++ b/lib/doc/UserGuide.lyx @@ -30183,6 +30183,16 @@ Content \family default drop box you can select whether to include all the entries in the database in the document or just the cited references. + +\change_inserted -712698321 1530969762 + The +\family sans +Encoding +\family default + drop box lets you specify the encoding of the database(s), should this + differ from the encoding of the document. 
+\change_unchanged
+
 \end_layout
 
 \begin_layout Standard
diff --git a/lib/lyx2lyx/lyx_2_4.py b/lib/lyx2lyx/lyx_2_4.py
index 9b388aa788..c00e630148 100644
--- a/lib/lyx2lyx/lyx_2_4.py
+++ b/lib/lyx2lyx/lyx_2_4.py
@@ -439,6 +439,117 @@ def revert_vcolumns(document):
         add_to_preamble(document, ["\\usepackage{varwidth}"])
 
 
+def revert_bibencoding(document):
+    """Revert the "encoding" parameter of bibtex insets.
+
+    The parameter is removed from each bibtex inset. With biblatex
+    the encoding goes to \\biblio_options as bibencoding=<name>;
+    otherwise the inset is wrapped into ERTs that emit
+    \\bgroup\\inputencoding{<name>} ... \\egroup.
+    """
+
+    # Get cite engine
+    engine = "basic"
+    i = find_token(document.header, "\\cite_engine", 0)
+    if i == -1:
+        document.warning("Malformed document! Missing \\cite_engine")
+    else:
+        engine = get_value(document.header, "\\cite_engine", i)
+
+    # Check if biblatex
+    biblatex = engine in ["biblatex", "biblatex-natbib"]
+
+    # Map lyx to latex encoding names
+    encodings = {
+        "utf8" : "utf8",
+        "utf8x" : "utf8x",
+        "armscii8" : "armscii8",
+        "iso8859-1" : "latin1",
+        "iso8859-2" : "latin2",
+        "iso8859-3" : "latin3",
+        "iso8859-4" : "latin4",
+        "iso8859-5" : "iso88595",
+        "iso8859-6" : "8859-6",
+        "iso8859-7" : "iso-8859-7",
+        "iso8859-8" : "8859-8",
+        "iso8859-9" : "latin5",
+        "iso8859-13" : "latin7",
+        "iso8859-15" : "latin9",
+        "iso8859-16" : "latin10",
+        "applemac" : "applemac",
+        "cp437" : "cp437",
+        "cp437de" : "cp437de",
+        "cp850" : "cp850",
+        "cp852" : "cp852",
+        "cp855" : "cp855",
+        "cp858" : "cp858",
+        "cp862" : "cp862",
+        "cp865" : "cp865",
+        "cp866" : "cp866",
+        "cp1250" : "cp1250",
+        "cp1251" : "cp1251",
+        "cp1252" : "cp1252",
+        "cp1255" : "cp1255",
+        "cp1256" : "cp1256",
+        "cp1257" : "cp1257",
+        "koi8-r" : "koi8-r",
+        "koi8-u" : "koi8-u",
+        "pt154" : "pt154",
+        "utf8-platex" : "utf8",
+        "ascii" : "ascii"
+    }
+
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
+        if i == -1:
+            break
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Can't find end of bibtex inset at line %d!!" %(i))
+            i += 1
+            continue
+        encoding = get_quoted_value(document.body, "encoding", i, j)
+        if not encoding:
+            i += 1
+            continue
+        # remove encoding line
+        k = find_token(document.body, "encoding", i, j)
+        if k != -1:
+            del document.body[k]
+        # "default" is the document encoding: there is nothing to emit
+        if encoding == "default":
+            i += 1
+            continue
+        # A name missing from the map would otherwise raise KeyError
+        latex_enc = encodings.get(encoding)
+        if latex_enc is None:
+            document.warning("Unknown bib file encoding '%s' at line %d, ignoring." % (encoding, i))
+            i += 1
+            continue
+        # Re-find inset end line
+        j = find_end_of_inset(document.body, i)
+        if biblatex:
+            biblio_options = ""
+            h = find_token(document.header, "\\biblio_options", 0)
+            if h != -1:
+                biblio_options = get_value(document.header, "\\biblio_options", h)
+                if not "bibencoding" in biblio_options:
+                    document.header[h] += ",bibencoding=%s" % latex_enc
+            else:
+                bs = find_token(document.header, "\\biblatex_bibstyle", 0)
+                if bs == -1:
+                    # this should not happen
+                    document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!")
+                else:
+                    document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + latex_enc]
+        else:
+            document.body[j+1:j+1] = put_cmd_in_ert("\\egroup")
+            document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + latex_enc + "}")
+
+        i = j + 1
+
+
 ##
 # Conversion hub
 #
@@ -455,10 +566,12 @@ convert = [
            [552, []],
            [553, []],
            [554, []],
-           [555, []]
+           [555, []],
+           [556, []]
           ]
 
 revert =  [
+           [555, [revert_bibencoding]],
            [554, [revert_vcolumns]],
            [553, [revert_stretchcolumn]],
            [552, [revert_tuftecite]],
diff --git a/src/Buffer.cpp b/src/Buffer.cpp
index 3030340dfc..8ca74103a2 100644
--- a/src/Buffer.cpp
+++ b/src/Buffer.cpp
@@ -2420,7 +2420,8 @@ BiblioInfo const & Buffer::bibInfo() const
 }
 
 
-void Buffer::registerBibfiles(FileNamePairList const & bf) const {
+void Buffer::registerBibfiles(FileNamePairList const & bf) const
+{
 	// We register the bib files in the master buffer,
 	// if there is one, but also in every single buffer,
 	// in case a child is compiled alone.
@@ -2468,17 +2469,17 @@ void Buffer::checkIfBibInfoCacheIsValid() const
 }
 
 
-void Buffer::reloadBibInfoCache() const
+void Buffer::reloadBibInfoCache(bool const force) const
 {
 	// use the master's cache
 	Buffer const * const tmp = masterBuffer();
 	if (tmp != this) {
-		tmp->reloadBibInfoCache();
+		tmp->reloadBibInfoCache(force);
 		return;
 	}
 
 	checkIfBibInfoCacheIsValid();
-	if (d->bibinfo_cache_valid_)
+	if (d->bibinfo_cache_valid_ && !force)
 		return;
 
 	d->bibinfo_.clear();
diff --git a/src/Buffer.h b/src/Buffer.h
index eb6c1a8767..50d086f287 100644
--- a/src/Buffer.h
+++ b/src/Buffer.h
@@ -505,7 +505,7 @@ public:
 	/// whether the cache is valid. If so, we do nothing. If not, then we
 	/// reload all the BibTeX info.
 	/// Note that this operates on the master document.
-	void reloadBibInfoCache() const;
+	void reloadBibInfoCache(bool const force = false) const;
 	/// \return the bibliography information for this buffer's master,
 	/// or just for it, if it isn't a child.
 	BiblioInfo const & masterBibInfo() const;
diff --git a/src/BufferParams.cpp b/src/BufferParams.cpp
index 634f99f0b4..867f04536a 100644
--- a/src/BufferParams.cpp
+++ b/src/BufferParams.cpp
@@ -2352,6 +2352,11 @@ bool BufferParams::writeLaTeX(otexstream & os, LaTeXFeatures & features,
 			opts += delim + "backend=bibtex";
 			delim = ",";
 		}
+		if (!bib_encoding.empty() && encodings.fromLyXName(bib_encoding)) {
+			opts += delim + "bibencoding="
+				+ encodings.fromLyXName(bib_encoding)->latexName();
+			delim = ",";
+		}
 		if (!biblio_opts.empty())
 			opts += delim + biblio_opts;
 		if (!opts.empty())
diff --git a/src/BufferParams.h b/src/BufferParams.h
index d5e17d1118..0fbc2e707a 100644
--- a/src/BufferParams.h
+++ b/src/BufferParams.h
@@ -499,6 +499,10 @@ public:
 	std::string biblatex_bibstyle;
 	/// The biblatex citation style
 	std::string biblatex_citestyle;
+	/// Set the bib file encoding (for biblatex)
+	void setBibEncoding(std::string const & s) { bib_encoding = s; }
+	/// Get the bib file encoding (for biblatex)
+	std::string const & bibEncoding() const { return bib_encoding; }
 	/// options for pdf output
 	PDFOptions & pdfoptions();
 
@@ -572,6 +576,11 @@ private:
 	CiteEngineType cite_engine_type_;
 	/// the default BibTeX style file for the document
 	std::string biblio_style;
+	/// The encoding of the bib files, for Biblatex
+	/// (only one supported currently)
+	// FIXME: biblatex 3.12 introduces per-file
+	// encoding options. Update once that's spread.
+	std::string bib_encoding;
 	/// Split bibliography?
 	bool use_bibtopic;
 	///
diff --git a/src/frontends/qt4/GuiBibtex.cpp b/src/frontends/qt4/GuiBibtex.cpp
index 1c3917b94b..3d958c5102 100644
--- a/src/frontends/qt4/GuiBibtex.cpp
+++ b/src/frontends/qt4/GuiBibtex.cpp
@@ -18,6 +18,7 @@
 #include "Buffer.h"
 #include "BufferParams.h"
 #include "CiteEnginesList.h"
+#include "Encoding.h"
 #include "FuncRequest.h"
 #include "LyXRC.h"
 #include "qt_helpers.h"
@@ -85,6 +86,8 @@ GuiBibtex::GuiBibtex(GuiView & lv)
 		this, SLOT(rescanClicked()));
 	connect(biblatexOptsLE, SIGNAL(textChanged(QString)),
 		this, SLOT(change_adaptor()));
+	connect(bibEncodingCO, SIGNAL(activated(int)),
+		this, SLOT(change_adaptor()));
 
 	add_ = new GuiBibtexAddDialog(this);
 	add_bc_.setPolicy(ButtonPolicy::OkCancelPolicy);
@@ -123,9 +126,23 @@ GuiBibtex::GuiBibtex(GuiView & lv)
 	bc().addReadOnly(styleCB);
 	bc().addReadOnly(bibtocCB);
 	bc().addReadOnly(addBibPB);
+	bc().addReadOnly(bibEncodingCO);
 	// Delete/Up/Down are handled with more conditions in
 	// databaseChanged().
 
+	// Always put the default encoding first; the rest go through a QMap keyed by GUI name.
+	bibEncodingCO->addItem(qt_("Document Encoding"), "default");
+	QMap<QString, QString> encodinglist;
+	for (auto const & encvar : encodings) {
+		if (!encvar.unsafe() && !encvar.guiName().empty())
+			encodinglist.insert(qt_(encvar.guiName()), toqstr(encvar.name()));
+	}
+	QMap<QString, QString>::const_iterator it = encodinglist.constBegin();
+	while (it != encodinglist.constEnd()) {
+		bibEncodingCO->addItem(it.key(), it.value());
+		++it;
+	}
+
 	// Make sure the delete/up/down buttons are disabled if necessary.
databaseChanged(); } @@ -350,6 +367,12 @@ void GuiBibtex::updateContents() btprint = from_ascii("btPrintCited"); btPrintCO->setCurrentIndex(btPrintCO->findData(toqstr(btprint))); + docstring encoding = params_["encoding"]; + if (encoding.empty()) + // default + encoding = from_ascii("default"); + bibEncodingCO->setCurrentIndex(bibEncodingCO->findData(toqstr(encoding))); + // Only useful for biblatex biblatexOptsLA->setVisible(biblatex); biblatexOptsLE->setVisible(biblatex); @@ -421,6 +444,8 @@ void GuiBibtex::applyView() params_["biblatexopts"] = qstring_to_ucs4(biblatexOptsLE->text()); params_["btprint"] = qstring_to_ucs4(btPrintCO->itemData(btPrintCO->currentIndex()).toString()); + + params_["encoding"] = qstring_to_ucs4(bibEncodingCO->itemData(bibEncodingCO->currentIndex()).toString()); } diff --git a/src/frontends/qt4/ui/BibtexUi.ui b/src/frontends/qt4/ui/BibtexUi.ui index f72ba6ce29..0cac4098b9 100644 --- a/src/frontends/qt4/ui/BibtexUi.ui +++ b/src/frontends/qt4/ui/BibtexUi.ui @@ -6,8 +6,8 @@ 0 0 - 361 - 397 + 368 + 441 @@ -17,7 +17,7 @@ true - + @@ -39,7 +39,7 @@ - + @@ -82,7 +82,7 @@ - + Qt::Vertical @@ -98,7 +98,7 @@ - + Scan for new databases and styles @@ -108,7 +108,7 @@ - + @@ -127,7 +127,7 @@ - + Choose a style file @@ -143,7 +143,7 @@ - + Choose a style file @@ -156,7 +156,7 @@ - + @@ -175,7 +175,7 @@ - + This bibliography section contains... 
@@ -197,7 +197,7 @@ - + Qt::Vertical @@ -213,17 +213,7 @@ - - - - Add bibliography to the table of contents - - - Add bibliography to &TOC - - - - + @@ -244,7 +234,7 @@ - + 6 @@ -302,6 +292,50 @@ + + + + + + &Encoding: + + + bibEncodingCO + + + + + + + If your bibliography databases use a different encoding than the LyX document, specify it here + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + + + + Add bibliography to the table of contents + + + Add bibliography to &TOC + + +
diff --git a/src/insets/InsetBibtex.cpp b/src/insets/InsetBibtex.cpp
index 0db4555ce0..d2e7284052 100644
--- a/src/insets/InsetBibtex.cpp
+++ b/src/insets/InsetBibtex.cpp
@@ -26,6 +26,7 @@
 #include "FuncRequest.h"
 #include "FuncStatus.h"
 #include "LaTeXFeatures.h"
+#include "output_latex.h"
 #include "output_xhtml.h"
 #include "OutputParams.h"
 #include "PDFOptions.h"
@@ -69,6 +70,7 @@ ParamInfo const & InsetBibtex::findInfo(string const & /* cmdName */)
 		param_info_.add("btprint", ParamInfo::LATEX_OPTIONAL);
 		param_info_.add("bibfiles", ParamInfo::LATEX_REQUIRED);
 		param_info_.add("options", ParamInfo::LYX_INTERNAL);
+		param_info_.add("encoding", ParamInfo::LYX_INTERNAL);
 		param_info_.add("biblatexopts", ParamInfo::LATEX_OPTIONAL);
 	}
 	return param_info_;
@@ -326,6 +328,19 @@ void InsetBibtex::latex(otexstream & os, OutputParams const & runparams) const
 				       _("There are spaces in the path to your BibTeX style file.\n"
 						  "BibTeX will be unable to find it."));
 		}
+		// Encoding: switch only if the name resolves (fromLyXName may return null) and differs
+		bool encoding_switched = false;
+		Encoding const * const save_enc = runparams.encoding;
+		docstring const encoding = getParam("encoding");
+		if (!encoding.empty() && encoding != from_ascii("default")) {
+			Encoding const * const enc = encodings.fromLyXName(to_ascii(encoding));
+			if (enc && enc != runparams.encoding) {
+				os << "\\bgroup";
+				switchEncoding(os.os(), buffer().params(), runparams, *enc, true);
+				runparams.encoding = enc;
+				encoding_switched = true;
+			}
+		}
 		// Handle the bibtopic case
 		if (!db_out.empty() && buffer().masterParams().useBibtopic()) {
 			os << "\\begin{btSect}";
@@ -357,6 +372,13 @@ void InsetBibtex::latex(otexstream & os, OutputParams const & runparams) const
 			}
 			os << "\\bibliography{" << getStringFromVector(db_out) << "}\n";
 		}
+		if (encoding_switched){
+			// Switch back
+			switchEncoding(os.os(), buffer().params(),
+				       runparams, *save_enc, true, true);
+			os << "\\egroup" << breakln;
+			runparams.encoding = save_enc;
+		}
 	}
 }
 
@@ -645,9 +667,9 @@ void InsetBibtex::parseBibTeXFiles(FileNameList & checkedFiles) const
 	// bibtex does.
 	//
 	// Officially bibtex does only support ASCII, but in practice
-	// you can use the encoding of the main document as long as
-	// some elements like keys and names are pure ASCII. Therefore
-	// we convert the file from the buffer encoding.
+	// you can use any encoding as long as some elements like keys
+	// and names are pure ASCII. We support specifying an encoding,
+	// and we convert the file from that (default is buffer encoding).
 	// We don't restrict keys to ASCII in LyX, since our own
 	// InsetBibitem can generate non-ASCII keys, and nonstandard
 	// 8bit clean bibtex forks exist.
@@ -665,8 +687,12 @@ void InsetBibtex::parseBibTeXFiles(FileNameList & checkedFiles) const
 		else
 			// record that we check this.
 			checkedFiles.push_back(bibfile);
+		string encoding = buffer().masterParams().encoding().iconvName();
+		string const ienc = to_ascii(params()["encoding"]);
+		if (!ienc.empty() && ienc != "default" && encodings.fromLyXName(ienc))
+			encoding = encodings.fromLyXName(ienc)->iconvName();
 		ifdocstream ifs(bibfile.toFilesystemEncoding().c_str(),
-			ios_base::in, buffer().masterParams().encoding().iconvName());
+			ios_base::in, encoding);
 
 		char_type ch;
 		VarMap strings;
@@ -892,8 +918,16 @@ void InsetBibtex::validate(LaTeXFeatures & features) const
 }
 
 
-void InsetBibtex::updateBuffer(ParIterator const &, UpdateType) {
+void InsetBibtex::updateBuffer(ParIterator const &, UpdateType)
+{
 	buffer().registerBibfiles(getBibFiles());
+	// record encoding of bib files for biblatex
+	string const enc = (params()["encoding"] == from_ascii("default")) ?
+				string() : to_ascii(params()["encoding"]);
+	if (buffer().params().bibEncoding() != enc) {
+		buffer().params().setBibEncoding(enc);
+		buffer().reloadBibInfoCache(true);
+	}
 }
 
 
diff --git a/src/tex2lyx/TODO.txt b/src/tex2lyx/TODO.txt
index d93b5fa2c6..b2b540644f 100644
--- a/src/tex2lyx/TODO.txt
+++ b/src/tex2lyx/TODO.txt
@@ -60,6 +60,9 @@ Format LaTeX feature                        LyX feature
        \begin{landscape}...\end{landscape}  \begin_inset Flex Landscape
        with longtable content:
 555    V column type (varwidth package)     Automatically detected with newlines, paragraph breaks and environment content in cells of rows
+556    Bib files encoding
+       \begin_inset CommandInset bibtex     Biblatex: bibencoding= [latex name]
+       encoding= [lyx name]                 BibTeX: \bgroup\inputencoding{}...\egroup
 
 
 
diff --git a/src/version.h b/src/version.h
index ba838b47d0..e8ca153473 100644
--- a/src/version.h
+++ b/src/version.h
@@ -32,8 +32,8 @@ extern char const * const lyx_version_info;
 
 // Do not remove the comment below, so we get merge conflict in
 // independent branches. Instead add your own.
-#define LYX_FORMAT_LYX 555 // spitz: varwidth V columns -#define LYX_FORMAT_TEX2LYX 555 +#define LYX_FORMAT_LYX 556 // spitz: bib encoding +#define LYX_FORMAT_TEX2LYX 556 #if LYX_FORMAT_TEX2LYX != LYX_FORMAT_LYX #ifndef _MSC_VER