From 5bb22d9498da67b2cc279b9c07271fb4c033ca39 Mon Sep 17 00:00:00 2001 From: Richard Heck Date: Thu, 16 Aug 2007 01:59:20 +0000 Subject: [PATCH] Re-write of the BibTeX representation. The main change is that we now have a structure representing field->value instead of just a single string with all the data. The data structures are defined in src/Biblio_typedefs.h, and the main changes are to the parser code in src/insets/InsetBibtex.cpp. -src/Biblio_typedefs.h Contains typedefs for new representation. Separating them out limits how much gets #include'd by other files, and also resolves a circularity problem with Buffer.h. -src/Biblio.{h,cpp} Signature changes and massive simplifications to routines that report BibTeX data, since we now have an articulate representation. -src/insets/InsetBibtex.{h,cpp} Re-write the parser code so we store a key->value map of the BibTeX data rather than just one long string. This is the main work. -src/frontends/controllers/ControlCitation.{h,cpp} -src/insets/InsetBibitem.{h,cpp} -src/insets/InsetCitation.cpp Adaptations and simplifications. -src/insets/Inset.h -src/Buffer.{h,cpp} -src/insets/InsetInclude.{h,cpp} Signature changes. 
-src/Makefile.am -development/scons/scons_manifest.py Add src/Biblio_typedefs.h git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@19598 a592a061-630c-0410-9148-cb99ea01b6c8 --- development/scons/scons_manifest.py | 1 + src/Biblio.cpp | 413 ++++++------------ src/Biblio.h | 39 +- src/Biblio_typedefs.h | 51 +++ src/Buffer.cpp | 9 +- src/Buffer.h | 3 +- src/Makefile.am | 1 + src/frontends/controllers/ControlCitation.cpp | 23 +- src/frontends/controllers/ControlCitation.h | 2 +- src/insets/Inset.h | 4 +- src/insets/InsetBibitem.cpp | 12 +- src/insets/InsetBibitem.h | 4 +- src/insets/InsetBibtex.cpp | 116 ++--- src/insets/InsetBibtex.h | 7 +- src/insets/InsetCitation.cpp | 25 +- src/insets/InsetInclude.cpp | 4 +- src/insets/InsetInclude.h | 8 +- 17 files changed, 306 insertions(+), 416 deletions(-) create mode 100644 src/Biblio_typedefs.h diff --git a/development/scons/scons_manifest.py b/development/scons/scons_manifest.py index b45ac7ef61..739713fb37 100644 --- a/development/scons/scons_manifest.py +++ b/development/scons/scons_manifest.py @@ -34,6 +34,7 @@ src_header_files = Split(''' ASpell_local.h Author.h Biblio.h + Biblio_typedefs.h Bidi.h Box.h BranchList.h diff --git a/src/Biblio.cpp b/src/Biblio.cpp index c3c53998ee..e314afb6d6 100644 --- a/src/Biblio.cpp +++ b/src/Biblio.cpp @@ -49,6 +49,22 @@ using support::trim; namespace biblio { + +BibTeXInfo::BibTeXInfo(): isBibTeX(true) +{} + + +BibTeXInfo::BibTeXInfo(bool isBibTeX): isBibTeX(isBibTeX) +{}; + + +bool BibTeXInfo::hasKey(docstring const & key) +{ + const_iterator it = find(key); + return it == end(); +} + + namespace { vector const init_possible_cite_commands() @@ -109,8 +125,6 @@ namespace { } // namespace anon -const docstring TheBibliographyRef(from_ascii("TheBibliographyRef")); - string const asValidLatexCommand(string const & input, CiteEngine const engine) { @@ -182,89 +196,58 @@ docstring const familyName(docstring const & name) } -docstring const getAbbreviatedAuthor(InfoMap const & map, string 
const & key) +docstring const getAbbreviatedAuthor(BibKeyList const & map, string const & key) { BOOST_ASSERT(!map.empty()); - - InfoMap::const_iterator it = map.find(key); + BibKeyList::const_iterator it = map.find(key); if (it == map.end()) return docstring(); - docstring const & data = it->second; - -// Is the entry a BibTeX one or one from lyx-layout "bibliography"? - docstring::size_type const pos = data.find(TheBibliographyRef); - if (pos != docstring::npos) { - if (pos <= 2) { - return docstring(); - } - - docstring const opt = trim(data.substr(0, pos - 1)); - if (opt.empty()) - return docstring(); - - docstring authors; - split(opt, authors, '('); - return authors; - } - - docstring author = parseBibTeX(data, "author"); - - if (author.empty()) - author = parseBibTeX(data, "editor"); - + BibTeXInfo const & data = it->second; + + if (!data.isBibTeX) + return docstring(); + + docstring author = getValueForKey(data, "author"); + if (author.empty()) { - author = parseBibTeX(data, "key"); - if (author.empty()) - // FIXME UNICODE - return from_utf8(key); - return author; + author = getValueForKey(data, "editor"); + if (author.empty()) { + author = getValueForKey(data, "key"); + if (author.empty()) + // FIXME UNICODE + return from_utf8(key); + else + return author; //this is the key + } } + //OK, we've got some names. Let's format them. 
+ //try to split the author list on " and " vector const authors = getVectorFromString(author, from_ascii(" and ")); - if (authors.empty()) - return author; - + if (authors.size() == 2) return bformat(_("%1$s and %2$s"), - familyName(authors[0]), familyName(authors[1])); - - if (authors.size() > 2) + familyName(authors[0]), familyName(authors[1])); + else if (authors.size() > 2) return bformat(_("%1$s et al."), familyName(authors[0])); - - return familyName(authors[0]); + else + return familyName(authors[0]); } -docstring const getYear(InfoMap const & map, string const & key) +docstring const getYear(BibKeyList const & map, string const & key) { BOOST_ASSERT(!map.empty()); - - InfoMap::const_iterator it = map.find(key); + + BibKeyList::const_iterator it = map.find(key); if (it == map.end()) return docstring(); - docstring const & data = it->second; - -// Is the entry a BibTeX one or one from lyx-layout "bibliography"? - docstring::size_type const pos = data.find(TheBibliographyRef); - if (pos != docstring::npos) { - if (pos <= 2) { - return docstring(); - } - - docstring const opt = - trim(data.substr(0, pos - 1)); - if (opt.empty()) - return docstring(); - - docstring authors; - docstring const tmp = split(opt, authors, '('); - docstring year; - split(tmp, year, ')'); - return year; - - } - - docstring year = parseBibTeX(data, "year"); + BibTeXInfo const & data = it->second; + + if (!data.isBibTeX) + return docstring(); + + docstring year = getValueForKey(data, "year"); if (year.empty()) year = _("No year"); @@ -284,11 +267,11 @@ class compareNoCase: public std::binary_function } // namespace anon -vector const getKeys(InfoMap const & map) +vector const getKeys(BibKeyList const & map) { vector bibkeys; - InfoMap::const_iterator it = map.begin(); - InfoMap::const_iterator end = map.end(); + BibKeyList::const_iterator it = map.begin(); + BibKeyList::const_iterator end = map.end(); for (; it != end; ++it) { bibkeys.push_back(it->first); } @@ -298,72 +281,67 @@ vector 
const getKeys(InfoMap const & map) } -docstring const getInfo(InfoMap const & map, string const & key) +docstring const getInfo(BibKeyList const & map, string const & key) { BOOST_ASSERT(!map.empty()); - - InfoMap::const_iterator it = map.find(key); + + BibKeyList::const_iterator it = map.find(key); if (it == map.end()) return docstring(); - docstring const & data = it->second; - -// is the entry a BibTeX one or one from lyx-layout "bibliography"? - docstring::size_type const pos = data.find(TheBibliographyRef); - if (pos != docstring::npos) { - docstring::size_type const pos2 = pos + TheBibliographyRef.size(); - docstring const info = trim(data.substr(pos2)); - return info; + BibTeXInfo const & data = it->second; + + if (!data.isBibTeX) { + BibTeXInfo::const_iterator it3 = data.find(from_ascii("ref")); + return it3->second; } - -// Search for all possible "required" keys - docstring author = parseBibTeX(data, "author"); + + //FIXME + //This could be made a lot better using the biblio::TheEntryType + //field to customize the output based upon entry type. 
+ + //Search for all possible "required" fields + docstring author = getValueForKey(data, "author"); if (author.empty()) - author = parseBibTeX(data, "editor"); - - docstring year = parseBibTeX(data, "year"); - docstring title = parseBibTeX(data, "title"); - docstring booktitle = parseBibTeX(data, "booktitle"); - docstring chapter = parseBibTeX(data, "chapter"); - docstring number = parseBibTeX(data, "number"); - docstring volume = parseBibTeX(data, "volume"); - docstring pages = parseBibTeX(data, "pages"); - docstring annote = parseBibTeX(data, "annote"); - docstring media = parseBibTeX(data, "journal"); - if (media.empty()) - media = parseBibTeX(data, "publisher"); - if (media.empty()) - media = parseBibTeX(data, "school"); - if (media.empty()) - media = parseBibTeX(data, "institution"); - - odocstringstream result; - if (!author.empty()) - result << author << ", "; - if (!title.empty()) - result << title; - if (!booktitle.empty()) - result << ", in " << booktitle; - if (!chapter.empty()) - result << ", Ch. " << chapter; - if (!media.empty()) - result << ", " << media; - if (!volume.empty()) - result << ", vol. " << volume; - if (!number.empty()) - result << ", no. " << number; - if (!pages.empty()) - result << ", pp. " << pages; - if (!year.empty()) - result << ", " << year; - if (!annote.empty()) - result << "\n\n" << annote; - - docstring const result_str = rtrim(result.str()); - if (!result_str.empty()) - return result_str; - -// This should never happen (or at least be very unusual!) - return data; + author = getValueForKey(data, "editor"); + + docstring year = getValueForKey(data, "year"); + docstring title = getValueForKey(data, "title"); + docstring docLoc = getValueForKey(data, "pages"); + if (docLoc.empty()) { + docLoc = getValueForKey(data, "chapter"); + if (!docLoc.empty()) + docLoc = from_ascii("Ch. ") + docLoc; + } else + docLoc = from_ascii("pp. 
") + docLoc; + docstring media = getValueForKey(data, "journal"); + if (media.empty()) { + media = getValueForKey(data, "publisher"); + if (media.empty()) { + media = getValueForKey(data, "school"); + if (media.empty()) + media = getValueForKey(data, "institution"); + } + } + docstring volume = getValueForKey(data, "volume"); + + odocstringstream result; + if (!author.empty()) + result << author << ", "; + if (!title.empty()) + result << title; + if (!media.empty()) + result << ", " << media; + if (!year.empty()) + result << ", " << year; + if (!docLoc.empty()) + result << ", " << docLoc; + + docstring const result_str = rtrim(result.str()); + if (!result_str.empty()) + return result_str; + + // This should never happen (or at least be very unusual!) + return docstring(); } @@ -400,37 +378,33 @@ class RegexMatch : public std::unary_function public: // re and icase are used to construct an instance of boost::RegEx. // if icase is true, then matching is insensitive to case - RegexMatch(InfoMap const & m, string const & re, bool icase) + RegexMatch(BibKeyList const & m, string const & re, bool icase) : map_(m), regex_(re, icase) {} bool operator()(string const & key) const { -// the data searched is the key + its associated BibTeX/biblio -// fields - string data = key; - InfoMap::const_iterator info = map_.find(key); - if (info != map_.end()) - // FIXME UNICODE - data += ' ' + to_utf8(info->second); - -// Attempts to find a match for the current RE -// somewhere in data. + //FIXME This should search the monolith. 
+ BibKeyList::const_iterator info = map_.find(key); + if (info == map_.end()) + return false; + + BibTeXInfo const kvm = info->second; + string const data = key + ' ' + to_utf8(kvm.allData); + return boost::regex_search(data, regex_); } private: - InfoMap const map_; + BibKeyList const map_; mutable boost::regex regex_; }; } // namespace anon -vector::const_iterator searchKeys(InfoMap const & theMap, +vector::const_iterator searchKeys(BibKeyList const & theMap, vector const & keys, string const & search_expr, vector::const_iterator start, - Search type, - Direction dir, - bool caseSensitive) + Search type, Direction dir, bool caseSensitive) { // Preliminary checks if (start < keys.begin() || start >= keys.end()) @@ -471,144 +445,17 @@ vector::const_iterator searchKeys(InfoMap const & theMap, } -docstring const parseBibTeX(docstring data, string const & findkey) +docstring const getValueForKey(BibTeXInfo const & data, string const & findkey) { - // at first we delete all characters right of '%' and - // replace tabs through a space and remove leading spaces - // we read the data line by line so that the \n are - // ignored, too. - docstring data_; - int Entries = 0; - docstring dummy = token(data,'\n', Entries); - while (!dummy.empty()) { - // no tabs - dummy = subst(dummy, '\t', ' '); - // no leading spaces - dummy = ltrim(dummy); - // ignore lines with a beginning '%' or ignore all right of % - docstring::size_type const idx = - dummy.empty() ? docstring::npos : dummy.find('%'); - if (idx != docstring::npos) - // Check if this is really a comment or just "\%" - if (idx == 0 || dummy[idx - 1] != '\\') - dummy.erase(idx, docstring::npos); - else - // This is "\%", so just erase the '\' - dummy.erase(idx - 1, 1); - // do we have a new token or a new line of - // the same one? 
In the first case we ignore - // the \n and in the second we replace it - // with a space - if (!dummy.empty()) { - if (!contains(dummy, '=')) - data_ += ' ' + dummy; - else - data_ += dummy; - } - dummy = token(data, '\n', ++Entries); - } //end while - - // replace double commas with "" for easy scanning - data = subst(data_, from_ascii(",,"), from_ascii("\"\"")); - - // unlikely! - if (data.empty()) + docstring key = from_ascii(findkey); + BibTeXInfo::const_iterator it = data.find(key); + if (it == data.end()) return docstring(); - - // now get only the important line of the bibtex entry. - // all entries are devided by ',' except the last one. - data += ','; - // now we have same behaviour for all entries because the last one - // is "blah ... }" - Entries = 0; - bool found = false; - // parsing of title and booktitle is different from the - // others, because booktitle contains title - do { - dummy = token(data, ',', Entries++); - if (!dummy.empty()) { - found = contains(ascii_lowercase(dummy), from_ascii(findkey)); - if (findkey == "title" && - contains(ascii_lowercase(dummy), from_ascii("booktitle"))) - found = false; - } - } while (!found && !dummy.empty()); - if (dummy.empty()) - // no such keyword - return docstring(); - - // we are not sure, if we get all, because "key= "blah, blah" is - // allowed. - // Therefore we read all until the next "=" character, which follows a - // new keyword - docstring keyvalue = dummy; - dummy = token(data, ',', Entries++); - while (!contains(dummy, '=') && !dummy.empty()) { - keyvalue += ',' + dummy; - dummy = token(data, ',', Entries++); - } - - // replace double "" with originals ,, (two commas) - // leaving us with the all-important line - data = subst(keyvalue, from_ascii("\"\""), from_ascii(",,")); - - // Clean-up. - // 1. Spaces - data = rtrim(data); - // 2. if there is no opening '{' then a closing '{' is probably cruft. 
- if (!contains(data, '{')) - data = rtrim(data, "}"); - // happens, when last keyword - docstring::size_type const idx = - !data.empty() ? data.find('=') : docstring::npos; - - if (idx == docstring::npos) - return docstring(); - - data = trim(data.substr(idx)); - - // a valid entry? - if (data.length() < 2 || data[0] != '=') - return docstring(); - else { - // delete '=' and the following spaces - data = ltrim(data, " ="); - if (data.length() < 2) { - // not long enough to find delimiters - return data; - } else { - docstring::size_type keypos = 1; - char_type enclosing; - if (data[0] == '{') { - enclosing = '}'; - } else if (data[0] == '"') { - enclosing = '"'; - } else { - // no {} and no "", pure data but with a - // possible ',' at the end - return rtrim(data, ","); - } - docstring tmp = data.substr(keypos); - while (tmp.find('{') != docstring::npos && - tmp.find('}') != docstring::npos && - tmp.find('{') < tmp.find('}') && - tmp.find('{') < tmp.find(enclosing)) { - keypos += tmp.find('{') + 1; - tmp = data.substr(keypos); - keypos += tmp.find('}') + 1; - tmp = data.substr(keypos); - } - if (tmp.find(enclosing) == docstring::npos) - return data; - else { - keypos += tmp.find(enclosing); - return data.substr(1, keypos - 1); - } - } - } + //FIXME ?? return it->second?? 
+ BibTeXInfo & data2 = const_cast(data); + return data2[key]; } - namespace { @@ -723,7 +570,7 @@ vector const getCiteStyles(CiteEngine const engine) vector const getNumericalStrings(string const & key, - InfoMap const & map, vector const & styles) + BibKeyList const & map, vector const & styles) { if (map.empty()) return vector(); @@ -777,7 +624,7 @@ vector const vector const getAuthorYearStrings(string const & key, - InfoMap const & map, vector const & styles) + BibKeyList const & map, vector const & styles) { if (map.empty()) return vector(); @@ -835,7 +682,7 @@ vector const void fillWithBibKeys(Buffer const * const buf, - vector > & keys) + BibKeyList & keys) { /// if this is a child document and the parent is already loaded /// use the parent's list instead [ale990412] diff --git a/src/Biblio.h b/src/Biblio.h index a6fe6c6acd..4e8e39f13b 100644 --- a/src/Biblio.h +++ b/src/Biblio.h @@ -13,8 +13,8 @@ #ifndef BIBLIO_H #define BIBLIO_H +#include "Biblio_typedefs.h" #include "Buffer.h" -#include "support/docstring.h" #include @@ -22,8 +22,6 @@ namespace lyx { namespace biblio { - extern const docstring TheBibliographyRef; - enum CiteEngine { ENGINE_BASIC, ENGINE_NATBIB_AUTHORYEAR, @@ -59,8 +57,7 @@ namespace biblio { /** Fills keys with BibTeX information derived from the various * in this document or its master document. */ - void fillWithBibKeys(Buffer const * const buf, - std::vector > & keys); + void fillWithBibKeys(Buffer const * const buf, BibKeyList & keys); /** Each citation engine recognizes only a subset of all possible * citation commands. 
Given a latex command \c input, this function @@ -69,49 +66,43 @@ namespace biblio { std::string const asValidLatexCommand(std::string const & input, CiteEngine const engine); -/// First entry is the bibliography key, second the data - typedef std::map InfoMap; - /// Returns a vector of bibliography keys - std::vector const getKeys(InfoMap const &); + std::vector const getKeys(BibKeyList const &); /** Returns the BibTeX data associated with a given key. Empty if no info exists. */ - docstring const getInfo(InfoMap const &, std::string const & key); + docstring const getInfo(BibKeyList const &, std::string const & key); /// return the year from the bibtex data record - docstring const getYear(InfoMap const & map, std::string const & key); + docstring const getYear(BibKeyList const & map, std::string const & key); /// return the short form of an authorlist - docstring const getAbbreviatedAuthor(InfoMap const & map, std::string const & key); + docstring const getAbbreviatedAuthor(BibKeyList const & map, std::string const & key); -// return only the family name +/// return only the family name docstring const familyName(docstring const & name); /** Search a BibTeX info field for the given key and return the associated field. */ - docstring const parseBibTeX(docstring data, std::string const & findkey); + docstring const getValueForKey(BibTeXInfo const & data, std::string const & findkey); /** Returns an iterator to the first key that meets the search criterion, or end() if unsuccessful. User supplies : - the InfoMap of bibkeys info, + the BibKeyList of bibliography info, the vector of keys to be searched, the search criterion, an iterator defining the starting point of the search, an enum defining a Simple or Regex search, an enum defining the search direction. 
*/ - std::vector::const_iterator - searchKeys(InfoMap const & map, + searchKeys(BibKeyList const & map, std::vector const & keys_to_search, docstring const & search_expression, std::vector::const_iterator start, - Search, - Direction, - bool caseSensitive=false); + Search, Direction, bool caseSensitive=false); class CitationStyle { @@ -145,12 +136,12 @@ namespace biblio { User supplies : the key, - the InfoMap of bibkeys info, + the BibKeyList of bibkeys info, the available citation styles */ std::vector const getNumericalStrings(std::string const & key, - InfoMap const & map, + BibKeyList const & map, std::vector const & styles); /** @@ -162,12 +153,12 @@ namespace biblio { User supplies : the key, - the InfoMap of bibkeys info, + the BibKeyList of bibkeys info, the available citation styles */ std::vector const getAuthorYearStrings(std::string const & key, - InfoMap const & map, + BibKeyList const & map, std::vector const & styles); } // namespace biblio diff --git a/src/Biblio_typedefs.h b/src/Biblio_typedefs.h new file mode 100644 index 0000000000..18d345a475 --- /dev/null +++ b/src/Biblio_typedefs.h @@ -0,0 +1,51 @@ +// -*- C++ -*- +/** + * \file Biblio_typedefs.h + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author Richard Heck + * + * Full author contact details are available in file CREDITS. + */ + +#ifndef BIBLIO_TYPEDEFS_H +#define BIBLIO_TYPEDEFS_H + +#include "support/docstring.h" +#include + +namespace lyx { +namespace biblio { + +/// Class to represent information about a BibTeX or +/// bibliography entry. +/// The keys are BibTeX fields, and the values are the +/// associated field values. 
+/// \param isBibTeX false if this is from an InsetBibitem +/// \param allData the entire BibTeX entry, more or less +/// \param entryType the BibTeX entry type +class BibTeXInfo : public std::map { + public: + BibTeXInfo(); + BibTeXInfo(bool isBibTeX); + bool hasKey(docstring const & key); + bool isBibTeX; + docstring allData; + docstring entryType; +}; + +/* +class BibKeyList : public std::set { + public: + std::set keys; +} + +*/ + +/// First entry is the bibliography key, second the data +typedef std::map BibKeyList; + +} +} +#endif diff --git a/src/Buffer.cpp b/src/Buffer.cpp index ae0dccaf02..ed969f8303 100644 --- a/src/Buffer.cpp +++ b/src/Buffer.cpp @@ -13,6 +13,7 @@ #include "Buffer.h" #include "Author.h" +#include "Biblio.h" #include "BranchList.h" #include "buffer_funcs.h" #include "BufferList.h" @@ -1359,7 +1360,7 @@ void Buffer::getLabelList(vector & list) const // This is also a buffer property (ale) -void Buffer::fillWithBibKeys(vector > & keys) +void Buffer::fillWithBibKeys(biblio::BibKeyList & keys) const { biblio::fillWithBibKeys(this, keys); @@ -1730,10 +1731,10 @@ void Buffer::changeRefsIfUnique(docstring const & from, docstring const & to, vector labels; if (code == Inset::CITE_CODE) { - vector > keys; + biblio::BibKeyList keys; fillWithBibKeys(keys); - vector >::const_iterator bit = keys.begin(); - vector >::const_iterator bend = keys.end(); + biblio::BibKeyList::const_iterator bit = keys.begin(); + biblio::BibKeyList::const_iterator bend = keys.end(); for (; bit != bend; ++bit) // FIXME UNICODE diff --git a/src/Buffer.h b/src/Buffer.h index 218f472744..b611fd0b31 100644 --- a/src/Buffer.h +++ b/src/Buffer.h @@ -12,6 +12,7 @@ #ifndef BUFFER_H #define BUFFER_H +#include "Biblio_typedefs.h" #include "DocIterator.h" #include "support/FileName.h" @@ -302,7 +303,7 @@ public: void validate(LaTeXFeatures &) const; /// return all bibkeys from buffer and its childs - void 
fillWithBibKeys(biblio::BibKeyList & keys) const; /// Update the cache with all bibfiles in use (including bibfiles /// of loaded child documents). void updateBibfilesCache(); diff --git a/src/Makefile.am b/src/Makefile.am index 7daedf1db3..511ff8afbe 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -78,6 +78,7 @@ endif liblyxcore_la_SOURCES = \ Author.cpp \ Author.h \ + Biblio_typedefs.h \ Biblio.h \ Biblio.cpp \ Bidi.cpp \ diff --git a/src/frontends/controllers/ControlCitation.cpp b/src/frontends/controllers/ControlCitation.cpp index 8a3a7134ce..bd716a95dc 100644 --- a/src/frontends/controllers/ControlCitation.cpp +++ b/src/frontends/controllers/ControlCitation.cpp @@ -48,12 +48,8 @@ bool ControlCitation::initialiseParams(string const & data) bool use_styles = engine != biblio::ENGINE_BASIC; - vector > blist; - kernel().buffer().fillWithBibKeys(blist); - bibkeysInfo_.clear(); - for (size_t i = 0; i < blist.size(); ++i) - bibkeysInfo_[blist[i].first] = blist[i].second; - + kernel().buffer().fillWithBibKeys(bibkeysInfo_); + if (citeStyles_.empty()) citeStyles_ = biblio::getCiteStyles(engine); else { @@ -137,23 +133,20 @@ vector ControlCitation::searchKeys( // it is treated as a simple string by boost::regex. expr = escape_special_chars(expr); - boost::regex reg_exp(to_utf8(expr), case_sensitive? + boost::regex reg_exp(to_utf8(expr), case_sensitive ? 
boost::regex_constants::normal : boost::regex_constants::icase); vector::const_iterator it = keys_to_search.begin(); vector::const_iterator end = keys_to_search.end(); for (; it != end; ++it ) { - biblio::InfoMap::const_iterator info = bibkeysInfo_.find(*it); + biblio::BibKeyList::const_iterator info = bibkeysInfo_.find(*it); if (info == bibkeysInfo_.end()) continue; - - string data = *it; - // FIXME UNICODE - data += ' ' + to_utf8(info->second); - + + biblio::BibTeXInfo const kvm = info->second; + string const data = *it + ' ' + to_utf8(kvm.allData); + try { - // Attempts to find a match for the current RE - // somewhere in data. if (boost::regex_search(data, reg_exp)) foundKeys.push_back(*it); } diff --git a/src/frontends/controllers/ControlCitation.h b/src/frontends/controllers/ControlCitation.h index 4bf16fde4e..93219dc535 100644 --- a/src/frontends/controllers/ControlCitation.h +++ b/src/frontends/controllers/ControlCitation.h @@ -62,7 +62,7 @@ public: } private: /// The info associated with each key - biblio::InfoMap bibkeysInfo_; + biblio::BibKeyList bibkeysInfo_; /// static std::vector citeStyles_; diff --git a/src/insets/Inset.h b/src/insets/Inset.h index bae6669ee6..438dfb54d6 100644 --- a/src/insets/Inset.h +++ b/src/insets/Inset.h @@ -15,6 +15,7 @@ #ifndef INSETBASE_H #define INSETBASE_H +#include "Biblio_typedefs.h" #include "Changes.h" #include "Dimension.h" @@ -440,8 +441,7 @@ public: virtual void addToToc(TocList &, Buffer const &, ParConstIterator const &) const {} /// Fill keys with BibTeX information virtual void fillWithBibKeys(Buffer const &, - std::vector > &, - InsetIterator const &) const { return; } + biblio::BibKeyList &, InsetIterator const &) const { return; } /// Update the counters of this inset and of its contents virtual void updateLabels(Buffer const &, ParIterator const &) {} diff --git a/src/insets/InsetBibitem.cpp b/src/insets/InsetBibitem.cpp index 166bcdd5a8..8c9410b2b1 100644 --- a/src/insets/InsetBibitem.cpp +++ 
b/src/insets/InsetBibitem.cpp @@ -187,16 +187,16 @@ docstring const bibitemWidest(Buffer const & buffer) void InsetBibitem::fillWithBibKeys(Buffer const & buf, - std::vector > & keys, - InsetIterator const & it) const + biblio::BibKeyList & keys, InsetIterator const & it) const { string const key = to_utf8(getParam("key")); - docstring const label = getParam("label"); + biblio::BibTeXInfo keyvalmap; + keyvalmap[from_ascii("label")] = getParam("label"); DocIterator doc_it(it); doc_it.forwardPos(); - docstring const ref = doc_it.paragraph().asString(buf, false); - docstring const info = label + biblio::TheBibliographyRef + ref; - keys.push_back(std::pair(key, info)); + keyvalmap [from_ascii("ref")] = doc_it.paragraph().asString(buf, false); + keyvalmap.isBibTeX = false; + keys[key] = keyvalmap; } } // namespace lyx diff --git a/src/insets/InsetBibitem.h b/src/insets/InsetBibitem.h index 98db70e68d..68edfca092 100644 --- a/src/insets/InsetBibitem.h +++ b/src/insets/InsetBibitem.h @@ -14,6 +14,7 @@ #include "InsetCommand.h" +#include "Biblio_typedefs.h" namespace lyx { @@ -45,8 +46,7 @@ public: int plaintext(Buffer const &, odocstream &, OutputParams const &) const; /// virtual void fillWithBibKeys(Buffer const &, - std::vector > &, - InsetIterator const &) const; + biblio::BibKeyList &, InsetIterator const &) const; protected: /// diff --git a/src/insets/InsetBibtex.cpp b/src/insets/InsetBibtex.cpp index 7ba0a2814b..4c612bd495 100644 --- a/src/insets/InsetBibtex.cpp +++ b/src/insets/InsetBibtex.cpp @@ -4,6 +4,7 @@ * Licence details can be found in the file COPYING. * * \author Alejandro Aguilar Sierra + * \author Richard Heck (BibTeX parser improvements) * * Full author contact details are available in file CREDITS. 
*/ @@ -414,14 +415,14 @@ namespace { // read value bool legalChar = true; while (ifs && !isSpace(ch) && - delimChars.find(ch) == docstring::npos && - (legalChar = illegalChars.find(ch) == docstring::npos) - ) { - if (chCase == makeLowerCase) { + delimChars.find(ch) == docstring::npos && + (legalChar = (illegalChars.find(ch) == docstring::npos)) + ) + { + if (chCase == makeLowerCase) val += lowercase(ch); - } else { + else val += ch; - } ifs.get(ch); } @@ -478,17 +479,40 @@ namespace { return false; } else if (ch == '"' || ch == '{') { + // set end delimiter + char_type delim = ch == '"' ? '"': '}'; - // read delimited text - set end delimiter - char_type delim = ch == '"'? '"': '}'; - - // inside this delimited text braces must match. - // Thus we can have a closing delimiter only - // when nestLevel == 0 + //Skip whitespace + do { + ifs.get(ch); + } while (ifs && isSpace(ch)); + + if (!ifs) + return false; + + //We now have the first non-whitespace character + //We'll collapse adjacent whitespace. + bool lastWasWhiteSpace = false; + + // inside this delimited text braces must match. + // Thus we can have a closing delimiter only + // when nestLevel == 0 int nestLevel = 0; - - ifs.get(ch); + while (ifs && (nestLevel > 0 || ch != delim)) { + if (isSpace(ch)) { + lastWasWhiteSpace = true; + ifs.get(ch); + continue; + } + //We output the space only after we stop getting + //whitespace so as not to output any whitespace + //at the end of the value. 
+ if (lastWasWhiteSpace) { + lastWasWhiteSpace = false; + val += ' '; + } + val += ch; // update nesting level @@ -556,8 +580,7 @@ namespace { // This method returns a comma separated list of Bibtex entries void InsetBibtex::fillWithBibKeys(Buffer const & buffer, - std::vector > & keys, - InsetIterator const & /*di*/) const + biblio::BibKeyList & keys, InsetIterator const & /*di*/) const { vector const files = getFiles(buffer); for (vector::const_iterator it = files.begin(); @@ -573,15 +596,6 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer, // - it accepts more characters in keys or value names than // bibtex does. // - // TODOS: - // - the entries are split into name = value pairs by the - // parser. These have to be merged again because of the - // way lyx treats the entries ( pair<...>(...) ). The citation - // mechanism in lyx should be changed such that it can use - // the split entries. - // - messages on parsing errors can be generated. - // - // Officially bibtex does only support ASCII, but in practice // you can use the encoding of the main document as long as // some elements like keys and names are pure ASCII. Therefore @@ -589,9 +603,10 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer, // We don't restrict keys to ASCII in LyX, since our own // InsetBibitem can generate non-ASCII keys, and nonstandard // 8bit clean bibtex forks exist. + idocfstream ifs(it->toFilesystemEncoding().c_str(), - std::ios_base::in, - buffer.params().encoding().iconvName()); + std::ios_base::in, + buffer.params().encoding().iconvName()); char_type ch; VarMap strings; @@ -660,25 +675,31 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer, } else { - // Citation entry. Read the key and all name = value pairs + // Citation entry. Try to read the key. 
docstring key; - docstring fields; - docstring name; - docstring value; - docstring commaNewline; if (!readTypeOrKey(key, ifs, from_ascii(","), from_ascii("}"), keepCase) || !ifs) continue; - // now we have a key, so we will add an entry - // (even if it's empty, as bibtex does) + ///////////////////////////////////////////// + // now we have a key, so we will add an entry + // (even if it's empty, as bibtex does) // - // all items must be separated by a comma. If - // it is missing the scanning of this entry is - // stopped and the next is searched. + // we now read the field = value pairs. + // all items must be separated by a comma. If + // it is missing the scanning of this entry is + // stopped and the next is searched. + docstring fields; + docstring name; + docstring value; + docstring commaNewline; + docstring data; + biblio::BibTeXInfo keyvalmap; + keyvalmap.entryType = entryType; + bool readNext = removeWSAndComma(ifs); - + while (ifs && readNext) { // read field name @@ -699,27 +720,18 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer, if (!readValue(value, ifs, strings)) break; - // append field to the total entry string. - // - // TODO: Here is where the fields can be put in - // a more intelligent structure that preserves - // the already known parts. 
- fields += commaNewline; - fields += name + from_ascii(" = {") + value + '}'; - - if (!commaNewline.length()) - commaNewline = from_ascii(",\n"); + keyvalmap[name] = value; + data += "\n\n" + value; readNext = removeWSAndComma(ifs); } // add the new entry - keys.push_back(pair( - to_utf8(key), fields)); + keyvalmap.allData = data; + keyvalmap.isBibTeX = true; + keys[to_utf8(key)] = keyvalmap; } - } //< searching '@' - } //< for loop over files } diff --git a/src/insets/InsetBibtex.h b/src/insets/InsetBibtex.h index 82defd4fd7..fcbf0b716c 100644 --- a/src/insets/InsetBibtex.h +++ b/src/insets/InsetBibtex.h @@ -12,9 +12,9 @@ #ifndef INSET_BIBTEX_H #define INSET_BIBTEX_H - -#include +#include #include "InsetCommand.h" +#include "Biblio_typedefs.h" #include "support/FileName.h" @@ -39,8 +39,7 @@ public: int latex(Buffer const &, odocstream &, OutputParams const &) const; /// virtual void fillWithBibKeys(Buffer const &, - std::vector > &, - InsetIterator const &) const; + biblio::BibKeyList &, InsetIterator const &) const; /// std::vector const getFiles(Buffer const &) const; /// diff --git a/src/insets/InsetCitation.cpp b/src/insets/InsetCitation.cpp index f0812e5b8f..6541e3ceb6 100644 --- a/src/insets/InsetCitation.cpp +++ b/src/insets/InsetCitation.cpp @@ -65,13 +65,13 @@ docstring const getNatbibLabel(Buffer const & buffer, return docstring(); // Cache the labels - typedef std::map CachedMap; + typedef std::map CachedMap; static CachedMap cached_keys; // and cache the timestamp of the bibliography files. static std::map bibfileStatus; - biblio::InfoMap infomap; + biblio::BibKeyList keylist; vector const & bibfilesCache = buffer.getBibfilesCache(); // compare the cached timestamps with the actual ones. 
@@ -97,22 +97,13 @@ docstring const getNatbibLabel(Buffer const & buffer, // build the keylist only if the bibfiles have been changed if (cached_keys[&buffer].empty() || bibfileStatus.empty() || changed) { - typedef vector > InfoType; - InfoType bibkeys; - buffer.fillWithBibKeys(bibkeys); - - InfoType::const_iterator bit = bibkeys.begin(); - InfoType::const_iterator bend = bibkeys.end(); - - for (; bit != bend; ++bit) - infomap[bit->first] = bit->second; - - cached_keys[&buffer] = infomap; + buffer.fillWithBibKeys(keylist); + cached_keys[&buffer] = keylist; } else // use the cached keys - infomap = cached_keys[&buffer]; + keylist = cached_keys[&buffer]; - if (infomap.empty()) + if (keylist.empty()) return docstring(); // the natbib citation-styles @@ -175,8 +166,8 @@ docstring const getNatbibLabel(Buffer const & buffer, vector::const_iterator end = keys.end(); for (; it != end; ++it) { // get the bibdata corresponding to the key - docstring const author(biblio::getAbbreviatedAuthor(infomap, *it)); - docstring const year(biblio::getYear(infomap, *it)); + docstring const author(biblio::getAbbreviatedAuthor(keylist, *it)); + docstring const year(biblio::getYear(keylist, *it)); // Something isn't right. Fail safely. if (author.empty() || year.empty()) diff --git a/src/insets/InsetInclude.cpp b/src/insets/InsetInclude.cpp index 6037a956d3..08461aadc6 100644 --- a/src/insets/InsetInclude.cpp +++ b/src/insets/InsetInclude.cpp @@ -727,12 +727,12 @@ void InsetInclude::getLabelList(Buffer const & buffer, void InsetInclude::fillWithBibKeys(Buffer const & buffer, - std::vector > & keys, - InsetIterator const & /*di*/) const + biblio::BibKeyList & keys, InsetIterator const & /*di*/) const { if (loadIfNeeded(buffer, params_)) { string const included_file = includedFilename(buffer, params_).absFilename(); Buffer * tmp = theBufferList().getBuffer(included_file); + //FIXME This is kind of a dirty hack and should be made reasonable. 
tmp->setParentName(""); tmp->fillWithBibKeys(keys); tmp->setParentName(parentFilename(buffer)); diff --git a/src/insets/InsetInclude.h b/src/insets/InsetInclude.h index 2b71890a83..dbdaa340f3 100644 --- a/src/insets/InsetInclude.h +++ b/src/insets/InsetInclude.h @@ -12,6 +12,7 @@ #ifndef INSET_INCLUDE_H #define INSET_INCLUDE_H +#include "Biblio_typedefs.h" #include "Inset.h" #include "InsetCommandParams.h" #include "RenderButton.h" @@ -58,10 +59,11 @@ public: /** Fills \c keys * \param buffer the Buffer containing this inset. * \param keys the list of bibkeys in the child buffer. + * \param it not used here */ - virtual void fillWithBibKeys(Buffer const &, - std::vector > &, - InsetIterator const & /*di*/) const; + virtual void fillWithBibKeys(Buffer const & buffer, + biblio::BibKeyList & keys, InsetIterator const & it) const; + /** Update the cache with all bibfiles in use of the child buffer * (including bibfiles of grandchild documents). * Does nothing if the child document is not loaded to prevent