diff --git a/src/Buffer.cpp b/src/Buffer.cpp
index e95d0edcc8..6c1619151c 100644
--- a/src/Buffer.cpp
+++ b/src/Buffer.cpp
@@ -56,7 +56,7 @@
 #include "PDFOptions.h"
 #include "Session.h"
 #include "SpellChecker.h"
-#include "xml.h"
+#include "Statistics.h"
 #include "texstream.h"
 #include "TexRow.h"
 #include "Text.h"
@@ -66,6 +66,7 @@
 #include "VCBackend.h"
 #include "version.h"
 #include "WordLangTuple.h"
+#include "xml.h"
 
 #include "insets/InsetBranch.h"
 #include "insets/InsetInclude.h"
@@ -357,10 +358,8 @@ public:
 	///
 	mutable bool need_update;
 
-private:
-	int word_count_;
-	int char_count_;
-	int blank_count_;
+	/// Word, character and blank counters of this buffer
+	Statistics statistics_;
 
 public:
 	/// This is here to force the test to be done whenever parent_buffer
@@ -395,22 +394,6 @@ public:
 		parent_buffer->invalidateBibinfoCache();
 	}
 
-	/// compute statistics
-	/// \p from initial position
-	/// \p to points to the end position
-	void updateStatistics(DocIterator & from, DocIterator & to,
-	                      bool skipNoOutput = true);
-	/// statistics accessor functions
-	int wordCount() const
-	{
-		return word_count_;
-	}
-	int charCount(bool with_blanks) const
-	{
-		return char_count_
-		+ (with_blanks ? blank_count_ : 0);
-	}
-
 	// Make sure the file monitor monitors the good file.
 	void refreshFileMonitor();
 
@@ -462,8 +445,7 @@ Buffer::Impl::Impl(Buffer * owner, FileName const & file, bool readonly_,
 	  have_bibitems_(false), lyx_clean(true), bak_clean(true), unnamed(false),
 	  internal_buffer(false), read_only(readonly_), file_fully_loaded(false),
 	  need_format_backup(false), ignore_parent(false), macro_lock(false),
-	  externally_modified_(false), bibinfo_cache_valid_(false),
-	  need_update(false), word_count_(0), char_count_(0), blank_count_(0)
+	  externally_modified_(false), bibinfo_cache_valid_(false), need_update(false)
 {
 	refreshFileMonitor();
 	if (!cloned_buffer_) {
@@ -5464,83 +5446,9 @@ void Buffer::requestSpellcheck()
 }
 
 
-void Buffer::Impl::updateStatistics(DocIterator & from, DocIterator & to, bool skipNoOutput)
+Statistics & Buffer::statistics()
 {
-	bool inword = false;
-	word_count_ = 0;
-	char_count_ = 0;
-	blank_count_ = 0;
-
-	for (DocIterator dit = from ; dit != to && !dit.atEnd(); ) {
-		if (!dit.inTexted()) {
-			dit.forwardPos();
-			continue;
-		}
-
-		Paragraph const & par = dit.paragraph();
-		pos_type const pos = dit.pos();
-
-		// Copied and adapted from isWordSeparator() in Paragraph
-		if (pos == dit.lastpos()) {
-			inword = false;
-		} else {
-			Inset const * ins = par.getInset(pos);
-			if (ins && skipNoOutput && !ins->producesOutput()) {
-				// skip this inset
-				++dit.top().pos();
-				// stop if end of range was skipped
-				if (!to.atEnd() && dit >= to)
-					break;
-				continue;
-			} else if (!par.isDeleted(pos)) {
-				if (par.isWordSeparator(pos))
-					inword = false;
-				else if (!inword) {
-					++word_count_;
-					inword = true;
-				}
-				if (ins && ins->isLetter()) {
-					odocstringstream os;
-					ins->toString(os);
-					char_count_ += os.str().length();
-				}
-				else if (ins && ins->isSpace())
-					++blank_count_;
-				else if (ins) {
-					pair<int, int> words = ins->isWords();
-					char_count_ += words.first;
-					word_count_ += words.second;
-					inword = false;
-				}
-				else {
-					char_type const c = par.getChar(pos);
-					if (isPrintableNonspace(c))
-						++char_count_;
-					else if (isSpace(c))
-						++blank_count_;
-				}
-			}
-		}
-		dit.forwardPos();
-	}
-}
-
-
-void Buffer::updateStatistics(DocIterator & from, DocIterator & to, bool skipNoOutput) const
-{
-	d->updateStatistics(from, to, skipNoOutput);
-}
-
-
-int Buffer::wordCount() const
-{
-	return d->wordCount();
-}
-
-
-int Buffer::charCount(bool with_blanks) const
-{
-	return d->charCount(with_blanks);
+	return d->statistics_;
 }
 
 
diff --git a/src/Buffer.h b/src/Buffer.h
index d9e7e325d4..fd4d8bc8e3 100644
--- a/src/Buffer.h
+++ b/src/Buffer.h
@@ -51,6 +51,7 @@ class otexstream;
 class ParagraphList;
 class ParIterator;
 class ParConstIterator;
+class Statistics;
 class TeXErrors;
 class TexRow;
 class TocBackend;
@@ -784,15 +785,8 @@ public:
 	std::string includedFilePath(std::string const & name,
 				std::string const & ext = empty_string()) const;
 
-	/// compute statistics between \p from and \p to
-	/// \p from initial position
-	/// \p to points to the end position
-	/// \p skipNoOutput if notes etc. should be ignored
-	void updateStatistics(DocIterator & from, DocIterator & to,
-						  bool skipNoOutput = true) const;
-	/// statistics accessor functions
-	int wordCount() const;
-	int charCount(bool with_blanks) const;
+	/// Count of words, characters and blanks
+	Statistics & statistics();
 	///
 	bool areChangesPresent() const;
 
diff --git a/src/BufferView.cpp b/src/BufferView.cpp
index ce315a6f49..f754186d07 100644
--- a/src/BufferView.cpp
+++ b/src/BufferView.cpp
@@ -38,6 +38,7 @@
 #include "MetricsInfo.h"
 #include "Paragraph.h"
 #include "Session.h"
+#include "Statistics.h"
 #include "texstream.h"
 #include "Text.h"
 #include "TextMetrics.h"
@@ -2006,18 +2007,11 @@ void BufferView::dispatch(FuncRequest const & cmd, DispatchResult & dr)
 	}
 
 	case LFUN_STATISTICS: {
-		DocIterator from, to;
-		if (cur.selection()) {
-			from = cur.selectionBegin();
-			to = cur.selectionEnd();
-		} else {
-			from = doc_iterator_begin(&buffer_);
-			to = doc_iterator_end(&buffer_);
-		}
-		buffer_.updateStatistics(from, to);
-		int const words = buffer_.wordCount();
-		int const chars = buffer_.charCount(false);
-		int const chars_blanks = buffer_.charCount(true);
+		Statistics & stats = buffer_.statistics();
+		stats.update(cur);
+		int const words = stats.word_count;
+		int const chars = stats.char_count;
+		int const chars_blanks = chars + stats.blank_count;
 		docstring message;
 		if (cur.selection())
 			message = _("Statistics for the selection:");
@@ -2040,8 +2034,8 @@ void BufferView::dispatch(FuncRequest const & cmd, DispatchResult & dr)
 			message += _("One character (no blanks)");
 
 		Alert::information(_("Statistics"), message);
-	}
 		break;
+	}
 
 	case LFUN_STATISTICS_REFERENCE_CLAMP: {
 		d->stats_update_trigger_ = true;
@@ -2050,14 +2044,11 @@ void BufferView::dispatch(FuncRequest const & cmd, DispatchResult & dr)
 			break;
 		}
 
-		DocIterator from, to;
-		from = doc_iterator_begin(&buffer_);
-		to = doc_iterator_end(&buffer_);
-		buffer_.updateStatistics(from, to);
-
-		d->stats_ref_value_w_ = buffer_.wordCount();
-		d->stats_ref_value_c_ = buffer_.charCount(true);
-		d->stats_ref_value_nb_ = buffer_.charCount(false);
+		Statistics & stats = buffer_.statistics();
+		stats.update(cur);
+		d->stats_ref_value_w_ = stats.word_count;
+		d->stats_ref_value_c_ = stats.char_count + stats.blank_count;
+		d->stats_ref_value_nb_ = stats.char_count;
 		break;
 	}
 
diff --git a/src/Makefile.am b/src/Makefile.am
index d6c9a0cb18..fa1161b71a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -173,6 +173,7 @@ SOURCEFILESCORE = \
 	ServerSocket.cpp \
 	Session.cpp \
 	Spacing.cpp \
+	Statistics.cpp \
 	TexRow.cpp \
 	texstream.cpp \
 	Text.cpp \
@@ -277,6 +278,7 @@ HEADERFILESCORE = \
 	Session.h \
 	Spacing.h \
 	SpellChecker.h \
+	Statistics.h \
 	TexRow.h \
 	texstream.h \
 	Text.h \
diff --git a/src/Statistics.cpp b/src/Statistics.cpp
new file mode 100644
index 0000000000..eabae8f399
--- /dev/null
+++ b/src/Statistics.cpp
@@ -0,0 +1,123 @@
+// -*- C++ -*-
+/**
+ * \file Statistics.cpp
+ * This file is part of LyX, the document processor.
+ * Licence details can be found in the file COPYING.
+ *
+ * \author Jean-Marc Lasgouttes
+ *
+ * Full author contact details are available in file CREDITS.
+ */
+
+#include <config.h>
+
+#include "Statistics.h"
+
+#include "Paragraph.h"
+#include "Text.h"
+#include "Cursor.h"
+
+#include "support/lassert.h"
+#include "support/lstrings.h"
+#include "support/textutils.h"
+
+
+namespace lyx {
+
+using namespace support;
+
+
+void Statistics::update(CursorData const & cur)
+{
+	// reset counts
+	*this = Statistics();
+	if (cur.selection()) {
+		// nothing is counted for a selection inside math
+		if (cur.inMathed())
+			return;
+		CursorSlice from, to;
+		from = cur.selBegin();
+		to = cur.selEnd();
+		update(from, to);
+	} else
+		update(*cur.bottom().text());
+}
+
+
+void Statistics::update(docstring const & s)
+{
+	// FIXME: use a stripped-down version of the paragraph code.
+	// This is the original code from InsetCitation::isWords()
+	char_count += s.size();
+	// FIXME: this does not count words properly
+	word_count += wordCount(s);
+	// FIXME: spaces are not counted
+}
+
+
+void Statistics::update(Text const & text)
+{
+	for (Paragraph const & par : text.paragraphs())
+		update(par);
+}
+
+
+void Statistics::update(CursorSlice const & from, CursorSlice const & to)
+{
+	LASSERT(from.text() == to.text(), return);
+	if (from.idx() == to.idx()) {
+		if (from.pit() == to.pit()) {
+			update(from.paragraph(), from.pos(), to.pos());
+		} else {
+			pos_type frompos = from.pos();
+			for (pit_type pit = from.pit() ; pit < to.pit() ; ++pit) {
+				update(from.text()->getPar(pit), frompos);
+				frompos = 0;
+			}
+			update(to.paragraph(), 0, to.pos());
+		}
+	} else
+		// selection spanning several idx (e.g. table cells): each one is counted entirely
+		for (idx_type idx = from.idx() ; idx <= to.idx(); ++idx)
+			update(*from.inset().getText(idx));
+}
+
+
+void Statistics::update(Paragraph const & par, pos_type from, pos_type to)
+{
+	if (to == -1)
+		to = par.size();
+
+	for (pos_type pos = from ; pos < to ; ++pos) {
+		Inset const * ins = par.isInset(pos) ? par.getInset(pos) : nullptr;
+		// Stuff that we skip
+		if (par.isDeleted(pos))
+			continue;
+		if (ins && skip_no_output && !ins->producesOutput())
+			continue;
+
+		// words
+		if (par.isWordSeparator(pos))
+			inword = false;
+		else if (!inword) {
+			++word_count;
+			inword = true;
+		}
+
+		if (ins)
+			ins->updateStatistics(*this);
+		else {
+			char_type const c = par.getChar(pos);
+			if (isPrintableNonspace(c))
+				++char_count;
+			else if (lyx::isSpace(c))
+				++blank_count;
+		}
+	}
+	// a word never continues into the next paragraph
+	inword = false;
+}
+
+
+} // namespace lyx
+
diff --git a/src/Statistics.h b/src/Statistics.h
new file mode 100644
index 0000000000..7439de06cf
--- /dev/null
+++ b/src/Statistics.h
@@ -0,0 +1,64 @@
+// -*- C++ -*-
+/**
+ * \file Statistics.h
+ * This file is part of LyX, the document processor.
+ * Licence details can be found in the file COPYING.
+ *
+ * \author Jean-Marc Lasgouttes
+ *
+ * Full author contact details are available in file CREDITS.
+ */
+
+#ifndef STATISTICS_H
+#define STATISTICS_H
+
+#include "support/strfwd.h"
+#include "support/types.h"
+
+namespace lyx {
+
+class CursorData;
+class CursorSlice;
+class Text;
+class Paragraph;
+
+// Class used to compute letters/words statistics on buffer or selection
+class Statistics {
+public:
+	// Number of words
+	int word_count = 0;
+	// Number of non blank characters
+	int char_count = 0;
+	// Number of blank characters
+	int blank_count = 0;
+	// If true, insets that produce no output (e.g. notes) are not counted.
+	bool skip_no_output = true;
+
+	/// Count characters in the whole document, or in the selection if
+	/// there is one. This is the main entry point.
+	void update(CursorData const & cur);
+	/// Count chars and words in this string
+	void update(docstring const & s);
+	/// Count chars and words in the paragraphs of \c text
+	void update(Text const & text);
+
+private:
+
+	/// Count chars and words between two positions
+	void update(CursorSlice const & from, CursorSlice const & to);
+
+	/** Count chars and words in a paragraph
+	 * \param par: the paragraph
+	 * \param from: starting position
+	 * \param to: end position. If it is equal to -1, then the end is
+	 *    the end of the paragraph.
+	 */
+	void update(Paragraph const & par, pos_type from = 0, pos_type to = -1);
+
+	// True while the scan is inside a word; reset at each paragraph end
+	bool inword = false;
+};
+
+} // namespace lyx
+
+#endif // STATISTICS_H
diff --git a/src/Text.cpp b/src/Text.cpp
index c61bb8f812..2ec8f4905a 100644
--- a/src/Text.cpp
+++ b/src/Text.cpp
@@ -52,6 +52,7 @@
 #include "Paragraph.h"
 #include "ParagraphParameters.h"
 #include "SpellChecker.h"
+#include "Statistics.h"
 #include "TextClass.h"
 #include "TextMetrics.h"
 #include "Thesaurus.h"
@@ -6328,27 +6329,20 @@ void Text::dispatch(Cursor & cur, FuncRequest & cmd)
 		break;
 
 	case LFUN_SERVER_GET_STATISTICS: {
-		DocIterator from, to;
-		if (cur.selection()) {
-			from = cur.selectionBegin();
-			to = cur.selectionEnd();
-		} else {
-			from = doc_iterator_begin(cur.buffer());
-			to = doc_iterator_end(cur.buffer());
-		}
+		Statistics & stats = cur.buffer()->statistics();
+		stats.update(cur);
 
-		cur.buffer()->updateStatistics(from, to);
 		string const arg0 = cmd.getArg(0);
 		if (arg0 == "words") {
-			cur.message(convert<docstring>(cur.buffer()->wordCount()));
+			cur.message(convert<docstring>(stats.word_count));
 		} else if (arg0 == "chars") {
-			cur.message(convert<docstring>(cur.buffer()->charCount(false)));
+			cur.message(convert<docstring>(stats.char_count));
 		} else if (arg0 == "chars-space") {
-			cur.message(convert<docstring>(cur.buffer()->charCount(true)));
+			cur.message(convert<docstring>(stats.char_count + stats.blank_count));
 		} else {
-			cur.message(convert<docstring>(cur.buffer()->wordCount()) + " "
-			+ convert<docstring>(cur.buffer()->charCount(false)) + " "
-			+ convert<docstring>(cur.buffer()->charCount(true)));
+			cur.message(convert<docstring>(stats.word_count) + " "
+			+ convert<docstring>(stats.char_count) + " "
+			+ convert<docstring>(stats.char_count + stats.blank_count));
 		}
 		break;
 	}
diff --git a/src/frontends/qt/GuiView.cpp b/src/frontends/qt/GuiView.cpp
index 3b24846852..a6b48b3053 100644
--- a/src/frontends/qt/GuiView.cpp
+++ b/src/frontends/qt/GuiView.cpp
@@ -60,8 +60,9 @@
 #include "LyXRC.h"
 #include "LyXVC.h"
 #include "Paragraph.h"
-#include "SpellChecker.h"
 #include "Session.h"
+#include "SpellChecker.h"
+#include "Statistics.h"
 #include "TexRow.h"
 #include "Text.h"
 #include "Toolbars.h"
@@ -1478,31 +1479,24 @@ void GuiView::showStats()
 	if (d.time_to_update > 0)
 		return;
 
-	DocIterator from, to;
-	if (cur.selection()) {
-		from = cur.selectionBegin();
-		to = cur.selectionEnd();
-		d.already_in_selection_ = true;
-	} else {
-		from = doc_iterator_begin(buf);
-		to = doc_iterator_end(buf);
-		d.already_in_selection_ = false;
-	}
+	// Record whether a selection is active, as the replaced code did
+	d.already_in_selection_ = cur.selection();
 
 	// Don't attempt to calculate stats if
 	// the buffer is busy as this might crash (#12935)
+	Statistics & statistics = buf->statistics();
 	if (!busy() && !bv->busy())
-		buf->updateStatistics(from, to);
+		statistics.update(cur);
 
 	QStringList stats;
 	if (word_count_enabled_) {
-		int const words = buf->wordCount() - bv->stats_ref_value_w();
+		int const words = statistics.word_count - bv->stats_ref_value_w();
 		if (words == 1)
 			stats << toqstr(bformat(_("%1$d Word"), words));
 		else
 			stats << toqstr(bformat(_("%1$d Words"), words));
 	}
-	int const chars_with_blanks = buf->charCount(true);
+	int const chars_with_blanks = statistics.char_count + statistics.blank_count;
 	if (char_count_enabled_) {
 		int const chars_with_blanks_disp = chars_with_blanks - bv->stats_ref_value_c();
 		if (chars_with_blanks == 1)
@@ -1511,7 +1505,7 @@ void GuiView::showStats()
 			stats << toqstr(bformat(_("%1$d Characters"), chars_with_blanks_disp));
 	}
 	if (char_nb_count_enabled_) {
-		int const chars = buf->charCount(false) - bv->stats_ref_value_nb();
+		int const chars = statistics.char_count - bv->stats_ref_value_nb();
 		if (chars == 1)
 			stats << toqstr(bformat(_("%1$d Character (no Blanks)"), chars));
 		else
diff --git a/src/insets/Inset.cpp b/src/insets/Inset.cpp
index b949f9c989..c3efc2b5f2 100644
--- a/src/insets/Inset.cpp
+++ b/src/insets/Inset.cpp
@@ -30,10 +30,11 @@
 #include "InsetLayout.h"
 #include "MetricsInfo.h"
 #include "output_xhtml.h"
-#include "xml.h"
+#include "Statistics.h"
 #include "Text.h"
 #include "TextClass.h"
 #include "TocBackend.h"
+#include "xml.h"
 
 #include "frontends/Application.h"
 #include "frontends/Painter.h"
@@ -626,6 +627,17 @@ bool Inset::undefined() const
 }
 
 
+void Inset::updateStatistics(Statistics & stats) const
+{
+	if (isLetter()) {
+		odocstringstream os;
+		toString(os);
+		stats.char_count += os.str().length();
+	} else if (isSpace())
+		++stats.blank_count;
+}
+
+
 CtObject Inset::getCtObject(OutputParams const &) const
 {
 	return CtObject::Normal;
diff --git a/src/insets/Inset.h b/src/insets/Inset.h
index ea8f00e38c..4340c1f2a0 100644
--- a/src/insets/Inset.h
+++ b/src/insets/Inset.h
@@ -60,6 +60,7 @@ class MathAtom;
 class MetricsInfo;
 class PainterInfo;
 class ParIterator;
+class Statistics;
 class Text;
 class TocBackend;
 class XMLStream;
@@ -480,9 +481,8 @@ public:
 	/// is this equivalent to a space (which is BTW different from
 	/// a line separator)?
 	virtual bool isSpace() const { return false; }
-	/// returns chars, words if the inset is equivalent to such, otherwise
-	/// (0,0), which should be interpreted as 'false'
-	virtual std::pair<int, int> isWords() const { return std::pair<int, int>(0, 0); }
+	/// Count words, characters and spaces in inset
+	virtual void updateStatistics(Statistics & stats) const;
 	/// does this inset try to use all available space (like \\hfill does)?
 	virtual bool isHfill() const { return false; }
 
diff --git a/src/insets/InsetCitation.cpp b/src/insets/InsetCitation.cpp
index 87130462e0..8dc71541bb 100644
--- a/src/insets/InsetCitation.cpp
+++ b/src/insets/InsetCitation.cpp
@@ -29,6 +29,7 @@
 #include "output_xhtml.h"
 #include "output_docbook.h"
 #include "ParIterator.h"
+#include "Statistics.h"
 #include "texstream.h"
 #include "TocBackend.h"
 
@@ -791,10 +792,9 @@ void InsetCitation::latex(otexstream & os, OutputParams const & runparams) const
 }
 
 
-pair<int, int> InsetCitation::isWords() const
+void InsetCitation::updateStatistics(Statistics & stats) const
 {
-	docstring const label = generateLabel(false);
-	return pair<int, int>(label.size(), wordCount(label));
+	stats.update(generateLabel(false));
 }
 
 
diff --git a/src/insets/InsetCitation.h b/src/insets/InsetCitation.h
index b1b27e0291..30e78013dc 100644
--- a/src/insets/InsetCitation.h
+++ b/src/insets/InsetCitation.h
@@ -104,7 +104,7 @@ public:
 	/// search and open citation source
 	void openCitation();
 	///
-	std::pair<int, int> isWords() const override;
+	void updateStatistics(Statistics & stats) const override;
 
 private:
 	/// tries to make a pretty label and makes a basic one if not
diff --git a/src/insets/InsetHyperlink.cpp b/src/insets/InsetHyperlink.cpp
index dcdcbbb8e1..017845622e 100644
--- a/src/insets/InsetHyperlink.cpp
+++ b/src/insets/InsetHyperlink.cpp
@@ -20,8 +20,9 @@
 #include "LyX.h"
 #include "output_docbook.h"
 #include "output_xhtml.h"
-#include "xml.h"
+#include "Statistics.h"
 #include "texstream.h"
+#include "xml.h"
 
 #include "support/debug.h"
 #include "support/docstream.h"
@@ -299,10 +300,9 @@ void InsetHyperlink::validate(LaTeXFeatures & features) const
 }
 
 
-pair<int, int> InsetHyperlink::isWords() const
+void InsetHyperlink::updateStatistics(Statistics & stats) const
 {
-	docstring const label = getParam("name");
-	return pair<int, int>(label.size(), wordCount(label));
+	stats.update(getParam("name"));
 }
 
 
diff --git a/src/insets/InsetHyperlink.h b/src/insets/InsetHyperlink.h
index 36bd249902..cdf1d2e9ca 100644
--- a/src/insets/InsetHyperlink.h
+++ b/src/insets/InsetHyperlink.h
@@ -55,7 +55,7 @@ public:
 	///
 	docstring xhtml(XMLStream &, OutputParams const &) const override;
 	///
-	std::pair<int, int> isWords() const override;
+	void updateStatistics(Statistics & stats) const override;
 	//@}
 
 	/// \name Static public methods obligated for InsetCommand derived classes
diff --git a/src/insets/InsetQuotes.cpp b/src/insets/InsetQuotes.cpp
index 8d6caf1e5e..06bace7b25 100644
--- a/src/insets/InsetQuotes.cpp
+++ b/src/insets/InsetQuotes.cpp
@@ -28,6 +28,7 @@
 #include "MetricsInfo.h"
 #include "Paragraph.h"
 #include "ParIterator.h"
+#include "Statistics.h"
 #include "texstream.h"
 #include "xml.h"
 
@@ -1032,7 +1033,7 @@ string InsetQuotes::contextMenuName() const
 }
 
 
-pair<int, int> InsetQuotes::isWords() const
+void InsetQuotes::updateStatistics(Statistics & stats) const
 {
 	int length = 1;
 	// In PassThru, we use straight quotes otherwise we need to check for French
@@ -1052,7 +1053,7 @@ pair<int, int> InsetQuotes::isWords() const
 	}
 
 	//one or two characters from the statistics perspective
-	return std::pair<int, int>(length, 0);
+	stats.char_count += length;
 }
 
 } // namespace lyx
diff --git a/src/insets/InsetQuotes.h b/src/insets/InsetQuotes.h
index 13bd8ea124..fdf69c1b13 100644
--- a/src/insets/InsetQuotes.h
+++ b/src/insets/InsetQuotes.h
@@ -178,7 +178,7 @@ public:
 	/// Returns the current quote type
 	std::string getType() const;
 	///
-	std::pair<int, int> isWords() const override;
+	void updateStatistics(Statistics & stats) const override;
 
 private:
 	///
diff --git a/src/insets/InsetTabular.cpp b/src/insets/InsetTabular.cpp
index 74a0060aaf..6f1b470ea7 100644
--- a/src/insets/InsetTabular.cpp
+++ b/src/insets/InsetTabular.cpp
@@ -45,6 +45,7 @@
 #include "output_xhtml.h"
 #include "Paragraph.h"
 #include "ParIterator.h"
+#include "Statistics.h"
 #include "TexRow.h"
 #include "texstream.h"
 #include "TextClass.h"
@@ -5017,6 +5018,13 @@ void InsetTabular::updateBuffer(ParIterator const & it, UpdateType utype, bool c
 }
 
 
+void InsetTabular::updateStatistics(Statistics & stats) const
+{
+	for (idx_type idx = 0 ; idx < nargs(); ++idx)
+		stats.update(*getText(idx));
+}
+
+
 void InsetTabular::addToToc(DocIterator const & cpit, bool output_active,
 			    UpdateType utype, TocBackend & backend) const
 {
diff --git a/src/insets/InsetTabular.h b/src/insets/InsetTabular.h
index f6939c6eff..8b052c5d73 100644
--- a/src/insets/InsetTabular.h
+++ b/src/insets/InsetTabular.h
@@ -1113,9 +1113,12 @@ public:
 	Inset * editXY(Cursor & cur, int x, int y) override;
 	/// can we go further down on mouse click?
 	bool descendable(BufferView const &) const override { return true; }
+	/// Update the counters of this inset and of its contents
 	void updateBuffer(ParIterator const &, UpdateType,
 		bool const deleted = false) override;
 	///
+	void updateStatistics(Statistics & stats) const override;
+	///
 	void addToToc(DocIterator const & di, bool output_active,
 				  UpdateType utype, TocBackend & backend) const override;
 
diff --git a/src/insets/InsetText.cpp b/src/insets/InsetText.cpp
index ac3bc8e102..8a3109947f 100644
--- a/src/insets/InsetText.cpp
+++ b/src/insets/InsetText.cpp
@@ -50,6 +50,7 @@
 #include "Paragraph.h"
 #include "ParagraphParameters.h"
 #include "ParIterator.h"
+#include "Statistics.h"
 #include "TexRow.h"
 #include "texstream.h"
 #include "TextClass.h"
@@ -1141,6 +1142,12 @@ void InsetText::updateBuffer(ParIterator const & it, UpdateType utype, bool cons
 }
 
 
+void InsetText::updateStatistics(Statistics & stats) const
+{
+	stats.update(text());
+}
+
+
 void InsetText::toString(odocstream & os) const
 {
 	os << text().asString(0, 1, AS_STR_LABEL | AS_STR_INSETS);
diff --git a/src/insets/InsetText.h b/src/insets/InsetText.h
index 423735a032..f46bf1bbfe 100644
--- a/src/insets/InsetText.h
+++ b/src/insets/InsetText.h
@@ -177,6 +177,9 @@ public:
 	/// Update the counters of this inset and of its contents
 	void updateBuffer(ParIterator const &, UpdateType,
 		bool const deleted = false) override;
+	/// Count words, characters and blanks in the text of this inset
+	void updateStatistics(Statistics & stats) const override;
+
 	///
 	void setMacrocontextPositionRecursive(DocIterator const & pos);
 	///