/** * \file MathExtern.cpp * This file is part of LyX, the document processor. * Licence details can be found in the file COPYING. * * \author André Pönitz * * Full author contact details are available in file CREDITS. */ // This file contains most of the magic that extracts "context // information" from the unstructered layout-oriented stuff in // MathData. #include #include "MathExtern.h" #include "InsetMathAMSArray.h" #include "InsetMathArray.h" #include "InsetMathChar.h" #include "InsetMathDelim.h" #include "InsetMathDiff.h" #include "InsetMathExFunc.h" #include "InsetMathExInt.h" #include "InsetMathFont.h" #include "InsetMathFrac.h" #include "InsetMathLim.h" #include "InsetMathMatrix.h" #include "InsetMathNumber.h" #include "InsetMathScript.h" #include "InsetMathString.h" #include "InsetMathSymbol.h" #include "MathData.h" #include "MathParser.h" #include "MathStream.h" #include "Encoding.h" #include "support/debug.h" #include "support/docstream.h" #include "support/FileName.h" #include "support/filetools.h" #include "support/gettext.h" #include "support/lstrings.h" #include "support/TempFile.h" #include "support/textutils.h" #include "support/unique_ptr.h" #include #include #include #include using namespace std; using namespace lyx::support; namespace lyx { namespace { enum ExternalMath { HTML, MAPLE, MAXIMA, MATHEMATICA, MATHML, OCTAVE }; static char const * function_names[] = { "arccos", "arcsin", "arctan", "arg", "bmod", "cos", "cosh", "cot", "coth", "csc", "deg", "det", "dim", "exp", "gcd", "hom", "inf", "ker", "lg", "lim", "liminf", "limsup", "ln", "log", "max", "min", "sec", "sin", "sinh", "sup", "tan", "tanh", "Pr", nullptr }; static size_t const npos = lyx::docstring::npos; // define a function for tests typedef bool TestItemFunc(MathAtom const &); // define a function for replacing subexpressions typedef MathAtom ReplaceArgumentFunc(const MathData & ar); // try to extract a super/subscript // modify iterator position to point behind the thing bool extractScript(MathData & ar, MathData::iterator & pos, MathData::iterator last, bool superscript) { // nothing to get here if (pos == last) return false; // is this a scriptinset? if (!(*pos)->asScriptInset()) return false; // do we want superscripts only? if (superscript && !(*pos)->asScriptInset()->hasUp()) return false; // it is a scriptinset, use it. ar.push_back(*pos); ++pos; return true; } // try to extract an "argument" to some function. // returns position behind the argument MathData::iterator extractArgument(MathData & ar, MathData::iterator pos, MathData::iterator last, ExternalMath kind, bool function = false) { // nothing to get here if (pos == last) return pos; // something delimited _is_ an argument if ((*pos)->asDelimInset()) { // leave out delimiters if this is a function argument // unless we are doing MathML, in which case we do want // the delimiters if (function && kind != MATHML && kind != HTML) { MathData const & arg = (*pos)->asDelimInset()->cell(0); MathData::const_iterator cur = arg.begin(); MathData::const_iterator end = arg.end(); while (cur != end) ar.push_back(*cur++); } else ar.push_back(*pos); ++pos; if (pos == last) return pos; // if there's one, get following superscript only if this // isn't a function argument if (!function) extractScript(ar, pos, last, true); return pos; } // always take the first thing, no matter what it is ar.push_back(*pos); // go ahead if possible ++pos; if (pos == last) return pos; // if the next item is a super/subscript, it most certainly belongs // to the thing we have extractScript(ar, pos, last, false); if (pos == last) return pos; // but it might be more than that. // FIXME: not implemented //for (MathData::iterator it = pos + 1; it != last; ++it) { // // always take the first thing, no matter // if (it == pos) { // ar.push_back(*it); // continue; // } //} return pos; } // returns sequence of char with same code starting at it up to end // it might be less, though... docstring charSequence (MathData::const_iterator it, MathData::const_iterator end) { docstring s; for (; it != end && (*it)->asCharInset(); ++it) s += (*it)->getChar(); return s; } void extractStrings(MathData & ar) { //lyxerr << "\nStrings from: " << ar << endl; for (size_t i = 0; i < ar.size(); ++i) { if (!ar[i]->asCharInset()) continue; docstring s = charSequence(ar.begin() + i, ar.end()); ar[i] = MathAtom(new InsetMathString(s)); ar.erase(i + 1, i + s.size()); } //lyxerr << "\nStrings to: " << ar << endl; } void extractMatrices(MathData & ar) { //lyxerr << "\nMatrices from: " << ar << endl; // first pass for explicitly delimited stuff for (size_t i = 0; i < ar.size(); ++i) { InsetMathDelim const * const inset = ar[i]->asDelimInset(); if (!inset) continue; MathData const & arr = inset->cell(0); if (arr.size() != 1) continue; if (!arr.front()->asGridInset()) continue; ar[i] = MathAtom(new InsetMathMatrix(*(arr.front()->asGridInset()), inset->left_, inset->right_)); } // second pass for AMS "pmatrix" etc for (size_t i = 0; i < ar.size(); ++i) { InsetMathAMSArray const * const inset = ar[i]->asAMSArrayInset(); if (inset) { string left = inset->name_left(); if (left == "Vert") left = "["; string right = inset->name_right(); if (right == "Vert") right = "]"; ar[i] = MathAtom(new InsetMathMatrix(*inset, from_ascii(left), from_ascii(right))); } } //lyxerr << "\nMatrices to: " << ar << endl; } // convert this inset somehow to a string bool extractString(MathAtom const & at, docstring & str) { if (at->getChar()) { str = docstring(1, at->getChar()); return true; } if (at->asStringInset()) { str = at->asStringInset()->str(); return true; } return false; } // is this a known function? bool isKnownFunction(docstring const & str) { for (int i = 0; function_names[i]; ++i) { if (str == function_names[i]) return true; } return false; } // extract a function name from this inset bool extractFunctionName(MathAtom const & at, docstring & str) { if (at->asSymbolInset()) { str = at->asSymbolInset()->name(); return isKnownFunction(str); } if (at->asUnknownInset()) { // assume it is well known... str = at->name(); return true; } if (at->asFontInset() && at->name() == "mathrm") { // assume it is well known... MathData const & ar = at->asFontInset()->cell(0); str = charSequence(ar.begin(), ar.end()); return ar.size() == str.size(); } return false; } bool testString(MathAtom const & at, docstring const & str) { docstring s; return extractString(at, s) && str == s; } bool testString(MathAtom const & at, char const * const str) { return testString(at, from_ascii(str)); } // search end of nested sequence MathData::iterator endNestSearch( MathData::iterator it, MathData::iterator last, TestItemFunc testOpen, TestItemFunc testClose ) { for (int level = 0; it != last; ++it) { if (testOpen(*it)) ++level; if (testClose(*it)) --level; if (level == 0) break; } return it; } // replace nested sequences by a real Insets void replaceNested( MathData & ar, TestItemFunc testOpen, TestItemFunc testClose, ReplaceArgumentFunc replaceArg) { Buffer * buf = ar.buffer(); // use indices rather than iterators for the loop because we are going // to modify the array. for (size_t i = 0; i < ar.size(); ++i) { // check whether this is the begin of the sequence if (!testOpen(ar[i])) continue; // search end of sequence MathData::iterator it = ar.begin() + i; MathData::iterator jt = endNestSearch(it, ar.end(), testOpen, testClose); if (jt == ar.end()) continue; // replace the original stuff by the new inset ar[i] = replaceArg(MathData(buf, it + 1, jt)); ar.erase(it + 1, jt + 1); } } // // split scripts into separate super- and subscript insets. sub goes in // front of super... // void splitScripts(MathData & ar) { Buffer * buf = ar.buffer(); //lyxerr << "\nScripts from: " << ar << endl; for (size_t i = 0; i < ar.size(); ++i) { InsetMathScript const * script = ar[i]->asScriptInset(); // is this a script inset and do we also have a superscript? if (!script || !script->hasUp()) continue; // we must have a nucleus if we only have a superscript if (!script->hasDown() && script->nuc().empty()) continue; if (script->nuc().size() == 1) { // leave alone sums and integrals InsetMathSymbol const * sym = script->nuc().front()->asSymbolInset(); if (sym && (sym->name() == "sum" || sym->name() == "int")) continue; } // create extra script inset and move superscript over InsetMathScript * p = ar[i].nucleus()->asScriptInset(); auto q = make_unique(buf, true); swap(q->up(), p->up()); p->removeScript(true); // if we don't have a subscript, get rid of the ScriptInset if (!script->hasDown()) { MathData arg(p->nuc()); MathData::const_iterator it = arg.begin(); MathData::const_iterator et = arg.end(); ar.erase(i); while (it != et) ar.insert(i++, *it++); } else ++i; // insert new inset behind ar.insert(i, MathAtom(q.release())); } //lyxerr << "\nScripts to: " << ar << endl; } // // extract exp(...) // void extractExps(MathData & ar) { Buffer * buf = ar.buffer(); //lyxerr << "\nExps from: " << ar << endl; for (size_t i = 0; i + 1 < ar.size(); ++i) { // is this 'e'? if (ar[i]->getChar() != 'e') continue; // we need an exponent but no subscript InsetMathScript const * sup = ar[i + 1]->asScriptInset(); if (!sup || sup->hasDown()) continue; // create a proper exp-inset as replacement ar[i] = MathAtom(new InsetMathExFunc(buf, from_ascii("exp"), sup->cell(1))); ar.erase(i + 1); } //lyxerr << "\nExps to: " << ar << endl; } // // extract det(...) from |matrix| // void extractDets(MathData & ar) { Buffer * buf = ar.buffer(); //lyxerr << "\ndet from: " << ar << endl; for (MathData::iterator it = ar.begin(); it != ar.end(); ++it) { InsetMathDelim const * del = (*it)->asDelimInset(); if (!del) continue; if (!del->isAbs()) continue; *it = MathAtom(new InsetMathExFunc(buf, from_ascii("det"), del->cell(0))); } //lyxerr << "\ndet to: " << ar << endl; } // // search numbers // bool isDigitOrSimilar(char_type c) { return ('0' <= c && c <= '9') || c == '.'; } // returns sequence of digits docstring digitSequence (MathData::const_iterator it, MathData::const_iterator end) { docstring s; for (; it != end && (*it)->asCharInset(); ++it) { if (!isDigitOrSimilar((*it)->getChar())) break; s += (*it)->getChar(); } return s; } void extractNumbers(MathData & ar) { //lyxerr << "\nNumbers from: " << ar << endl; for (size_t i = 0; i < ar.size(); ++i) { if (!ar[i]->asCharInset()) continue; if (!isDigitOrSimilar(ar[i]->asCharInset()->getChar())) continue; docstring s = digitSequence(ar.begin() + i, ar.end()); ar[i] = MathAtom(new InsetMathNumber(s)); ar.erase(i + 1, i + s.size()); } //lyxerr << "\nNumbers to: " << ar << endl; } // // search delimiters // bool testOpenParen(MathAtom const & at) { return testString(at, "("); } bool testCloseParen(MathAtom const & at) { return testString(at, ")"); } MathAtom replaceParenDelims(const MathData & ar) { return MathAtom(new InsetMathDelim(const_cast(ar.buffer()), from_ascii("("), from_ascii(")"), ar)); } bool testOpenBracket(MathAtom const & at) { return testString(at, "["); } bool testCloseBracket(MathAtom const & at) { return testString(at, "]"); } MathAtom replaceBracketDelims(const MathData & ar) { return MathAtom(new InsetMathDelim(const_cast(ar.buffer()), from_ascii("["), from_ascii("]"), ar)); } // replace '('...')' and '['...']' sequences by a real InsetMathDelim void extractDelims(MathData & ar) { //lyxerr << "\nDelims from: " << ar << endl; replaceNested(ar, testOpenParen, testCloseParen, replaceParenDelims); replaceNested(ar, testOpenBracket, testCloseBracket, replaceBracketDelims); //lyxerr << "\nDelims to: " << ar << endl; } // // search well-known functions // // replace 'f' '(...)' and 'f' '^n' '(...)' sequences by a real InsetMathExFunc // assume 'extractDelims' ran before void extractFunctions(MathData & ar, ExternalMath kind) { // FIXME From what I can see, this is quite broken right now, for reasons // I will note below. (RGH) // we need at least two items... if (ar.size() < 2) return; Buffer * buf = ar.buffer(); //lyxerr << "\nFunctions from: " << ar << endl; for (size_t i = 0; i + 1 < ar.size(); ++i) { MathData::iterator it = ar.begin() + i; MathData::iterator jt = it + 1; docstring name; // is it a function? // it certainly is if it is well known... // FIXME This will never give us anything. When we get here, *it will // never point at a string, but only at a character. I.e., if we are // working on "sin(x)", then we are seeing: // [char s mathalpha][char i mathalpha][char n mathalpha][delim ( ) [char x mathalpha]] // and of course we will not find the function name "sin" in there, but // rather "n(x)". // // It appears that we original ran extractStrings() before we ran // extractFunctions(), but Andre changed this at f200be55, I think // because this messed up what he was trying to do with "dx" in the // context of integrals. // // This could be fixed by looking at a charSequence instead of just at // the various characters, one by one. But I am not sure I understand // exactly what we are trying to do here. And it involves a lot of // guessing. if (!extractFunctionName(*it, name)) { // is this a user defined function? // probably not, if it doesn't have a name. if (!extractString(*it, name)) continue; // it is not if it has no argument if (jt == ar.end()) continue; // guess so, if this is followed by // a DelimInset with a single item in the cell InsetMathDelim const * del = (*jt)->asDelimInset(); if (!del || del->cell(0).size() != 1) continue; // fall through into main branch } // do we have an exponent like in // 'sin' '^2' 'x' -> 'sin(x)' '^2' MathData exp; extractScript(exp, jt, ar.end(), true); // create a proper inset as replacement auto p = lyx::make_unique(buf, name); // jt points to the "argument". Get hold of this. MathData::iterator st = extractArgument(p->cell(0), jt, ar.end(), kind, true); // replace the function name by a real function inset *it = MathAtom(p.release()); // remove the source of the argument from the array ar.erase(it + 1, st); // re-insert exponent ar.insert(i + 1, exp); //lyxerr << "\nFunctions to: " << ar << endl; } } // // search integrals // bool testSymbol(MathAtom const & at, docstring const & name) { return at->asSymbolInset() && at->asSymbolInset()->name() == name; } bool testSymbol(MathAtom const & at, char const * const name) { return at->asSymbolInset() && at->asSymbolInset()->name() == from_ascii(name); } bool testIntSymbol(MathAtom const & at) { return testSymbol(at, from_ascii("int")); } bool testIntegral(MathAtom const & at) { return testIntSymbol(at) || ( at->asScriptInset() && !at->asScriptInset()->nuc().empty() && testIntSymbol(at->asScriptInset()->nuc().back()) ); } bool testIntDiff(MathAtom const & at) { return testString(at, "d"); } // replace '\int' ['_^'] x 'd''x'(...)' sequences by a real InsetMathExInt // assume 'extractDelims' ran before void extractIntegrals(MathData & ar, ExternalMath kind) { // we need at least three items... if (ar.size() < 3) return; Buffer * buf = ar.buffer(); //lyxerr << "\nIntegrals from: " << ar << endl; for (size_t i = 0; i + 1 < ar.size(); ++i) { MathData::iterator it = ar.begin() + i; // search 'd' MathData::iterator jt = endNestSearch(it, ar.end(), testIntegral, testIntDiff); // something sensible found? if (jt == ar.end()) continue; // is this a integral name? if (!testIntegral(*it)) continue; // core is part from behind the scripts to the 'd' auto p = lyx::make_unique(buf, from_ascii("int")); // handle scripts if available if (!testIntSymbol(*it)) { p->cell(2) = (*it)->asScriptInset()->down(); p->cell(3) = (*it)->asScriptInset()->up(); } p->cell(0) = MathData(buf, it + 1, jt); // use the "thing" behind the 'd' as differential MathData::iterator tt = extractArgument(p->cell(1), jt + 1, ar.end(), kind); // remove used parts ar.erase(it + 1, tt); *it = MathAtom(p.release()); } //lyxerr << "\nIntegrals to: " << ar << endl; } bool testTermDelimiter(MathAtom const & at) { return testString(at, "+") || testString(at, "-"); } // try to extract a "term", i.e., something delimited by '+' or '-'. // returns position behind the term MathData::iterator extractTerm(MathData & ar, MathData::iterator pos, MathData::iterator last) { while (pos != last && !testTermDelimiter(*pos)) { ar.push_back(*pos); ++pos; } return pos; } // // search sums // bool testEqualSign(MathAtom const & at) { return testString(at, "="); } bool testSumSymbol(MathAtom const & p) { return testSymbol(p, from_ascii("sum")); } bool testSum(MathAtom const & at) { return testSumSymbol(at) || ( at->asScriptInset() && !at->asScriptInset()->nuc().empty() && testSumSymbol(at->asScriptInset()->nuc().back()) ); } // replace '\sum' ['_^'] f(x) sequences by a real InsetMathExInt // assume 'extractDelims' ran before void extractSums(MathData & ar) { // we need at least two items... if (ar.size() < 2) return; Buffer * buf = ar.buffer(); //lyxerr << "\nSums from: " << ar << endl; for (size_t i = 0; i + 1 < ar.size(); ++i) { MathData::iterator it = ar.begin() + i; // is this a sum name? if (!testSum(ar[i])) continue; // create a proper inset as replacement auto p = lyx::make_unique(buf, from_ascii("sum")); // collect lower bound and summation index InsetMathScript const * sub = ar[i]->asScriptInset(); if (sub && sub->hasDown()) { // try to figure out the summation index from the subscript MathData const & md = sub->down(); MathData::const_iterator xt = find_if(md.begin(), md.end(), &testEqualSign); if (xt != md.end()) { // we found a '=', use everything in front of that as index, // and everything behind as lower index p->cell(1) = MathData(buf, md.begin(), xt); p->cell(2) = MathData(buf, xt + 1, md.end()); } else { // use everything as summation index, don't use scripts. p->cell(1) = md; } } // collect upper bound if (sub && sub->hasUp()) p->cell(3) = sub->up(); // use something behind the script as core MathData::iterator tt = extractTerm(p->cell(0), it + 1, ar.end()); // cleanup ar.erase(it + 1, tt); *it = MathAtom(p.release()); } //lyxerr << "\nSums to: " << ar << endl; } // // search differential stuff // // tests for 'd' or '\partial' bool testDiffItem(MathAtom const & at) { if (testString(at, "d") || testSymbol(at, "partial")) return true; // we may have d^n .../d and splitScripts() has not yet seen it InsetMathScript const * sup = at->asScriptInset(); if (sup && !sup->hasDown() && sup->hasUp() && sup->nuc().size() == 1) { MathAtom const & ma = sup->nuc().front(); return testString(ma, "d") || testSymbol(ma, "partial"); } return false; } bool testDiffArray(MathData const & ar) { return !ar.empty() && testDiffItem(ar.front()); } bool testDiffFrac(MathAtom const & at) { return at->asFracInset() && testDiffArray(at->asFracInset()->cell(0)) && testDiffArray(at->asFracInset()->cell(1)); } void extractDiff(MathData & ar) { Buffer * buf = ar.buffer(); //lyxerr << "\nDiffs from: " << ar << endl; for (size_t i = 0; i < ar.size(); ++i) { MathData::iterator it = ar.begin() + i; // is this a "differential fraction"? if (!testDiffFrac(*it)) continue; InsetMathFrac const * f = (*it)->asFracInset(); if (!f) { lyxerr << "should not happen" << endl; continue; } // create a proper diff inset auto diff = lyx::make_unique(buf); // collect function, let jt point behind last used item MathData::iterator jt = it + 1; //int n = 1; MathData numer(f->cell(0)); splitScripts(numer); if (numer.size() > 1 && numer[1]->asScriptInset()) { // this is something like d^n f(x) / d... or d^n / d... // FIXME //n = 1; if (numer.size() > 2) diff->cell(0) = MathData(buf, numer.begin() + 2, numer.end()); else jt = extractTerm(diff->cell(0), jt, ar.end()); } else { // simply d f(x) / d... or d/d... if (numer.size() > 1) diff->cell(0) = MathData(buf, numer.begin() + 1, numer.end()); else jt = extractTerm(diff->cell(0), jt, ar.end()); } // collect denominator parts MathData denom(f->cell(1)); splitScripts(denom); for (MathData::iterator dt = denom.begin(); dt != denom.end();) { // find the next 'd' MathData::iterator et = find_if(dt + 1, denom.end(), &testDiffItem); // point before this MathData::iterator st = et - 1; InsetMathScript const * script = (*st)->asScriptInset(); if (script && script->hasUp()) { // things like d.../dx^n int mult = 1; if (extractNumber(script->up(), mult)) { //lyxerr << "mult: " << mult << endl; if (mult < 0 || mult > 1000) { lyxerr << "Cannot differentiate less than 0 or more than 1000 times !" << endl; continue; } for (int ii = 0; ii < mult; ++ii) diff->addDer(MathData(buf, dt + 1, st)); } } else { // just d.../dx diff->addDer(MathData(buf, dt + 1, et)); } dt = et; } // cleanup ar.erase(it + 1, jt); *it = MathAtom(diff.release()); } //lyxerr << "\nDiffs to: " << ar << endl; } // // search limits // bool testRightArrow(MathAtom const & at) { return testSymbol(at, "to") || testSymbol(at, "rightarrow"); } // replace '\lim_{x->x0} f(x)' sequences by a real InsetMathLim // assume 'extractDelims' ran before void extractLims(MathData & ar) { Buffer * buf = ar.buffer(); //lyxerr << "\nLimits from: " << ar << endl; for (size_t i = 0; i < ar.size(); ++i) { MathData::iterator it = ar.begin() + i; // must be a script inset with a subscript (without superscript) InsetMathScript const * sub = (*it)->asScriptInset(); if (!sub || !sub->hasDown() || sub->hasUp() || sub->nuc().size() != 1) continue; // is this a limit function? if (!testSymbol(sub->nuc().front(), "lim")) continue; // subscript must contain a -> symbol MathData const & s = sub->down(); MathData::const_iterator st = find_if(s.begin(), s.end(), &testRightArrow); if (st == s.end()) continue; // the -> splits the subscript int x and x0 MathData x = MathData(buf, s.begin(), st); MathData x0 = MathData(buf, st + 1, s.end()); // use something behind the script as core MathData f; MathData::iterator tt = extractTerm(f, it + 1, ar.end()); // cleanup ar.erase(it + 1, tt); // create a proper inset as replacement *it = MathAtom(new InsetMathLim(buf, f, x, x0)); } //lyxerr << "\nLimits to: " << ar << endl; } // // combine searches // void extractStructure(MathData & ar, ExternalMath kind) { //lyxerr << "\nStructure from: " << ar << endl; if (kind != MATHML && kind != HTML) splitScripts(ar); extractDelims(ar); extractIntegrals(ar, kind); if (kind != MATHML && kind != HTML) extractSums(ar); extractNumbers(ar); extractMatrices(ar); if (kind != MATHML && kind != HTML) { extractFunctions(ar, kind); extractDets(ar); extractDiff(ar); extractExps(ar); extractLims(ar); extractStrings(ar); } //lyxerr << "\nStructure to: " << ar << endl; } namespace { string captureOutput(string const & cmd, string const & data) { // In order to avoid parsing problems with command interpreters // we pass input data through a file // Since the CAS is supposed to read the temp file we need // to unlock it on windows (bug 10262). unique_ptr tempfile(new TempFile("casinput")); tempfile->setAutoRemove(false); FileName const cas_tmpfile = tempfile->name(); tempfile.reset(); if (cas_tmpfile.empty()) { lyxerr << "Warning: cannot create temporary file." << endl; return string(); } ofstream os(cas_tmpfile.toFilesystemEncoding().c_str()); os << data << endl; os.close(); string command = cmd + " < " + quoteName(cas_tmpfile.toFilesystemEncoding()); lyxerr << "calling: " << cmd << "\ninput: '" << data << "'" << endl; cmd_ret const ret = runCommand(command); cas_tmpfile.removeFile(); return ret.result; } size_t get_matching_brace(string const & str, size_t i) { int count = 1; size_t n = str.size(); while (i < n) { i = str.find_first_of("{}", i+1); if (i == npos) return i; if (str[i] == '{') ++count; else --count; if (count == 0) return i; } return npos; } size_t get_matching_brace_back(string const & str, size_t i) { int count = 1; while (i > 0) { i = str.find_last_of("{}", i-1); if (i == npos) return i; if (str[i] == '}') ++count; else --count; if (count == 0) return i; } return npos; } MathData pipeThroughMaxima(docstring const &, MathData const & ar) { odocstringstream os; MaximaStream ms(os); ms << ar; docstring expr = os.str(); docstring const header = from_ascii("simpsum:true;"); string out; for (int i = 0; i < 100; ++i) { // at most 100 attempts // try to fix missing '*' the hard way // // > echo "2x;" | maxima // ... // (C1) Incorrect syntax: x is not an infix operator // 2x; // ^ // lyxerr << "checking expr: '" << to_utf8(expr) << "'" << endl; docstring full = header + "tex(" + expr + ");"; out = captureOutput("maxima", to_utf8(full)); // leave loop if expression syntax is probably ok if (out.find("Incorrect syntax") == npos) break; // search line with "Incorrect syntax" istringstream is(out); string line; while (is) { getline(is, line); if (line.find("Incorrect syntax") != npos) break; } // 2nd next line is the one with caret getline(is, line); getline(is, line); size_t pos = line.find('^'); lyxerr << "found caret at pos: '" << pos << "'" << endl; if (pos == npos || pos < 4) break; // caret position not found pos -= 4; // skip the "tex(" part if (expr[pos] == '*') break; // two '*' in a row are definitely bad expr.insert(pos, from_ascii("*")); } vector tmp = getVectorFromString(out, "$$"); if (tmp.size() < 2) return MathData(); out = subst(subst(tmp[1], "\\>", string()), "{\\it ", "\\mathit{"); lyxerr << "output: '" << out << "'" << endl; // Ugly code that tries to make the result prettier size_t i = out.find("\\mathchoice"); while (i != npos) { size_t j = get_matching_brace(out, i + 12); size_t k = get_matching_brace(out, j + 1); k = get_matching_brace(out, k + 1); k = get_matching_brace(out, k + 1); string mid = out.substr(i + 13, j - i - 13); if (mid.find("\\over") != npos) mid = '{' + mid + '}'; out = out.substr(0, i) + mid + out.substr(k + 1); //lyxerr << "output: " << out << endl; i = out.find("\\mathchoice", i); } i = out.find("\\over"); while (i != npos) { size_t j = get_matching_brace_back(out, i - 1); if (j == npos || j == 0) break; size_t k = get_matching_brace(out, i + 5); if (k == npos || k + 1 == out.size()) break; out = out.substr(0, j - 1) + "\\frac" + out.substr(j, i - j) + out.substr(i + 5, k - i - 4) + out.substr(k + 2); //lyxerr << "output: " << out << endl; i = out.find("\\over", i + 4); } MathData res; mathed_parse_cell(res, from_utf8(out)); return res; } MathData pipeThroughMaple(docstring const & extra, MathData const & ar) { string header = "readlib(latex):\n"; // remove the \\it for variable names //"#`latex/csname_font` := `\\it `:" header += "`latex/csname_font` := ``:\n"; // export matrices in (...) instead of [...] header += "`latex/latex/matrix` := " "subs(`[`=`(`, `]`=`)`," "eval(`latex/latex/matrix`)):\n"; // replace \\cdots with proper '*' header += "`latex/latex/*` := " "subs(`\\,`=`\\cdot `," "eval(`latex/latex/*`)):\n"; // remove spurious \\noalign{\\medskip} in matrix output header += "`latex/latex/matrix`:= " "subs(`\\\\\\\\\\\\noalign{\\\\medskip}` = `\\\\\\\\`," "eval(`latex/latex/matrix`)):\n"; //"#`latex/latex/symbol` " // " := subs((\\'_\\' = \\'`\\_`\\',eval(`latex/latex/symbol`)): "; string trailer = "quit;"; odocstringstream os; MapleStream ms(os); ms << ar; string expr = to_utf8(os.str()); lyxerr << "ar: '" << ar << "'\n" << "ms: '" << expr << "'" << endl; for (int i = 0; i < 100; ++i) { // at most 100 attempts // try to fix missing '*' the hard way by using mint // // ... > echo "1A;" | mint -i 1 -S -s -q // on line 1: 1A; // ^ syntax error - // Probably missing an operator such as * p // lyxerr << "checking expr: '" << expr << "'" << endl; string out = captureOutput("mint -i 1 -S -s -q -q", expr + ';'); if (out.empty()) break; // expression syntax is ok istringstream is(out); string line; getline(is, line); if (!prefixIs(line, "on line")) break; // error message not identified getline(is, line); size_t pos = line.find('^'); if (pos == string::npos || pos < 15) break; // caret position not found pos -= 15; // skip the "on line ..." part if (expr[pos] == '*' || (pos > 0 && expr[pos - 1] == '*')) break; // two '*' in a row are definitely bad expr.insert(pos, 1, '*'); } // FIXME UNICODE Is utf8 encoding correct? string full = "latex(" + to_utf8(extra) + '(' + expr + "));"; string out = captureOutput("maple -q", header + full + trailer); // change \_ into _ // MathData res; mathed_parse_cell(res, from_utf8(out)); return res; } MathData pipeThroughOctave(docstring const &, MathData const & ar) { odocstringstream os; OctaveStream vs(os); vs << ar; string expr = to_utf8(os.str()); string out; // FIXME const cast Buffer * buf = const_cast(ar.buffer()); lyxerr << "pipe: ar: '" << ar << "'\n" << "pipe: expr: '" << expr << "'" << endl; for (int i = 0; i < 100; ++i) { // at most 100 attempts // // try to fix missing '*' the hard way // parse error: // >>> ([[1 2 3 ];[2 3 1 ];[3 1 2 ]])([[1 2 3 ];[2 3 1 ];[3 1 2 ]]) // ^ // lyxerr << "checking expr: '" << expr << "'" << endl; out = captureOutput("octave -q 2>&1", expr); lyxerr << "output: '" << out << "'" << endl; // leave loop if expression syntax is probably ok if (out.find("parse error:") == string::npos) break; // search line with single caret istringstream is(out); string line; while (is) { getline(is, line); lyxerr << "skipping line: '" << line << "'" << endl; if (line.find(">>> ") != string::npos) break; } // found line with error, next line is the one with caret getline(is, line); size_t pos = line.find('^'); lyxerr << "caret line: '" << line << "'" << endl; lyxerr << "found caret at pos: '" << pos << "'" << endl; if (pos == string::npos || pos < 4) break; // caret position not found pos -= 4; // skip the ">>> " part if (expr[pos] == '*') break; // two '*' in a row are definitely bad expr.insert(pos, 1, '*'); } // remove 'ans = ' taking into account that there may be an // ansi control sequence before, such as '\033[?1034hans = ' size_t i = out.find("ans = "); if (i == string::npos) return MathData(); out = out.substr(i + 6); // parse output as matrix or single number MathAtom at(new InsetMathArray(buf, from_ascii("array"), from_utf8(out))); InsetMathArray const * mat = at->asArrayInset(); MathData res(buf); if (mat->ncols() == 1 && mat->nrows() == 1) res.append(mat->cell(0)); else { res.push_back(MathAtom( new InsetMathDelim(buf, from_ascii("("), from_ascii(")")))); res.back().nucleus()->cell(0).push_back(at); } return res; } string fromMathematicaName(string const & name) { if (name == "Sin") return "sin"; if (name == "Sinh") return "sinh"; if (name == "ArcSin") return "arcsin"; if (name == "Cos") return "cos"; if (name == "Cosh") return "cosh"; if (name == "ArcCos") return "arccos"; if (name == "Tan") return "tan"; if (name == "Tanh") return "tanh"; if (name == "ArcTan") return "arctan"; if (name == "Cot") return "cot"; if (name == "Coth") return "coth"; if (name == "Csc") return "csc"; if (name == "Sec") return "sec"; if (name == "Exp") return "exp"; if (name == "Log") return "log"; if (name == "Arg" ) return "arg"; if (name == "Det" ) return "det"; if (name == "GCD" ) return "gcd"; if (name == "Max" ) return "max"; if (name == "Min" ) return "min"; if (name == "Erf" ) return "erf"; if (name == "Erfc" ) return "erfc"; return name; } void prettifyMathematicaOutput(string & out, string const & macroName, bool roman, bool translate) { string const macro = "\\" + macroName + "{"; size_t const len = macro.length(); size_t i = out.find(macro); while (i != npos) { size_t const j = get_matching_brace(out, i + len); string const name = out.substr(i + len, j - i - len); out = out.substr(0, i) + (roman ? "\\mathrm{" : "") + (translate ? fromMathematicaName(name) : name) + out.substr(roman ? j : j + 1); //lyxerr << "output: " << out << endl; i = out.find(macro, i); } } MathData pipeThroughMathematica(docstring const &, MathData const & ar) { odocstringstream os; MathematicaStream ms(os); ms << ar; // FIXME UNICODE Is utf8 encoding correct? string const expr = to_utf8(os.str()); string out; lyxerr << "expr: '" << expr << "'" << endl; string const full = "TeXForm[" + expr + "]"; out = captureOutput("math", full); lyxerr << "output: '" << out << "'" << endl; size_t pos1 = out.find("Out[1]//TeXForm= "); size_t pos2 = out.find("In[2]:="); if (pos1 == string::npos || pos2 == string::npos) return MathData(); // get everything from pos1+17 to pos2 out = out.substr(pos1 + 17, pos2 - pos1 - 17); out = subst(subst(out, '\r', ' '), '\n', ' '); // tries to make the result prettier prettifyMathematicaOutput(out, "Mfunction", true, true); prettifyMathematicaOutput(out, "Muserfunction", true, false); prettifyMathematicaOutput(out, "Mvariable", false, false); MathData res; mathed_parse_cell(res, from_utf8(out)); return res; } } // namespace } // namespace void write(MathData const & dat, TeXMathStream & wi) { wi.firstitem() = true; docstring s; for (MathData::const_iterator it = dat.begin(); it != dat.end(); ++it) { InsetMathChar const * const c = (*it)->asCharInset(); if (c) s += c->getChar(); else { if (!s.empty()) { writeString(s, wi); s.clear(); } (*it)->write(wi); wi.firstitem() = false; } } if (!s.empty()) { writeString(s, wi); wi.firstitem() = false; } } void writeString(docstring const & s, TeXMathStream & os) { if (!os.latex()) { os << (os.asciiOnly() ? escape(s) : s); return; } if (os.lockedMode()) { bool space; docstring cmd; for (char_type c : s) { try { Encodings::latexMathChar(c, true, os.encoding(), cmd, space); os << cmd; os.pendingSpace(space); } catch (EncodingException const & e) { switch (os.output()) { case TeXMathStream::wsDryrun: { os << "<" << _("LyX Warning: ") << _("uncodable character") << " '"; os << docstring(1, e.failed_char); os << "'>"; break; } case TeXMathStream::wsPreview: { // indicate the encoding error by a boxed '?' os << "{\\fboxsep=1pt\\fbox{?}}"; LYXERR0("Uncodable character" << " '" << docstring(1, e.failed_char) << "'"); break; } case TeXMathStream::wsDefault: default: // throw again throw; } } } return; } // We may already be inside an \ensuremath command. bool in_forced_mode = os.pendingBrace(); // We will take care of matching braces. os.pendingBrace(false); for (char_type const c : s) { bool mathmode = in_forced_mode ? os.textMode() : !os.textMode(); docstring command(1, c); try { bool termination = false; if (isASCII(c) || Encodings::latexMathChar(c, mathmode, os.encoding(), command, termination)) { if (os.textMode()) { if (in_forced_mode) { // we were inside \lyxmathsym os << '}'; os.textMode(false); in_forced_mode = false; } if (!isASCII(c) && os.textMode()) { os << "\\ensuremath{"; os.textMode(false); in_forced_mode = true; } } else if (isASCII(c) && in_forced_mode) { // we were inside \ensuremath os << '}'; os.textMode(true); in_forced_mode = false; } } else if (!os.textMode()) { if (in_forced_mode) { // we were inside \ensuremath os << '}'; in_forced_mode = false; } else { os << "\\lyxmathsym{"; in_forced_mode = true; } os.textMode(true); } os << command; // We may need a space if the command contains a macro // and the last char is ASCII. if (termination) os.pendingSpace(true); } catch (EncodingException const & e) { switch (os.output()) { case TeXMathStream::wsDryrun: { os << "<" << _("LyX Warning: ") << _("uncodable character") << " '"; os << docstring(1, e.failed_char); os << "'>"; break; } case TeXMathStream::wsPreview: { // indicate the encoding error by a boxed '?' os << "{\\fboxsep=1pt\\fbox{?}}"; LYXERR0("Uncodable character" << " '" << docstring(1, e.failed_char) << "'"); break; } case TeXMathStream::wsDefault: default: // throw again throw; } } } if (in_forced_mode && os.textMode()) { // We have to care for closing \lyxmathsym os << '}'; os.textMode(false); } else { os.pendingBrace(in_forced_mode); } } void normalize(MathData const & ar, NormalStream & os) { for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it) (*it)->normalize(os); } void octave(MathData const & dat, OctaveStream & os) { MathData ar = dat; extractStructure(ar, OCTAVE); for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it) (*it)->octave(os); } void maple(MathData const & dat, MapleStream & os) { MathData ar = dat; extractStructure(ar, MAPLE); for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it) (*it)->maple(os); } void maxima(MathData const & dat, MaximaStream & os) { MathData ar = dat; extractStructure(ar, MAXIMA); for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it) (*it)->maxima(os); } void mathematica(MathData const & dat, MathematicaStream & os) { MathData ar = dat; extractStructure(ar, MATHEMATICA); for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it) (*it)->mathematica(os); } void mathmlize(MathData const & dat, MathMLStream & ms) { MathData ar = dat; extractStructure(ar, MATHML); if (ar.empty()) ms << CTag("mrow"); else if (ar.size() == 1) ms << ar.front(); else { ms << MTag("mrow"); for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it) (*it)->mathmlize(ms); ms << ETag("mrow"); } } void htmlize(MathData const & dat, HtmlStream & os) { MathData ar = dat; extractStructure(ar, HTML); if (ar.empty()) return; if (ar.size() == 1) { os << ar.front(); return; } for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it) (*it)->htmlize(os); } // convert this inset somehow to a number bool extractNumber(MathData const & ar, int & i) { idocstringstream is(charSequence(ar.begin(), ar.end())); is >> i; // Do not convert is implicitly to bool, since that is forbidden in C++11. return !is.fail(); } bool extractNumber(MathData const & ar, double & d) { idocstringstream is(charSequence(ar.begin(), ar.end())); is >> d; // Do not convert is implicitly to bool, since that is forbidden in C++11. return !is.fail(); } MathData pipeThroughExtern(string const & lang, docstring const & extra, MathData const & ar) { if (lang == "octave") return pipeThroughOctave(extra, ar); if (lang == "maxima") return pipeThroughMaxima(extra, ar); if (lang == "maple") return pipeThroughMaple(extra, ar); if (lang == "mathematica") return pipeThroughMathematica(extra, ar); // create normalized expression odocstringstream os; NormalStream ns(os); os << '[' << extra << ' '; ns << ar; os << ']'; // FIXME UNICODE Is utf8 encoding correct? string data = to_utf8(os.str()); // search external script FileName const file = libFileSearch("mathed", "extern_" + lang); if (file.empty()) { lyxerr << "converter to '" << lang << "' not found" << endl; return MathData(); } // run external sript string out = captureOutput(file.absFileName(), data); MathData res; mathed_parse_cell(res, from_utf8(out)); return res; } } // namespace lyx