From 5f74a0cdd108c4c634b8b63ecc62ff43532fa31d Mon Sep 17 00:00:00 2001 From: konrad Date: Sat, 20 Nov 2010 13:23:58 +0000 Subject: [PATCH] expression parsing, to be tested git-svn-id: https://silmor.de/svn/softmagic/elam/trunk@636 6e3c4bff-ac9f-4ac1-96c5-d2ea494d3e33 --- src/elamcharclass.cpp | 43 +++++----- src/elamcharclass.h | 3 + src/elamengine.cpp | 10 +++ src/elamengine.h | 3 + src/elamexpression.cpp | 212 +++++++++++++++++++++++++++++++----------------- src/elamexpression.h | 98 +++++++++++++++++----- tests/eval/eval.cpp | 2 +- 7 files changed, 254 insertions(+), 117 deletions(-) diff --git a/src/elamcharclass.cpp b/src/elamcharclass.cpp index ed2f337..45996f3 100644 --- a/src/elamcharclass.cpp +++ b/src/elamcharclass.cpp @@ -180,27 +180,6 @@ Token::Type CharacterClassSettings::charType(QChar c, ELAM::Token::Type otype) c return Token::Invalid; } -bool CharacterClassSettings::isAssignment(QString op)const -{ - //sanity checks: size - if(op.size()<1)return false; - if(d->assignmentChars.first!=0 && d->assignmentChars.second!=0) - if(op.size()<2)return false; - //check we have assignments at all - if(d->assignmentChars.first==0 && d->assignmentChars.second==0)return false; - //check it is assignment - if(d->assignmentChars.first!=0 && op[0]!=d->assignmentChars.first) - return false; - if(d->assignmentChars.second!=0 && op[op.size()-1]!=d->assignmentChars.second) - return false; - //check it is an operator - for(int i=0;ioperatorClass.contains(op[i])) - return false; - //passed everything - return true; -} - bool CharacterClassSettings::isSimpleAssignment ( QString op) const { if(op.size()<1)return false; @@ -228,6 +207,28 @@ bool CharacterClassSettings::isOperator(QString op) const return true; } +QString CharacterClassSettings::toOperator(QString op) const +{ + if(!isOperator(op))return QString(); + if(!isAssignment(op))return op; + if(d->assignmentChars.first!=0)op=op.mid(1); + if(d->assignmentChars.second!=0)op=op.left(op.size()-1); + return op; +} + +bool CharacterClassSettings::isAssignment(QString op)const +{ + //sanity check + int cnt=0; + if(d->assignmentChars.first!=0)cnt++; + if(d->assignmentChars.second!=0)cnt++; + if(cnt==0 || op.size()assignmentChars.first!=0 && d->assignmentChars.first!=op[0])return false; + if(d->assignmentChars.second!=0 && d->assignmentChars.second!=op[op.size()-1])return false; + return true; +} //end of namespace }; \ No newline at end of file diff --git a/src/elamcharclass.h b/src/elamcharclass.h index 56e4886..d6c2b88 100644 --- a/src/elamcharclass.h +++ b/src/elamcharclass.h @@ -146,6 +146,9 @@ class CharacterClassSettings ///returns true if the string can be interpreted as operator bool isOperator(QString)const; + + ///removes assignment chars from the token and returns the pure operator or an empty string if it was a pure assignment + QString toOperator(QString)const; }; //end of namespace diff --git a/src/elamengine.cpp b/src/elamengine.cpp index 560e889..bba3723 100644 --- a/src/elamengine.cpp +++ b/src/elamengine.cpp @@ -338,6 +338,16 @@ UnaryOperator Engine::unaryOperator ( QString name ) return d->unary[name]; } +bool Engine::isAssignment(QString name) const +{ + //is it composed of operator chars? + if(!d->cclass.isOperator(name))return false; + //is there an overriding operator? + if(d->binary.contains(name))return false; + //return cclasses view of things + return d->cclass.isAssignment(name); +} + //end of namespace }; \ No newline at end of file diff --git a/src/elamengine.h b/src/elamengine.h index 4102940..c828c20 100644 --- a/src/elamengine.h +++ b/src/elamengine.h @@ -98,6 +98,9 @@ class Engine:public QObject ///returns the priority of the operator, or -1 if the operator does not exist Q_INVOKABLE int binaryOperatorPrio(QString name); + + ///returns true if the name represents an assignment operator + Q_INVOKABLE bool isAssignment(QString name)const; public slots: ///returns the value of the named variable or constant QVariant getValue(QString)const; diff --git a/src/elamexpression.cpp b/src/elamexpression.cpp index 65d0a7b..d40a901 100644 --- a/src/elamexpression.cpp +++ b/src/elamexpression.cpp @@ -12,10 +12,9 @@ namespace ELAM { class DPTR_CLASS_NAME(Token):public SharedDPtr { public: - DPTR_NAME(){type=Invalid;subtype=None;} + DPTR_NAME(){type=Invalid;} QString cont; Type type; - SubType subtype; QVariant val; Position pos; QListsubtok; @@ -32,8 +31,12 @@ Token::Token(Position pos) Token::Token(QString c,Token::Type t,Position pos) { d->cont=c; - d->type=t; d->pos=pos; + //genericise most types + if(t&LiteralMask)d->type=Literal;else + if(t&NameMask)d->type=Name;else + if(t&OperatorMask)d->type=Operator; + else d->type=t; } Token::Token(QString c,QVariant v,Position pos) @@ -50,8 +53,12 @@ QVariant Token::literalValue()const{return d->val;} Position Token::position()const{return d->pos;} QList< Token > Token::subTokens() const{return d->subtok;} -Token::SubType Token::subType() const{return d->subtype;} -void Token::setSubType(Token::SubType s){d->subtype=s;} +void Token::setSubType(Token::Type s) +{ + //only set if it becomes more specialized + if(d->type&s) + d->type=s; +} void Token::addSubToken(const ELAM::Token& t) { d->subtok<simplifyTokens(Engine*eng,QList toks) +inline QListExpression::simplifyTokens(QList toks) { - Q_UNUSED(eng); QListret; int min=0,max=toks.size()-1; //eliminate redundant parentheses @@ -176,9 +168,9 @@ static inline QListsimplifyTokens(Engine*eng,QList toks) return ret; } -//reduce surrounding parentheses and whitespace -static inline QListclassifyTokens(Engine*eng,QList toks) +inline QListExpression::classifyTokens(QList toks) { + Engine*eng=d->parent; QListret; if(toks.size()<1)return toks; //check token 0 @@ -200,14 +192,13 @@ static inline QListclassifyTokens(Engine*eng,QList toks) }else //define operators if(t.type()==Token::Operator){ - switch(toks[i-1].type()){ - case Token::ParOpen: - case Token::Operator: - t.setSubType(Token::UnaryOp); - break; - default: + if(toks[i-1].type() & (Token::Parentheses | Token::ParOpen | Token::OperatorMask)) + t.setSubType(Token::UnaryOp); + else{ + if(eng->isAssignment(toks[i].content())) + t.setSubType(Token::AssignmentOp); + else t.setSubType(Token::BinaryOp); - break; } } //add @@ -215,31 +206,31 @@ static inline QListclassifyTokens(Engine*eng,QList toks) } return ret; } -//reduce surrounding parentheses and whitespace -static inline QListreduceTokens(Engine*eng,QList toks) + +inline QListExpression::reduceTokens(QList toks) { - toks=classifyTokens(eng,simplifyTokens(eng,toks)); + toks=classifyTokens(simplifyTokens(toks)); QListret,sub; //copy and create hierarchy int pcnt=0; for(int i=0;ireduceTokens(Engine*eng,QList toks) } return ret; } -//scan for simple errors, return exception (NoError if no error found) -static inline Exception scanForError(const QList< Token >& toks) + +inline Exception Expression::scanForError(const QList< Token >& toks) { //check for invalid tokens for(int i=0;i& toks) if(toks[i].type()==Token::ParOpen)pcnt++;else if(toks[i].type()==Token::ParClose)pcnt--; if(pcnt<0){ - return Exception(Exception::ParserError, "parentheses mismatch", toks[i].position()); + return ELAM::Exception(ELAM::Exception::ParserError, "parentheses mismatch", toks[i].position()); } } if(pcnt!=0){ - return Exception(Exception::ParserError, "parentheses mismatch", toks[0].position()); + return ELAM::Exception(ELAM::Exception::ParserError, "parentheses mismatch", toks[0].position()); } //nothing found - return Exception(Exception::NoError); + return ELAM::Exception(ELAM::Exception::NoError); } Expression::Expression(Engine* parent, const QList< Token >& toks) @@ -280,48 +271,121 @@ Expression::Expression(Engine* parent, const QList< Token >& toks) ELAM::Exception ex=scanForError(toks); if(ex.errorType()!=ELAM::Exception::NoError){ d->type=Exception; - d->value=ex; + d->excep=ex; d->tokens=toks; return; } d->parent=parent; - d->tokens=reduceTokens(parent,toks); + d->tokens=reduceTokens(toks); qDebug()<<"expression:"<tokens; //check for nothing and complain if(d->tokens.size()==0){ d->type=Exception; - d->value=ELAM::Exception(ELAM::Exception::ParserError,"no tokens", (toks.size()>0?toks[0].position():Position())); + d->excep=ELAM::Exception(ELAM::Exception::ParserError,"no tokens", (toks.size()>0?toks[0].position():Position())); return; } - //check for simplicity (literals, vars, consts) + //order 1: check for simplicity (literals, vars, consts) if(d->tokens.size()==1){ switch(d->tokens[0].type()){ - case Token::Name: - if(parent->hasFunction(d->tokens[0].content())){ - d->type=Exception; - d->value=ELAM::Exception(ELAM::Exception::ParserError, "function call incomplete", d->tokens[0].position()); - }else if(parent->hasConstant(d->tokens[0].content())){ - d->type=Constant; - d->value=parent->getConstant(d->tokens[0].content()); - }else{ - d->type=Variable; - } + case Token::Function: + functionInit(); + d->type=(Type)d->tokens[0].type(); + break; + case Token::Parentheses: + d->subexpr<tokens[0].subTokens()); + d->type=(Type)d->tokens[0].type(); break; + case Token::Constant: + case Token::Variable: case Token::Literal: - d->type=Literal; - d->value=d->tokens[0].literalValue(); + d->type=(Type)d->tokens[0].type(); break; default: + qDebug()<<"expression in single token mode: unexpected token" <tokens[0]; d->type=Exception; - d->value=ELAM::Exception(ELAM::Exception::ParserError, "unexpected token", d->tokens[0].position()); + d->excep=ELAM::Exception(ELAM::Exception::ParserError, "unexpected token", d->tokens[0].position()); break; } return; } + //search for assignment + for(int i=0;itokens.size();i++){ + if(d->tokens[i].type()==Token::AssignmentOp){ + //check 1: position is 1 + if(i!=1){ + d->type=Exception; + d->excep=ELAM::Exception(ELAM::Exception::ParserError, "invalid assignment", d->tokens[i].position()); + return; + } + //check 2: pos 0 is variable + if(d->tokens[0].type()!=Token::Variable){ + d->type=Exception; + d->excep=ELAM::Exception(ELAM::Exception::OperationError, "left side of assignment must be a variable", d->tokens[i].position()); + return; + } + //check 3: there is a right + if(d->tokens.size()<3){ + d->type=Exception; + d->excep=ELAM::Exception(ELAM::Exception::OperationError, "assignment must have right side expression", d->tokens[i].position()); + return; + } + //ok + d->subexpr<tokens.mid(2)); + return; + } + } + //search for lowest operator on the right + int cprio=1000,cpos=-1; + for(int i=0;itokens.size();i++){ + if(d->tokens[i].type()==Token::BinaryOp){ + int oprio=d->parent->binaryOperatorPrio(d->tokens[i].content()); + if(oprio<=cprio){ + cprio=oprio; + cpos=i; + } + }else if(d->tokens[i].type()==Token::UnaryOp){ + if(cprio>100){ + cprio=100; + cpos=i; + } + } + } + //validity check + if(cpos<0){ + d->type=Exception; + d->excep=ELAM::Exception(ELAM::Exception::ParserError, "invalid expression", d->tokens[0].position()); + return; + } + //split operation + d->oppos=cpos; + if(d->tokens[cpos].type()==Token::BinaryOp){ + d->subexpr<tokens.mid(0,cpos)); + d->subexpr<tokens.mid(cpos+1)); + }else{ + d->subexpr<tokens.mid(cpos+1)); + } +} + +void Expression::functionInit() +{ + QListsub=d->tokens[0].subTokens(); + QListpar; + for(int i=0;isubexpr<parent,par); + par.clear(); + } + } + if(par.size()>0){ + d->subexpr<parent,par); + } } + QVariant Expression::evaluate() { + if(d->type==Exception)return d->excep; + if(d->parent.isNull())return ELAM::Exception(ELAM::Exception::OperationError,"Lost engine context, cannot evaluate."); return QVariant(); } diff --git a/src/elamexpression.h b/src/elamexpression.h index dff4853..23ad657 100644 --- a/src/elamexpression.h +++ b/src/elamexpression.h @@ -22,34 +22,57 @@ class Token enum Type { ///invalid token Invalid=0, + ///the token represents a function call + Function = 1, + ///the token represents a constant + Constant = 2, + ///the token represents a variable + Variable = 4, ///a name: function, variable, or constant - Name=7, + Name=0xff, + ///mask for name tokens + NameMask=Name, + + ///the token represents an unary operator + UnaryOp = 0x100, + ///the token represents a binary operator + BinaryOp = 0x200, + ///the token represents an assignment (with optional implicit binary op) + AssignmentOp = 0x400, ///an operator (unary or binary) - Operator=24, + Operator=0xff00, + ///mask for operator tokens + OperatorMask=Operator, + + ///mask for special class tokens that have a syntactic function (parentheses, comma, ...) + SpecialCharMask=0xff0000, + ///mask for parentheses + ParenthesesMask=0xf0000, ///meta-type used for parsed sub-tokens - Parentheses=96, + Parentheses=0x10000, ///opening parenthese - ParOpen=32, + ParOpen=0x20000, ///closing parenthese - ParClose=64, + ParClose=0x40000, ///a comma - separating expressions in function calls - Comma=128, + Comma=0x100000, + ///a literal value - Literal=256, + Literal=0x1000000, + ///mask for literal values + LiteralMask=Literal, + + ///mask for tokens that are functional (non-ignored) + FunctionalMask=NameMask|OperatorMask|SpecialCharMask, + ///white space chars, this is actually not used for tokens, but for parsing - Whitespace=512, - }; - enum SubType{ - None = 0, - Function = 1, - Constant = 2, - Variable = 4, - UnaryOp = 8, - BinaryOp = 16, + Whitespace=0x10000000, + ///mask for ignored tokens + IgnoredTokenMask=0xf0000000, }; ///creates an empty/invalid token Token(Position pos=Position(-1,-1)); - ///creates a token from a parsed piece of string + ///creates a token from a parsed piece of string, only generic types can be used Token(QString,Type,Position pos=Position(-1,-1)); ///creates a literal token Token(QString,QVariant,Position pos=Position(-1,-1)); @@ -61,9 +84,22 @@ class Token QVariant literalValue()const; ///returns the original position of the token Position position()const; - SubType subType()const; - void setSubType(SubType); + ///when the token is hierarchised (Parentheses, Function) this returns the subordinate tokens QListsubTokens()const; + + ///true if the token is functional (not ignored) + inline bool isFunctional()const{return type()&FunctionalMask;} + ///true if the token represents an operator + inline bool isOperator()const{return type()&OperatorMask;} + ///true if the token can be ignored + inline bool isIgnored()const{return type()&IgnoredTokenMask;} + ///true if the token represents a name + inline bool isName()const{return type()&NameMask;} + ///true if the token is a literal + inline bool isLiteral()const{return type()&LiteralMask;} + protected: + friend class Expression; + void setSubType(Type); void addSubToken(const Token&); void setSubTokens(const QList&); }; @@ -71,6 +107,10 @@ class Token QDebug& operator<<(QDebug&,const Token&); class Engine; +/**Represents an expression in the context of its engine. + +An expression always has an engine as context and stores a hierarchy of operations (also expressions) to perform when called to evaluate. Expressions may become invalid if constants, functions, or operators change between the time they are created and the time they are evaluated. +*/ class Expression { DECLARE_SHARED_DPTR(d) @@ -80,15 +120,31 @@ class Expression Variable=Token::Variable, Constant=Token::Constant, Function=Token::Function, - Parentheses=96, + Parentheses=Token::Parentheses, UnaryOp=Token::UnaryOp, BinaryOp=Token::BinaryOp, - Exception=0x8000, + AssignmentOp=Token::AssignmentOp, + Exception=Token::IgnoredTokenMask, }; + ///creates and invalid expression, that always evaluates to an exception Expression(); + ///creates an expression by its context engine and its tokens Expression(Engine*parent,const QList&tokens); + ///evaluates the expression and returns the result of the evaluation QVariant evaluate(); + private: + ///scan tokens and decide what specific sub-type they are + QListclassifyTokens(QList toks); + /**pushes parentheses and function arguments into the sub-tokens of their parents; + the result is a list of tokens that are on the same level*/ + QListreduceTokens(QList toks); + ///scan tokens for errors, returns Exception::NoError if none are found + ELAM::Exception scanForError(const QList< Token >& toks); + ///reduce surrounding parentheses and whitespace + QListsimplifyTokens(QList toks); + ///parses tokens and splits them by comma + void functionInit(); }; //end of namespace diff --git a/tests/eval/eval.cpp b/tests/eval/eval.cpp index bc5e706..65614ca 100644 --- a/tests/eval/eval.cpp +++ b/tests/eval/eval.cpp @@ -13,7 +13,7 @@ void ElamTest::evaltest() { IntEngine ie; FloatEngine::configureFloatEngine(ie); - QString ex="a= 345*(65.3/(5))"; + QString ex="a=b+=345*int(3.5)+ - -(+65/(5))"; QVariant v=ie.evaluate(ex); } -- 1.7.2.5