expression parsing, to be tested
author konrad <konrad@6e3c4bff-ac9f-4ac1-96c5-d2ea494d3e33>
Sat, 20 Nov 2010 13:23:58 +0000 (13:23 +0000)
committer konrad <konrad@6e3c4bff-ac9f-4ac1-96c5-d2ea494d3e33>
Sat, 20 Nov 2010 13:23:58 +0000 (13:23 +0000)
git-svn-id: https://silmor.de/svn/softmagic/elam/trunk@636 6e3c4bff-ac9f-4ac1-96c5-d2ea494d3e33

src/elamcharclass.cpp
src/elamcharclass.h
src/elamengine.cpp
src/elamengine.h
src/elamexpression.cpp
src/elamexpression.h
tests/eval/eval.cpp

index ed2f337..45996f3 100644 (file)
@@ -180,27 +180,6 @@ Token::Type CharacterClassSettings::charType(QChar c, ELAM::Token::Type otype) c
        return Token::Invalid;
 }
 
-bool CharacterClassSettings::isAssignment(QString op)const
-{
-       //sanity checks: size
-       if(op.size()<1)return false;
-       if(d->assignmentChars.first!=0 && d->assignmentChars.second!=0)
-               if(op.size()<2)return false;
-       //check we have assignments at all
-       if(d->assignmentChars.first==0 && d->assignmentChars.second==0)return false;
-       //check it is assignment
-       if(d->assignmentChars.first!=0 && op[0]!=d->assignmentChars.first)
-               return false;
-       if(d->assignmentChars.second!=0 && op[op.size()-1]!=d->assignmentChars.second)
-               return false;
-       //check it is an operator
-       for(int i=0;i<op.size();i++)
-               if(!d->operatorClass.contains(op[i]))
-                       return false;
-       //passed everything
-       return true;
-}
-
 bool CharacterClassSettings::isSimpleAssignment ( QString op) const
 {
        if(op.size()<1)return false;
@@ -228,6 +207,28 @@ bool CharacterClassSettings::isOperator(QString op) const
        return true;
 }
 
+QString CharacterClassSettings::toOperator(QString op) const
+{
+       if(!isOperator(op))return QString();
+       if(!isAssignment(op))return op;
+       if(d->assignmentChars.first!=0)op=op.mid(1);
+       if(d->assignmentChars.second!=0)op=op.left(op.size()-1);
+       return op;
+}
+
+bool CharacterClassSettings::isAssignment(QString op)const
+{
+       //sanity check
+       int cnt=0;
+       if(d->assignmentChars.first!=0)cnt++;
+       if(d->assignmentChars.second!=0)cnt++;
+       if(cnt==0 || op.size()<cnt)return false;
+       if(!isOperator(op))return false;
+       //char check
+       if(d->assignmentChars.first!=0 && d->assignmentChars.first!=op[0])return false;
+       if(d->assignmentChars.second!=0 && d->assignmentChars.second!=op[op.size()-1])return false;
+       return true;
+}
 
 //end of namespace
 };
\ No newline at end of file
index 56e4886..d6c2b88 100644 (file)
@@ -146,6 +146,9 @@ class CharacterClassSettings
                
                ///returns true if the string can be interpreted as operator
                bool isOperator(QString)const;
+               
+               ///removes the assignment chars from the token and returns the pure operator; returns an empty string if the token is not an operator or was a pure assignment
+               QString toOperator(QString)const;
 };
 
 //end of namespace
index 560e889..bba3723 100644 (file)
@@ -338,6 +338,16 @@ UnaryOperator Engine::unaryOperator ( QString name )
        return d->unary[name];
 }
 
+bool Engine::isAssignment(QString name) const
+{
+       //is it composed of operator chars?
+       if(!d->cclass.isOperator(name))return false;
+       //is there an overriding operator?
+       if(d->binary.contains(name))return false;
+       //otherwise return the character class settings' (cclass) view of things
+       return d->cclass.isAssignment(name);
+}
+
 
 //end of namespace
 };
\ No newline at end of file
index 4102940..c828c20 100644 (file)
@@ -98,6 +98,9 @@ class Engine:public QObject
                
                ///returns the priority of the operator, or -1 if the operator does not exist
                Q_INVOKABLE int binaryOperatorPrio(QString name);
+               
+               ///returns true if the name represents an assignment operator
+               Q_INVOKABLE bool isAssignment(QString name)const;
        public slots:
                ///returns the value of the named variable or constant
                QVariant getValue(QString)const;
index 65d0a7b..d40a901 100644 (file)
@@ -12,10 +12,9 @@ namespace ELAM {
 class DPTR_CLASS_NAME(Token):public SharedDPtr
 {
        public:
-               DPTR_NAME(){type=Invalid;subtype=None;}
+               DPTR_NAME(){type=Invalid;}
                QString cont;
                Type type;
-               SubType subtype;
                QVariant val;
                Position pos;
                QList<Token>subtok;
@@ -32,8 +31,12 @@ Token::Token(Position pos)
 Token::Token(QString c,Token::Type t,Position pos)
 {
        d->cont=c;
-       d->type=t;
        d->pos=pos;
+       //genericise most types
+       if(t&LiteralMask)d->type=Literal;else
+       if(t&NameMask)d->type=Name;else
+       if(t&OperatorMask)d->type=Operator;
+       else d->type=t;
 }
 
 Token::Token(QString c,QVariant v,Position pos)
@@ -50,8 +53,12 @@ QVariant Token::literalValue()const{return d->val;}
 Position Token::position()const{return d->pos;}
 
 QList< Token > Token::subTokens() const{return d->subtok;}
-Token::SubType Token::subType() const{return d->subtype;}
-void Token::setSubType(Token::SubType s){d->subtype=s;}
+void Token::setSubType(Token::Type s)
+{
+       //only set if it becomes more specialized
+       if(d->type&s)
+               d->type=s;
+}
 void Token::addSubToken(const ELAM::Token& t)
 {
        d->subtok<<t;
@@ -91,17 +98,13 @@ static void printtoken(QDebug&dbg,const Token&tok,int level)
                case Token::Literal:dbg<<"LiteralValue,value="<<tok.literalValue();break;
                case Token::Whitespace:dbg<<"WhiteSpace";break;
                case Token::Parentheses:dbg<<"Parentheses";break;
-       }
-       if(tok.subType()!=Token::None){
-               dbg<<",subtype=";
-               switch(tok.subType()){
-                       case Token::Function:dbg<<"Function";break;
-                       case Token::Constant:dbg<<"Constant";break;
-                       case Token::Variable:dbg<<"Variable";break;
-                       case Token::UnaryOp:dbg<<"Unary";break;
-                       case Token::BinaryOp:dbg<<"Binary";break;
-                       default:break;
-               }
+               case Token::Function:dbg<<"Function";break;
+               case Token::Constant:dbg<<"Constant";break;
+               case Token::Variable:dbg<<"Variable";break;
+               case Token::UnaryOp:dbg<<"Unary";break;
+               case Token::BinaryOp:dbg<<"Binary";break;
+               case Token::AssignmentOp:dbg<<"Assignment";break;
+               default:dbg<<"Unknown:"<<(int)tok.type();break;
        }
        dbg<<",pos="<<tok.position();
        QList<Token> sub=tok.subTokens();
@@ -124,18 +127,6 @@ QDebug&operator<<(QDebug&dbg,const QList<Token>&tok)
        printtokenlist(dbg,tok,0,0);
        return dbg.space();
 }
-///////////////////////////////////////////////////////////////////////////////
-// TokenBundle
-
-class TokenBundle
-{
-       public:
-               TokenBundle(const Token&);
-               TokenBundle(const TokenBundle&);
-               TokenBundle(const QList<Token>&);
-       private:
-               QList<TokenBundle>&mtoks;
-};
 
 ///////////////////////////////////////////////////////////////////////////////
 // Expression
@@ -143,23 +134,24 @@ class TokenBundle
 class DPTR_CLASS_NAME(Expression):public SharedDPtr
 {
        public:
-               DPTR_NAME(){type=Exception;}
+               DPTR_NAME(){type=Exception;oppos=-1;}
                QPointer<Engine>parent;
                QList<Token>tokens;
                Type type;
-               QVariant value;
+               ELAM::Exception excep;
+               QList<Expression>subexpr;
+               int oppos;
 };
 DEFINE_SHARED_DPTR(Expression);
 
 Expression::Expression()
 {
-
+       d->excep=ELAM::Exception(ELAM::Exception::ParserError,"Invalid Expression");
 }
 
 //reduce surrounding parentheses and whitespace
-static inline QList<Token>simplifyTokens(Engine*eng,QList<Token> toks)
+inline QList<Token>Expression::simplifyTokens(QList<Token> toks)
 {
-       Q_UNUSED(eng);
        QList<Token>ret;
        int min=0,max=toks.size()-1;
        //eliminate redundant parentheses
@@ -176,9 +168,9 @@ static inline QList<Token>simplifyTokens(Engine*eng,QList<Token> toks)
        return ret;
 }
 
-//reduce surrounding parentheses and whitespace
-static inline QList<Token>classifyTokens(Engine*eng,QList<Token> toks)
+inline QList<Token>Expression::classifyTokens(QList<Token> toks)
 {
+       Engine*eng=d->parent;
        QList<Token>ret;
        if(toks.size()<1)return toks;
        //check token 0
@@ -200,14 +192,13 @@ static inline QList<Token>classifyTokens(Engine*eng,QList<Token> toks)
                }else
                //define operators
                if(t.type()==Token::Operator){
-                       switch(toks[i-1].type()){
-                               case Token::ParOpen:
-                               case Token::Operator:
-                                       t.setSubType(Token::UnaryOp);
-                                       break;
-                               default:
+                       if(toks[i-1].type() & (Token::Parentheses | Token::ParOpen | Token::OperatorMask))
+                               t.setSubType(Token::UnaryOp);
+                       else{
+                               if(eng->isAssignment(toks[i].content()))
+                                       t.setSubType(Token::AssignmentOp);
+                               else
                                        t.setSubType(Token::BinaryOp);
-                                       break;
                        }
                }
                //add
@@ -215,31 +206,31 @@ static inline QList<Token>classifyTokens(Engine*eng,QList<Token> toks)
        }
        return ret;
 }
-//reduce surrounding parentheses and whitespace
-static inline QList<Token>reduceTokens(Engine*eng,QList<Token> toks)
+
+inline QList<Token>Expression::reduceTokens(QList<Token> toks)
 {
-       toks=classifyTokens(eng,simplifyTokens(eng,toks));
+       toks=classifyTokens(simplifyTokens(toks));
        QList<Token>ret,sub;
        //copy and create hierarchy
        int pcnt=0;
        for(int i=0;i<toks.size();i++){
                //count parentheses
                if(toks[i].type()==Token::ParOpen){
-                       if(!pcnt){
-                               if(i==0 || toks.value(i-1).subType()!=Token::Function)
+                       pcnt++;
+                       if(pcnt==1){
+                               if(i==0 || toks.value(i-1).type()!=Token::Function)
                                        ret<<Token("",Token::Parentheses,toks[i].position());
                                sub.clear();
+                               continue;
                        }
-                       pcnt++;
-                       continue;
                }else
                if(toks[i].type()==Token::ParClose){
                        pcnt--;
-                       if(!pcnt){
+                       if(pcnt==0){
                                ret[ret.size()-1].setSubTokens(sub);
                                sub.clear();
+                               continue;
                        }
-                       continue;
                }
                //collect tokens
                if(pcnt)
@@ -249,13 +240,13 @@ static inline QList<Token>reduceTokens(Engine*eng,QList<Token> toks)
        }
        return ret;
 }
-//scan for simple errors, return exception (NoError if no error found)
-static inline Exception scanForError(const QList< Token >& toks)
+
+inline Exception Expression::scanForError(const QList< Token >& toks)
 {
        //check for invalid tokens
        for(int i=0;i<toks.size();i++)
                if(toks[i].type()==Token::Invalid){
-                       return Exception(Exception::ParserError, "invalid token", toks[i].position());
+                       return ELAM::Exception(Exception::ParserError, "invalid token", toks[i].position());
                }
        //scan for parentheses mismatch
        int pcnt=0;
@@ -264,14 +255,14 @@ static inline Exception scanForError(const QList< Token >& toks)
                if(toks[i].type()==Token::ParOpen)pcnt++;else
                if(toks[i].type()==Token::ParClose)pcnt--;
                if(pcnt<0){
-                       return Exception(Exception::ParserError, "parentheses mismatch", toks[i].position());
+                       return ELAM::Exception(ELAM::Exception::ParserError, "parentheses mismatch", toks[i].position());
                }
        }
        if(pcnt!=0){
-               return Exception(Exception::ParserError, "parentheses mismatch", toks[0].position());
+               return ELAM::Exception(ELAM::Exception::ParserError, "parentheses mismatch", toks[0].position());
        }
        //nothing found
-       return Exception(Exception::NoError);
+       return ELAM::Exception(ELAM::Exception::NoError);
 }
 
 Expression::Expression(Engine* parent, const QList< Token >& toks)
@@ -280,48 +271,121 @@ Expression::Expression(Engine* parent, const QList< Token >& toks)
        ELAM::Exception ex=scanForError(toks);
        if(ex.errorType()!=ELAM::Exception::NoError){
                d->type=Exception;
-               d->value=ex;
+               d->excep=ex;
                d->tokens=toks;
                return;
        }
        d->parent=parent;
-       d->tokens=reduceTokens(parent,toks);
+       d->tokens=reduceTokens(toks);
        qDebug()<<"expression:"<<d->tokens;
        //check for nothing and complain
        if(d->tokens.size()==0){
                d->type=Exception;
-               d->value=ELAM::Exception(ELAM::Exception::ParserError,"no tokens", (toks.size()>0?toks[0].position():Position()));
+               d->excep=ELAM::Exception(ELAM::Exception::ParserError,"no tokens", (toks.size()>0?toks[0].position():Position()));
                return;
        }
-       //check for simplicity (literals, vars, consts)
+       //order 1: check for simplicity (literals, vars, consts)
        if(d->tokens.size()==1){
                switch(d->tokens[0].type()){
-                       case Token::Name:
-                               if(parent->hasFunction(d->tokens[0].content())){
-                                       d->type=Exception;
-                                       d->value=ELAM::Exception(ELAM::Exception::ParserError, "function call incomplete", d->tokens[0].position());
-                               }else if(parent->hasConstant(d->tokens[0].content())){
-                                       d->type=Constant;
-                                       d->value=parent->getConstant(d->tokens[0].content());
-                               }else{
-                                       d->type=Variable;
-                               }
+                       case Token::Function:
+                               functionInit();
+                               d->type=(Type)d->tokens[0].type();
+                               break;
+                       case Token::Parentheses:
+                               d->subexpr<<Expression(parent,d->tokens[0].subTokens());
+                               d->type=(Type)d->tokens[0].type();
                                break;
+                       case Token::Constant:
+                       case Token::Variable:
                        case Token::Literal:
-                               d->type=Literal;
-                               d->value=d->tokens[0].literalValue();
+                               d->type=(Type)d->tokens[0].type();
                                break;
                        default:
+                               qDebug()<<"expression in single token mode: unexpected token" <<d->tokens[0];
                                d->type=Exception;
-                               d->value=ELAM::Exception(ELAM::Exception::ParserError, "unexpected token", d->tokens[0].position());
+                               d->excep=ELAM::Exception(ELAM::Exception::ParserError, "unexpected token", d->tokens[0].position());
                                break;
                }
                return;
        }
+       //search for assignment
+       for(int i=0;i<d->tokens.size();i++){
+               if(d->tokens[i].type()==Token::AssignmentOp){
+                       //check 1: position is 1
+                       if(i!=1){
+                               d->type=Exception;
+                               d->excep=ELAM::Exception(ELAM::Exception::ParserError, "invalid assignment", d->tokens[i].position());
+                               return;
+                       }
+                       //check 2: pos 0 is variable
+                       if(d->tokens[0].type()!=Token::Variable){
+                               d->type=Exception;
+                               d->excep=ELAM::Exception(ELAM::Exception::OperationError, "left side of assignment must be a variable", d->tokens[i].position());
+                               return;
+                       }
+                       //check 3: there is a right side
+                       if(d->tokens.size()<3){
+                               d->type=Exception;
+                               d->excep=ELAM::Exception(ELAM::Exception::OperationError, "assignment must have right side expression", d->tokens[i].position());
+                               return;
+                       }
+                       //ok
+                       d->subexpr<<Expression(parent,d->tokens.mid(2));
+                       return;
+               }
+       }
+       //search for lowest operator on the right
+       int cprio=1000,cpos=-1;
+       for(int i=0;i<d->tokens.size();i++){
+               if(d->tokens[i].type()==Token::BinaryOp){
+                       int oprio=d->parent->binaryOperatorPrio(d->tokens[i].content());
+                       if(oprio<=cprio){
+                               cprio=oprio;
+                               cpos=i;
+                       }
+               }else if(d->tokens[i].type()==Token::UnaryOp){
+                       if(cprio>100){
+                               cprio=100;
+                               cpos=i;
+                       }
+               }
+       }
+       //validity check
+       if(cpos<0){
+               d->type=Exception;
+               d->excep=ELAM::Exception(ELAM::Exception::ParserError, "invalid expression", d->tokens[0].position());
+               return;
+       }
+       //split operation
+       d->oppos=cpos;
+       if(d->tokens[cpos].type()==Token::BinaryOp){
+               d->subexpr<<Expression(parent,d->tokens.mid(0,cpos));
+               d->subexpr<<Expression(parent,d->tokens.mid(cpos+1));
+       }else{
+               d->subexpr<<Expression(parent,d->tokens.mid(cpos+1));
+       }
+}
+
+void Expression::functionInit()
+{
+       QList<Token>sub=d->tokens[0].subTokens();
+       QList<Token>par;
+       for(int i=0;i<sub.size();i++){
+               if(sub[i].type()==Token::Comma){
+                       d->subexpr<<Expression(d->parent,par);
+                       par.clear();
+               }
+       }
+       if(par.size()>0){
+               d->subexpr<<Expression(d->parent,par);
+       }
 }
 
+
 QVariant Expression::evaluate()
 {
+       if(d->type==Exception)return d->excep;
+       if(d->parent.isNull())return ELAM::Exception(ELAM::Exception::OperationError,"Lost engine context, cannot evaluate.");
        return QVariant();
 }
 
index dff4853..23ad657 100644 (file)
@@ -22,34 +22,57 @@ class Token
                enum Type {
                        ///invalid token
                        Invalid=0,
+                       ///the token represents a function call
+                       Function = 1,
+                       ///the token represents a constant
+                       Constant = 2,
+                       ///the token represents a variable
+                       Variable = 4,
                        ///a name: function, variable, or constant
-                       Name=7,
+                       Name=0xff,
+                       ///mask for name tokens
+                       NameMask=Name,
+                       
+                       ///the token represents a unary operator
+                       UnaryOp = 0x100,
+                       ///the token represents a binary operator
+                       BinaryOp = 0x200,
+                       ///the token represents an assignment (with optional implicit binary op)
+                       AssignmentOp = 0x400,
                        ///an operator (unary or binary)
-                       Operator=24,
+                       Operator=0xff00,
+                       ///mask for operator tokens
+                       OperatorMask=Operator,
+                       
+                       ///mask for special class tokens that have a syntactic function (parentheses, comma, ...)
+                       SpecialCharMask=0xff0000,
+                       ///mask for parentheses
+                       ParenthesesMask=0xf0000,
                        ///meta-type used for parsed sub-tokens
-                       Parentheses=96,
+                       Parentheses=0x10000,
                        ///opening parenthese
-                       ParOpen=32,
+                       ParOpen=0x20000,
                        ///closing parenthese
-                       ParClose=64,
+                       ParClose=0x40000,
                        ///a comma - separating expressions in function calls
-                       Comma=128,
+                       Comma=0x100000,
+                       
                        ///a literal value
-                       Literal=256,
+                       Literal=0x1000000,
+                       ///mask for literal values
+                       LiteralMask=Literal,
+                       
+                       ///mask for tokens that are functional (non-ignored)
+                       FunctionalMask=NameMask|OperatorMask|SpecialCharMask,
+                       
                        ///white space chars, this is actually not used for tokens, but for parsing
-                       Whitespace=512,
-               };
-               enum SubType{
-                       None = 0,
-                       Function = 1,
-                       Constant = 2,
-                       Variable = 4,
-                       UnaryOp = 8,
-                       BinaryOp = 16,
+                       Whitespace=0x10000000,
+                       ///mask for ignored tokens
+                       IgnoredTokenMask=0xf0000000,
                };
                ///creates an empty/invalid token
                Token(Position pos=Position(-1,-1));
-               ///creates a token from a parsed piece of string
+               ///creates a token from a parsed piece of string; specific literal, name, and operator types are stored as their generic type
                Token(QString,Type,Position pos=Position(-1,-1));
                ///creates a literal token
                Token(QString,QVariant,Position pos=Position(-1,-1));
@@ -61,9 +84,22 @@ class Token
                QVariant literalValue()const;
                ///returns the original position of the token
                Position position()const;
-               SubType subType()const;
-               void setSubType(SubType);
+               ///when the token is hierarchised (Parentheses, Function) this returns the subordinate tokens
                QList<Token>subTokens()const;
+               
+               ///true if the token is functional (not ignored)
+               inline bool isFunctional()const{return type()&FunctionalMask;}
+               ///true if the token represents an operator
+               inline bool isOperator()const{return type()&OperatorMask;}
+               ///true if the token can be ignored
+               inline bool isIgnored()const{return type()&IgnoredTokenMask;}
+               ///true if the token represents a name
+               inline bool isName()const{return type()&NameMask;}
+               ///true if the token is a literal
+               inline bool isLiteral()const{return type()&LiteralMask;}
+       protected:
+               friend class Expression;
+               void setSubType(Type);
                void addSubToken(const Token&);
                void setSubTokens(const QList<Token>&);
 };
@@ -71,6 +107,10 @@ class Token
 QDebug& operator<<(QDebug&,const Token&);
 
 class Engine;
+/**Represents an expression in the context of its engine.
+
+An expression always has an engine as context and stores a hierarchy of operations (also expressions) to perform when called to evaluate. Expressions may become invalid if constants, functions, or operators change between the time they are created and the time they are evaluated.
+*/
 class Expression
 {
        DECLARE_SHARED_DPTR(d)
@@ -80,15 +120,31 @@ class Expression
                        Variable=Token::Variable,
                        Constant=Token::Constant,
                        Function=Token::Function,
-                       Parentheses=96,
+                       Parentheses=Token::Parentheses,
                        UnaryOp=Token::UnaryOp,
                        BinaryOp=Token::BinaryOp,
-                       Exception=0x8000,
+                       AssignmentOp=Token::AssignmentOp,
+                       Exception=Token::IgnoredTokenMask,
                };
+               ///creates an invalid expression that always evaluates to an exception
                Expression();
+               ///creates an expression by its context engine and its tokens
                Expression(Engine*parent,const QList<Token>&tokens);
                
+               ///evaluates the expression and returns the result of the evaluation
                QVariant evaluate();
+       private:
+               ///scan tokens and decide what specific sub-type they are
+               QList<Token>classifyTokens(QList<Token> toks);
+               /**pushes parentheses and function arguments into the sub-tokens of their parents;
+               the result is a list of tokens that are on the same level*/
+               QList<Token>reduceTokens(QList<Token> toks);
+               ///scan tokens for errors, returns Exception::NoError if none are found
+               ELAM::Exception scanForError(const QList< Token >& toks);
+               ///reduce surrounding parentheses and whitespace
+               QList<Token>simplifyTokens(QList<Token> toks);
+               ///parses tokens and splits them by comma
+               void functionInit();
 };
 
 //end of namespace
index bc5e706..65614ca 100644 (file)
@@ -13,7 +13,7 @@ void ElamTest::evaltest()
 {
        IntEngine ie;
        FloatEngine::configureFloatEngine(ie);
-       QString ex="a= 345*(65.3/(5))";
+       QString ex="a=b+=345*int(3.5)+ - -(+65/(5))";
        QVariant v=ie.evaluate(ex);
 }