在Python中解析用户提供的数学公式的安全方法

Safe way to parse user-supplied mathematical formula in Python

是否有Python的数学表达式解析器+评估器?

我不是第一个提出这个问题的人,但答案通常指向eval()。 例如,人们可以这样做:

1
2
3
4
5
>>> safe_list = ['math','acos', 'asin', 'atan', 'atan2', 'ceil', 'cos', 'cosh', 'degrees', 'e', 'exp', 'fabs', 'floor', 'fmod', 'frexp', 'hypot', 'ldexp', 'log', 'log10', 'modf', 'pi', 'pow', 'radians', 'sin', 'sinh', 'sqrt', 'tan', 'tanh', 'abs']
>>> safe_dict = dict([ (k, locals().get(k, None)) for k in safe_list ])
>>> s ="2+3"
>>> eval(s, {"__builtins__":None}, safe_dict)
5

但这不安全:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
>>> s_badbaduser ="""
... (lambda fc=(
...     lambda n: [
...         c for c in
...             ().__class__.__bases__[0].__subclasses__()
...             if c.__name__ == n
...         ][0]
...     ):
...     fc("function")(
...         fc("code")(
...             0,0,0,0,"KABOOM",(),(),(),"","",0,""
...         ),{}
...     )()
... )()
..."""

>>> eval(s_badbaduser, {"__builtins__":None}, safe_dict)
Segmentation fault

另外,使用eval来解析和评估数学表达式对我来说似乎不对。

我找到了PyMathParser,但它内部同样使用了eval,因此并不更安全:

1
2
3
4
5
>>> import MathParser
>>> m=MathParser.PyMathParser()
>>> m.expression = s_badbaduser
>>> m.evaluate();
Segmentation fault

是否有可用于解析和评估数学表达式而不使用Python解析器的库?


查看Paul McGuire的pyparsing。 他为算术表达式编写了通用解析器和语法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from __future__ import division
import pyparsing as pyp
import math
import operator

class NumericStringParser(object):
    '''Parse and evaluate arithmetic expression strings without ``eval``.

    Most of this code comes from the fourFn.py pyparsing example
    http://pyparsing.wikispaces.com/file/view/fourFn.py
    http://pyparsing.wikispaces.com/message/view/home/15549426
    __author__='Paul McGuire'

    All I've done is rewrap Paul McGuire's fourFn.py as a class, so I can use it
    more easily in other places.

    Parse actions push tokens onto ``self.exprStack`` in postfix order
    (operands first, then the operator or function name); ``evaluateStack``
    then consumes that stack recursively from the end.
    '''

    def pushFirst(self, strg, loc, toks):
        '''Parse action: push the first matched token onto the stack.'''
        self.exprStack.append(toks[0])

    def pushUMinus(self, strg, loc, toks):
        '''Parse action: push a 'unary -' marker when the atom starts with "-".'''
        if toks and toks[0] == '-':
            self.exprStack.append('unary -')

    def __init__(self):
        """Build the grammar and the operator / function lookup tables.

        expop   :: '^'
        multop  :: '*' | '/'
        addop   :: '+' | '-'
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*
        """
        point = pyp.Literal(".")
        e = pyp.CaselessLiteral("E")
        fnumber = pyp.Combine(
            pyp.Word("+-" + pyp.nums, pyp.nums) +
            pyp.Optional(point + pyp.Optional(pyp.Word(pyp.nums))) +
            pyp.Optional(e + pyp.Word("+-" + pyp.nums, pyp.nums)))
        ident = pyp.Word(pyp.alphas, pyp.alphas + pyp.nums + "_$")
        plus = pyp.Literal("+")
        minus = pyp.Literal("-")
        mult = pyp.Literal("*")
        div = pyp.Literal("/")
        lpar = pyp.Literal("(").suppress()
        rpar = pyp.Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = pyp.Literal("^")
        pi = pyp.CaselessLiteral("PI")
        expr = pyp.Forward()
        atom = ((pyp.Optional(pyp.oneOf("- +")) +
                 (pi | e | fnumber | ident + lpar + expr + rpar
                  ).setParseAction(self.pushFirst))
                | pyp.Optional(pyp.oneOf("- +")) + pyp.Group(lpar + expr + rpar)
                ).setParseAction(self.pushUMinus)
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
        # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = pyp.Forward()
        factor << (atom + pyp.ZeroOrMore(
            (expop + factor).setParseAction(self.pushFirst)))
        term = factor + pyp.ZeroOrMore(
            (multop + factor).setParseAction(self.pushFirst))
        expr << (term + pyp.ZeroOrMore(
            (addop + term).setParseAction(self.pushFirst)))
        self.bnf = expr

        epsilon = 1e-12

        def sgn(a):
            # Magnitudes not exceeding epsilon are reported as 0.
            # For Python3 compatibility, cmp replaced by ((a > 0) - (a < 0)).
            # See https://docs.python.org/3.0/whatsnew/3.0.html#ordering-comparisons
            if abs(a) > epsilon:
                return (a > 0) - (a < 0)
            return 0

        # map operator symbols to corresponding arithmetic operations
        self.opn = {"+": operator.add,
                    "-": operator.sub,
                    "*": operator.mul,
                    "/": operator.truediv,
                    "^": operator.pow}
        # single-argument functions callable from an expression
        self.fn = {"sin": math.sin,
                   "cos": math.cos,
                   "tan": math.tan,
                   "abs": abs,
                   "trunc": int,
                   "round": round,
                   "sgn": sgn}
        self.exprStack = []

    def evaluateStack(self, s):
        '''Recursively evaluate the postfix token list ``s``.

        ``s`` is consumed (popped from the end) during evaluation.

        :param s: list of tokens as produced by the parse actions.
        :returns: the numeric value of the expression.
        '''
        op = s.pop()
        if op == 'unary -':
            return -self.evaluateStack(s)
        # Exact table lookup; a substring test against "+-*/^" would also
        # accept '' or multi-character fragments such as "+-".
        if op in self.opn:
            # The right operand was pushed last, so it is popped first.
            op2 = self.evaluateStack(s)
            op1 = self.evaluateStack(s)
            return self.opn[op](op1, op2)
        if op == "PI":
            return math.pi  # 3.1415926535
        if op == "E":
            return math.e  # 2.718281828
        if op in self.fn:
            return self.fn[op](self.evaluateStack(s))
        if op[0].isalpha():
            # NOTE: an unrecognised identifier evaluates to 0 instead of
            # raising; kept as-is so existing callers see the same result.
            return 0
        return float(op)

    def eval(self, num_string, parseAll=True):
        '''Parse ``num_string`` and return its numeric value.

        :param num_string: str. The arithmetic expression to evaluate.
        :param parseAll: passed through to ``parseString``.
        :returns: the evaluated result.
        '''
        self.exprStack = []
        # Parsing is done for its side effect: the parse actions fill
        # self.exprStack.
        self.bnf.parseString(num_string, parseAll)
        # Evaluate a copy so self.exprStack still holds the parsed tokens.
        return self.evaluateStack(self.exprStack[:])

# Usage example: build one parser and evaluate two expressions with it.
nsp = NumericStringParser()
print(nsp.eval('1+2'))
# 3.0

print(nsp.eval('2*3-5'))
# 1.0


我建议先使用ast.parse解析表达式,然后用白名单检查解析树中的每个节点。

1
2
3
4
5
# Parse the user string as a single expression (mode='eval'); nothing is
# executed at this point.
tree = ast.parse(s, mode='eval')
# Accept the tree only if every node in it is an instance of a whitelisted
# node class.
valid = all(isinstance(node, whitelist) for node in ast.walk(tree))
if valid:
    # NOTE: `result` is only bound on this branch; a rejected expression
    # leaves it undefined, so callers must handle the invalid case.
    result = eval(compile(tree, filename='', mode='eval'),
                  {"__builtins__": None}, safe_dict)

这里whitelist可能是这样的:

1
2
3
4
# Node classes an expression may contain. ast.Attribute, ast.Lambda and the
# comprehension nodes are not listed, so an expression that walks
# __class__/__subclasses__ fails the check. ast.cmpop is listed but
# ast.Compare is not, so comparisons are still rejected.
# NOTE(review): ast.Num is deprecated since Python 3.8 in favour of
# ast.Constant (which also covers strings) -- confirm the target version.
whitelist = (ast.Expression, ast.Call, ast.Name, ast.Load,
             ast.BinOp, ast.UnaryOp, ast.operator, ast.unaryop, ast.cmpop,
             ast.Num,
            )


我参考了这里的几个帖子,编写了一个求值器(Evaluator)类,
其中的eval示例基本上被我重写成了一个类对象。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import sys
import ast
import operator as op
import abc

import math

class IEvaluator:
    '''Interface for string-expression evaluators.

    NOTE: the ``__metaclass__`` attribute is only honoured by Python 2. On
    Python 3 it is ignored, so the abstract method is not enforced there.
    '''
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def eval_expr(cls, expr, subs):  # @NoSelf
        '''IMPORTANT: this is class method, overload it with @classmethod!
        Evaluate an expression given in the expr string.

        :param expr: str. String expression.
        :param subs: dict. Dictionary with values to substitute.
        :returns: Evaluated expression result.
        '''


class Evaluator(IEvaluator):
    '''Generic evaluator for a string expression. Uses ast and operator
    modules. The expr string is parsed with ast resulting in a node tree.
    Then the node tree is recursively traversed and evaluated with operations
    from the operator module. Function calls are resolved against the math
    module.

    NOTE: operands are not bounded, so an expression such as ``9**9**9`` is
    evaluated as written and can take very long.

    :implements: IEvaluator
    '''

    # Supported operator node types and the callables that implement them.
    _OPERATORS = {
        ast.Add: op.add,
        ast.Sub: op.sub,
        ast.Mult: op.mul,
        ast.Div: op.truediv,
        ast.Pow: op.pow,
        ast.BitXor: op.xor,
        ast.USub: op.neg,
        ast.UAdd: op.pos,
    }

    # Since Python 3.8 the parser emits ast.Constant for literals; older
    # interpreters emit ast.Num. The conditional keeps the deprecated
    # ast.Num name from being touched on modern interpreters.
    _LITERAL_NODE = ast.Constant if sys.version_info >= (3, 8) else ast.Num

    # Node type -> name of the classmethod that evaluates it. Names are
    # resolved through ``cls`` at call time so subclasses can override them.
    _HANDLERS = {
        _LITERAL_NODE: '_num_op',    # <number>
        ast.BinOp: '_bin_op',        # <left> <operator> <right>
        ast.UnaryOp: '_unary_op',    # <operator> <operand> e.g., -1
        ast.Name: '_subs_op',        # <variable>
        ast.Call: '_call_op',        # <function>(<args>)
    }

    @classmethod
    def _get_op(cls, node):
        '''Get the operator corresponding to the node.

        :param node: Operator node type with node.op property.
        :raises KeyError: if the operator is not supported.
        '''
        return cls._OPERATORS[type(node.op)]

    @classmethod
    def _get_op_fun(cls, node):
        '''Get the math-module function named by a call node.

        :param node: Call node whose node.func is a bare name.
        :raises AttributeError: if node.func has no ``id`` or math has no
            attribute with that name.
        :raises TypeError: if the name is private/dunder or the attribute is
            not callable (e.g. ``pi``).
        '''
        name = node.func.id
        # Only public callables are exposed; module internals such as
        # __loader__ must not be reachable from an expression.
        if name.startswith('_'):
            raise TypeError(node)
        fun_call = getattr(math, name)
        if not callable(fun_call):
            raise TypeError(node)
        return fun_call

    @classmethod
    def _num_op(cls, node, subs):
        '''Return the numeric value of a literal node.

        :param node: ast.Constant (node.value) or legacy ast.Num (node.n).
        :raises TypeError: if the literal is not a number (string, bool,
            None, ...).
        '''
        try:
            value = node.value
        except AttributeError:
            # Legacy ast.Num nodes only carry ``n`` and are always numeric.
            return node.n
        if isinstance(value, bool) or not isinstance(
                value, (int, float, complex)):
            raise TypeError(node)
        return value

    @classmethod
    def _bin_op(cls, node, subs):
        '''Eval the left and right nodes, and call the binary operator.

        :param node: Binary operator with node.op, node.left, and node.right
            properties.
        '''
        operation = cls._get_op(node)
        left_value = cls.eval(node.left, subs)
        right_value = cls.eval(node.right, subs)
        return operation(left_value, right_value)

    @classmethod
    def _unary_op(cls, node, subs):
        '''Eval the node operand and call the unary operator.

        :param node: Unary operator with node.op and node.operand properties.
        '''
        operation = cls._get_op(node)
        return operation(cls.eval(node.operand, subs))

    @classmethod
    def _subs_op(cls, node, subs):
        '''Return the value of the variable represented by the node.

        :param node: Name node with node.id property to identify the variable.
        :raises TypeError: if the variable is not present in subs.
        '''
        try:
            return subs[node.id]
        except KeyError:
            raise TypeError(node)

    @classmethod
    def _call_op(cls, node, subs):
        '''Eval the positional arguments and call the named math function.

        :param node: Call node with node.func and node.args properties.
        '''
        arg_list = [cls.eval(node_arg, subs) for node_arg in node.args]
        fun_call = cls._get_op_fun(node)
        return fun_call(*arg_list)

    @classmethod
    def eval(cls, node, subs):
        '''The node is actually a tree. The node type i.e. type(node) is:
            a numeric literal (ast.Constant / ast.Num), ast.BinOp,
            ast.UnaryOp, ast.Name or ast.Call.
        Depending on the node type the node will have the following properties:
            node.value / node.n - Nodes value.
            node.id - Node id corresponding to a key in the subs dictionary.
            node.op - operation node. Type of node.op identifies the operation.
                type(node.op) is one of ast.Add, ast.Sub, ast.Mult, ast.Div,
                ast.Pow, ast.BitXor, ast.USub, or ast.UAdd.
            node.left or node.right - Binary operation node needs to have links
                to left and right nodes.
            node.operand - Unary operation node needs to have an operand.
            node.func and node.args - Call node names a math function and
                carries its positional arguments.

        The binary, unary and call operations call eval recursively.

        :raises KeyError: if the node type is not supported.
        '''
        handler = getattr(cls, cls._HANDLERS[type(node)])
        return handler(node, subs)

    @classmethod
    def eval_expr(cls, expr, subs=None):
        '''Evaluates a string expression. The expr string is parsed with ast
        resulting in a node tree. Then the eval method is used to recursively
        traverse and evaluate the nodes. Symbolic params are taken from subs.

        :Example:
            >>> eval_expr('2^6')
            4
            >>> eval_expr('2**6')
            64
            >>> eval_expr('1 + 2*3**(4^5) / (6 + -7)')
            -5.0
            >>> eval_expr('x + y', {'x': 1, 'y': 2})
            3

        :param expr: str. String expression.
        :param subs: dict. (default: globals of current and calling stack.)
        :returns: Result of running the evaluator.

        :implements: IEvaluator.eval_expr

        '''

        # ref: https://stackoverflow.com/a/9558001/3457624
        if subs is None:
            # Start from this module's globals, then let the caller's
            # globals override them.
            frame = sys._getframe()
            subs = dict(frame.f_globals)
            if frame.f_back:
                subs.update(frame.f_back.f_globals)

        expr_tree = ast.parse(expr, mode='eval').body
        return cls.eval(expr_tree, subs)

这里有些例子:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import sympy

from eval_sympy import Evaluator

# test case...
# NOTE: this demo uses Python 2 print statements.
x = sympy.Symbol('x')
y = sympy.Symbol('y')

# Build a symbolic expression, then turn it into the plain string that the
# evaluator will parse.
expr = x * 2 - y ** 2
# z = expr.subs({x:1, y:2})

str_expr = str(expr)
print str_expr

# Rebind x and y to plain numbers at module level. eval_expr is called
# without subs here, so it presumably resolves the names from these module
# globals -- verify against eval_sympy.Evaluator.eval_expr.
x = 1
y = 2
out0 = Evaluator.eval_expr(str_expr)
print '(x, y): ({}, {})'.format(x, y)
print str_expr, ' = ', out0

# Same expression, with the variable values passed explicitly.
subs1 = {'x': 1, 'y': 2}
out1 = Evaluator.eval_expr(str_expr, subs1)
print 'subs: ', subs1
print str_expr, ' = ', out1

# Nested function calls inside the expression string.
sin_subs = {'x': 1, 'y': 2}
sin_out = Evaluator.eval_expr('sin(log10(x*y))', sin_subs)
print 'sin_subs: ', sin_subs
print 'sin(log10(x*y)) = ', sin_out

结果

1
2
3
4
5
6
7
8
9
10
2*x - y**2

(x, y): (1, 2)
2*x - y**2  =  -2

subs:  {'y': 2, 'x': 1}
2*x - y**2  =  -2

sin_subs:  {'y': 2, 'x': 1}
sin(log10(x*y)) =  0.296504042171