Evaluating a mathematical expression in a string
1 2 | stringExp ="2^4" intVal = int(stringExp) # Expected value: 16 |
这将返回以下错误:
1 2 3 4 | Traceback (most recent call last): File"<stdin>", line 1, in <module> ValueError: invalid literal for int() with base 10: '2^4' |
我知道
1 2 | eval("__import__('os').remove('important file')") # arbitrary commands eval("9**9**9**9**9**9**9**9", {'__builtins__': None}) # CPU, memory |
注意:即使您将 `__builtins__` 设置为 `None`,仍然可能通过内省逃出沙箱:
1 | eval('(1).__class__.__bases__[0].__subclasses__()', {'__builtins__': None}) |
使用 `ast` 模块解析并计算算术表达式:
import ast
import operator as op

# Whitelist of supported operators: AST operator node type -> implementation.
# Anything not listed here cannot be evaluated.
operators = {
    ast.Add: op.add,
    ast.Sub: op.sub,
    ast.Mult: op.mul,
    ast.Div: op.truediv,
    ast.Pow: op.pow,
    ast.BitXor: op.xor,
    ast.USub: op.neg,
}


def eval_expr(expr):
    """Evaluate an arithmetic expression given as a string.

    The string is parsed with ``ast.parse`` and the resulting tree is walked
    by ``eval_``; the built-in ``eval`` is never called.

    Raises ``SyntaxError`` for text that is not a Python expression,
    ``TypeError`` for unsupported syntax (names, calls, strings, ...) and
    ``KeyError`` for operators missing from ``operators``.

    >>> eval_expr('2^6')
    4
    >>> eval_expr('2**6')
    64
    >>> eval_expr('1 + 2*3**(4^5) / (6 + -7)')
    -5.0
    """
    return eval_(ast.parse(expr, mode='eval').body)


def eval_(node):
    """Recursively evaluate one AST node of an arithmetic expression.

    Only numeric literals, binary operations and unary operations whose
    operator appears in ``operators`` are accepted.  Every other node type
    raises ``TypeError(node)``.
    """
    if isinstance(node, ast.Constant):  # <number>
        # ast.Num is a deprecated alias of ast.Constant.  ast.Constant also
        # covers strings, bytes, None, ... so the numeric check is explicit.
        # bool is rejected because it is a subclass of int.
        value = node.value
        if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
            raise TypeError(node)
        return value
    elif isinstance(node, ast.BinOp):  # <left> <operator> <right>
        return operators[type(node.op)](eval_(node.left), eval_(node.right))
    elif isinstance(node, ast.UnaryOp):  # <operator> <operand> e.g., -1
        return operators[type(node.op)](eval_(node.operand))
    else:
        raise TypeError(node)
您可以轻松限制每个操作或任何中间结果的允许范围,例如,限制 `a**b` 的输入参数:
def power(a, b):
    """Raise ``a`` to the power ``b``, refusing large operands.

    Raises ``ValueError((a, b))`` when the absolute value of either operand
    exceeds 100; otherwise returns ``op.pow(a, b)``.
    """
    for operand in (a, b):
        if abs(operand) > 100:
            raise ValueError((a, b))
    return op.pow(a, b)


# Replace the unrestricted exponentiation handler with the bounded one.
operators[ast.Pow] = power
或者限制中间结果的大小:
import functools


def limit(max_=None):
    """Return a decorator that limits the magnitude of returned values.

    ``max_`` is the largest allowed ``abs()`` of the wrapped function's
    result; a larger result raises ``ValueError(result)``.  ``None`` (the
    default) means "no limit".  Results that do not support ``abs()`` are
    passed through unchanged.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            ret = func(*args, **kwargs)
            if max_ is None:
                # Comparing a number with None raises TypeError on Python 3,
                # so the default must short-circuit instead of being compared.
                return ret
            try:
                mag = abs(ret)
            except TypeError:
                pass  # not applicable
            else:
                if mag > max_:
                    raise ValueError(ret)
            return ret
        return wrapper
    return decorator


# Rebinding the module-level name means the recursive calls inside eval_
# also go through the wrapper, so every intermediate result is checked.
eval_ = limit(max_=10**100)(eval_)
示例:
1 2 3 4 5 6 7 8 9 10 11 | >>> evil ="__import__('os').remove('important file')" >>> eval_expr(evil) #doctest:+IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... TypeError: >>> eval_expr("9**9") 387420489 >>> eval_expr("9**9**9**9**9**9**9**9") #doctest:+IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... ValueError: |
Pyparsing可用于解析数学表达式。特别是fourFn.py
演示了如何解析基本的算术表达式。下面,我将 fourFn 重新封装为一个数字解析器类(`NumericStringParser`),以便于重用。
from __future__ import division
from pyparsing import (Literal, CaselessLiteral, Word, Combine, Group, Optional,
                       ZeroOrMore, Forward, nums, alphas, oneOf)
import math
import operator

__author__ = 'Paul McGuire'
__version__ = '$Revision: 0.0 $'
__date__ = '$Date: 2009-03-20 $'
__source__ = '''http://pyparsing.wikispaces.com/file/view/fourFn.py
http://pyparsing.wikispaces.com/message/view/home/15549426
'''
__note__ = '''
All I've done is rewrap Paul McGuire's fourFn.py as a class, so I can use it
more easily in other places.
'''


class NumericStringParser(object):
    '''
    Parse and evaluate arithmetic expressions given as strings.

    Most of this code comes from the fourFn.py pyparsing example.  Parsing
    pushes operands and operators onto ``self.exprStack`` in postfix order;
    ``evaluateStack`` then folds that stack into a single number.
    '''

    def pushFirst(self, strg, loc, toks):
        """Parse action: push the first matched token onto the stack."""
        self.exprStack.append(toks[0])

    def pushUMinus(self, strg, loc, toks):
        """Parse action: push a 'unary -' marker when the atom starts with '-'."""
        if toks and toks[0] == '-':
            self.exprStack.append('unary -')

    def __init__(self):
        """
        Build the grammar and the operator/function lookup tables.

        expop   :: '^'
        multop  :: '*' | '/'
        addop   :: '+' | '-'
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*
        """
        point = Literal(".")
        e = CaselessLiteral("E")
        fnumber = Combine(Word("+-" + nums, nums) +
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")
        expr = Forward()
        atom = ((Optional(oneOf("- +")) +
                 (ident + lpar + expr + rpar | pi | e | fnumber).setParseAction(self.pushFirst))
                | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
                ).setParseAction(self.pushUMinus)
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
        # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + \
            ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
        term = factor + \
            ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
        expr << term + \
            ZeroOrMore((addop + term).setParseAction(self.pushFirst))
        self.bnf = expr

        # Values with a magnitude at or below epsilon count as zero for sgn().
        epsilon = 1e-12

        def sgn(a):
            # cmp() no longer exists in Python 3; (a > 0) - (a < 0) yields the
            # same -1 / 0 / 1 result on both Python 2 and Python 3.
            if abs(a) > epsilon:
                return (a > 0) - (a < 0)
            return 0

        # map operator symbols to corresponding arithmetic operations
        self.opn = {"+": operator.add,
                    "-": operator.sub,
                    "*": operator.mul,
                    "/": operator.truediv,
                    "^": operator.pow}
        self.fn = {"sin": math.sin,
                   "cos": math.cos,
                   "tan": math.tan,
                   "exp": math.exp,
                   "abs": abs,
                   "trunc": int,
                   "round": round,
                   "sgn": sgn}

    def evaluateStack(self, s):
        """Pop and evaluate the expression on top of the postfix stack ``s``.

        ``s`` is consumed (mutated) while evaluating.  Returns the numeric
        value of the popped sub-expression.
        """
        op = s.pop()
        if op == 'unary -':
            return -self.evaluateStack(s)
        if op in self.opn:
            # Operands were pushed left-to-right, so the right one pops first.
            op2 = self.evaluateStack(s)
            op1 = self.evaluateStack(s)
            return self.opn[op](op1, op2)
        elif op == "PI":
            return math.pi  # 3.1415926535
        elif op == "E":
            return math.e  # 2.718281828
        elif op in self.fn:
            return self.fn[op](self.evaluateStack(s))
        elif op[0].isalpha():
            # NOTE(review): an unknown identifier evaluates to 0 instead of
            # raising -- confirm this is intended.
            return 0
        else:
            return float(op)

    def eval(self, num_string, parseAll=True):
        """Parse ``num_string`` and return its numeric value.

        ``parseAll`` is forwarded to pyparsing's ``parseString``.  Parsing is
        run for its side effect: the parse actions fill ``self.exprStack``.
        """
        self.exprStack = []
        self.bnf.parseString(num_string, parseAll)
        # Evaluate a copy so self.exprStack keeps the parsed form.
        return self.evaluateStack(self.exprStack[:])
你可以像这样使用它
1 2 3 4 5 6 7 8 | nsp = NumericStringParser() result = nsp.eval('2^4') print(result) # 16.0 result = nsp.eval('exp(2^4)') print(result) # 8886110.520507872 |
- asteval
- numexpr
*根据文档中的以下警告,SymPy
Warning: Note that this function uses
eval , and thus shouldn’t be used on unsanitized input.
相反,您可以轻松地创建一个简单的
1 2 3 | c = compile(stringExp, 'userinput', 'eval') if c.co_code[0]==b'd' and c.co_code[3]==b'S': return c.co_consts[ord(c.co_code[1])+ord(c.co_code[2])*256] |
它的工作方式很简单,任何常量数学表达式在编译期间都会被安全地评估并存储为常量。 compile返回的代码对象包含
这也为一些更复杂的输入格式打开了大门。例如:
1 | stringExp ="1 + cos(2)" |
这需要实际评估字节码,这仍然非常简单。 Python字节码是一种面向堆栈的语言,所以一切都是
# NOTE: this snippet targets Python 2.  It imports ``Queue`` and walks
# ``co_code`` as a byte string in which an opcode with an argument is followed
# by two argument bytes (low byte first).
from dis import opmap, HAVE_ARGUMENT
from Queue import LifoQueue
from math import sin, cos
import operator

# Names the evaluated expression may reference.
globs = {'sin': sin, 'cos': cos}
# Callables the expression is allowed to invoke.
safe = globs.values()

# Operand stack shared by all opcode handlers.
stack = LifoQueue()


class BINARY(object):
    """Handler for a binary-operator opcode: pops two operands, pushes the result."""

    def __init__(self, operator):
        self.op = operator

    def __call__(self, context):
        # The right operand is on top of the stack, so it pops first.
        right = stack.get()
        left = stack.get()
        stack.put(self.op(left, right))


class UNARY(object):
    """Handler for a unary-operator opcode: pops one operand, pushes the result."""

    def __init__(self, operator):
        self.op = operator

    def __call__(self, context):
        stack.put(self.op(stack.get()))


def CALL_FUNCTION(context, arg):
    """Call a whitelisted function with positional arguments from the stack.

    Raises ``TypeError`` when the callee is not in ``safe`` or when the call
    uses keyword arguments.
    """
    # Per the Python 2 ``dis`` documentation the low byte is the positional
    # argument count and the high byte the keyword argument count.
    if arg[1]:
        raise TypeError("Keyword arguments are not supported")
    argc = arg[0]
    args = [stack.get() for i in range(argc)]
    args.reverse()  # arguments pop last-first; restore call order
    func = stack.get()
    if func not in safe:
        raise TypeError("Function %r not allowed" % func)
    stack.put(func(*args))


def LOAD_CONST(context, arg):
    """Push the constant whose index is encoded in ``arg``."""
    cons = arg[0] + arg[1] * 256
    stack.put(context['code'].co_consts[cons])


def LOAD_NAME(context, arg):
    """Push the value bound to a name, looking in locals first, then globals."""
    name_num = arg[0] + arg[1] * 256
    name = context['code'].co_names[name_num]
    if name in context['locals']:
        stack.put(context['locals'][name])
    else:
        stack.put(context['globals'][name])


def RETURN_VALUE(context):
    """Pop and return the final result."""
    return stack.get()


# Whitelist of supported opcodes.  Any other opcode raises KeyError in VMeval.
opfuncs = {
    opmap['BINARY_ADD']: BINARY(operator.add),
    opmap['UNARY_INVERT']: UNARY(operator.invert),
    opmap['CALL_FUNCTION']: CALL_FUNCTION,
    opmap['LOAD_CONST']: LOAD_CONST,
    opmap['LOAD_NAME']: LOAD_NAME,
    opmap['RETURN_VALUE']: RETURN_VALUE,
}


def VMeval(c):
    """Interpret the code object ``c`` using only the whitelisted opcodes."""
    context = dict(locals={}, globals=globs, code=c)
    bci = iter(c.co_code)
    for bytecode in bci:
        opcode = ord(bytecode)
        func = opfuncs[opcode]
        # Opcodes at or above HAVE_ARGUMENT carry two argument bytes.  This
        # also works for the BINARY/UNARY handler instances, which have no
        # ``func_code`` attribute to inspect.
        if opcode >= HAVE_ARGUMENT:
            args = ord(next(bci)), ord(next(bci))
            ret = func(context, args)
        else:
            ret = func(context)
        # Return on the opcode, not on truthiness, so a result of 0 is returned.
        if opcode == opmap['RETURN_VALUE']:
            return ret


def evaluate(expr):
    """Compile ``expr`` as an expression and run it through ``VMeval``."""
    return VMeval(compile(expr, 'userinput', 'eval'))
显然,这个版本的实际版本会更长一些(有119个操作码,其中24个是数学相关的)。添加
虽然这种方法比简单表达式的简单语法分析器长一些(参见上面关于只是抓取编译常量),但它很容易扩展到更复杂的输入,并且不需要处理语法(
好的,所以eval的问题在于它可以轻松地逃脱它的沙箱,即使你摆脱了
在处理公式时,唯一有效的小数使用是在
1 2 3 | import re inp = re.sub(r"\.(?![0-9])","", inp) val = eval(inp, {'__builtins__':None}) |
请注意,虽然python通常将
这是一个非常迟到的回复,但我认为有用,以备将来参考。而不是编写自己的数学解析器(虽然上面的pyparsing示例很棒),您可以使用SymPy。我没有很多经验,但它包含了比任何人可能为特定应用程序编写的更强大的数学引擎,并且基本的表达式评估非常简单:
1 2 3 4 | >>> import sympy >>> x, y, z = sympy.symbols('x y z') >>> sympy.sympify("x**3 + sin(y)").evalf(subs={x:1, y:-3}) 0.858879991940133 |
非常酷!一个
您可以使用ast模块并编写NodeVisitor,以验证每个节点的类型是否为白名单的一部分。
import ast
import math

# Math-related names a caller may pass to evaluate().  The name shadows the
# built-in ``locals`` but is kept because it is this module's public name.
locals = {key: value for (key, value) in vars(math).items() if key[0] != '_'}
locals.update({"abs": abs, "complex": complex, "min": min, "max": max,
               "pow": pow, "round": round})


class Visitor(ast.NodeVisitor):
    """Reject any AST that contains a node type outside ``whitelist``."""

    # ast.Constant replaces the deprecated alias ast.Num.  Non-numeric
    # constants are filtered in visit().
    whitelist = (ast.Module, ast.Expr, ast.Load, ast.Expression, ast.Add,
                 ast.Sub, ast.UnaryOp, ast.Constant, ast.BinOp, ast.Mult,
                 ast.Div, ast.Pow, ast.BitOr, ast.BitAnd, ast.BitXor,
                 ast.USub, ast.UAdd, ast.FloorDiv, ast.Mod, ast.LShift,
                 ast.RShift, ast.Invert, ast.Call, ast.Name)

    def visit(self, node):
        """Validate ``node`` and then its children; raise ``ValueError(node)`` if disallowed."""
        if not isinstance(node, self.whitelist):
            raise ValueError(node)
        if isinstance(node, ast.Constant):
            # ast.Constant also represents strings, bytes, None and booleans;
            # only plain numbers are allowed, as with the former ast.Num.
            value = node.value
            if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
                raise ValueError(node)
        return super().visit(node)


def evaluate(expr, locals=None):
    """Evaluate the arithmetic expression ``expr`` after whitelist validation.

    ``locals`` maps the names the expression may use (for example the
    module-level ``locals`` dict); by default no names are available.

    Raises ``ValueError(expr)`` when the expression contains a newline or a
    ``#``, fails to parse, contains a non-whitelisted node, or fails during
    evaluation.
    """
    # Newlines and comments are rejected up front.  Spaces must be allowed:
    # ordinary expressions such as "7 + 9 * (2 << 2)" contain them.
    if any(elem in expr for elem in '\n#'):
        raise ValueError(expr)
    namespace = {} if locals is None else locals
    try:
        node = ast.parse(expr.strip(), mode='eval')
        Visitor().visit(node)
        # SECURITY: eval() runs only on a tree that passed the whitelist, with
        # builtins disabled.  Keep the whitelist free of ast.Attribute and
        # ast.Subscript, otherwise introspection escapes become possible.
        return eval(compile(node, "<string>", "eval"),
                    {'__builtins__': None}, namespace)
    except Exception as err:
        raise ValueError(expr) from err
因为它通过白名单而不是黑名单工作,所以它是安全的。它可以访问的唯一函数和变量是您明确授予它访问权限的函数和变量。我使用与数学相关的函数填充了一个字典,因此您可以根据需要轻松提供对它们的访问,但您必须明确使用它。
如果字符串尝试调用尚未提供的函数,或者调用任何方法,则会引发异常,并且不会执行该异常。
因为它使用Python内置的解析器和赋值器,所以它也继承了Python的优先级和提升规则。
1 2 3 4 | >>> evaluate("7 + 9 * (2 << 2)") 79 >>> evaluate("6 // 2 + 0.0") 3.0 |
以上代码仅在Python 3上进行了测试。
如果需要,可以在此函数上添加超时装饰器。
[我知道这是一个老问题,但值得指出新的有用解决方案,因为它们弹出]
从 Python 3.6 开始,这个功能已内置于语言中,称为 "f-strings"(格式化字符串字面量)。
请参阅:PEP 498 - 文字字符串插值
例如(注意
1 2 | f'{2**4}' => '16' |
我想我会使用
这是我在不使用eval的情况下解决问题的方法。适用于Python2和Python3。它不适用于负数。
1 | $ python -m pytest test.py |
test.py
import unittest

from solution import Solutions


class SolutionsTestCase(unittest.TestCase):
    """Checks Solutions.evaluate against 'expression=expected' strings."""

    def setUp(self):
        self.solutions = Solutions()

    def test_evaluate(self):
        # Each entry is '<expression>=<expected result>'.
        expressions = [
            '2+3=5',
            '6+4/2*2=10',
            '3+2.45/8=3.30625',
            '3**3*3/3+3=30',
            '2^4=6'
        ]
        for expression in expressions:
            expected_text = expression.split('=')[1]
            # A decimal point marks a float expectation; anything else is an int.
            if '.' in expected_text:
                expected = float(expected_text)
            else:
                expected = int(expected_text)
            self.assertEqual(
                expected,
                self.solutions.evaluate(expression)
            )
solution.py
import operator
import re


class Solutions(object):
    """Evaluate simple arithmetic expressions given as strings, without ``eval``."""

    # Binary operators grouped by precedence, loosest-binding group first.
    # The tiers follow the order the earlier string-substitution version used:
    # '^' (bitwise xor) binds tightest, then '**', then '*' and '/', then
    # '+' and '-'.  All operators associate left-to-right, including '**',
    # so '2**3**2' is (2**3)**2.
    _LEVELS = (
        {'+': operator.add, '-': operator.sub},
        {'*': operator.mul, '/': operator.truediv},
        {'**': operator.pow},
        {'^': operator.xor},
    )

    # Numbers first, then '**' before the single '*', so the longest token wins.
    _TOKEN = re.compile(r'\d+\.\d*|\.\d+|\d+|\*\*|[-+*/^()]')

    def evaluate(self, exp):
        """Return the value of the arithmetic expression ``exp``.

        Spaces are ignored.  If ``exp`` contains '=', only the part before the
        first '=' is evaluated, so 'expression=expected' strings are accepted.
        Supports '+', '-', '*', '/', '**', '^' (xor), parentheses and a
        leading sign on an operand.  The sign binds tighter than every binary
        operator, so '-2**2' is 4.

        Integer literals stay ``int``; '/' is true division and yields
        ``float``.  Raises ``ValueError`` for malformed input.
        """
        text = exp.replace(' ', '').split('=')[0]
        tokens = self._tokenize(text)
        tokens.reverse()  # consume from the end with pop()
        value = self._parse_level(tokens, 0)
        if tokens:
            raise ValueError('unexpected token %r in %r' % (tokens[-1], exp))
        return value

    def _tokenize(self, text):
        """Split ``text`` into number, operator and parenthesis tokens."""
        tokens = []
        pos = 0
        while pos < len(text):
            match = self._TOKEN.match(text, pos)
            if match is None:
                raise ValueError(
                    'unexpected character %r in %r' % (text[pos], text))
            tokens.append(match.group())
            pos = match.end()
        return tokens

    def _parse_level(self, tokens, level):
        """Parse a left-associative chain of the operators at ``level``."""
        if level == len(self._LEVELS):
            return self._parse_operand(tokens)
        ops = self._LEVELS[level]
        value = self._parse_level(tokens, level + 1)
        while tokens and tokens[-1] in ops:
            apply_op = ops[tokens.pop()]
            value = apply_op(value, self._parse_level(tokens, level + 1))
        return value

    def _parse_operand(self, tokens):
        """Parse a number, a signed operand or a parenthesised expression."""
        if not tokens:
            raise ValueError('unexpected end of expression')
        tok = tokens.pop()
        if tok == '-':
            return -self._parse_operand(tokens)
        if tok == '+':
            return +self._parse_operand(tokens)
        if tok == '(':
            value = self._parse_level(tokens, 0)
            if not tokens or tokens.pop() != ')':
                raise ValueError('missing closing parenthesis')
            return value
        try:
            # A decimal point selects float; everything else must be an int.
            return float(tok) if '.' in tok else int(tok)
        except ValueError:
            raise ValueError('unexpected token %r' % tok)
在干净的命名空间中使用 `eval`:
1 2 3 | >>> ns = {'__builtins__': None} >>> eval('2 ** 4', ns) 16 |
干净的命名空间应该防止注入。例如:
1 2 3 4 5 | >>> eval('__builtins__.__import__("os").system("echo got through")', ns) Traceback (most recent call last): File"<stdin>", line 1, in <module> File"<string>", line 1, in <module> AttributeError: 'NoneType' object has no attribute '__import__' |
否则你会得到:
1 2 3 | >>> eval('__builtins__.__import__("os").system("echo got through")') got through 0 |
您可能想要访问数学模块:
1 2 3 4 5 | >>> import math >>> ns = vars(math).copy() >>> ns['__builtins__'] = None >>> eval('cos(pi/3)', ns) 0.50000000000000011 |