Suppose I have a function in Python that includes mathematical expressions from Python's base and some mathematical expressions from Numpy and Scipy, including maybe some distributions. As a running example, consider:
import numpy as np
from scipy.stats import *
def my_process(args):
    """Draw one joint sample of the running example.

    ``args`` is accepted but not used. Three normal draws are made, in the
    order x1, x2, x3, and the tuple ``(x1, x2, x3)`` is returned.
    """
    # x1 has no parents: it is a plain normal draw.
    x1 = norm.rvs(loc=0, scale=1)
    # x2 depends on x1.
    x2 = x1 + norm.rvs(loc=2, scale=0.5)
    # x3 depends on both x1 and x2.
    x3 = x1 * np.exp(x2) + norm.rvs(loc=-1, scale=2)
    return x1, x2, x3
I want to write an interpreter of this function and make each one of the appearing variables into a class, which generically is written as follows:
class genericProcess:
    """Base class for the process attached to a single variable.

    Subclasses override ``process`` to produce the variable's value.
    """

    def __init__(self):
        # There is no state to initialise.
        return None

    def process(self, parents):
        """Compute this variable's value from ``parents``.

        The base class provides no implementation; every subclass must
        override this method.
        """
        raise NotImplementedError()
So for our example function, we would interpret the given function as the following three classes:
class x1Process(genericProcess):
    """Process for ``x1``: a normal draw with loc=0 and scale=1."""

    def __init__(self):
        pass

    def process(self, parents=None):
        """Return a fresh sample for ``x1``.

        ``x1`` has no parents, so ``parents`` is ignored. It is still accepted
        (defaulting to ``None``) so that this class can be called with the
        same ``process(parents)`` signature as every other process.
        """
        return norm.rvs(loc=0, scale=1)
class x2Process(genericProcess):
    """Process for ``x2``: the parent value ``x1`` plus normal noise."""

    def __init__(self):
        # There is no state to initialise.
        return None

    def process(self, parents):
        """Return ``parents["x1"]`` plus a normal draw with loc=2, scale=0.5."""
        parent_x1 = parents["x1"]
        return parent_x1 + norm.rvs(loc=2, scale=0.5)
class x3Process(genericProcess):
    """Process for ``x3``: ``x1 * exp(x2)`` plus normal noise."""

    def __init__(self):
        pass

    def process(self, parents):
        """Return ``parents["x1"] * exp(parents["x2"])`` plus a normal draw.

        The noise term is a normal draw with loc=-1 and scale=2.
        """
        # The exponential on x2 is part of the original expression
        # ``x1 * np.exp(x2)`` and must be kept here.
        return (parents["x1"] * np.exp(parents["x2"])) + norm.rvs(loc=-1, scale=2)
Is this even possible at all? if yes, what would be the first steps to start implementing it, if not, what would make the problem well-posed so that I can start implementing it? For example, I thought having a string instead of a function might make the problem simpler, although I am not sure.
EDIT:
Thanks to the comments I can make the question a bit more concrete. I want a function, called "my_interpreter", that takes as input a user-specified function and outputs a dictionary where each key is a line of the function (or alternatively each key is one of the return elements of the function), and each value of the dictionary is a class that implements the "process" method of the "genericProcess" class. In our running example:
interpreted_function_dictionary = my_interpreter(my_process)
with
# Expected result of my_interpreter(my_process): one process class per variable.
interpreted_function_dictionary = {
    "x1": x1Process,
    "x2": x2Process,
    "x3": x3Process,
}
It's difficult to intercept variable definitions inside a function at runtime. You would need to parse the code with ast,
as suggested in the comments.
sympy
An alternative is to replace all the math operations with their symbolic representations, which can be evaluated at a later time. The sympy
package does exactly that and should contain most of the math operations you need. There is also sympy.stats,
which has most of the stats functions. (This is very similar to symbolic computation in MATLAB
with syms
.)
To use sympy
with numpy
backend, you can use their lambdify
function, eg
from sympy import sin, lambdify
from sympy.abc import x
# Symbolic expression sin(x)/x; nothing is evaluated numerically yet.
expr = sin(x) / x
# Turn the symbolic expression into a callable that is evaluated with numpy.
f = lambdify(x, expr, modules="numpy")
As of version 1.11, it doesn't seem to support scipy
yet.
Similar to sympy
, you can create wrapper classes for all the math operations that would return an expression instead of the result. Then, each expression would be your process
and you can evaluate each expression to get the resulting value.
Not sure if this fits OP's requirement.
from dataclasses import dataclass, field
from typing import Any, ClassVar
import numpy as np
import scipy
@dataclass
class EvaluatableExpression:
    """A deferred call of ``getattr(package, name)(*args, **kwargs)``.

    Nothing is computed when the object is created. ``evaluate`` performs the
    call, first evaluating any argument that is itself an
    ``EvaluatableExpression``.
    """
    name: str
    args: Any = field(default_factory=tuple)
    kwargs: Any = field(default_factory=dict)
    # Object on which ``name`` is looked up; subclasses provide it.
    package: ClassVar = None

    @staticmethod
    def _resolve(value):
        """Evaluate ``value`` if it is an expression, else return it as is."""
        if isinstance(value, EvaluatableExpression):
            return value.evaluate()
        return value

    def evaluate(self):
        """Evaluate nested expressions, then call the target and return its result."""
        # Positional arguments are resolved first, in order, then keywords.
        args = [self._resolve(arg) for arg in self.args]
        kwargs = {key: self._resolve(value) for key, value in self.kwargs.items()}
        return getattr(self.package, self.name)(*args, **kwargs)


@dataclass
class NumpyFunc(EvaluatableExpression):
    """Deferred call of a function looked up on the ``numpy`` module."""
    package: ClassVar = np


@dataclass
class ScipyFunc(EvaluatableExpression):
    """Deferred call of a function looked up on the top-level ``scipy`` module."""
    package: ClassVar = scipy


@dataclass
class ScipyStats(EvaluatableExpression):
    """Deferred call of a method on an attribute of ``scipy.stats``.

    ``stats_package`` names the attribute of ``scipy.stats`` (for example
    ``'norm'``) and ``name`` names the method to call on it (for example
    ``'rvs'``).
    """
    stats_package: str = ''

    def __post_init__(self):
        # A bare ``import scipy`` does not guarantee that the ``stats``
        # submodule has been loaded, so import it explicitly before use.
        import scipy.stats
        self.package = getattr(scipy.stats, self.stats_package)
For python math, you can handle them using magic methods:
@dataclass
class PythonMath(EvaluatableExpression):
    """Deferred Python operator, stored as the name of a magic method.

    ``name`` is a magic-method name such as ``'__add__'``. ``args[0]`` is the
    left operand, on which the method is looked up, and the remaining
    ``args`` are passed to it.
    """

    def evaluate(self):
        """Evaluate all operands, apply the operator and return the result."""
        import operator

        # Evaluate into local values instead of temporarily rewriting
        # ``self.args``, so the expression is left untouched even if an
        # operand raises.
        operands = [
            arg.evaluate() if isinstance(arg, EvaluatableExpression) else arg
            for arg in self.args
        ]
        kwargs = {
            key: value.evaluate() if isinstance(value, EvaluatableExpression) else value
            for key, value in self.kwargs.items()
        }
        receiver, rest = operands[0], operands[1:]
        result = getattr(receiver, self.name)(*rest, **kwargs)
        # Calling a magic method directly skips Python's reflected-operand
        # fallback: ``(1).__add__(2.5)`` returns NotImplemented. For binary
        # operators, defer to the ``operator`` module function of the same
        # name, which applies the full operator protocol.
        if result is NotImplemented and len(rest) == 1 and not kwargs:
            fallback = getattr(operator, self.name, None)
            if fallback is not None:
                result = fallback(receiver, rest[0])
        return result
@dataclass
class Operand:
    """Wrapper whose arithmetic operators build ``PythonMath`` expressions.

    Applying ``+``, ``-``, ``*`` or ``/`` to an ``Operand`` does not compute
    anything; it returns a ``PythonMath`` recording the operator and operands.
    """
    content: Any

    def _deferred(self, method_name, other):
        """Build the expression for ``content <op> other``."""
        return PythonMath(name=method_name, args=(self.content, other))

    def __add__(self, other):
        return self._deferred('__add__', other)

    def __sub__(self, other):
        return self._deferred('__sub__', other)

    def __mul__(self, other):
        return self._deferred('__mul__', other)

    def __truediv__(self, other):
        return self._deferred('__truediv__', other)

    # Further operators follow the same pattern.
    ...
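For Operand
, it's not possible to catch the magic methods
with __getattr__
or __getattribute__
, because Python looks up the magic methods used by operators on the type and bypasses both hooks. You can write a custom metaclass to generate them instead of copying and pasting the code for each operator.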
def process(args):
    """Build the running example as deferred expressions.

    ``args`` is accepted but not used. Returns ``(x1, x2, x3)``, where each
    element is an expression object that is only computed when its
    ``evaluate()`` method is called.

    NOTE: every ``evaluate()`` call re-evaluates the whole expression tree, so
    ``x1`` is re-sampled each time it appears (directly and inside ``x2``).
    """
    x1 = ScipyStats(stats_package='norm', name='rvs', kwargs={'loc': 0, 'scale': 1})
    x2 = Operand(x1) + ScipyStats(stats_package='norm', name='rvs', kwargs={'loc': 2, 'scale': 0.5})
    # The x3 noise uses scale=2, matching the original
    # ``norm.rvs(loc=-1, scale=2)`` term.
    x3 = Operand(Operand(x1) * NumpyFunc(name='exp', args=(x2,))) + ScipyStats(
        stats_package='norm', name='rvs', kwargs={'loc': -1, 'scale': 2})
    return x1, x2, x3
Now all the returned variables are "expressions". With x = process(None), printing each element shows:
>>> print(x[0])
ScipyStats(name='rvs', args=(), kwargs={'loc': 0, 'scale': 1}, stats_package='norm')
>>> print(x[1])
PythonMath(name='__add__', args=(ScipyStats(name='rvs', args=(), kwargs={'loc': 0, 'scale': 1}, stats_package='norm'), ScipyStats(name='rvs', args=(), kwargs={'loc': 2, 'scale': 0.5}, stats_package='norm')), kwargs={})
>>> print(x[2])
PythonMath(name='__add__', args=(PythonMath(name='__mul__', args=(ScipyStats(name='rvs', args=(), kwargs={'loc': 0, 'scale': 1}, stats_package='norm'), NumpyFunc(name='exp', args=(PythonMath(name='__add__', args=(ScipyStats(name='rvs', args=(), kwargs={'loc': 0, 'scale': 1}, stats_package='norm'), ScipyStats(name='rvs', args=(), kwargs={'loc': 2, 'scale': 0.5}, stats_package='norm')), kwargs={}),), kwargs={})),
And evaluating them gives:
>>> print(x[0].evaluate())
-1.331802485169775
>>> print(x[1].evaluate())
0.7789471967940289
>>> print(x[2].evaluate())
-60.03245897617831
Of course, you can make defining math expressions prettier and more concise by defining aliases, e.g. borrowing an idea from the pyspark
library:
def _create_function(name, doc=""):
""" Create a function for aggregator by name"""
def _(*args, **kwargs):
package, new_name = name.split('__')
if package == 'np':
cls = NumpyFunc
elif package == 'scipy':
cls = ScipyFunc
elif package == 'ss':
cls = ScipyStats
return cls(func=new_name, args=args, kwargs=kwargs)
_.__name__ = name
_.__doc__ = doc
return _
# Prefixed names to expose as module-level alias functions, e.g. 'np__exp'.
# Extend the list with further 'scipy__<func>' and 'ss__<dist>_<method>'
# names as needed.
ALL = [f'np__{func}' for func in np.ma.__all__]

# Register one alias per name in this module's namespace.
for _name in ALL:
    globals()[_name] = _create_function(_name)
del _name
Then you can have something like:
x1 = ss__norm_rvs(loc=0, scale=1)
x2 = Operand(x1) + ss__norm_rvs(loc=2, scale=0.5)
x3 = Operand(Operand(x1) * np__exp(x2)) + ss__norm_rvs(loc=-1, scale=2)
You could even get rid of the pesky Operand
by making everything a subclass of Operand
.
Hope this helps.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.