[英]Exceptions catching performance in python
我知道 Python 中的异常处理在 try 阶段很快,但在捕获(catch)时可能开销很大。
这是否意味着:
try:
some code
except MyException:
pass
比这快吗?
try:
some code
except MyException as e:
pass
除了 Francesco 的回答之外,似乎捕获的(相对)昂贵的部分之一是异常匹配:
>>> timeit.timeit('try:\n raise KeyError\nexcept KeyError:\n pass', number=1000000 )
1.1587663322268327
>>> timeit.timeit('try:\n raise KeyError\nexcept:\n pass', number=1000000 )
0.9180641582179874
查看(CPython 2)反汇编:
>>> def f():
... try:
... raise KeyError
... except KeyError:
... pass
...
>>> def g():
... try:
... raise KeyError
... except:
... pass
...
>>> dis.dis(f)
2 0 SETUP_EXCEPT 10 (to 13)
3 3 LOAD_GLOBAL 0 (KeyError)
6 RAISE_VARARGS 1
9 POP_BLOCK
10 JUMP_FORWARD 17 (to 30)
4 >> 13 DUP_TOP
14 LOAD_GLOBAL 0 (KeyError)
17 COMPARE_OP 10 (exception match)
20 POP_JUMP_IF_FALSE 29
23 POP_TOP
24 POP_TOP
25 POP_TOP
5 26 JUMP_FORWARD 1 (to 30)
>> 29 END_FINALLY
>> 30 LOAD_CONST 0 (None)
33 RETURN_VALUE
>>> dis.dis(g)
2 0 SETUP_EXCEPT 10 (to 13)
3 3 LOAD_GLOBAL 0 (KeyError)
6 RAISE_VARARGS 1
9 POP_BLOCK
10 JUMP_FORWARD 7 (to 20)
4 >> 13 POP_TOP
14 POP_TOP
15 POP_TOP
5 16 JUMP_FORWARD 1 (to 20)
19 END_FINALLY
>> 20 LOAD_CONST 0 (None)
23 RETURN_VALUE
请注意,catch 块无论如何都会加载异常并将其与 KeyError 进行匹配。 确实,再来看 except KeyError as ke 的情况:
>>> def f2():
... try:
... raise KeyError
... except KeyError as ke:
... pass
...
>>> dis.dis(f2)
2 0 SETUP_EXCEPT 10 (to 13)
3 3 LOAD_GLOBAL 0 (KeyError)
6 RAISE_VARARGS 1
9 POP_BLOCK
10 JUMP_FORWARD 19 (to 32)
4 >> 13 DUP_TOP
14 LOAD_GLOBAL 0 (KeyError)
17 COMPARE_OP 10 (exception match)
20 POP_JUMP_IF_FALSE 31
23 POP_TOP
24 STORE_FAST 0 (ke)
27 POP_TOP
5 28 JUMP_FORWARD 1 (to 32)
>> 31 END_FINALLY
>> 32 LOAD_CONST 0 (None)
35 RETURN_VALUE
唯一的区别是多了一条 STORE_FAST 指令,用来存储异常值(在匹配的情况下)。 同样,当有多个 except 子句需要匹配时:
>>> def f():
... try:
... raise ValueError
... except KeyError:
... pass
... except IOError:
... pass
... except SomeOtherError:
... pass
... except:
... pass
...
>>> dis.dis(f)
2 0 SETUP_EXCEPT 10 (to 13)
3 3 LOAD_GLOBAL 0 (ValueError)
6 RAISE_VARARGS 1
9 POP_BLOCK
10 JUMP_FORWARD 55 (to 68)
4 >> 13 DUP_TOP
14 LOAD_GLOBAL 1 (KeyError)
17 COMPARE_OP 10 (exception match)
20 POP_JUMP_IF_FALSE 29
23 POP_TOP
24 POP_TOP
25 POP_TOP
5 26 JUMP_FORWARD 39 (to 68)
6 >> 29 DUP_TOP
30 LOAD_GLOBAL 2 (IOError)
33 COMPARE_OP 10 (exception match)
36 POP_JUMP_IF_FALSE 45
39 POP_TOP
40 POP_TOP
41 POP_TOP
7 42 JUMP_FORWARD 23 (to 68)
8 >> 45 DUP_TOP
46 LOAD_GLOBAL 3 (SomeOtherError)
49 COMPARE_OP 10 (exception match)
52 POP_JUMP_IF_FALSE 61
55 POP_TOP
56 POP_TOP
57 POP_TOP
9 58 JUMP_FORWARD 7 (to 68)
10 >> 61 POP_TOP
62 POP_TOP
63 POP_TOP
11 64 JUMP_FORWARD 1 (to 68)
67 END_FINALLY
>> 68 LOAD_CONST 0 (None)
71 RETURN_VALUE
解释器会复制异常(DUP_TOP),并依次尝试将其与列出的每个异常进行匹配,直到找到匹配为止,这(可能)就是所谓“糟糕的捕获性能”所指的情况。
我认为两者在速度上是一样的:
>>> timeit.timeit('try:\n raise KeyError\nexcept KeyError:\n pass', number=1000000 )
0.7168641227143269
>>> timeit.timeit('try:\n raise KeyError\nexcept KeyError as e:\n pass', number=1000000 )
0.7733279216613766
捕获并不昂贵,看起来相对较慢的部分是堆栈跟踪本身的创建以及堆栈的后续展开(如果需要)。
我所知道的所有允许您捕获堆栈跟踪的基于堆栈的语言都需要执行这些操作。
堆栈信息是在 raise 被调用时收集的。 请注意,Java 1.7 允许您禁止收集堆栈信息,这样速度会快得多,但您会丢失很多有用的信息。 语言没有合理的办法预先知道谁将捕获异常,因此忽略异常也无济于事,因为无论如何它都必须完成大部分工作。与上述两种操作相比,catch 本身的开销微不足道。 下面的代码用来证明:随着堆栈深度的增加,性能会下降。
#!/usr/bin/env python
import os
import re
import time
import pytest
# Benchmark configuration and per-depth timing tables (indexed by depth).
max_depth = 10
# Clock reading stored by thrower() immediately before it raises.
time_start = [0] * (max_depth + 1)
# Allocated alongside the other tables; not written by the code shown here.
time_stop = [0] * (max_depth + 1)
# Accumulated (catch time - time_start) per depth, filled in by the catchers.
time_total = [0] * (max_depth + 1)
# Depth values used to parametrize the py.test benchmark: 0 .. max_depth - 1.
depth = list(range(max_depth))
@pytest.mark.parametrize('i', depth)
def test_stack(benchmark, i):
    """Benchmark catcher2(i, i) once for every value in ``depth``.

    ``benchmark`` is a fixture injected by py.test (presumably from the
    pytest-benchmark plugin -- confirm in the project's requirements).
    The call is measured with rounds=10 and iterations=1000.
    """
    benchmark.pedantic(catcher2, args=(i, i), rounds=10, iterations=1000)
#@pytest.mark.parametrize('d', depth)
#def test_recursion(benchmark, d):
# benchmark.pedantic(catcher, args=(d,), rounds=50, iterations=50)
def catcher(i, depth):
    """Run ping(i, depth) and record how long the exception took to arrive.

    When the call raises, the difference between the current clock and
    ``time_start[depth]`` is added to ``time_total[depth]``.

    NOTE: time.clock() was removed in Python 3.8; on Python 3 use
    time.perf_counter() instead.
    """
    try:
        ping(i, depth)
    except Exception:
        # Deliberately broad: the benchmark only cares that *something*
        # was raised and caught, not which exception it was.
        time_total[depth] += time.clock() - time_start[depth]
def recurse(i, depth):
    """Descend ``i`` additional frames, then raise through thrower(depth).

    The innermost call (``i == 0``) is the one that actually raises; the
    exception then unwinds through every enclosing recurse() frame.
    """
    # The original tested an undefined name ``d`` and passed ``--i``, which
    # in Python is double negation (i.e. ``i``), not a decrement.
    if i > 0:
        recurse(i - 1, depth)
    thrower(depth)
def catcher2(i, depth):
    """Run ping(i, depth) and add the raise-to-catch time to time_total[depth].

    This is the callable measured by test_stack(). The elapsed time is
    computed as the current clock minus ``time_start[depth]``.
    """
    # No ``global`` declarations are needed: the timing lists are only
    # mutated in place (item assignment), never rebound.
    try:
        ping(i, depth)
    except Exception:
        # Deliberately broad: any exception ends the measured run.
        time_total[depth] += time.clock() - time_start[depth]
def thrower(depth):
    """Store the current clock in time_start[depth], then raise Exception('wtf').

    NOTE: time.clock() was removed in Python 3.8; on Python 3 use
    time.perf_counter() instead.
    """
    # Item assignment mutates the module-level list in place, so no
    # ``global`` declaration is required.
    time_start[depth] = time.clock()
    raise Exception('wtf')
def ping(i, depth):
    """Bounce between ping() and pong() until ``i`` drops below 1, then raise.

    Each pong() hop reduces ``i`` by 4, so the number of frames on the
    stack when thrower() finally raises grows with the starting ``i``.
    """
    if i < 1:
        # thrower() accepts only ``depth``. Calling it with (i, depth)
        # raised a TypeError that the catchers' ``except Exception``
        # swallowed, so time_start[depth] was never recorded.
        thrower(depth)
    return pong(i, depth)
def pong(i, depth):
    """Counterpart of ping(): raise if ``i`` is negative, else call ping(i - 4, depth)."""
    if i < 0:
        # thrower() accepts only ``depth``; the original two-argument call
        # would have raised TypeError instead of the intended exception.
        thrower(depth)
    return ping(i - 4, depth)
if __name__ == "__main__":
    # Stand-alone driver: first time bare Exception() construction, then
    # time raise-and-catch at every stack depth.
    # NOTE: time.clock() was removed in Python 3.8; on Python 3 use
    # time.perf_counter() instead.
    rounds = 200000

    # Baseline: cost of merely instantiating an exception object.
    class_start = time.clock()
    for _ in range(rounds):
        Exception()
    class_time = time.clock() - class_start
    print("%d ex = Exception()'s %f" % (rounds, class_time))

    # ``stack_depth`` (rather than ``depth``) keeps the module-level
    # ``depth`` list intact; ``_`` avoids shadowing the builtin ``round``.
    for stack_depth in range(max_depth):
        for _ in range(rounds):
            catcher(stack_depth, stack_depth)

    # NOTE(review): time_total accumulates time.clock() differences; the
    # 1000000 divisor is kept from the original -- confirm the intended unit.
    for rep in range(max_depth):
        print("depth=%d time=%f" % (rep, time_total[rep]/1000000))
输出如下(时间为相对值),第一行是调用 Exception() 所花费的时间:
200000 ex = Exception()'s 0.040469
depth=0 time=0.103843
depth=1 time=0.246050
depth=2 time=0.401459
depth=3 time=0.565742
depth=4 time=0.736362
depth=5 time=0.921993
depth=6 time=1.102257
depth=7 time=1.278089
depth=8 time=1.463500
depth=9 time=1.657082
比我更精通 Python 的人,也许能够让 py.test 在最后打印这些时间。
请注意,几周前有一个与此非常相似的关于 Java 的问题。 无论使用何种语言,这都是一个非常有用的线程......
Python 程序由代码块构成。 块是作为一个单元执行的一段 Python 程序文本。 在 Python 中,核心块表示为 struct basicblock:
cpython/Python/compile.c
/* One basic block of a compilation unit: an array of instructions plus
   the links (allocation order and control flow) to other blocks and the
   bookkeeping fields filled in by later passes. */
typedef struct basicblock_ {
/* Each basicblock in a compilation unit is linked via b_list in the
reverse order that the block are allocated. b_list points to the next
block, not to be confused with b_next, which is next by control flow. */
struct basicblock_ *b_list;
/* number of instructions used */
int b_iused;
/* length of instruction array (b_instr) */
int b_ialloc;
/* pointer to an array of instructions, initially NULL */
struct instr *b_instr;
/* If b_next is non-NULL, it is a pointer to the next
block reached by normal control flow. */
struct basicblock_ *b_next;
/* b_seen is used to perform a DFS of basicblocks. */
unsigned b_seen : 1;
/* b_return is true if a RETURN_VALUE opcode is inserted. */
unsigned b_return : 1;
/* depth of stack upon entry of block, computed by stackdepth() */
int b_startdepth;
/* instruction offset for block, computed by assemble_jump_offsets() */
int b_offset;
} basicblock;
循环、try/except 和 try/finally 语句处理了不同的事情。 对于这 3 个语句,使用了框架块:
cpython/Python/compile.c
/* Kinds of frame block. The constant names suggest loops, except
   handlers, and the try / end parts of try-finally -- confirm against
   compile.c. */
enum fblocktype { LOOP, EXCEPT, FINALLY_TRY, FINALLY_END };
/* One frame-block record: a block kind paired with a basic block. */
struct fblockinfo {
enum fblocktype fb_type; /* which kind of frame block this is */
basicblock *fb_block; /* basic block tied to this frame block (exact role not shown here) */
};
代码块在执行帧中执行。
cpython/Include/frameobject.h
/* Execution frame object: links to the previous frame and to the code
   being run, and holds the symbol tables, value stack, exception state
   and try/loop block stack used while that code executes. */
typedef struct _frame {
PyObject_VAR_HEAD
struct _frame *f_back; /* previous frame, or NULL */
PyCodeObject *f_code; /* code segment */
PyObject *f_builtins; /* builtin symbol table (PyDictObject) */
PyObject *f_globals; /* global symbol table (PyDictObject) */
PyObject *f_locals; /* local symbol table (any mapping) */
PyObject **f_valuestack; /* points after the last local */
/* Next free slot in f_valuestack. Frame creation sets to f_valuestack.
Frame evaluation usually NULLs it, but a frame that yields sets it
to the current stack top. */
PyObject **f_stacktop;
PyObject *f_trace; /* Trace function */
/* In a generator, we need to be able to swap between the exception
state inside the generator and the exception state of the calling
frame (which shouldn't be impacted when the generator "yields"
from an except handler).
These three fields exist exactly for that, and are unused for
non-generator frames. See the save_exc_state and swap_exc_state
functions in ceval.c for details of their use. */
PyObject *f_exc_type, *f_exc_value, *f_exc_traceback;
/* Borrowed reference to a generator, or NULL */
PyObject *f_gen;
int f_lasti; /* Last instruction if called */
/* Call PyFrame_GetLineNumber() instead of reading this field
directly. As of 2.3 f_lineno is only valid when tracing is
active (i.e. when f_trace is set). At other times we use
PyCode_Addr2Line to calculate the line from the current
bytecode index. */
int f_lineno; /* Current line number */
int f_iblock; /* index in f_blockstack */
char f_executing; /* whether the frame is still executing */
PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */
} PyFrameObject;
帧包含一些管理信息(用于调试),并决定代码块执行完成后从何处、以何种方式继续执行。 当您使用 'as' 语句(在 'import something as' 或 'except Exception as' 语句中)时,您只是执行了一次名称绑定操作。 即 Python 只是在帧对象的 *f_locals 符号表中添加一个对该对象的引用。 因此在运行时不会有额外开销。
但是在解析时你会有一些开销。
cpython/Modules/parsermodule.c
/* Validate an except_clause parse-tree node.
 *
 * The node must have type except_clause, have 1, 2 or 4 children
 * (nch is presumably the child count reported by NCH()), and its first
 * child must be the name "except".  With more than one child, child 1
 * must pass validate_test(); in the 4-child form, child 2 must be the
 * name "as" and child 3 a NAME node.
 *
 * Returns a true value only if every applicable check succeeds.
 */
static int
validate_except_clause(node *tree)
{
int nch = NCH(tree);
int res = (validate_ntype(tree, except_clause)
&& ((nch == 1) || (nch == 2) || (nch == 4))
&& validate_name(CHILD(tree, 0), "except"));
/* optional expression naming the exception(s) to catch */
if (res && (nch > 1))
res = validate_test(CHILD(tree, 1));
/* optional "as NAME" binding */
if (res && (nch == 4))
res = (validate_name(CHILD(tree, 2), "as")
&& validate_ntype(CHILD(tree, 3), NAME));
return (res);
}
但是,在我看来,这可以忽略
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.