【CPython3.6源码分析】Python 异常控制

前言

本章将通过字节码指令,探究 Python 中 try-except 等异常控制语句的实现及原理。阅读本章前需了解 PyCodeObject/PyFrameObject。

开局一张陈儒先生著作《Python 源码剖析》中的附图:
异常机制流程图

1/0

1
2
3
4
5
6
7
8
9
10
1/0

co_consts: (1, 0, None)

1 0 LOAD_CONST 0 (1)
2 LOAD_CONST 1 (0)
4 BINARY_TRUE_DIVIDE
6 POP_TOP
8 LOAD_CONST 2 (None)
10 RETURN_VALUE

其他的先不谈,先来看上面的字节码。偏移 4 和 6 处的两条指令(BINARY_TRUE_DIVIDE、POP_TOP)就是一个 / 运算。很明显,重点落在 BINARY_TRUE_DIVIDE 中。

BINARY_TRUE_DIVIDE

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#define BINARY_TRUE_DIVIDE       27

TARGET(BINARY_TRUE_DIVIDE) {
/* TOS = TOS1 / TOS */
PyObject *divisor = POP();
PyObject *dividend = TOP();
PyObject *quotient = PyNumber_TrueDivide(dividend, divisor);
Py_DECREF(dividend);
Py_DECREF(divisor);
SET_TOP(quotient);
if (quotient == NULL)
goto error;
DISPATCH();
}

PyNumber_TrueDivide -> binary_op(v, w, NB_SLOT(nb_true_divide), "/")
-> binary_op1(v, w, op_slot)
-> v->ob_type->tp_as_number[op_slot](v, w)
-> long_true_divide(v, w)

1/0最终进入到longobject.c中去执行,源码中有很长一段关于除法运算的注释,有兴趣可以看看。我们这里只关注异常处理的部分。

long_true_divide

1
2
3
4
5
6
7
8
9
10
11
// longobject.c.long_true_divide
if (Py_ABS(Py_SIZE(b)) == 0) {
PyErr_SetString(PyExc_ZeroDivisionError,
"division by zero");
goto error;
}
overflow:
PyErr_SetString(PyExc_OverflowError,
"integer division result too large for a float");
error:
return NULL;

在前面的部分中,其实我们已经看见了 Python 关于故障报错的处理方式,ZeroDivisionError同样是通过这种方式实现。那么,问题来了,这个PyExc_ZeroDivisionError又是个什么东西?

PyErrorObject

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// pyport.h
#define PyAPI_DATA(RTYPE) extern RTYPE

// pyerrors.h
PyAPI_DATA(PyObject *) PyExc_ZeroDivisionError;

typedef struct {
PyException_HEAD
} PyBaseExceptionObject;

typedef struct {
PyException_HEAD
PyObject *msg;
PyObject *filename;
PyObject *lineno;
PyObject *offset;
PyObject *text;
PyObject *print_file_and_line;
} PySyntaxErrorObject;

PyAPI_FUNC(void) PyErr_SetString(
PyObject *exception,
const char *string /* decoded from utf-8 */
);

果然,一切皆为对象。long_true_divide 似乎就干了两件事:SetString 和 return NULL。BINARY_TRUE_DIVIDE 先通过 SET_TOP() 把结果入栈,判断结果为空,goto error。

goto error

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
error:
why = WHY_EXCEPTION; // !!!
/* Log traceback info. */
PyTraceBack_Here(f);

fast_block_end:
/* Unwind stacks if a (pseudo) exception occurred */
while (why != WHY_NOT && f->f_iblock > 0) {
...
} /* unwind stack */

/* End the loop if we still have an error (or return) */
if (why != WHY_NOT)
break;
} /* main loop */

emmm,代码很长。从上面的轮廓可以看见,在 error 中调用 PyTraceBack_Here 记录 traceback。在 fast_block_end 中,尝试在栈链上逐级捕捉错误;如果仍未被捕捉(why != WHY_NOT),则最终退出主循环,结束程序。

PyTraceBack_Here

1
2
3
4
5
6
7
8
9
10
11
12
13
// traceback.c.133
int PyTraceBack_Here(PyFrameObject *frame)
{
PyObject *exc, *val, *tb, *newtb;
/* 先保存,后清空 */
PyErr_Fetch(&exc, &val, &tb);
// 构建新的 traceback,形成链表
newtb = (PyObject *)newtracebackobject((PyTracebackObject *)tb,frame);
// 将新的异常对象存储到线程状态对象中
PyErr_Restore(exc, val, newtb);
Py_XDECREF(tb);
return 0;
}

如上,代码很简单,将 Frame 的错误信息保存起来,构建一个新的 tb,插入到原有的链中。

PyErr_Fetch

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// errors.c.339
void
PyErr_Fetch(PyObject **p_type, PyObject **p_value, PyObject **p_traceback)
{
// 保存线程异常对象,并清空
PyThreadState *tstate = PyThreadState_GET();

*p_type = tstate->curexc_type;
*p_value = tstate->curexc_value;
*p_traceback = tstate->curexc_traceback;

tstate->curexc_type = NULL;
tstate->curexc_value = NULL;
tstate->curexc_traceback = NULL;
}

newtracebackobject

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// traceback.c.111
static PyTracebackObject *
newtracebackobject(PyTracebackObject *next, PyFrameObject *frame)
{
PyTracebackObject *tb;
...
tb = PyObject_GC_New(PyTracebackObject, &PyTraceBack_Type);
if (tb != NULL) {
Py_XINCREF(next);
tb->tb_next = next; // 关键步骤,链表插入
Py_XINCREF(frame);
tb->tb_frame = frame;
tb->tb_lasti = frame->f_lasti;
tb->tb_lineno = PyFrame_GetLineNumber(frame);
PyObject_GC_Track(tb);
}
return tb;
}

try-except

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
try:
1 / 0
except ZeroDivisionError:
pass

co_consts: (1, 0, None)
co_names: ('ZeroDivisionError',)

1 0 SETUP_EXCEPT 12 (to 14)
/* 在FOR控制流中已经提到,等效于 SETUP_LOOP,构建一个 TryBlock
PyFrame_BlockSetup(
PyFrameObject * = f,
int type = opcode = SETUP_EXCEPT,
int handler = INSTR_OFFSET() + oparg = 14,
int level = STACK_LEVEL() = stack_pointer - f->f_valuestack
)
b = &f->f_blockstack[f->f_iblock++]; 对b 进行赋值;
*/

2 2 LOAD_CONST 0 (1)
4 LOAD_CONST 1 (0)
6 BINARY_TRUE_DIVIDE
/* PyErr_SetString(PyExc_ZeroDivisionError);
SET_TOP(NULL);
goto error;
PyTraceBack_Here();
goto fast_block_end;
*/

8 POP_TOP
10 POP_BLOCK
12 JUMP_FORWARD 20 (to 34) // JUMPBY(20)
3 >> 14 DUP_TOP
16 LOAD_NAME 0 (ZeroDivisionError)
18 COMPARE_OP 10 (exception match)
20 POP_JUMP_IF_FALSE 32
22 POP_TOP
24 POP_TOP
26 POP_TOP
4 28 POP_EXCEPT
30 JUMP_FORWARD 2 (to 34)
>> 32 END_FINALLY
>> 34 LOAD_CONST 2 (None)
36 RETURN_VALUE

前面关于goto error的问题,我们先放一放,先看一段字节码指令。一段try-except的代码,编译结果高达30+行,即使是try 1/1结果也是30+行。

前面我们已经知道 1/0 对应 6+8。如果顺序执行,字节码 12 就直接跳转到字节码 34,结束程序,这显然是不可能的。那么必然,在 BINARY_TRUE_DIVIDE 中会发生些什么。想起来没有?对,goto error!接下来就到了 fast_block_end。

fast_block_end

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
why = WHY_EXCEPTION;

while (why != WHY_NOT && f->f_iblock > 0) {
/* Peek at the current block */
PyTryBlock *b = &f->f_blockstack[f->f_iblock - 1];

/* Now we have to pop the block. */
f->f_iblock--;

if (b->b_type == EXCEPT_HANDLER) { // == 257
UNWIND_EXCEPT_HANDLER(b);
continue;
}
UNWIND_BLOCK(b);
if (b->b_type == SETUP_LOOP && why == WHY_BREAK) {
why = WHY_NOT;
JUMPTO(b->b_handler);
break;
}

if (why == WHY_EXCEPTION && (b->b_type == SETUP_EXCEPT
|| b->b_type == SETUP_FINALLY)) {
// 判断是否在 try-except 块中
PyObject *exc, *val, *tb;
int handler = b->b_handler;

/* 在FOR控制流 SETUP_LOOP 中有提到
从数组中获取一个新的 TryBlock,并放入信息
PyFrame_BlockSetup(
PyFrameObject * = f,
int type = EXCEPT_HANDLER,
int handler = -1,
int level = STACK_LEVEL() = stack_pointer - f->f_valuestack
)
构建一个新的 block 用于处理异常
*/
PyFrame_BlockSetup(f, EXCEPT_HANDLER, -1, STACK_LEVEL());

/* 在 PyTraceBack_Here 中,已经构建了一个新的 tb
此处,将异常信息压栈
*/
PUSH(tstate->exc_traceback);
PUSH(tstate->exc_value);
if (tstate->exc_type != NULL) {
PUSH(tstate->exc_type);
}
else {
Py_INCREF(Py_None);
PUSH(Py_None);
}

/* 先保存,后清空 线程异常对象 */
PyErr_Fetch(&exc, &val, &tb);

/* 详见 errors.c。
递归调用 PyErr_NormalizeExceptionEx()
*/
PyErr_NormalizeException(&exc, &val, &tb);
if (tb != NULL)
PyException_SetTraceback(val, tb);
else
PyException_SetTraceback(val, Py_None);
Py_INCREF(exc);
tstate->exc_type = exc;
Py_INCREF(val);
tstate->exc_value = val;
tstate->exc_traceback = tb;
if (tb == NULL)
tb = Py_None;
Py_INCREF(tb);

/* 压入运行时栈 */
PUSH(tb);
PUSH(val);
PUSH(exc);

/* 结束异常发现阶段,跳转到 handler,进行异常处理 */
why = WHY_NOT;
JUMPTO(handler);
break;
}
if (b->b_type == SETUP_FINALLY) {
if (why & (WHY_RETURN | WHY_CONTINUE))
PUSH(retval);
PUSH(PyLong_FromLong((long)why));
why = WHY_NOT;
JUMPTO(b->b_handler);
break;
}
} /* unwind stack */

程序抛出故障后,将沿着 tb 链逐级寻找 try-except:如果最终找到一个 except,将跳转到对应的 handler;否则直接退出程序,显示栈中压入的错误信息。而这个 handler,是跟字节码 SETUP_EXCEPT/SETUP_FINALLY 对应的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
1           0 SETUP_EXCEPT            12 (to 14)
...
3 >> 14 DUP_TOP
16 LOAD_NAME 0 (ZeroDivisionError)
18 COMPARE_OP 10 (exception match)
20 POP_JUMP_IF_FALSE 32
22 POP_TOP
24 POP_TOP
26 POP_TOP
4 28 POP_EXCEPT
30 JUMP_FORWARD 2 (to 34)
>> 32 END_FINALLY
>> 34 LOAD_CONST 2 (None)
36 RETURN_VALUE

在随后的字节码指令中,进行异常比较。若异常匹配,将 POP 三连,扔掉栈中的异常信息,然后跳转到正常代码。当异常不匹配时,直接跳转到 END_FINALLY。

END_FINALLY

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
opcode:: END_FINALLY
Terminates a :keyword:`finally` clause. The interpreter recalls whether the
exception has to be re-raised, or whether the function returns, and continues
with the outer-next block.

PREDICTED(END_FINALLY);
TARGET(END_FINALLY) {
PyObject *status = POP();
if (PyLong_Check(status)) {
... // 处理 with 上下文
}
else if (PyExceptionClass_Check(status)) {
PyObject *exc = POP();
PyObject *tb = POP();
PyErr_Restore(status, exc, tb);
why = WHY_EXCEPTION;
goto fast_block_end;
}
else if (status != Py_None) {
...
}
Py_DECREF(status);
DISPATCH();
}

异常不匹配,意味着异常并未被成功捕捉,需要再次进行抛出。END_FINALLY 就是干这个的。如上,在判断语句中,能够清楚地发现,将异常对象从栈中取出,重新放回线程状态对象中。再次设置 why 的状态,Python 虚拟机重新进入异常发生状态。