【CPython3.6源码分析】Python 环境初始化

参考

前言

一步步走来,我们已经了解了 Python对象机制,字节码执行机制,函数调用,类实例创建机制。这些内容都是在 Python 环境已经初始化完成后,才进行的工作。那么是时候看看,运行环境初始化,都干了些什么。

main

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
// python.c.18
/* Process entry point (excerpt; elided code is marked "...").
   Forwards the argument list to Py_Main() and returns the status it
   reports. */
int
main(int argc, char **argv)
{
    ...
    res = Py_Main(argc, argv_copy);
    ...
    return res;
}

// Modules/main.c.348
/* Interpreter driver (excerpt; elided code is marked "...").
   Initializes the runtime with Py_Initialize(), then runs exactly one of:
   a command string, a module, or a file, and returns the resulting status. */
int
Py_Main(int argc, wchar_t **argv)
{
    ...
    Py_Initialize();
    ...
    if (command) {
        sts = run_command(command, &cf);
        PyMem_RawFree(command);
    } else if (module) {
        /* Normalize RunModule()'s result to 0 or 1. */
        sts = (RunModule(module, 1) != 0);
    }
    else {
        ...
        /* prepare fp and filename */
        sts = run_file(fp, filename, &cf);
    }
    ...
    return sts; /* 0: normal exit, 1: exit on error, 2: invalid argument list */
}

可以看到,跟普通的 C 程序一样,通过入口 main 进入到 Py_Main,再调用 Py_Initialize 进行初始化,然后根据参数列表进行分发。

Py_Initialize

1
2
3
4
5
6
7
8
void Py_Initialize()

Initialize the Python interpreter. In an application embedding Python, this should be called before using any other Python/C API functions.

This initializes the table of loaded modules (sys.modules),
and creates the fundamental modules builtins, __main__ and sys.
It also initializes the module search path (sys.path).
It does not set sys.argv; use PySys_SetArgvEx() for that.

在 Python 初始化以前,只能调用少量的 C API。调用 Py_Initialize() 将完成对环境的初始化。其中很重要的是创建 builtins、__main__ 和 sys 等内置 modules。Py_Initialize 最终调用 _Py_InitializeEx_Private。

_PyRandom_Init

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// Py_Initialize -> Py_InitializeEx(1) -> _Py_InitializeEx_Private(1, 1)

/* Opening part of the initialization routine. This listing stops after
   _PyRandom_Init(); the function body is not closed here. */
void
_Py_InitializeEx_Private(int install_sigs, int install_importlib)
{
PyInterpreterState *interp;
PyThreadState *tstate;
PyObject *bimod, *sysmod, *pstderr;
char *p;
extern void _Py_ReadyTypes(void);

/* Guard against repeated initialization: only the first call proceeds. */
if (initialized)
return;
initialized = 1;
_Py_Finalizing = NULL;

/*
Hash randomization is enabled. Generate a per-process secret,
using PYTHONHASHSEED if provided.
*/
_PyRandom_Init();

嘿,朋友,你知道同一个字符串(比如 'abc')在两次启动的 Python 进程中 hash 值可能不同吗?(整数不受影响,hash(1) 始终等于 1。)

InterpreterState & ThreadState

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/* Models the process state: PyInterpreterState, see pystate.h.26.
   The statements quoted below are the article's summary of what
   PyInterpreterState_New() does to the new object:

interp->next = interp_head;
interp->tstate_head = NULL;

interp->eval_frame = _PyEval_EvalFrameDefault;
interp_head = interp;
*/
interp = PyInterpreterState_New();
/* Models the thread state. The statements quoted below are the article's
   summary of how PyThreadState_New() links the new thread state into
   its interpreter:
tstate->interp = interp;

tstate->prev = NULL;
tstate->next = interp->tstate_head;
if (tstate->next)
tstate->next->prev = tstate;
interp->tstate_head = tstate;
*/
tstate = PyThreadState_New(interp);

/* The SET_TSTATE() macro sets the global variable _PyThreadState_Current */
(void) PyThreadState_Swap(tstate);

此处,来张图最好理解,图片来自《Python 源码剖析》P318。
InterpreterState 与 ThreadState联系

ReadyTypes & Init

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/* Calls PyType_Ready to initialize the Py*_Type objects */
_Py_ReadyTypes();

/* Each initializer below aborts via Py_FatalError() when it reports failure. */
if (!_PyFrame_Init())
Py_FatalError("Py_Initialize: can't init frames");
/* Remember the small_ints pool of small integer objects? */
if (!_PyLong_Init())
Py_FatalError("Py_Initialize: can't init longs");

if (!PyByteArray_Init())
Py_FatalError("Py_Initialize: can't init bytearray");
/* Checks whether the machine uses the IEEE floating-point format */
if (!_PyFloat_Init())
Py_FatalError("Py_Initialize: can't init float");

/* Init Unicode implementation; relies on the codec registry */
if (_PyUnicode_Init() < 0)
Py_FatalError("Py_Initialize: can't initialize unicode");
if (_PyStructSequence_Init() < 0)
Py_FatalError("Py_Initialize: can't initialize structseq");

/* initialize builtin exceptions */
/* NOTE(review): bimod is not assigned anywhere in this listing; the only
   assignment shown in the article (bimod = _PyBuiltin_Init()) appears in
   the builtins listing further down -- confirm the real statement order
   against the CPython source. */
_PyExc_Init(bimod);

builtins

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
    /* New dict that becomes the interpreter's module table (interp->modules). */
    interp->modules = PyDict_New();
/* Create the builtins module, register it under the name "builtins",
   and let interp->builtins point at the module's dict. */
bimod = _PyBuiltin_Init();
_PyImport_FixupBuiltin(bimod, "builtins");
interp->builtins = PyModule_GetDict(bimod);

/* Creates the builtins module from builtinsmodule and fills its dict with
   the built-in names (excerpt; further SETBUILTIN entries are elided as
   "..."). Returns the module, or NULL if inserting a name fails. */
PyObject *
_PyBuiltin_Init(void)
{
mod = PyModule_Create(&builtinsmodule);
dict = PyModule_GetDict(mod); // d = ((PyModuleObject *)m) -> md_dict;
/* SETBUILTIN stores OBJECT in dict under NAME and makes the enclosing
   function return NULL when the insertion fails. */
#define SETBUILTIN(NAME, OBJECT) \
if (PyDict_SetItemString(dict, NAME, (PyObject *)OBJECT) < 0) \
return NULL;

SETBUILTIN("None", Py_None);
SETBUILTIN("type", &PyType_Type);
SETBUILTIN("object", &PyBaseObject_Type);
...
return mod;
}

/* Module definition handed to PyModule_Create() in _PyBuiltin_Init().
   Only the name ("builtins") and the doc string (builtin_doc) are shown;
   the remaining fields are elided as "...". */
static struct PyModuleDef builtinsmodule = {
PyModuleDef_HEAD_INIT,
"builtins",
builtin_doc,
...
};

如上,创建我们已经熟悉的builtins,并且在其__dict__中放入我们熟悉的type/object等。
然后再将解释器interp->builtins直接指向该__dict__

同时在_PyImport_FixupBuiltin中,调用_PyImport_FixupExtensionObject,完成extensions的初始化。

sysmod

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
    /* Create the sys module and keep its dict in interp->sysdict. */
    sysmod = _PySys_Init();
interp->sysdict = PyModule_GetDict(sysmod);
_PyImport_FixupBuiltin(sysmod, "sys");
/* Set the module search path from Py_GetPath(). */
PySys_SetPath(Py_GetPath());
/* Store interp->modules in the sys dict under the key "modules". */
PyDict_SetItemString(interp->sysdict, "modules",
interp->modules);

/* Creates the sys module from sysmodule and populates its dict (excerpt;
   only the "hash_info" entry is shown, the rest is elided as "...").
   Returns the new module. */
PyObject *
_PySys_Init(void){
m = PyModule_Create(&sysmodule);
sysdict = PyModule_GetDict(m);
/* presumably stores the value under the given name in sysdict -- the
   macro body is not shown in this excerpt */
SET_SYS_FROM_STRING("hash_info",
get_hash_info());
...
return m;
}

对 sys module 进行同样的操作。此时的内存布局如下图,同样来自《Python 源码剖析》P327。
完成 sys module 创建后的内存布局

stderr

1
2
3
4
5
6
7
8
/* Set up a preliminary stderr printer until we have enough
infrastructure for the io module in place. */
pstderr = PyFile_NewStdPrinter(fileno(stderr));
if (pstderr == NULL)
Py_FatalError("Py_Initialize: can't set preliminary stderr");
/* Register the printer twice: through PyId_stderr (presumably the name
   "stderr") and under the explicit name "__stderr__". */
_PySys_SetObjectId(&PyId_stderr, pstderr);
PySys_SetObject("__stderr__", pstderr);
/* Drop the local reference; presumably sys keeps its own -- TODO confirm. */
Py_DECREF(pstderr);

嗯,这就是 sys.__stderr__ 最初的由来:此时它只是一个临时的 stderr printer,等 io 模块可用之后,initstdio 会重新设置 sys.__stdin__、__stdout__、__stderr__。

再次 Init

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
    /* interp->builtins_copy = PyDict_Copy(interp->builtins); */
_PyImport_Init();

/* Sets sys.meta_path, sys.path_importer_cache and sys.path_hooks */
_PyImportHooks_Init();

/* Initialize _warnings. */
_PyWarnings_Init();

/* Callers that pass install_importlib == 0 stop here; everything below
   is skipped for them. */
if (!install_importlib)
return;

if (_PyTime_Init() < 0)
Py_FatalError("Py_Initialize: can't initialize time");

import_init(interp, sysmod);

/* initialize the faulthandler module */
if (_PyFaulthandler_Init())
Py_FatalError("Py_Initialize: can't initialize faulthandler");

if (initfsencoding(interp) < 0)
Py_FatalError("Py_Initialize: unable to load the file system codec");

if (install_sigs)
initsigs(); /* Signal handling stuff, including initintr() */

if (_PyTraceMalloc_Init() < 0)
Py_FatalError("Py_Initialize: can't initialize tracemalloc");

/* Initialize warnings. */
/* A failed import of "warnings" is reported on stderr but is not fatal. */
if (PySys_HasWarnOptions()) {
PyObject *warnings_module = PyImport_ImportModule("warnings");
if (warnings_module == NULL) {
fprintf(stderr, "'import warnings' failed; traceback:\n");
PyErr_Print();
}
Py_XDECREF(warnings_module);
}
}

main

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
    initmain(interp); /* Module __main__ */
if (initstdio() < 0)
// initstdio sets sys.__stdin__, __stdout__ and __stderr__
Py_FatalError(
"Py_Initialize: can't initialize sys standard streams");

/* Sets up the __main__ module (condensed excerpt; no error checking is
   done here).
   After the call, __main__'s dict contains:
     "__annotations__" -> a new empty dict
     "__builtins__"    -> the builtins module
     "__loader__"      -> interp->importlib.BuiltinImporter

   NOTE(review): ann_dict, bimod and loader are never released in this
   excerpt -- confirm against the CPython source, which presumably
   Py_DECREFs each of them after storing it. */
static void
initmain(PyInterpreterState *interp)
{
    /* All locals are declared once here; the original excerpt declared
       `loader` a second time further down, which is a redeclaration
       error in the same scope. */
    PyObject *m, *d, *loader, *ann_dict, *bimod;

    m = PyImport_AddModule("__main__");
    d = PyModule_GetDict(m);

    ann_dict = PyDict_New();
    PyDict_SetItemString(d, "__annotations__", ann_dict);

    bimod = PyImport_ImportModule("builtins");
    PyDict_SetItemString(d, "__builtins__", bimod);

    loader = PyObject_GetAttrString(interp->importlib,
                                    "BuiltinImporter");
    PyDict_SetItemString(d, "__loader__", loader);
}

/* Creates a new module object called `name`, registers it in the
   interpreter's module dict, and returns it (condensed excerpt). */
PyObject *
PyImport_AddModuleObject(PyObject *name){
// fetches PyThreadState_GET()->interp->modules
PyObject *modules = PyImport_GetModuleDict();

// creates a new PyModuleObject with m->md_name = str: __main__
m = PyModule_NewObject(name);

// interp->modules[str: __main__] = module object: __main__
PyDict_SetItem(modules, name, m);
return m;
}

创建了一个名字为__main__的module,并且在其__dict__中放入了一些属性。

initsite

1
2
3
4
5
6
7
8
9
10
11
    /* Import the site module unless Py_NoSiteFlag is set. */
    if (!Py_NoSiteFlag)
initsite(); /* Module site */

/* Imports the "site" module; what happens with the result is elided
   ("...") in this excerpt. */
static void
initsite(void)
{
PyObject *m;
m = PyImport_ImportModule("site");
...
}
// PyImport_ImportModule -> PyImport_Import

在前面 sysmod 中,有一个步骤是PySys_SetPath(Py_GetPath());,它完成了 Python 模块加载路径的基本设置。但第三方库搜索路径(site-packages)的设置,是在 initsite 导入 site 模块时完成的,而这次导入最终会走到 PyImport_Import。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* Imports module_name by calling builtins.__import__ (condensed excerpt:
   local declarations, error checks and reference handling are not shown).
   Returns whatever the __import__ call returns. */
PyObject *
PyImport_Import(PyObject *module_name)
{
    /* Initialize constant string objects */
    import_str = PyUnicode_InternFromString("__import__");
    builtins_str = PyUnicode_InternFromString("__builtins__");
    silly_list = PyList_New(0);

    /* This excerpt does not consult any current globals: it imports the
       standard builtins module and builds a stand-in globals dict
       {"__builtins__": builtins}. */
    builtins = PyImport_ImportModuleLevel("builtins",
                                          NULL, NULL, NULL, 0);
    globals = Py_BuildValue("{OO}", builtins_str, builtins);

    /* builtins is a module object here, so __import__ has to be fetched as
       an attribute; item lookup (PyObject_GetItem) only works on a dict. */
    import = PyObject_GetAttr(builtins, import_str);

    r = PyObject_CallFunction(import, "OOOOi", module_name, globals,
                              globals, silly_list, 0, NULL);
    return r;
}

在此处导入了模块 site.py 。好高兴,终于看见一个 py 文件了~~

1
2
3
4
5
6
7
8
9
10
11
// site.py
"""
Append module search paths for third-party packages to sys.path.
* This module is automatically imported during initialization. *

On Unix (including Mac OSX), it starts with sys.prefix and sys.exec_prefix
(if different) and appends lib/python<version>/site-packages.

On other platforms (such as Windows), it tries each of the
prefixes directly, as well as with lib/site-packages appended.
"""

从注释中能发现,根据不同平台,把不同的路径加入到 sys.path 中。具体内容可以参考源文件。

小结

_Py_InitializeEx_Private经过漫长的步骤后,终于完成了初始化工作,返回到 Py_Main 继续运行。看图:
完成初始化后的环境

run

1
2
3
4
5
6
7
8
run_command(command, &cf);  -> PyRun_SimpleStringFlags -> PyRun_StringFlags
-> run_mod -> PyEval_EvalCode -> PyEval_EvalCodeEx -> 执行字节码

run_file(fp, filename, &cf); -> PyRun_AnyFileExFlags -> PyRun_InteractiveLoopFlags
-> PyRun_InteractiveOneObjectEx -> 进入交互式环境 -> run_mod

run_file(fp, filename, &cf); -> PyRun_AnyFileExFlags -> PyRun_SimpleFileExFlags
-> PyRun_FileExFlags -> run_mod -> PyEval_EvalCode -> 执行字节码

Python 环境初始化完成后,根据参数的不同,进入不同的模式。从上面的调用链可以发现,最后必然都殊途同归,进入到字节码的执行环节,区别就在于字节码的获取方式不一样。