Micropython的编译过程是如何实现的呢

4、编译

将Micropython程序编译后并不是生成CPU可以直接执行的二进制指令，而是生成了一种平台无关格式的中间代码，称为bytecode。
Micropython的编译过程由mp_compile函数（py/compile.c）实现。

/* py/compile.c */
// Compile a parse tree and wrap the resulting raw code in a function object.
//
// parse_tree, source_file and is_repl are forwarded unchanged to
// mp_compile_to_raw_code().  The returned object is what
// mp_make_function_from_raw_code() builds for the outer module's raw code.
mp_obj_t mp_compile(mp_parse_tree_t *parse_tree, qstr source_file, bool is_repl) {
    // Both default-argument slots (def_args, def_kw_args) are passed as MP_OBJ_NULL.
    return mp_make_function_from_raw_code(
        mp_compile_to_raw_code(parse_tree, source_file, is_repl),
        MP_OBJ_NULL,
        MP_OBJ_NULL);
}
mp_compile_to_raw_code以解析阶段生成的解析树为输入，生成bytecode：

/* py/compile.c */
// Compile a parse tree into raw code for the outermost (module) scope.
//
// This listing is an excerpt: every line consisting of "..." marks code that
// was omitted (the definitions of `comp` and `emit_opt` are among the omitted
// parts).
//
// Visible flow:
//   1. every scope on the comp->scope_head list is compiled with MP_PASS_SCOPE;
//   2. a second loop runs MP_PASS_STACK_SIZE, MP_PASS_CODE_SIZE and
//      MP_PASS_EMIT on each scope, skipping the later passes as soon as
//      comp->compile_error is set;
//   3. the emitter, the parse tree and all scopes are freed.
//
// Returns module_scope->raw_code on success.  If comp->compile_error is set,
// it is raised through nlr_raise() instead of returning.
mp_raw_code_t *mp_compile_to_raw_code(mp_parse_tree_t *parse_tree, qstr source_file, bool is_repl) {
// put compiler state on the stack, it's relatively small
compiler_t comp_state = {0};
...
// the module scope is created from the root of the parse tree
scope_t *module_scope = scope_new_and_link(comp, SCOPE_MODULE, parse_tree->root, emit_opt);
// create standard emitter; it's used at least for MP_PASS_SCOPE
emit_t *emit_bc = emit_bc_new();
// compile pass 1
comp->emit = emit_bc;
...
// walk the scope list, stopping early once a compile error has been recorded
for (scope_t *s = comp->scope_head; s != NULL && comp->compile_error == MP_OBJ_NULL; s = s->next) {
...
compile_scope(comp, s, MP_PASS_SCOPE);
...
}
...
// compile pass 2 and 3
...
for (scope_t *s = comp->scope_head; s != NULL && comp->compile_error == MP_OBJ_NULL; s = s->next) {
...
{
...
// need a pass to compute stack size
compile_scope(comp, s, MP_PASS_STACK_SIZE);
// second last pass: compute code size
if (comp->compile_error == MP_OBJ_NULL) {
compile_scope(comp, s, MP_PASS_CODE_SIZE);
}
// final pass: emit code
if (comp->compile_error == MP_OBJ_NULL) {
compile_scope(comp, s, MP_PASS_EMIT);
}
}
}
...
// free the emitters
emit_bc_free(emit_bc);
...
// free the parse tree
mp_parse_tree_clear(parse_tree);
// free the scopes
// (the module's raw code pointer is read first, because module_scope itself
// is freed by the loop below)
mp_raw_code_t *outer_raw_code = module_scope->raw_code;
for (scope_t *s = module_scope; s;) {
// fetch the successor before s is freed
scope_t *next = s->next;
scope_free(s);
s = next;
}
// cleanup above runs on both the success and the error path
if (comp->compile_error != MP_OBJ_NULL) {
nlr_raise(comp->compile_error);
} else {
return outer_raw_code;
}
}

整个编译过程分为4轮：

MP_PASS_SCOPE - 确定标识符ID和种类以及标号数
MP_PASS_STACK_SIZE - 确定需要的最大栈大小
MP_PASS_CODE_SIZE - 确定bytecode的大小
MP_PASS_EMIT - 生成bytecode

至于为什么要分成4轮进行，这里没有深究。大致的原因是后一轮要用到前一轮的结果：例如必须先统计出bytecode的总大小（以及各个跳转标号的偏移），才能在最后一轮分配好缓冲区并写入正确的跳转目标。有兴趣的话可以研究一下编译原理。

mp_compile_to_raw_code函数的返回值是一个mp_raw_code_t结构体指针：

/* py/emitglue.h */
// Raw code record returned by mp_compile_to_raw_code() and consumed by
// mp_make_function_from_raw_code().  Several fields exist only when the
// corresponding MICROPY_* configuration option is enabled.
typedef struct _mp_raw_code_t {
mp_uint_t kind : 3; // of type mp_raw_code_kind_t
mp_uint_t scope_flags : 7; // bit flags; MP_SCOPE_FLAG_GENERATOR is tested against this field
mp_uint_t n_pos_args : 11; // presumably the number of positional arguments -- confirm
const void *fun_data; // for bytecode kind, passed to mp_obj_new_fun_bc() as the code pointer
const mp_uint_t *const_table; // passed to mp_obj_new_fun_bc() alongside fun_data
#if MICROPY_PERSISTENT_CODE_SAVE
// the fields below exist only when MICROPY_PERSISTENT_CODE_SAVE is enabled
size_t fun_data_len;
uint16_t n_obj;
uint16_t n_raw_code;
#if MICROPY_PY_SYS_SETTRACE
mp_bytecode_prelude_t prelude;
// line_of_definition is a Python source line where the raw_code was
// created e.g. MP_BC_MAKE_FUNCTION. This is different from lineno info
// stored in prelude, which provides line number for first statement of
// a function. Required to properly implement "call" trace event.
mp_uint_t line_of_definition;
#endif
#if MICROPY_EMIT_MACHINE_CODE
uint16_t prelude_offset;
uint16_t n_qstr;
mp_qstr_link_entry_t *qstr_link;
#endif
#endif
#if MICROPY_EMIT_MACHINE_CODE
mp_uint_t type_sig; // for viper, compressed as 2-bit types; ret is MSB, then arg0, arg1, etc
#endif
} mp_raw_code_t;

其中：

kind为MP_CODE_BYTECODE
fun_data为生成的bytecode相关信息，包括了与源代码相关的信息，比如源文件名、跳行信息等，还有实际的bytecode
仍然以我们的lcd.py为例子，对应的fun_data内容如下：

源代码相关信息

PRELUDE_sig 1
PRELUDE_size 1
qstr_simple_name 2
qstr source_file 2
8 | (2 << 5) 1 // bytes_to_skip | (lines_to_skip << 5)
9 | (1 << 5) 1 // bytes_to_skip | (lines_to_skip << 5)
0 1 // end of line number info
bytecode
MP_BC_LOAD_CONST_SMALL_INT_MULTI + MP_BC_LOAD_CONST_SMALL_INT_MULTI_EXCESS 1
MP_BC_LOAD_CONST_FALSE + (MP_TOKEN_KW_NONE - MP_TOKEN_KW_FALSE) 1
MP_BC_IMPORT_NAME qstr_lcd[0:7] qstr_lcd[8:15] 3
MP_BC_STORE_NAME qstr_lcd[0:7] qstr_lcd[8:15] 3
MP_BC_LOAD_NAME qstr_lcd[0:7] qstr_lcd[8:15] 3
MP_BC_LOAD_METHOD qstr_init[0:7] qstr_init[8:15] 3
MP_BC_CALL_METHOD 1
0 1 // (n_keyword << 8) | n_positional
MP_BC_POP_TOP 1
MP_BC_LOAD_NAME qstr_print[0:7] qstr_print[8:15] 3
MP_BC_LOAD_CONST_STRING qstr_hello[0:7] qstr_hello[8:15] 3
MP_BC_CALL_FUNCTION 1
1 1
MP_BC_POP_TOP 1
MP_BC_LOAD_CONST_FALSE + (MP_TOKEN_KW_NONE - MP_TOKEN_KW_FALSE) 1
MP_BC_RETURN_VALUE 1
其实bytecode也比较容易理解，比如MP_BC_IMPORT_NAME qstr_lcd[0:7] qstr_lcd[8:15]一行对应的就是源代码中的import lcd。

5、执行

5.1 mp_type_fun_bc类型对象

在mp_compile中通过mp_compile_to_raw_code生成了bytecode，然后调用mp_make_function_from_raw_code以bytecode为输入构造了一个mp_type_fun_bc类型的对象。

/* py/emitglue.c */
// Build a function object from a raw code record.
//
// This listing is an excerpt: lines consisting of "..." mark omitted code,
// including the other cases of the switch on rc->kind.  Only the default
// (bytecode) case is shown.
//
// rc:          raw code record; its fun_data and const_table are handed to
//              mp_obj_new_fun_bc().
// def_args:    forwarded unchanged to mp_obj_new_fun_bc().
// def_kw_args: forwarded unchanged to mp_obj_new_fun_bc().
//
// Returns the newly created function object.
mp_obj_t mp_make_function_from_raw_code(const mp_raw_code_t *rc, mp_obj_t def_args, mp_obj_t def_kw_args) {
...
// make the function, depending on the raw code kind
mp_obj_t fun;
switch (rc->kind) {
...
default:
// rc->kind should always be set and BYTECODE is the only remaining case
assert(rc->kind == MP_CODE_BYTECODE);
fun = mp_obj_new_fun_bc(def_args, def_kw_args, rc->fun_data, rc->const_table);
// check for generator functions and if so change the type of the object
if ((rc->scope_flags & MP_SCOPE_FLAG_GENERATOR) != 0) {
((mp_obj_base_t *)MP_OBJ_TO_PTR(fun))->type = &mp_type_gen_wrap;
}
#if MICROPY_PY_SYS_SETTRACE
// with settrace support the function object keeps a pointer back to its raw code
mp_obj_fun_bc_t *self_fun = (mp_obj_fun_bc_t *)MP_OBJ_TO_PTR(fun);
self_fun->rc = rc;
#endif
break;
}
return fun;
}
mp_make_function_from_raw_code调用mp_obj_new_fun_bc:

/* py/objfun.c */
// Allocate and initialise a bytecode function object.
//
// def_args_in: MP_OBJ_NULL, or a tuple (asserted) whose items are copied into
//              the start of the object's extra_args array.
// def_kw_args: MP_OBJ_NULL, or an object stored in extra_args immediately
//              after the copied tuple items.
// code:        stored in the object's bytecode field.
// const_table: stored in the object's const_table field.
//
// Returns the new object; its type is mp_type_fun_bc and its globals are taken
// from mp_globals_get().
mp_obj_t mp_obj_new_fun_bc(mp_obj_t def_args_in, mp_obj_t def_kw_args, const byte *code, const mp_uint_t *const_table) {
    mp_obj_tuple_t *def_args = MP_OBJ_TO_PTR(def_args_in);

    // Number of tuple items to copy; stays zero when no tuple was supplied.
    size_t n_def_args = 0;
    if (def_args_in != MP_OBJ_NULL) {
        assert(mp_obj_is_type(def_args_in, &mp_type_tuple));
        n_def_args = def_args->len;
    }

    // One additional trailing slot is reserved when def_kw_args is present.
    size_t n_extra_args = n_def_args;
    if (def_kw_args != MP_OBJ_NULL) {
        n_extra_args++;
    }

    mp_obj_fun_bc_t *fun = m_new_obj_var(mp_obj_fun_bc_t, mp_obj_t, n_extra_args);
    fun->base.type = &mp_type_fun_bc;
    fun->globals = mp_globals_get();
    fun->bytecode = code;
    fun->const_table = const_table;

    // Fill the variable-length tail: tuple items first, then def_kw_args.
    if (def_args != NULL) {
        memcpy(fun->extra_args, def_args->items, n_def_args * sizeof(mp_obj_t));
    }
    if (def_kw_args != MP_OBJ_NULL) {
        fun->extra_args[n_def_args] = def_kw_args;
    }

    return MP_OBJ_FROM_PTR(fun);
}

构造了一个mp_obj_fun_bc_t结构体，设置类型为mp_type_fun_bc，并设置了bytecode、const_table等。

5.2 执行过程

执行过程的入口为mp_call_function_0函数，参数是上面构造的mp_obj_fun_bc_t结构体。

/* py/runtime.c */
// Call `fun` with no arguments.
// Thin wrapper: forwards to mp_call_function_n_kw() with n_args = 0,
// n_kw = 0 and a NULL argument array, and returns its result.
mp_obj_t mp_call_function_0(mp_obj_t fun) {
return mp_call_function_n_kw(fun, 0, 0, NULL);
}
// Call an object through the `call` slot of its type.
//
// fun_in: the object to call.
// n_args: number of positional arguments in `args`.
// n_kw:   number of keyword arguments in `args`.
// args:   laid out as, eg: arg0 arg1 key0 value0 key1 value1
//
// Returns whatever type->call returns.  When the type has no call slot a
// TypeError is raised; the message depends on MICROPY_ERROR_REPORTING.
mp_obj_t mp_call_function_n_kw(mp_obj_t fun_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
    // TODO improve this: fun object can specify its type and we parse here the arguments,
    // passing to the function arrays of fixed and keyword arguments
    DEBUG_OP_printf("calling function %p(n_args=" UINT_FMT ", n_kw=" UINT_FMT ", args=%p)\n", fun_in, n_args, n_kw, args);

    // Look up the type once; everything below dispatches on it.
    const mp_obj_type_t *type = mp_obj_get_type(fun_in);

    // Not callable: report the error.
    // NOTE(review): this relies on the raise helpers never returning, as the
    // original code (which falls off the end after them) also does.
    if (type->call == NULL) {
        #if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE
        mp_raise_TypeError(MP_ERROR_TEXT("object not callable"));
        #else
        mp_raise_msg_varg(&mp_type_TypeError,
            MP_ERROR_TEXT("'%s' object isn't callable"), mp_obj_get_type_str(fun_in));
        #endif
    }

    // Callable: hand over to the type's call implementation.
    return type->call(fun_in, n_args, n_kw, args);
}
mp_call_function_0->mp_call_function_n_kw->type->call，这里的type是什么？是获取的fun_in的类型，也就是mp_obj_fun_bc_t的类型。还记得吗，构造这个结构体时给其设定的类型为mp_type_fun_bc。因此这里的type->call就是调用mp_type_fun_bc类型的call方法。
mp_type_fun_bc类型在py/objfun.c中定义：

/* py/objfun.c */
// Type object for bytecode functions; mp_obj_new_fun_bc() stores its address
// in base.type of every object it creates.
// The call slot is fun_bc_call, so a call made through type->call on such an
// object ends up in fun_bc_call.
const mp_obj_type_t mp_type_fun_bc = {
{ &mp_type_type },
.flags = MP_TYPE_FLAG_BINDS_SELF,
.name = MP_QSTR_function,
// the print slot is only set when MICROPY_CPYTHON_COMPAT is enabled
#if MICROPY_CPYTHON_COMPAT
.print = fun_bc_print,
#endif
.call = fun_bc_call,
.unary_op = mp_generic_unary_op,
// the attr slot is only set when MICROPY_PY_FUNCTION_ATTRS is enabled
#if MICROPY_PY_FUNCTION_ATTRS
.attr = mp_obj_fun_bc_attr,
#endif
};

可见该类型的call方法就是fun_bc_call，它再调用mp_execute_bytecode函数来执行bytecode。

bytecode是平台无关的中间代码格式，它最终在mp_execute_bytecode函数中被虚拟机解释执行。

/* py/vm.c */
// Bytecode interpreter loop.
//
// This listing is an excerpt: lines consisting of "..." mark omitted code,
// including most opcode handlers and the exception-handling branch taken when
// nlr_push() returns non-zero.
//
// Structure visible here:
//   - an outer for(;;) that installs an nlr_buf_t with nlr_push() on each
//     iteration;
//   - an inner for(;;) that dispatches on the opcode at `ip`, either through
//     DISPATCH() when MICROPY_OPT_COMPUTED_GOTO is enabled, or through
//     `switch (*ip++)` otherwise;
//   - three handlers that each push a constant (false / none / true) and then
//     DISPATCH() to the next opcode.
//
// NOTE(review): the roles of code_state and inject_exc are not visible in
// this excerpt -- confirm against the full source.
mp_vm_return_kind_t mp_execute_bytecode(mp_code_state_t *code_state, volatile mp_obj_t inject_exc) {
...
// outer exception handling loop
for (;;) {
nlr_buf_t nlr;
outer_dispatch_loop:
if (nlr_push(&nlr) == 0) {
...
// loop to execute byte code
for (;;) {
dispatch_loop:
#if MICROPY_OPT_COMPUTED_GOTO
DISPATCH();
#else
TRACE(ip);
MARK_EXC_IP_GLOBAL();
TRACE_TICK(ip, sp, false);
// read the opcode at ip and advance past it
switch (*ip++) {
#endif
ENTRY(MP_BC_LOAD_CONST_FALSE):
PUSH(mp_const_false);
DISPATCH();
ENTRY(MP_BC_LOAD_CONST_NONE):
PUSH(mp_const_none);
DISPATCH();
ENTRY(MP_BC_LOAD_CONST_TRUE):
PUSH(mp_const_true);
DISPATCH();
...
ENTRY_DEFAULT:
...
}
}
}
}
}

在一个for循环内，逐步读取bytecode的内容，通过switch语句判断读取到的bytecode内容并执行相应的case分支。

以源代码lcd.py生成的bytecode中的MP_BC_IMPORT_NAME qstr_lcd[0:7] qstr_lcd[8:15]为例，当switch语句读取到MP_BC_IMPORT_NAME时就会执行如下分支：

// Opcode handler for MP_BC_IMPORT_NAME (a fragment of the VM dispatch loop).
ENTRY(MP_BC_IMPORT_NAME): {
                FRAME_UPDATE();
                MARK_EXC_IP_SELECTIVE();
                // presumably decodes the opcode's qstr operand into `qst` -- confirm against the macro
                DECODE_QSTR;
                // pop one operand, then overwrite the new top of stack with the
                // result of mp_import_name(qst, popped value, current top)
                mp_obj_t obj = POP();
                SET_TOP(mp_import_name(qst, obj, TOP()));
                DISPATCH();
            }

在这个分支中会调用mp_import_name函数来完成最终的操作。

原作者：Remember

更多回帖

王芳

Micropython的编译过程是如何实现的呢

相关帖子

小白也能快速学会的Micropython编译指南

记录下为自己的板子编译MicroPython的过程

MicroPython TPYBoard v201 简单的web服务器实现过程

编译esp32s2的micropython固件（一）

MicroPython TPYBoard v201 简单的web服务器实现过程

怎样去编译micropython的源码并对其进行测试呢

C语言的编译过程

FPGA的编译过程讨论

C语言编译过程

编译UCOSII的源码过程

20万+工程师都在用，免费PCB检查工具