[swarthmore cs75] Compiler 2 – Boa
课程回顾
Swarthmore学院16年开的编译系统课,总共10次大作业。本随笔记录了相关的课堂笔记以及第4次大作业。
- A-Normal Form
在80年代,函数式语言编译器主要使用Continuation-passing style(CPS)作为中间代码表示形式。 1992年Sabry和Felleisen引入了另一种和CPS一样简单的表示形式:A-normal form(ANF),并且证明了:使用ANF作为中间代码表示形式能够和使用CPS一样容易生成汇编代码并进行代码优化。
Why:为什么要转化为ANF的形式?从下面的例子可以看到,在计算第二个表达式的时候,必须首先把(2 - 3)的计算结果存在某个地方。难道需要另外的逻辑,把中间结果存储到esp中?但是这样做的话扩展性就会很差,这样就有了ANF表示形式(let..in...的编译过程会对变量进行处理)。

| 源码 | x86汇编 | ANF形式(参考:实现一) | 简化的ANF(参考:实现二) |
| --- | --- | --- | --- |
| `(5 + 4) + (3 + 2)` | `mov EAX, 5`<br>`add EAX, 4`<br>`add EAX, 3`<br>`add EAX, 2` | `let v1 = 5 + 4 in`<br>`let v2 = 3 + 2 in`<br>`let v3 = v1 + v2 in`<br>`v3` | `let v1 = 5 + 4 in`<br>`let v2 = 3 + 2 in`<br>`v1 + v2` |
| `(2 - 3) + (4 * 5)` | `mov EAX, 2`<br>`sub EAX, 3`<br>`?????` | `let v1 = 2 - 3 in`<br>`let v2 = 4 * 5 in`<br>`let v3 = v1 + v2 in`<br>`v3` | `let v1 = 2 - 3 in`<br>`let v2 = 4 * 5 in`<br>`v1 + v2` |

How:如何将一个算术表达式转换为ANF表达式?下面提供了两种实现:
Input Language
type expr =
| Num of int
| Id of string
| Plus of expr * expr
Restricted Language
(* Restricted (A-normal form) target language, declared as three types:
   - immexpr: an immediate value, either an integer literal (ImmNum) or a
     variable name (ImmId).
   - cexpr: a compound expression whose operands are already immediates,
     either an addition of two immediates (CPlus) or a bare immediate
     (CImmExpr).
   - aexpr: a chain of let-bindings; ALet (name, bound, body) binds the
     cexpr [bound] to [name] inside [body], and ACExpr ends the chain with a
     cexpr. *)
type immexpr =
| ImmNum of int
| ImmId of string type cexpr =
| CPlus of immexpr * immexpr
| CImmExpr of immexpr type aexpr =
| ALet of string * cexpr * aexpr
| ACExpr of cexpr
实现一:
let rec anf (e : expr) (expr_with_hole : (immexpr -> aexpr)) =
match e with
| Num(n) -> (expr_with_hole (ImmNum(n)))
| Id(x) -> (expr_with_hole (ImmId(x)))
| Plus(left, right) ->
let varname = gen_temp "v" in
anf left (fun limm ->
anf right (fun rimm ->
ALet(varname, CPlus(limm, rimm),
(expr_with_hole (ImmId(varname))))))
输入:
anf (Plus(Plus(Num(5), Num(4)), Plus(Num(3), Num(2)))) (fun imm -> ACExpr(CImmExpr(imm)))
......
=> anf (Plus(Plus(Num(5), Num(4)), Plus(Num(3), Num(2)))) (fun imm -> ACExpr(CImmExpr(imm))) => anf Plus(Num(5), Num(4)) (fun limm ->
anf Plus(Num(3), Num(2)) (fun rimm ->
ALet("v1", CPlus(limm, rimm),
((fun imm -> ACExpr(CImmExpr(imm))) (ImmId("v1")))))) => anf Num(5) (fun limm ->
anf Num(4) (fun rimm ->
ALet("v2", CPlus(limm, rimm), ((fun limm ->
anf Plus(Num(3), Num(2)) (fun rimm ->
ALet("v1", CPlus(limm, rimm), ((fun imm ->
ACExpr(CImmExpr(imm))) (ImmId("v1")))))) (ImmId("v2")))))) => anf Num(4) (fun rimm ->
ALet("v2", CPlus(ImmNum(5), rimm), ((fun limm ->
anf Plus(Num(3), Num(2)) (fun rimm ->
ALet("v1", CPlus(limm, rimm), ((fun imm ->
ACExpr(CImmExpr(imm))) (ImmId("v1")))))) (ImmId("v2"))))) => ALet("v2", CPlus(ImmNum(5), ImmNum(4)), ((fun limm ->
anf Plus(Num(3), Num(2)) (fun rimm ->
ALet("v1", CPlus(limm, rimm), ((fun imm ->
ACExpr(CImmExpr(imm))) (ImmId("v1")))))) (ImmId("v2")))) => ALet("v2", CPlus(ImmNum(5), ImmNum(4)), (
anf Plus(Num(3), Num(2)) (fun rimm ->
ALet("v1", CPlus(ImmId("v2"), rimm), ((fun imm ->
ACExpr(CImmExpr(imm))) (ImmId("v1"))))))) => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
anf Num(3) (fun limm ->
anf Num(2) (fun rimm ->
ALet("v3", CPlus(limm, rimm),
((fun rimm ->
ALet("v1", CPlus(ImmId("v2"), rimm), ((fun imm ->
ACExpr(CImmExpr(imm))) (ImmId("v1"))))) (ImmId("v3"))))))) => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
anf Num(3) (fun limm ->
anf Num(2) (fun rimm ->
ALet("v3", CPlus(ImmNum(3), rimm)),
((fun rimm ->
ALet("v1", CPlus(ImmId("v2"), rimm), ((fun imm ->
ACExpr(CImmExpr(imm))) (ImmId("v1"))))) (ImmId("v3")))))) => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
anf Num(2) (fun rimm ->
ALet("v3", CPlus(ImmNum(3), rimm)),
((fun rimm ->
ALet("v1", CPlus(ImmId("v2"), rimm), ((fun imm ->
ACExpr(CImmExpr(imm))) (ImmId("v1"))))) (ImmId("v3"))))) => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
ALet("v3", CPlus(ImmNum(3), ImmNum(2))),
((fun rimm ->
ALet("v1", CPlus(ImmId("v2"), rimm), ((fun imm ->
ACExpr(CImmExpr(imm))) (ImmId("v1"))))) (ImmId("v3")))) => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
ALet("v3", CPlus(ImmNum(3), ImmNum(2))),
((fun rimm ->
ALet("v1", CPlus(ImmId("v2"), rimm), ((fun imm ->
ACExpr(CImmExpr(imm))) (ImmId("v1"))))) (ImmId("v3")))) => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
ALet("v3", CPlus(ImmNum(3), ImmNum(2))),
ALet("v1", CPlus(ImmId("v2"), ImmId("v3")), ((fun imm ->
ACExpr(CImmExpr(imm))) (ImmId("v1"))))) => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
ALet("v3", CPlus(ImmNum(3), ImmNum(2))),
ALet("v1", CPlus(ImmId("v2"), ImmId("v3")), ((fun imm ->
ACExpr(CImmExpr(imm))) (ImmId("v1"))))) => ALet("v2", CPlus(ImmNum(5), ImmNum(4)),
ALet("v3", CPlus(ImmNum(3), ImmNum(2))),
ALet("v1", CPlus(ImmId("v2"), ImmId("v3")),
ACExpr(CImmExpr(ImmId("v1")))))
输出:
ALet ("v2", CPlus (ImmNum(5), ImmNum(4)),
ALet ("v3", CPlus (ImmNum(3), ImmNum(2)),
ALet ("v1", CPlus (ImmId("v2"), ImmId("v3")), ACExpr (CImmExpr (ImmId ("v1"))))))
实现二:
(* [anf_c e expr_with_c_hole] normalizes [e] for a context that accepts a
   compound expression (cexpr).  Numbers and identifiers are wrapped in
   CImmExpr.  For an addition, both operands are first reduced to immediates
   via [anf_imm]; the resulting CPlus is handed to the hole directly, so no
   temporary is introduced for it. *)
let rec anf_c (e : expr) (expr_with_c_hole : cexpr -> aexpr) : aexpr =
  match e with
  | Num n -> expr_with_c_hole (CImmExpr (ImmNum n))
  | Id x -> expr_with_c_hole (CImmExpr (ImmId x))
  | Plus (lhs, rhs) ->
    anf_imm lhs (fun l ->
      anf_imm rhs (fun r ->
        expr_with_c_hole (CPlus (l, r))))

(* [anf_imm e expr_with_imm_hole] normalizes [e] for a context that needs an
   immediate.  Numbers and identifiers are passed through as-is.  An addition
   is bound to a fresh name obtained from [gen_temp "v"], and that name is
   what the hole receives.  The fresh name is requested before the operands
   are processed, so an outer addition asks for its name ahead of the
   additions nested inside it. *)
and anf_imm (e : expr) (expr_with_imm_hole : immexpr -> aexpr) : aexpr =
  match e with
  | Num n -> expr_with_imm_hole (ImmNum n)
  | Id x -> expr_with_imm_hole (ImmId x)
  | Plus (lhs, rhs) ->
    let fresh = gen_temp "v" in
    anf_imm lhs (fun l ->
      anf_imm rhs (fun r ->
        ALet (fresh, CPlus (l, r), expr_with_imm_hole (ImmId fresh))))
输入:
anf_c (Plus(Plus(Num(5), Num(4)), Plus(Num(3), Num(2)))) (fun c -> ACExpr(c))
......
anf_c (Plus(Plus(Num(5), Num(4)), Plus(Num(3), Num(2)))) (fun c -> ACExpr(c)) => anf_imm Plus(Num(5), Num(4)) (fun limm ->
anf_imm Plus(Num(3), Num(2)) (fun rimm ->
((fun c -> ACExpr(c)) (CPlus(limm, rimm))))) => anf_imm Num(5) (fun limm ->
anf_imm Num(4) (fun rimm ->
ALet("v1", CPlus(limm, rimm), ((fun limm ->
anf_imm Plus(Num(3), Num(2)) (fun rimm ->
((fun c -> ACExpr(c)) (CPlus(limm, rimm))))) (ImmId("v1")))))) => anf_imm Num(4) (fun rimm ->
ALet("v1", CPlus(ImmNum(5), rimm), ((fun limm ->
anf_imm Plus(Num(3), Num(2)) (fun rimm ->
((fun c -> ACExpr(c)) (CPlus(limm, rimm))))) (ImmId("v1"))))) => ALet("v1", CPlus(ImmNum(5), ImmNum(4)), ((fun limm ->
anf_imm Plus(Num(3), Num(2)) (fun rimm ->
((fun c -> ACExpr(c)) (CPlus(limm, rimm))))) (ImmId("v1")))) => ALet("v1", CPlus(ImmNum(5), ImmNum(4)),
anf_imm Plus(Num(3), Num(2)) (fun rimm ->
((fun c -> ACExpr(c)) (CPlus(ImmId("v1"), rimm))))) => ALet("v1", CPlus(ImmNum(5), ImmNum(4)),
anf_imm Num(3) (fun limm ->
anf_imm Num(2) (fun rimm ->
ALet("v2", CPlus(limm, rimm), ((fun rimm ->
((fun c -> ACExpr(c)) (CPlus(ImmId("v1"), rimm)))) (ImmId("v2"))))))) => ALet("v1", CPlus(ImmNum(5), ImmNum(4)),
anf_imm Num(2) (fun rimm ->
ALet("v2", CPlus(ImmNum(3), rimm), ((fun rimm ->
((fun c -> ACExpr(c)) (CPlus(ImmId("v1"), rimm)))) (ImmId("v2")))))) => ALet("v1", CPlus(ImmNum(5), ImmNum(4)),
ALet("v2", CPlus(ImmNum(3), ImmNum(2)), ((fun rimm ->
((fun c -> ACExpr(c)) (CPlus(ImmId("v1"), rimm)))) (ImmId("v2"))))) => ALet("v1", CPlus(ImmNum(5), ImmNum(4)),
ALet("v2", CPlus(ImmNum(3), ImmNum(2)),
((fun c -> ACExpr(c)) (CPlus(ImmId("v1"), ImmId("v2")))))) => ALet("v1", CPlus(ImmNum(5), ImmNum(4)),
ALet("v2", CPlus(ImmNum(3), ImmNum(2)),
ACExpr(CPlus(ImmId("v1"), ImmId("v2")))))
输出:
> ALet ("v1", CPlus (ImmNum(5), ImmNum(4)),
ALet ("v2", CPlus (ImmNum(3), ImmNum(2)),
ACExpr (CPlus (ImmId("v1"), ImmId("v2")))))
编程作业
本次大作业是为Boa编程语言实现一个小型编译器,其编译过程为:boa源代码 -> expr(user-facing) -> aexpr(compiler-facing) -> instruction list(x86_32汇编)。
具体语法
boa源代码
<expr> :=
| let <bindings> in <expr>
| if <expr>: <expr> else: <expr>
| <binop-expr>
<binop-expr> :=
| <number>
| <identifier>
| add1(<expr>)
| sub1(<expr>)
| <expr> + <expr>
| <expr> - <expr>
| <expr> * <expr>
| ( <expr> )
<bindings> :=
| <identifier> = <expr>
| <identifier> = <expr>, <bindings>
抽象语法
expr(user-facing)
type prim1 =
| Add1
| Sub1 type prim2 =
| Plus
| Minus
| Times type expr =
| ELet of (string * expr) list * expr
| EPrim1 of prim1 * expr
| EPrim2 of prim2 * expr * expr
| EIf of expr * expr * expr
| ENumber of int
| EId of string
aexpr(compiler-facing)
(* Compiler-facing intermediate representation in A-normal form, declared as
   three types joined with [and] (cexpr mentions aexpr through CIf, and aexpr
   mentions cexpr):
   - immexpr: an immediate, either an integer literal (ImmNumber) or a
     variable name (ImmId).
   - cexpr: one step of computation over immediates: a unary primitive
     (CPrim1), a binary primitive (CPrim2), a conditional whose condition is
     an immediate and whose two branches are full aexprs (CIf), or a bare
     immediate (CImmExpr).
   - aexpr: ALet (name, bound, body) binds the cexpr [bound] to [name] inside
     [body]; ACExpr ends the chain with a cexpr. *)
type immexpr =
| ImmNumber of int
| ImmId of string and cexpr =
| CPrim1 of prim1 * immexpr
| CPrim2 of prim2 * immexpr * immexpr
| CIf of immexpr * aexpr * aexpr
| CImmExpr of immexpr and aexpr =
| ALet of string * cexpr * aexpr
| ACExpr of cexpr
* 程序例子(每行分别表示boa/expr/aexpr或pretty-print)
+ 例1:
```text
# 输出 41
41
ENumber(41)
ACExpr(CImmExpr(ImmNumber(41)))
```
+ 例2:
```text
# 输出4
sub1(5)
EPrim1(Sub1, ENumber(5))
ALet("temp_unary_1", CPrim1(Sub1, ImmNumber(5)),
ACExpr(CImmExpr(ImmId("temp_unary_1"))))
```
+ 例3:
```text
# 输出8
if 5 - 5: 6 else: 8
EIf(EPrim2(Minus, ENumber(5), ENumber(5)), ENumber(6), ENumber(8))
ALet("temp_binary_2", CPrim2(Minus, ImmNumber(5), ImmNumber(5)),
ALet("temp_if_1", CIf(ImmId("temp_binary_2"), ACExpr(CImmExpr(ImmNumber(6))), ACExpr(CImmExpr(ImmNumber(8)))),
ACExpr(CImmExpr(ImmId("temp_if_1")))))
```
+ 例4:
```text
# 输出14
(5 + 4) + (3 + 2)
EPrim2(Plus, EPrim2(Plus, ENumber(5), ENumber(4)), EPrim2(Plus, ENumber(3), ENumber(2)))
ALet("temp_binary_2", CPrim2(Plus, ImmNumber(5), ImmNumber(4)),
ALet("temp_binary_3", CPrim2(Plus, ImmNumber(3), ImmNumber(2)),
ALet("temp_binary_1", CPrim2(Plus, ImmId("temp_binary_2"), ImmId("temp_binary_3")), ACExpr(CImmExpr(ImmId("temp_binary_1"))))))
```
+ 例5:
```text
# 输出10
let x = (let y=10 in y), z=9 in x
ELet([("x", ELet([("y", ENumber(10))], EId("y"))); ("z", ENumber(9));], EId("x"))
ALet("y", CImmExpr(ImmNumber(10)),
ALet("x", CImmExpr(ImmId("y")),
ALet("z", CImmExpr(ImmNumber(9)), ACExpr(CImmExpr(ImmId("x"))))))
```
+ 例6:
```text
# 输出10
let x = 10, y = 9 in
if (x - y) * 2: x else: y
ELet([("x", ENumber(10)); ("y", ENumber(9))],
EIf(EPrim2(Times, EPrim2(Minus, EId("x"), EId("y")), ENumber(2)),
EId("x"),
EId("y")))
# pretty-print
(let x = 10 in
(let y = 9 in
(let temp_binary_3 = (x - y) in
(let temp_binary_2 = (temp_binary_3 * 2) in
(let temp_if_1 = (if temp_binary_2: x else: y)
in temp_if_1)))))
```
+ 例7:
```text
# 输出25
let c1 = 1 in
let c2 = 0 in
(let x = (if c1: 5 + 5 else: 6 * 2) in
(let y = (if c2: x + 3 else: x + 5) in
(x + y)))
ELet([("c1", ENumber(1));], ELet([("c2", ENumber(0));],
ELet([("x", EIf(EId("c1"), EPrim2(Plus, ENumber(5), ENumber(5)), EPrim2(Times, ENumber(6), ENumber(2))))],
ELet([("y", EIf(EId("c2"), EPrim2(Plus, EId("x"), ENumber(3)), EPrim2(Plus, EId("x"), ENumber(5))))],
EPrim2(Plus, EId("x"), EId("y"))))))
# pretty-print
(let c1 = 1 in
(let c2 = 0 in
(let temp_if_1 = (if c1: (let temp_binary_7 = (5 + 5) in temp_binary_7) else: (let temp_binary_6 = (6 * 2) in temp_binary_6)) in
(let x = temp_if_1 in
(let temp_if_2 = (if c2: (let temp_binary_5 = (x + 3) in temp_binary_5) else: (let temp_binary_4 = (x + 5) in temp_binary_4)) in
(let y = temp_if_2 in
(let temp_binary_3 = (x + y) in
temp_binary_3)))))))
```
* 将expr类型编译为aexpr类型
输出可以参考上述程序例子生成的aexpr格式。
```ocaml
(* [anf_k e k] converts the user-facing expression [e] into A-normal form in
   continuation-passing style: [k] receives the immediate that stands for the
   value of [e] and returns the rest of the program built around it.

   - Numbers and identifiers are already immediates and go straight to [k].
   - Unary/binary primitives and conditionals are bound to a fresh temporary
     from [gen_temp] ("unary", "binary", "if"); [k] receives that temporary.
   - Let-bindings are emitted in order as ALet over the user's own names,
     followed by the normalized body. *)
let rec anf_k (e : expr) (k : immexpr -> aexpr) : aexpr =
  match e with
  | ENumber n -> k (ImmNumber n)
  | EId name -> k (ImmId name)
  | EPrim1 (op, arg) ->
    (* The temporary is requested before the operand is normalized. *)
    let fresh = gen_temp "unary" in
    anf_k arg (fun imm -> ALet (fresh, CPrim1 (op, imm), k (ImmId fresh)))
  | EPrim2 (op, lhs, rhs) ->
    let fresh = gen_temp "binary" in
    anf_k lhs (fun l ->
      anf_k rhs (fun r ->
        ALet (fresh, CPrim2 (op, l, r), k (ImmId fresh))))
  | ELet (binds, body) ->
    (* Walk the bindings left to right; once none remain, normalize the body
       with the caller's continuation. *)
    let rec bind_all = function
      | [] -> anf_k body k
      | (id, bound) :: rest ->
        anf_k bound (fun imm -> ALet (id, CImmExpr imm, bind_all rest))
    in
    bind_all binds
  | EIf (cond, thn, els) ->
    let fresh = gen_temp "if" in
    (* Each branch is a self-contained aexpr that ends in its own value. *)
    let finish imm = ACExpr (CImmExpr imm) in
    anf_k cond (fun c ->
      (* Kept as one constructor application on purpose: the branches and
         [k] all may call [gen_temp], and the order in which OCaml evaluates
         constructor arguments decides the numbering of those temporaries. *)
      ALet (fresh, CIf (c, anf_k thn finish, anf_k els finish), k (ImmId fresh)))
```
* 将cexpr类型编译为instruction list(生成汇编代码)
根据不同子类型,需要执行不同的操作:
- CImmExpr:只需要把相应的数字或id变量值移动到eax寄存器即可。
- CPrim1:把操作数(数字或id变量值)移动到eax寄存器,然后根据Add1/Sub1,对eax寄存器进行+1/-1操作。
- CPrim2:把左操作数移动到eax寄存器中,然后根据Plus/Minus/Times,用右表达式值对eax寄存器进行+/-/*的操作。
- CIf:条件语句生成的汇编代码结构如下所示,只需要按照格式拼接就行。
cmp eax, 0 ; check if eax is equal to 0
je else_branch
; commands for then branch go here
jmp end_of_if
else_branch:
; commands for else branch go here
end_of_if:
(* [acompile_imm_arg i _ env] turns an immediate into an instruction operand.
   A number becomes a constant.  An identifier is looked up in [env], which
   maps names to stack indices, and becomes an ESP-relative operand with
   offset -4 * index.  The second argument (the stack index) is unused.
   Fails with an "unbound identifier" message when the name is not in
   [env]. *)
let acompile_imm_arg (i : immexpr) _ (env : (string * int) list) : arg =
  match i with
  | ImmNumber n -> Const n
  | ImmId name ->
    (match find env name with
     | None -> failwith (sprintf "An identifier is unbound (there is no surrounding let binding for %s)" name)
     | Some slot -> RegOffset ((-4) * slot, ESP))

(* [acompile_imm i si env] emits the single move that loads the immediate
   [i] into EAX. *)
let acompile_imm (i : immexpr) (si : int) (env : (string * int) list) : instruction list =
  let operand = acompile_imm_arg i si env in
  [ IMov (Reg EAX, operand) ]

(* [acompile_step s si env] compiles one compound expression; the emitted
   code leaves its result in EAX.
   - CImmExpr: load the immediate.
   - CPrim1: load the operand, then add 1 or -1 to EAX.
   - CPrim2: load the left operand, then add/sub/mul EAX by the right one.
   - CIf: compare the condition with 0 and jump to a fresh "else" label when
     equal; the then-branch ends with a jump to a fresh "endif" label. *)
let rec acompile_step (s : cexpr) (si : int) (env : (string * int) list) : instruction list =
  match s with
  | CImmExpr imm -> acompile_imm imm si env
  | CPrim1 (op, operand) ->
    let load = acompile_imm operand si env in
    let delta =
      match op with
      | Add1 -> 1
      | Sub1 -> (-1)
    in
    load @ [ IAdd (Reg EAX, Const delta) ]
  | CPrim2 (op, left, right) ->
    let load = acompile_imm left si env in
    let rhs = acompile_imm_arg right si env in
    let apply =
      match op with
      | Plus -> IAdd (Reg EAX, rhs)
      | Minus -> ISub (Reg EAX, rhs)
      | Times -> IMul (Reg EAX, rhs)
    in
    load @ [ apply ]
  | CIf (cond, thn, els) ->
    (* Both labels are requested before any branch is compiled. *)
    let else_label = gen_temp "else" in
    let endif_label = gen_temp "endif" in
    (* Same `@` chain shape as before: nested conditionals request labels
       while the branches are compiled, so the shape of this expression
       affects label numbering. *)
    (acompile_imm cond si env)
    @ [ ICmp (Reg EAX, Const 0); IJe else_label ]
    @ (acompile_expr thn si env)
    @ [ IJmp endif_label; ILabel else_label ]
    @ (acompile_expr els si env)
    @ [ ILabel endif_label ]

(* [acompile_expr e si env] compiles a let-chain.  For ALet, the bound
   expression is compiled first (with stack index si + 1), its result in EAX
   is stored at offset -4 * si from ESP, and the body is compiled with the
   name mapped to [si] and the stack index advanced to si + 1.  ACExpr
   delegates to [acompile_step]. *)
and acompile_expr (e : aexpr) (si : int) (env : (string * int) list) : instruction list =
  match e with
  | ACExpr c -> acompile_step c si env
  | ALet (id, bound, body) ->
    let bound_code = acompile_step bound (si + 1) env in
    let body_code = acompile_expr body (si + 1) ((id, si) :: env) in
    bound_code @ (IMov (RegOffset (-4 * si, ESP), Reg EAX) :: body_code)
参考资料
starter-boa
cs75-anf
cs4410-anf
A-Normalization: Why and How
最新文章
- [LeetCode] Alien Dictionary 另类字典
- jquery.cookie() 方法的使用(读取、写入、删除)
- --hdu 2191 悼念512汶川大地震遇难同胞——珍惜现在,感恩生活(多重背包)
- 为什么anylase和scenaio中的平均响应时间差别会这么大?
- UL LI 布局 TAB 切换条
- 关于HTTP返回码301、302区别与SEO
- 【JavaScript】JavaScript函数的参数
- jquey(判断文本框输入的网址链接是否符合规则)
- Firefox实用插件记录
- Let's Encrypt 免费SSL证书
- pytho字符串处理内置方法一览表
- SFTP多文件上传,删除
- Android之MainActivity类
- SEND EMAIL SO_DOCUMENT_SEND_API1
- 在同一台电脑安装python 2 和3,并且怎样安装各自的pip和模块
- Python调用jar包中的方法
- 用 TensorFlow 实现 k-means 聚类代码解析
- Hibernate HQL多表查询
- Redis(三):Redis数据类型
- 随机森林(Random Forest)详解(转)