SSA IN GO

  |   0 评论   |   0 浏览

关于源码到语法树的部分编译在 这里

编译阶段 src/cmd/compile/internal/gc/main.go 中 存有如下一段代码

initssaconfig()

	// Just before compilation, compile itabs found on
	// the right side of OCONVIFACE so that methods
	// can be de-virtualized during compilation.
	Curfn = nil
	peekitabs()

	// Phase 8: Compile top level functions.
	// Don't use range--walk can add functions to xtop.
	timings.Start("be", "compilefuncs")
	fcount = 0
	for i := 0; i < len(xtop); i++ {
		n := xtop[i]
		if n.Op == ODCLFUNC {
			funccompile(n)
			fcount++
		}
	}
	timings.AddEvent(fcount, "funcs")

	if nsavederrors+nerrors == 0 {
		fninit(xtop)
	}

	compileFunctions()

这段代码会初始化 ssa 配置,配置结束后会调用 fucncompile 对函数进行编译

initssaconfig

func initssaconfig() {
	types_ := ssa.NewTypes()

	if thearch.SoftFloat {
		softfloatInit()
	}

	// Generate a few pointer types that are uncommon in the frontend but common in the backend.
	// Caching is disabled in the backend, so generating these here avoids allocations.
	_ = types.NewPtr(types.Types[TINTER])                             // *interface{}
	_ = types.NewPtr(types.NewPtr(types.Types[TSTRING]))              // **string
	_ = types.NewPtr(types.NewPtr(types.Idealstring))                 // **string
	_ = types.NewPtr(types.NewSlice(types.Types[TINTER]))             // *[]interface{}
	_ = types.NewPtr(types.NewPtr(types.Bytetype))                    // **byte
	_ = types.NewPtr(types.NewSlice(types.Bytetype))                  // *[]byte
	_ = types.NewPtr(types.NewSlice(types.Types[TSTRING]))            // *[]string
	_ = types.NewPtr(types.NewSlice(types.Idealstring))               // *[]string
	_ = types.NewPtr(types.NewPtr(types.NewPtr(types.Types[TUINT8]))) // ***uint8
	_ = types.NewPtr(types.Types[TINT16])                             // *int16
	_ = types.NewPtr(types.Types[TINT64])                             // *int64
	_ = types.NewPtr(types.Errortype)                                 // *error
	types.NewPtrCacheEnabled = false
	ssaConfig = ssa.NewConfig(thearch.LinkArch.Name, *types_, Ctxt, Debug['N'] == 0)
	if thearch.LinkArch.Name == "386" {
		ssaConfig.Set387(thearch.Use387)
	}
	ssaConfig.SoftFloat = thearch.SoftFloat
	ssaConfig.Race = flag_race
	ssaCaches = make([]ssa.Cache, nBackendWorkers)

首先 NewTypes 初始化一个新的 Types 结构体,调用 NewPtr 函数,缓存类型中的信息,Types 中存储了所有基本类型对应的指针

type Types struct {
	Bool       *types.Type
	Int8       *types.Type
	Int16      *types.Type
	Int32      *types.Type
	Int64      *types.Type
	UInt8      *types.Type
	UInt16     *types.Type
	UInt32     *types.Type
	UInt64     *types.Type
	Int        *types.Type
	Float32    *types.Type
	Float64    *types.Type
	UInt       *types.Type
	Uintptr    *types.Type
	String     *types.Type
	BytePtr    *types.Type // TODO: use unsafe.Pointer instead?
	Int32Ptr   *types.Type
	UInt32Ptr  *types.Type
	IntPtr     *types.Type
	UintptrPtr *types.Type
	Float32Ptr *types.Type
	Float64Ptr *types.Type
	BytePtrPtr *types.Type
}

// NewTypes creates and populates a Types.
func NewTypes() *Types {
	t := new(Types)
	t.SetTypPtrs()
	return t
}

// SetTypPtrs populates t.
func (t *Types) SetTypPtrs() {
	t.Bool = types.Types[types.TBOOL]
	t.Int8 = types.Types[types.TINT8]
	t.Int16 = types.Types[types.TINT16]
	t.Int32 = types.Types[types.TINT32]
	t.Int64 = types.Types[types.TINT64]
	t.UInt8 = types.Types[types.TUINT8]
	t.UInt16 = types.Types[types.TUINT16]
	t.UInt32 = types.Types[types.TUINT32]
	t.UInt64 = types.Types[types.TUINT64]
	t.Int = types.Types[types.TINT]
	t.Float32 = types.Types[types.TFLOAT32]
	t.Float64 = types.Types[types.TFLOAT64]
	t.UInt = types.Types[types.TUINT]
	t.Uintptr = types.Types[types.TUINTPTR]
	t.String = types.Types[types.TSTRING]
	t.BytePtr = types.NewPtr(types.Types[types.TUINT8])
	t.Int32Ptr = types.NewPtr(types.Types[types.TINT32])
	t.UInt32Ptr = types.NewPtr(types.Types[types.TUINT32])
	t.IntPtr = types.NewPtr(types.Types[types.TINT])
	t.UintptrPtr = types.NewPtr(types.Types[types.TUINTPTR])
	t.Float32Ptr = types.NewPtr(types.Types[types.TFLOAT32])
	t.Float64Ptr = types.NewPtr(types.Types[types.TFLOAT64])
	t.BytePtrPtr = types.NewPtr(types.NewPtr(types.Types[types.TUINT8]))
}

NewPtr 根据类型生成指向类型的指针并缓存在当前 Type中

func NewPtr(elem *Type) *Type {

	// 如果无类型
	if elem == nil {

		// 直接报错
		Fatalf("NewPtr: pointer to elem Type is nil")
	}


	// 如果缓存过 直接返回 ptr
	if t := elem.Cache.ptr; t != nil {
		if t.Elem() != elem {
			Fatalf("NewPtr: elem mismatch")
		}
		return t
	}


	// 剩下的就是,如果没有缓存,那么把t放入type的Cache中返回t
	t := New(TPTR)
	t.Extra = Ptr{Elem: elem}
	t.Width = int64(Widthptr)
	t.Align = uint8(Widthptr)
	if NewPtrCacheEnabled {
		elem.Cache.ptr = t
	}
	return t
}

而后,根据当前的 CPU 架构初始化 SSA 配置

ssaConfig = ssa.NewConfig(thearch.LinkArch.Name, *types_, Ctxt, Debug['N'] == 0)

NewConfig 会根据传入的 CPU 架构设置用于生成中间代码和机器码的函数,当前编译器使用的指针、寄存器大小、可用寄存器列表、掩码等编译选项

// Set up some runtime functions we'll need to call.
	assertE2I = sysfunc("assertE2I")
	assertE2I2 = sysfunc("assertE2I2")
	assertI2I = sysfunc("assertI2I")
	assertI2I2 = sysfunc("assertI2I2")
	deferproc = sysfunc("deferproc")
	deferprocStack = sysfunc("deferprocStack")
	Deferreturn = sysfunc("deferreturn")
	Duffcopy = sysvar("duffcopy")             // asm func with special ABI
	Duffzero = sysvar("duffzero")             // asm func with special ABI
	gcWriteBarrier = sysvar("gcWriteBarrier") // asm func with special ABI
	goschedguarded = sysfunc("goschedguarded")
	growslice = sysfunc("growslice")
	msanread = sysfunc("msanread")
	msanwrite = sysfunc("msanwrite")
	newobject = sysfunc("newobject")
	newproc = sysfunc("newproc")
	panicdivide = sysfunc("panicdivide")

最后初始化一些编译器用到的 Go 语言运行时函数

这些函数会在对应的 runtime 包结构体 Pkg 中创建一个新的符号 obj.LSym,表示上述的方法已经被注册到运行时 runtime 包中,我们在后面的中间代码生成中直接使用这些方法,我们在这里看到的 deferprocdeferreturn 就是 Go 语言用于实现 defer 关键字的运行时函数

遍历和转换

图片.png

图片.png

funccompile中调用了compile compile又调用了walk来遍历节点

类似 walk 的函数还有

func walk(fn *Node)
func walkappend(n *Node, init *Nodes, dst *Node) *Node
...
func walkrange(n *Node) *Node
func walkselect(sel *Node)
func walkselectcases(cases *Nodes) []*Node
func walkstmt(n *Node) *Node
func walkstmtlist(s []*Node)
func walkswitch(sw *Node)

walk 走过每个节点,讲一些关键字和 builtin 函数转换成 函数调用

图片.png

比如 make 将转换成特定类型的 make,new 也将转换成 newobject

所有转换完成的调用在 src/cmd/compile/internal/gc/builtin/runtime.go 都可以看到

图片.png

文件中代码的作用只是让编译器能够找到对应符号的函数定义而已,真正的函数实现都在另一个 src/runtime 包中

在 compile 的最后

图片.png

使用 compileSSA 将遍历替换完成的抽象语法树转换成中间代码

其中含有 buildssa 函数,实现了 ssa 优化中的特性 例如 静态单赋值等等

AST to SSA

buildssa

stmtList 函数将每隔一个节点,翻译成 IR

// Convert the AST-based IR to the SSA-based IR
	s.stmtList(fn.Func.Enter)
	s.stmtList(fn.Nbody)
func (s *state) stmt(n *Node) {
	...
	switch n.Op {
	case OCALLMETH, OCALLINTER:
		s.call(n, callNormal)
		if n.Op == OCALLFUNC && n.Left.Op == ONAME && n.Left.Class() == PFUNC {
			if fn := n.Left.Sym.Name; compiling_runtime && fn == "throw" ||
				n.Left.Sym.Pkg == Runtimepkg && (fn == "throwinit" || fn == "gopanic" || fn == "panicwrap" || fn == "block" || fn == "panicmakeslicelen" || fn == "panicmakeslicecap") {
				m := s.mem()
				b := s.endBlock()
				b.Kind = ssa.BlockExit
				b.SetControl(m)
			}
		}
	case ODEFER:
		s.call(n.Left, callDefer)
	case OGO:
		s.call(n.Left, callGo)
	...
	}
}

在遇到函数调用、方法调用、使用 defer 或者 go 关键字时都会执行 call 生成调用函数的 SSA 节点

switch cas , 实现 defer ,go 等

func (s *state) call(n *Node, k callKind) *ssa.Value {
	...
	var call *ssa.Value
	switch {
	case k == callDefer:
		call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, deferproc, s.mem())
	case k == callGo:
		call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, newproc, s.mem())
	case sym != nil:
		call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, sym.Linksym(), s.mem())
	..
	}
	...
}
  • 如果这里使用的是 defer 关键字,就会插入 deferproc 函数;
  • 如果使用 go 创建新的 Goroutine 时会插入 newproc 函数符号;
  • 在遇到其他情况时会插入表示普通函数对应的符号;
compiling hello
hello func(int) int
  b1:
    v1 = InitMem <mem>
    v2 = SP <uintptr>
    v3 = SB <uintptr> DEAD
    v4 = LocalAddr <*int> {a} v2 v1 DEAD
    v5 = LocalAddr <*int> {~r1} v2 v1
    v6 = Arg <int> {a}
    v7 = Const64 <int> [0] DEAD
    v8 = Const64 <int> [2]
    v9 = Add64 <int> v6 v8 (c[int])
    v10 = VarDef <mem> {~r1} v1
    v11 = Store <mem> {int} v5 v9 v10
    Ret v11

src/cmd/compile/internal/gc/ssa.go
这个拥有将近 7000 行代码的文件包含用于处理不同节点的各种方法,编译器会根据节点类型的不同在一个巨型 switch 语句处理不同的情况,这也是我们在编译器这种独特的场景下才能看到的现象。

而后进入

var passes = [...]pass{
	// TODO: combine phielim and copyelim into a single pass?
	{name: "number lines", fn: numberLines, required: true},
	{name: "early phielim", fn: phielim},
	{name: "early copyelim", fn: copyelim},
	{name: "early deadcode", fn: deadcode}, // remove generated dead code to avoid doing pointless work during opt
	{name: "short circuit", fn: shortcircuit},
	{name: "decompose args", fn: decomposeArgs, required: true},
	{name: "decompose user", fn: decomposeUser, required: true},
	{name: "pre-opt deadcode", fn: deadcode},
	{name: "opt", fn: opt, required: true},               // NB: some generic rules know the name of the opt pass. TODO: split required rules and optimizing rules
	{name: "zero arg cse", fn: zcse, required: true},     // required to merge OpSB values
	{name: "opt deadcode", fn: deadcode, required: true}, // remove any blocks orphaned during opt
	{name: "generic cse", fn: cse},
	{name: "phiopt", fn: phiopt},
	{name: "gcse deadcode", fn: deadcode, required: true}, // clean out after cse and phiopt
	{name: "nilcheckelim", fn: nilcheckelim},
	{name: "prove", fn: prove},
	{name: "early fuse", fn: fuseEarly},
	{name: "decompose builtin", fn: decomposeBuiltIn, required: true},
	{name: "softfloat", fn: softfloat, required: true},
	{name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
	{name: "dead auto elim", fn: elimDeadAutosGeneric},
	{name: "generic deadcode", fn: deadcode, required: true}, // remove dead stores, which otherwise mess up store chain
	{name: "check bce", fn: checkbce},
	{name: "branchelim", fn: branchelim},
	{name: "late fuse", fn: fuseLate},
	{name: "dse", fn: dse},
	{name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
	{name: "insert resched checks", fn: insertLoopReschedChecks,
		disabled: objabi.Preemptibleloops_enabled == 0}, // insert resched checks in loops.
	{name: "lower", fn: lower, required: true},
	{name: "addressing modes", fn: addressingModes, required: false},
	{name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again
	{name: "lowered cse", fn: cse},
	{name: "elim unread autos", fn: elimUnreadAutos},
	{name: "tighten tuple selectors", fn: tightenTupleSelectors, required: true},
	{name: "lowered deadcode", fn: deadcode, required: true},
	{name: "checkLower", fn: checkLower, required: true},
	{name: "late phielim", fn: phielim},
	{name: "late copyelim", fn: copyelim},
	{name: "tighten", fn: tighten}, // move values closer to their uses
	{name: "late deadcode", fn: deadcode},
	{name: "critical", fn: critical, required: true}, // remove critical edges
	{name: "phi tighten", fn: phiTighten},            // place rematerializable phi args near uses to reduce value lifetimes
	{name: "likelyadjust", fn: likelyadjust},
	{name: "layout", fn: layout, required: true},     // schedule blocks
	{name: "schedule", fn: schedule, required: true}, // schedule values
	{name: "late nilcheck", fn: nilcheckelim2},
	{name: "flagalloc", fn: flagalloc, required: true}, // allocate flags register
	{name: "regalloc", fn: regalloc, required: true},   // allocate int & float registers + stack slots
	{name: "loop rotate", fn: loopRotate},
	{name: "stackframe", fn: stackframe, required: true},
	{name: "trim", fn: trim}, // remove empty blocks
}

passes 数组,每个数组有个 function,require 标志是否为必须的,
中间码经过 50 多个函数的过滤产生出最终代码

其中包含了 ssa 降级过程

降级从 parse-lower 开始

var passes = [...]pass{
	...
	{name: "lower", fn: lower, required: true},
	{name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again
	{name: "lowered cse", fn: cse},
	...
	{name: "trim", fn: trim}, // remove empty blocks
}

它会将 SSA 的中间代码转换成机器特定的指令

func lower(f *Func) {
	applyRewrite(f, f.Config.lowerBlock, f.Config.lowerValue)
}

f.Config.lowerBlock,f.Config.lowerValue 会调用你机器的架构所对应的函数来执行

比如 x86

func rewriteValue386(v *Value) bool {
	switch v.Op {
	case Op386ADCL:
		return rewriteValue386_Op386ADCL_0(v)
	case Op386ADDL:
		return rewriteValue386_Op386ADDL_0(v) || rewriteValue386_Op386ADDL_10(v) || rewriteValue386_Op386ADDL_20(v)
	...
	}
}

func rewriteValue386_Op386ADCL_0(v *Value) bool {
	// match: (ADCL x (MOVLconst [c]) f)
	// cond:
	// result: (ADCLconst [c] x f)
	for {
		_ = v.Args[2]
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != Op386MOVLconst {
			break
		}
		c := v_1.AuxInt
		f := v.Args[2]
		v.reset(Op386ADCLconst)
		v.AuxInt = c
		v.AddArg(x)
		v.AddArg(f)
		return true
	}
	...
}

同样也是巨大的 switch case ,转译不同的字符成为另一种字符

rewriteValue386_Op386ADCL_0 函数就会使用 ADCLconst 替换 ADCLMOVLconst 两条指令,它能通过对指令的压缩和优化减少在目标硬件上执行所需要的时间和资源。

func compileSSA(fn *Node, worker int) {
	f := buildssa(fn, worker)
	pp := newProgs(fn, worker)
	defer pp.Free()
	genssa(f, pp)

	pp.Flush()
}

当 ssa 结束后,genssa 函数创建新的 obj.Progs 结构体存入 pp 中
pp 调用 flush

func (pp *Progs) Flush() {
	plist := &obj.Plist{Firstpc: pp.Text, Curfn: pp.curfn}
	obj.Flushplist(Ctxt, plist, pp.NewProg, myimportpath)
}

flush 调用 src/cmd/internal/obj 中的汇编器将 SSA 转化为汇编代码

最后直到 main 函数执行完,转化为机器代码

参考资料

https://draveness.me