5.6 LLVM

    首先我们通过著名的 helloWorld 来熟悉下 LLVM 的使用。

    将 C 源码转换成 LLVM 汇编码:

    1. $ clang -emit-llvm -S hello.c -o hello.ll

    生成的 LLVM IR 如下:

    1. ; ModuleID = 'hello.c'
    2. source_filename = "hello.c"
    3. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
    4. target triple = "x86_64-unknown-linux-gnu"
    5. @.str = private unnamed_addr constant [14 x i8] c"hello, world\0A\00", align 1
    6. define i32 @main() #0 {
    7. %1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i32 0, i32 0))
    8. ret i32 0
    9. declare i32 @printf(i8*, ...) #1
    10. attributes #0 = { noinline nounwind optnone sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
    11. attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
    12. !llvm.module.flags = !{!0, !1, !2}
    13. !llvm.ident = !{!3}
    14. !0 = !{i32 1, !"wchar_size", i32 4}
    15. !3 = !{!"clang version 5.0.1 (tags/RELEASE_501/final)"}

    LLVM bitcode 由两部分组成:位流,以及将 LLVM IR 编码成位流的编码格式。使用汇编器 llvm-as 将 LLVM IR 转换成 bitcode:

    1. $ llvm-as hello.ll -o hello.bc

    结果如下:

    1. $ file hello.bc
    2. hello.bc: LLVM IR bitcode
    3. $ xxd -g1 hello.bc | head -n5
    4. 00000000: 42 43 c0 de 35 14 00 00 05 00 00 00 62 0c 30 24 BC..5.......b.0$
    5. 00000010: 49 59 be 66 ee d3 7e 2d 44 01 32 05 00 00 00 00 IY.f..~-D.2.....
    6. 00000020: 21 0c 00 00 4d 02 00 00 0b 02 21 00 02 00 00 00 !...M.....!.....
    7. 00000030: 13 00 00 00 07 81 23 91 41 c8 04 49 06 10 32 39 ......#.A..I..29
    8. 00000040: 92 01 84 0c 25 05 08 19 1e 04 8b 62 80 10 45 02 ....%......b..E.

    反过来将 bitcode 转回 LLVM IR 也是可以的,使用反汇编器 llvm-dis:

    1. $ llvm-dis hello.bc -o hello.ll

    接下来使用静态编译器 llc 命令可以将 bitcode 编译为特定架构的汇编语言:

    1. $ llc -march=x86-64 hello.bc -o hello.s

    也可以使用 clang 来生成,结果是一样的:

    1. $ clang -S hello.bc -o hello.s

    结果如下: