Rust std::fs中为什么要定义inner方法

最近在看Rust标准库源码的时候,偶然发现std::fs的很多方法中都定义了类似的inner方法,比如说read方法:

/// Opens the file at `path` and returns the bytes read from it.
///
/// The generic wrapper only borrows `path` as a `&Path` and forwards it to the
/// non-generic `inner` function, which contains all of the actual logic.
pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
    // Non-generic worker: it takes a concrete `&Path`, so nothing in its body
    // depends on the type parameter `P`.
    fn inner(path: &Path) -> io::Result<Vec<u8>> {
        let mut file = File::open(path)?;
        // File length taken from the metadata; `.ok()` turns a metadata error
        // into `None` instead of failing the whole read.
        let size = file.metadata().map(|m| m.len() as usize).ok();
        let mut bytes = Vec::new();
        // Reserve capacity up front (0 when the size is unknown); a failed
        // reservation is reported as `OutOfMemory`.
        bytes.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
        // NOTE(review): presumably reads until EOF using `size` as a hint —
        // confirm against the helper's definition.
        io::default_read_to_end(&mut file, &mut bytes, size)?;
        Ok(bytes)
    }
    // The only step that depends on `P`: borrow the argument as `&Path`.
    inner(path.as_ref())
}

这种写法看上去很不自然,但其实有很大的作用——可以优化编译速度和生成的二进制大小。要讲清楚这些,首先要知道Rust的编译流程是怎么样的。

Rust编译流程是怎样的

graph LR; 源代码 --词法分析--> tokens; tokens --语法分析--> AST; AST --lowering--> HIR; HIR --lowering--> MIR; MIR --> LLVM-IR; LLVM-IR --LLVM处理--> 二进制程序;

大体上Rust程序编译逻辑如上图所示:

  1. 经过词法分析/语法分析生成抽象语法树AST

  2. AST lowering生成HIR,这个过程主要做了desugar(脱糖),也就是把async等语法糖展开成更基础的语法结构

  3. 在HIR上完成类型推导和类型检查后,继续lowering生成MIR

  4. 在MIR上做借用检查和一些优化,然后由MIR生成LLVM-IR,这个过程会做单态化(Monomorphization)

  5. 最终LLVM-IR经过LLVM处理生成二进制程序

这几步中MIR生成LLVM-IR这一步会做单态化,单态化和Rust的泛型实现相关。

Rust泛型是怎么实现的

大部分编程语言都支持泛型,因为泛型对程序员真的很有用。有的语言刚开始不支持泛型,后来承受不了压力还是把泛型加上了(说的就是你,golang)。但是每一个语言的泛型实现都不太一样。

我对于Java比较熟悉,Java的泛型其实只在编译期保留信息,可以用来做一下类型检查。编译出来的代码会做所谓的“类型擦除”:假设泛型类型没有限制,就会变成Object。

那么Rust泛型是怎么实现的呢,就是上面提到的单态化。简单来讲,就是编译器会为代码中实际用到的每一种具体类型,各自生成一份泛型代码的实现。

这里举一个简单的例子,我们可以写一个样例程序然后用rust playground生成LLVM-IR就可以了,因为上面说过LLVM-IR中是含有单态化后的内容的。

/// Generic function used to observe monomorphization: the argument is ignored
/// and the body always panics through `todo!()`.
fn generic_func<T>(_: T) {
  todo!()
}

/// Calls `generic_func` with two different concrete argument types, an integer
/// literal and a string literal.
fn main() {
  // Integer literal argument.
  generic_func(123);
  // String literal (`&str`) argument.
  generic_func("test");
}

左上角选择生成LLVM-IR就行。

生成的内容很多,只要搜一下generic_func就能找到两个实现的IR块:

; playground::generic_func
; Function Attrs: nonlazybind uwtable
define internal void @_ZN10playground12generic_func17h45a1fa811d4f4f6fE(i32 %_1) unnamed_addr #1 personality ptr @rust_eh_personality !dbg !182 {
start:
  %0 = alloca [16 x i8], align 8
  %_1.dbg.spill = alloca [4 x i8], align 4
  store i32 %_1, ptr %_1.dbg.spill, align 4
  call void @llvm.dbg.declare(metadata ptr %_1.dbg.spill, metadata !188, metadata !DIExpression()), !dbg !191
; invoke core::panicking::panic
  invoke void @_ZN4core9panicking5panic17haa799295486846efE(ptr align 1 @alloc_aa07815cbcb2365f7aca41cc8941a0c4, i64 19, ptr align 8 @alloc_795402d6b8ec7fca732c5aa54258c873) #6
          to label %unreachable unwind label %cleanup, !dbg !192

bb1:                                              ; preds = %cleanup
  %1 = load ptr, ptr %0, align 8, !dbg !193
  %2 = getelementptr inbounds i8, ptr %0, i64 8, !dbg !193
  %3 = load i32, ptr %2, align 8, !dbg !193
  %4 = insertvalue { ptr, i32 } poison, ptr %1, 0, !dbg !193
  %5 = insertvalue { ptr, i32 } %4, i32 %3, 1, !dbg !193
  resume { ptr, i32 } %5, !dbg !193

cleanup:                                          ; preds = %start
  %6 = landingpad { ptr, i32 }
          cleanup
  %7 = extractvalue { ptr, i32 } %6, 0
  %8 = extractvalue { ptr, i32 } %6, 1
  store ptr %7, ptr %0, align 8
  %9 = getelementptr inbounds i8, ptr %0, i64 8
  store i32 %8, ptr %9, align 8
  br label %bb1

unreachable:                                      ; preds = %start
  unreachable
}

; playground::generic_func
; Function Attrs: nonlazybind uwtable
define internal void @_ZN10playground12generic_func17he4eadbb8a85ae789E(ptr align 1 %_1.0, i64 %_1.1) unnamed_addr #1 personality ptr @rust_eh_personality !dbg !194 {
start:
  %0 = alloca [16 x i8], align 8
  %_1.dbg.spill = alloca [16 x i8], align 8
  store ptr %_1.0, ptr %_1.dbg.spill, align 8
  %1 = getelementptr inbounds i8, ptr %_1.dbg.spill, i64 8
  store i64 %_1.1, ptr %1, align 8
  call void @llvm.dbg.declare(metadata ptr %_1.dbg.spill, metadata !203, metadata !DIExpression()), !dbg !206
; invoke core::panicking::panic
  invoke void @_ZN4core9panicking5panic17haa799295486846efE(ptr align 1 @alloc_aa07815cbcb2365f7aca41cc8941a0c4, i64 19, ptr align 8 @alloc_795402d6b8ec7fca732c5aa54258c873) #6
          to label %unreachable unwind label %cleanup, !dbg !207

bb1:                                              ; preds = %cleanup
  %2 = load ptr, ptr %0, align 8, !dbg !208
  %3 = getelementptr inbounds i8, ptr %0, i64 8, !dbg !208
  %4 = load i32, ptr %3, align 8, !dbg !208
  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0, !dbg !208
  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1, !dbg !208
  resume { ptr, i32 } %6, !dbg !208

cleanup:                                          ; preds = %start
  %7 = landingpad { ptr, i32 }
          cleanup
  %8 = extractvalue { ptr, i32 } %7, 0
  %9 = extractvalue { ptr, i32 } %7, 1
  store ptr %8, ptr %0, align 8
  %10 = getelementptr inbounds i8, ptr %0, i64 8
  store i32 %9, ptr %10, align 8
  br label %bb1

unreachable:                                      ; preds = %start
  unreachable
}

这段IR如果仔细看也能看到两种类型的入参

# 对应传入123的方法,类型是i32
define internal void @_ZN10playground12generic_func17h45a1fa811d4f4f6fE(i32 %_1)
# 对应传入"test"的方法,入参是一个ptr指针加一个i64长度,两者合起来就是&str
define internal void @_ZN10playground12generic_func17he4eadbb8a85ae789E(ptr align 1 %_1.0, i64 %_1.1)

回到问题

现在回到最初的问题,我们知道泛型方法每多一种调用类型,编译出来的方法就越多,生成的二进制就会越大,编译速度就会越慢。那使用inner方法的作用具体又是什么呢?

我们可以再写一个样例代码,生成LLVM-IR看一下,代码如下:

use std::path::{Path, PathBuf};

/// Non-generic helper that prints `path` using `{:?}` formatting.
fn do_something(path: &Path) {
    println!("{:?}", path);
}

/// Generic entry point that uses the `inner` pattern: it only borrows `path`
/// as a `&Path` and forwards it to the non-generic `inner` function.
fn use_inner<P: AsRef<Path>>(path: P) {
    // Non-generic worker: takes a concrete `&Path`, so its body does not depend
    // on `P`. It calls `do_something` five times.
    fn inner(path: &Path) {
        do_something(path);
        do_something(path);
        do_something(path);
        do_something(path);
        do_something(path);
    }
    // The only step that depends on `P`.
    inner(path.as_ref());
}

/// Generic counterpart of `use_inner` without the inner function: the five
/// `do_something` calls sit directly in the generic body.
fn not_use_inner<P: AsRef<Path>>(path: P) {
    // Borrow the argument as `&Path` once and reuse it for every call.
    let ref_path = path.as_ref();
    do_something(ref_path);
    do_something(ref_path);
    do_something(ref_path);
    do_something(ref_path);
    do_something(ref_path);
}

/// Calls both generic functions with the same three argument types
/// (`PathBuf`, `&str` and `String`).
fn main() {
    // Version without the inner function.
    not_use_inner(PathBuf::new());
    not_use_inner("test1");
    not_use_inner("test2".to_string());
    
    // Version with the inner function.
    use_inner(PathBuf::new());
    use_inner("test1");
    use_inner("test2".to_string());
}

这里实现了带inner方法和不带inner方法的泛型方法,并且分别使用了三种类型进行调用。not_use_inner和use_inner都会有三种实现,这个是能确定的,分别找一个实现具体看下:

; playground::not_use_inner
; Function Attrs: nonlazybind uwtable
define internal void @_ZN10playground13not_use_inner17h51b6d7791febf211E(ptr align 1 %0, i64 %1) unnamed_addr #2 personality ptr @rust_eh_personality !dbg !2852 {
start:
  %ref_path.dbg.spill = alloca [16 x i8], align 8
  %2 = alloca [16 x i8], align 8
  %path = alloca [16 x i8], align 8
  store ptr %0, ptr %path, align 8
  %3 = getelementptr inbounds i8, ptr %path, i64 8
  store i64 %1, ptr %3, align 8
  call void @llvm.dbg.declare(metadata ptr %path, metadata !2854, metadata !DIExpression()), !dbg !2857
; invoke <&T as core::convert::AsRef<U>>::as_ref
  %4 = invoke { ptr, i64 } @"_ZN55_$LT$$RF$T$u20$as$u20$core..convert..AsRef$LT$U$GT$$GT$6as_ref17h3e46ed100e8d0d09E"(ptr align 8 %path)
          to label %bb1 unwind label %cleanup, !dbg !2858

bb8:                                              ; preds = %cleanup
  %5 = load ptr, ptr %2, align 8, !dbg !2859
  %6 = getelementptr inbounds i8, ptr %2, i64 8, !dbg !2859
  %7 = load i32, ptr %6, align 8, !dbg !2859
  %8 = insertvalue { ptr, i32 } poison, ptr %5, 0, !dbg !2859
  %9 = insertvalue { ptr, i32 } %8, i32 %7, 1, !dbg !2859
  resume { ptr, i32 } %9, !dbg !2859

cleanup:                                          ; preds = %bb5, %bb4, %bb3, %bb2, %bb1, %start
  %10 = landingpad { ptr, i32 }
          cleanup
  %11 = extractvalue { ptr, i32 } %10, 0
  %12 = extractvalue { ptr, i32 } %10, 1
  store ptr %11, ptr %2, align 8
  %13 = getelementptr inbounds i8, ptr %2, i64 8
  store i32 %12, ptr %13, align 8
  br label %bb8

bb1:                                              ; preds = %start
  %ref_path.0 = extractvalue { ptr, i64 } %4, 0, !dbg !2858
  %ref_path.1 = extractvalue { ptr, i64 } %4, 1, !dbg !2858
  store ptr %ref_path.0, ptr %ref_path.dbg.spill, align 8, !dbg !2858
  %14 = getelementptr inbounds i8, ptr %ref_path.dbg.spill, i64 8, !dbg !2858
  store i64 %ref_path.1, ptr %14, align 8, !dbg !2858
  call void @llvm.dbg.declare(metadata ptr %ref_path.dbg.spill, metadata !2855, metadata !DIExpression()), !dbg !2860
; invoke playground::do_something
  invoke void @_ZN10playground12do_something17ha0105cf466725f6fE(ptr align 1 %ref_path.0, i64 %ref_path.1)
          to label %bb2 unwind label %cleanup, !dbg !2861

bb2:                                              ; preds = %bb1
; invoke playground::do_something
  invoke void @_ZN10playground12do_something17ha0105cf466725f6fE(ptr align 1 %ref_path.0, i64 %ref_path.1)
          to label %bb3 unwind label %cleanup, !dbg !2862

bb3:                                              ; preds = %bb2
; invoke playground::do_something
  invoke void @_ZN10playground12do_something17ha0105cf466725f6fE(ptr align 1 %ref_path.0, i64 %ref_path.1)
          to label %bb4 unwind label %cleanup, !dbg !2863

bb4:                                              ; preds = %bb3
; invoke playground::do_something
  invoke void @_ZN10playground12do_something17ha0105cf466725f6fE(ptr align 1 %ref_path.0, i64 %ref_path.1)
          to label %bb5 unwind label %cleanup, !dbg !2864

bb5:                                              ; preds = %bb4
; invoke playground::do_something
  invoke void @_ZN10playground12do_something17ha0105cf466725f6fE(ptr align 1 %ref_path.0, i64 %ref_path.1)
          to label %bb6 unwind label %cleanup, !dbg !2865

bb6:                                              ; preds = %bb5
  ret void, !dbg !2866
}

; playground::use_inner
; Function Attrs: nonlazybind uwtable
define internal void @_ZN10playground9use_inner17h501c146b86d30f10E(ptr align 1 %0, i64 %1) unnamed_addr #2 personality ptr @rust_eh_personality !dbg !2803 {
start:
  %2 = alloca [16 x i8], align 8
  %path = alloca [16 x i8], align 8
  store ptr %0, ptr %path, align 8
  %3 = getelementptr inbounds i8, ptr %path, i64 8
  store i64 %1, ptr %3, align 8
  call void @llvm.dbg.declare(metadata ptr %path, metadata !2807, metadata !DIExpression()), !dbg !2810
; invoke <&T as core::convert::AsRef<U>>::as_ref
  %4 = invoke { ptr, i64 } @"_ZN55_$LT$$RF$T$u20$as$u20$core..convert..AsRef$LT$U$GT$$GT$6as_ref17h3e46ed100e8d0d09E"(ptr align 8 %path)
          to label %bb1 unwind label %cleanup, !dbg !2811

bb4:                                              ; preds = %cleanup
  %5 = load ptr, ptr %2, align 8, !dbg !2812
  %6 = getelementptr inbounds i8, ptr %2, i64 8, !dbg !2812
  %7 = load i32, ptr %6, align 8, !dbg !2812
  %8 = insertvalue { ptr, i32 } poison, ptr %5, 0, !dbg !2812
  %9 = insertvalue { ptr, i32 } %8, i32 %7, 1, !dbg !2812
  resume { ptr, i32 } %9, !dbg !2812

cleanup:                                          ; preds = %bb1, %start
  %10 = landingpad { ptr, i32 }
          cleanup
  %11 = extractvalue { ptr, i32 } %10, 0
  %12 = extractvalue { ptr, i32 } %10, 1
  store ptr %11, ptr %2, align 8
  %13 = getelementptr inbounds i8, ptr %2, i64 8
  store i32 %12, ptr %13, align 8
  br label %bb4

bb1:                                              ; preds = %start
  %_3.0 = extractvalue { ptr, i64 } %4, 0, !dbg !2811
  %_3.1 = extractvalue { ptr, i64 } %4, 1, !dbg !2811
; invoke playground::use_inner::inner
  invoke void @_ZN10playground9use_inner5inner17h3c9d482410cce508E(ptr align 1 %_3.0, i64 %_3.1)
          to label %bb2 unwind label %cleanup, !dbg !2813

bb2:                                              ; preds = %bb1
  ret void, !dbg !2814
}

可以看到,use_inner生成的IR行数明显少于not_use_inner。因为不论inner内部逻辑多么复杂,每一种泛型实现都只需要调用一次inner方法,复杂的逻辑只会在非泛型的inner中生成一份,而不需要在每一个泛型实现中重复生成。

所以,在 Rust 的泛型方法中,只要可以将非泛型的代码逻辑拆出来,都可以将这段逻辑定义成一个内部的方法,这样对于编译速度和生成的二进制大小都有好处。

参考链接:
  1. https://rustc-dev-guide.rust-lang.org/overview.html

  2. https://rustc-dev-guide.rust-lang.org/backend/monomorph.html

  3. https://blog.rust-lang.org/2016/04/19/MIR.html

  4. https://thume.ca/2019/07/14/a-tour-of-metaprogramming-models-for-generics/

  5. https://www.pingcap.com/blog/generics-and-compile-time-in-rust/