diff --git a/elf/arch-riscv.cc b/elf/arch-riscv.cc index 10250e3036..c258172d60 100644 --- a/elf/arch-riscv.cc +++ b/elf/arch-riscv.cc @@ -364,6 +364,31 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { break; } case R_RISCV_GOT_HI20: + if (ctx.arg.relax && + sym.is_pcrel_linktime_const(ctx) && + i + 3 < rels.size() && + rels[i + 1].r_type == R_RISCV_RELAX && + rels[i + 2].r_type == R_RISCV_PCREL_LO12_I && + rels[i + 2].r_offset == rels[i].r_offset + 4 && + file.symbols[rels[i + 2].r_sym]->value == r_offset && + rels[i + 3].r_type == R_RISCV_RELAX) { + // An AUIPC + LD pair is used to load a value from a GOT slot. If the + // value is actually a PC-relative link-time constant, we can + // rewrite AUIPC + LD as AUIPC + ADDI to eliminate the memory + // load. This is done only when S + A - P fits in a signed 32-bit value. + i64 val = S + A - P; + if ((i32)val == val) { + // auipc <rd>, %hi20(val) + write_utype(loc, val); + + // addi <rd>, <rd>, %lo12(val) -- rd fills both the destination (bit 7) and source (bit 15) fields. NOTE(review): get_rd() is given rel.r_offset while the guard above compares against r_offset; confirm both are the intended values. + i64 rd = get_rd(rel.r_offset); + *(ul32 *)(loc + 4) = 0b0010011 | (rd << 15) | (rd << 7); + write_itype(loc + 4, val); + i += 3; + break; + } + } write_utype(loc, G + GOT + A - P); break; case R_RISCV_TLS_GOT_HI20: