diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6ae40922..c21a5f23 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,7 +36,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download artifact - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4.1.7 with: name: riscv-abi.pdf path: ./ @@ -73,7 +73,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download artifact - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4.1.7 with: name: riscv-abi.pdf path: ./ diff --git a/riscv-abi.adoc b/riscv-abi.adoc index f4070c91..74bea9d9 100644 --- a/riscv-abi.adoc +++ b/riscv-abi.adoc @@ -5,6 +5,7 @@ include::prelude.adoc[] endif::[] include::introduction.adoc[] + include::riscv-cc.adoc[] include::riscv-elf.adoc[] diff --git a/riscv-atomic.adoc b/riscv-atomic.adoc index b8aa220c..c4f70029 100644 --- a/riscv-atomic.adoc +++ b/riscv-atomic.adoc @@ -107,7 +107,7 @@ the `lr` and `sc` instructions. |`atomic_(memory_order_acq_rel)` |`amo.{w\|d}.aqrl` |4 -|`atomic_(memory_order_seq_cst)` |`amo.{w\|d}.aqrl` |4 +|`atomic_(memory_order_seq_cst)` |`amo.{w\|d}.aqrl` |4, 5 |=== @@ -135,7 +135,7 @@ the `lr` and `sc` instructions. === Meaning of notes in table -1) Depends on a load instruction with an RCsc aquire annotation, +1) Depends on a load instruction with an RCsc acquire annotation, or a store instruction with an RCsc release annotation. These are currently under discussion, but the specification has not yet been approved. @@ -148,6 +148,11 @@ mappings with code generated by a compiler using those older mappings. 4) Currently only directly possible for 32- and 64-bit operands. +5) atomic_compare_exchange operations with a memory_order_seq_cst failure +ordering are considered to have a note 3 annotation. +To remove the note 3 annotation the amocas operation must be prepended with a +leading fence (`fence rw,rw; amocas.{w\|d}.aqrl`). 
+ == Ztso atomics mappings This specifies additional mappings of C and {Cpp} atomic operations to RISC-V @@ -157,7 +162,8 @@ For each construct, we provide a mapping that assumes only the A and Ztso extension. All mappings interoperate correctly with the RVWMO mappings, and with the -original "Table A.6" mappings. +original "Table A.6" mappings, _except_ that mappings marked with note 3 do not +interoperate with the original "Table A.6" mappings. We present the mappings as a table in 3 sections, as above. @@ -167,32 +173,32 @@ We present the mappings as a table in 3 sections, as above. |=== |C/{Cpp} Construct |Ztso Mapping |Notes -|`atomic_load(memory_order_acquire)` |`l{b\|h\|w\|d}` | 5 +|`atomic_load(memory_order_acquire)` |`l{b\|h\|w\|d}` | 6 -|`atomic_load(memory_order_seq_cst)` |`fence rw,rw; l{b\|h\|w\|d}` | 5 +|`atomic_load(memory_order_seq_cst)` |`fence rw,rw; l{b\|h\|w\|d}` | 6 -|`atomic_store(memory_order_release)` |`s{b\|h\|w\|d}` | 5 +|`atomic_store(memory_order_release)` |`s{b\|h\|w\|d}` | 6 -|`atomic_store(memory_order_seq_cst)` |`s{b\|h\|w\|d}; fence rw, rw` | 5 +|`atomic_store(memory_order_seq_cst)` |`s{b\|h\|w\|d}; fence rw, rw` | 6 -|`atomic_thread_fence(memory_order_acquire)` |`nop` | 5 +|`atomic_thread_fence(memory_order_acquire)` |`nop` | 6 -|`atomic_thread_fence(memory_order_release)` |`nop` | 5 +|`atomic_thread_fence(memory_order_release)` |`nop` | 6 -|`atomic_thread_fence(memory_order_acq_rel)` |`nop` | 5 +|`atomic_thread_fence(memory_order_acq_rel)` |`nop` | 6 |=== [cols="<20,<20,<4",options="header",] |=== |C/{Cpp} Construct |Ztso AMO Mapping |Notes -|`atomic_(memory_order_acquire)` |`amo.{w\|d}` |4, 5 +|`atomic_(memory_order_acquire)` |`amo.{w\|d}` |4, 6 -|`atomic_(memory_order_release)` |`amo.{w\|d}` |4, 5 +|`atomic_(memory_order_release)` |`amo.{w\|d}` |4, 6 -|`atomic_(memory_order_acq_rel)` |`amo.{w\|d}` |4, 5 +|`atomic_(memory_order_acq_rel)` |`amo.{w\|d}` |4, 6 -|`atomic_(memory_order_seq_cst)` |`amo.{w\|d}` |4, 5 
+|`atomic_(memory_order_seq_cst)` |`amo.{w\|d}` |4, 5, 6 |=== @@ -201,21 +207,30 @@ We present the mappings as a table in 3 sections, as above. |C/{Cpp} Construct |Ztso LR/SC Mapping |Notes |`atomic_(memory_order_acquire)` -|`loop:lr.{w\|d}; ; sc.{w\|d}; bnez loop` |4, 5 +|`loop:lr.{w\|d}; ; sc.{w\|d}; bnez loop` |4, 6 |`atomic_(memory_order_release)` -|`loop:lr.{w\|d}; ; sc.{w\|d}; bnez loop` |4, 5 +|`loop:lr.{w\|d}; ; sc.{w\|d}; bnez loop` |4, 6 |`atomic_(memory_order_acq_rel)` -|`loop:lr.{w\|d}; ; sc.{w\|d}; bnez loop` |4, 5 +|`loop:lr.{w\|d}; ; sc.{w\|d}; bnez loop` |4, 6 |=== === Meaning of notes in table +3) Incompatible with the original "Table A.6" mapping. Do not combine these +mappings with code generated by a compiler using those older mappings. +(This was mostly used by the initial LLVM implementations for RISC-V.) + 4) Currently only directly possible for 32- and 64-bit operands. -5) Requires the Ztso extension. +5) atomic_compare_exchange operations with a memory_order_seq_cst failure +ordering are considered to have a note 3 annotation. +To remove the note 3 annotation the amocas operation must be prepended with a +leading fence (`fence rw,rw; amocas.{w\|d}`). + +6) Requires the Ztso extension. == Other conventions diff --git a/riscv-elf.adoc b/riscv-elf.adoc index e1979a7b..edcd1e02 100644 --- a/riscv-elf.adoc +++ b/riscv-elf.adoc @@ -122,9 +122,62 @@ This model is similar to the medium any code model, but uses the # Calculate address of non-local symbol .Ltmp3: auipc a0, %got_pcrel_hi(symbol) - l[w|d] a0, a0, %pcrel_lo(.Ltmp3) + l[w|d] a0, %pcrel_lo(.Ltmp3)(a0) ---- +=== Large code model + +The `large` code model allows the code to address the whole RV64 address space. +Thus, this model is only available for RV64. By putting object addresses +into literal pools, a 64-bit address literal can be loaded from the pool. 
+ +NOTE: Because calculating the pool entry address must use `auipc` and +`addi` or `ld`, each pool entry has to be located within the range +between -2GiB and +2GiB from its access instructions. In general, the pool +is appended to the .text section or put into the .rodata section. + +[,asm] +---- + # Get address of a symbol + # Literal pool +.LCPI0: + .8byte symbol + ... +.Ltmp0: auipc a0, %pcrel_hi(.LCPI0) + ld a0, %pcrel_lo(.Ltmp0)(a0) +---- + +This model also changes the function call patterns. An external function +address must be loaded from a literal pool entry, and use `jalr` to jump to +the target function. + + +NOTE: As with getting the address of a symbol, each pool entry has to be located +within the range between -2GiB and +2GiB from its access instructions. The +function call can reach the whole 64-bit address space. + +NOTE: The code generation of function calls may be changed after the range +extension thunk is implemented. The compiler can emit `call` directly, +and leave the model variation to the linker which could decide to jump +via the literal pool or not. + +[,asm] +---- + # Function call + # Literal pool +.LCPI1: + .8byte function + ... +.Ltmp1: auipc a0, %pcrel_hi(.LCPI1) + ld a0, %pcrel_lo(.Ltmp1)(a0) + jalr a0 +---- + +NOTE: The large code model cannot be used with the PIC code model. + +NOTE: There will be more different code generation strategies for different +usage purposes in the future. 
+ == Dynamic Linking Any functions that use registers in a way that is incompatible with @@ -495,14 +548,22 @@ Description:: Additional information about the relocation <| S - P .2+| 65 .2+| TLSDESC_CALL .2+| Static | .2+| Annotate call to TLS descriptor resolver function, `%tlsdesc_call(address of %tlsdesc_hi)`, for relaxation purposes only <| -.2+| 66-191 .2+| *Reserved* .2+| - | .2+| Reserved for future standard use +.2+| 66-190 .2+| *Reserved* .2+| - | .2+| Reserved for future standard use + <| +.2+| 191 .2+| VENDOR .2+| Static | .2+| Paired with a vendor-specific relocation and must be placed immediately before it; indicates which vendor owns the relocation. <| .2+| 192-255 .2+| *Reserved* .2+| - | .2+| Reserved for nonstandard ABI extensions <| |=== Nonstandard extensions are free to use relocation numbers 192-255 for any -purpose. These relocations may conflict with other nonstandard extensions. +purpose. These vendor-specific relocations must be preceded by a +`R_RISCV_VENDOR` relocation against a vendor ID symbol. + +Where possible, tools should present relocations as their vendor-specific +relocation types; otherwise a generic name of `R_RISCV_CUSTOM` must +be shown. Data contained in the paired `R_RISCV_VENDOR` can be used to select the +appropriate vendor when performing relocations. This section and later ones contain fragments written in assembler. The precise assembler syntax, including that of the relocations, is described in the @@ -515,6 +576,16 @@ and fill the space with a single ULEB128-encoded value. This is achieved by prepending the redundant `0x80` byte as necessary. The linker must not alter the length of the ULEB128-encoded value. +==== Vendor identifiers + +Vendor identifiers are dummy symbols used in the corresponding `R_RISCV_VENDOR` +relocation (irrespective of ELF class/XLEN) and must be unique amongst all +vendors providing custom relocations. Vendor identifiers may be suffixed with a +tag to provide extra relocations for a given vendor. 
+ +NOTE: Please refer to the _RISC-V Toolchain Conventions_ +<<rv-toolchain-conventions>> for the full list. + ==== Calculation Symbols <> provides details on the variables used in relocation @@ -643,6 +714,20 @@ The PLT (Procedure Linkage Table) exists to allow function calls between dynamically linked shared objects. Each dynamic object has its own GOT (Global Offset Table) and PLT (Procedure Linkage Table). +RISC-V has defined several PLT styles, which are used for different situations. +The default PLT style should be used if the program does not meet the conditions +for using any other PLT style. + +[[plt-style]] +.PLT styles +[cols="1,2"] +[width=70%] +|=== +| Default PLT | - +| Unlabeled landing pad PLT | Must use this PLT style when `GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_UNLABELED` is set. +| Function signature based landing pad PLT | Must use this PLT style when `GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_FUNC_SIG` is set. +|=== + The first entry of a shared object PLT is a special entry that calls `_dl_runtime_resolve` to resolve the GOT offset for the called function. The `_dl_runtime_resolve` function in the dynamic loader resolves the @@ -650,8 +735,12 @@ GOT offsets lazily on the first call to any function, except when `LD_BIND_NOW` is set in which case the GOT entries are populated by the dynamic linker before the executable is started. Lazy resolution of GOT entries is intended to speed up program loading by deferring symbol -resolution to the first time the function is called. The first entry -in the PLT occupies two 16 byte entries: +resolution to the first time the function is called. + +The PLT entry is 16 bytes for the default PLT style and the simple landing pad +PLT style, and 32 bytes for the function signature based landing pad PLT style. 
+ +The first entry in the PLT occupies two 16 byte entries for the default PLT style: [,asm] ---- @@ -665,11 +754,48 @@ in the PLT occupies two 16 byte entries: jr t3 ---- -Subsequent function entry stubs in the PLT take up 16 bytes and load a -function pointer from the GOT. On the first call to a function, the -entry redirects to the first PLT entry which calls `_dl_runtime_resolve` -and fills in the GOT entry for subsequent calls to the function: +The first entry occupies three 16 byte entries for the simple landing pad PLT style: +[,asm] +---- +1: lpad 0 + auipc t2, %pcrel_hi(.got.plt) + sub t1, t1, t3 # shifted .got.plt offset + hdr size + 16 + l[w|d] t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve + addi t1, t1, -(hdr size + 16) # shifted .got.plt offset + addi t0, t2, %pcrel_lo(1b) # &.got.plt + srli t1, t1, log2(16/PTRSIZE) # .got.plt offset + l[w|d] t0, PTRSIZE(t0) # link map + jr t3 + nop + nop + nop +---- + +The first entry occupies 48 bytes for the function signature based landing pad PLT style: + +[,asm] +---- +1: lpad 0 + sub t1, t1, t3 # shifted .got.plt offset + hdr size + 20 + auipc t3, %pcrel_hi(.got.plt) + addi t0, t3, %pcrel_lo(1b) # &.got.plt + l[w|d] t3, %pcrel_lo(1b)(t3) # _dl_runtime_resolve + addi t1, t1, -(hdr size + 20) # shifted .got.plt offset + srli t1, t1, log2(32/PTRSIZE) # .got.plt offset + l[w|d] t0, PTRSIZE(t0) # link map + jr t3 + nop + nop + nop +---- + +Subsequent function entry stubs in the PLT take up 16 bytes or 32 bytes, depending +on the style. +On the first call to a function, the entry redirects to the first PLT entry +which calls `_dl_runtime_resolve` and fills in the GOT entry for subsequent +calls to the function. 
+The code sequences of the PLT entry for the default PLT style: [,asm] ---- 1: auipc t3, %pcrel_hi(function@.got.plt) @@ -678,6 +804,28 @@ and fills in the GOT entry for subsequent calls to the function: nop ---- +The code sequences of the PLT entry for the simple landing pad PLT style: +[,asm] +---- +1: lpad 0 + auipc t3, %pcrel_hi(function@.got.plt) + l[w|d] t3, %pcrel_lo(1b)(t3) + jalr t1, t3 +---- + +The code sequences of the PLT entry for the function signature based landing pad PLT style: +[,asm] +---- +1: lpad + auipc t3, %pcrel_hi(function@.got.plt) + l[w|d] t3, %pcrel_lo(1b)(t3) + lui t2, + jalr t1, t3 + nop + nop + nop +---- + ==== Procedure Calls `R_RISCV_CALL` and `R_RISCV_CALL_PLT` relocations are associated with @@ -1361,6 +1509,65 @@ that a linker or runtime loader needs to check for compatibility. The linker should ignore and discard unknown bits in program properties, and issue warnings or errors. +<<rv-prog-prop-type>> provides details of the RISC-V ELF program property; the +meaning of each column is given below: + + +Name:: The name of the program property type, omitting the prefix of `GNU_PROPERTY_RISCV_`. + +Value:: The type value for the program property type. + +Size:: The data type size held within this program property type. + +Description:: Additional information about the program property type. + + +[[rv-prog-prop-type]] +.RISC-V-specific program property types +[cols="3,3,2,5"] +[width=100%] +|=== +| Name | Value | Size | Description + +| FEATURE_1_AND | 0xc0000000 | 4-bytes | RISC-V processor-specific features used in the program. +|=== + +==== GNU_PROPERTY_RISCV_FEATURE_1_AND + + +`GNU_PROPERTY_RISCV_FEATURE_1_AND` describes a set of features, where each bit +represents a different feature. The linker should perform a bitwise AND +operation when merging different objects. 
+ +[%autowidth] +|=== +| Bit | Bit Name +| 0 | GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_UNLABELED +| 1 | GNU_PROPERTY_RISCV_FEATURE_1_CFI_SS +| 2 | GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_FUNC_SIG +|=== + +`GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_UNLABELED`: This bit indicates that all +executable sections are built to be compatible with the landing pad mechanism +provided by the `Zicfilp` extension. An executable or shared library with this +bit set is required to generate PLTs with the landing pad (`lpad`) instruction, +and all labels are set to `0`. + +`GNU_PROPERTY_RISCV_FEATURE_1_CFI_SS`: This bit indicates that all executable +sections are built to be compatible with the shadow stack mechanism provided by +the `Zicfiss` extension. Loading an executable or shared library with this bit +set requires the execution environment to provide either the `Zicfiss` extension +or the `Zimop` extension. When the executable or shared library is compiled with +compressed instructions then loading an executable with this bit set requires +the execution environment to provide the `Zicfiss` extension or to provide both +the `Zcmop` and `Zimop` extensions. + +`GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_FUNC_SIG`: This bit indicates that all executable +sections are built to be compatible with the landing pad mechanism provided by +the `Zicfilp` extension. An executable or shared library with this bit set is +required to generate PLTs with the landing pad (`lpad`) instruction, and all +labels are set to a value which is hashed from the function signature. + === Mapping Symbol The section can have a mixture of code and data or code with different ISAs. @@ -1405,6 +1612,124 @@ is not enough for the disassembler to disassemble the `rv64gcv` version correctly. Specifying ISA string appropriately with the two memcpy instruction mapping symbols helps the disassembler to disassemble instructions correctly. 
+== Label Value Computation for Function Signature based Scheme Landing Pad + +The label value for the function signature-based labeling scheme landing pad is +computed from the hash of the function signature string, which follows the same +scheme as the "Function types" mangling rule defined in the _Itanium {Cpp} ABI_ +<>. The function signature will also use the "Compression" rule +defined in the _Itanium {Cpp} ABI_. + +The label value is derived from the lower 20 bits of the MD5 hash result of the +function signature string. If the lower 20 bits are all zeros, use the next +20 bits, and continue using the next 20 bits until a non-zero value is obtained. +If fewer than 20 bits are available in the final segment, the remaining bits +will be zero-filled to make up 20 bits. If all 128 bits are zeros, the lower +20 bits of the MD5 hash result of the string "RISC-V" are used. + +Additionally, here are a few specific rules: + +- The `main` function uses the signature of + `(int, pointer to pointer to char) returning int` (`FiiPPcE`). +- `_dl_runtime_resolve` uses zero for the landing pad. +- The 'Y' component in the `` should be ignored. +- `` should be ignored. +- {Cpp} member functions should use the "Pointer-to-member types" mangling rule + defined in the _Itanium {Cpp} ABI_ <> with the following + additional rules: + - Member functions should use `v` for `` rather than the actual + class name. For example, use `1v` instead of `3foo` for the `` + in `class foo`. This rule only applies to the `` at the top level + of ``, and does not affect cases where an argument + contains a pointer to a member type. + - The return type of a virtual class member function, if it is a pointer or + reference to a class type, should have its class type mangled as `class v` + rather than the declared class type. Const and volatile type qualifiers + should be ignored if this rule applies. Multi-level pointers or references + are exempted from this rule. 
+ - Class destructors should use the signature `void (*)(void*)` (`FvPvE`). + - Static functions should follow the rules of non-member functions. +- `wchar_t` should match the type of the target platform. For example, on + Linux, it uses `int`, so it mangles to `i` rather than `w` for {Cpp}. +- Functions with an empty parameter list are treated as explicitly declaring + that they take no parameters (having `void` as the parameter list). + + +NOTE: The special rule for the return type of virtual class member functions is + defined to handle covariant return types. + +NOTE: Class destructors generally should not be called via indirect call, but + they may be registered as program destructors via `__cxa_atexit`. + Therefore, they must match the signature of the argument of + `__cxa_atexit`, which is `void (*)(void*)`. + +NOTE: `` is ignored due to C++ standard backward compatibility, + as it was introduced after {Cpp}17. + +Example: + +[,cxx] +---- + +double foo(int, float *); + +class Base +{ +public: + virtual void memfunc1(); + virtual void memfunc2(int); + virtual Base *memfunc3(int); +}; + +class Derived : public Base +{ +public: + virtual void memfunc1() override; + virtual Derived *memfunc3(int) override; + virtual void memfunc4(double); + void memfunc5(); +}; + +class OtherBase +{ +public: + virtual void memfunc2(int); +}; + +class OtherClass; + +class DerivedDerived : public Derived, OtherBase +{ +public: + virtual void memfunc2(int) override; + virtual DerivedDerived *memfunc3(int) override; + virtual void memfunc4(double) override; + DerivedDerived *memfunc6(); + OtherClass *memfunc7(float); + OtherClass &memfunc8(); + OtherClass memfunc9(float); + int *memfunc10(); +}; + +---- + +The function signatures for the above functions are described below: + +- `foo` is encoded as `FdiPfE`. +- `Base::memfunc1` and `Derived::memfunc1` are both encoded as `M1vFvvE`. 
+- `Base::memfunc2`, `OtherBase::memfunc2`, and `DerivedDerived::memfunc2` + are all encoded as `M1vFviE`. +- `Base::memfunc3`, `Derived::memfunc3`, and `DerivedDerived::memfunc3` are + encoded as `M1vFPviE`. +- `Derived::memfunc4` and `DerivedDerived::memfunc4` are both encoded as + `M1vFvdE`. +- `Derived::memfunc5` is encoded as `M1vFvvE`. +- `DerivedDerived::memfunc6` and `DerivedDerived::memfunc7` are encoded as + `M1vFPviE`. +- `DerivedDerived::memfunc8` is encoded as `M1vFRvvE`. +- `DerivedDerived::memfunc9` is encoded as `M1vF10OtherClassvE`. +- `DerivedDerived::memfunc10` is encoded as `M1vFPivE`. + == Linker Relaxation At link time, when all the memory objects have been resolved, the code sequence @@ -2020,3 +2345,6 @@ https://github.com/riscv/riscv-code-size-reduction * [[[rvv-intrinsic-doc]]] "RISC-V Vector Extension Intrinsic Document" https://github.com/riscv-non-isa/rvv-intrinsic-doc + +* [[[rv-toolchain-conventions]]] "RISC-V Toolchain Conventions" +https://github.com/riscv-non-isa/riscv-toolchain-conventions