Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[clang] Worse code gen when default constructing with () than {} #122560

Open
davidstone opened this issue Jan 11, 2025 · 2 comments
Open

[clang] Worse code gen when default constructing with () than {} #122560

davidstone opened this issue Jan 11, 2025 · 2 comments

Comments

@davidstone
Copy link
Contributor

The following translation unit

long get();
void unused() noexcept;

struct s {
	~s() {
		if (m0 != 0) {
			unused();
		}
	}

	long m0 = 0;
	long m1 = 0;
};


s f() {
#if defined USE_BRACES
	auto x = s{};
#else
	auto x = s();
#endif
	x.m1 = get();
	return x;
}

generates the following code when compiled with -DUSE_BRACES -O3 -emit-llvm:

%struct.s = type { i64, i64 }

define dso_local void @f()(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.s) align 8 initializes((0, 16)) %agg.result) local_unnamed_addr #0 personality ptr @__gxx_personality_v0 !dbg !21 {
entry:
    #dbg_value(ptr %agg.result, !24, !DIExpression(DW_OP_deref), !25)
  %m1 = getelementptr inbounds nuw i8, ptr %agg.result, i64 8, !dbg !26
  store i64 0, ptr %agg.result, align 8, !dbg !26
  %call = tail call noundef i64 @get()(), !dbg !27
  store i64 %call, ptr %m1, align 8, !dbg !28
  ret void, !dbg !34
}

declare !dbg !35 noundef i64 @get()() local_unnamed_addr #1

declare i32 @__gxx_personality_v0(...)

attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

but generates the following when compiled with -O3 -emit-llvm:

%struct.s = type { i64, i64 }

define dso_local void @f()(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.s) align 8 initializes((0, 16)) %agg.result) local_unnamed_addr #0 personality ptr @__gxx_personality_v0 !dbg !21 {
entry:
    #dbg_value(ptr %agg.result, !24, !DIExpression(DW_OP_deref), !25)
    #dbg_value(ptr %agg.result, !26, !DIExpression(), !31)
  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %agg.result, i8 0, i64 16, i1 false), !dbg !33
  %call = tail call noundef i64 @get()(), !dbg !34
  %m1 = getelementptr inbounds nuw i8, ptr %agg.result, i64 8, !dbg !35
  store i64 %call, ptr %m1, align 8, !dbg !36
  ret void, !dbg !42
}

declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1

declare !dbg !43 noundef i64 @get()() local_unnamed_addr #2

declare i32 @__gxx_personality_v0(...)

attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

These correspond to the assembly

-DUSE_BRACES:

f():
        push    rbx
        mov     rbx, rdi
        mov     qword ptr [rdi], 0
        call    get()@PLT
        mov     qword ptr [rbx + 8], rax
        mov     rax, rbx
        pop     rbx
        ret

and without:

f():
        push    rbx
        mov     rbx, rdi
        xorps   xmm0, xmm0
        movups  xmmword ptr [rdi], xmm0
        call    get()@PLT
        mov     qword ptr [rbx + 8], rax
        mov     rax, rbx
        pop     rbx
        ret

See it live: https://godbolt.org/z/hfcPeTsfo

@llvmbot llvmbot added the clang Clang issues not falling into any other category label Jan 11, 2025
@davidstone
Copy link
Contributor Author

Note that constructing with the C++20 syntax of s(0, 0) or s(0) is also sufficient to get the good code generation. It is specifically s() that falls into the slow path.

@EugeneZelenko EugeneZelenko added clang:codegen and removed clang Clang issues not falling into any other category labels Jan 11, 2025
@llvmbot
Copy link
Member

llvmbot commented Jan 11, 2025

@llvm/issue-subscribers-clang-codegen

Author: David Stone (davidstone)

The following translation unit
long get();
void unused() noexcept;

struct s {
	~s() {
		if (m0 != 0) {
			unused();
		}
	}

	long m0 = 0;
	long m1 = 0;
};


s f() {
#if defined USE_BRACES
	auto x = s{};
#else
	auto x = s();
#endif
	x.m1 = get();
	return x;
}

generates the following code when compiled with -DUSE_BRACES -O3 -emit-llvm:

%struct.s = type { i64, i64 }

define dso_local void @f()(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.s) align 8 initializes((0, 16)) %agg.result) local_unnamed_addr #0 personality ptr @__gxx_personality_v0 !dbg !21 {
entry:
    #dbg_value(ptr %agg.result, !24, !DIExpression(DW_OP_deref), !25)
  %m1 = getelementptr inbounds nuw i8, ptr %agg.result, i64 8, !dbg !26
  store i64 0, ptr %agg.result, align 8, !dbg !26
  %call = tail call noundef i64 @get()(), !dbg !27
  store i64 %call, ptr %m1, align 8, !dbg !28
  ret void, !dbg !34
}

declare !dbg !35 noundef i64 @get()() local_unnamed_addr #1

declare i32 @__gxx_personality_v0(...)

attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

but generates the following when compiled with -O3 -emit-llvm:

%struct.s = type { i64, i64 }

define dso_local void @f()(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.s) align 8 initializes((0, 16)) %agg.result) local_unnamed_addr #0 personality ptr @__gxx_personality_v0 !dbg !21 {
entry:
    #dbg_value(ptr %agg.result, !24, !DIExpression(DW_OP_deref), !25)
    #dbg_value(ptr %agg.result, !26, !DIExpression(), !31)
  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %agg.result, i8 0, i64 16, i1 false), !dbg !33
  %call = tail call noundef i64 @get()(), !dbg !34
  %m1 = getelementptr inbounds nuw i8, ptr %agg.result, i64 8, !dbg !35
  store i64 %call, ptr %m1, align 8, !dbg !36
  ret void, !dbg !42
}

declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1

declare !dbg !43 noundef i64 @get()() local_unnamed_addr #2

declare i32 @__gxx_personality_v0(...)

attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

These correspond to the assembly

-DUSE_BRACES:

f():
        push    rbx
        mov     rbx, rdi
        mov     qword ptr [rdi], 0
        call    get()@PLT
        mov     qword ptr [rbx + 8], rax
        mov     rax, rbx
        pop     rbx
        ret

and without:

f():
        push    rbx
        mov     rbx, rdi
        xorps   xmm0, xmm0
        movups  xmmword ptr [rdi], xmm0
        call    get()@PLT
        mov     qword ptr [rbx + 8], rax
        mov     rax, rbx
        pop     rbx
        ret

See it live: https://godbolt.org/z/hfcPeTsfo

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

3 participants