Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SLPVectorizer] Miscompilation #122583

Closed
dtcxzyw opened this issue Jan 11, 2025 · 1 comment
Closed

[SLPVectorizer] Miscompilation #122583

dtcxzyw opened this issue Jan 11, 2025 · 1 comment

Comments

@dtcxzyw
Copy link
Member

dtcxzyw commented Jan 11, 2025

Reproducer: https://godbolt.org/z/7eEEeeKoo
Sorry, I cannot provide an Alive2 link, since Alive2 does not support llvm.vector.insert.v8i64.v4i64.

; bin/opt -passes=slp-vectorizer reduced.ll -S -o opt.ll

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Four-element i64 array, zero-initialized; main only reads from it.
@j = global [4 x i64] zeroinitializer

; Reduced reproducer: loads the four elements of @j, truncates them to i32,
; passes each through `add ..., 0`, and ORs all of the results together in two
; scalar or-chains. Since @j is zero-initialized and every add adds 0, every
; operand of the chains is 0 and the function returns 0.
define i32 @main() {
entry:
  ; Scalar loads of @j[3], @j[2], @j[1], @j[0] (byte offsets 24, 16, 8, 0).
  %.pre.i = load i64, ptr getelementptr inbounds nuw (i8, ptr @j, i64 24), align 8
  %.pre50.i = load i64, ptr getelementptr inbounds nuw (i8, ptr @j, i64 16), align 16
  %.pre51.i = load i64, ptr getelementptr inbounds nuw (i8, ptr @j, i64 8), align 8
  %.pre52.i = load i64, ptr @j, align 16
  ; %0 is not used anywhere else in this function.
  %0 = or i64 %.pre51.i, 0
  %1 = trunc i64 %.pre.i to i32
  %2 = add i32 %1, 0
  %3 = trunc i64 %.pre50.i to i32
  %4 = add i32 %3, 0
  %5 = trunc i64 %.pre51.i to i32
  %6 = add i32 %5, 0
  ; Note: the trunc operand here is the constant 0, not a loaded value.
  %7 = trunc i64 0 to i32
  %8 = add i32 %5, 0
  %9 = add i32 %7, 0
  %10 = add i32 %1, 0
  %11 = add i32 %3, 0
  %12 = add i32 %5, 0
  %13 = add i32 %7, 0
  ; Second set of truncs of the same loaded values (%14/%16/%18 duplicate
  ; %1/%3/%5); %20 is the only trunc of @j[0].
  %14 = trunc i64 %.pre.i to i32
  %15 = add i32 %14, 0
  %16 = trunc i64 %.pre50.i to i32
  %17 = add i32 %16, 0
  %18 = trunc i64 %.pre51.i to i32
  %19 = add i32 %18, 0
  %20 = trunc i64 %.pre52.i to i32
  ; First or-chain: combines twelve add results (%9, %13, %6, %8, %12, %4,
  ; %11, %2, %10, %15, %17, %19) into %30.
  %conv14.1.i = or i32 %9, %13
  %21 = or i32 %conv14.1.i, %6
  %22 = or i32 %21, %8
  %23 = or i32 %22, %12
  %24 = or i32 %23, %4
  %25 = or i32 %24, %11
  %26 = or i32 %25, %2
  %27 = or i32 %26, %10
  %28 = or i32 %27, %15
  %29 = or i32 %28, %17
  %30 = or i32 %29, %19
  ; Twelve more add results: three copies each of %14, %16, %18 and %20 plus 0.
  %31 = add i32 %14, 0
  %32 = add i32 %16, 0
  %33 = add i32 %18, 0
  %34 = add i32 %20, 0
  %35 = add i32 %14, 0
  %36 = add i32 %16, 0
  %37 = add i32 %18, 0
  %38 = add i32 %20, 0
  %39 = add i32 %14, 0
  %40 = add i32 %16, 0
  %41 = add i32 %18, 0
  %42 = add i32 %20, 0
  ; %inc.3.3.i.1 is not used anywhere else in this function.
  %inc.3.3.i.1 = or i64 %.pre52.i, 0
  ; Second or-chain: combines %31..%42 and finally folds in %30, the result of
  ; the first chain, so the return value is the OR of all 24 add results.
  %conv14.i.1 = or i32 %38, %34
  %conv14.1.i.1 = or i32 %conv14.i.1, %42
  %conv14.3.i.1 = or i32 %conv14.1.i.1, %33
  %conv14.145.i.1 = or i32 %conv14.3.i.1, %37
  %conv14.1.1.i.1 = or i32 %conv14.145.i.1, %41
  %conv14.3.1.i.1 = or i32 %conv14.1.1.i.1, %32
  %conv14.247.i.1 = or i32 %conv14.3.1.i.1, %36
  %conv14.1.2.i.1 = or i32 %conv14.247.i.1, %40
  %conv14.3.2.i.1 = or i32 %conv14.1.2.i.1, %31
  %conv14.349.i.1 = or i32 %conv14.3.2.i.1, %35
  %conv14.1.3.i.1 = or i32 %conv14.349.i.1, %39
  %conv14.3.3.i.1 = or i32 %conv14.1.3.i.1, %30
  ret i32 %conv14.3.3.i.1
}

Output:

source_filename = "/app/example.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Output of `opt -passes=slp-vectorizer` on the reproducer. The scalar or-chains
; have been replaced by two vector or-reductions (16 lanes and 8 lanes) whose
; results are OR-ed together and returned.
define i32 @main() {
  ; Single vector load of all four elements of @j.
  %0 = load <4 x i64>, ptr @j, align 16
  ; Scalar or with a poison operand; %1 is not used later in this function.
  %1 = or i64 poison, 0
  ; %2 is not used later in this function.
  %2 = shufflevector <4 x i64> %0, <4 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
  ; Widen %0 to 8 lanes (lanes 4-7 poison).
  %3 = shufflevector <4 x i64> %0, <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
  ; Lane 4 <- constant 0 (mask index 12 selects element 4 of the second
  ; operand), lane 5 <- element 1 of %3; all other lanes are poison.
  %4 = shufflevector <8 x i64> %3, <8 x i64> <i64 poison, i64 poison, i64 poison, i64 poison, i64 0, i64 poison, i64 poison, i64 poison>, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 1, i32 poison, i32 poison>
  ; Overwrite lanes 0-3 of %4 with %0; lanes 6-7 remain poison.
  %5 = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> %4, <4 x i64> %0, i64 0)
  %6 = trunc <8 x i64> %5 to <8 x i32>
  ; Replicate lanes 0-3 three times each and lanes 4-5 twice each; the poison
  ; lanes 6-7 are not selected, so all 16 result lanes are defined.
  %7 = shufflevector <8 x i32> %6, <8 x i32> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5>
  %8 = add <16 x i32> %7, zeroinitializer
  %9 = extractelement <4 x i64> %0, i32 0
  ; %inc.3.3.i.1 is not used later in this function.
  %inc.3.3.i.1 = or i64 %9, 0
  %10 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %8)
  ; Miscompile: the second reduction is taken over an all-poison vector, so %11
  ; is poison and so are %op.rdx and the return value (the llubi trace in this
  ; issue reports "Return a poison value"), whereas the original returns 0.
  ; NOTE(review): presumably this is the poison operand that the referenced fix
  ; replaces with the main vector operand - confirm against the fix commit.
  %11 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> poison)
  %op.rdx = or i32 %10, %11
  ret i32 %op.rdx
}

declare <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64>, <4 x i64>, i64 immarg) #0

declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>) #0

declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>) #0

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

lli output:

> bin/lli reduced.ll
> echo $?
0
> bin/lli opt.ll
> echo $?
255

llubi output:
Before:

> ./llubi reduced.ll --verbose
Entering function main
  %0 = getelementptr inbounds nuw i8, ptr @j, i64 24 -> Ptr 40[@j + 24]
  %.pre.i = load i64, ptr %0, align 8 -> i64 0
  %1 = getelementptr inbounds nuw i8, ptr @j, i64 16 -> Ptr 32[@j + 16]
  %.pre50.i = load i64, ptr %1, align 16 -> i64 0
  %2 = getelementptr inbounds nuw i8, ptr @j, i64 8 -> Ptr 24[@j + 8]
  %.pre51.i = load i64, ptr %2, align 8 -> i64 0
  %.pre52.i = load i64, ptr @j, align 16 -> i64 0
  %3 = or i64 %.pre51.i, 0 -> i64 0
  %4 = trunc i64 %.pre.i to i32 -> i32 0
  %5 = add i32 %4, 0 -> i32 0
  %6 = trunc i64 %.pre50.i to i32 -> i32 0
  %7 = add i32 %6, 0 -> i32 0
  %8 = trunc i64 %.pre51.i to i32 -> i32 0
  %9 = add i32 %8, 0 -> i32 0
  %10 = trunc i64 0 to i32 -> i32 0
  %11 = add i32 %8, 0 -> i32 0
  %12 = add i32 %10, 0 -> i32 0
  %13 = add i32 %4, 0 -> i32 0
  %14 = add i32 %6, 0 -> i32 0
  %15 = add i32 %8, 0 -> i32 0
  %16 = add i32 %10, 0 -> i32 0
  %17 = trunc i64 %.pre.i to i32 -> i32 0
  %18 = add i32 %17, 0 -> i32 0
  %19 = trunc i64 %.pre50.i to i32 -> i32 0
  %20 = add i32 %19, 0 -> i32 0
  %21 = trunc i64 %.pre51.i to i32 -> i32 0
  %22 = add i32 %21, 0 -> i32 0
  %23 = trunc i64 %.pre52.i to i32 -> i32 0
  %conv14.1.i = or i32 %12, %16 -> i32 0
  %24 = or i32 %conv14.1.i, %9 -> i32 0
  %25 = or i32 %24, %11 -> i32 0
  %26 = or i32 %25, %15 -> i32 0
  %27 = or i32 %26, %7 -> i32 0
  %28 = or i32 %27, %14 -> i32 0
  %29 = or i32 %28, %5 -> i32 0
  %30 = or i32 %29, %13 -> i32 0
  %31 = or i32 %30, %18 -> i32 0
  %32 = or i32 %31, %20 -> i32 0
  %33 = or i32 %32, %22 -> i32 0
  %34 = add i32 %17, 0 -> i32 0
  %35 = add i32 %19, 0 -> i32 0
  %36 = add i32 %21, 0 -> i32 0
  %37 = add i32 %23, 0 -> i32 0
  %38 = add i32 %17, 0 -> i32 0
  %39 = add i32 %19, 0 -> i32 0
  %40 = add i32 %21, 0 -> i32 0
  %41 = add i32 %23, 0 -> i32 0
  %42 = add i32 %17, 0 -> i32 0
  %43 = add i32 %19, 0 -> i32 0
  %44 = add i32 %21, 0 -> i32 0
  %45 = add i32 %23, 0 -> i32 0
  %inc.3.3.i.1 = or i64 %.pre52.i, 0 -> i64 0
  %conv14.i.1 = or i32 %41, %37 -> i32 0
  %conv14.1.i.1 = or i32 %conv14.i.1, %45 -> i32 0
  %conv14.3.i.1 = or i32 %conv14.1.i.1, %36 -> i32 0
  %conv14.145.i.1 = or i32 %conv14.3.i.1, %40 -> i32 0
  %conv14.1.1.i.1 = or i32 %conv14.145.i.1, %44 -> i32 0
  %conv14.3.1.i.1 = or i32 %conv14.1.1.i.1, %35 -> i32 0
  %conv14.247.i.1 = or i32 %conv14.3.1.i.1, %39 -> i32 0
  %conv14.1.2.i.1 = or i32 %conv14.247.i.1, %43 -> i32 0
  %conv14.3.2.i.1 = or i32 %conv14.1.2.i.1, %34 -> i32 0
  %conv14.349.i.1 = or i32 %conv14.3.2.i.1, %38 -> i32 0
  %conv14.1.3.i.1 = or i32 %conv14.349.i.1, %42 -> i32 0
  %conv14.3.3.i.1 = or i32 %conv14.1.3.i.1, %33 -> i32 0
  ret i32 %conv14.3.3.i.1
Exiting function main

After:

> ./llubi opt.ll --verbose
Entering function main
  %0 = load <4 x i64>, ptr @j, align 16 -> { i64 0, i64 0, i64 0, i64 0 }
  %1 = or i64 poison, 1 -> poison
  %2 = or i64 poison, 0 -> poison
  %3 = shufflevector <4 x i64> %0, <4 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison> -> { poison, poison, poison, poison, poison, i64 0, poison, poison }
  %4 = shufflevector <4 x i64> %0, <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> -> { i64 0, i64 0, i64 0, i64 0, poison, poison, poison, poison }
  %5 = shufflevector <8 x i64> %4, <8 x i64> <i64 poison, i64 poison, i64 poison, i64 poison, i64 0, i64 poison, i64 poison, i64 poison>, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 1, i32 poison, i32 poison> -> { poison, poison, poison, poison, i64 0, i64 0, poison, poison }
  %6 = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> %5, <4 x i64> %0, i64 0) -> { i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, poison, poison }
  %7 = trunc <8 x i64> %6 to <8 x i32> -> { i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, poison, poison }
  %8 = shufflevector <8 x i32> %7, <8 x i32> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5> -> { i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }
  %9 = add <16 x i32> %8, zeroinitializer -> { i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }
  %10 = extractelement <4 x i64> %0, i32 0 -> i64 0
  %inc.3.3.i.1 = or i64 %10, 0 -> i64 0
  %11 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %9) -> i32 0
  %12 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> poison) -> poison
  %op.rdx = or i32 %11, %12 -> poison
  ret i32 %op.rdx
Exiting function main

UB triggered: Return a poison value
Exited with immediate UB.
Stacktrace:
@dtcxzyw
Copy link
Member Author

dtcxzyw commented Jan 11, 2025

llvm version: 6329355

kazutakahirata pushed a commit to kazutakahirata/llvm-project that referenced this issue Jan 13, 2025
When extracts are vectorized and some of them are poison values instead of
instructions, the vectorized operand must be set not to poison, but to the
main vector operand of the main extract instruction.

Fixes llvm#122583
DKLoehr pushed a commit to DKLoehr/llvm-project that referenced this issue Jan 17, 2025
When extracts are vectorized and some of them are poison values instead of
instructions, the vectorized operand must be set not to poison, but to the
main vector operand of the main extract instruction.

Fixes llvm#122583
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant