This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit

fix mha compile bug. add bestla to python test
luoyu-intel committed Jun 3, 2024
1 parent 93339d7 commit 1c9be78
Showing 2 changed files with 4 additions and 2 deletions.
1 change: 1 addition & 0 deletions .github/workflows/unit-test-llmruntime.yml
@@ -5,6 +5,7 @@ on:
     branches: [main]
     paths:
       - neural_speed/**
+      - bestla/**
       - tests/**
       - .github/workflows/unit-test-llmruntime.yml
       - .github/workflows/unitTest/**
5 changes: 3 additions & 2 deletions neural_speed/core/layers/mha_dense.cpp
@@ -73,9 +73,10 @@ bool bestla_reordered_attn_fp32_support(const attn_shape_t* params) {
// TODO(Yi): check K V's layout
if (_cd->AMX_BF16()) return true;
#endif
// use avx2 and f16c on avx2 platforms
// TODO: check avx2 mha on server
#if CompileAVX2()
return _cd->AVX2();
#endif
return false;
}
// kv cache sizes in bytes per layer per batch per beam
void bestla_reordered_attn_fp32_batch_kv_info(const kv_shape_t* params, kv_cache_info_t* out) {
