diff --git a/.github/container/test-maxtext.sh b/.github/container/test-maxtext.sh
index ebb2afcdc..1119b6724 100755
--- a/.github/container/test-maxtext.sh
+++ b/.github/container/test-maxtext.sh
@@ -228,7 +228,7 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1
 
 export BASE_XLA_FLAGS=${BASE_XLA_FLAGS:---xla_gpu_enable_latency_hiding_scheduler=true
                 --xla_gpu_enable_triton_gemm=false
-                --xla_gpu_graph_level=0 
+                --xla_gpu_enable_command_buffer=
                 --xla_gpu_all_reduce_combine_threshold_bytes=1073741824 
                 --xla_gpu_all_gather_combine_threshold_bytes=1073741824 
                 --xla_gpu_reduce_scatter_combine_threshold_bytes=134217728
diff --git a/rosetta/docs/PGLE.md b/rosetta/docs/PGLE.md
index 2425ddffe..bc3ce5d1e 100644
--- a/rosetta/docs/PGLE.md
+++ b/rosetta/docs/PGLE.md
@@ -62,7 +62,7 @@ In order to get the best performance with PGLE, here is a list of all recommende
 ```
 export XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=true
 --xla_gpu_enable_triton_gemm=false
---xla_gpu_graph_level=0
+--xla_gpu_enable_command_buffer=
 --xla_gpu_all_reduce_combine_threshold_bytes=1073741824
 --xla_gpu_all_gather_combine_threshold_bytes=1073741824
 --xla_gpu_reduce_scatter_combine_threshold_bytes=1073741824
diff --git a/rosetta/rosetta/projects/maxtext/README.md b/rosetta/rosetta/projects/maxtext/README.md
index 44baa19ef..97eac185d 100644
--- a/rosetta/rosetta/projects/maxtext/README.md
+++ b/rosetta/rosetta/projects/maxtext/README.md
@@ -69,7 +69,7 @@ The [GPU Performance document](../../../docs/GPU_performance.md) provides a deta
 ```
 XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=true
             --xla_gpu_enable_triton_gemm=false
-            --xla_gpu_graph_level=0
+            --xla_gpu_enable_command_buffer=
             --xla_gpu_all_reduce_combine_threshold_bytes=1073741824 
             --xla_gpu_all_gather_combine_threshold_bytes=1073741824 
             --xla_gpu_reduce_scatter_combine_threshold_bytes=134217728
diff --git a/rosetta/rosetta/projects/maxtext/scripts/example_slurm.sub b/rosetta/rosetta/projects/maxtext/scripts/example_slurm.sub
index 0ca3fd802..a9d62e55c 100644
--- a/rosetta/rosetta/projects/maxtext/scripts/example_slurm.sub
+++ b/rosetta/rosetta/projects/maxtext/scripts/example_slurm.sub
@@ -54,7 +54,7 @@ export NCCL_IB_SL=1
 # Set XLA Flags
 export XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=true
                 --xla_gpu_enable_triton_gemm=false
-                --xla_gpu_graph_level=0
+                --xla_gpu_enable_command_buffer=
                 --xla_gpu_all_reduce_combine_threshold_bytes=1073741824
                 --xla_gpu_all_gather_combine_threshold_bytes=1073741824
                 --xla_gpu_reduce_scatter_combine_threshold_bytes=134217728
diff --git a/rosetta/rosetta/projects/maxtext/xla_flags/llama2-7b-1N8G.env b/rosetta/rosetta/projects/maxtext/xla_flags/llama2-7b-1N8G.env
index d999f5b5e..3730855fc 100644
--- a/rosetta/rosetta/projects/maxtext/xla_flags/llama2-7b-1N8G.env
+++ b/rosetta/rosetta/projects/maxtext/xla_flags/llama2-7b-1N8G.env
@@ -5,7 +5,7 @@ THRESHOLD_BYTES=1073741824
 export XLA_FLAGS="\
     --xla_gpu_enable_latency_hiding_scheduler=true \
     --xla_gpu_enable_triton_gemm=false \
-    --xla_gpu_graph_level=0 \
+    --xla_gpu_enable_command_buffer= \
     --xla_gpu_enable_highest_priority_async_stream=true \
     --xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
     --xla_gpu_all_gather_combine_threshold_bytes=$((THRESHOLD_BYTES/(NUM_NODES*NUM_GPUS))) \
diff --git a/rosetta/rosetta/projects/pax/README.md b/rosetta/rosetta/projects/pax/README.md
index d1829b847..a249fd461 100644
--- a/rosetta/rosetta/projects/pax/README.md
+++ b/rosetta/rosetta/projects/pax/README.md
@@ -141,7 +141,7 @@ For the the 126M model, we recommend setting `--xla_gpu_all_reduce_combine_thres
 BASE_XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=true
                 --xla_gpu_enable_triton_gemm=false
                 --xla_gpu_all_reduce_combine_threshold_bytes=33554432
-                --xla_gpu_graph_level=0" bash run_pile_multinode.sh ...
+                --xla_gpu_enable_command_buffer=" bash run_pile_multinode.sh ...
 ```
 
 # Configs
diff --git a/rosetta/rosetta/projects/pax/xla_flags/common.env b/rosetta/rosetta/projects/pax/xla_flags/common.env
index 26c819143..139544734 100644
--- a/rosetta/rosetta/projects/pax/xla_flags/common.env
+++ b/rosetta/rosetta/projects/pax/xla_flags/common.env
@@ -6,7 +6,7 @@ export XLA_FLAGS="\
     --xla_gpu_enable_highest_priority_async_stream=true \
     --xla_gpu_enable_triton_softmax_fusion=false \
     --xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
-    --xla_gpu_graph_level=0 \
+    --xla_gpu_enable_command_buffer= \
     "
 export XLA_PYTHON_CLIENT_MEM_FRACTION=0.8
 unset THRESHOLD_BYTES
diff --git a/rosetta/rosetta/projects/pax/xla_flags/gpt-126m.env b/rosetta/rosetta/projects/pax/xla_flags/gpt-126m.env
index e5b97b466..15159305b 100644
--- a/rosetta/rosetta/projects/pax/xla_flags/gpt-126m.env
+++ b/rosetta/rosetta/projects/pax/xla_flags/gpt-126m.env
@@ -6,7 +6,7 @@ export XLA_FLAGS="\
     --xla_gpu_enable_highest_priority_async_stream=true \
     --xla_gpu_enable_triton_softmax_fusion=false \
     --xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
-    --xla_gpu_graph_level=0 \
+    --xla_gpu_enable_command_buffer= \
     --xla_gpu_enable_cudnn_fmha=false \
     "
 export XLA_PYTHON_CLIENT_MEM_FRACTION=0.8
diff --git a/rosetta/rosetta/projects/pax/xla_flags/grok-proxy.env b/rosetta/rosetta/projects/pax/xla_flags/grok-proxy.env
index e48b76dcf..cc2ef61b6 100644
--- a/rosetta/rosetta/projects/pax/xla_flags/grok-proxy.env
+++ b/rosetta/rosetta/projects/pax/xla_flags/grok-proxy.env
@@ -8,7 +8,7 @@ export XLA_FLAGS="\
     --xla_gpu_enable_highest_priority_async_stream=true \
     --xla_gpu_enable_triton_softmax_fusion=false \
     --xla_gpu_all_reduce_combine_threshold_bytes=${ALL_REDUCE_THRESHOLD_BYTES} \
-    --xla_gpu_graph_level=0 \
+    --xla_gpu_enable_command_buffer= \
     --xla_gpu_all_gather_combine_threshold_bytes=${ALL_GATHER_THRESHOLD_BYTES} \
     --xla_gpu_reduce_scatter_combine_threshold_bytes=${REDUCE_SCATTER_THRESHOLD_BYTES} \
     --xla_gpu_enable_pipelined_all_gather=true \