Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow nsys 2024.7 installation. #1176

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/container/install-nsight.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export DEBIAN_FRONTEND=noninteractive
export TZ=America/Los_Angeles

apt-get update
apt-get install -y nsight-compute nsight-systems-cli-2024.6.1
apt-get install -y nsight-compute nsight-systems-cli
apt-get clean

rm -rf /var/lib/apt/lists/*
28 changes: 17 additions & 11 deletions .github/container/nsys_jax/nsys_jax/data_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,17 +141,23 @@ def _load_nvtx_gpu_proj_trace_single(
"Stack Level": "Lvl",
"TID": "TID",
}
if set(df.columns) == alt_rename_map.keys():
tsl_prefix = ""
df = df.rename(
columns={k: v for k, v in alt_rename_map.items() if v is not None}
)
df["ProjDurMs"] = 1e-6 * (df.pop("End") - df["Start"])
df["ProjStartMs"] = 1e-6 * df.pop("Start")
df["RangeStack"] = df["RangeStack"].map(
lambda stack: ":" + ":".join(map(str, stack))
)
# TODO: add OrigDurMs, OrigStartMs
alt_rename_map_2024_7 = alt_rename_map | {
"Device ID": "DeviceId",
"Stream ID": "StreamId",
}
for rename_map in [alt_rename_map, alt_rename_map_2024_7]:
if set(df.columns) == rename_map.keys():
tsl_prefix = ""
df = df.rename(
columns={k: v for k, v in rename_map.items() if v is not None}
)
df["ProjDurMs"] = 1e-6 * (df.pop("End") - df["Start"])
df["ProjStartMs"] = 1e-6 * df.pop("Start")
df["RangeStack"] = df["RangeStack"].map(
lambda stack: ":" + ":".join(map(str, stack))
)
# TODO: add OrigDurMs, OrigStartMs
break
else:
tsl_prefix = "TSL:"
df = df.drop(columns=["Style"])
Expand Down
116 changes: 114 additions & 2 deletions .github/container/nsys_jax/nsys_jax/scripts/patch_nsys.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import shutil
import subprocess

patch_content = r"""diff --git a/nsys_recipe/lib/nvtx.py b/nsys_recipe/lib/nvtx.py
nsys_2024_5_and_6_patch_content = r"""diff --git a/nsys_recipe/lib/nvtx.py b/nsys_recipe/lib/nvtx.py
index 2470043..7abf892 100644
--- a/nsys_recipe/lib/nvtx.py
+++ b/nsys_recipe/lib/nvtx.py
Expand All @@ -29,6 +29,111 @@
"rangeId": "Range ID",
"""

nsys_2024_7_patch_content = r'''diff --git a/nsys_recipe/lib/nvtx.py b/nsys_recipe/lib/nvtx.py
index 1e958f8..d08bb99 100644
--- a/nsys_recipe/lib/nvtx.py
+++ b/nsys_recipe/lib/nvtx.py
@@ -162,7 +162,7 @@ def _compute_gpu_projection_df(nvtx_df, cuda_df, cuda_nvtx_index_map):
starts.append(start)
ends.append(end)

- return (
+ df = (
pd.DataFrame(
{
"text": nvtx_df.loc[list(nvtx_gpu_end_dict.keys()) + indices, "text"],
@@ -172,11 +172,44 @@ def _compute_gpu_projection_df(nvtx_df, cuda_df, cuda_nvtx_index_map):
"tid": nvtx_df.loc[list(nvtx_gpu_end_dict.keys()) + indices, "tid"],
}
)
- .sort_values(by=["start", "end"], ascending=[True, False])
+ ).reset_index()
+
+ return (
+ df.sort_values(by=["start", "end", "index"], ascending=[True, False, True])
+ .drop(columns=["index"])
.reset_index(drop=True)
)


+def _compute_grouped_gpu_projection_df(
+ nvtx_df, cuda_df, cuda_nvtx_index_map, per_gpu=False, per_stream=False
+):
+ group_by_elements = []
+ if per_stream:
+ group_by_elements.append("streamId")
+ if per_gpu:
+ group_by_elements.append("deviceId")
+
+ if not group_by_elements:
+ return _compute_gpu_projection_df(nvtx_df, cuda_df, cuda_nvtx_index_map)
+
+ dfs = []
+ cuda_gdf = cuda_df.groupby(group_by_elements)
+
+ for group_keys, cuda_group_df in cuda_gdf:
+ df = _compute_gpu_projection_df(nvtx_df, cuda_group_df, cuda_nvtx_index_map)
+ if df.empty:
+ continue
+
+ if per_stream:
+ df["streamId"] = group_keys[group_by_elements.index("streamId")]
+ if per_gpu:
+ df["deviceId"] = group_keys[group_by_elements.index("deviceId")]
+ dfs.append(df)
+
+ return pd.concat(dfs, ignore_index=True)
+
+
def _find_cuda_nvtx_ranges(nvtx_df, cuda_df):
"""Find the NVTX ranges that enclose each CUDA operation.

@@ -258,8 +291,8 @@ def project_nvtx_onto_gpu(nvtx_df, cuda_df):

cuda_tid_df = cuda_gdf.get_group(global_tid)
cuda_nvtx_index_map = _find_cuda_nvtx_ranges(nvtx_tid_df, cuda_tid_df)
- df = _compute_gpu_projection_df(
- filtered_nvtx_df, cuda_tid_df, cuda_nvtx_index_map
+ df = _compute_grouped_gpu_projection_df(
+ filtered_nvtx_df, cuda_tid_df, cuda_nvtx_index_map, True, True
)
dfs.append(df)

diff --git a/nsys_recipe/lib/table_config.py b/nsys_recipe/lib/table_config.py
index e412c4f..db9449e 100644
--- a/nsys_recipe/lib/table_config.py
+++ b/nsys_recipe/lib/table_config.py
@@ -48,6 +48,7 @@ def get_cuda_gpu_dict():
"deviceId",
"contextId",
"greenContextId",
+ "streamId",
],
"CUPTI_ACTIVITY_KIND_MEMSET": [
"correlationId",
@@ -57,6 +58,7 @@ def get_cuda_gpu_dict():
"deviceId",
"contextId",
"greenContextId",
+ "streamId",
],
}

diff --git a/nsys_recipe/recipes/nvtx_gpu_proj_trace/nvtx_gpu_proj_trace.py b/nsys_recipe/recipes/nvtx_gpu_proj_trace/nvtx_gpu_proj_trace.py
index 2f05d50..e52dabe 100644
--- a/nsys_recipe/recipes/nvtx_gpu_proj_trace/nvtx_gpu_proj_trace.py
+++ b/nsys_recipe/recipes/nvtx_gpu_proj_trace/nvtx_gpu_proj_trace.py
@@ -107,6 +107,8 @@ class NvtxGpuProjTrace(recipe.Recipe):
"rangeId": "Range ID",
"parentId": "Parent ID",
"rangeStack": "Range Stack",
+ "deviceId": "Device ID",
+ "streamId": "Stream ID",
}

proj_nvtx_df = proj_nvtx_df.rename(columns=name_dict)[name_dict.values()]
'''


def main():
"""
Expand All @@ -41,7 +146,14 @@ def main():
r"^NVIDIA Nsight Systems version (\d+\.\d+\.\d+)\.\d+-\d+v\d+$", nsys_version
)
assert m is not None, f"Could not parse: {nsys_version}"
if m.group(1) in {"2024.5.1", "2024.6.1"}:
match m.group(1):
case "2024.5.1" | "2024.6.1":
patch_content = nsys_2024_5_and_6_patch_content
case "2024.7.1":
patch_content = nsys_2024_7_patch_content
case _:
patch_content = None
if patch_content is not None:
print(f"Patching Nsight Systems version {m.group(1)}")
# e.g. /opt/nvidia/nsight-systems-cli/2024.7.1/target-linux-x64
tdir = os.path.dirname(os.path.realpath(nsys))
Expand Down
Loading