Enable Qwen models family #236

Workflow file for this run

.github/workflows/pr_tests.yml at eaf4cdc

	name: GPU Enabled Integration Test on PRs

	on:
	pull_request:
	branches:
	- main
	push:
	branches:
	- ci-*

	concurrency:
	group: ${{ github.workflow }}-${{ github.head_ref \|\| github.run_id }}
	cancel-in-progress: true

	env:
	OPTIMUM_NVIDIA_IS_CI: ON
	HF_HUB_ENABLE_HF_TRANSFER: ON

	jobs:
	run_gpu_tests:
	strategy:
	fail-fast: false
	matrix:
	config:
	- name: Optimum-Nvidia Test Suite
	image: nvidia/cuda:12.6.3-devel-ubuntu24.04
	gpu_target: ["aws-g6-12xlarge-cache", "aws-g5-12xlarge-cache"]

	name: ${{ matrix.config.name }}
	runs-on:
	group: "${{matrix.gpu_target}}"

	container:
	image: ${{ matrix.config.image }}
	options: --mount type=tmpfs,destination=/tmp --privileged --shm-size 64gb --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/
	env:
	HF_TOKEN: ${{ secrets.OPTIMUM_NVIDIA_HUB_READ_TOKEN }}

	defaults:
	run:
	shell: bash

	steps:
	- name: Change ownership of /github/home
	run: chown -R $(whoami) /github/home

	- uses: actions/setup-python@v5
	with:
	python-version: '3.10'

	- name: Checkout optimum-nvidia
	uses: actions/checkout@v4
	with:
	fetch-depth: 1

	- name: Install dependencies
	run: \|
	apt update && apt install -y libmpich-dev libopenmpi-dev openmpi-bin git
	python3 -m pip install --upgrade -e .[quality,tests] --extra-index-url https://pypi.nvidia.com

	- name: Run nvidia-smi
	run: \|
	nvidia-smi

	- name: Print TensorRT-LLM version
	run: \|
	python -c "from tensorrt_llm import __version__; print(__version__)"

	- name: Run optimum-nvidia test-suite
	run: \|
	pytest -s -v -p no:warnings -o log_cli=true --ignore=tests/cli tests/

	run_optimum_cli_tests:
	strategy:
	fail-fast: false
	matrix:
	config:
	- name: Optimum-Nvidia CLI Test Suite
	image: nvidia/cuda:12.6.3-devel-ubuntu24.04
	gpu_target: ["aws-g6-12xlarge-cache", "aws-g5-12xlarge-cache"]

	name: ${{ matrix.config.name }}
	runs-on:
	group: "${{matrix.gpu_target}}"

	container:
	image: ${{ matrix.config.image }}
	options: --mount type=tmpfs,destination=/tmp --privileged --shm-size 64gb --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/
	env:
	HF_TOKEN: ${{ secrets.OPTIMUM_NVIDIA_HUB_READ_TOKEN }}

	defaults:
	run:
	shell: bash

	steps:
	- name: Change ownership of /github/home
	run: chown -R $(whoami) /github/home

	- uses: actions/setup-python@v5
	with:
	python-version: '3.10'

	- name: Checkout optimum-nvidia
	uses: actions/checkout@v4
	with:
	fetch-depth: 1

	- name: Install dependencies
	run: \|
	apt update && apt install -y openmpi-bin libopenmpi-dev git
	python3 -m pip install --upgrade -e .[quality,tests] --extra-index-url https://pypi.nvidia.com

	- name: Run nvidia-smi
	run: \|
	nvidia-smi

	- name: Print TensorRT-LLM version
	run: \|
	python -c "from tensorrt_llm import __version__; print(__version__)"

	- name: Run optimum-nvidia cli test-suite
	run: \|
	pytest -rx -s -p no:warnings tests/cli

	- name: Tailscale Wait
	if: ${{ failure() \|\| runner.debug == '1' }}
	uses: huggingface/tailscale-action@main
	with:
	waitForSSH: true
	authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
	slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
	slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Enable Qwen models family #236

Workflow file

Enable Qwen models family #236

Jobs

Run details

Workflow file for this run