Skip to content

Commit

Permalink
[PENG-1676] Add re-submit endpoint to API's job_submissions router (#643
Browse files Browse the repository at this point in the history
)

* feat(api): add clone endpoint to job-submissions

* code review

* add unit tests and enhance implementation

* fix(api): cloned entries getting the created_at and updated_at from parent

* code review
  • Loading branch information
fschuch authored Nov 8, 2024
1 parent c805f81 commit 6350267
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 2 deletions.
3 changes: 1 addition & 2 deletions jobbergate-api/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
This file keeps track of all notable changes to jobbergate-api

## Unreleased

- Added clone capability to job submissions so they can be resubmitted to the cluster when needed [PENG-1676, ASP-4597]

## 5.4.0a2 -- 2024-11-06
- Added back `libpq-dev` and `gcc` to the Dockerfile
Expand All @@ -19,7 +19,6 @@ This file keeps track of all notable changes to jobbergate-api
- For Job Script Template files
- For Job Script Template workflow files


## 5.3.0 -- 2024-09-09

- Fixed issue preventing the creation of a job script from an empty template file
Expand Down
33 changes: 33 additions & 0 deletions jobbergate-api/jobbergate_api/apps/job_submissions/routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,39 @@ async def job_submission_create(
return new_job_submission


@router.post(
    "/clone/{id}",
    status_code=status.HTTP_201_CREATED,
    response_model=JobSubmissionDetailedView,
    description="Endpoint for cloning a job submission under the CREATED status for a new run on the cluster",
)
async def job_submission_clone(
    id: int = Path(...),
    secure_services: SecureService = Depends(
        secure_services(Permissions.ADMIN, Permissions.JOB_SUBMISSIONS_CREATE, ensure_email=True)
    ),
):
    """
    Create a fresh copy of an existing job submission so it can be run again.

    The copy is owned by the caller (the email from the identity payload), starts in the
    ``CREATED`` status and has its ``slurm_job_id`` reset to ``None``; every other value
    is left for ``clone_instance`` to carry over from the source entry.

    Args:
        id: Identifier of the job submission to clone (path parameter).
        secure_services: Services bound to the authenticated caller; requires the ADMIN or
            JOB_SUBMISSIONS_CREATE permission and an email on the identity.

    Returns:
        The newly created job submission.

    Raises:
        HTTPException: 400 when the source submission has no parent job script.
    """
    logger.info(f"Cloning job submission {id=}")

    submission_crud = secure_services.crud.job_submission
    # NOTE(review): lookup failures are presumably surfaced by ``get`` itself — confirm in the crud service.
    source_submission = await submission_crud.get(id)

    # A submission that lost its job script has nothing to run, so refuse to clone it.
    if source_submission.job_script_id is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Cannot resubmit a job submission without a parent job script",
        )

    # Values that must differ from the source entry on the clone.
    overrides = dict(
        owner_email=secure_services.identity_payload.email,
        status=JobSubmissionStatus.CREATED,
        slurm_job_id=None,
    )
    return await submission_crud.clone_instance(source_submission, **overrides)


@router.get(
"/{id}",
description="Endpoint to get a job_submission",
Expand Down
5 changes: 5 additions & 0 deletions jobbergate-api/jobbergate_api/apps/job_submissions/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@
description="The arguments used to submit the job to the slurm queue",
example=["--exclusive", "--job-name=example-job"],
),
cloned_from_id=MetaField(
description="Indicates the id this entry has been cloned from, if any.",
example=101,
),
)


Expand Down Expand Up @@ -159,6 +163,7 @@ class JobSubmissionBaseView(TableResource):
client_id: str
status: JobSubmissionStatus
slurm_job_state: Optional[SlurmJobState] = None
cloned_from_id: Optional[int] = None

model_config = ConfigDict(json_schema_extra=job_submission_meta_mapper)

Expand Down
3 changes: 3 additions & 0 deletions jobbergate-api/jobbergate_api/apps/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,9 @@ async def clone_instance(self, original_instance: CrudModel, **incoming_data) ->
data.update(incoming_data)
data["cloned_from_id"] = original_instance.id

data.pop("created_at", None)
data.pop("updated_at", None)

return await self.create(**data)

async def delete(self, locator: Any) -> None:
Expand Down
85 changes: 85 additions & 0 deletions jobbergate-api/tests/apps/job_submissions/test_routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import json
from datetime import datetime

import pytest
from fastapi import status
Expand Down Expand Up @@ -350,6 +351,90 @@ async def test_create_job_submission_without_client_id(
assert response.status_code == status.HTTP_400_BAD_REQUEST


@pytest.mark.parametrize("permission", (Permissions.ADMIN, Permissions.JOB_SUBMISSIONS_CREATE))
async def test_clone_job_submission__success(
    permission,
    fill_job_script_data,
    fill_job_submission_data,
    client,
    inject_security_header,
    tester_email,
    job_script_data_as_string,
    synth_session,
    synth_services,
):
    """
    Check that cloning a job submission succeeds for each permission that allows it.

    A job script with an entrypoint file and an ABORTED submission owned by the tester are
    created first; the clone is then requested by a different user. The response must
    point back at the source entry, belong to the requesting user, be reset to CREATED
    with no slurm job id, carry newer timestamps, and keep the remaining fields unchanged.
    """
    parent_script = await synth_services.crud.job_script.create(**fill_job_script_data())
    await synth_services.file.job_script.upsert(
        parent_id=parent_script.id,
        filename="entrypoint.sh",
        upload_content=job_script_data_as_string,
        file_type="ENTRYPOINT",
    )

    # The source entry deliberately carries values the clone is expected to reset.
    source = await synth_services.crud.job_submission.create(
        **fill_job_submission_data(
            job_script_id=parent_script.id,
            slurm_job_id=1234,
            status=JobSubmissionStatus.ABORTED,
            owner_email=tester_email,
        )
    )

    # Clone as someone other than the original owner to prove ownership is reassigned.
    requester_email = "new_" + tester_email
    inject_security_header(requester_email, permission)
    response = await client.post(f"/jobbergate/job-submissions/clone/{source.id}")

    assert response.status_code == status.HTTP_201_CREATED, f"Clone failed: {response.text}"
    payload = response.json()

    # Fields that must differ from the source entry.
    assert payload["cloned_from_id"] == source.id
    assert payload["id"] != source.id
    assert payload["owner_email"] == requester_email
    assert payload["slurm_job_id"] is None
    assert payload["status"] == JobSubmissionStatus.CREATED
    assert datetime.fromisoformat(payload["created_at"]) > source.created_at
    assert datetime.fromisoformat(payload["updated_at"]) > source.updated_at

    # Fields that must be copied over untouched.
    copied_fields = (
        "client_id",
        "description",
        "execution_directory",
        "job_script_id",
        "name",
        "sbatch_arguments",
    )
    for field_name in copied_fields:
        assert payload[field_name] == getattr(source, field_name)


async def test_clone_job_submission__fail_unauthorized(fill_job_submission_data, client, synth_services):
    """Check that a clone request sent without any security header is rejected with 401."""
    existing = await synth_services.crud.job_submission.create(**fill_job_submission_data())
    clone_url = f"/jobbergate/job-submissions/clone/{existing.id}"

    # No call to inject_security_header here: the request goes out unauthenticated.
    result = await client.post(clone_url)

    assert result.status_code == status.HTTP_401_UNAUTHORIZED


async def test_clone_job_submission__not_found(inject_security_header, tester_email, client, synth_services):
    """Check that cloning an id with no matching job submission answers with 404."""
    # Sanity check: the table must be empty so that id 0 cannot exist.
    stored_count = await synth_services.crud.job_submission.count()
    assert stored_count == 0

    inject_security_header(tester_email, Permissions.JOB_SUBMISSIONS_CREATE)
    result = await client.post("/jobbergate/job-submissions/clone/0")

    assert result.status_code == status.HTTP_404_NOT_FOUND


async def test_clone_job_submission__no_parent_job_script(
    inject_security_header, tester_email, fill_job_submission_data, client, synth_services
):
    """Check that cloning a job submission created without a job script id answers with 400."""
    # No job_script_id is passed to the fixture, so the stored entry has no parent job script.
    orphan = await synth_services.crud.job_submission.create(**fill_job_submission_data())
    clone_url = f"/jobbergate/job-submissions/clone/{orphan.id}"

    inject_security_header(tester_email, Permissions.JOB_SUBMISSIONS_CREATE)
    result = await client.post(clone_url)

    assert result.status_code == status.HTTP_400_BAD_REQUEST


@pytest.mark.parametrize("permission", (Permissions.ADMIN, Permissions.JOB_SUBMISSIONS_READ))
async def test_get_job_submission_by_id(
permission,
Expand Down
2 changes: 2 additions & 0 deletions jobbergate-api/tests/apps/test_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ async def test_clone_instance__success(self, dummy_crud_service, tester_email):
assert cloned_instance.name == original_instance.name
assert cloned_instance.description == original_instance.description
assert cloned_instance.cloned_from_id == original_instance.id
assert cloned_instance.created_at > original_instance.created_at
assert cloned_instance.updated_at > original_instance.updated_at

async def test_clone_instance__type_error_on_unknown_column(self, dummy_crud_service, tester_email):
"""
Expand Down

0 comments on commit 6350267

Please sign in to comment.