diff --git a/jobbergate-api/CHANGELOG.md b/jobbergate-api/CHANGELOG.md index a3cb8818..fa95d139 100644 --- a/jobbergate-api/CHANGELOG.md +++ b/jobbergate-api/CHANGELOG.md @@ -3,7 +3,7 @@ This file keeps track of all notable changes to jobbergate-api ## Unreleased - +- Added clone capability to job submissions so they can be resubmitted to the cluster when needed [PENG-1676, ASP-4597] ## 5.4.0a2 -- 2024-11-06 - Added back `libpq-dev` and `gcc` to the Dockerfile @@ -19,7 +19,6 @@ This file keeps track of all notable changes to jobbergate-api - For Job Script Template files - For Job Script Template workflow files - ## 5.3.0 -- 2024-09-09 - Fixed issue preventing the creation of a job script from an empty template file diff --git a/jobbergate-api/jobbergate_api/apps/job_submissions/routers.py b/jobbergate-api/jobbergate_api/apps/job_submissions/routers.py index 45e06534..d9f69a0d 100644 --- a/jobbergate-api/jobbergate_api/apps/job_submissions/routers.py +++ b/jobbergate-api/jobbergate_api/apps/job_submissions/routers.py @@ -87,6 +87,39 @@ async def job_submission_create( return new_job_submission +@router.post( + "/clone/{id}", + status_code=status.HTTP_201_CREATED, + response_model=JobSubmissionDetailedView, + description="Endpoint for cloning a job submission under the CREATED status for a new run on the cluster", +) +async def job_submission_clone( + id: int = Path(...), + secure_services: SecureService = Depends( + secure_services(Permissions.ADMIN, Permissions.JOB_SUBMISSIONS_CREATE, ensure_email=True) + ), +): + """Clone a job_submission given its id.""" + logger.info(f"Cloning job submission {id=}") + + original_instance = await secure_services.crud.job_submission.get(id) + + if original_instance.job_script_id is None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Cannot resubmit a job submission without a parent job script", + ) + + cloned_instance = await secure_services.crud.job_submission.clone_instance( + original_instance, + owner_email=secure_services.identity_payload.email, + status=JobSubmissionStatus.CREATED, + slurm_job_id=None, + ) + + return cloned_instance + + @router.get( "/{id}", description="Endpoint to get a job_submission", diff --git a/jobbergate-api/jobbergate_api/apps/job_submissions/schemas.py b/jobbergate-api/jobbergate_api/apps/job_submissions/schemas.py index caa9f5cb..06cc074a 100644 --- a/jobbergate-api/jobbergate_api/apps/job_submissions/schemas.py +++ b/jobbergate-api/jobbergate_api/apps/job_submissions/schemas.py @@ -104,6 +104,10 @@ description="The arguments used to submit the job to the slurm queue", example=["--exclusive", "--job-name=example-job"], ), + cloned_from_id=MetaField( + description="Indicates the id this entry has been cloned from, if any.", + example=101, + ), ) @@ -159,6 +163,7 @@ class JobSubmissionBaseView(TableResource): client_id: str status: JobSubmissionStatus slurm_job_state: Optional[SlurmJobState] = None + cloned_from_id: Optional[int] = None model_config = ConfigDict(json_schema_extra=job_submission_meta_mapper) diff --git a/jobbergate-api/jobbergate_api/apps/services.py b/jobbergate-api/jobbergate_api/apps/services.py index b4591d5a..75af183d 100644 --- a/jobbergate-api/jobbergate_api/apps/services.py +++ b/jobbergate-api/jobbergate_api/apps/services.py @@ -231,6 +231,9 @@ async def clone_instance(self, original_instance: CrudModel, **incoming_data) -> data.update(incoming_data) data["cloned_from_id"] = original_instance.id + data.pop("created_at", None) + data.pop("updated_at", None) + return await self.create(**data) async def delete(self, locator: Any) -> None: diff --git a/jobbergate-api/tests/apps/job_submissions/test_routers.py b/jobbergate-api/tests/apps/job_submissions/test_routers.py index 05b50e42..d52c532f 100644 --- a/jobbergate-api/tests/apps/job_submissions/test_routers.py +++ b/jobbergate-api/tests/apps/job_submissions/test_routers.py @@ -3,6 +3,7 @@ """ import json +from datetime import datetime import pytest from fastapi import status @@ -350,6 +351,90 @@ async def test_create_job_submission_without_client_id( assert response.status_code == status.HTTP_400_BAD_REQUEST +@pytest.mark.parametrize("permission", (Permissions.ADMIN, Permissions.JOB_SUBMISSIONS_CREATE)) +async def test_clone_job_submission__success( + permission, + fill_job_script_data, + fill_job_submission_data, + client, + inject_security_header, + tester_email, + job_script_data_as_string, + synth_session, + synth_services, +): + base_job_script = await synth_services.crud.job_script.create(**fill_job_script_data()) + + job_script_file_name = "entrypoint.sh" + + await synth_services.file.job_script.upsert( + parent_id=base_job_script.id, + filename=job_script_file_name, + upload_content=job_script_data_as_string, + file_type="ENTRYPOINT", + ) + + inserted_job_script_id = base_job_script.id + slurm_job_id = 1234 + + create_data = fill_job_submission_data( + job_script_id=inserted_job_script_id, + slurm_job_id=slurm_job_id, + status=JobSubmissionStatus.ABORTED, + owner_email=tester_email, + ) + original_instance = await synth_services.crud.job_submission.create(**create_data) + + new_owner_email = "new_" + tester_email + + inject_security_header(new_owner_email, permission) + response = await client.post(f"/jobbergate/job-submissions/clone/{original_instance.id}") + + assert response.status_code == status.HTTP_201_CREATED, f"Clone failed: {response.text}" + response_data = response.json() + + assert response_data["cloned_from_id"] == original_instance.id + assert response_data["id"] != original_instance.id + assert response_data["owner_email"] == new_owner_email + assert response_data["slurm_job_id"] is None + assert response_data["status"] == JobSubmissionStatus.CREATED + assert datetime.fromisoformat(response_data["created_at"]) > original_instance.created_at + assert datetime.fromisoformat(response_data["updated_at"]) > original_instance.updated_at + + assert response_data["client_id"] == original_instance.client_id + assert response_data["description"] == original_instance.description + assert response_data["execution_directory"] == original_instance.execution_directory + assert response_data["job_script_id"] == original_instance.job_script_id + assert response_data["name"] == original_instance.name + assert response_data["sbatch_arguments"] == original_instance.sbatch_arguments + + +async def test_clone_job_submission__fail_unauthorized(fill_job_submission_data, client, synth_services): + original_instance = await synth_services.crud.job_submission.create(**fill_job_submission_data()) + + response = await client.post(f"/jobbergate/job-submissions/clone/{original_instance.id}") + + assert response.status_code == status.HTTP_401_UNAUTHORIZED + + +async def test_clone_job_submission__not_found(inject_security_header, tester_email, client, synth_services): + assert (await synth_services.crud.job_submission.count()) == 0 + inject_security_header(tester_email, Permissions.JOB_SUBMISSIONS_CREATE) + response = await client.post("/jobbergate/job-submissions/clone/0") + + assert response.status_code == status.HTTP_404_NOT_FOUND + + +async def test_clone_job_submission__no_parent_job_script( + inject_security_header, tester_email, fill_job_submission_data, client, synth_services +): + original_instance = await synth_services.crud.job_submission.create(**fill_job_submission_data()) + inject_security_header(tester_email, Permissions.JOB_SUBMISSIONS_CREATE) + response = await client.post(f"/jobbergate/job-submissions/clone/{original_instance.id}") + + assert response.status_code == status.HTTP_400_BAD_REQUEST + + @pytest.mark.parametrize("permission", (Permissions.ADMIN, Permissions.JOB_SUBMISSIONS_READ)) async def test_get_job_submission_by_id( permission, diff --git a/jobbergate-api/tests/apps/test_services.py b/jobbergate-api/tests/apps/test_services.py index 71ef746f..74aae684 100644 --- a/jobbergate-api/tests/apps/test_services.py +++ b/jobbergate-api/tests/apps/test_services.py @@ -108,6 +108,8 @@ async def test_clone_instance__success(self, dummy_crud_service, tester_email): assert cloned_instance.name == original_instance.name assert cloned_instance.description == original_instance.description assert cloned_instance.cloned_from_id == original_instance.id + assert cloned_instance.created_at > original_instance.created_at + assert cloned_instance.updated_at > original_instance.updated_at async def test_clone_instance__type_error_on_unknown_column(self, dummy_crud_service, tester_email): """