diff --git a/UI/src/app/modules/core/services/dataset/dataset-variable.service.ts b/UI/src/app/modules/core/services/dataset/dataset-variable.service.ts index e3a15ed31..5108e4cb4 100644 --- a/UI/src/app/modules/core/services/dataset/dataset-variable.service.ts +++ b/UI/src/app/modules/core/services/dataset/dataset-variable.service.ts @@ -46,9 +46,9 @@ export class DatasetVariableService { } } - getVariableById(variableId: number): Observable { + getVariableById(variableId: number, refresh= false): Observable { //simplified implementation for demo purposes - if (this.singleRequestCache.isCached(variableId)) { + if (this.singleRequestCache.isCached(variableId) && !refresh) { return this.singleRequestCache.get(variableId); } else { let getURL = DATASET_VARIABLE_URL + '/' + variableId; diff --git a/UI/src/app/modules/user-datasets/components/user-data-row/user-data-row.component.html b/UI/src/app/modules/user-datasets/components/user-data-row/user-data-row.component.html index fc03309e1..bd150f27b 100644 --- a/UI/src/app/modules/user-datasets/components/user-data-row/user-data-row.component.html +++ b/UI/src/app/modules/user-datasets/components/user-data-row/user-data-row.component.html @@ -7,7 +7,7 @@
Size: {{ getTheFileS
Upload date: {{userDataset.upload_date| date: dateFormat :timeZone}} UTC
-
+
Dataset:
@@ -94,7 +94,8 @@
Upload date: {{userDataset.upload_date| date: dateFormat :ti 'You can choose another name by clicking here. Please note, if the chosen variable name is incorrect, data validation will not be possible.' : 'We could not retrieve the applicable soil moisture variable name. Please choose the proper one from the list. A validation will fail if no proper variable is set.'}}" > - Variable: {{(variableName.shortName$|async)}} ({{variableName.prettyName$|async}}) [{{variableName.unit$|async}}] + Variable: {{(variableName.shortName$|async)}} ({{variableName.prettyName$|async}} + ) [{{variableName.unit$|async}}]
@@ -126,13 +127,13 @@
Upload date: {{userDataset.upload_date| date: dateFormat :ti
Validation list:
-
- {{ind + 1}}) {{validation.val_name}} -
+
+ {{ind + 1}}) {{validation.val_name}} +
-
- No validation has been run with this data +
+ No validation has been run with this data
@@ -153,6 +154,9 @@
Upload date: {{userDataset.upload_date| date: dateFormat :ti
+
+

Your file is still being preprocessed.

+
diff --git a/UI/src/app/modules/user-datasets/components/user-data-row/user-data-row.component.scss b/UI/src/app/modules/user-datasets/components/user-data-row/user-data-row.component.scss index 3ba0863d9..1dbe64fa5 100644 --- a/UI/src/app/modules/user-datasets/components/user-data-row/user-data-row.component.scss +++ b/UI/src/app/modules/user-datasets/components/user-data-row/user-data-row.component.scss @@ -21,12 +21,19 @@ font-size: 1rem; } -#no-validation-box{ - color: lightgray; +.no-validation-box{ + color: darkgray; height: 70%; display: flex; align-items:center; + justify-content: center; } .warning { color: red; } +@import "src/styles"; +.expiry-icons{ + cursor: auto; + font-size: 1.5rem; + padding: 5px; +} diff --git a/UI/src/app/modules/user-datasets/components/user-data-row/user-data-row.component.ts b/UI/src/app/modules/user-datasets/components/user-data-row/user-data-row.component.ts index 9623ba7eb..fc81dbf52 100644 --- a/UI/src/app/modules/user-datasets/components/user-data-row/user-data-row.component.ts +++ b/UI/src/app/modules/user-datasets/components/user-data-row/user-data-row.component.ts @@ -1,4 +1,4 @@ -import {Component, Input, OnInit} from '@angular/core'; +import {Component, Input, OnDestroy, OnInit} from '@angular/core'; import {UserDataFileDto} from '../../services/user-data-file.dto'; import {BehaviorSubject, Observable} from 'rxjs'; import {UserDatasetsService} from '../../services/user-datasets.service'; @@ -15,35 +15,33 @@ import {AuthService} from '../../../core/services/auth/auth.service'; templateUrl: './user-data-row.component.html', styleUrls: ['./user-data-row.component.scss'] }) -export class UserDataRowComponent implements OnInit { +export class UserDataRowComponent implements OnInit, OnDestroy { @Input() userDataset: UserDataFileDto; datasetName$: BehaviorSubject = new BehaviorSubject(''); versionName$: BehaviorSubject = new BehaviorSubject(''); - variableName: { shortName$: BehaviorSubject, prettyName$: BehaviorSubject, unit$: BehaviorSubject } = + variableName: { + shortName$: BehaviorSubject, + prettyName$: BehaviorSubject, + unit$: BehaviorSubject + } = { shortName$: new BehaviorSubject(''), prettyName$: new BehaviorSubject(''), unit$: new BehaviorSubject('') }; - variableUnit: string; datasetFieldName = 'dataset_name'; versionFieldName = 'version_name'; variableFieldName = 'variable_name'; - latFieldName = 'lat_name'; - lonFieldName = 'lon_name'; - timeFiledName = 'time_name'; editDataset = {opened: false}; editVersion = {opened: false}; editVariable = {opened: false}; - editLatName = {opened: false}; - editLonName = {opened: false}; - editTimeName = {opened: false}; dateFormat = 'medium'; timeZone = 'UTC'; + filePreprocessingStatus: any; // variables$: Observable[] = []; @@ -62,12 +60,8 @@ export class UserDataRowComponent implements OnInit { this.datasetVersionService.getVersionById(this.userDataset.version).subscribe(versionData => { this.versionName$.next(versionData.pretty_name); }); - this.datasetVariableService.getVariableById(this.userDataset.variable).subscribe(variableData => { - this.variableName.shortName$.next(variableData.short_name); - this.variableName.prettyName$.next(variableData.pretty_name); - this.variableName.unit$.next(variableData.unit); - // this.variableUnit = variableData.unit; - }); + this.updateVariable(); + this.refreshFilePreprocessingStatus(); } removeDataset(dataFileId: string): void { @@ -80,6 +74,14 @@ export class UserDataRowComponent implements OnInit { }); } + updateVariable(): void { + 
this.datasetVariableService.getVariableById(this.userDataset.variable, true).subscribe(variableData => { + this.variableName.shortName$.next(variableData.short_name); + this.variableName.prettyName$.next(variableData.pretty_name); + this.variableName.unit$.next(variableData.unit); + }); + } + getDataset(datasetId): Observable { return this.datasetService.getDatasetById(datasetId); } @@ -132,4 +134,31 @@ export class UserDataRowComponent implements OnInit { return this.userDatasetService.getTheSizeInProperUnits(this.userDataset.file_size); } + + refreshFilePreprocessingStatus(): void { + if (!this.userDataset.metadata_submitted) { + this.filePreprocessingStatus = setInterval(() => { + this.userDatasetService.getUserDataFileById(this.userDataset.id).subscribe(data => { + if (data.metadata_submitted) { + this.updateVariable(); + if (this.variableName.prettyName$.value !== 'none') { + this.userDatasetService.refresh.next(true); + } + } + }, + () => { + this.userDatasetService.refresh.next(true); + this.toastService.showErrorWithHeader('File preprocessing failed', + 'File could not be preprocessed. Please make sure that you are uploading a proper file and if the ' + + 'file fulfills our requirements', 10000); + }); + }, 60 * 1000); // one minute + } + } + + + ngOnDestroy(): void { + clearInterval(this.filePreprocessingStatus); + } + } diff --git a/UI/src/app/modules/user-datasets/components/user-file-upload/user-file-upload.component.ts b/UI/src/app/modules/user-datasets/components/user-file-upload/user-file-upload.component.ts index 567502f2d..6815cd54d 100644 --- a/UI/src/app/modules/user-datasets/components/user-file-upload/user-file-upload.component.ts +++ b/UI/src/app/modules/user-datasets/components/user-file-upload/user-file-upload.component.ts @@ -50,9 +50,10 @@ export class UserFileUploadComponent implements OnInit { private verifyZipContent(): void { const zip = new JSZip(); zip.loadAsync(this.file).then(contents => { + console.log(contents.files); const files = Object.keys(contents.files).filter(key => - !['nc', 'nc4', 'csv', 'yml'].includes(key.split('.').reverse()[0])); - if (files.length !== 0){ + !['nc', 'nc4', 'csv', 'yml'].includes(key.split('.').reverse()[0]) && !contents.files[key].dir); + if (files.length !== 0) { this.toastService.showErrorWithHeader('File can not be uploaded', 'The zip file you are trying to upload contains files with no acceptable extensions (i.e. 
netCDF or csv + yml'); this.file = null; @@ -64,7 +65,7 @@ export class UserFileUploadComponent implements OnInit { this.file = event.target.files[0]; this.isFileTooBig = false; - if (this.authService.currentUser.space_left && this.file.size > this.authService.currentUser.space_left){ + if (this.authService.currentUser.space_left && this.file.size > this.authService.currentUser.space_left) { this.isFileTooBig = true; this.file = null; return null; @@ -92,34 +93,26 @@ export class UserFileUploadComponent implements OnInit { if (this.file) { this.name = 'uploadedFile'; this.spinnerVisible = true; - const upload$ = this.userDatasetService.userFileUpload(this.name, this.file, this.fileName) + const upload$ = this.userDatasetService.userFileUpload(this.name, this.file, this.fileName, this.metadataForm.value) .pipe(finalize(() => this.reset)); this.uploadSub = upload$.subscribe(event => { if (event.type === HttpEventType.UploadProgress) { this.uploadProgress.next(Math.round(100 * (event.loaded / event.total))); } else if (event.type === HttpEventType.Response) { - this.userDatasetService.sendMetadata(this.metadataForm.value, event.body.id).subscribe(() => { - this.userDatasetService.refresh.next(true); - this.authService.init(); - this.resetFile(); - }, - (message) => { - this.spinnerVisible = false; - this.toastService.showErrorWithHeader('Metadata not saved.', - `${message.error.error}.\n Provided metadata could not be saved. Please try again or contact our team.`); - }, - () => { - this.spinnerVisible = false; - this.metadataForm.reset(''); - }); - } else { + this.userDatasetService.refresh.next(true); + this.authService.init(); + this.resetFile(); } }, (message) => { this.spinnerVisible = false; this.toastService.showErrorWithHeader('File not saved', `${message.error.error}.\n File could not be uploaded. 
Please try again or contact our team.`); + }, + () => { + this.spinnerVisible = false; + this.metadataForm.reset(''); } ); } diff --git a/UI/src/app/modules/user-datasets/services/user-data-file.dto.ts b/UI/src/app/modules/user-datasets/services/user-data-file.dto.ts index d10e5523b..0fedb270c 100644 --- a/UI/src/app/modules/user-datasets/services/user-data-file.dto.ts +++ b/UI/src/app/modules/user-datasets/services/user-data-file.dto.ts @@ -10,7 +10,8 @@ export class UserDataFileDto { public upload_date: Date, public is_used_in_validation: boolean, public file_size: number, - public validation_list: {val_id: string, val_name: string}[] + public validation_list: {val_id: string, val_name: string}[], + public metadata_submitted: boolean ) { } } diff --git a/UI/src/app/modules/user-datasets/services/user-datasets.service.ts b/UI/src/app/modules/user-datasets/services/user-datasets.service.ts index 7ca24be4a..65c11b33c 100644 --- a/UI/src/app/modules/user-datasets/services/user-datasets.service.ts +++ b/UI/src/app/modules/user-datasets/services/user-datasets.service.ts @@ -9,11 +9,9 @@ const urlPrefix = environment.API_URL + 'api'; const uploadUserDataUrl: string = urlPrefix + '/upload-user-data'; const userDataListUrl: string = urlPrefix + '/get-list-of-user-data-files'; const userDataDeleteUrl: string = urlPrefix + '/delete-user-datafile'; -const userDataMetadataUrl: string = urlPrefix + '/user-file-metadata'; -const userDataTestUrl: string = urlPrefix + '/test-user-dataset'; const updateMetadataUrl: string = urlPrefix + '/update-metadata'; +const userDataFileUrl: string = urlPrefix + '/get-user-file-by-id'; -// const validateUserDataUrl: string = urlPrefix + '/validate-user-data'; const csrfToken = '{{csrf_token}}'; const headers = new HttpHeaders({'X-CSRFToken': csrfToken}); @@ -28,32 +26,29 @@ export class UserDatasetsService { constructor(private httpClient: HttpClient) { } - userFileUpload(name, file, fileName): Observable { + userFileUpload(name, file, fileName, metadata): Observable { const formData = new FormData(); formData.append(name, file, fileName); const uploadUrl = uploadUserDataUrl + '/' + fileName + '/'; - return this.httpClient.post(uploadUrl, formData.get(name), {headers, reportProgress: true, observe: 'events', responseType: 'json'}); + const fileHeaders = new HttpHeaders({'X-CSRFToken': csrfToken, fileMetadata: JSON.stringify(metadata)}); + return this.httpClient.post(uploadUrl, formData.get(name), + {headers: fileHeaders, reportProgress: true, observe: 'events', responseType: 'json'}); } getUserDataList(): Observable{ return this.httpClient.get(userDataListUrl); } + getUserDataFileById(fileId: string): Observable{ + const userDataFileUrlWithId = userDataFileUrl + '/' + fileId + '/'; + return this.httpClient.get(userDataFileUrlWithId); + } + deleteUserData(dataFileId: string): Observable{ const deleteUrl = userDataDeleteUrl + '/' + dataFileId + '/'; return this.httpClient.delete(deleteUrl, {headers}); } - sendMetadata(metadataForm: any, fileId: string): Observable { - const metadataUrl = userDataMetadataUrl + '/' + fileId + '/'; - return this.httpClient.post(metadataUrl, metadataForm, {observe: 'response', responseType: 'json'}); - } - - testDataset(dataFileId: string): Observable{ - const testUrl = userDataTestUrl + '/' + dataFileId + '/'; - return this.httpClient.get(testUrl); - } - updateMetadata(fieldName: string, fieldValue: string, dataFileId: string): Observable{ const updateUrl = updateMetadataUrl + '/' + dataFileId + '/'; return this.httpClient.put(updateUrl, 
{field_name: fieldName, field_value: fieldValue}); @@ -76,12 +71,4 @@ export class UserDatasetsService { return `${Math.round(properSize * 10) / 10} ${units}`; } - - // userFileValidate(name, file, filename): Observable { - // const formData = new FormData(); - // formData.append(name, file, filename); - // const validateUserDataUrlWithFileName = validateUserDataUrl + '/' + file.name + '/'; - // return this.httpClient.put(validateUserDataUrlWithFileName, {file: formData.getAll(name)}); - // } - } diff --git a/api/tests/test_upload_user_data_view.py b/api/tests/test_upload_user_data_view.py index bd0ba42d8..61d05390c 100644 --- a/api/tests/test_upload_user_data_view.py +++ b/api/tests/test_upload_user_data_view.py @@ -1,12 +1,15 @@ +import json import logging from api.tests.test_helper import * -from rest_framework.test import APIClient, APITestCase +from rest_framework.test import APIClient +from django.test.testcases import TransactionTestCase from django.urls import reverse from validator.models import UserDatasetFile, DataVariable from django.conf import settings import shutil from pathlib import Path from api.variable_and_field_names import * +from unittest.mock import patch FILE = 'file' FORMAT_MULTIPART = 'multipart' @@ -29,27 +32,23 @@ def _clean_up_data(file_entry): assert not os.path.exists(outdir) -def _update_file_entry(file_entry): - new_dataset = Dataset() - new_dataset.save() - new_version = DatasetVersion() - new_version.save() - new_variable = DataVariable() - new_variable.save() +def _get_headers(metadata): + return { + 'HTTP_FILEMETADATA': json.dumps(metadata) + } - file_entry.dataset = new_dataset - file_entry.version = new_version - file_entry.variable = new_variable - file_entry.all_variables = [ - {'name': 'soil_moisture', 'long_name': 'Soil Moisture'}, - {'name': 'none', 'long_name': 'Some weird variable'} - ] - file_entry.save() +def mock_preprocess_file(*args, **kwargs): + return -class TestUploadUserDataView(APITestCase): + +class TestUploadUserDataView(TransactionTestCase): + serialized_rollback = True __logger = logging.getLogger(__name__) + databases = '__all__' + allow_database_queries = True + def setUp(self): self.auth_data, self.test_user = create_test_user() self.second_user_data, self.second_test_user = create_alternative_user() @@ -71,19 +70,54 @@ def setUp(self): self.not_netcdf_file = f'{self.user_data_path}/{self.not_netcdf_file_name}' self.upload_data_url_name = 'Upload user data' - self.post_metadata_url_name = 'Post User Data File Metadata' + # self.post_metadata_url_name = 'Post User Data File Metadata' self.get_user_data_url_list_name = "Get User Data Files" self.delete_data_url_name = 'Delete User Data File' self.update_metadata_url_name = 'Update metadata' + self.get_user_file_by_id_url_name = 'Get user file by ID' + + self.metadata_correct = { + USER_DATA_DATASET_FIELD_NAME: 'test_dataset', + USER_DATA_DATASET_FIELD_PRETTY_NAME: 'test_dataset_pretty_name', + USER_DATA_VERSION_FIELD_NAME: 'test_version', + USER_DATA_VERSION_FIELD_PRETTY_NAME: 'test_version_pretty_name' + } + + self.partial_metadata_correct = { + USER_DATA_DATASET_FIELD_NAME: 'test_dataset', + USER_DATA_DATASET_FIELD_PRETTY_NAME: None, + USER_DATA_VERSION_FIELD_NAME: 'test_version', + USER_DATA_VERSION_FIELD_PRETTY_NAME: None + } + + self.metadata_incorrect = { + USER_DATA_DATASET_FIELD_NAME: None, + USER_DATA_DATASET_FIELD_PRETTY_NAME: 'test_dataset', + USER_DATA_VERSION_FIELD_NAME: None, + USER_DATA_VERSION_FIELD_PRETTY_NAME: 'test_version' + } + + self.headers_correct = { + 
'HTTP_FILEMETADATA': json.dumps(self.metadata_correct) + } + self.headers_partial_correct = { + 'HTTP_FILEMETADATA': json.dumps(self.metadata_correct) + } def _remove_user_datafiles(self, username): user_data_path = f'{self.user_data_path}/{username}' shutil.rmtree(user_data_path) - def test_file_size_limit(self): - response = self.client.post(reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), - {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) - print(dir(self.test_user)) + @patch('api.views.upload_user_data_view.preprocess_file', side_effect=mock_preprocess_file) + def test_file_size_limit(self, mock_preprocess_file): + headers = _get_headers(self.metadata_correct) + + response = self.client.post( + reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), + {FILE: self.netcdf_file}, + format=FORMAT_MULTIPART, + **headers) + file_entry = UserDatasetFile.objects.get(id=response.json()['id']) # check the size limit assigned to the user @@ -98,7 +132,7 @@ def test_file_size_limit(self): self.test_user.save() response = self.client.post(reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), - {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) + {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) assert response.json()['error'] == 'File is too big' assert response.status_code == 500 @@ -107,23 +141,32 @@ def test_file_size_limit(self): self.test_user.save() response = self.client.post(reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), - {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) + {FILE: self.netcdf_file}, format=FORMAT_MULTIPART, **headers) - assert response.status_code == 200 + assert response.status_code == 201 assert not self.test_user.space_left file_entry = UserDatasetFile.objects.get(id=response.json()['id']) file_entry.delete() - def test_get_list_of_user_data_files(self): + @patch('api.views.upload_user_data_view.preprocess_file', side_effect=mock_preprocess_file) + def test_get_list_of_user_data_files(self, mock_preprocess_file): + headers = _get_headers(self.metadata_correct) + # post the same file 3 times, to create 3 different entries - self.client.post(reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), - {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) - self.client.post(reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), - {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) - self.client.post(reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), - {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) + post_response_1 = self.client.post( + reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), + {FILE: self.netcdf_file}, format=FORMAT_MULTIPART, **headers) + assert post_response_1.status_code == 201 + post_response_2 = self.client.post( + reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), + {FILE: self.netcdf_file}, format=FORMAT_MULTIPART, **headers) + assert post_response_2.status_code == 201 + post_response_3 = self.client.post( + reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), + {FILE: self.netcdf_file}, format=FORMAT_MULTIPART, **headers) + assert post_response_3.status_code == 201 response = self.client.get(reverse(self.get_user_data_url_list_name)) existing_files = response.json() @@ -157,16 +200,58 @@ def test_get_list_of_user_data_files(self): assert 
len(UserDatasetFile.objects.all()) == 0 - def test_delete_user_dataset_and_file(self): - # posting a file to be removed + @patch('api.views.upload_user_data_view.preprocess_file', side_effect=mock_preprocess_file) + def test_get_user_data_file_by_id(self, mock_preprocess_file): + # post the same file 3 times, to create 3 different entries + headers = _get_headers(self.metadata_correct) post_response = self.client.post( reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), - {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) - file_entry_id = post_response.json()['id'] - file_entry = UserDatasetFile.objects.get(pk=file_entry_id) + {FILE: self.netcdf_file}, format=FORMAT_MULTIPART, **headers) + assert post_response.status_code == 201 + + post_data = post_response.json() + file_id = post_data.get('id') + file_entry = UserDatasetFile.objects.get(id=file_id) + + response = self.client.get(reverse(self.get_user_file_by_id_url_name, kwargs={URL_FILE_UUID: file_id})) - _update_file_entry(file_entry) + assert response.status_code == 200 + assert post_data.get('file_name') == file_entry.file_name + assert post_data.get('owner') == file_entry.owner.id + assert post_data.get('dataset') == file_entry.dataset.id + assert post_data.get('version') == file_entry.version.id + assert post_data.get('all_variables') is None + assert post_data.get('metadata_submitted') is False # it's submitted after running a preprocessing function + + self.client.logout() + self.client.login(**self.second_user_data) + + response = self.client.get(reverse(self.get_user_file_by_id_url_name, kwargs={URL_FILE_UUID: file_id})) + assert response.status_code == 404 + assert response.json().get('detail') == 'Not found.' + + response = self.client.get( + reverse(self.get_user_file_by_id_url_name, kwargs={URL_FILE_UUID: '00000000-6c36-0000-0000-599e9a3840ca'})) + + assert response.status_code == 404 + assert response.json().get('detail') == 'Not found.' 
+ + self.client.logout() + self.client.login(**self.auth_data) + file_entry.delete() + + @patch('api.views.upload_user_data_view.preprocess_file', side_effect=mock_preprocess_file) + def test_delete_user_dataset_and_file(self, mock_preprocess_file): + # posting a file to be removed + + post_response = self.client.post( + reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), + {FILE: self.netcdf_file}, + format=FORMAT_MULTIPART, + **_get_headers(self.metadata_correct)) + + file_entry_id = post_response.json()['id'] assert len(Dataset.objects.all()) == 1 assert len(DatasetVersion.objects.all()) == 1 assert len(DataVariable.objects.all()) == 1 @@ -199,11 +284,14 @@ def test_delete_user_dataset_and_file(self): assert len(UserDatasetFile.objects.all()) == 0 assert len(os.listdir(self.test_user_data_path)) == 0 - def test_upload_user_data_nc_correct(self): + @patch('api.views.upload_user_data_view.preprocess_file', side_effect=mock_preprocess_file) + def test_upload_user_data_nc_correct(self, mock_preprocess_file): response = self.client.post(reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), - {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) + {FILE: self.netcdf_file}, + format=FORMAT_MULTIPART, + **_get_headers(self.metadata_correct)) - assert response.status_code == 200 + assert response.status_code == 201 existing_files = UserDatasetFile.objects.all() @@ -214,10 +302,11 @@ def test_upload_user_data_nc_correct(self): assert len(os.listdir(self.test_user_data_path)) == 1 assert os.path.exists(file_dir) - # metadata hasn't been posted yet, so most of the fields should be empty - assert file_entry.dataset is None - assert file_entry.version is None - assert file_entry.variable is None + assert file_entry.dataset.short_name == self.metadata_correct.get(USER_DATA_DATASET_FIELD_NAME) + assert file_entry.version.short_name == self.metadata_correct.get(USER_DATA_VERSION_FIELD_NAME) + # this one is none, because it's taken from the file and the file preprocessing is skipped for the purpose of + # testing + assert file_entry.variable.short_name == 'none' assert file_entry.owner == self.test_user file_entry.delete() @@ -227,12 +316,15 @@ def test_upload_user_data_nc_correct(self): assert not os.path.exists(file_dir) assert len(os.listdir(self.test_user_data_path)) == 0 - def test_upload_user_data_zip_netcdf_correct(self): + @patch('api.views.upload_user_data_view.preprocess_file', side_effect=mock_preprocess_file) + def test_upload_user_data_zip_netcdf_correct(self, mock_preprocess_file): response = self.client.post( reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.zipped_netcdf_file_name}), - {FILE: self.zipped_netcdf}, format=FORMAT_MULTIPART) + {FILE: self.zipped_netcdf}, + format=FORMAT_MULTIPART, + **_get_headers(self.metadata_correct)) - assert response.status_code == 200 + assert response.status_code == 201 existing_files = UserDatasetFile.objects.all() @@ -251,12 +343,16 @@ def test_upload_user_data_zip_netcdf_correct(self): assert not os.path.exists(file_dir) assert len(os.listdir(self.test_user_data_path)) == 0 - def test_upload_user_data_zip_csv_correct(self): + @patch('api.views.upload_user_data_view.preprocess_file', side_effect=mock_preprocess_file) + def test_upload_user_data_zip_csv_correct(self, mock_preprocess_file): response = self.client.post( reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.zipped_csv_file_name}), - {FILE: self.zipped_csv}, format=FORMAT_MULTIPART) + {FILE: self.zipped_csv}, + 
format=FORMAT_MULTIPART, + **_get_headers(self.metadata_correct) + ) - assert response.status_code == 200 + assert response.status_code == 201 existing_files = UserDatasetFile.objects.all() @@ -279,7 +375,9 @@ def test_upload_user_data_not_porper_extension(self): file_to_upload = _create_test_file(self.not_netcdf_file) response = self.client.post( reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.not_netcdf_file_name}), - {FILE: file_to_upload}, format=FORMAT_MULTIPART) + {FILE: file_to_upload}, + format=FORMAT_MULTIPART, + **_get_headers(self.metadata_correct)) # assert False assert response.status_code == 500 @@ -288,11 +386,15 @@ def test_upload_user_data_not_porper_extension(self): assert len(existing_files) == 0 assert len(os.listdir(self.test_user_data_path)) == 0 - def test_upload_user_data_with_wrong_name(self): + @patch('api.views.upload_user_data_view.preprocess_file', side_effect=mock_preprocess_file) + def test_upload_user_data_with_wrong_name(self, mock_preprocess_file): file_to_upload = _create_test_file(self.not_netcdf_file) response = self.client.post( reverse(self.upload_data_url_name, kwargs={URL_FILENAME: 'wrong_name'}), - {FILE: file_to_upload}, format=FORMAT_MULTIPART) + {FILE: file_to_upload}, + format=FORMAT_MULTIPART, + **_get_headers(self.metadata_correct) + ) # assert False assert response.status_code == 500 @@ -301,224 +403,32 @@ def test_upload_user_data_with_wrong_name(self): assert len(existing_files) == 0 assert len(os.listdir(self.test_user_data_path)) == 0 - def test_post_user_file_metadata_and_preprocess_file_correct(self): - # I am posting the file to create the proper dataset entry - response = self.client.post(reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), - {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) - assert response.status_code == 200 - - # checking if the file entry got saved - existing_files = UserDatasetFile.objects.all() - assert len(existing_files) == 1 - - file_entry = existing_files[0] - # I need to replace the posted file with the original one, because the api post method somehow corrupts netCDFs - # which is not the problem when I post them via angular and I don't want to deal with it right now. 
- shutil.copy2(self.netcdf_file, file_entry.file.path) - - metadata_correct = { - USER_DATA_DATASET_FIELD_NAME: 'test_dataset', - USER_DATA_DATASET_FIELD_PRETTY_NAME: 'test_dataset_pretty_name', - USER_DATA_VERSION_FIELD_NAME: 'test_version', - USER_DATA_VERSION_FIELD_PRETTY_NAME: 'test_version_pretty_name' - } - # posting metadata as those from the metadata form and checking if it has been done - response_metadata = self.client.post( - reverse(self.post_metadata_url_name, kwargs={URL_FILE_UUID: file_entry.id}), - metadata_correct, format='json') - assert response_metadata.status_code == 200 - - # re-querying file entry - file_entry = UserDatasetFile.objects.get(id=response.json()['id']) - # checking if the posted metadata is proper - assert file_entry.dataset.short_name == metadata_correct[USER_DATA_DATASET_FIELD_NAME] - assert file_entry.dataset.pretty_name == metadata_correct[USER_DATA_DATASET_FIELD_PRETTY_NAME] - assert file_entry.dataset == Dataset.objects.all().last() - assert file_entry.version.short_name == metadata_correct[USER_DATA_VERSION_FIELD_NAME] - assert file_entry.version.pretty_name == metadata_correct[USER_DATA_VERSION_FIELD_PRETTY_NAME] - assert file_entry.version == DatasetVersion.objects.all().last() - - # checking if the proper metadata was retrieved from the file - assert file_entry.variable == DataVariable.objects.all().last() - # the values below are defined in the test file, so if we change the test file we may have to update them - assert file_entry.variable.short_name == 'soil_moisture' - - # check if the timeseries files were created: - timeseries_dir = file_entry.get_raw_file_path + '/timeseries' - assert os.path.exists(timeseries_dir) - assert len(os.listdir(timeseries_dir)) != 0 - - file_entry.delete() - assert len(UserDatasetFile.objects.all()) == 0 - - def test_post_metadata_and_preprocess_file_zip_netcdf(self): - # I am posting the file to create the proper dataset entry - response = self.client.post( - reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.zipped_netcdf_file_name}), - {FILE: self.zipped_netcdf}, format=FORMAT_MULTIPART) - assert response.status_code == 200 - - # checking if the file entry got saved - existing_files = UserDatasetFile.objects.all() - assert len(existing_files) == 1 - - file_entry = existing_files[0] - # I need to replace the posted file with the original one, because the api post method somehow corrupts files - # which is not the problem when I post them via angular and I don't want to deal with it right now. 
- shutil.copy2(self.zipped_netcdf, file_entry.file.path) - - metadata_correct = { - USER_DATA_DATASET_FIELD_NAME: 'test_dataset', - USER_DATA_DATASET_FIELD_PRETTY_NAME: 'test_dataset_pretty_name', - USER_DATA_VERSION_FIELD_NAME: 'test_version', - USER_DATA_VERSION_FIELD_PRETTY_NAME: 'test_version_pretty_name' - } - # posting metadata as those from the metadata form and checking if it has been done - response_metadata = self.client.post( - reverse(self.post_metadata_url_name, kwargs={URL_FILE_UUID: file_entry.id}), - metadata_correct, format='json') - assert response_metadata.status_code == 200 - - # re-querying file entry - file_entry = UserDatasetFile.objects.get(id=response.json()['id']) - # checking if the posted metadata is proper - assert file_entry.dataset.short_name == metadata_correct[USER_DATA_DATASET_FIELD_NAME] - assert file_entry.dataset.pretty_name == metadata_correct[USER_DATA_DATASET_FIELD_PRETTY_NAME] - assert file_entry.dataset == Dataset.objects.all().last() - assert file_entry.version.short_name == metadata_correct[USER_DATA_VERSION_FIELD_NAME] - assert file_entry.version.pretty_name == metadata_correct[USER_DATA_VERSION_FIELD_PRETTY_NAME] - assert file_entry.version == DatasetVersion.objects.all().last() - - # checking if the proper metadata was retrieved from the file - assert file_entry.variable == DataVariable.objects.all().last() - # the values below are defined in the test file, so if we change the test file we may have to update them - assert file_entry.variable.short_name == 'sm' - - # check if the timeseries files were created: - timeseries_dir = file_entry.get_raw_file_path + '/timeseries' - assert os.path.exists(timeseries_dir) - assert len(os.listdir(timeseries_dir)) != 0 - - file_entry.delete() - assert len(UserDatasetFile.objects.all()) == 0 - - def test_post_metadata_and_preprocess_file_zip_csv(self): - # I am posting the file to create the proper dataset entry - response = self.client.post( - reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.zipped_csv_file_name}), - {FILE: self.zipped_csv}, format=FORMAT_MULTIPART) - assert response.status_code == 200 - - # checking if the file entry got saved - existing_files = UserDatasetFile.objects.all() - assert len(existing_files) == 1 - - file_entry = existing_files[0] - # I need to replace the posted file with the original one, because the api post method somehow corrupts files - # which is not the problem when I post them via angular and I don't want to deal with it right now. 
- shutil.copy2(self.zipped_csv, file_entry.file.path) - - metadata_correct = { - USER_DATA_DATASET_FIELD_NAME: 'test_dataset', - USER_DATA_DATASET_FIELD_PRETTY_NAME: 'test_dataset_pretty_name', - USER_DATA_VERSION_FIELD_NAME: 'test_version', - USER_DATA_VERSION_FIELD_PRETTY_NAME: 'test_version_pretty_name' - } - # posting metadata as those from the metadata form and checking if it has been done - response_metadata = self.client.post( - reverse(self.post_metadata_url_name, kwargs={URL_FILE_UUID: file_entry.id}), - metadata_correct, format='json') - assert response_metadata.status_code == 200 - - # re-querying file entry - file_entry = UserDatasetFile.objects.get(id=response.json()['id']) - # checking if the posted metadata is proper - assert file_entry.dataset.short_name == metadata_correct[USER_DATA_DATASET_FIELD_NAME] - assert file_entry.dataset.pretty_name == metadata_correct[USER_DATA_DATASET_FIELD_PRETTY_NAME] - assert file_entry.dataset == Dataset.objects.all().last() - assert file_entry.version.short_name == metadata_correct[USER_DATA_VERSION_FIELD_NAME] - assert file_entry.version.pretty_name == metadata_correct[USER_DATA_VERSION_FIELD_PRETTY_NAME] - assert file_entry.version == DatasetVersion.objects.all().last() - - # checking if the proper metadata was retrieved from the file - assert file_entry.variable == DataVariable.objects.all().last() - # the values below are defined in the test file, so if we change the test file we may have to update them - assert file_entry.variable.short_name == 'soil_moisture' - - # check if the timeseries files were created: - timeseries_dir = file_entry.get_raw_file_path + '/timeseries' - assert os.path.exists(timeseries_dir) - assert len(os.listdir(timeseries_dir)) != 0 - - file_entry.delete() - assert len(UserDatasetFile.objects.all()) == 0 - def test_post_incorrect_metadata_form(self): # I am posting the file to create the proper dataset entry, I don't need to copy it, as it won't be processed response = self.client.post(reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), - {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) - assert response.status_code == 200 - - # checking if the file entry got saved - existing_files = UserDatasetFile.objects.all() - assert len(existing_files) == 1 - - file_entry = existing_files[0] - metadata_correct = { - USER_DATA_DATASET_FIELD_NAME: None, - USER_DATA_DATASET_FIELD_PRETTY_NAME: 'test_dataset_pretty_name', - USER_DATA_VERSION_FIELD_NAME: None, - USER_DATA_VERSION_FIELD_PRETTY_NAME: 'test_version_pretty_name' - } - # posting metadata as those from the metadata form and checking if it has been done - response_metadata = self.client.post( - reverse(self.post_metadata_url_name, kwargs={URL_FILE_UUID: file_entry.id}), - metadata_correct, format='json') - assert response_metadata.status_code == 500 - existing_files = UserDatasetFile.objects.all() - assert len(existing_files) == 0 - - def test_preprocess_corrupted_file(self): - """ For some reason, when a netCDF is posted with django rest client it gets corrupted, so with this test I want - to check two things: 1. if the corrupted file exception is handled, 2. if at some point they change something - and the file won't be spoiled anymore. 
If this test starts failing at some point that might be the indicator""" - - # I am posting the file to create the proper dataset entry - response = self.client.post(reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), - {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) - assert response.status_code == 200 - - # checking if the file entry got saved - existing_files = UserDatasetFile.objects.all() - assert len(existing_files) == 1 + {FILE: self.netcdf_file}, + format=FORMAT_MULTIPART, + **_get_headers(self.metadata_incorrect)) + assert response.status_code == 500 - file_entry = existing_files[0] - # pretty names are not required - metadata_correct = { - USER_DATA_DATASET_FIELD_NAME: 'test_dataset', - USER_DATA_DATASET_FIELD_PRETTY_NAME: None, - USER_DATA_VERSION_FIELD_NAME: 'test_version', - USER_DATA_VERSION_FIELD_PRETTY_NAME: None - } - # posting metadata as those from the metadata form and checking if it has been done - response_metadata = self.client.post( - reverse(self.post_metadata_url_name, kwargs={URL_FILE_UUID: file_entry.id}), - metadata_correct, format='json') - assert response_metadata.status_code == 500 - assert response_metadata.json()['error'] == 'Provided file does not fulfill requirements.' existing_files = UserDatasetFile.objects.all() assert len(existing_files) == 0 - def test_update_metadata(self): + @patch('api.views.upload_user_data_view.preprocess_file', side_effect=mock_preprocess_file) + def test_update_metadata(self, mock_preprocess_file): file_post_response = self.client.post( reverse(self.upload_data_url_name, kwargs={URL_FILENAME: self.netcdf_file_name}), - {FILE: self.netcdf_file}, format=FORMAT_MULTIPART) + {FILE: self.netcdf_file}, + format=FORMAT_MULTIPART, + **_get_headers(self.metadata_correct)) - assert file_post_response.status_code == 200 + assert file_post_response.status_code == 201 file_id = file_post_response.json()['id'] file_entry = UserDatasetFile.objects.get(pk=file_id) - _update_file_entry(file_entry) + file_entry.all_variables = [{"name": "soil_moisture", "units": "%", "long_name": "Soil Moisture"}, + {"name": "ssm_noise", "units": "%", "long_name": "Surface Soil Moisture Noise"}] + file_entry.save() # update variable name variable_new_name = 'soil_moisture' diff --git a/api/urls.py b/api/urls.py index 36cabe6f3..4ab09221b 100644 --- a/api/urls.py +++ b/api/urls.py @@ -24,7 +24,7 @@ extend_result, publish_result, add_validation, remove_validation, get_publishing_form, copy_validation_results from api.views.serving_file_view import * from api.views.local_api_view import get_list_of_countries -from api.views.settings_view import settings +from api.views.settings_view import backend_settings from api.views.upload_user_data_view import * # schema_view = get_schema_view( @@ -102,7 +102,7 @@ path('sign-up', signup_post, name='Sign up'), path('user-update', user_update, name='User update'), path('user-delete', user_delete, name='User delete'), - path('settings', settings, name="Settings"), + path('settings', backend_settings, name="Settings"), path('get-graphic-file', get_graphic_file, name='Get graphic file'), path('publishing-form', get_publishing_form, name='Get publishing form'), path('copy-validation', copy_validation_results, name='Copy validation results'), @@ -112,8 +112,8 @@ path('upload-user-data//', upload_user_data, name='Upload user data'), path('get-list-of-user-data-files', get_list_of_user_data_files, name='Get User Data Files'), path('delete-user-datafile//', delete_user_dataset_and_file, name='Delete 
User Data File'), - path('user-file-metadata//', post_user_file_metadata_and_preprocess_file, - name='Post User Data File Metadata'), + path('get-user-file-by-id//', get_user_data_file_by_id, + name='Get user file by ID'), path('update-metadata//', update_metadata, name='Update metadata'), path('scaling-methods', get_scaling_methods, name='Scaling methods') # path('test-user-dataset//', test_user_data, name='Test user data'), diff --git a/api/views/dataset_view.py b/api/views/dataset_view.py index 8692bc37a..ccfb992c0 100644 --- a/api/views/dataset_view.py +++ b/api/views/dataset_view.py @@ -17,9 +17,7 @@ def dataset(request): user = request.user datasets = Dataset.objects.filter(user=None) if user_data and user.is_authenticated: - user_datasets = Dataset.objects.filter(user=user).filter(user_dataset__isnull=False) - print(user_datasets) - print(Dataset.objects.filter(user=user).filter(user_dataset__isnull=True)) + user_datasets = Dataset.objects.filter(user=user).filter(user_dataset__isnull=False).exclude(storage_path='') datasets = datasets.union(user_datasets) serializer = DatasetSerializer(datasets, many=True) diff --git a/api/views/settings_view.py b/api/views/settings_view.py index 4993a549c..4dbcdd674 100644 --- a/api/views/settings_view.py +++ b/api/views/settings_view.py @@ -10,7 +10,7 @@ @api_view(['GET']) @permission_classes([AllowAny]) -def settings(request): +def backend_settings(request): settings_model = Settings.objects.all() serializer = SettingsSerializer(settings_model, many=True) diff --git a/api/views/upload_user_data_view.py b/api/views/upload_user_data_view.py index 606a0422b..359a1ee9b 100644 --- a/api/views/upload_user_data_view.py +++ b/api/views/upload_user_data_view.py @@ -1,3 +1,5 @@ +import json + from django.http import JsonResponse, HttpResponse from django.shortcuts import get_object_or_404 from rest_framework import status @@ -12,8 +14,11 @@ from validator.models import UserDatasetFile, DatasetVersion, DataVariable, Dataset from api.variable_and_field_names import * import logging -from qa4sm_preprocessing.utils import * from validator.validation.globals import USER_DATASET_MIN_ID, USER_DATASET_VERSION_MIN_ID, USER_DATASET_VARIABLE_MIN_ID +from multiprocessing.context import Process + +from validator.validation.user_data_processing import user_data_preprocessing +from django.db import transaction, connections __logger = logging.getLogger(__name__) @@ -72,14 +77,14 @@ def create_version_entry(version_name, version_pretty_name, dataset_pretty_name, raise Exception(version_serializer.errors) -def create_dataset_entry(dataset_name, dataset_pretty_name, version, variable, user, file_entry): +def create_dataset_entry(dataset_name, dataset_pretty_name, version, variable, user): # TODO: update variables current_max_id = Dataset.objects.all().last().id if Dataset.objects.all() else 0 dataset_data = { 'short_name': dataset_name, 'pretty_name': dataset_pretty_name, 'help_text': f'Dataset {dataset_pretty_name} provided by user {user}.', - 'storage_path': file_entry.get_raw_file_path, + 'storage_path': '', 'detailed_description': 'Data provided by a user', 'source_reference': 'Data provided by a user', 'citation': 'Data provided by a user', @@ -131,43 +136,17 @@ def update_file_entry(file_entry, dataset, version, variable, user, all_variable return response -def get_sm_variable_names(variables): - key_sm_words = ['water', 'soil', 'moisture', 'soil_moisture', 'sm', 'ssm', 'water_content', 'soil', 'moisture', - 'swi', 'swvl1', 'soilmoi'] - key_error_words = ['error', 
'bias', 'uncertainty'] - candidates = [variable for variable in variables if any([word in variable.lower() for word in key_sm_words]) - and not any([word in variable.lower() for word in key_error_words])] - - sm_variables = [{ - 'name': var, - 'long_name': variables[var].get("long_name", var), - 'units': variables[var].get("units") if variables[var].get("units") else 'n.a.' - } for var in candidates] - - if len(sm_variables) > 0: - return sm_variables[0] - else: - return {'name': '--none--', - 'long_name': '--none--', - 'units': 'n.a.'} - - -def get_variables_from_the_reader(reader): - variables = reader.variable_description() - variables_dict_list = [ - {'name': variable, - 'long_name': variables[variable].get("long_name", variables[variable].get("standard_name", variable)), - 'units': variables[variable].get("units") if variables[variable].get("units") else 'n.a.' - } - for variable in variables - ] - - return variables_dict_list +def preprocess_file(file_serializer, file_raw_path): + connections.close_all() + p = Process(target=user_data_preprocessing, kwargs={"file_uuid": file_serializer.data['id'], + "file_path": file_raw_path + file_serializer.data[ + 'file_name'], + "file_raw_path": file_raw_path}) + p.start() + return # API VIEWS - - @api_view(['GET']) @permission_classes([IsAuthenticated]) def get_list_of_user_data_files(request): @@ -181,6 +160,18 @@ def get_list_of_user_data_files(request): return JsonResponse({'message': 'We could not return the list of your datafiles'}, status=500) +@api_view(['GET']) +@permission_classes([IsAuthenticated]) +def get_user_data_file_by_id(request, file_uuid): + file_entry = get_object_or_404(UserDatasetFile, pk=file_uuid) + + if file_entry.owner != request.user: + return JsonResponse({'detail': 'Not found.'}, status=404) + + serializer = UploadSerializer(file_entry, many=False) + return JsonResponse(serializer.data, status=200, safe=False) + + @api_view(['DELETE']) @permission_classes([IsAuthenticated]) def delete_user_dataset_and_file(request, file_uuid): @@ -233,60 +224,6 @@ def __init__(self, message): self.message = message -@api_view(['PUT', 'POST']) -@permission_classes([IsAuthenticated]) -def post_user_file_metadata_and_preprocess_file(request, file_uuid): - serializer = UserFileMetadataSerializer(data=request.data) - file_entry = get_object_or_404(UserDatasetFile, id=file_uuid) - file_entry.metadata_submitted = True - file_entry.save() - - if serializer.is_valid(): - # first the file will be preprocessed - try: - gridded_reader = preprocess_user_data(file_entry.file.path, file_entry.get_raw_file_path + '/timeseries') - except Exception as e: - print(e, type(e)) - file_entry.delete() - return JsonResponse({'error': 'Provided file does not fulfill requirements.'}, status=500, safe=False) - - sm_variable = get_sm_variable_names(gridded_reader.variable_description()) - all_variables = get_variables_from_the_reader(gridded_reader) - - dataset_name = request.data[USER_DATA_DATASET_FIELD_NAME] - dataset_pretty_name = request.data[USER_DATA_DATASET_FIELD_PRETTY_NAME] if request.data[ - USER_DATA_DATASET_FIELD_PRETTY_NAME] else dataset_name - version_name = request.data[USER_DATA_VERSION_FIELD_NAME] - version_pretty_name = request.data[USER_DATA_VERSION_FIELD_PRETTY_NAME] if request.data[ - USER_DATA_VERSION_FIELD_PRETTY_NAME] else version_name - # - # creating version entry - new_version = create_version_entry(version_name, version_pretty_name, dataset_pretty_name, request.user) - # creating variable entry - - new_variable = 
create_variable_entry(sm_variable['name'], sm_variable['long_name'], dataset_pretty_name, - request.user, sm_variable['units']) - # for sm_variable in sm_variables: - # new_variable = create_variable_entry( - # sm_variable['name'], - # sm_variable['long_name'], - # dataset_pretty_name, - # request.user) - # creating dataset entry - new_dataset = create_dataset_entry(dataset_name, dataset_pretty_name, new_version, new_variable, request.user, - file_entry) - # updating file entry - file_data_updated = update_file_entry(file_entry, new_dataset, new_version, new_variable, request.user, - all_variables) - - return JsonResponse(file_data_updated['data'], status=file_data_updated['status'], safe=False) - - else: - print(serializer.errors) - file_entry.delete() - return JsonResponse(serializer.errors, status=500, safe=False) - - def _verify_file_extension(file_name): return file_name.endswith('.nc4') or file_name.endswith('.nc') or file_name.endswith('.zip') @@ -304,20 +241,53 @@ def upload_user_data(request, filename): if request.user.space_left and file.size > request.user.space_left: return JsonResponse({'error': 'File is too big'}, status=500, safe=False) + # get metadata + metadata = json.loads(request.META.get('HTTP_FILEMETADATA')) + + serializer = UserFileMetadataSerializer(data=metadata) + + if not serializer.is_valid(): + print(serializer.errors) + return JsonResponse(serializer.errors, status=500, safe=False) + + dataset_name = metadata[USER_DATA_DATASET_FIELD_NAME] + dataset_pretty_name = metadata[USER_DATA_DATASET_FIELD_PRETTY_NAME] if metadata[ + USER_DATA_DATASET_FIELD_PRETTY_NAME] else dataset_name + version_name = metadata[USER_DATA_VERSION_FIELD_NAME] + version_pretty_name = metadata[USER_DATA_VERSION_FIELD_PRETTY_NAME] if metadata[ + USER_DATA_VERSION_FIELD_PRETTY_NAME] else version_name + + # creating version entry + new_version = create_version_entry(version_name, version_pretty_name, dataset_pretty_name, request.user) + new_variable = create_variable_entry('none', 'none', dataset_pretty_name, + request.user, 'n.a.') + + new_dataset = create_dataset_entry(dataset_name, dataset_pretty_name, new_version, new_variable, request.user) + file_data = { 'file': file, 'file_name': filename, 'owner': request.user.pk, - 'dataset': None, - 'version': None, - 'variable': None, + 'dataset': new_dataset.pk, + 'version': new_version.pk, + 'variable': new_variable.pk, 'upload_date': timezone.now() } - file_serializer = UploadSerializer(data=file_data) + file_serializer = UploadFileSerializer(data=file_data) if file_serializer.is_valid(): + # saving file file_serializer.save() - return JsonResponse(file_serializer.data, status=200, safe=False) + # need to get the path and assign it to the dataset as well as pass it to preprocessing function, so I don't + # have to open the db connection before file preprocessing. 
+ file_raw_path = file_serializer.data['get_raw_file_path'] + # now I can assign proper storage path + new_dataset.storage_path = file_raw_path + new_dataset.save() + + preprocess_file(file_serializer, file_raw_path) + + return JsonResponse(file_serializer.data, status=201, safe=False) else: print(file_serializer.errors) return JsonResponse(file_serializer.errors, status=500, safe=False) @@ -330,6 +300,20 @@ class Meta: fields = get_fields_as_list(UserDatasetFile) +class UploadFileSerializer(ModelSerializer): + class Meta: + model = UserDatasetFile + fields = get_fields_as_list(UserDatasetFile) + + requires_context = True + + def create(self, validated_data): + instance = super().create(validated_data) + with transaction.atomic(): + instance.save() + return instance + + class DatasetSerializer(ModelSerializer): # this serializer do not verify filters field, as the field is required and for now we don't provide any class Meta: diff --git a/environment/qa4sm_env.yml b/environment/qa4sm_env.yml index 9db3fe021..ffe6b11c6 100644 --- a/environment/qa4sm_env.yml +++ b/environment/qa4sm_env.yml @@ -249,7 +249,7 @@ dependencies: - pygeobase==0.4.0 - pygeogrids==0.4.2 - pynetcf==0.2.2 - - pytesmo==0.14.3 + - pytesmo==0.14.4 - pytest-cov==3.0.0 - pytest-django==4.5.2 - pytest-mpl==0.15.1 diff --git a/validator/mailer.py b/validator/mailer.py index 13aea83c9..458d7efbe 100644 --- a/validator/mailer.py +++ b/validator/mailer.py @@ -6,9 +6,26 @@ from django.conf import settings from api.frontend_urls import get_angular_url +# from validator.models import UserDatasetFile + __logger = logging.getLogger(__name__) +def send_failed_preprocessing_notification(file_entry): + __logger.info(f'Sending mail about failed preprocessing of the {file_entry.id} to user {file_entry.owner}...') + guidelines_url = settings.SITE_URL + get_angular_url('my-datasets') + + subject = '[QA4SM] File preprocessing failed' + body = f"""Dear {file_entry.owner.first_name} {file_entry.owner.last_name}, \n\n + Your file containing data for dataset {file_entry.dataset.pretty_name}, version {file_entry.version.pretty_name} \ + could not be processed. Please check if you uploaded proper file and if your file hs been prepared according to our\ + guidelines ({guidelines_url}). 
In case of further problems, please contact our team.\n\nBest regards,\nQA4SM team"""
+
+    _send_email(recipients=[file_entry.owner.email],
+                subject=subject,
+                body=body)
+
+
 def send_val_done_notification(val_run):
     __logger.info('Sending mail about validation {} to user {}...'.format(val_run.id, val_run.user))
diff --git a/validator/validation/user_data_processing.py b/validator/validation/user_data_processing.py
new file mode 100644
index 000000000..fd6b0be62
--- /dev/null
+++ b/validator/validation/user_data_processing.py
@@ -0,0 +1,76 @@
+from django.shortcuts import get_object_or_404
+from qa4sm_preprocessing.utils import preprocess_user_data
+
+from validator.models import UserDatasetFile, DataVariable
+from validator.mailer import send_failed_preprocessing_notification
+
+
+def get_sm_variable_names(variables):
+    key_sm_words = ['water', 'soil', 'moisture', 'soil_moisture', 'sm', 'ssm', 'water_content', 'soil', 'moisture',
+                    'swi', 'swvl1', 'soilmoi']
+    key_error_words = ['error', 'bias', 'uncertainty']
+    candidates = [variable for variable in variables if any([word in variable.lower() for word in key_sm_words])
+                  and not any([word in variable.lower() for word in key_error_words])]
+
+    sm_variables = [{
+        'name': var,
+        'long_name': variables[var].get("long_name", var),
+        'units': variables[var].get("units") if variables[var].get("units") else 'n.a.'
+    } for var in candidates]
+
+    if len(sm_variables) > 0:
+        return sm_variables[0]
+    else:
+        return {'name': '--none--',
+                'long_name': '--none--',
+                'units': 'n.a.'}
+
+
+def get_variables_from_the_reader(reader):
+    variables = reader.variable_description()
+    variables_dict_list = [
+        {'name': variable,
+         'long_name': variables[variable].get("long_name", variables[variable].get("standard_name", variable)),
+         'units': variables[variable].get("units") if variables[variable].get("units") else 'n.a.'
+         }
+        for variable in variables
+    ]
+
+    return variables_dict_list
+
+
+def user_data_preprocessing(file_uuid, file_path, file_raw_path):
+    try:
+        gridded_reader = preprocess_user_data(file_path, file_raw_path + '/timeseries')
+    except Exception as e:
+        file_entry = get_object_or_404(UserDatasetFile, id=file_uuid)
+        send_failed_preprocessing_notification(file_entry)
+        file_entry.delete()
+        return
+
+    # I get the file entry here and not before preprocessing, as the preprocessing takes time and if a db connection is
+    # opened for too long, an error is thrown. I would have to close the connection and open it one more time
+    file_entry = get_object_or_404(UserDatasetFile, id=file_uuid)
+    sm_variable = get_sm_variable_names(gridded_reader.variable_description())
+    all_variables = get_variables_from_the_reader(gridded_reader)
+
+    variable_entry = DataVariable.objects.get(pk=file_entry.variable_id)
+    # new_variable_data = {
+    #     'help_text': f'Variable {variable_name} of dataset {dataset_name} provided by user {user}.',
+    #     'min_value': max_value,
+    #     'max_value': min_value,
+    #     'unit': variable_unit if variable_unit else 'n.a.'
+    # }
+    # update variable
+    variable_entry.short_name = sm_variable['name']
+    variable_entry.pretty_name = sm_variable['long_name']
+    variable_entry.help_text = f'Variable {sm_variable["long_name"]} of dataset ' \
+                               f'{file_entry.dataset.pretty_name} provided by user {file_entry.owner.username}.'
+    variable_entry.unit = sm_variable['units'] if sm_variable['units'] else 'n.a.'
+    variable_entry.save()
+
+    file_entry.all_variables = all_variables
+    file_entry.metadata_submitted = True
+    file_entry.save()
+
+    return
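For context, the sketch below shows how a client could exercise the reworked flow end to end: metadata now travels with the upload request in the `fileMetadata` header (read as `HTTP_FILEMETADATA` by the view), the upload answers with 201 and the file id, preprocessing runs asynchronously in a separate process, and the new `get-user-file-by-id` endpoint is polled until `metadata_submitted` turns true (the UI polls once per minute). This is an illustrative sketch only, not part of the PR; the base URL, the authentication/CSRF handling and the metadata key names are assumptions.

```python
# Illustrative sketch (not part of this diff): driving the new upload + polling flow from a
# plain HTTP client. BASE_URL, the authenticated session and the metadata key names are
# assumptions -- the real keys come from api.variable_and_field_names / the Angular metadata
# form, which are not shown here. The endpoints, the fileMetadata header, the 201 status and
# the metadata_submitted flag are taken from the changes above.
import json
import time

import requests

BASE_URL = 'https://qa4sm.example/api'  # assumed host
session = requests.Session()            # assumed to already carry auth + CSRF cookies

metadata = {                            # placeholder key names
    'dataset_name': 'test_dataset',
    'dataset_pretty_name': 'test_dataset_pretty_name',
    'version_name': 'test_version',
    'version_pretty_name': 'test_version_pretty_name',
}

# One POST now both stores the file and creates the dataset/version/variable entries;
# the metadata is carried in the 'fileMetadata' header (HTTP_FILEMETADATA on the Django side).
with open('my_data.nc', 'rb') as nc_file:
    response = session.post(
        f'{BASE_URL}/upload-user-data/my_data.nc/',
        data=nc_file,
        headers={'fileMetadata': json.dumps(metadata)},
    )
assert response.status_code == 201      # was 200 before this change
file_id = response.json()['id']

# Preprocessing happens in a separate process; poll the new endpoint until it finishes,
# mirroring the one-minute interval used by refreshFilePreprocessingStatus() in the UI.
while True:
    poll = session.get(f'{BASE_URL}/get-user-file-by-id/{file_id}/')
    if poll.status_code == 404:
        # on failure the backend deletes the entry and mails the owner
        raise RuntimeError('preprocessing failed, file entry was removed')
    if poll.json().get('metadata_submitted'):
        break
    time.sleep(60)
```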