Skip to content

Commit

Permalink
test: add CKAN harvester test of delete_missing option
Browse files Browse the repository at this point in the history
  • Loading branch information
danielcoelhocgu committed May 9, 2024
1 parent 6cdc217 commit 3f59de1
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 3 deletions.
135 changes: 133 additions & 2 deletions ckanext/harvest/tests/harvesters/mock_ckan.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def do_GET(self):
if params['start'] != '0':
datasets = []
elif set(params.keys()) == set(['rows', 'start']):
datasets = ['dataset1', DATASETS[1]['name']]
datasets = [d['name'] for d in DATASETS]
elif set(params.keys()) == set(['fq', 'rows', 'start']) and \
params['fq'] == '-organization:org1':
datasets = [DATASETS[1]['name']]
Expand Down Expand Up @@ -489,7 +489,138 @@ def convert_dataset_to_restful_form(dataset):
"revision_id": "3bd6ced3-35b2-4b20-94e2-c596e24bc375",
"date_released": "30/7/2010",
"theme-primary": "Towns & Cities"
}
},
{
"id": "243592d4-5a74-4139-8a9a-02c8a5fd0caf",
"name": "painel-integridade-publica",
"author": None,
"author_email": None,
"creator_user_id": "a0e37bbb-9859-43f8-836a-fc40406d83de",
"isopen": True,
"license_id": "odc-pddl",
"license_title": "Open Data Commons Public Domain Dedication and License (PDDL)",
"license_url": "http://www.opendefinition.org/licenses/odc-pddl",
"maintainer": "Coordenação-Geral de Monitoramento e Avaliação (CGMAV/DIPIN/SIP)",
"maintainer_email": "sip.dipin@cgu.gov.br",
"metadata_created": "2023-06-12T20:57:57.479383",
"metadata_modified": "2023-06-12T20:57:57.479390",
"notes": "O Painel Integridade Pública permite conferir informações sobre a estruturação, "
"a execução e o monitoramento de programas de integridade em órgãos e entidades "
"do Governo Federal",
"num_resources": 3,
"num_tags": 3,
"organization": {
"id": "539d7aca-df8e-4033-ad9d-191444f1322d",
"name": "controladoria-geral-da-uniao",
"title": "Controladoria-Geral da União",
"type": "organization",
"description": "A Controladoria-Geral da União (CGU) é o órgão"
"de controle interno do Governo Federal",
"image_url": "https://dados.gov.br/api/publico/s3/19987297-90c6-472b-9ee2-c2106fb6afbb.png",
"created": "2016-08-19T08:03:46.882221",
"is_organization": True,
"approval_status": "approved",
"state": "active"
},
"owner_org": "539d7aca-df8e-4033-ad9d-191444f1322d",
"private": False,
"state": "active",
"title": "Painel Integridade Pública",
"type": "dataset",
"url": None,
"version": "1.0",
"extras": [],
"groups": [],
"resources": [
{
"cache_last_updated": None,
"cache_url": None,
"created": "2023-06-12T20:57:57.618980",
"description": "Mapeamento",
"format": "CSV",
"hash": "",
"id": "309d3d05-3a30-45fd-a4b1-caf8c53e94fc",
"last_modified": None,
"mimetype": None,
"mimetype_inner": None,
"name": "2023",
"package_id": "243592d4-5a74-4139-8a9a-02c8a5fd0caf",
"position": 0,
"resource_type": None,
"size": 0,
"state": "active",
"url": "https://dadosabertos-download.cgu.gov.br/IntegridadePublica/Dados_Mapeamento_CSV.csv",
"url_type": None,
},
{
"cache_last_updated": None,
"cache_url": None,
"created": "2023-06-12T20:57:57.618985",
"description": "Dados coletados pela CGU",
"format": "CSV",
"hash": "",
"id": "71433fcb-a6a9-433e-a3fc-d9f8acbc75b8",
"idTipo": 1,
"last_modified": None,
"mimetype": None,
"mimetype_inner": None,
"name": "2022",
"package_id": "243592d4-5a74-4139-8a9a-02c8a5fd0caf",
"position": 1,
"resource_type": None,
"size": 0,
"state": "active",
"url": "https://dadosabertos-download.cgu.gov.br/IntegridadePublica/Dados_Etapa_2_CSV.csv",
"url_type": None,
},
{
"cache_last_updated": None,
"cache_url": None,
"created": "2023-06-12T20:57:57.618987",
"description": "Dados coletados pela CGU",
"format": "CSV",
"hash": "",
"id": "88554739-e83d-44fe-b6ad-f8e49ad7a1e6",
"last_modified": None,
"metadata_modified": "2023-06-12T20:57:57.380779",
"mimetype": None,
"mimetype_inner": None,
"name": "2021",
"package_id": "243592d4-5a74-4139-8a9a-02c8a5fd0caf",
"position": 2,
"resource_type": None,
"size": 0,
"state": "active",
"url": "https://dadosabertos-download.cgu.gov.br/IntegridadePublica/Dados_Etapa_1_CSV.csv",
"url_type": None,
},
],
"tags": [
{
"display_name": "Integridade",
"id": "a709b28c-936c-4714-99ef-cd77cb144b9f",
"name": "Integridade",
"state": "active",
"vocabulary_id": None,
},
{
"display_name": "Integridade Pública",
"id": "2b52014c-da45-4837-80de-0dfe467a9a51",
"name": "Integridade Pública",
"state": "active",
"vocabulary_id": None,
},
{
"display_name": "Programa de Integridade",
"id": "ee16b70a-41e2-41ec-a521-11049aecf767",
"name": "Programa de Integridade",
"state": "active",
"vocabulary_id": None,
},
],
"relationships_as_subject": [],
"relationships_as_object": [],
},
]

INVALID_TAGS = [
Expand Down
34 changes: 33 additions & 1 deletion ckanext/harvest/tests/harvesters/test_ckanharvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ckanext.harvest.harvesters.ckanharvester import ContentFetchError
from ckanext.harvest.tests.factories import (HarvestSourceObj, HarvestJobObj,
HarvestObjectObj)
from ckanext.harvest.tests.lib import run_harvest
from ckanext.harvest.tests.lib import run_harvest, run_harvest_job
import ckanext.harvest.model as harvest_model
from ckanext.harvest.harvesters.base import HarvesterBase
from ckanext.harvest.harvesters.ckanharvester import CKANHarvester
Expand Down Expand Up @@ -357,3 +357,35 @@ def test_get_content_handles_http_error(
harvester._get_content("http://test.example.gov.uk")

assert str(context.value) == 'HTTP error: 404 http://test.example.gov.uk'

def test_delete_missing(self):
    """Check that ``delete_missing`` removes datasets that vanish upstream.

    The source is harvested twice against the mock CKAN server. Before
    the second run the third entry of ``mock_ckan.DATASETS`` is removed;
    the harvest report must then flag that dataset as ``deleted`` and the
    local package must end up in the ``deleted`` state.
    """
    ckan_harvester = CKANHarvester()
    source_config = json.dumps({'delete_missing': True})

    # Harvest source pointing at the mock CKAN server.
    source = HarvestSourceObj(
        url='http://localhost:%s' % mock_ckan.PORT,
        config=source_config,
        source_type=ckan_harvester.info()['name'])

    # First run: the dataset that will later disappear must be harvested.
    first_job = HarvestJobObj(source=source, run=False)
    first_results = run_harvest_job(first_job, ckan_harvester)

    assert mock_ckan.DATASETS[2]['id'] in first_results

    # Second run: serve a copy of the dataset list without that entry,
    # leaving the module-level fixture itself untouched.
    remaining_datasets = copy.deepcopy(mock_ckan.DATASETS)
    deleted_id = remaining_datasets.pop(2)['id']
    with patch('ckanext.harvest.tests.harvesters.mock_ckan.DATASETS',
               remaining_datasets):
        second_job = HarvestJobObj(source=source, run=False)
        second_results = run_harvest_job(second_job, ckan_harvester)

    # The missing dataset has to show up in the report as deleted ...
    assert deleted_id in second_results
    assert second_results[deleted_id]['report_status'] == 'deleted'

    # ... and the locally stored package has to be in the deleted state.
    local_package = call_action('package_show', None, id=deleted_id)
    assert local_package['state'] == 'deleted'

0 comments on commit 3f59de1

Please sign in to comment.