From 7053fdb0dc6c30101466aaf748f656bb0d0bb47d Mon Sep 17 00:00:00 2001 From: mourya-33 <134511711+mourya-33@users.noreply.github.com> Date: Tue, 12 Nov 2024 02:52:05 -0500 Subject: [PATCH 01/44] Updating overly permissive policies tagged by checkov for environment role using least privilege principles (#1632) ### Feature or Bugfix - Bugfix ### Detail This PR is to restrict the overly permissive permissions in the environment role IAM policies that are tagged as FAILURES by checkov. Instead of using Get* and List* we specify the actions and type of resources granted. In addition, this PR uses KMS keys to encrypt the custom resources Lambdas in the S3 datasets module ### Relates - https://github.com/data-dot-all/dataall/issues/1524 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? N/A - What precautions are you taking before deserializing the data you consume? N/A - Is injection prevented by parametrizing queries? N/A - Have you ensured no `eval` or similar functions are used? N/A - Does this PR introduce any functionality or component that requires authorization? N/A - How have you ensured it respects the existing AuthN/AuthZ mechanisms? N/A - Are you logging failed auth attempts? N/A - Are you using or adding any cryptographic features? N/A - Do you use a standard proven implementations? N/A - Are the used keys controlled by the customer? Where are they stored? N/A - Are you introducing any new policies/roles/users? N/A - Have you used the least-privilege principle? How? Restricting overly permissive permissions to the required resources / principle only. By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--------- Co-authored-by: Mourya Darivemula --- .../env_role_core_policies/cloudformation.py | 13 +++++-- .../env_role_core_policies/service_policy.py | 14 ++++++- .../cdk/dataset_custom_resources_extension.py | 32 +++++++++++++++ .../cdk/env_role_dataset_glue_policy.py | 39 ++++++++++++++++++- 4 files changed, 91 insertions(+), 7 deletions(-) diff --git a/backend/dataall/core/environment/cdk/env_role_core_policies/cloudformation.py b/backend/dataall/core/environment/cdk/env_role_core_policies/cloudformation.py index 75be97e9b..0cfbc3f1a 100644 --- a/backend/dataall/core/environment/cdk/env_role_core_policies/cloudformation.py +++ b/backend/dataall/core/environment/cdk/env_role_core_policies/cloudformation.py @@ -24,10 +24,6 @@ def get_statements(self, group_permissions, **kwargs): 'cloudformation:ListImports', 'cloudformation:DescribeAccountLimits', 'cloudformation:DescribeStackDriftDetectionStatus', - 'cloudformation:Cancel*', - 'cloudformation:Continue*', - 'cloudformation:CreateChangeSet', - 'cloudformation:ExecuteChangeSet', 'cloudformation:CreateStackSet', 'cloudformation:Get*', 'cloudformation:Describe*', @@ -36,6 +32,15 @@ def get_statements(self, group_permissions, **kwargs): ], resources=['*'], ), + iam.PolicyStatement( + actions=[ + 'cloudformation:Cancel*', + 'cloudformation:Continue*', + 'cloudformation:CreateChangeSet', + 'cloudformation:ExecuteChangeSet', + ], + resources=[f'arn:aws:cloudformation:*:{self.account}:*/{self.resource_prefix}*'], + ), iam.PolicyStatement( # sid="DeleteTeamCloudFormation", actions=[ diff --git a/backend/dataall/core/environment/cdk/env_role_core_policies/service_policy.py b/backend/dataall/core/environment/cdk/env_role_core_policies/service_policy.py index 0e68dc694..0ad5b67ff 100644 --- a/backend/dataall/core/environment/cdk/env_role_core_policies/service_policy.py +++ b/backend/dataall/core/environment/cdk/env_role_core_policies/service_policy.py @@ -64,14 +64,24 @@ def generate_policies(self) -> 
[aws_iam.ManagedPolicy]: 'events:ListRuleNamesByTarget', 'iam:list*', 'iam:Get*', - 'iam:CreatePolicy', - 'iam:CreateServiceLinkedRole', 'tag:GetResources', 'tag:GetTagValues', 'tag:GetTagKeys', ], resources=['*'], ), + aws_iam.PolicyStatement( + sid='IAMCreatePolicy', + effect=aws_iam.Effect.ALLOW, + actions=[ + 'iam:CreatePolicy', + 'iam:CreateServiceLinkedRole', + ], + resources=[ + f'arn:aws:iam::{self.account}:policy/{self.resource_prefix}*', + f'arn:aws:iam::{self.account}:role/aws-service-role/*', + ], + ), aws_iam.PolicyStatement( sid='CreateServiceRole', actions=[ diff --git a/backend/dataall/modules/s3_datasets/cdk/dataset_custom_resources_extension.py b/backend/dataall/modules/s3_datasets/cdk/dataset_custom_resources_extension.py index 2686a3679..3f9b7a57e 100644 --- a/backend/dataall/modules/s3_datasets/cdk/dataset_custom_resources_extension.py +++ b/backend/dataall/modules/s3_datasets/cdk/dataset_custom_resources_extension.py @@ -29,6 +29,34 @@ def extent(setup: EnvironmentSetup): setup=setup, environment=_environment, group_roles=setup.group_roles, default_role=setup.default_role ) + lambda_env_key = kms.Key( + setup, + f'{_environment.resourcePrefix}-ds-cst-lambda-env-var-key', + removal_policy=RemovalPolicy.DESTROY, + alias=f'{_environment.resourcePrefix}-ds-cst-lambda-env-var-key', + enable_key_rotation=True, + policy=iam.PolicyDocument( + statements=[ + iam.PolicyStatement( + resources=['*'], + effect=iam.Effect.ALLOW, + principals=[ + iam.AccountPrincipal(account_id=_environment.AwsAccountId), + ], + actions=['kms:*'], + ), + iam.PolicyStatement( + resources=['*'], + effect=iam.Effect.ALLOW, + principals=[ + iam.ServicePrincipal(service='lambda.amazonaws.com'), + ], + actions=['kms:GenerateDataKey*', 'kms:Decrypt'], + ), + ], + ), + ) + # Lakeformation default settings custom resource # Set PivotRole as Lake Formation data lake admin entry_point = str( @@ -55,6 +83,7 @@ def extent(setup: EnvironmentSetup): 'DEFAULT_ENV_ROLE_ARN': 
_environment.EnvironmentDefaultIAMRoleArn, 'DEFAULT_CDK_ROLE_ARN': _environment.CDKRoleArn, }, + environment_encryption=lambda_env_key, dead_letter_queue_enabled=True, dead_letter_queue=lakeformation_cr_dlq, on_failure=lambda_destination.SqsDestination(lakeformation_cr_dlq), @@ -64,6 +93,7 @@ def extent(setup: EnvironmentSetup): setup, f'{_environment.resourcePrefix}LakeformationDefaultSettingsProvider', on_event_handler=lf_default_settings_custom_resource, + provider_function_env_encryption=lambda_env_key, ) default_lf_settings = CustomResource( @@ -119,6 +149,7 @@ def extent(setup: EnvironmentSetup): 'DEFAULT_ENV_ROLE_ARN': _environment.EnvironmentDefaultIAMRoleArn, 'DEFAULT_CDK_ROLE_ARN': _environment.CDKRoleArn, }, + environment_encryption=lambda_env_key, dead_letter_queue_enabled=True, dead_letter_queue=gluedb_lf_cr_dlq, on_failure=lambda_destination.SqsDestination(gluedb_lf_cr_dlq), @@ -130,6 +161,7 @@ def extent(setup: EnvironmentSetup): setup, f'{_environment.resourcePrefix}GlueDbCustomResourceProvider', on_event_handler=gluedb_lf_custom_resource, + provider_function_env_encryption=lambda_env_key, ) ssm.StringParameter( setup, diff --git a/backend/dataall/modules/s3_datasets/cdk/env_role_dataset_glue_policy.py b/backend/dataall/modules/s3_datasets/cdk/env_role_dataset_glue_policy.py index ca7604e27..18c294ceb 100644 --- a/backend/dataall/modules/s3_datasets/cdk/env_role_dataset_glue_policy.py +++ b/backend/dataall/modules/s3_datasets/cdk/env_role_dataset_glue_policy.py @@ -100,11 +100,48 @@ def get_statements(self, group_permissions, **kwargs): effect=iam.Effect.ALLOW, actions=[ 'glue:Get*', - 'glue:List*', + 'glue:ListDevEndpoints', + 'glue:ListBlueprints', + 'glue:ListRegistries', + 'glue:ListTriggers', + 'glue:ListUsageProfiles', + 'glue:ListCrawlers', + 'glue:ListCrawls', + 'glue:ListJobs', + 'glue:ListCustomEntityTypes', + 'glue:ListSessions', + 'glue:ListWorkflows', 'glue:BatchGet*', ], resources=['*'], ), + iam.PolicyStatement( + 
effect=iam.Effect.ALLOW, + actions=[ + 'glue:ListDataQualityRuleRecommendationRuns', + 'glue:ListSchemaVersions', + 'glue:QuerySchemaVersionMetadata', + 'glue:ListMLTransforms', + 'glue:ListStatements', + 'glue:ListSchemas', + 'glue:ListDataQualityRulesetEvaluationRuns', + 'glue:ListTableOptimizerRuns', + 'glue:GetMLTaskRuns', + 'glue:ListDataQualityRulesets', + 'glue:ListDataQualityResults', + 'glue:GetMLTransforms', + ], + resources=[ + f'arn:aws:glue:*:{self.account}:schema/*', + f'arn:aws:glue:*:{self.account}:registry/*', + f'arn:aws:glue:*:{self.account}:dataQualityRuleset/*', + f'arn:aws:glue:*:{self.account}:table/*/*', + f'arn:aws:glue:*:{self.account}:database/*', + f'arn:aws:glue:*:{self.account}:mlTransform/*', + f'arn:aws:glue:*:{self.account}:catalog', + f'arn:aws:glue:*:{self.account}:session/*', + ], + ), iam.PolicyStatement( # sid="GlueCreateS3Bucket", effect=iam.Effect.ALLOW, From c26963c5bd425ab1ed9f63f72ba81e8d3e9d8866 Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Tue, 12 Nov 2024 09:30:14 -0500 Subject: [PATCH 02/44] Update sanitization technique (#1692) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Bugfix - Add Validation Checks for the Following Mutations - UpdateGroupTenantPermissions —> Ensure valid Group - CreateWorksheet —> Ensure valid Group Owner - Add Sanitization for Inputs using `tags.contains({{{term}}}` to properly handle non-alphanumeric chars - Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? 
- Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../dataall/base/utils/naming_convention.py | 34 +++++++++++++++---- .../db/environment_repositories.py | 5 ++- .../db/organization_repositories.py | 6 +++- .../services/tenant_policy_service.py | 8 +++++ .../dataall/core/vpc/db/vpc_repositories.py | 4 +++ .../modules/datapipelines/api/input_types.py | 10 ------ .../datasets_base/db/dataset_repositories.py | 25 ++++++++++---- .../notebooks/db/notebook_repository.py | 8 ++++- .../s3_datasets/api/profiling/input_types.py | 10 ------ .../api/storage_location/input_types.py | 9 ----- .../modules/s3_datasets/cdk/dataset_stack.py | 1 + .../s3_datasets/db/dataset_repositories.py | 8 +++-- .../modules/worksheets/api/resolvers.py | 3 ++ .../worksheets/db/worksheet_repositories.py | 8 ++++- deploy/stacks/lambda_api.py | 6 +--- tests/core/permissions/test_tenant.py | 19 ++++++++++- 16 files changed, 110 insertions(+), 54 deletions(-) diff --git a/backend/dataall/base/utils/naming_convention.py b/backend/dataall/base/utils/naming_convention.py index 178bb1882..d9101c8ba 100644 --- a/backend/dataall/base/utils/naming_convention.py +++ b/backend/dataall/base/utils/naming_convention.py @@ -1,28 +1,46 @@ from enum import Enum - +import re from .slugify import slugify class NamingConventionPattern(Enum): - S3 = {'regex': '[^a-zA-Z0-9-]', 'separator': '-', 'max_length': 63} + S3 = { + 
'regex': '[^a-zA-Z0-9-]', + 'separator': '-', + 'max_length': 63, + 'valid_external_regex': '(?!(^xn--|.+-s3alias$))^[a-z0-9][a-z0-9-]{1,61}[a-z0-9]$', + } + KMS = {'regex': '[^a-zA-Z0-9-]$', 'separator': '-', 'max_length': 63, 'valid_external_regex': '^[a-zA-Z0-9_-]+$'} IAM = {'regex': '[^a-zA-Z0-9-_]', 'separator': '-', 'max_length': 63} # Role names up to 64 chars IAM_POLICY = {'regex': '[^a-zA-Z0-9-_]', 'separator': '-', 'max_length': 128} # Policy names up to 128 chars - GLUE = {'regex': '[^a-zA-Z0-9_]', 'separator': '_', 'max_length': 240} # Limit 255 - 15 extra chars buffer + GLUE = { + 'regex': '[^a-zA-Z0-9_]', + 'separator': '_', + 'max_length': 240, + 'valid_external_regex': '^[a-zA-Z0-9_]+$', + } # Limit 255 - 15 extra chars buffer GLUE_ETL = {'regex': '[^a-zA-Z0-9-]', 'separator': '-', 'max_length': 52} NOTEBOOK = {'regex': '[^a-zA-Z0-9-]', 'separator': '-', 'max_length': 63} MLSTUDIO_DOMAIN = {'regex': '[^a-zA-Z0-9-]', 'separator': '-', 'max_length': 63} DEFAULT = {'regex': '[^a-zA-Z0-9-_]', 'separator': '-', 'max_length': 63} + DEFAULT_SEARCH = {'regex': '[^a-zA-Z0-9-_ ]'} OPENSEARCH = {'regex': '[^a-z0-9-]', 'separator': '-', 'max_length': 27} OPENSEARCH_SERVERLESS = {'regex': '[^a-z0-9-]', 'separator': '-', 'max_length': 31} + DATA_FILTERS = {'regex': '[^a-z0-9_]', 'separator': '_', 'max_length': 31} + REDSHIFT_DATASHARE = { + 'regex': '[^a-zA-Z0-9_]', + 'separator': '_', + 'max_length': 1000, + } # Maximum length of 2147483647 class NamingConventionService: def __init__( self, target_label: str, - target_uri: str, pattern: NamingConventionPattern, - resource_prefix: str, + target_uri: str = '', + resource_prefix: str = '', ): self.target_label = target_label self.target_uri = target_uri if target_uri else '' @@ -37,4 +55,8 @@ def build_compliant_name(self) -> str: separator = NamingConventionPattern[self.service].value['separator'] max_length = NamingConventionPattern[self.service].value['max_length'] suffix = f'-{self.target_uri}' if 
len(self.target_uri) else '' - return f"{slugify(self.resource_prefix + '-' + self.target_label[:(max_length- len(self.resource_prefix + self.target_uri))] + suffix, regex_pattern=fr'{regex}', separator=separator, lowercase=True)}" + return f"{slugify(self.resource_prefix + '-' + self.target_label[:(max_length - len(self.resource_prefix + self.target_uri))] + suffix, regex_pattern=fr'{regex}', separator=separator, lowercase=True)}" + + def sanitize(self): + regex = NamingConventionPattern[self.service].value['regex'] + return re.sub(regex, '', self.target_label) diff --git a/backend/dataall/core/environment/db/environment_repositories.py b/backend/dataall/core/environment/db/environment_repositories.py index 7aca43b40..860cfb6fa 100644 --- a/backend/dataall/core/environment/db/environment_repositories.py +++ b/backend/dataall/core/environment/db/environment_repositories.py @@ -1,3 +1,4 @@ +from dataall.base.utils.naming_convention import NamingConventionPattern, NamingConventionService from dataall.core.environment.db.environment_models import ( EnvironmentParameter, Environment, @@ -281,7 +282,9 @@ def query_user_environments(session, username, groups, filter) -> Query: or_( Environment.label.ilike('%' + term + '%'), Environment.description.ilike('%' + term + '%'), - Environment.tags.contains(f'{{{term}}}'), + Environment.tags.contains( + f'{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=term).sanitize()}}}' + ), Environment.region.ilike('%' + term + '%'), ) ) diff --git a/backend/dataall/core/organizations/db/organization_repositories.py b/backend/dataall/core/organizations/db/organization_repositories.py index 0fbf23935..4134e7fc6 100644 --- a/backend/dataall/core/organizations/db/organization_repositories.py +++ b/backend/dataall/core/organizations/db/organization_repositories.py @@ -7,6 +7,8 @@ from dataall.core.organizations.db import organization_models as models from dataall.core.environment.db.environment_models 
import Environment from dataall.base.context import get_context +from dataall.base.utils.naming_convention import NamingConventionPattern, NamingConventionService + logger = logging.getLogger(__name__) @@ -45,7 +47,9 @@ def query_user_organizations(session, username, groups, filter) -> Query: or_( models.Organization.label.ilike('%' + filter.get('term') + '%'), models.Organization.description.ilike('%' + filter.get('term') + '%'), - models.Organization.tags.contains(f"{{{filter.get('term')}}}"), + models.Organization.tags.contains( + f"{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=filter.get('term')).sanitize()}}}" + ), ) ) return query.order_by(models.Organization.label).distinct() diff --git a/backend/dataall/core/permissions/services/tenant_policy_service.py b/backend/dataall/core/permissions/services/tenant_policy_service.py index 2d1d1511b..d0c953d09 100644 --- a/backend/dataall/core/permissions/services/tenant_policy_service.py +++ b/backend/dataall/core/permissions/services/tenant_policy_service.py @@ -8,12 +8,17 @@ from dataall.core.permissions.db.tenant.tenant_repositories import TenantRepository from dataall.core.permissions.services.permission_service import PermissionService from dataall.core.permissions.db.tenant.tenant_models import Tenant +from dataall.base.services.service_provider_factory import ServiceProviderFactory import logging +import os from functools import wraps log = logging.getLogger('Permissions') +ENVNAME = os.getenv('envname', 'local') +REGION = os.getenv('AWS_REGION', 'eu-west-1') + class RequestValidationService: @staticmethod @@ -69,6 +74,9 @@ def validate_update_group_permission_params(data): raise exceptions.RequiredParameter('permissions') if not data.get('groupUri'): raise exceptions.RequiredParameter('groupUri') + groups = ServiceProviderFactory.get_service_provider_instance().list_groups(envname=ENVNAME, region=REGION) + if data.get('groupUri') not in groups: + raise 
exceptions.InvalidInput('groupUri', data.get('groupUri'), ' a valid group') class TenantPolicyValidationService: diff --git a/backend/dataall/core/vpc/db/vpc_repositories.py b/backend/dataall/core/vpc/db/vpc_repositories.py index f5b4865b6..4fbca29bf 100644 --- a/backend/dataall/core/vpc/db/vpc_repositories.py +++ b/backend/dataall/core/vpc/db/vpc_repositories.py @@ -4,6 +4,7 @@ from dataall.base.db import exceptions from dataall.core.vpc.db.vpc_models import Vpc +from dataall.base.utils.naming_convention import NamingConventionPattern, NamingConventionService log = logging.getLogger(__name__) @@ -48,6 +49,9 @@ def query_environment_networks(session, uri, filter): or_( Vpc.label.ilike('%' + term + '%'), Vpc.VpcId.ilike('%' + term + '%'), + Vpc.tags.contains( + f'{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=term).sanitize()}}}' + ), ) ) return query.order_by(Vpc.label) diff --git a/backend/dataall/modules/datapipelines/api/input_types.py b/backend/dataall/modules/datapipelines/api/input_types.py index 3d6b4556a..0b026f38c 100644 --- a/backend/dataall/modules/datapipelines/api/input_types.py +++ b/backend/dataall/modules/datapipelines/api/input_types.py @@ -45,16 +45,6 @@ ], ) -DataPipelineEnvironmentFilter = gql.InputType( - name='DataPipelineEnvironmentFilter', - arguments=[ - gql.Argument(name='term', type=gql.String), - gql.Argument(name='page', type=gql.Integer), - gql.Argument(name='pageSize', type=gql.Integer), - gql.Argument(name='pipelineUri', type=gql.String), - ], -) - DataPipelineBrowseInput = gql.InputType( name='DataPipelineBrowseInput', arguments=[ diff --git a/backend/dataall/modules/datasets_base/db/dataset_repositories.py b/backend/dataall/modules/datasets_base/db/dataset_repositories.py index 05dab4c93..41152e394 100644 --- a/backend/dataall/modules/datasets_base/db/dataset_repositories.py +++ b/backend/dataall/modules/datasets_base/db/dataset_repositories.py @@ -6,6 +6,10 @@ from 
dataall.base.db.exceptions import ObjectNotFound from dataall.core.activity.db.activity_models import Activity from dataall.modules.datasets_base.db.dataset_models import DatasetBase +from dataall.base.utils.naming_convention import ( + NamingConventionService, + NamingConventionPattern, +) logger = logging.getLogger(__name__) @@ -65,9 +69,11 @@ def _query_all_user_datasets(session, username, groups, all_subqueries: List[Que term = filter['term'] query = query.filter( or_( - DatasetBase.description.ilike(term + '%%'), - DatasetBase.label.ilike(term + '%%'), - DatasetBase.tags.contains(f'{{{term}}}'), + DatasetBase.label.ilike('%' + term + '%'), + DatasetBase.description.ilike('%' + term + '%'), + DatasetBase.tags.contains( + f'{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=term).sanitize()}}}' + ), ) ) return query.order_by(DatasetBase.label).distinct(DatasetBase.datasetUri, DatasetBase.label) @@ -90,10 +96,14 @@ def _query_user_datasets(session, username, groups, filter) -> Query: ) ) if filter and filter.get('term'): + term = filter['term'] query = query.filter( or_( - DatasetBase.description.ilike(filter.get('term') + '%%'), - DatasetBase.label.ilike(filter.get('term') + '%%'), + DatasetBase.label.ilike('%' + term + '%'), + DatasetBase.description.ilike('%' + term + '%'), + DatasetBase.tags.contains( + f'{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=term).sanitize()}}}' + ), ) ) return query.order_by(DatasetBase.label).distinct(DatasetBase.datasetUri, DatasetBase.label) @@ -124,8 +134,9 @@ def _query_environment_datasets(session, uri, filter) -> Query: or_( DatasetBase.label.ilike('%' + term + '%'), DatasetBase.description.ilike('%' + term + '%'), - DatasetBase.tags.contains(f'{{{term}}}'), - DatasetBase.region.ilike('%' + term + '%'), + DatasetBase.tags.contains( + f'{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=term).sanitize()}}}' + ), 
) ) return query.order_by(DatasetBase.label) diff --git a/backend/dataall/modules/notebooks/db/notebook_repository.py b/backend/dataall/modules/notebooks/db/notebook_repository.py index f5219fdde..384bc2f67 100644 --- a/backend/dataall/modules/notebooks/db/notebook_repository.py +++ b/backend/dataall/modules/notebooks/db/notebook_repository.py @@ -10,6 +10,10 @@ from dataall.base.db import paginate from dataall.modules.notebooks.db.notebook_models import SagemakerNotebook from dataall.core.environment.services.environment_resource_manager import EnvironmentResource +from dataall.base.utils.naming_convention import ( + NamingConventionService, + NamingConventionPattern, +) class NotebookRepository(EnvironmentResource): @@ -51,7 +55,9 @@ def _query_user_notebooks(self, username, groups, filter) -> Query: or_( SagemakerNotebook.description.ilike(term + '%%'), SagemakerNotebook.label.ilike(term + '%%'), - SagemakerNotebook.tags.contains(f'{{{term}}}'), + SagemakerNotebook.tags.contains( + f'{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=term).sanitize()}}}' + ), ) ) return query.order_by(SagemakerNotebook.label) diff --git a/backend/dataall/modules/s3_datasets/api/profiling/input_types.py b/backend/dataall/modules/s3_datasets/api/profiling/input_types.py index 52d31e832..655564dc9 100644 --- a/backend/dataall/modules/s3_datasets/api/profiling/input_types.py +++ b/backend/dataall/modules/s3_datasets/api/profiling/input_types.py @@ -8,13 +8,3 @@ gql.Argument('tableUri', gql.String), ], ) - - -DatasetProfilingRunFilter = gql.InputType( - name='DatasetProfilingRunFilter', - arguments=[ - gql.Argument(name='page', type=gql.Integer), - gql.Argument(name='pageSize', type=gql.Integer), - gql.Argument(name='term', type=gql.String), - ], -) diff --git a/backend/dataall/modules/s3_datasets/api/storage_location/input_types.py b/backend/dataall/modules/s3_datasets/api/storage_location/input_types.py index 99eb89686..a68ad82c3 100644 --- 
a/backend/dataall/modules/s3_datasets/api/storage_location/input_types.py +++ b/backend/dataall/modules/s3_datasets/api/storage_location/input_types.py @@ -30,12 +30,3 @@ gql.Argument('pageSize', gql.Integer), ], ) - - -DatasetAccessPointFilter = gql.InputType( - name='DatasetAccessPointFilter', - arguments=[ - gql.Argument(name='page', type=gql.Integer), - gql.Argument(name='pageSize', type=gql.Integer), - ], -) diff --git a/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py b/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py index bb5b51c6d..81a59c4d5 100644 --- a/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py +++ b/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py @@ -11,6 +11,7 @@ Duration, CfnResource, CustomResource, + RemovalPolicy, Tags, ) from aws_cdk.aws_glue import CfnCrawler diff --git a/backend/dataall/modules/s3_datasets/db/dataset_repositories.py b/backend/dataall/modules/s3_datasets/db/dataset_repositories.py index 4c1283cb2..11eb12d18 100644 --- a/backend/dataall/modules/s3_datasets/db/dataset_repositories.py +++ b/backend/dataall/modules/s3_datasets/db/dataset_repositories.py @@ -219,7 +219,9 @@ def query_environment_group_datasets(session, env_uri, group_uri, filter) -> Que or_( S3Dataset.label.ilike('%' + term + '%'), S3Dataset.description.ilike('%' + term + '%'), - S3Dataset.tags.contains(f'{{{term}}}'), + S3Dataset.tags.contains( + f'{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=term).sanitize()}}}' + ), S3Dataset.region.ilike('%' + term + '%'), ) ) @@ -236,7 +238,9 @@ def query_environment_imported_datasets(session, uri, filter) -> Query: or_( S3Dataset.label.ilike('%' + term + '%'), S3Dataset.description.ilike('%' + term + '%'), - S3Dataset.tags.contains(f'{{{term}}}'), + S3Dataset.tags.contains( + f'{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=term).sanitize()}}}' + ), S3Dataset.region.ilike('%' + term + '%'), ) ) diff --git 
a/backend/dataall/modules/worksheets/api/resolvers.py b/backend/dataall/modules/worksheets/api/resolvers.py index 450667217..1c72ff4fe 100644 --- a/backend/dataall/modules/worksheets/api/resolvers.py +++ b/backend/dataall/modules/worksheets/api/resolvers.py @@ -11,6 +11,9 @@ def create_worksheet(context: Context, source, input: dict = None): raise exceptions.RequiredParameter(input) if not input.get('SamlAdminGroupName'): raise exceptions.RequiredParameter('groupUri') + if input.get('SamlAdminGroupName') not in context.groups: + raise exceptions.InvalidInput('groupUri', input.get('SamlAdminGroupName'), " a user's groups") + if not input.get('label'): raise exceptions.RequiredParameter('label') diff --git a/backend/dataall/modules/worksheets/db/worksheet_repositories.py b/backend/dataall/modules/worksheets/db/worksheet_repositories.py index aea51761b..a2b4b8054 100644 --- a/backend/dataall/modules/worksheets/db/worksheet_repositories.py +++ b/backend/dataall/modules/worksheets/db/worksheet_repositories.py @@ -8,6 +8,10 @@ from dataall.core.environment.services.environment_resource_manager import EnvironmentResource from dataall.base.db import paginate from dataall.modules.worksheets.db.worksheet_models import Worksheet, WorksheetQueryResult +from dataall.base.utils.naming_convention import ( + NamingConventionService, + NamingConventionPattern, +) class WorksheetRepository(EnvironmentResource): @@ -41,7 +45,9 @@ def query_user_worksheets(session, username, groups, filter) -> Query: or_( Worksheet.label.ilike('%' + filter.get('term') + '%'), Worksheet.description.ilike('%' + filter.get('term') + '%'), - Worksheet.tags.contains(f"{{{filter.get('term')}}}"), + Worksheet.tags.contains( + f"{{{NamingConventionService(pattern=NamingConventionPattern.DEFAULT_SEARCH, target_label=filter.get('term')).sanitize()}}}" + ), ) ) return query.order_by(Worksheet.label) diff --git a/deploy/stacks/lambda_api.py b/deploy/stacks/lambda_api.py index 797a9097f..629cb3122 100644 --- 
a/deploy/stacks/lambda_api.py +++ b/deploy/stacks/lambda_api.py @@ -135,11 +135,7 @@ def __init__( self.api_handler_dlq = self.set_dlq(f'{resource_prefix}-{envname}-graphql-dlq') api_handler_sg = self.create_lambda_sgs(envname, 'apihandler', resource_prefix, vpc) - api_handler_env = { - 'envname': envname, - 'LOG_LEVEL': log_level, - 'REAUTH_TTL': str(reauth_ttl) - } + api_handler_env = {'envname': envname, 'LOG_LEVEL': log_level, 'REAUTH_TTL': str(reauth_ttl)} # Check if custom domain exists and if it exists email notifications could be enabled. Create a env variable which stores the domain url. This is used for sending data.all share weblinks in the email notifications. if custom_domain and custom_domain.get('hosted_zone_name', None): api_handler_env['frontend_domain_url'] = f'https://{custom_domain.get("hosted_zone_name", None)}' diff --git a/tests/core/permissions/test_tenant.py b/tests/core/permissions/test_tenant.py index 58a74c615..c2e7abbd2 100644 --- a/tests/core/permissions/test_tenant.py +++ b/tests/core/permissions/test_tenant.py @@ -3,6 +3,13 @@ MANAGE_GROUPS, MANAGE_ORGANIZATIONS, ) +from unittest.mock import MagicMock + + +def mock_cognito_client(mocker): + mock_client = MagicMock() + mocker.patch('dataall.modules.notifications.services.ses_email_notification_service.Cognito', mock_client) + return mock_client def test_list_tenant_permissions(client, user, group, tenant): @@ -53,7 +60,17 @@ def test_list_tenant_permissions(client, user, group, tenant): assert group.name in [node.groupUri for node in response.data.listTenantGroups.nodes] -def test_update_permissions(client, user, group, tenant): +def test_update_permissions(mocker, client, user, group, tenant): + # Mock Cognito Client + cognito_client = mock_cognito_client(mocker) + cognito_client().list_groups.return_value = [TenantPolicyRepository.ADMIN_GROUP, group.name] + + # Mock the ServiceProviderFactory Call + mocker.patch( + 
'dataall.modules.notifications.services.ses_email_notification_service.ServiceProviderFactory.get_service_provider_instance', + return_value=cognito_client(), + ) + response = client.query( """ mutation updateGroupTenantPermissions($input:UpdateGroupTenantPermissionsInput!){ From 5939060916a19ec3cc3711451a3bbb2be2433489 Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Wed, 13 Nov 2024 08:32:18 -0500 Subject: [PATCH 03/44] Fix/input validation (#1693) ### Feature or Bugfix - Bugfix ### Detail - Allow `:` and `.` chars ### Relates - ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- backend/dataall/base/utils/naming_convention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/dataall/base/utils/naming_convention.py b/backend/dataall/base/utils/naming_convention.py index d9101c8ba..3d366b811 100644 --- a/backend/dataall/base/utils/naming_convention.py +++ b/backend/dataall/base/utils/naming_convention.py @@ -23,7 +23,7 @@ class NamingConventionPattern(Enum): NOTEBOOK = {'regex': '[^a-zA-Z0-9-]', 'separator': '-', 'max_length': 63} MLSTUDIO_DOMAIN = {'regex': '[^a-zA-Z0-9-]', 'separator': '-', 'max_length': 63} DEFAULT = {'regex': '[^a-zA-Z0-9-_]', 'separator': '-', 'max_length': 63} - DEFAULT_SEARCH = {'regex': '[^a-zA-Z0-9-_ ]'} + DEFAULT_SEARCH = {'regex': '[^a-zA-Z0-9-_:. ]'} OPENSEARCH = {'regex': '[^a-z0-9-]', 'separator': '-', 'max_length': 27} OPENSEARCH_SERVERLESS = {'regex': '[^a-z0-9-]', 'separator': '-', 'max_length': 31} DATA_FILTERS = {'regex': '[^a-z0-9_]', 'separator': '_', 'max_length': 31} From 53a1205fab45fb0c79db7e8ee3a37508c59e500f Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Thu, 14 Nov 2024 16:44:11 +0100 Subject: [PATCH 04/44] Move worksheet logic to service layer (#1696) ### Feature or Bugfix - Refactoring ### Detail Moved business logic of Worksheets to service layer. Needed #1694 ### Relates #1694 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? 
- How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../modules/worksheets/api/resolvers.py | 37 +--- .../worksheets/services/worksheet_service.py | 175 ++++++++++-------- 2 files changed, 104 insertions(+), 108 deletions(-) diff --git a/backend/dataall/modules/worksheets/api/resolvers.py b/backend/dataall/modules/worksheets/api/resolvers.py index 1c72ff4fe..97e4c9edf 100644 --- a/backend/dataall/modules/worksheets/api/resolvers.py +++ b/backend/dataall/modules/worksheets/api/resolvers.py @@ -13,31 +13,18 @@ def create_worksheet(context: Context, source, input: dict = None): raise exceptions.RequiredParameter('groupUri') if input.get('SamlAdminGroupName') not in context.groups: raise exceptions.InvalidInput('groupUri', input.get('SamlAdminGroupName'), " a user's groups") - if not input.get('label'): raise exceptions.RequiredParameter('label') - with context.engine.scoped_session() as session: - return WorksheetService.create_worksheet( - session=session, - username=context.username, - data=input, - ) + return WorksheetService.create_worksheet(data=input) def update_worksheet(context: Context, source, worksheetUri: str = None, input: dict = None): - with context.engine.scoped_session() as session: - return WorksheetService.update_worksheet( - session=session, username=context.username, uri=worksheetUri, data=input - ) + return WorksheetService.update_worksheet(uri=worksheetUri, data=input) def get_worksheet(context: Context, source, worksheetUri: str = None): - with context.engine.scoped_session() as session: - return 
WorksheetService.get_worksheet( - session=session, - uri=worksheetUri, - ) + return WorksheetService.get_worksheet(uri=worksheetUri) def resolve_user_role(context: Context, source: Worksheet): @@ -51,24 +38,12 @@ def resolve_user_role(context: Context, source: Worksheet): def list_worksheets(context, source, filter: dict = None): if not filter: filter = {} - with context.engine.scoped_session() as session: - return WorksheetRepository.paginated_user_worksheets( - session=session, - username=context.username, - groups=context.groups, - uri=None, - data=filter, - check_perm=True, - ) + return WorksheetService.list_user_worksheets(filter) def run_sql_query(context: Context, source, environmentUri: str = None, worksheetUri: str = None, sqlQuery: str = None): - with context.engine.scoped_session() as session: - return WorksheetService.run_sql_query( - session=session, uri=environmentUri, worksheetUri=worksheetUri, sqlQuery=sqlQuery - ) + return WorksheetService.run_sql_query(uri=environmentUri, worksheetUri=worksheetUri, sqlQuery=sqlQuery) def delete_worksheet(context, source, worksheetUri: str = None): - with context.engine.scoped_session() as session: - return WorksheetService.delete_worksheet(session=session, uri=worksheetUri) + return WorksheetService.delete_worksheet(uri=worksheetUri) diff --git a/backend/dataall/modules/worksheets/services/worksheet_service.py b/backend/dataall/modules/worksheets/services/worksheet_service.py index 128f2af94..e07ce63ce 100644 --- a/backend/dataall/modules/worksheets/services/worksheet_service.py +++ b/backend/dataall/modules/worksheets/services/worksheet_service.py @@ -3,6 +3,7 @@ from dataall.core.activity.db.activity_models import Activity from dataall.core.environment.services.environment_service import EnvironmentService from dataall.base.db import exceptions +from dataall.base.context import get_context from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from 
dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.modules.worksheets.aws.athena_client import AthenaClient @@ -23,7 +24,7 @@ class WorksheetService: @staticmethod - def get_worksheet_by_uri(session, uri: str) -> Worksheet: + def _get_worksheet_by_uri(session, uri: str) -> Worksheet: if not uri: raise exceptions.RequiredParameter(param_name='worksheetUri') worksheet = WorksheetRepository.find_worksheet_by_uri(session, uri) @@ -33,94 +34,114 @@ def get_worksheet_by_uri(session, uri: str) -> Worksheet: @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_WORKSHEETS) - def create_worksheet(session, username, data=None) -> Worksheet: - worksheet = Worksheet( - owner=username, - label=data.get('label'), - description=data.get('description', 'No description provided'), - tags=data.get('tags'), - chartConfig={'dimensions': [], 'measures': [], 'chartType': 'bar'}, - SamlAdminGroupName=data['SamlAdminGroupName'], - ) - - session.add(worksheet) - session.commit() - - activity = Activity( - action='WORKSHEET:CREATE', - label='WORKSHEET:CREATE', - owner=username, - summary=f'{username} created worksheet {worksheet.name} ', - targetUri=worksheet.worksheetUri, - targetType='worksheet', - ) - session.add(activity) - - ResourcePolicyService.attach_resource_policy( - session=session, - group=data['SamlAdminGroupName'], - permissions=WORKSHEET_ALL, - resource_uri=worksheet.worksheetUri, - resource_type=Worksheet.__name__, - ) + def create_worksheet(data=None) -> Worksheet: + context = get_context() + with context.db_engine.scoped_session() as session: + worksheet = Worksheet( + owner=context.username, + label=data.get('label'), + description=data.get('description', 'No description provided'), + tags=data.get('tags'), + chartConfig={'dimensions': [], 'measures': [], 'chartType': 'bar'}, + SamlAdminGroupName=data['SamlAdminGroupName'], + ) + + session.add(worksheet) + session.commit() + + activity = Activity( + 
action='WORKSHEET:CREATE', + label='WORKSHEET:CREATE', + owner=context.username, + summary=f'{context.username} created worksheet {worksheet.name} ', + targetUri=worksheet.worksheetUri, + targetType='worksheet', + ) + session.add(activity) + + ResourcePolicyService.attach_resource_policy( + session=session, + group=data['SamlAdminGroupName'], + permissions=WORKSHEET_ALL, + resource_uri=worksheet.worksheetUri, + resource_type=Worksheet.__name__, + ) return worksheet @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_WORKSHEETS) @ResourcePolicyService.has_resource_permission(UPDATE_WORKSHEET) - def update_worksheet(session, username, uri, data=None): - worksheet = WorksheetService.get_worksheet_by_uri(session, uri) - for field in data.keys(): - setattr(worksheet, field, data.get(field)) - session.commit() - - activity = Activity( - action='WORKSHEET:UPDATE', - label='WORKSHEET:UPDATE', - owner=username, - summary=f'{username} updated worksheet {worksheet.name} ', - targetUri=worksheet.worksheetUri, - targetType='worksheet', - ) - session.add(activity) - return worksheet + def update_worksheet(uri, data=None): + context = get_context() + with context.db_engine.scoped_session() as session: + worksheet = WorksheetService._get_worksheet_by_uri(session, uri) + for field in data.keys(): + setattr(worksheet, field, data.get(field)) + session.commit() + + activity = Activity( + action='WORKSHEET:UPDATE', + label='WORKSHEET:UPDATE', + owner=context.username, + summary=f'{context.username} updated worksheet {worksheet.name} ', + targetUri=worksheet.worksheetUri, + targetType='worksheet', + ) + session.add(activity) + return worksheet @staticmethod @ResourcePolicyService.has_resource_permission(GET_WORKSHEET) - def get_worksheet(session, uri): - worksheet = WorksheetService.get_worksheet_by_uri(session, uri) - return worksheet + def get_worksheet(uri): + with get_context().db_engine.scoped_session() as session: + worksheet = 
WorksheetService._get_worksheet_by_uri(session, uri) + return worksheet + + @staticmethod + def list_user_worksheets(filter): + context = get_context() + with context.db_engine.scoped_session() as session: + return WorksheetRepository.paginated_user_worksheets( + session=session, + username=context.username, + groups=context.groups, + uri=None, + data=filter, + check_perm=True, + ) @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_WORKSHEETS) @ResourcePolicyService.has_resource_permission(DELETE_WORKSHEET) - def delete_worksheet(session, uri) -> bool: - worksheet = WorksheetService.get_worksheet_by_uri(session, uri) - session.delete(worksheet) - ResourcePolicyService.delete_resource_policy( - session=session, - group=worksheet.SamlAdminGroupName, - resource_uri=uri, - resource_type=Worksheet.__name__, - ) - return True + def delete_worksheet(uri) -> bool: + with get_context().db_engine.scoped_session() as session: + worksheet = WorksheetService._get_worksheet_by_uri(session, uri) + session.delete(worksheet) + ResourcePolicyService.delete_resource_policy( + session=session, + group=worksheet.SamlAdminGroupName, + resource_uri=uri, + resource_type=Worksheet.__name__, + ) + return True @staticmethod @ResourcePolicyService.has_resource_permission(RUN_ATHENA_QUERY) - def run_sql_query(session, uri, worksheetUri, sqlQuery): - environment = EnvironmentService.get_environment_by_uri(session, uri) - worksheet = WorksheetService.get_worksheet_by_uri(session, worksheetUri) - - env_group = EnvironmentService.get_environment_group( - session, worksheet.SamlAdminGroupName, environment.environmentUri - ) - - cursor = AthenaClient.run_athena_query( - aws_account_id=environment.AwsAccountId, - env_group=env_group, - s3_staging_dir=f's3://{environment.EnvironmentDefaultBucketName}/athenaqueries/{env_group.environmentAthenaWorkGroup}/', - region=environment.region, - sql=sqlQuery, - ) - - return AthenaClient.convert_query_output(cursor) + def run_sql_query(uri, 
worksheetUri, sqlQuery): + with get_context().db_engine.scoped_session() as session: + environment = EnvironmentService.get_environment_by_uri(session, uri) + worksheet = WorksheetService._get_worksheet_by_uri(session, worksheetUri) + + env_group = EnvironmentService.get_environment_group( + session, worksheet.SamlAdminGroupName, environment.environmentUri + ) + + cursor = AthenaClient.run_athena_query( + aws_account_id=environment.AwsAccountId, + env_group=env_group, + s3_staging_dir=f's3://{environment.EnvironmentDefaultBucketName}/athenaqueries/{env_group.environmentAthenaWorkGroup}/', + region=environment.region, + sql=sqlQuery, + ) + + return AthenaClient.convert_query_output(cursor) From 692d223a5c54d88da535b2490ad538af2f0958c9 Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Thu, 14 Nov 2024 12:13:32 -0500 Subject: [PATCH 05/44] **WITH FIXES** Separating Out Access Logging (#1695) - Refactoring - Move access logging to a separate environment logging bucket (rather than env default bucket used for athena queries and profiling job artifacts) - Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? 
- Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .checkov.baseline | 52 ++---------------- backend/dataall/core/environment/api/types.py | 1 + .../core/environment/cdk/environment_stack.py | 32 ++++++++--- .../core/environment/db/environment_models.py | 1 + .../services/environment_service.py | 7 +++ .../modules/s3_datasets/cdk/dataset_stack.py | 2 +- .../04d92886fabe_add_consumption_roles.py | 10 +++- ...8e35e39e1e_invite_env_groups_as_readers.py | 14 ++++- .../49c6b18ed814_add_env_logs_bucket.py | 55 +++++++++++++++++++ ...1ac7a85a2_drop_remove_group_permissions.py | 14 ++++- tests/modules/conftest.py | 1 + .../test_environment_stack_with_dataset.py | 19 +++++++ 12 files changed, 145 insertions(+), 63 deletions(-) create mode 100644 backend/migrations/versions/49c6b18ed814_add_env_logs_bucket.py diff --git a/.checkov.baseline b/.checkov.baseline index e0bbfce12..19875f2e3 100644 --- a/.checkov.baseline +++ b/.checkov.baseline @@ -417,7 +417,7 @@ ] }, { - "file": "/cdk.out/asset.3045cb6b4340be1e173df6dcf6248d565aa849ceda3e2cf2c2f221ccee4bc1d6/pivotRole.yaml", + "file": "/cdk.out/asset.05d71d8b69cd4483d3c9db9120b556b718c72f349debbb79d461c74c4964b350/pivotRole.yaml", "findings": [ { "resource": "AWS::IAM::ManagedPolicy.PivotRolePolicy0", @@ -490,12 +490,6 @@ { "file": "/checkov_environment_synth.json", "findings": [ - { - "resource": "AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicy19AC37181", - "check_ids": [ - "CKV_AWS_111" - ] - }, { "resource": "AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicy2E85AF510", "check_ids": [ @@ -508,24 +502,6 @@ "CKV_AWS_111" ] }, - { - "resource": "AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicy5A19E75CA", - "check_ids": [ - "CKV_AWS_109" - ] - }, - { - "resource": 
"AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicyCC720210", - "check_ids": [ - "CKV_AWS_109" - ] - }, - { - "resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy1A0C96958", - "check_ids": [ - "CKV_AWS_111" - ] - }, { "resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy2B12D381A", "check_ids": [ @@ -538,18 +514,6 @@ "CKV_AWS_111" ] }, - { - "resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy3E3CBA9E", - "check_ids": [ - "CKV_AWS_109" - ] - }, - { - "resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy56D7DC525", - "check_ids": [ - "CKV_AWS_109" - ] - }, { "resource": "AWS::Lambda::Function.CustomCDKBucketDeployment8693BB64968944B69AAFB0CC9EB8756C81C01536", "check_ids": [ @@ -563,16 +527,14 @@ "resource": "AWS::Lambda::Function.GlueDatabaseLFCustomResourceHandler7FAF0F82", "check_ids": [ "CKV_AWS_115", - "CKV_AWS_117", - "CKV_AWS_173" + "CKV_AWS_117" ] }, { "resource": "AWS::Lambda::Function.LakeformationDefaultSettingsHandler2CBEDB06", "check_ids": [ "CKV_AWS_115", - "CKV_AWS_117", - "CKV_AWS_173" + "CKV_AWS_117" ] }, { @@ -580,8 +542,7 @@ "check_ids": [ "CKV_AWS_115", "CKV_AWS_116", - "CKV_AWS_117", - "CKV_AWS_173" + "CKV_AWS_117" ] }, { @@ -589,12 +550,11 @@ "check_ids": [ "CKV_AWS_115", "CKV_AWS_116", - "CKV_AWS_117", - "CKV_AWS_173" + "CKV_AWS_117" ] }, { - "resource": "AWS::S3::Bucket.EnvironmentDefaultBucket78C3A8B0", + "resource": "AWS::S3::Bucket.EnvironmentDefaultLogBucket7F0EFAB3", "check_ids": [ "CKV_AWS_18" ] diff --git a/backend/dataall/core/environment/api/types.py b/backend/dataall/core/environment/api/types.py index 229593dd8..a95d943c4 100644 --- a/backend/dataall/core/environment/api/types.py +++ b/backend/dataall/core/environment/api/types.py @@ -100,6 +100,7 @@ gql.Field('subscriptionsConsumersTopicName', type=gql.String), gql.Field('subscriptionsProducersTopicName', type=gql.String), gql.Field('EnvironmentDefaultBucketName', type=gql.String), + 
gql.Field('EnvironmentLogsBucketName', type=gql.String), gql.Field('EnvironmentDefaultAthenaWorkGroup', type=gql.String), gql.Field( name='networks', diff --git a/backend/dataall/core/environment/cdk/environment_stack.py b/backend/dataall/core/environment/cdk/environment_stack.py index ca4a27190..d13d0acc1 100644 --- a/backend/dataall/core/environment/cdk/environment_stack.py +++ b/backend/dataall/core/environment/cdk/environment_stack.py @@ -165,30 +165,44 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): f'arn:aws:iam::{self._environment.AwsAccountId}:role/{self.pivot_role_name}', ) - # Environment S3 Bucket - default_environment_bucket = s3.Bucket( + # Environment Logging S3 Bucket + default_environment_log_bucket = s3.Bucket( self, - 'EnvironmentDefaultBucket', - bucket_name=self._environment.EnvironmentDefaultBucketName, + 'EnvironmentDefaultLogBucket', + bucket_name=self._environment.EnvironmentLogsBucketName, encryption=s3.BucketEncryption.S3_MANAGED, removal_policy=RemovalPolicy.RETAIN, block_public_access=s3.BlockPublicAccess.BLOCK_ALL, versioned=True, enforce_ssl=True, ) - default_environment_bucket.policy.apply_removal_policy(RemovalPolicy.RETAIN) - self.default_environment_bucket = default_environment_bucket - - default_environment_bucket.add_to_resource_policy( + default_environment_log_bucket.policy.apply_removal_policy(RemovalPolicy.RETAIN) + default_environment_log_bucket.add_to_resource_policy( iam.PolicyStatement( sid='AWSLogDeliveryWrite', effect=iam.Effect.ALLOW, principals=[iam.ServicePrincipal('logging.s3.amazonaws.com')], actions=['s3:PutObject', 's3:PutObjectAcl'], - resources=[f'{default_environment_bucket.bucket_arn}/*'], + resources=[f'{default_environment_log_bucket.bucket_arn}/*'], ) ) + # Environment S3 Bucket + default_environment_bucket = s3.Bucket( + self, + 'EnvironmentDefaultBucket', + bucket_name=self._environment.EnvironmentDefaultBucketName, + encryption=s3.BucketEncryption.S3_MANAGED, + 
removal_policy=RemovalPolicy.RETAIN, + block_public_access=s3.BlockPublicAccess.BLOCK_ALL, + versioned=True, + enforce_ssl=True, + server_access_logs_bucket=default_environment_log_bucket, + server_access_logs_prefix=f'access_logs/{self._environment.EnvironmentDefaultBucketName}/', + ) + default_environment_bucket.policy.apply_removal_policy(RemovalPolicy.RETAIN) + self.default_environment_bucket = default_environment_bucket + default_environment_bucket.add_lifecycle_rule( abort_incomplete_multipart_upload_after=Duration.days(7), noncurrent_version_transitions=[ diff --git a/backend/dataall/core/environment/db/environment_models.py b/backend/dataall/core/environment/db/environment_models.py index e56135e97..c4890850a 100644 --- a/backend/dataall/core/environment/db/environment_models.py +++ b/backend/dataall/core/environment/db/environment_models.py @@ -25,6 +25,7 @@ class Environment(Resource, Base): EnvironmentDefaultIAMRoleImported = Column(Boolean, default=False) EnvironmentDefaultIAMRoleArn = Column(String, nullable=False) EnvironmentDefaultBucketName = Column(String) + EnvironmentLogsBucketName = Column(String) EnvironmentDefaultAthenaWorkGroup = Column(String) roleCreated = Column(Boolean, nullable=False, default=False) diff --git a/backend/dataall/core/environment/services/environment_service.py b/backend/dataall/core/environment/services/environment_service.py index 795e3a63b..9e2b5d6a6 100644 --- a/backend/dataall/core/environment/services/environment_service.py +++ b/backend/dataall/core/environment/services/environment_service.py @@ -255,6 +255,13 @@ def create_environment(uri, data=None): resource_prefix=env.resourcePrefix, ).build_compliant_name() + env.EnvironmentLogsBucketName = NamingConventionService( + target_uri=env.environmentUri, + target_label='env-access-logs', + pattern=NamingConventionPattern.S3, + resource_prefix=env.resourcePrefix, + ).build_compliant_name() + env.EnvironmentDefaultAthenaWorkGroup = NamingConventionService( 
target_uri=env.environmentUri, target_label=env.label, diff --git a/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py b/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py index 81a59c4d5..ffbff7369 100644 --- a/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py +++ b/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py @@ -193,7 +193,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): server_access_logs_bucket=s3.Bucket.from_bucket_name( self, 'EnvAccessLogsBucket', - f'{env.EnvironmentDefaultBucketName}', + f'{env.EnvironmentLogsBucketName}', ), server_access_logs_prefix=f'access_logs/{dataset.S3BucketName}/', enforce_ssl=True, diff --git a/backend/migrations/versions/04d92886fabe_add_consumption_roles.py b/backend/migrations/versions/04d92886fabe_add_consumption_roles.py index e15a34972..b7bde887f 100644 --- a/backend/migrations/versions/04d92886fabe_add_consumption_roles.py +++ b/backend/migrations/versions/04d92886fabe_add_consumption_roles.py @@ -12,9 +12,8 @@ from sqlalchemy.dialects import postgresql from sqlalchemy.ext.declarative import declarative_base -from dataall.core.environment.db.environment_models import Environment from dataall.core.environment.services.environment_service import EnvironmentService -from dataall.base.db import utils +from dataall.base.db import utils, Resource from datetime import datetime from dataall.core.permissions.services.permission_service import PermissionService @@ -33,6 +32,11 @@ Base = declarative_base() +class Environment(Resource, Base): + __tablename__ = 'environment' + environmentUri = Column(String, primary_key=True, default=utils.uuid('environment')) + + class EnvironmentGroup(Base): __tablename__ = 'environment_group_permission' groupUri = Column(String, primary_key=True) @@ -123,7 +127,7 @@ def upgrade(): bind = op.get_bind() session = orm.Session(bind=bind) print('Back-filling consumer role permissions for environments...') - envs = 
EnvironmentService.list_all_active_environments(session=session) + envs = session.query(Environment).filter(Environment.deleted.is_(None)).all() for env in envs: groups = EnvironmentService.get_all_environment_groups(session=session, uri=env.environmentUri, filter=None) for group in groups: diff --git a/backend/migrations/versions/328e35e39e1e_invite_env_groups_as_readers.py b/backend/migrations/versions/328e35e39e1e_invite_env_groups_as_readers.py index 5f243a8ff..43655246e 100644 --- a/backend/migrations/versions/328e35e39e1e_invite_env_groups_as_readers.py +++ b/backend/migrations/versions/328e35e39e1e_invite_env_groups_as_readers.py @@ -7,12 +7,14 @@ """ from alembic import op -from sqlalchemy import orm -from dataall.core.environment.db.environment_models import EnvironmentGroup, Environment +from sqlalchemy import orm, Column, String +from sqlalchemy.ext.declarative import declarative_base +from dataall.core.environment.db.environment_models import EnvironmentGroup from dataall.core.organizations.db.organization_repositories import OrganizationRepository from dataall.core.permissions.services.organization_permissions import GET_ORGANIZATION from dataall.core.organizations.db import organization_models as models from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService +from dataall.base.db import utils, Resource # revision identifiers, used by Alembic. 
revision = '328e35e39e1e' @@ -20,6 +22,14 @@ branch_labels = None depends_on = None +Base = declarative_base() + + +class Environment(Resource, Base): + __tablename__ = 'environment' + environmentUri = Column(String, primary_key=True, default=utils.uuid('environment')) + organizationUri = Column(String, nullable=False) + def get_session(): bind = op.get_bind() diff --git a/backend/migrations/versions/49c6b18ed814_add_env_logs_bucket.py b/backend/migrations/versions/49c6b18ed814_add_env_logs_bucket.py new file mode 100644 index 000000000..f521d3337 --- /dev/null +++ b/backend/migrations/versions/49c6b18ed814_add_env_logs_bucket.py @@ -0,0 +1,55 @@ +"""add_env_logs_bucket + +Revision ID: 49c6b18ed814 +Revises: 797dd1012be1 +Create Date: 2024-11-13 19:16:18.030415 + +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy import orm, Column, String +from sqlalchemy.ext.declarative import declarative_base + +from dataall.base.db import Resource, utils +from dataall.base.utils.naming_convention import ( + NamingConventionService, + NamingConventionPattern, +) + +# revision identifiers, used by Alembic. 
+revision = '49c6b18ed814' +down_revision = '797dd1012be1' +branch_labels = None +depends_on = None + +Base = declarative_base() + + +class Environment(Resource, Base): + __tablename__ = 'environment' + environmentUri = Column(String, primary_key=True, default=utils.uuid('environment')) + resourcePrefix = Column(String, nullable=False, default='dataall') + EnvironmentLogsBucketName = Column(String, nullable=True) + + +def upgrade(): + op.add_column('environment', sa.Column('EnvironmentLogsBucketName', sa.String(), nullable=True)) + bind = op.get_bind() + session = orm.Session(bind=bind) + environments = session.query(Environment).all() + for env in environments: + env.EnvironmentLogsBucketName = NamingConventionService( + target_uri=env.environmentUri, + target_label='env-access-logs', + pattern=NamingConventionPattern.S3, + resource_prefix=env.resourcePrefix, + ).build_compliant_name() + session.commit() + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('environment', 'EnvironmentLogsBucketName') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/a991ac7a85a2_drop_remove_group_permissions.py b/backend/migrations/versions/a991ac7a85a2_drop_remove_group_permissions.py index 64ded7dd5..424e22f8f 100644 --- a/backend/migrations/versions/a991ac7a85a2_drop_remove_group_permissions.py +++ b/backend/migrations/versions/a991ac7a85a2_drop_remove_group_permissions.py @@ -10,9 +10,11 @@ from dataall.core.permissions.services.environment_permissions import REMOVE_ENVIRONMENT_GROUP from dataall.core.permissions.db.resource_policy.resource_policy_models import ResourcePolicy from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService +from dataall.base.db import utils, Resource + from alembic import op -from sqlalchemy import orm -from dataall.core.environment.db.environment_models import Environment +from sqlalchemy import orm, Column, String +from sqlalchemy.ext.declarative import declarative_base # revision identifiers, used by Alembic. 
revision = 'a991ac7a85a2' @@ -21,6 +23,14 @@ depends_on = None +Base = declarative_base() + + +class Environment(Resource, Base): + __tablename__ = 'environment' + environmentUri = Column(String, primary_key=True, default=utils.uuid('environment')) + + def get_session(): bind = op.get_bind() session = orm.Session(bind=bind) diff --git a/tests/modules/conftest.py b/tests/modules/conftest.py index 499c7bd26..bc65a489e 100644 --- a/tests/modules/conftest.py +++ b/tests/modules/conftest.py @@ -89,6 +89,7 @@ def factory(org, envname, owner, group, account, region='eu-west-1', desc='test' EnvironmentDefaultIAMRoleName=role, EnvironmentDefaultIAMRoleArn=f'arn:aws:iam::{account}:role/{role}', EnvironmentDefaultBucketName='defaultbucketname1234567789', + EnvironmentLogsBucketName='logsbucketname1234567789', CDKRoleArn=f'arn:aws::{account}:role/EnvRole', EnvironmentDefaultAthenaWorkGroup='DefaultWorkGroup', ) diff --git a/tests/modules/s3_datasets/test_environment_stack_with_dataset.py b/tests/modules/s3_datasets/test_environment_stack_with_dataset.py index c27d335b9..852233008 100644 --- a/tests/modules/s3_datasets/test_environment_stack_with_dataset.py +++ b/tests/modules/s3_datasets/test_environment_stack_with_dataset.py @@ -122,6 +122,25 @@ def test_resources_created(env_fixture, org_fixture, mocker): }, count=1, ) + + # Assert that we have created: + template.resource_properties_count_is( + type='AWS::S3::Bucket', + props={ + 'BucketName': env_fixture.EnvironmentLogsBucketName, + 'BucketEncryption': { + 'ServerSideEncryptionConfiguration': [{'ServerSideEncryptionByDefault': {'SSEAlgorithm': 'AES256'}}] + }, + 'PublicAccessBlockConfiguration': { + 'BlockPublicAcls': True, + 'BlockPublicPolicy': True, + 'IgnorePublicAcls': True, + 'RestrictPublicBuckets': True, + }, + }, + count=1, + ) + template.resource_properties_count_is( type='AWS::Lambda::Function', props={ From 6206d98a30582c10cf5eca8dd5dd66750c35165d Mon Sep 17 00:00:00 2001 From: Noah Paige 
<69586985+noah-paige@users.noreply.github.com> Date: Fri, 15 Nov 2024 05:58:21 -0500 Subject: [PATCH 06/44] return EnvironmentLogsBucketName from integration test getEnv query (#1697) - Bugfix - Fix integration test teardown of environment bug on cleaning up EnvironmentLogsBucketName - #1695 Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- backend/dataall/core/environment/cdk/environment_stack.py | 5 ++++- tests_new/integration_tests/core/environment/queries.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/dataall/core/environment/cdk/environment_stack.py b/backend/dataall/core/environment/cdk/environment_stack.py index d13d0acc1..dc01971b6 100644 --- a/backend/dataall/core/environment/cdk/environment_stack.py +++ b/backend/dataall/core/environment/cdk/environment_stack.py @@ -601,7 +601,10 @@ def create_integration_tests_role(self): 's3:DeleteObject', ], effect=iam.Effect.ALLOW, - resources=['arn:aws:s3:::dataalltesting*'], + resources=[ + 'arn:aws:s3:::dataalltesting*', + 'arn:aws:s3:::dataall-env-access-logs*', + ], ) ) self.test_role.add_to_policy( diff --git a/tests_new/integration_tests/core/environment/queries.py b/tests_new/integration_tests/core/environment/queries.py index a965d1588..0d0de44c6 100644 --- a/tests_new/integration_tests/core/environment/queries.py +++ b/tests_new/integration_tests/core/environment/queries.py @@ -11,6 +11,7 @@ tags SamlGroupName EnvironmentDefaultBucketName +EnvironmentLogsBucketName EnvironmentDefaultIAMRoleArn EnvironmentDefaultIAMRoleName EnvironmentDefaultIAMRoleImported From 375f44189f85577d1cbdd357fb4eef290eb91efa Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Fri, 15 Nov 2024 05:59:11 -0500 Subject: [PATCH 07/44] add explicit token duration config for both JWTs (#1698) - Enhancement - Add explicit token duration (60 min) over default 60 min - #1682 Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? 
- Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- deploy/stacks/cognito.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deploy/stacks/cognito.py b/deploy/stacks/cognito.py index 661dc87a0..3a28225f2 100644 --- a/deploy/stacks/cognito.py +++ b/deploy/stacks/cognito.py @@ -99,6 +99,7 @@ def __init__( domain_prefix=f"{resource_prefix.replace('-', '')}{envname}{self.region.replace('-', '')}{self.account}" ), ) + jwt_token_duration = 180 if with_approval_tests else 60 self.client = cognito.UserPoolClient( self, f'AppClient-{envname}', @@ -106,6 +107,8 @@ def __init__( auth_flows=AuthFlow(user_password=with_approval_tests, user_srp=True, custom=True), prevent_user_existence_errors=True, refresh_token_validity=Duration.minutes(cognito_user_session_timeout_inmins), + id_token_validity=Duration.minutes(jwt_token_duration), + access_token_validity=Duration.minutes(jwt_token_duration), ) if enable_cw_rum: From d766bd48eb16393a9431da4b83d9b813ca23e524 Mon Sep 17 00:00:00 2001 From: Petros Kalos Date: Thu, 26 Sep 2024 14:20:23 +0300 Subject: [PATCH 08/44] **INSERTED** migrate local server to FastAPI (#1577) - Refactoring * Migrate local server Flask to FastAPI * Upgrade and align versions of important libraries (aws-cdk-lib/boto3) * Simplify requirements files (removed explicitly defined deps that were not required or were 
implicitly pulled by other packages). Pending successful run on dev pipeline Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- .../dataall/base/cdkproxy/requirements.txt | 13 +-- backend/dataall/base/context.py | 2 +- .../data_pipeline_blueprint/requirements.txt | 3 +- backend/local_graphql_server.py | 87 ++++++++----------- backend/requirements.txt | 8 +- deploy/requirements.txt | 8 +- deploy/stacks/cdk_nag_exclusions.py | 4 + deploy/stacks/pipeline.py | 6 +- docker-compose.yaml | 2 +- tests/client.py | 35 ++++---- tests/conftest.py | 4 +- tests/requirements.txt | 4 +- 12 files changed, 82 insertions(+), 94 deletions(-) diff --git a/backend/dataall/base/cdkproxy/requirements.txt b/backend/dataall/base/cdkproxy/requirements.txt index f55e149a6..894243316 100644 --- a/backend/dataall/base/cdkproxy/requirements.txt +++ b/backend/dataall/base/cdkproxy/requirements.txt @@ -1,17 +1,12 @@ -aws-cdk-lib==2.99.0 -boto3==1.28.23 -boto3-stubs==1.28.23 -botocore==1.31.23 +aws-cdk-lib==2.160.0 +boto3==1.35.26 +boto3-stubs==1.35.26 cdk-nag==2.7.2 -constructs==10.0.73 -starlette==0.36.3 -fastapi == 0.109.2 -Flask==2.3.2 +fastapi == 0.115.0 PyYAML==6.0 requests==2.32.2 tabulate==0.8.9 uvicorn==0.15.0 werkzeug==3.0.3 -constructs>=10.0.0,<11.0.0 git-remote-codecommit==1.16 aws-ddk-core==1.3.0 \ No newline at end of file diff --git a/backend/dataall/base/context.py b/backend/dataall/base/context.py index b271b3e18..3df892e18 100644 --- a/backend/dataall/base/context.py +++ b/backend/dataall/base/context.py @@ -4,7 +4,7 @@ that in the request scope The class uses Flask's approach to handle request: ThreadLocal -That approach should work fine for AWS Lambdas and local server that uses Flask app +That approach should work fine for AWS Lambdas and local server that uses FastApi app """ from dataclasses import dataclass diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements.txt b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements.txt index 4067e0fd9..7351d1274 100644 --- 
a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements.txt +++ b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements.txt @@ -1,3 +1,2 @@ -aws-cdk-lib==2.103.1 -constructs>=10.0.0,<11.0.0 +aws-cdk-lib==2.160.0 aws-ddk-core==1.3.0 diff --git a/backend/local_graphql_server.py b/backend/local_graphql_server.py index 1ea96a732..40c765896 100644 --- a/backend/local_graphql_server.py +++ b/backend/local_graphql_server.py @@ -1,24 +1,23 @@ +import logging import os import jwt from ariadne import graphql_sync from ariadne.constants import PLAYGROUND_HTML -from flask import Flask, request, jsonify -from flask_cors import CORS +from fastapi import FastAPI, Request from graphql import parse +from starlette.middleware.cors import CORSMiddleware +from starlette.responses import JSONResponse, HTMLResponse from dataall.base.api import get_executable_schema -from dataall.core.tasks.service_handlers import Worker -from dataall.core.permissions.services.tenant_permissions import TENANT_ALL -from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService - -from dataall.base.db import get_engine, Base -from dataall.base.searchproxy import connect, run_query -from dataall.base.loader import load_modules, ImportMode from dataall.base.config import config from dataall.base.context import set_context, dispose_context, RequestContext - -import logging +from dataall.base.db import get_engine, Base +from dataall.base.loader import load_modules, ImportMode +from dataall.base.searchproxy import connect, run_query +from dataall.core.permissions.services.tenant_permissions import TENANT_ALL +from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService +from dataall.core.tasks.service_handlers import Worker logger = logging.getLogger('graphql') logger.propagate = False @@ -45,10 +44,14 @@ def __init__(self, **kwargs): schema = get_executable_schema() -# app = 
GraphQL(schema, debug=True) - -app = Flask(__name__) -CORS(app) +app = FastAPI(debug=True) +app.add_middleware( + CORSMiddleware, + allow_origins=['*'], + allow_credentials=True, + allow_methods=['*'], + allow_headers=['*'], +) def request_context(headers, mock=False): @@ -87,67 +90,61 @@ def request_context(headers, mock=False): return context.__dict__ -@app.route('/graphql', methods=['OPTIONS']) +@app.options('/graphql') def opt(): # On GET request serve GraphQL Playground # You don't need to provide Playground if you don't want to # but keep on mind this will not prohibit clients from # exploring your API using desktop GraphQL Playground app. - return '

Hello

', 200 + return HTMLResponse('

Hello

') -@app.route('/esproxy', methods=['OPTIONS']) +@app.options('/esproxy') def esproxyopt(): # On GET request serve GraphQL Playground # You don't need to provide Playground if you don't want to # but keep on mind this will not prohibit clients from # exploring your API using desktop GraphQL Playground app. - return '

Hello

', 200 + return HTMLResponse('

Hello

') -@app.route('/graphql', methods=['GET']) +@app.get('/graphql') def graphql_playground(): # On GET request serve GraphQL Playground # You don't need to provide Playground if you don't want to # but keep on mind this will not prohibit clients from # exploring your API using desktop GraphQL Playground app. - return PLAYGROUND_HTML, 200 + return HTMLResponse(PLAYGROUND_HTML) -@app.route('/esproxy', methods=['POST']) -def esproxy(): - body = request.data.decode('utf-8') - print(body) +@app.post('/esproxy') +async def esproxy(request: Request): + body = (await request.body()).decode('utf-8') + logger.info('body %s', body) return run_query(es=es, index='dataall-index', body=body) -@app.route('/graphql', methods=['POST']) -def graphql_server(): - print('.............................') - # GraphQL queries are always sent as POST - logger.debug(request.data) - data = request.get_json() - print('*** Request ***', request.data) - logger.info(data) +@app.post('/graphql') +async def graphql_server(request: Request): + logger.info('.............................') + data = await request.json() + logger.info('Request payload %s', data) # Extract the GraphQL query string from the 'query' key in the data dictionary query_string = data.get('query') if not query_string: - return jsonify({'error': 'GraphQL query not provided'}), 400 + return JSONResponse({'error': 'GraphQL query not provided'}, 400) try: query = parse(query_string) except Exception as e: - return jsonify({'error': str(e)}), 400 + return JSONResponse({'error': str(e)}, 400) - print('***** Printing Query ****** \n\n') - print(query) + logger.info('Request query %s', query.to_dict()) context = request_context(request.headers, mock=True) logger.debug(context) - # Note: Passing the request to the context is optional. 
- # In Flask, the current request is always accessible as flask.request success, result = graphql_sync( schema, data, @@ -157,14 +154,4 @@ def graphql_server(): dispose_context() status_code = 200 if success else 400 - return jsonify(result), status_code - - -if __name__ == '__main__': - logger.info('Starting dataall flask local application') - app.run( - debug=True, # nosec - threaded=False, - host='0.0.0.0', - port=5000, - ) + return JSONResponse(result, status_code) diff --git a/backend/requirements.txt b/backend/requirements.txt index 5fccee041..d50a72380 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,10 +1,7 @@ ariadne==0.17.0 aws-xray-sdk==2.4.3 -boto3==1.28.23 -botocore==1.31.23 -fastapi == 0.109.2 -Flask==3.0.3 -flask-cors==4.0.1 +boto3==1.35.26 +fastapi == 0.115.0 nanoid==2.0.0 opensearch-py==1.0.0 PyAthena==2.3.0 @@ -14,5 +11,4 @@ PyYAML==6.0 requests==2.32.2 requests_aws4auth==1.1.1 sqlalchemy==1.3.24 -starlette==0.36.3 alembic==1.13.1 \ No newline at end of file diff --git a/deploy/requirements.txt b/deploy/requirements.txt index a67fb2621..11626e22c 100644 --- a/deploy/requirements.txt +++ b/deploy/requirements.txt @@ -1,8 +1,6 @@ -aws-cdk-lib==2.115.0 -boto3-stubs==1.20.20 -boto3==1.28.23 -botocore==1.31.23 +aws-cdk-lib==2.160.0 +boto3==1.35.26 +boto3-stubs==1.35.26 cdk-nag==2.7.2 typeguard==4.2.1 cdk-klayers==0.3.0 -constructs>=10.0.0,<11.0.0 diff --git a/deploy/stacks/cdk_nag_exclusions.py b/deploy/stacks/cdk_nag_exclusions.py index a9948088b..e1c80e0d2 100644 --- a/deploy/stacks/cdk_nag_exclusions.py +++ b/deploy/stacks/cdk_nag_exclusions.py @@ -28,6 +28,10 @@ 'id': 'AwsSolutions-CB3', 'reason': 'Access to docker daemon is required to build docker images', }, + { + 'id': 'AwsSolutions-SMG4', + 'reason': 'Database is used for test purposes', + }, ] BACKEND_STACK_CDK_NAG_EXCLUSIONS = [ diff --git a/deploy/stacks/pipeline.py b/deploy/stacks/pipeline.py index 8664ae269..c68879750 100644 --- a/deploy/stacks/pipeline.py +++ 
b/deploy/stacks/pipeline.py @@ -792,7 +792,7 @@ def set_cloudfront_stage(self, target_env): 'echo "credential_source = EcsContainer" >> ~/.aws/config', 'aws sts get-caller-identity --profile buildprofile', 'export AWS_PROFILE=buildprofile', - 'pip install boto3==1.34.35', + 'pip install boto3==1.35.26', 'pip install beautifulsoup4', 'python deploy/configs/frontend_config.py', 'export AWS_DEFAULT_REGION=us-east-1', @@ -864,7 +864,7 @@ def cw_rum_config_action(self, target_env): 'aws sts get-caller-identity --profile buildprofile', 'export AWS_PROFILE=buildprofile', 'pip install --upgrade pip', - 'pip install boto3==1.34.35', + 'pip install boto3==1.35.26', 'python deploy/configs/rum_config.py', ], role=self.expanded_codebuild_role.without_policy_updates(), @@ -931,7 +931,7 @@ def set_albfront_stage(self, target_env, repository_name): 'echo "credential_source = EcsContainer" >> ~/.aws/config', 'aws sts get-caller-identity --profile buildprofile', 'export AWS_PROFILE=buildprofile', - 'pip install boto3==1.34.35', + 'pip install boto3==1.35.26', 'pip install beautifulsoup4', 'python deploy/configs/frontend_config.py', 'unset AWS_PROFILE', diff --git a/docker-compose.yaml b/docker-compose.yaml index 6b10cfeac..2e9597772 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -37,7 +37,7 @@ services: dockerfile: docker/dev/Dockerfile args: CONTAINER_UID: ${UID} - entrypoint: /bin/bash -c "../build/wait-for-it.sh elasticsearch:9200 -t 30 && python3.9 local_graphql_server.py" + entrypoint: /bin/bash -c "../build/wait-for-it.sh elasticsearch:9200 -t 30 && uvicorn local_graphql_server:app --host 0.0.0.0 --port 5000 --reload" expose: - 5000 ports: diff --git a/tests/client.py b/tests/client.py index 5f5346f0a..908709acf 100644 --- a/tests/client.py +++ b/tests/client.py @@ -1,12 +1,16 @@ -import typing import json +import typing + from ariadne import graphql_sync from ariadne.constants import PLAYGROUND_HTML -from flask import Flask, request, jsonify, Response +from 
fastapi import FastAPI from munch import DefaultMunch +from starlette.requests import Request +from starlette.responses import JSONResponse, HTMLResponse + from dataall.base.api import get_executable_schema -from dataall.base.context import set_context, dispose_context, RequestContext from dataall.base.config import config +from dataall.base.context import set_context, dispose_context, RequestContext config.set_property('cdk_proxy_url', 'mock_url') @@ -22,40 +26,41 @@ def query( groups: typing.List[str] = ['-'], **variables, ): - response: Response = self.client.post( + if not isinstance(username, str): + username = username.username + response = self.client.post( '/graphql', json={'query': f""" {query} """, 'variables': variables}, headers={'groups': json.dumps(groups), 'username': username}, ) - return DefaultMunch.fromDict(response.get_json()) + return DefaultMunch.fromDict(json.loads(response.text)) def create_app(db): - app = Flask('tests') + app = FastAPI(debug=True) schema = get_executable_schema() - @app.route('/', methods=['OPTIONS']) + @app.options('/') def opt(): # On GET request serve GraphQL Playground # You don't need to provide Playground if you don't want to # but keep on mind this will not prohibit clients from # exploring your API using desktop GraphQL Playground app. - return '

Hello

', 200 + return HTMLResponse('

Hello

') - @app.route('/graphql', methods=['GET']) + @app.get('/graphql') def graphql_playground(): # On GET request serve GraphQL Playground # You don't need to provide Playground if you don't want to # but keep on mind this will not prohibit clients from # exploring your API using desktop GraphQL Playground app. - return PLAYGROUND_HTML, 200 + return HTMLResponse(PLAYGROUND_HTML) - @app.route('/graphql', methods=['POST']) - def graphql_server(): + @app.post('/graphql') + async def graphql_server(request: Request): # GraphQL queries are always sent as POST # Note: Passing the request to the context is optional. - # In Flask, the current request is always accessible as flask.request - data = request.get_json() + data = await request.json() username = request.headers.get('Username', 'anonym') user_id = request.headers.get('Username', 'anonym_id') @@ -72,6 +77,6 @@ def graphql_server(): dispose_context() status_code = 200 if success else 400 - return jsonify(result), status_code + return JSONResponse(result, status_code) return app diff --git a/tests/conftest.py b/tests/conftest.py index a8e1951b5..7a4b68df0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,8 @@ from unittest.mock import MagicMock import pytest +from starlette.testclient import TestClient + from dataall.base.db import get_engine, create_schema_and_tables, Engine from dataall.base.loader import load_modules, ImportMode, list_loaded_modules from glob import glob @@ -72,7 +74,7 @@ def app(db): @pytest.fixture(scope='module') def client(app) -> ClientWrapper: - with app.test_client() as client: + with TestClient(app) as client: yield ClientWrapper(client) diff --git a/tests/requirements.txt b/tests/requirements.txt index c19b64d12..85e3fb149 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -3,4 +3,6 @@ pytest==7.3.1 pytest-cov==3.0.0 pytest-mock==3.6.1 pytest-dependency==0.5.1 -werkzeug==3.0.3 \ No newline at end of file +httpx==0.27.2 +werkzeug==3.0.3 +assertpy==1.1.0 \ No 
newline at end of file From 6913fe3782f9132c40909ccefb3beb273c91bdef Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Fri, 15 Nov 2024 09:02:28 -0500 Subject: [PATCH 09/44] update fastapi dependency (#1699) ### Feature or Bugfix - Bugfix ### Detail - Update fastapi dependency ### Relates - ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- backend/dataall/base/cdkproxy/requirements.txt | 2 +- backend/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/dataall/base/cdkproxy/requirements.txt b/backend/dataall/base/cdkproxy/requirements.txt index 894243316..6be9aa706 100644 --- a/backend/dataall/base/cdkproxy/requirements.txt +++ b/backend/dataall/base/cdkproxy/requirements.txt @@ -2,7 +2,7 @@ aws-cdk-lib==2.160.0 boto3==1.35.26 boto3-stubs==1.35.26 cdk-nag==2.7.2 -fastapi == 0.115.0 +fastapi == 0.115.5 PyYAML==6.0 requests==2.32.2 tabulate==0.8.9 diff --git a/backend/requirements.txt b/backend/requirements.txt index d50a72380..a744f2fc1 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,7 +1,7 @@ ariadne==0.17.0 aws-xray-sdk==2.4.3 boto3==1.35.26 -fastapi == 0.115.0 +fastapi == 0.115.5 nanoid==2.0.0 opensearch-py==1.0.0 PyAthena==2.3.0 From 75d0fd1b00fd448e79077c911745820acef27f0e Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Mon, 18 Nov 2024 17:28:29 +0100 Subject: [PATCH 10/44] **WITH FIXES** Upgrade "cross-spawn" to "7.0.5" (#1701) - Dependency - Upgrade cross spawn to avoid https://github.com/advisories/GHSA-3xgq-45jj-v275 - https://github.com/advisories/GHSA-3xgq-45jj-v275 Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? 
- Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- frontend/package-lock.json | 345 ++++--------------------------------- frontend/package.json | 25 ++- frontend/yarn.lock | 70 +------- 3 files changed, 62 insertions(+), 378 deletions(-) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index d40dba0b8..f780a4c18 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -17159,18 +17159,30 @@ } }, "node_modules/cross-spawn": { - "version": "6.0.5", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-6.0.5.tgz", - "integrity": "sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ==", + "version": "7.0.5", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.5.tgz", + "integrity": "sha512-ZVJrKKYunU38/76t0RMOulHOnUcbU9GbpWKAOZ0mhjr7CX6FVrH+4FrAapSOekrgFQ3f/8gwMEuIft0aKq6Hug==", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/cross-spawn/node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", "dependencies": { - "nice-try": "^1.0.4", - "path-key": "^2.0.1", - "semver": "^5.5.0", - "shebang-command": "^1.2.0", - "which": "^1.2.9" + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" }, "engines": { - "node": ">=4.8" + "node": ">= 8" } }, "node_modules/crypto-js": { @@ -18232,65 +18244,6 @@ "node": ">=8.0.0" } }, - 
"node_modules/env-cmd/node_modules/cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", - "dev": true, - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/env-cmd/node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/env-cmd/node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/env-cmd/node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/env-cmd/node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, "node_modules/envinfo": { "version": "7.13.0", "resolved": "https://registry.npmjs.org/envinfo/-/envinfo-7.13.0.tgz", @@ -19084,19 +19037,6 
@@ "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" }, - "node_modules/eslint/node_modules/cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, "node_modules/eslint/node_modules/globals": { "version": "13.24.0", "resolved": "https://registry.npmjs.org/globals/-/globals-13.24.0.tgz", @@ -19119,33 +19059,6 @@ "node": ">=8" } }, - "node_modules/eslint/node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "engines": { - "node": ">=8" - } - }, - "node_modules/eslint/node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/eslint/node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "engines": { - "node": ">=8" - } - }, "node_modules/eslint/node_modules/supports-color": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", @@ -19168,20 +19081,6 @@ "url": 
"https://github.com/sponsors/sindresorhus" } }, - "node_modules/eslint/node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, "node_modules/espree": { "version": "9.6.1", "resolved": "https://registry.npmjs.org/espree/-/espree-9.6.1.tgz", @@ -19314,60 +19213,6 @@ "url": "https://github.com/sindresorhus/execa?sponsor=1" } }, - "node_modules/execa/node_modules/cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/execa/node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "engines": { - "node": ">=8" - } - }, - "node_modules/execa/node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/execa/node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "engines": { 
- "node": ">=8" - } - }, - "node_modules/execa/node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, "node_modules/exit": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/exit/-/exit-0.1.2.tgz", @@ -19803,46 +19648,6 @@ "url": "https://github.com/sponsors/isaacs" } }, - "node_modules/foreground-child/node_modules/cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/foreground-child/node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "engines": { - "node": ">=8" - } - }, - "node_modules/foreground-child/node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/foreground-child/node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "engines": { 
- "node": ">=8" - } - }, "node_modules/foreground-child/node_modules/signal-exit": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", @@ -19854,20 +19659,6 @@ "url": "https://github.com/sponsors/isaacs" } }, - "node_modules/foreground-child/node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, "node_modules/fork-ts-checker-webpack-plugin": { "version": "6.5.3", "resolved": "https://registry.npmjs.org/fork-ts-checker-webpack-plugin/-/fork-ts-checker-webpack-plugin-6.5.3.tgz", @@ -26977,11 +26768,6 @@ "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==" }, - "node_modules/nice-try": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/nice-try/-/nice-try-1.0.5.tgz", - "integrity": "sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ==" - }, "node_modules/no-case": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz", @@ -27142,14 +26928,6 @@ "node": ">=8" } }, - "node_modules/npm-run-path/node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "engines": { - "node": ">=8" - } - }, "node_modules/nprogress": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/nprogress/-/nprogress-0.2.0.tgz", @@ -27679,11 +27457,11 @@ } }, "node_modules/path-key": { - "version": "2.0.1", - "resolved": 
"https://registry.npmjs.org/path-key/-/path-key-2.0.1.tgz", - "integrity": "sha512-fEHGKCSmUSDPv4uoj8AlD+joPlq3peND+HRYyxFz4KPw4z926S/b8rIuFs2FYJg3BwsxJf6A9/3eIdLaYC+9Dw==", + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", "engines": { - "node": ">=4" + "node": ">=8" } }, "node_modules/path-parse": { @@ -29559,19 +29337,6 @@ "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" }, - "node_modules/react-dev-utils/node_modules/cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, "node_modules/react-dev-utils/node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -29588,33 +29353,6 @@ "node": ">= 12.13.0" } }, - "node_modules/react-dev-utils/node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "engines": { - "node": ">=8" - } - }, - "node_modules/react-dev-utils/node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - 
"node_modules/react-dev-utils/node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "engines": { - "node": ">=8" - } - }, "node_modules/react-dev-utils/node_modules/supports-color": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", @@ -29626,20 +29364,6 @@ "node": ">=8" } }, - "node_modules/react-dev-utils/node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, "node_modules/react-devtools-core": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/react-devtools-core/-/react-devtools-core-5.2.0.tgz", @@ -30641,6 +30365,7 @@ "version": "5.7.2", "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.2.tgz", "integrity": "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==", + "peer": true, "bin": { "semver": "bin/semver" } @@ -30846,22 +30571,22 @@ "integrity": "sha512-y0m1JoUZSlPAjXVtPPW70aZWfIL/dSP7AFkRnniLCrK/8MDKog3TySTBmckD+RObVxH0v4Tox67+F14PdED2oQ==" }, "node_modules/shebang-command": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-1.2.0.tgz", - "integrity": "sha512-EV3L1+UQWGor21OmnvojK36mhg+TyIKDh3iFBKBohr5xeXIhNBcx8oWdgkTEEQ+BEFFYdLRuqMfd5L84N1V5Vg==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", "dependencies": { - 
"shebang-regex": "^1.0.0" + "shebang-regex": "^3.0.0" }, "engines": { - "node": ">=0.10.0" + "node": ">=8" } }, "node_modules/shebang-regex": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-1.0.0.tgz", - "integrity": "sha512-wpoSFAxys6b2a2wHZ1XpDSgD7N9iVjg29Ph9uV/uaP9Ex/KXlkTZTeddxDPSYQpgvzKLGJke2UU0AzoGCjNIvQ==", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", "engines": { - "node": ">=0.10.0" + "node": ">=8" } }, "node_modules/shell-quote": { diff --git a/frontend/package.json b/frontend/package.json index 9afe2219b..6806715d6 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -29,6 +29,7 @@ "@mui/styles": "^5.5.1", "@mui/x-data-grid": "^5.17.26", "@mui/x-date-pickers": "^5.0.0", + "@mui/x-tree-view": "^6.17.0", "@reduxjs/toolkit": "^1.8.0", "@testing-library/jest-dom": "^5.16.2", "@testing-library/react": "^12.1.4", @@ -68,7 +69,7 @@ "simplebar-react": "^2.3.6", "web-vitals": "^2.1.4", "yup": "^0.32.11", - "braces": "3.0.3" + "webpack": "^5.94.0" }, "overrides": { "aws-amplify": { @@ -91,8 +92,15 @@ "ip": "1.1.9", "follow-redirects": "1.15.6", "webpack-dev-middleware": "5.3.4", - "express": "4.19.2", - "ejs": "3.1.10" + "express": "4.20.0", + "ejs": "3.1.10", + "fast-xml-parser": "4.4.1", + "path-to-regexp": "0.1.10", + "body-parser": "^1.20.3", + "send": "0.19.0", + "rollup": "3.29.5", + "http-proxy-middleware": "2.0.7", + "cross-spawn": "7.0.5" }, "resolutions": { "react-redux": "^7.2.6", @@ -105,9 +113,16 @@ "ip": "1.1.9", "follow-redirects": "1.15.6", "webpack-dev-middleware": "5.3.4", - "express": "4.19.2", + "express": "4.20.0", "ejs": "3.1.10", - "ws": "^8.17.1" + "ws": "^8.17.1", + "fast-xml-parser": "4.4.1", + "path-to-regexp": "0.1.10", + "body-parser": "^1.20.3", + "send": "0.19.0", + "rollup": "3.29.5", + 
"http-proxy-middleware": "2.0.7", + "cross-spawn": "7.0.5" }, "devDependencies": { "env-cmd": "^10.1.0", diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 92c81e4f7..2b4812c78 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -7036,39 +7036,10 @@ cross-fetch@^3.0.4, cross-fetch@^3.1.5: dependencies: node-fetch "^2.6.12" -cross-spawn@^6.0.5: - version "6.0.5" - resolved "https://registry.npmjs.org/cross-spawn/-/cross-spawn-6.0.5.tgz" - integrity sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ== - dependencies: - nice-try "^1.0.4" - path-key "^2.0.1" - semver "^5.5.0" - shebang-command "^1.2.0" - which "^1.2.9" - -cross-spawn@^7.0.0: - version "7.0.3" - resolved "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz" - integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== - dependencies: - path-key "^3.1.0" - shebang-command "^2.0.0" - which "^2.0.1" - -cross-spawn@^7.0.2: - version "7.0.3" - resolved "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz" - integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== - dependencies: - path-key "^3.1.0" - shebang-command "^2.0.0" - which "^2.0.1" - -cross-spawn@^7.0.3: - version "7.0.3" - resolved "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz" - integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== +cross-spawn@7.0.5: + version "7.0.5" + resolved "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.5.tgz" + integrity sha512-ZVJrKKYunU38/76t0RMOulHOnUcbU9GbpWKAOZ0mhjr7CX6FVrH+4FrAapSOekrgFQ3f/8gwMEuIft0aKq6Hug== dependencies: path-key "^3.1.0" shebang-command "^2.0.0" @@ -11284,11 +11255,6 @@ neo-async@^2.5.0, neo-async@^2.6.2: resolved "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz" integrity 
sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw== -nice-try@^1.0.4: - version "1.0.5" - resolved "https://registry.npmjs.org/nice-try/-/nice-try-1.0.5.tgz" - integrity sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ== - no-case@^3.0.4: version "3.0.4" resolved "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz" @@ -11747,17 +11713,7 @@ path-is-absolute@^1.0.0: resolved "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz" integrity sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg== -path-key@^2.0.1: - version "2.0.1" - resolved "https://registry.npmjs.org/path-key/-/path-key-2.0.1.tgz" - integrity sha512-fEHGKCSmUSDPv4uoj8AlD+joPlq3peND+HRYyxFz4KPw4z926S/b8rIuFs2FYJg3BwsxJf6A9/3eIdLaYC+9Dw== - -path-key@^3.0.0: - version "3.1.1" - resolved "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz" - integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q== - -path-key@^3.1.0: +path-key@^3.0.0, path-key@^3.1.0: version "3.1.1" resolved "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz" integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q== @@ -13441,7 +13397,7 @@ selfsigned@^2.1.1, selfsigned@^2.4.1: "@types/node-forge" "^1.3.0" node-forge "^1" -semver@^5.5.0, semver@^5.6.0: +semver@^5.6.0: version "5.7.2" resolved "https://registry.npmjs.org/semver/-/semver-5.7.2.tgz" integrity sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g== @@ -13601,13 +13557,6 @@ shallowequal@^1.1.0: resolved "https://registry.npmjs.org/shallowequal/-/shallowequal-1.1.0.tgz" integrity sha512-y0m1JoUZSlPAjXVtPPW70aZWfIL/dSP7AFkRnniLCrK/8MDKog3TySTBmckD+RObVxH0v4Tox67+F14PdED2oQ== -shebang-command@^1.2.0: - version "1.2.0" - resolved 
"https://registry.npmjs.org/shebang-command/-/shebang-command-1.2.0.tgz" - integrity sha512-EV3L1+UQWGor21OmnvojK36mhg+TyIKDh3iFBKBohr5xeXIhNBcx8oWdgkTEEQ+BEFFYdLRuqMfd5L84N1V5Vg== - dependencies: - shebang-regex "^1.0.0" - shebang-command@^2.0.0: version "2.0.0" resolved "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz" @@ -13615,11 +13564,6 @@ shebang-command@^2.0.0: dependencies: shebang-regex "^3.0.0" -shebang-regex@^1.0.0: - version "1.0.0" - resolved "https://registry.npmjs.org/shebang-regex/-/shebang-regex-1.0.0.tgz" - integrity sha512-wpoSFAxys6b2a2wHZ1XpDSgD7N9iVjg29Ph9uV/uaP9Ex/KXlkTZTeddxDPSYQpgvzKLGJke2UU0AzoGCjNIvQ== - shebang-regex@^3.0.0: version "3.0.0" resolved "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz" @@ -15121,7 +15065,7 @@ which-typed-array@^1.1.13, which-typed-array@^1.1.14, which-typed-array@^1.1.15, gopd "^1.0.1" has-tostringtag "^1.0.2" -which@^1.2.9, which@^1.3.1: +which@^1.3.1: version "1.3.1" resolved "https://registry.npmjs.org/which/-/which-1.3.1.tgz" integrity sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ== From aedaa29523b62cff0ecb4857680efb455bdd09ed Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:23:59 +0100 Subject: [PATCH 11/44] Add MANAGE_SHARES permissions (#1702) - Feature This PR introduces `MANAGE_SHARES` permission to enable data.all admins the ability to enable/disable shares permissions at the application-level. The new permission would get created in the savepermissions Lambda trigger; but by default the permissions would be disabled for all existing groups in the platform. This would cause breaking changes and admins would need to enable this permission manually for each group. To avoid this, this PR includes a migration script that creates the permission and attaches it to existing groups. 
- [x] Test migration script locally - [x] Test migration script in CICD - [x] Perform share mutations in real AWS deployment (approve share object, submit, add items) with tenant permissions - [x] Perform share mutations in real AWS deployment (approve share object, submit, add items) WITHOUT tenant permissions (See screenshot) ![image](https://github.com/user-attachments/assets/961194a1-4e72-4399-8c20-f6962956ef8d) Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- .../services/share_item_service.py | 7 ++ .../services/share_object_service.py | 9 +++ .../shares_base/services/share_permissions.py | 7 ++ ...2e1362d4cb_add_tenant_share_permissions.py | 67 +++++++++++++++++++ 4 files changed, 90 insertions(+) create mode 100644 backend/migrations/versions/af2e1362d4cb_add_tenant_share_permissions.py diff --git a/backend/dataall/modules/shares_base/services/share_item_service.py b/backend/dataall/modules/shares_base/services/share_item_service.py index 7237e4f4d..bba21acbc 100644 --- a/backend/dataall/modules/shares_base/services/share_item_service.py +++ b/backend/dataall/modules/shares_base/services/share_item_service.py @@ -1,6 +1,7 @@ import logging from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService +from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.core.tasks.service_handlers import Worker from dataall.base.context import get_context from dataall.core.tasks.db.task_models import Task @@ -26,6 +27,7 @@ REMOVE_ITEM, LIST_ENVIRONMENT_SHARED_WITH_OBJECTS, APPROVE_SHARE_OBJECT, + MANAGE_SHARES, ) from dataall.modules.shares_base.services.share_processor_manager import ShareProcessorManager from dataall.modules.datasets_base.db.dataset_repositories import DatasetBaseRepository @@ -41,6 +43,7 @@ def _get_share_uri(session, uri): return share.shareUri @staticmethod + @TenantPolicyService.has_tenant_permission(MANAGE_SHARES) @ResourcePolicyService.has_resource_permission(GET_SHARE_OBJECT) def verify_items_share_object(uri, item_uris): context = get_context() @@ -56,6 +59,7 @@ def verify_items_share_object(uri, item_uris): return True @staticmethod + @TenantPolicyService.has_tenant_permission(MANAGE_SHARES) @ResourcePolicyService.has_resource_permission(APPROVE_SHARE_OBJECT) def reapply_items_share_object(uri, item_uris): context = get_context() @@ -71,6 +75,7 @@ def reapply_items_share_object(uri, item_uris): return True @staticmethod + 
@TenantPolicyService.has_tenant_permission(MANAGE_SHARES) @ResourcePolicyService.has_resource_permission(GET_SHARE_OBJECT) def revoke_items_share_object(uri, revoked_uris): context = get_context() @@ -123,6 +128,7 @@ def revoke_items_share_object(uri, revoked_uris): return share @staticmethod + @TenantPolicyService.has_tenant_permission(MANAGE_SHARES) @ResourcePolicyService.has_resource_permission(ADD_ITEM) def add_shared_item(uri: str, data: dict = None): context = get_context() @@ -155,6 +161,7 @@ def add_shared_item(uri: str, data: dict = None): return share_item @staticmethod + @TenantPolicyService.has_tenant_permission(MANAGE_SHARES) @ResourcePolicyService.has_resource_permission(REMOVE_ITEM, parent_resource=_get_share_uri) def remove_shared_item(uri: str): with get_context().db_engine.scoped_session() as session: diff --git a/backend/dataall/modules/shares_base/services/share_object_service.py b/backend/dataall/modules/shares_base/services/share_object_service.py index 8e191beae..22ee5eced 100644 --- a/backend/dataall/modules/shares_base/services/share_object_service.py +++ b/backend/dataall/modules/shares_base/services/share_object_service.py @@ -4,6 +4,7 @@ from dataall.core.tasks.service_handlers import Worker from dataall.base.context import get_context from dataall.core.activity.db.activity_models import Activity +from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.core.environment.db.environment_models import EnvironmentGroup, ConsumptionRole from dataall.core.environment.services.environment_service import EnvironmentService from dataall.core.environment.services.managed_iam_policies import PolicyManager @@ -34,6 +35,7 @@ CREATE_SHARE_OBJECT, DELETE_SHARE_OBJECT, GET_SHARE_OBJECT, + MANAGE_SHARES, ) from dataall.modules.shares_base.services.share_processor_manager import ShareProcessorManager from dataall.modules.datasets_base.db.dataset_repositories import DatasetBaseRepository @@ -61,6 +63,7 @@ def 
get_share_object(uri): return ShareObjectRepository.get_share_by_uri(session, uri) @classmethod + @TenantPolicyService.has_tenant_permission(MANAGE_SHARES) @ResourcePolicyService.has_resource_permission(CREATE_SHARE_OBJECT) def create_share_object( cls, @@ -213,6 +216,7 @@ def create_share_object( return share @classmethod + @TenantPolicyService.has_tenant_permission(MANAGE_SHARES) @ResourcePolicyService.has_resource_permission(SUBMIT_SHARE_OBJECT) def submit_share_object(cls, uri: str): context = get_context() @@ -254,6 +258,7 @@ def submit_share_object(cls, uri: str): return share @classmethod + @TenantPolicyService.has_tenant_permission(MANAGE_SHARES) @ResourcePolicyService.has_resource_permission(APPROVE_SHARE_OBJECT) def approve_share_object(cls, uri: str): context = get_context() @@ -286,6 +291,7 @@ def approve_share_object(cls, uri: str): return share @staticmethod + @TenantPolicyService.has_tenant_permission(MANAGE_SHARES) @ResourcePolicyService.has_resource_permission(SUBMIT_SHARE_OBJECT) def update_share_request_purpose(uri: str, request_purpose) -> bool: with get_context().db_engine.scoped_session() as session: @@ -295,6 +301,7 @@ def update_share_request_purpose(uri: str, request_purpose) -> bool: return True @staticmethod + @TenantPolicyService.has_tenant_permission(MANAGE_SHARES) @ResourcePolicyService.has_resource_permission(REJECT_SHARE_OBJECT) def update_share_reject_purpose(uri: str, reject_purpose) -> bool: with get_context().db_engine.scoped_session() as session: @@ -304,6 +311,7 @@ def update_share_reject_purpose(uri: str, reject_purpose) -> bool: return True @classmethod + @TenantPolicyService.has_tenant_permission(MANAGE_SHARES) @ResourcePolicyService.has_resource_permission(REJECT_SHARE_OBJECT) def reject_share_object(cls, uri: str, reject_purpose: str): context = get_context() @@ -322,6 +330,7 @@ def reject_share_object(cls, uri: str, reject_purpose: str): return share @classmethod + @TenantPolicyService.has_tenant_permission(MANAGE_SHARES) 
@ResourcePolicyService.has_resource_permission(DELETE_SHARE_OBJECT) def delete_share_object(cls, uri: str): with get_context().db_engine.scoped_session() as session: diff --git a/backend/dataall/modules/shares_base/services/share_permissions.py b/backend/dataall/modules/shares_base/services/share_permissions.py index e2238b68d..a821aba46 100644 --- a/backend/dataall/modules/shares_base/services/share_permissions.py +++ b/backend/dataall/modules/shares_base/services/share_permissions.py @@ -14,6 +14,10 @@ RESOURCES_ALL_WITH_DESC, ) +from dataall.core.permissions.services.tenant_permissions import TENANT_ALL, TENANT_ALL_WITH_DESC + +MANAGE_SHARES = 'MANAGE_SHARES' + ADD_ITEM = 'ADD_ITEM' REMOVE_ITEM = 'REMOVE_ITEM' SUBMIT_SHARE_OBJECT = 'SUBMIT_SHARE_OBJECT' @@ -66,3 +70,6 @@ RESOURCES_ALL_WITH_DESC[CREATE_SHARE_OBJECT] = 'Create dataset Share requests for this environment' RESOURCES_ALL_WITH_DESC[LIST_ENVIRONMENT_SHARED_WITH_OBJECTS] = 'LIST_ENVIRONMENT_SHARED_WITH_OBJECTS' + +TENANT_ALL.append(MANAGE_SHARES) +TENANT_ALL_WITH_DESC[MANAGE_SHARES] = 'Manage Data Share Objects' diff --git a/backend/migrations/versions/af2e1362d4cb_add_tenant_share_permissions.py b/backend/migrations/versions/af2e1362d4cb_add_tenant_share_permissions.py new file mode 100644 index 000000000..f0e72db31 --- /dev/null +++ b/backend/migrations/versions/af2e1362d4cb_add_tenant_share_permissions.py @@ -0,0 +1,67 @@ +"""add_tenant_share_permissions + +Revision ID: af2e1362d4cb +Revises: 49c6b18ed814 +Create Date: 2024-11-18 15:23:08.215870 + +""" + +from alembic import op +from sqlalchemy import orm +from sqlalchemy.sql import and_ +from dataall.core.permissions.services.permission_service import PermissionService +from dataall.core.permissions.db.tenant.tenant_models import TenantPolicy +from dataall.core.permissions.db.tenant.tenant_policy_repositories import TenantPolicyRepository +from dataall.modules.shares_base.services.share_permissions import MANAGE_SHARES +from 
dataall.core.permissions.services.tenant_policy_service import TenantPolicyService + +# revision identifiers, used by Alembic. +revision = 'af2e1362d4cb' +down_revision = '49c6b18ed814' +branch_labels = None +depends_on = None +TENANT_NAME = 'dataall' + + +def upgrade(): + from dataall.core.permissions.db.permission.permission_models import Permission, PermissionType + + # Ensure all permissions including MANAGE_SHARES are created in the db + bind = op.get_bind() + session = orm.Session(bind=bind) + PermissionService.init_permissions(session) + + # listTenantGroups + tenant_groups = ( + session.query( + TenantPolicy.principalId.label('name'), + TenantPolicy.principalId.label('groupUri'), + ) + .filter( + and_( + TenantPolicy.principalType == 'GROUP', + TenantPolicy.principalId != 'DAAdministrators', + ) + ) + .all() + ) + # updateGroupTenantPermissions and add MANAGE_SHARES + for group in tenant_groups: + policy = TenantPolicyRepository.find_tenant_policy( + session=session, + group_uri=group.groupUri, + tenant_name=TENANT_NAME, + ) + already_associated = TenantPolicyRepository.has_group_tenant_permission( + session, + group_uri=group.groupUri, + permission_name=MANAGE_SHARES, + tenant_name=TENANT_NAME, + ) + + if not already_associated: + TenantPolicyService.associate_permission_to_tenant_policy(session, policy, MANAGE_SHARES) + + +def downgrade(): + pass From 5c71242fa2878a0c02634fb089d8eaebf9ed1340 Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Wed, 20 Nov 2024 07:37:36 -0500 Subject: [PATCH 12/44] Disable introspection on prod sizing (#1704) - - Disable introspection on `prod_sizing` - Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? 
- What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- backend/api_handler.py | 10 +++++++++- deploy/stacks/lambda_api.py | 7 ++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/backend/api_handler.py b/backend/api_handler.py index e46113546..bbc1f1e03 100644 --- a/backend/api_handler.py +++ b/backend/api_handler.py @@ -23,6 +23,7 @@ from dataall.base.db import get_engine from dataall.base.loader import load_modules, ImportMode +from graphql.pyutils import did_you_mean logger = logging.getLogger() logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) @@ -32,6 +33,11 @@ for name in ['boto3', 's3transfer', 'botocore', 'boto']: logging.getLogger(name).setLevel(logging.ERROR) +ALLOW_INTROSPECTION = True if os.getenv('ALLOW_INTROSPECTION') == 'True' else False + +if not ALLOW_INTROSPECTION: + did_you_mean.__globals__['MAX_LENGTH'] = 0 + load_modules(modes={ImportMode.API}) SCHEMA = bootstrap_schema() TYPE_DEFS = gql(SCHEMA.gql(with_directives=False)) @@ -137,7 +143,9 @@ def handler(event, context): else: raise Exception(f'Could not initialize user context from event {event}') - success, response = graphql_sync(schema=executable_schema, data=query, context_value=app_context) + success, response = graphql_sync( + schema=executable_schema, 
data=query, context_value=app_context, introspection=ALLOW_INTROSPECTION + ) dispose_context() response = json.dumps(response) diff --git a/deploy/stacks/lambda_api.py b/deploy/stacks/lambda_api.py index 629cb3122..7f6c174ce 100644 --- a/deploy/stacks/lambda_api.py +++ b/deploy/stacks/lambda_api.py @@ -135,7 +135,12 @@ def __init__( self.api_handler_dlq = self.set_dlq(f'{resource_prefix}-{envname}-graphql-dlq') api_handler_sg = self.create_lambda_sgs(envname, 'apihandler', resource_prefix, vpc) - api_handler_env = {'envname': envname, 'LOG_LEVEL': log_level, 'REAUTH_TTL': str(reauth_ttl)} + api_handler_env = { + 'envname': envname, + 'LOG_LEVEL': log_level, + 'REAUTH_TTL': str(reauth_ttl), + 'ALLOW_INTROSPECTION': str(not prod_sizing), + } # Check if custom domain exists and if it exists email notifications could be enabled. Create a env variable which stores the domain url. This is used for sending data.all share weblinks in the email notifications. if custom_domain and custom_domain.get('hosted_zone_name', None): api_handler_env['frontend_domain_url'] = f'https://{custom_domain.get("hosted_zone_name", None)}' From 4933f1dbbf8a363b9e60674acc52eae52c148fc6 Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Wed, 20 Nov 2024 11:29:23 -0500 Subject: [PATCH 13/44] Add snyk workflow on schedule (#1705) ### Feature or Bugfix - Feature ### Detail - Add weekly run of snyk ### Relates - ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? 
- Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .github/workflows/snyk.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/snyk.yaml diff --git a/.github/workflows/snyk.yaml b/.github/workflows/snyk.yaml new file mode 100644 index 000000000..4372775ee --- /dev/null +++ b/.github/workflows/snyk.yaml @@ -0,0 +1,23 @@ +name: Snyk + +on: + workflow_dispatch: + + schedule: + - cron: "0 9 * * 1" # runs each Monday at 9:00 UTC + +permissions: + contents: read + security-events: write + +jobs: + security: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run Snyk to check for vulnerabilities + uses: snyk/actions/python-3.9@master + env: + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + with: + args: --severity-threshold=high \ No newline at end of file From a4cfc6ccfdc5e19d674b7a97b65dac6248dfc8cb Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Wed, 20 Nov 2024 11:29:32 -0500 Subject: [PATCH 14/44] Bump python runtime to bump cdk klayers cryptography version (#1707) ### Feature or Bugfix - Bugfix ### Detail - Bump python runtime and cdk klayers cryptography version for custom authorizer ### Relates - ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). 
- Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- deploy/stacks/lambda_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/stacks/lambda_api.py b/deploy/stacks/lambda_api.py index 7f6c174ce..05109a870 100644 --- a/deploy/stacks/lambda_api.py +++ b/deploy/stacks/lambda_api.py @@ -260,7 +260,7 @@ def __init__( ) # Initialize Klayers - runtime = _lambda.Runtime.PYTHON_3_9 + runtime = _lambda.Runtime.PYTHON_3_12 klayers = Klayers(self, python_version=runtime, region=self.region) # get the latest layer version for the cryptography package From b117eb85107cff31838275c45ee155aca4661195 Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:29:56 +0100 Subject: [PATCH 15/44] tenant-permission tests (#1694) - Feature Add unit tests that verify that MANAGE_X permissions are applied to all Mutations except for an OPT_OUT list of Mutations and to a subset of OPT_IN queries. The OPT_OUT mutations are either: - admin actions that can only be performed by the tenants. 
Applying permissions in this case does not make sense. - platform "support" features such as feed, notification, votes. No object needs to be protected in this case. - v2.7.0 features which will be addressed in a separate PR The OPT_IN queries are operations that retrieve credentials or redirect URLs that allow the user to effectively create/update data.all objects. Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- .../dataall/base/feature_toggle_checker.py | 3 + .../stacks/db/target_type_repositories.py | 11 ++- .../core/stacks/services/stack_service.py | 25 ++++++ .../services/dashboard_quicksight_service.py | 4 +- .../dataall/modules/datapipelines/__init__.py | 11 ++- backend/dataall/modules/mlstudio/__init__.py | 8 +- backend/dataall/modules/notebooks/__init__.py | 8 +- .../dataall/modules/s3_datasets/__init__.py | 8 +- .../s3_datasets/api/profiling/resolvers.py | 8 +- .../api/storage_location/resolvers.py | 11 ++- .../services/dataset_column_service.py | 6 +- .../services/dataset_profiling_service.py | 4 +- .../dataset_table_data_filter_service.py | 0 .../services/s3_share_service.py | 1 + .../modules/worksheets/api/resolvers.py | 2 - .../worksheets/services/worksheet_service.py | 5 ++ tests/conftest.py | 25 ++++-- tests/test_tenant_unauthorized.py | 88 +++++++++++++++++++ 18 files changed, 197 insertions(+), 31 deletions(-) create mode 100644 backend/dataall/modules/s3_datasets/services/dataset_table_data_filter_service.py create mode 100644 tests/test_tenant_unauthorized.py diff --git a/backend/dataall/base/feature_toggle_checker.py b/backend/dataall/base/feature_toggle_checker.py index b6d681646..f8a4651f0 100644 --- a/backend/dataall/base/feature_toggle_checker.py +++ b/backend/dataall/base/feature_toggle_checker.py @@ -2,6 +2,8 @@ Contains decorators that check if a feature has been enabled or not """ +import functools + from dataall.base.config import config from dataall.base.utils.decorator_utls import process_func @@ -10,6 +12,7 @@ def is_feature_enabled(config_property: str): def decorator(f): fn, fn_decorator = process_func(f) + @functools.wraps(fn) def decorated(*args, **kwargs): value = config.get_property(config_property) if not value: diff --git a/backend/dataall/core/stacks/db/target_type_repositories.py b/backend/dataall/core/stacks/db/target_type_repositories.py index c175ed307..ca5770649 100644 --- 
a/backend/dataall/core/stacks/db/target_type_repositories.py +++ b/backend/dataall/core/stacks/db/target_type_repositories.py @@ -5,6 +5,7 @@ GET_ENVIRONMENT, UPDATE_ENVIRONMENT, ) +from dataall.core.permissions.services.tenant_permissions import MANAGE_ENVIRONMENTS logger = logging.getLogger(__name__) @@ -14,10 +15,11 @@ class TargetType: _TARGET_TYPES = {} - def __init__(self, name, read_permission, write_permission): + def __init__(self, name, read_permission, write_permission, tenant_permission): self.name = name self.read_permission = read_permission self.write_permission = write_permission + self.tenant_permission = tenant_permission TargetType._TARGET_TYPES[name] = self @@ -31,6 +33,11 @@ def get_resource_read_permission_name(target_type): TargetType.is_supported_target_type(target_type) return TargetType._TARGET_TYPES[target_type].read_permission + @staticmethod + def get_resource_tenant_permission_name(target_type): + TargetType.is_supported_target_type(target_type) + return TargetType._TARGET_TYPES[target_type].tenant_permission + @staticmethod def is_supported_target_type(target_type): if target_type not in TargetType._TARGET_TYPES: @@ -41,4 +48,4 @@ def is_supported_target_type(target_type): ) -TargetType('environment', GET_ENVIRONMENT, UPDATE_ENVIRONMENT) +TargetType('environment', GET_ENVIRONMENT, UPDATE_ENVIRONMENT, MANAGE_ENVIRONMENTS) diff --git a/backend/dataall/core/stacks/services/stack_service.py b/backend/dataall/core/stacks/services/stack_service.py index 5da1c4ba6..55e4506a3 100644 --- a/backend/dataall/core/stacks/services/stack_service.py +++ b/backend/dataall/core/stacks/services/stack_service.py @@ -4,6 +4,7 @@ import logging from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService +from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.core.stacks.aws.cloudformation import CloudFormation from dataall.core.stacks.services.keyvaluetag_service import 
KeyValueTagService from dataall.core.tasks.service_handlers import Worker @@ -163,6 +164,13 @@ def update_stack_by_target_uri(target_uri, target_type): StackRequestVerifier.verify_target_type_and_uri(target_uri, target_type) context = get_context() with context.db_engine.scoped_session() as session: + TenantPolicyService.check_user_tenant_permission( + session=session, + username=context.username, + groups=context.groups, + permission_name=TargetType.get_resource_tenant_permission_name(target_type), + tenant_name=TenantPolicyService.TENANT_NAME, + ) ResourcePolicyService.check_user_resource_permission( session=session, username=context.username, @@ -178,6 +186,23 @@ def update_stack_by_target_uri(target_uri, target_type): def update_stack_tags(input): StackRequestVerifier.validate_update_tag_input(input) target_uri = input.get('targetUri') + target_type = input.get('targetType') + context = get_context() + with context.db_engine.scoped_session() as session: + TenantPolicyService.check_user_tenant_permission( + session=session, + username=context.username, + groups=context.groups, + permission_name=TargetType.get_resource_tenant_permission_name(target_type), + tenant_name=TenantPolicyService.TENANT_NAME, + ) + ResourcePolicyService.check_user_resource_permission( + session=session, + username=context.username, + groups=context.groups, + resource_uri=target_uri, + permission_name=TargetType.get_resource_update_permission_name(target_type), + ) kv_tags = KeyValueTagService.update_key_value_tags( uri=target_uri, data=input, diff --git a/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py b/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py index 8b7eb9a9e..1b60b2122 100644 --- a/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py +++ b/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py @@ -8,10 +8,11 @@ from dataall.base.db.exceptions import UnauthorizedOperation, 
TenantUnauthorized, AWSResourceNotFound from dataall.core.permissions.services.tenant_permissions import TENANT_ALL from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService +from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.modules.dashboards.db.dashboard_repositories import DashboardRepository from dataall.modules.dashboards.db.dashboard_models import Dashboard from dataall.modules.dashboards.aws.dashboard_quicksight_client import DashboardQuicksightClient -from dataall.modules.dashboards.services.dashboard_permissions import GET_DASHBOARD, CREATE_DASHBOARD +from dataall.modules.dashboards.services.dashboard_permissions import GET_DASHBOARD, CREATE_DASHBOARD, MANAGE_DASHBOARDS from dataall.base.utils import Parameter @@ -58,6 +59,7 @@ def get_quicksight_reader_url(cls, uri): return client.get_anonymous_session(dashboard_id=dash.DashboardId) @classmethod + @TenantPolicyService.has_tenant_permission(MANAGE_DASHBOARDS) @ResourcePolicyService.has_resource_permission(CREATE_DASHBOARD) def get_quicksight_designer_url(cls, uri: str): context = get_context() diff --git a/backend/dataall/modules/datapipelines/__init__.py b/backend/dataall/modules/datapipelines/__init__.py index ad96a73c1..7b1a56334 100644 --- a/backend/dataall/modules/datapipelines/__init__.py +++ b/backend/dataall/modules/datapipelines/__init__.py @@ -28,14 +28,17 @@ def __init__(self): from dataall.modules.feed.api.registry import FeedRegistry, FeedDefinition from dataall.modules.datapipelines.db.datapipelines_models import DataPipeline from dataall.modules.datapipelines.db.datapipelines_repositories import DatapipelinesRepository - from dataall.modules.datapipelines.services.datapipelines_permissions import GET_PIPELINE, UPDATE_PIPELINE - + from dataall.modules.datapipelines.services.datapipelines_permissions import ( + GET_PIPELINE, + UPDATE_PIPELINE, + MANAGE_PIPELINES, + ) import dataall.modules.datapipelines.api 
FeedRegistry.register(FeedDefinition('DataPipeline', DataPipeline)) - TargetType('pipeline', GET_PIPELINE, UPDATE_PIPELINE) - TargetType('cdkpipeline', GET_PIPELINE, UPDATE_PIPELINE) + TargetType('pipeline', GET_PIPELINE, UPDATE_PIPELINE, MANAGE_PIPELINES) + TargetType('cdkpipeline', GET_PIPELINE, UPDATE_PIPELINE, MANAGE_PIPELINES) EnvironmentResourceManager.register(DatapipelinesRepository()) diff --git a/backend/dataall/modules/mlstudio/__init__.py b/backend/dataall/modules/mlstudio/__init__.py index e3d1d2f19..2e50fb64e 100644 --- a/backend/dataall/modules/mlstudio/__init__.py +++ b/backend/dataall/modules/mlstudio/__init__.py @@ -20,9 +20,13 @@ def __init__(self): from dataall.core.stacks.db.target_type_repositories import TargetType import dataall.modules.mlstudio.api from dataall.modules.mlstudio.services.mlstudio_service import SagemakerStudioEnvironmentResource - from dataall.modules.mlstudio.services.mlstudio_permissions import GET_SGMSTUDIO_USER, UPDATE_SGMSTUDIO_USER + from dataall.modules.mlstudio.services.mlstudio_permissions import ( + GET_SGMSTUDIO_USER, + UPDATE_SGMSTUDIO_USER, + MANAGE_SGMSTUDIO_USERS, + ) - TargetType('mlstudio', GET_SGMSTUDIO_USER, UPDATE_SGMSTUDIO_USER) + TargetType('mlstudio', GET_SGMSTUDIO_USER, UPDATE_SGMSTUDIO_USER, MANAGE_SGMSTUDIO_USERS) EnvironmentResourceManager.register(SagemakerStudioEnvironmentResource()) diff --git a/backend/dataall/modules/notebooks/__init__.py b/backend/dataall/modules/notebooks/__init__.py index 5fd8900da..0fc22ea07 100644 --- a/backend/dataall/modules/notebooks/__init__.py +++ b/backend/dataall/modules/notebooks/__init__.py @@ -17,9 +17,13 @@ def is_supported(modes): def __init__(self): import dataall.modules.notebooks.api from dataall.core.stacks.db.target_type_repositories import TargetType - from dataall.modules.notebooks.services.notebook_permissions import GET_NOTEBOOK, UPDATE_NOTEBOOK + from dataall.modules.notebooks.services.notebook_permissions import ( + GET_NOTEBOOK, + UPDATE_NOTEBOOK, 
+ MANAGE_NOTEBOOKS, + ) - TargetType('notebook', GET_NOTEBOOK, UPDATE_NOTEBOOK) + TargetType('notebook', GET_NOTEBOOK, UPDATE_NOTEBOOK, MANAGE_NOTEBOOKS) log.info('API of sagemaker notebooks has been imported') diff --git a/backend/dataall/modules/s3_datasets/__init__.py b/backend/dataall/modules/s3_datasets/__init__.py index f0b73a6d0..dbd4f458c 100644 --- a/backend/dataall/modules/s3_datasets/__init__.py +++ b/backend/dataall/modules/s3_datasets/__init__.py @@ -41,7 +41,11 @@ def __init__(self): from dataall.modules.s3_datasets.indexers.table_indexer import DatasetTableIndexer import dataall.modules.s3_datasets.api - from dataall.modules.s3_datasets.services.dataset_permissions import GET_DATASET, UPDATE_DATASET + from dataall.modules.s3_datasets.services.dataset_permissions import ( + GET_DATASET, + UPDATE_DATASET, + MANAGE_DATASETS, + ) from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, S3Dataset @@ -73,7 +77,7 @@ def __init__(self): add_vote_type('dataset', DatasetIndexer) - TargetType('dataset', GET_DATASET, UPDATE_DATASET) + TargetType('dataset', GET_DATASET, UPDATE_DATASET, MANAGE_DATASETS) EnvironmentResourceManager.register(DatasetRepository()) diff --git a/backend/dataall/modules/s3_datasets/api/profiling/resolvers.py b/backend/dataall/modules/s3_datasets/api/profiling/resolvers.py index b92b8d065..5eae47bb5 100644 --- a/backend/dataall/modules/s3_datasets/api/profiling/resolvers.py +++ b/backend/dataall/modules/s3_datasets/api/profiling/resolvers.py @@ -10,6 +10,11 @@ log = logging.getLogger(__name__) +def _validate_uri(uri): + if not uri: + raise RequiredParameter('URI') + + def resolve_dataset(context, source: DatasetProfilingRun): if not source: return None @@ -17,8 +22,7 @@ def resolve_dataset(context, source: DatasetProfilingRun): def start_profiling_run(context: Context, source, input: dict = None): - if 'datasetUri' not 
in input: - raise RequiredParameter('datasetUri') + _validate_uri(input.get('datasetUri')) return DatasetProfilingService.start_profiling_run( uri=input['datasetUri'], table_uri=input.get('tableUri'), glue_table_name=input.get('GlueTableName') diff --git a/backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py b/backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py index 212332652..3d6847029 100644 --- a/backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py +++ b/backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py @@ -6,13 +6,16 @@ from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, S3Dataset -@is_feature_enabled('modules.s3_datasets.features.file_actions') -def create_storage_location(context, source, datasetUri: str = None, input: dict = None): - if 'prefix' not in input: - raise RequiredParameter('prefix') +def _validate_input(input: dict): if 'label' not in input: raise RequiredParameter('label') + if 'prefix' not in input: + raise RequiredParameter('prefix') + +@is_feature_enabled('modules.s3_datasets.features.file_actions') +def create_storage_location(context, source, datasetUri: str = None, input: dict = None): + _validate_input(input) return DatasetLocationService.create_storage_location(uri=datasetUri, data=input) diff --git a/backend/dataall/modules/s3_datasets/services/dataset_column_service.py b/backend/dataall/modules/s3_datasets/services/dataset_column_service.py index eb7c19a00..c4a45af5c 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_column_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_column_service.py @@ -1,4 +1,5 @@ from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService +from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.core.tasks.service_handlers import Worker from dataall.base.aws.sts import SessionHelper from 
dataall.base.context import get_context @@ -6,11 +7,10 @@ from dataall.modules.s3_datasets.aws.glue_table_client import GlueTableClient from dataall.modules.s3_datasets.db.dataset_column_repositories import DatasetColumnRepository from dataall.modules.s3_datasets.db.dataset_table_repositories import DatasetTableRepository -from dataall.modules.s3_datasets.services.dataset_permissions import UPDATE_DATASET_TABLE +from dataall.modules.s3_datasets.services.dataset_permissions import UPDATE_DATASET_TABLE, MANAGE_DATASETS from dataall.modules.s3_datasets.db.dataset_models import DatasetTable, DatasetTableColumn from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification -from dataall.modules.s3_datasets.services.dataset_permissions import PREVIEW_DATASET_TABLE class DatasetColumnService: @@ -44,6 +44,7 @@ def paginate_active_columns_for_table(uri: str, filter=None): return DatasetColumnRepository.paginate_active_columns_for_table(session, uri, filter) @classmethod + @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) @ResourcePolicyService.has_resource_permission( UPDATE_DATASET_TABLE, parent_resource=_get_dataset_uri, param_name='table_uri' ) @@ -58,6 +59,7 @@ def sync_table_columns(cls, table_uri: str): return cls.paginate_active_columns_for_table(uri=table_uri, filter={}) @staticmethod + @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) @ResourcePolicyService.has_resource_permission( UPDATE_DATASET_TABLE, parent_resource=_get_dataset_uri_for_column, param_name='column_uri' ) diff --git a/backend/dataall/modules/s3_datasets/services/dataset_profiling_service.py b/backend/dataall/modules/s3_datasets/services/dataset_profiling_service.py index be94ce51c..87f644197 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_profiling_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_profiling_service.py @@ -1,6 
+1,7 @@ import json from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService +from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.core.tasks.service_handlers import Worker from dataall.base.context import get_context from dataall.core.environment.db.environment_models import Environment @@ -11,7 +12,7 @@ from dataall.modules.s3_datasets.aws.s3_profiler_client import S3ProfilerClient from dataall.modules.s3_datasets.db.dataset_profiling_repositories import DatasetProfilingRepository from dataall.modules.s3_datasets.db.dataset_table_repositories import DatasetTableRepository -from dataall.modules.s3_datasets.services.dataset_permissions import PROFILE_DATASET_TABLE, GET_DATASET +from dataall.modules.s3_datasets.services.dataset_permissions import PROFILE_DATASET_TABLE, GET_DATASET, MANAGE_DATASETS from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification from dataall.modules.s3_datasets.db.dataset_models import DatasetProfilingRun, DatasetTable @@ -20,6 +21,7 @@ class DatasetProfilingService: @staticmethod + @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) @ResourcePolicyService.has_resource_permission(PROFILE_DATASET_TABLE) def start_profiling_run(uri, table_uri, glue_table_name): context = get_context() diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_data_filter_service.py b/backend/dataall/modules/s3_datasets/services/dataset_table_data_filter_service.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/dataall/modules/s3_datasets_shares/services/s3_share_service.py b/backend/dataall/modules/s3_datasets_shares/services/s3_share_service.py index 255544bc7..f7603c961 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/s3_share_service.py +++ 
b/backend/dataall/modules/s3_datasets_shares/services/s3_share_service.py @@ -175,6 +175,7 @@ def list_shared_tables_by_env_dataset(dataset_uri: str, env_uri: str): ] @staticmethod + @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) @ResourcePolicyService.has_resource_permission(CREDENTIALS_DATASET) def get_dataset_shared_assume_role_url(uri): context = get_context() diff --git a/backend/dataall/modules/worksheets/api/resolvers.py b/backend/dataall/modules/worksheets/api/resolvers.py index 97e4c9edf..280cf468c 100644 --- a/backend/dataall/modules/worksheets/api/resolvers.py +++ b/backend/dataall/modules/worksheets/api/resolvers.py @@ -11,8 +11,6 @@ def create_worksheet(context: Context, source, input: dict = None): raise exceptions.RequiredParameter(input) if not input.get('SamlAdminGroupName'): raise exceptions.RequiredParameter('groupUri') - if input.get('SamlAdminGroupName') not in context.groups: - raise exceptions.InvalidInput('groupUri', input.get('SamlAdminGroupName'), " a user's groups") if not input.get('label'): raise exceptions.RequiredParameter('label') diff --git a/backend/dataall/modules/worksheets/services/worksheet_service.py b/backend/dataall/modules/worksheets/services/worksheet_service.py index e07ce63ce..faf10f93b 100644 --- a/backend/dataall/modules/worksheets/services/worksheet_service.py +++ b/backend/dataall/modules/worksheets/services/worksheet_service.py @@ -36,6 +36,10 @@ def _get_worksheet_by_uri(session, uri: str) -> Worksheet: @TenantPolicyService.has_tenant_permission(MANAGE_WORKSHEETS) def create_worksheet(data=None) -> Worksheet: context = get_context() + if data['SamlAdminGroupName'] not in context.groups: + raise exceptions.UnauthorizedOperation( + 'CREATE_WORKSHEET', f"user {context.username} does not belong to group {data['SamlAdminGroupName']}" + ) with context.db_engine.scoped_session() as session: worksheet = Worksheet( owner=context.username, @@ -126,6 +130,7 @@ def delete_worksheet(uri) -> bool: return True 
@staticmethod + @TenantPolicyService.has_tenant_permission(MANAGE_WORKSHEETS) @ResourcePolicyService.has_resource_permission(RUN_ATHENA_QUERY) def run_sql_query(uri, worksheetUri, sqlQuery): with get_context().db_engine.scoped_session() as session: diff --git a/tests/conftest.py b/tests/conftest.py index 7a4b68df0..a7501f6cc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -93,18 +93,24 @@ def user3(): yield User('david') -def _create_group(db, tenant, name, user): +@pytest.fixture(scope='module', autouse=True) +def userNoTenantPermissions(): + yield User('noPermissionsUser') + + +def _create_group(db, tenant, name, user, attach_permissions=True): with db.scoped_session() as session: group = Group(name=name, label=name, owner=user.username) session.add(group) session.commit() - TenantPolicyService.attach_group_tenant_policy( - session=session, - group=name, - permissions=TENANT_ALL, - tenant_name=tenant.name, - ) + if attach_permissions: + TenantPolicyService.attach_group_tenant_policy( + session=session, + group=name, + permissions=TENANT_ALL, + tenant_name=tenant.name, + ) return group @@ -133,6 +139,11 @@ def not_in_org_group(db, tenant, user): yield _create_group(db, tenant, 'NotInOrgGroup', user) +@pytest.fixture(scope='module') +def groupNoTenantPermissions(db, tenant, userNoTenantPermissions): + yield _create_group(db, tenant, 'groupNoTenantPermissions', userNoTenantPermissions, attach_permissions=False) + + @pytest.fixture(scope='module', autouse=True) def tenant(db, permissions): with db.scoped_session() as session: diff --git a/tests/test_tenant_unauthorized.py b/tests/test_tenant_unauthorized.py new file mode 100644 index 000000000..f87b5dde7 --- /dev/null +++ b/tests/test_tenant_unauthorized.py @@ -0,0 +1,88 @@ +from unittest.mock import MagicMock, patch +import pytest +from assertpy import assert_that +from dataall.base.api import bootstrap +from dataall.base.loader import load_modules, ImportMode +from dataall.base.context import 
RequestContext +from dataall.base.db.exceptions import TenantUnauthorized +import inspect + + +load_modules(modes={ImportMode.API}) + +OPT_OUT_MUTATIONS = { + 'Mutation.updateGroupTenantPermissions': 'admin action. No need for tenant permission check', + 'Mutation.updateSSMParameter': 'admin action. No need for tenant permission check', + 'Mutation.createQuicksightDataSourceSet': 'admin action. No need for tenant permission check', + 'Mutation.startMaintenanceWindow': 'admin action. No need for tenant permission check', + 'Mutation.stopMaintenanceWindow': 'admin action. No need for tenant permission check', + 'Mutation.startReindexCatalog': 'admin action. No need for tenant permission check', + 'Mutation.markNotificationAsRead': 'tenant permissions do not apply to support notifications', + 'Mutation.deleteNotification': 'tenant permissions do not apply to support notifications', + 'Mutation.postFeedMessage': 'tenant permissions do not apply to support feed messages', + 'Mutation.upVote': 'tenant permissions do not apply to support votes', + 'Mutation.createAttachedMetadataForm': 'outside of this PR to be able to backport to v2.6.2', + 'Mutation.deleteAttachedMetadataForm': 'outside of this PR to be able to backport to v2.6.2', + 'Mutation.createRedshiftConnection': 'outside of this PR to be able to backport to v2.6.2', + 'Mutation.deleteRedshiftConnection': 'outside of this PR to be able to backport to v2.6.2', + 'Mutation.addConnectionGroupPermission': 'outside of this PR to be able to backport to v2.6.2', + 'Mutation.deleteConnectionGroupPermission': 'outside of this PR to be able to backport to v2.6.2', +} + +OPT_IN_QUERIES = [ + 'Query.generateEnvironmentAccessToken', + 'Query.getEnvironmentAssumeRoleUrl', + 'Query.getSagemakerStudioUserPresignedUrl', + 'Query.getSagemakerNotebookPresignedUrl', + 'Query.getDatasetAssumeRoleUrl', + 'Query.getDatasetPresignedUrl', + 'Query.getAuthorSession', + 'Query.getDatasetSharedAssumeRoleUrl', + 'Query.runAthenaSqlQuery', +] 
+ +ALL_RESOLVERS = {(_type, field) for _type in bootstrap().types for field in _type.fields if field.resolver} + + +@pytest.fixture(scope='function') +def mock_input_validation(mocker): + mocker.patch('dataall.modules.mlstudio.api.resolvers.RequestValidator', MagicMock()) + mocker.patch( + 'dataall.modules.mlstudio.services.mlstudio_service.SagemakerStudioCreationRequest.from_dict', MagicMock() + ) + mocker.patch('dataall.modules.notebooks.api.resolvers.RequestValidator', MagicMock()) + mocker.patch('dataall.modules.notebooks.services.notebook_service.NotebookCreationRequest.from_dict', MagicMock()) + mocker.patch('dataall.modules.s3_datasets.api.profiling.resolvers._validate_uri', MagicMock()) + mocker.patch('dataall.modules.s3_datasets.api.storage_location.resolvers._validate_input', MagicMock()) + mocker.patch('dataall.modules.s3_datasets.api.dataset.resolvers.RequestValidator', MagicMock()) + mocker.patch( + 'dataall.core.stacks.db.target_type_repositories.TargetType.get_resource_tenant_permission_name', + return_value='MANAGE_ENVIRONMENTS', + ) + mocker.patch('dataall.modules.shares_base.api.resolvers.RequestValidator', MagicMock()) + + +@pytest.mark.parametrize( + '_type,field', + [ + pytest.param(_type, field, id=f'{_type.name}.{field.name}') + for _type, field in ALL_RESOLVERS + if _type.name in ['Query', 'Mutation'] + ], +) +@patch('dataall.base.context._request_storage') +def test_unauthorized_tenant_permissions( + mock_local, _type, field, mock_input_validation, db, userNoTenantPermissions, groupNoTenantPermissions +): + if _type.name == 'Mutation' and f'{_type.name}.{field.name}' in OPT_OUT_MUTATIONS.keys(): + pytest.skip(f'Skipping test for {field.name}: {OPT_OUT_MUTATIONS[f"{_type.name}.{field.name}"]}') + if _type.name == 'Query' and f'{_type.name}.{field.name}' not in OPT_IN_QUERIES: + pytest.skip(f'Skipping test for {field.name}: This Query does not require a tenant permission check.') + assert_that(field.resolver).is_not_none() + 
mock_local.context = RequestContext( + db, userNoTenantPermissions.username, [groupNoTenantPermissions.groupUri], userNoTenantPermissions + ) + # Mocking arguments + iargs = {arg: MagicMock() for arg in inspect.signature(field.resolver).parameters.keys()} + # Assert Unauthorized exception is raised + assert_that(field.resolver).raises(TenantUnauthorized).when_called_with(**iargs).contains('UnauthorizedOperation') From 01c63a485b44dcf879c636634a4f701402ae5dc9 Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Thu, 21 Nov 2024 10:48:40 -0500 Subject: [PATCH 16/44] Fix Snyk Workflow to Find Project Deps (#1708) ### Feature or Bugfix - Bugfix ### Detail - Add args `--all-projects --detection-depth=5` for Snyk to find project Dep - Add MakeFile command to install all Python Deps before running `snyk test` - Noted as a requirement in [Snyk Docs](https://docs.snyk.io/scm-ide-and-ci-cd-integrations/snyk-ci-cd-integrations/github-actions-for-snyk-setup-and-checking-for-vulnerabilities/snyk-python-action) ### Relates - ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? 
- Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .github/workflows/snyk.yaml | 11 ++++++++++- Makefile | 8 +++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/snyk.yaml b/.github/workflows/snyk.yaml index 4372775ee..30e2c041b 100644 --- a/.github/workflows/snyk.yaml +++ b/.github/workflows/snyk.yaml @@ -12,12 +12,21 @@ permissions: jobs: security: + strategy: + matrix: + python-version: [3.9] runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install All Requirements + run: make install - name: Run Snyk to check for vulnerabilities uses: snyk/actions/python-3.9@master env: SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} with: - args: --severity-threshold=high \ No newline at end of file + args: --all-projects --detection-depth=5 --severity-threshold=high diff --git a/Makefile b/Makefile index 888927042..8fd5e85cd 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ venv: @python3 -m venv "venv" @/bin/bash -c "source venv/bin/activate" -install: upgrade-pip install-deploy install-backend install-cdkproxy install-tests +install: upgrade-pip install-deploy install-backend install-cdkproxy install-tests install-integration-tests install-custom-auth install-userguide upgrade-pip: pip install --upgrade pip setuptools @@ -36,6 +36,12 @@ install-tests: install-integration-tests: pip install -r tests_new/integration_tests/requirements.txt +install-custom-auth: + pip install -r deploy/custom_resources/custom_authorizer/requirements.txt + +install-userguide: + pip install -r documentation/userguide/requirements.txt + lint: pip install ruff ruff check --fix From ebb91ff41357ed445751e4751356b719d274366e Mon Sep 17 00:00:00 
2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Mon, 25 Nov 2024 08:30:45 +0100 Subject: [PATCH 17/44] Added permission check - is tenant to update SSM parameters API (#1714) ### Feature or Bugfix - Feature ### Detail - Added service function and check if the user is a tenant for the updateSSM API call ### Relates - ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- .../dataall/core/permissions/api/resolvers.py | 18 ++------------- .../services/tenant_policy_service.py | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/backend/dataall/core/permissions/api/resolvers.py b/backend/dataall/core/permissions/api/resolvers.py index de35d596b..6cbceee12 100644 --- a/backend/dataall/core/permissions/api/resolvers.py +++ b/backend/dataall/core/permissions/api/resolvers.py @@ -1,11 +1,5 @@ import logging -import os - -from dataall.base.aws.sts import SessionHelper -from dataall.base.aws.parameter_store import ParameterStoreManager -from dataall.base.db.exceptions import RequiredParameter -from dataall.core.permissions.services.permission_service import PermissionService -from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService +from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService, TenantActionsService log = logging.getLogger(__name__) @@ -26,12 +20,4 @@ def list_tenant_groups(context, source, filter=None): def update_ssm_parameter(context, source, name: str = None, value: str = None): - current_account = SessionHelper.get_account() - region = os.getenv('AWS_REGION', 'eu-west-1') - response = ParameterStoreManager.update_parameter( - AwsAccountId=current_account, - region=region, - parameter_name=f'/dataall/{os.getenv("envname", "local")}/quicksightmonitoring/{name}', - parameter_value=value, - ) - return response + return TenantActionsService.update_monitoring_ssm_parameter(name, value) diff --git a/backend/dataall/core/permissions/services/tenant_policy_service.py b/backend/dataall/core/permissions/services/tenant_policy_service.py index d0c953d09..d8096d248 100644 --- a/backend/dataall/core/permissions/services/tenant_policy_service.py +++ b/backend/dataall/core/permissions/services/tenant_policy_service.py @@ -9,6 +9,8 @@ from dataall.core.permissions.services.permission_service import PermissionService from 
dataall.core.permissions.db.tenant.tenant_models import Tenant from dataall.base.services.service_provider_factory import ServiceProviderFactory +from dataall.base.aws.sts import SessionHelper +from dataall.base.aws.parameter_store import ParameterStoreManager import logging import os from functools import wraps @@ -121,6 +123,26 @@ def validate_permissions(session, tenant_name, g_permissions, group): return tenant_group_permissions +class TenantActionsService: + @staticmethod + def update_monitoring_ssm_parameter(name, value): + # raises UnauthorizedOperation exception, if there is no admin access + context = get_context() + TenantPolicyValidationService.validate_admin_access( + context.username, context.groups, 'UPDATE_SSM_PARAMETER_MONITORING' + ) + + current_account = SessionHelper.get_account() + region = os.getenv('AWS_REGION', 'eu-west-1') + response = ParameterStoreManager.update_parameter( + AwsAccountId=current_account, + region=region, + parameter_name=f'/dataall/{os.getenv("envname", "local")}/quicksightmonitoring/{name}', + parameter_value=value, + ) + return response + + class TenantPolicyService: TENANT_NAME = 'dataall' From 7f0dab766990b944449e14b640ad6bf748f7bef5 Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Mon, 25 Nov 2024 08:31:34 +0100 Subject: [PATCH 18/44] Add GET_SHARE_OBJECT permissions to get data filters API (#1717) - Bugfix - Add GET_SHARE_OBJECT permissions to get data filters API - Cosmetic changes on shares_base module - Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? 
- Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- backend/dataall/modules/shares_base/api/resolvers.py | 10 ++++------ .../modules/shares_base/services/share_item_service.py | 8 +++++--- .../modules/shares_base/services/share_logs_service.py | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/backend/dataall/modules/shares_base/api/resolvers.py b/backend/dataall/modules/shares_base/api/resolvers.py index dc43640ba..efc427870 100644 --- a/backend/dataall/modules/shares_base/api/resolvers.py +++ b/backend/dataall/modules/shares_base/api/resolvers.py @@ -265,12 +265,10 @@ def list_shares_in_my_outbox(context: Context, source, filter: dict = None): def list_shared_with_environment_data_items(context: Context, source, environmentUri: str = None, filter: dict = None): if not filter: filter = {} - with context.engine.scoped_session() as session: - return ShareItemService.paginated_shared_with_environment_datasets( - session=session, - uri=environmentUri, - data=filter, - ) + return ShareItemService.paginated_shared_with_environment_datasets( + uri=environmentUri, + data=filter, + ) def update_share_request_purpose(context: Context, source, shareUri: str = None, requestPurpose: str = None): diff --git a/backend/dataall/modules/shares_base/services/share_item_service.py b/backend/dataall/modules/shares_base/services/share_item_service.py index 
bba21acbc..86ff79ac6 100644 --- a/backend/dataall/modules/shares_base/services/share_item_service.py +++ b/backend/dataall/modules/shares_base/services/share_item_service.py @@ -208,6 +208,8 @@ def list_shareable_objects(share, filter, is_revokable=False): @staticmethod @ResourcePolicyService.has_resource_permission(LIST_ENVIRONMENT_SHARED_WITH_OBJECTS) - def paginated_shared_with_environment_datasets(session, uri, data) -> dict: - share_item_shared_states = ShareStatusRepository.get_share_item_shared_states() - return ShareObjectRepository.paginate_shared_datasets(session, uri, data, share_item_shared_states) + def paginated_shared_with_environment_datasets(uri, data) -> dict: + context = get_context() + with context.db_engine.scoped_session() as session: + share_item_shared_states = ShareStatusRepository.get_share_item_shared_states() + return ShareObjectRepository.paginate_shared_datasets(session, uri, data, share_item_shared_states) diff --git a/backend/dataall/modules/shares_base/services/share_logs_service.py b/backend/dataall/modules/shares_base/services/share_logs_service.py index 6d29e5155..ced1975ed 100644 --- a/backend/dataall/modules/shares_base/services/share_logs_service.py +++ b/backend/dataall/modules/shares_base/services/share_logs_service.py @@ -21,7 +21,7 @@ def check_view_log_permissions(username, groups, shareUri): return ds.stewards in groups or ds.SamlAdminGroupName in groups or username == ds.owner @staticmethod - def get_share_logs_name_query(shareUri): + def _get_share_logs_name_query(shareUri): log.info(f'Get share Logs stream name for share {shareUri}') query = f"""fields @logStream @@ -32,7 +32,7 @@ def get_share_logs_name_query(shareUri): return query @staticmethod - def get_share_logs_query(log_stream_name): + def _get_share_logs_query(log_stream_name): query = f"""fields @timestamp, @message, @logStream, @log as @logGroup | sort @timestamp asc | filter @logStream like "{log_stream_name}" @@ -52,7 +52,7 @@ def 
get_share_logs(shareUri): envname = os.getenv('envname', 'local') log_group_name = f"/{Parameter().get_parameter(env=envname, path='resourcePrefix')}/{envname}/ecs/share-manager" - query_for_name = ShareLogsService.get_share_logs_name_query(shareUri=shareUri) + query_for_name = ShareLogsService._get_share_logs_name_query(shareUri=shareUri) name_query_result = CloudWatch.run_query( query=query_for_name, log_group_name=log_group_name, @@ -63,7 +63,7 @@ def get_share_logs(shareUri): name = name_query_result[0]['logStream'] - query = ShareLogsService.get_share_logs_query(log_stream_name=name) + query = ShareLogsService._get_share_logs_query(log_stream_name=name) results = CloudWatch.run_query( query=query, log_group_name=log_group_name, From 1265669834bc3b724bc7c36639364a74dc412edf Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Mon, 25 Nov 2024 08:31:58 +0100 Subject: [PATCH 19/44] **WITH FIXES** Add permissions on list datasets for env group + cosmetic S3 Datasets (#1718) - Feature For the `listS3DatasetsOwnedByEnvGroup` API call this PR introduces a permission check to evaluate if the user has `LIST_ENVIRONMENT_DATASETS` in the environment and on top of that it checks that the input groupUri is one of the groups of the user performing the call. + some cosmetic changes: internal functions prefixed with `_` Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? 
- How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../s3_datasets/api/dataset/resolvers.py | 2 +- .../s3_datasets/services/dataset_service.py | 30 ++++++++++++------- tests/modules/s3_datasets/conftest.py | 2 +- .../test_import_dataset_check_unit.py | 12 ++++---- tests/modules/s3_datasets_shares/conftest.py | 2 +- 5 files changed, 28 insertions(+), 20 deletions(-) diff --git a/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py b/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py index 6f25a6293..dee6d79ab 100644 --- a/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py +++ b/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py @@ -150,7 +150,7 @@ def list_datasets_owned_by_env_group( ): if not filter: filter = {} - return DatasetService.list_datasets_owned_by_env_group(environmentUri, groupUri, filter) + return DatasetService.list_datasets_owned_by_env_group(uri=environmentUri, group_uri=groupUri, data=filter) class RequestValidator: diff --git a/backend/dataall/modules/s3_datasets/services/dataset_service.py b/backend/dataall/modules/s3_datasets/services/dataset_service.py index 908e38551..7d189cc0a 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_service.py @@ -38,6 +38,7 @@ DATASET_READ, IMPORT_DATASET, ) +from dataall.modules.datasets_base.services.dataset_list_permissions import LIST_ENVIRONMENT_DATASETS from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository from 
dataall.modules.datasets_base.db.dataset_repositories import DatasetBaseRepository from dataall.modules.datasets_base.services.datasets_enums import DatasetRole @@ -85,13 +86,13 @@ def _attach_additional_steward_permissions(cls, session, dataset, new_stewards): interface.extend_attach_steward_permissions(session, dataset, new_stewards) @classmethod - def _delete_additional_steward__permissions(cls, session, dataset): + def _delete_additional_steward_permissions(cls, session, dataset): """All permissions from other modules that need to be deleted to stewards""" for interface in cls._interfaces: interface.extend_delete_steward_permissions(session, dataset) @staticmethod - def check_dataset_account(session, environment): + def _check_dataset_account(session, environment): dashboards_enabled = EnvironmentService.get_boolean_env_param(session, environment, 'dashboardsEnabled') if dashboards_enabled: quicksight_subscription = QuicksightClient.check_quicksight_enterprise_subscription( @@ -105,7 +106,7 @@ def check_dataset_account(session, environment): return True @staticmethod - def check_imported_resources(dataset: S3Dataset): + def _check_imported_resources(dataset: S3Dataset): if dataset.importedGlueDatabase: if len(dataset.GlueDatabaseName) > NamingConventionPattern.GLUE.value.get('max_length'): raise exceptions.InvalidInput( @@ -158,11 +159,11 @@ def create_dataset(uri, admin_group, data: dict): context = get_context() with context.db_engine.scoped_session() as session: environment = EnvironmentService.get_environment_by_uri(session, uri) - DatasetService.check_dataset_account(session=session, environment=environment) + DatasetService._check_dataset_account(session=session, environment=environment) dataset = DatasetRepository.build_dataset(username=context.username, env=environment, data=data) if dataset.imported: - DatasetService.check_imported_resources(dataset) + DatasetService._check_imported_resources(dataset) dataset = 
DatasetRepository.create_dataset(session=session, env=environment, dataset=dataset, data=data) DatasetBucketRepository.create_dataset_bucket(session, dataset, data) @@ -251,13 +252,13 @@ def update_dataset(uri: str, data: dict): with get_context().db_engine.scoped_session() as session: dataset = DatasetRepository.get_dataset_by_uri(session, uri) environment = EnvironmentService.get_environment_by_uri(session, dataset.environmentUri) - DatasetService.check_dataset_account(session=session, environment=environment) + DatasetService._check_dataset_account(session=session, environment=environment) username = get_context().username dataset: S3Dataset = DatasetRepository.get_dataset_by_uri(session, uri) if data and isinstance(data, dict): if data.get('imported', False): - DatasetService.check_imported_resources(dataset) + DatasetService._check_imported_resources(dataset) for k in data.keys(): if k not in ['stewards', 'KmsAlias']: @@ -453,11 +454,18 @@ def _create_dataset_stack(session, dataset: S3Dataset) -> Stack: ) @staticmethod - def list_datasets_owned_by_env_group(env_uri: str, group_uri: str, data: dict): - with get_context().db_engine.scoped_session() as session: + @ResourcePolicyService.has_resource_permission(LIST_ENVIRONMENT_DATASETS) + def list_datasets_owned_by_env_group(uri: str, group_uri: str, data: dict): + context = get_context() + if group_uri not in context.groups: + raise exceptions.UnauthorizedOperation( + action='LIST_ENVIRONMENT_GROUP_DATASETS', + message=f'User: {context.username} is not a member of the team {group_uri}', + ) + with context.db_engine.scoped_session() as session: return DatasetRepository.paginated_environment_group_datasets( session=session, - env_uri=env_uri, + env_uri=uri, group_uri=group_uri, data=data, ) @@ -482,7 +490,7 @@ def _transfer_stewardship_to_owners(session, dataset): resource_uri=tableUri, ) - DatasetService._delete_additional_steward__permissions(session, dataset) + 
DatasetService._delete_additional_steward_permissions(session, dataset) return dataset @staticmethod diff --git a/tests/modules/s3_datasets/conftest.py b/tests/modules/s3_datasets/conftest.py index 432bd4fa7..13c6fba2c 100644 --- a/tests/modules/s3_datasets/conftest.py +++ b/tests/modules/s3_datasets/conftest.py @@ -19,7 +19,7 @@ @pytest.fixture(scope='module', autouse=True) def patch_dataset_methods(module_mocker): module_mocker.patch( - 'dataall.modules.s3_datasets.services.dataset_service.DatasetService.check_dataset_account', return_value=True + 'dataall.modules.s3_datasets.services.dataset_service.DatasetService._check_dataset_account', return_value=True ) module_mocker.patch( 'dataall.modules.s3_datasets.services.dataset_service.DatasetService._deploy_dataset_stack', return_value=True diff --git a/tests/modules/s3_datasets/test_import_dataset_check_unit.py b/tests/modules/s3_datasets/test_import_dataset_check_unit.py index e8021c528..492f5130a 100644 --- a/tests/modules/s3_datasets/test_import_dataset_check_unit.py +++ b/tests/modules/s3_datasets/test_import_dataset_check_unit.py @@ -13,7 +13,7 @@ def test_s3_managed_bucket_import(mock_aws_client): mock_encryption_bucket(mock_aws_client, 'AES256', None) - assert DatasetService.check_imported_resources(dataset) + assert DatasetService._check_imported_resources(dataset) def test_s3_managed_bucket_but_bucket_encrypted_with_kms(mock_aws_client): @@ -21,7 +21,7 @@ def test_s3_managed_bucket_but_bucket_encrypted_with_kms(mock_aws_client): mock_encryption_bucket(mock_aws_client, 'aws:kms', 'any') with pytest.raises(RequiredParameter): - DatasetService.check_imported_resources(dataset) + DatasetService._check_imported_resources(dataset) def test_s3_managed_bucket_but_alias_provided(mock_aws_client): @@ -29,7 +29,7 @@ def test_s3_managed_bucket_but_alias_provided(mock_aws_client): mock_encryption_bucket(mock_aws_client, 'AES256', None) with pytest.raises(InvalidInput): - 
DatasetService.check_imported_resources(dataset) + DatasetService._check_imported_resources(dataset) def test_kms_encrypted_bucket_but_key_not_exist(mock_aws_client): @@ -39,7 +39,7 @@ def test_kms_encrypted_bucket_but_key_not_exist(mock_aws_client): mock_existing_alias(mock_aws_client) with pytest.raises(AWSResourceNotFound): - DatasetService.check_imported_resources(dataset) + DatasetService._check_imported_resources(dataset) def test_kms_encrypted_bucket_but_key_is_wrong(mock_aws_client): @@ -51,7 +51,7 @@ def test_kms_encrypted_bucket_but_key_is_wrong(mock_aws_client): mock_key_id(mock_aws_client, kms_id) with pytest.raises(InvalidInput): - DatasetService.check_imported_resources(dataset) + DatasetService._check_imported_resources(dataset) def test_kms_encrypted_bucket_imported(mock_aws_client): @@ -62,7 +62,7 @@ def test_kms_encrypted_bucket_imported(mock_aws_client): mock_existing_alias(mock_aws_client, f'alias/{alias}') mock_key_id(mock_aws_client, kms_id) - assert DatasetService.check_imported_resources(dataset) + assert DatasetService._check_imported_resources(dataset) def mock_encryption_bucket(mock_aws_client, algorithm, kms_id=None): diff --git a/tests/modules/s3_datasets_shares/conftest.py b/tests/modules/s3_datasets_shares/conftest.py index 432bd4fa7..13c6fba2c 100644 --- a/tests/modules/s3_datasets_shares/conftest.py +++ b/tests/modules/s3_datasets_shares/conftest.py @@ -19,7 +19,7 @@ @pytest.fixture(scope='module', autouse=True) def patch_dataset_methods(module_mocker): module_mocker.patch( - 'dataall.modules.s3_datasets.services.dataset_service.DatasetService.check_dataset_account', return_value=True + 'dataall.modules.s3_datasets.services.dataset_service.DatasetService._check_dataset_account', return_value=True ) module_mocker.patch( 'dataall.modules.s3_datasets.services.dataset_service.DatasetService._deploy_dataset_stack', return_value=True From 385cd0d95dd9d412c57d860513ee58f075aa9250 Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez 
<71252798+dlpzx@users.noreply.github.com> Date: Mon, 25 Nov 2024 08:32:48 +0100 Subject: [PATCH 20/44] Add GET_WORKSHEET permission in RUN_SQL_QUERY (#1716) ### Feature or Bugfix - Bugfix ### Detail In the run_sql query in Worksheets we are checking the permissions of the user to execute the query if the user has environment-level permissions to execute queries. This does not prevent a user from using another team's worksheets to execute athena queries. This means that the user would use other team permissions to query data. This PR retrieves the worksheet using the service decorated get_worksheet function ### Relates ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- backend/dataall/modules/worksheets/services/worksheet_service.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/dataall/modules/worksheets/services/worksheet_service.py b/backend/dataall/modules/worksheets/services/worksheet_service.py index faf10f93b..ffea373d9 100644 --- a/backend/dataall/modules/worksheets/services/worksheet_service.py +++ b/backend/dataall/modules/worksheets/services/worksheet_service.py @@ -132,6 +132,7 @@ def delete_worksheet(uri) -> bool: @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_WORKSHEETS) @ResourcePolicyService.has_resource_permission(RUN_ATHENA_QUERY) + @ResourcePolicyService.has_resource_permission(GET_WORKSHEET, param_name='worksheetUri') def run_sql_query(uri, worksheetUri, sqlQuery): with get_context().db_engine.scoped_session() as session: environment = EnvironmentService.get_environment_by_uri(session, uri) From 6382d91ef57ecf522d07e5969c1f541de0e3f054 Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Tue, 26 Nov 2024 09:25:51 -0500 Subject: [PATCH 21/44] Unify Logger Config for Tasks (#1709) - Refactoring - Unify Logger Config in Backend (focused on `/tasks`) - Fix Log Level setting - https://github.com/data-dot-all/dataall/issues/1680 - https://github.com/data-dot-all/dataall/pull/1662 Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? 
- Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- backend/dataall/__init__.py | 11 +++++++++++ .../core/environment/tasks/env_stacks_updater.py | 5 +---- backend/dataall/core/stacks/tasks/cdkproxy.py | 4 ---- .../modules/catalog/tasks/catalog_indexer_task.py | 4 ---- .../modules/omics/tasks/omics_workflows_fetcher.py | 4 ---- .../modules/s3_datasets/tasks/tables_syncer.py | 4 ---- .../tasks/dataset_subscription_task.py | 4 ---- .../tasks/subscriptions/sqs_poller.py | 5 +---- .../tasks/persistent_email_reminders_task.py | 4 ---- .../modules/shares_base/tasks/share_manager_task.py | 5 ----- .../modules/shares_base/tasks/share_reapplier_task.py | 4 ---- .../modules/shares_base/tasks/share_verifier_task.py | 4 ---- 12 files changed, 13 insertions(+), 45 deletions(-) diff --git a/backend/dataall/__init__.py b/backend/dataall/__init__.py index a6387d880..5a78164b2 100644 --- a/backend/dataall/__init__.py +++ b/backend/dataall/__init__.py @@ -1,2 +1,13 @@ from . 
import core, version from .base import utils, db, api +import logging +import os +import sys + +logging.basicConfig( + level=os.environ.get('LOG_LEVEL', 'INFO'), + handlers=[logging.StreamHandler(sys.stdout)], + format='[%(levelname)s] %(message)s', +) +for name in ['boto3', 's3transfer', 'botocore', 'boto', 'urllib3']: + logging.getLogger(name).setLevel(logging.ERROR) diff --git a/backend/dataall/core/environment/tasks/env_stacks_updater.py b/backend/dataall/core/environment/tasks/env_stacks_updater.py index ecf6b72f9..40b9a14c6 100644 --- a/backend/dataall/core/environment/tasks/env_stacks_updater.py +++ b/backend/dataall/core/environment/tasks/env_stacks_updater.py @@ -12,11 +12,8 @@ from dataall.base.db import get_engine from dataall.base.utils import Parameter -root = logging.getLogger() -if not root.hasHandlers(): - root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) -log.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) + RETRIES = 30 SLEEP_TIME = 30 diff --git a/backend/dataall/core/stacks/tasks/cdkproxy.py b/backend/dataall/core/stacks/tasks/cdkproxy.py index 198f80081..f0aa7d6d5 100644 --- a/backend/dataall/core/stacks/tasks/cdkproxy.py +++ b/backend/dataall/core/stacks/tasks/cdkproxy.py @@ -5,11 +5,7 @@ from dataall.base.cdkproxy.cdk_cli_wrapper import deploy_cdk_stack from dataall.base.db import get_engine -root = logging.getLogger() -if not root.hasHandlers(): - root.addHandler(logging.StreamHandler(sys.stdout)) logger = logging.getLogger(__name__) -logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) if __name__ == '__main__': diff --git a/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py b/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py index 032739db9..807b712a3 100644 --- a/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py +++ b/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py @@ -9,11 +9,7 @@ from dataall.base.loader import load_modules, ImportMode from 
dataall.base.utils.alarm_service import AlarmService -root = logging.getLogger() -if not root.hasHandlers(): - root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) -log.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) class CatalogIndexerTask: diff --git a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py index ecfff37db..d20d69cfc 100644 --- a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py +++ b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py @@ -11,11 +11,7 @@ from dataall.modules.omics.db.omics_repository import OmicsRepository -root = logging.getLogger() -if not root.hasHandlers(): - root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) -log.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) def fetch_omics_workflows(engine): diff --git a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py index bfd4feaad..9bd47dd3c 100644 --- a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py +++ b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py @@ -16,11 +16,7 @@ from dataall.modules.s3_datasets.indexers.dataset_indexer import DatasetIndexer from dataall.modules.s3_datasets.services.dataset_alarm_service import DatasetAlarmService -root = logging.getLogger() -if not root.hasHandlers(): - root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) -log.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) def sync_tables(engine): diff --git a/backend/dataall/modules/s3_datasets_shares/tasks/dataset_subscription_task.py b/backend/dataall/modules/s3_datasets_shares/tasks/dataset_subscription_task.py index cf6a9ec05..a08fb5cf8 100644 --- a/backend/dataall/modules/s3_datasets_shares/tasks/dataset_subscription_task.py +++ b/backend/dataall/modules/s3_datasets_shares/tasks/dataset_subscription_task.py @@ -22,11 +22,7 
@@ from dataall.modules.shares_base.db.share_object_models import ShareObject from dataall.modules.shares_base.services.share_notification_service import DataSharingNotificationType -root = logging.getLogger() -if not root.hasHandlers(): - root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) -log.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) # TODO: review this task usage and remove if not needed diff --git a/backend/dataall/modules/s3_datasets_shares/tasks/subscriptions/sqs_poller.py b/backend/dataall/modules/s3_datasets_shares/tasks/subscriptions/sqs_poller.py index a122b8915..89497e62a 100644 --- a/backend/dataall/modules/s3_datasets_shares/tasks/subscriptions/sqs_poller.py +++ b/backend/dataall/modules/s3_datasets_shares/tasks/subscriptions/sqs_poller.py @@ -6,11 +6,8 @@ import boto3 from botocore.exceptions import ClientError -root = logging.getLogger() -if not root.hasHandlers(): - root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) -log.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) + ENVNAME = os.getenv('envname', 'local') region = os.getenv('AWS_REGION', 'eu-west-1') diff --git a/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py b/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py index e9982c6c7..e120614f1 100644 --- a/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py +++ b/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py @@ -10,11 +10,7 @@ from dataall.modules.datasets_base.db.dataset_repositories import DatasetBaseRepository -root = logging.getLogger() -if not root.hasHandlers(): - root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) -log.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) def persistent_email_reminders(engine): diff --git a/backend/dataall/modules/shares_base/tasks/share_manager_task.py 
b/backend/dataall/modules/shares_base/tasks/share_manager_task.py index 65da67ca4..c7ae66515 100644 --- a/backend/dataall/modules/shares_base/tasks/share_manager_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_manager_task.py @@ -6,12 +6,7 @@ from dataall.base.db import get_engine from dataall.base.loader import load_modules, ImportMode -root = logging.getLogger() -if not root.hasHandlers(): - root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) -log.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) - if __name__ == '__main__': try: diff --git a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py index 32eebd5ab..225f069bd 100644 --- a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py @@ -11,11 +11,7 @@ from dataall.base.loader import load_modules, ImportMode -root = logging.getLogger() -if not root.hasHandlers(): - root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) -log.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) class EcsBulkShareRepplyService: diff --git a/backend/dataall/modules/shares_base/tasks/share_verifier_task.py b/backend/dataall/modules/shares_base/tasks/share_verifier_task.py index 36c677b33..90948fc9c 100644 --- a/backend/dataall/modules/shares_base/tasks/share_verifier_task.py +++ b/backend/dataall/modules/shares_base/tasks/share_verifier_task.py @@ -9,11 +9,7 @@ from dataall.base.loader import load_modules, ImportMode -root = logging.getLogger() -if not root.hasHandlers(): - root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) -log.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) def verify_shares(engine): From fce34550ce7ac94bc3b9350059bc8326b79dc87c Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Tue, 26 Nov 2024 09:26:12 -0500 
Subject: [PATCH 22/44] Change Snyk Actions (#1713) ### Feature or Bugfix ### Detail - Change GitHub Action step from using `snyk/actions/python-3.9@master` to `snyk/actions/setup@master` - `snyk/actions/setup@master` will just install Snyk CLI and we add step to explicitly call `snyk test ...` with our arguments - Changed because `snyk/actions/python-3.9@master` was setting up some virtual env and not finding the installed dependencies from `make install` (leading to Snyk skipping over the checks on `requirements.txt`) - Alternatives Explored - Specifying `package-manager` to pip rather than poetry (Poetry shell was being created that did not carry over installed deps from before using `snyk/actions/python-3.9@master`) - But not supported with `all-projects` flag - Adding configuration to `pyproject.toml` to prevent venv creation (could not find a working solution) - Using venvs and exporting PATH env variable to be used later by Snyk action step (could not find a working solution) ### Relates - https://github.com/data-dot-all/dataall/pull/1708 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? 
- Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .github/workflows/snyk.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/snyk.yaml b/.github/workflows/snyk.yaml index 30e2c041b..41986f7df 100644 --- a/.github/workflows/snyk.yaml +++ b/.github/workflows/snyk.yaml @@ -25,7 +25,7 @@ jobs: - name: Install All Requirements run: make install - name: Run Snyk to check for vulnerabilities - uses: snyk/actions/python-3.9@master + run: snyk test --all-projects --detection-depth=5 --severity-threshold=high env: SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} with: From 095c20889b6c7b983189da6bb5cb14489481219a Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:28:11 +0100 Subject: [PATCH 23/44] Added permissions to Quicksight monitoring service layer (#1715) ### Feature or Bugfix - Feature ### Detail - Added permissions to Quicksight monitoring service layer: it now checks that the user belongs to the tenant group ### Relates - ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? 
- Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../dashboards/services/dashboard_quicksight_service.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py b/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py index 1b60b2122..478a2c8c4 100644 --- a/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py +++ b/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py @@ -71,6 +71,7 @@ def get_quicksight_designer_url(cls, uri: str): @staticmethod def get_monitoring_dashboard_id(): + DashboardQuicksightService._check_user_must_be_admin() current_account = SessionHelper.get_account() dashboard_id = ParameterStoreManager.get_parameter_value( AwsAccountId=current_account, @@ -87,6 +88,7 @@ def get_monitoring_dashboard_id(): @staticmethod def get_monitoring_vpc_connection_id(): + DashboardQuicksightService._check_user_must_be_admin() current_account = SessionHelper.get_account() vpc_connection_id = ParameterStoreManager.get_parameter_value( AwsAccountId=current_account, @@ -103,6 +105,7 @@ def get_monitoring_vpc_connection_id(): @classmethod def create_quicksight_data_source_set(cls, vpc_connection_id): + cls._check_user_must_be_admin() client = cls._client() client.register_user_in_group(group_name='dataall', user_role='AUTHOR') From d318a7b6e4bcb97318cb281746c1b6c06390d602 Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Thu, 28 Nov 2024 14:08:15 +0100 Subject: [PATCH 24/44] Add LIST_ENVIRONMENT_DATASETS permission for listing shared datasets and cleanup unused code (#1719) - Bugfix Added permission check on the list datasets API calls from 
the S3 shares module. Ensuring that only environment members can see environment shared datasets. ++ remove some unused code Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- .../db/dataset_profiling_repositories.py | 11 ----------- .../services/dataset_profiling_service.py | 6 ------ .../s3_datasets_shares/api/resolvers.py | 6 ++---- .../services/s3_share_service.py | 18 +++++++++++++----- 4 files changed, 15 insertions(+), 26 deletions(-) diff --git a/backend/dataall/modules/s3_datasets/db/dataset_profiling_repositories.py b/backend/dataall/modules/s3_datasets/db/dataset_profiling_repositories.py index 001fcb1b6..6676d42ee 100644 --- a/backend/dataall/modules/s3_datasets/db/dataset_profiling_repositories.py +++ b/backend/dataall/modules/s3_datasets/db/dataset_profiling_repositories.py @@ -50,17 +50,6 @@ def get_profiling_run(session, profiling_run_uri=None, glue_job_run_id=None, glu ) return run - @staticmethod - def list_profiling_runs(session, dataset_uri): - # TODO filter is always default - filter = {} - q = ( - session.query(DatasetProfilingRun) - .filter(DatasetProfilingRun.datasetUri == dataset_uri) - .order_by(DatasetProfilingRun.created.desc()) - ) - return paginate(q, page=filter.get('page', 1), page_size=filter.get('pageSize', 20)).to_dict() - @staticmethod def list_table_profiling_runs(session, table_uri): # TODO filter is always default diff --git a/backend/dataall/modules/s3_datasets/services/dataset_profiling_service.py b/backend/dataall/modules/s3_datasets/services/dataset_profiling_service.py index 87f644197..7b06797b9 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_profiling_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_profiling_service.py @@ -63,12 +63,6 @@ def resolve_profiling_run_status(run_uri): session.add(task) Worker.queue(engine=context.db_engine, task_ids=[task.taskUri]) - @staticmethod - @ResourcePolicyService.has_resource_permission(GET_DATASET) - def list_profiling_runs(uri): - with get_context().db_engine.scoped_session() as session: - return DatasetProfilingRepository.list_profiling_runs(session, uri) - @classmethod def get_dataset_table_profiling_run(cls, 
uri: str): with get_context().db_engine.scoped_session() as session: diff --git a/backend/dataall/modules/s3_datasets_shares/api/resolvers.py b/backend/dataall/modules/s3_datasets_shares/api/resolvers.py index f77076a7a..d298f9dda 100644 --- a/backend/dataall/modules/s3_datasets_shares/api/resolvers.py +++ b/backend/dataall/modules/s3_datasets_shares/api/resolvers.py @@ -65,10 +65,8 @@ def get_s3_consumption_data(context: Context, source, shareUri: str): def list_shared_databases_tables_with_env_group(context: Context, source, environmentUri: str, groupUri: str): - return S3ShareService.list_shared_databases_tables_with_env_group(environmentUri=environmentUri, groupUri=groupUri) + return S3ShareService.list_shared_databases_tables_with_env_group(uri=environmentUri, group_uri=groupUri) def resolve_shared_db_name(context: Context, source, **kwargs): - return S3ShareService.resolve_shared_db_name( - source.GlueDatabaseName, source.shareUri, source.targetEnvAwsAccountId, source.targetEnvRegion - ) + return S3ShareService.resolve_shared_db_name(source.GlueDatabaseName, source.shareUri) diff --git a/backend/dataall/modules/s3_datasets_shares/services/s3_share_service.py b/backend/dataall/modules/s3_datasets_shares/services/s3_share_service.py index f7603c961..8a0f9bf50 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/s3_share_service.py +++ b/backend/dataall/modules/s3_datasets_shares/services/s3_share_service.py @@ -1,7 +1,7 @@ import logging from warnings import warn -from dataall.base.db import utils +from dataall.base.db import utils, exceptions from dataall.base.context import get_context from dataall.base.aws.sts import SessionHelper from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService @@ -10,6 +10,7 @@ from dataall.core.tasks.db.task_models import Task from dataall.core.tasks.service_handlers import Worker from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository +from 
dataall.modules.datasets_base.services.dataset_list_permissions import LIST_ENVIRONMENT_DATASETS from dataall.modules.shares_base.db.share_state_machines_repositories import ShareStatusRepository from dataall.modules.shares_base.services.share_item_service import ShareItemService from dataall.modules.shares_base.services.share_permissions import GET_SHARE_OBJECT @@ -164,13 +165,14 @@ def reapply_share_items_for_dataset(uri: str): return True @staticmethod - def list_shared_tables_by_env_dataset(dataset_uri: str, env_uri: str): + @ResourcePolicyService.has_resource_permission(LIST_ENVIRONMENT_DATASETS) + def list_shared_tables_by_env_dataset(uri: str, dataset_uri: str): context = get_context() with context.db_engine.scoped_session() as session: return [ {'tableUri': t.tableUri, 'GlueTableName': t.GlueTableName} for t in S3ShareObjectRepository.query_dataset_tables_shared_with_env( - session, env_uri, dataset_uri, context.username, context.groups + session, uri, dataset_uri, context.username, context.groups ) ] @@ -247,11 +249,17 @@ def get_s3_consumption_data(uri): } @staticmethod - def list_shared_databases_tables_with_env_group(environmentUri: str, groupUri: str): + @ResourcePolicyService.has_resource_permission(LIST_ENVIRONMENT_DATASETS) + def list_shared_databases_tables_with_env_group(uri: str, group_uri: str): context = get_context() + if group_uri not in context.groups: + raise exceptions.UnauthorizedOperation( + action='LIST_ENVIRONMENT_GROUP_DATASETS', + message=f'User: {context.username} is not a member of the owner team', + ) with context.db_engine.scoped_session() as session: return S3ShareObjectRepository.query_shared_glue_databases( - session=session, groups=context.groups, env_uri=environmentUri, group_uri=groupUri + session=session, groups=context.groups, env_uri=uri, group_uri=group_uri ) @staticmethod From 9d95e37eab7663033793950ae9620fcf3b7bbb47 Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Thu, 
28 Nov 2024 17:19:06 +0100 Subject: [PATCH 25/44] Add omics create_run unauthorized test and improve other tests (#1723) ### Feature or Bugfix - Feature ### Detail In the functional tests (`/tests`) - Add a new test to check create_omics_run permissions - improve the assertions in the other unauthorized tests As a result we achieve a 97% coverage for omics service (the remaining 3% is an edge case that results from a messy clean-up of the db) image ### Relates ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- tests/modules/omics/test_omics.py | 348 ++++++++++-------------------- 1 file changed, 112 insertions(+), 236 deletions(-) diff --git a/tests/modules/omics/test_omics.py b/tests/modules/omics/test_omics.py index b48d40e5c..fa7fda7d4 100644 --- a/tests/modules/omics/test_omics.py +++ b/tests/modules/omics/test_omics.py @@ -1,145 +1,131 @@ -import pytest +from assertpy import assert_that -def test_create_omics_run(run1, group): - """ - Tests creation of omics Run +def delete_omics_run(client, runUri, user, group): + query = """ + mutation deleteOmicsRun($input: OmicsDeleteInput!) { + deleteOmicsRun(input: $input) + } """ - assert run1.runUri - assert run1.SamlAdminGroupName == group.name - assert run1.label == 'my omics run' + return client.query( + query, + input={ + 'runUris': [runUri], + 'deleteFromAWS': True, + }, + username=user.username, + groups=[group.name], + ) -def test_list_user_omics_runs(client, user, group, run1): +def list_omics_runs(client, user, group, filter=None): query = """ - query listOmicsRuns($filter: OmicsFilter) { - listOmicsRuns(filter: $filter) { - count - page - pages - hasNext - hasPrevious - nodes { - runUri - workflowUri - name - owner - SamlAdminGroupName - outputDatasetUri - description - label - created - tags - environment { - label - name - environmentUri - AwsAccountId - region - SamlGroupName - } - organization { - label - name - organizationUri - } - workflow { - label - name - workflowUri - id - description - parameterTemplate - type - } - status { - status - statusMessage + query listOmicsRuns($filter: OmicsFilter) { + listOmicsRuns(filter: $filter) { + count + page + pages + hasNext + hasPrevious + nodes { + runUri + workflowUri + name + owner + SamlAdminGroupName + outputDatasetUri + description + label + created + tags + environment { + label + name + environmentUri + AwsAccountId + region + SamlGroupName + } + organization { + label + name + organizationUri + } + workflow { + label + name + workflowUri + id + 
description + parameterTemplate + type + } + status { + status + statusMessage + } + } } } - } - } - """ + """ - response = client.query( + return client.query( query, - filter=None, + filter=filter, username=user.username, groups=[group.name], ) - assert response.data.listOmicsRuns['count'] == 1 - assert len(response.data.listOmicsRuns['nodes']) == 1 - response = client.query( - query, - filter={'term': 'my omics'}, - username=user.username, - groups=[group.name], - ) - assert response.data.listOmicsRuns['count'] == 1 - assert len(response.data.listOmicsRuns['nodes']) == 1 +def test_create_omics_run(run1, group): + """ + Tests creation of omics Run + """ + assert run1.runUri + assert run1.SamlAdminGroupName == group.name + assert run1.label == 'my omics run' -def test_nopermissions_list_user_omics_runs(client, user2, group2, run1): - query = """ - query listOmicsRuns($filter: OmicsFilter) { - listOmicsRuns(filter: $filter) { - count - page - pages - hasNext - hasPrevious - nodes { - runUri - workflowUri - name - owner - SamlAdminGroupName - outputDatasetUri - description - label - created - tags - environment { - label - name - environmentUri - AwsAccountId - region - SamlGroupName - } - organization { - label - name - organizationUri - } - workflow { +def test_create_omics_run_unauthorized(client, user2, group2, env_fixture, workflow1, dataset1): + response = client.query( + """ + mutation createOmicsRun($input: NewOmicsRunInput!) 
{ + createOmicsRun(input: $input) { label - name - workflowUri - id - description - parameterTemplate - type - } - status { - status - statusMessage + runUri + SamlAdminGroupName } } - } - } - """ - - response = client.query( - query, - filter=None, + """, + input={ + 'label': 'my omics run', + 'SamlAdminGroupName': group2.name, + 'environmentUri': env_fixture.environmentUri, + 'workflowUri': workflow1.workflowUri, + 'destination': dataset1.datasetUri, + 'parameterTemplate': '{"something"}', + }, username=user2.username, groups=[group2.name], ) - assert response.data.listOmicsRuns['count'] == 0 - assert len(response.data.listOmicsRuns['nodes']) == 0 + assert_that(response.errors[0].message).contains( + 'UnauthorizedOperation', 'CREATE_OMICS_RUN', env_fixture.environmentUri + ) + + +def test_list_user_omics_runs(client, user, group, run1): + response = list_omics_runs(client, user, group) + assert_that(response.data.listOmicsRuns['count']).is_equal_to(1) + assert_that(response.data.listOmicsRuns['nodes'][0]['runUri']).is_equal_to(run1.runUri) + + response = list_omics_runs(client, user, group, filter={'term': 'my omics'}) + assert_that(response.data.listOmicsRuns['count']).is_equal_to(1) + assert_that(response.data.listOmicsRuns['nodes'][0]['runUri']).is_equal_to(run1.runUri) + + +def test_list_user_omics_runs_unauthorized(client, user2, group2, run1): + response = list_omics_runs(client, user2, group2) + assert_that(response.data.listOmicsRuns['count']).is_equal_to(0) def test_list_omics_workflows(client, user, group, workflow1): @@ -201,123 +187,13 @@ def test_get_omics_workflow(client, user, group, workflow1): assert response.data.getOmicsWorkflow['type'] == workflow1.type -def test_delete_omics_run_does_not_exist(client, user, group, run1): - query = """ - mutation deleteOmicsRun($input: OmicsDeleteInput!) 
{ - deleteOmicsRun(input: $input) - } - """ - - response = client.query( - query, - input={ - 'runUris': ['random-string'], - 'deleteFromAWS': True, - }, - username=user.username, - groups=[group.name], - ) - print(response) - print(response.data) - assert not response.data.deleteOmicsRun - - -def test_nopermissions_delete_omics_run(client, user2, group2, run1): - query = """ - mutation deleteOmicsRun($input: OmicsDeleteInput!) { - deleteOmicsRun(input: $input) - } - """ - - response = client.query( - query, - input={ - 'runUris': [run1.runUri], - 'deleteFromAWS': True, - }, - username=user2.username, - groups=[group2.name], - ) - print(response) - print(response.data) - assert not response.data.deleteOmicsRun +def test_delete_omics_run_unauthorized(client, user2, group2, run1): + response = delete_omics_run(client, run1.runUri, user2, group2) + assert_that(response.errors[0].message).contains('UnauthorizedOperation', 'DELETE_OMICS_RUN', run1.runUri) def test_delete_omics_run(client, user, group, run1): - query = """ - mutation deleteOmicsRun($input: OmicsDeleteInput!) 
{ - deleteOmicsRun(input: $input) - } - """ - - response = client.query( - query, - input={ - 'runUris': [run1.runUri], - 'deleteFromAWS': True, - }, - username=user.username, - groups=[group.name], - ) - print(response) - print(response.data) - assert response.data.deleteOmicsRun - query = """ - query listOmicsRuns($filter: OmicsFilter) { - listOmicsRuns(filter: $filter) { - count - page - pages - hasNext - hasPrevious - nodes { - runUri - workflowUri - name - owner - SamlAdminGroupName - outputDatasetUri - description - label - created - tags - environment { - label - name - environmentUri - AwsAccountId - region - SamlGroupName - } - organization { - label - name - organizationUri - } - workflow { - label - name - workflowUri - id - description - parameterTemplate - type - } - status { - status - statusMessage - } - } - } - } - """ - - response = client.query( - query, - filter=None, - username=user.username, - groups=[group.name], - ) - - assert response.data.listOmicsRuns['count'] == 0 - assert len(response.data.listOmicsRuns['nodes']) == 0 + response = delete_omics_run(client, run1.runUri, user, group) + assert_that(response.data.deleteOmicsRun).is_true() + response = list_omics_runs(client, user, group) + assert_that(response.data.listOmicsRuns['count']).is_equal_to(0) From 6df8588e3b4ea47f57b4fb5c6f57d1cb9cb2e36b Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Fri, 29 Nov 2024 16:03:11 +0100 Subject: [PATCH 26/44] Introduce is_owner permissions to Glossary mutations + add new integration tests (#1721) - Feature - Bugfix - Refactoring In the frontend Glossary operations that involve creating, modifying or deleting (WRITE) glossary resources are limited to the Glossary admins. To mimic this behavior in the backend this PR introduces permission checks that ensure that only the glossary admins can execute mutations on the glossary. 
In addition, the PR includes integration tests for the unauthorized testing scenarios. deployed Lambda in real AWS account - tested glossary owners can create, update and delete nodes - tested unauthorized users cannot execute API mutations programmatically. They obtain errors of the type: `An error occurred (UnauthorizedOperation) when calling GLOSSARY MUTATION operation:\n User johndoe@amazon.com is not the admin of the glossary Sesssion glossary1.\n ", "locations": [{"line": 2, "column": 3}], "path": ["updateCategory"]}]}% ` Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- .../catalog/services/glossaries_service.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/backend/dataall/modules/catalog/services/glossaries_service.py b/backend/dataall/modules/catalog/services/glossaries_service.py index 92ba22142..d98f85bce 100644 --- a/backend/dataall/modules/catalog/services/glossaries_service.py +++ b/backend/dataall/modules/catalog/services/glossaries_service.py @@ -1,6 +1,8 @@ +from functools import wraps import logging from dataall.base.context import get_context +from dataall.base.db import exceptions from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository @@ -15,6 +17,33 @@ def _session(): return get_context().db_engine.scoped_session() +class GlossariesResourceAccess: + @staticmethod + def is_owner(): + def decorator(f): + @wraps(f) + def wrapper(*args, **kwargs): + uri = kwargs.get('uri') + if not uri: + raise KeyError(f"{f.__name__} doesn't have parameter uri.") + context = get_context() + with context.db_engine.scoped_session() as session: + node = GlossaryRepository.get_node(session=session, uri=uri) + while node.nodeType != 'G': + node = GlossaryRepository.get_node(session=session, uri=node.parentUri) + if node and (node.admin in context.groups): + return f(*args, **kwargs) + else: + raise exceptions.UnauthorizedOperation( + action='GLOSSARY MUTATION', + message=f'User {context.username} is not the admin of the glossary {node.label}.', + ) + + return wrapper + + return decorator + + class GlossariesService: @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES) @@ -24,12 +53,14 @@ def create_glossary(data: dict = None) -> GlossaryNode: @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES) + @GlossariesResourceAccess.is_owner() def create_category(uri: str, data: dict = None): with _session() as session: return 
GlossaryRepository.create_category(session=session, uri=uri, data=data) @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES) + @GlossariesResourceAccess.is_owner() def create_term(uri: str, data: dict = None): with _session() as session: return GlossaryRepository.create_term(session=session, uri=uri, data=data) @@ -95,12 +126,14 @@ def get_link_target(targetUri: str, targetType: str): @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES) + @GlossariesResourceAccess.is_owner() def update_node(uri: str = None, data: dict = None): with _session() as session: return GlossaryRepository.update_node(session=session, uri=uri, data=data) @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES) + @GlossariesResourceAccess.is_owner() def delete_node(uri: str = None): with _session() as session: return GlossaryRepository.delete_node(session=session, uri=uri) @@ -108,6 +141,7 @@ def delete_node(uri: str = None): @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES) def approve_term_association(linkUri: str): + # is_owner permissions checked in GlossaryRepository.approve_term_association with _session() as session: return GlossaryRepository.approve_term_association( session=session, username=get_context().username, groups=get_context().groups, linkUri=linkUri @@ -116,6 +150,7 @@ def approve_term_association(linkUri: str): @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES) def dismiss_term_association(linkUri: str): + # is_owner permissions checked in GlossaryRepository.dismiss_term_association with _session() as session: return GlossaryRepository.dismiss_term_association( session=session, username=get_context().username, groups=get_context().groups, linkUri=linkUri From 222c15ce90e0b8bbd1c6d087ee9a3dcccc4d19ce Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Tue, 3 Dec 2024 08:20:15 +0100 Subject: [PATCH 27/44] Refactor 
env permissions + modify getTrustAccount (#1712) - Refactoring - Feature - Add permissions to getTrustedAccount API - Remove usage of central account in administrator view dashboard tab - refactor environment Service functions to use decorator for resource policies - Added LINK_ENVIRONMENT permissions instead of GET_ORGANIZATION to `get_pivot_role`, `get_external_id`, `get_pivot_role_template` - [X] CICD deployment completes - Add permissions to getTrustedAccount API - [X] in environment creation form view we can get the trusted account - Remove usage of central account in administrator view dashboard tab - [X] admin view renders without issue - refactor environment Service functions to use decorator for resource policies - [X] enable_subscriptions with unauthorized user = unauthorized - [X] enable_subscriptions with authorized user = success - [X] disable_subscriptions with unauthorized user = unauthorized - [X] disable_subscriptions with authorized user = success - [X] get environment assume role url with unauthorized user = unauthorized -- it throws error of user does not belong to group - [X] get environment assume role url with authorized user = success - [X] get environment access token with unauthorized user = unauthorized - [X] get environment access token with authorized user = success - Added LINK_ENVIRONMENT permissions instead of GET_ORGANIZATION to `get_pivot_role`, `get_external_id`, `get_pivot_role_template` - [X] Now we get an Unauthorized error message when LINK_ENVIRONMENT permissions are missing before hitting the create Environment button - Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? 
- Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../dataall/core/environment/api/queries.py | 1 + .../dataall/core/environment/api/resolvers.py | 27 ++-- .../services/environment_service.py | 146 ++++++------------ backend/dataall/core/groups/api/resolvers.py | 3 +- .../dataall/modules/dashboards/api/queries.py | 9 -- .../modules/dashboards/api/resolvers.py | 4 - .../services/dashboard_quicksight_service.py | 5 - .../AdministratorDashboardViewer.js | 54 +------ .../services/getPlatformAuthorSession.js | 12 -- .../modules/Administration/services/index.js | 1 - .../Environments/services/getTrustAccount.js | 12 ++ .../modules/Environments/services/index.js | 1 + .../views/EnvironmentCreateForm.js | 14 +- .../graphql/Environment/getTrustAccount.js | 9 -- .../src/services/graphql/Environment/index.js | 1 - tests/conftest.py | 4 +- .../modules/dashboards/queries.py | 0 17 files changed, 91 insertions(+), 212 deletions(-) delete mode 100644 frontend/src/modules/Administration/services/getPlatformAuthorSession.js create mode 100644 frontend/src/modules/Environments/services/getTrustAccount.js delete mode 100644 frontend/src/services/graphql/Environment/getTrustAccount.js create mode 100644 tests_new/integration_tests/modules/dashboards/queries.py diff --git a/backend/dataall/core/environment/api/queries.py 
b/backend/dataall/core/environment/api/queries.py index a1bd9bc57..3d9e84567 100644 --- a/backend/dataall/core/environment/api/queries.py +++ b/backend/dataall/core/environment/api/queries.py @@ -32,6 +32,7 @@ getTrustAccount = gql.QueryField( name='getTrustAccount', + args=[gql.Argument(name='organizationUri', type=gql.NonNullableType(gql.String))], type=gql.String, resolver=get_trust_account, test_scope='Environment', diff --git a/backend/dataall/core/environment/api/resolvers.py b/backend/dataall/core/environment/api/resolvers.py index 537e6de93..2f3301c74 100644 --- a/backend/dataall/core/environment/api/resolvers.py +++ b/backend/dataall/core/environment/api/resolvers.py @@ -18,10 +18,8 @@ log = logging.getLogger() -def get_trust_account(context: Context, source, **kwargs): - current_account = SessionHelper.get_account() - print('current_account = ', current_account) - return current_account +def get_trust_account(context: Context, source, organizationUri): + return EnvironmentService.get_trust_account(uri=organizationUri) def create_environment(context: Context, source, input={}): @@ -203,8 +201,7 @@ def resolve_user_role(context: Context, source: Environment): def list_environment_group_permissions(context, source, environmentUri: str = None, groupUri: str = None): - with context.engine.scoped_session() as session: - return EnvironmentService.list_group_permissions(session=session, uri=environmentUri, group_uri=groupUri) + return EnvironmentService.list_group_permissions(uri=environmentUri, group_uri=groupUri) @is_feature_enabled('core.features.env_aws_actions') @@ -214,12 +211,12 @@ def get_environment_assume_role_url( environmentUri: str = None, groupUri: str = None, ): - return EnvironmentService.get_environment_assume_role_url(environmentUri=environmentUri, groupUri=groupUri) + return EnvironmentService.get_environment_assume_role_url(uri=environmentUri, groupUri=groupUri) @is_feature_enabled('core.features.env_aws_actions') def 
generate_environment_access_token(context, source, environmentUri: str = None, groupUri: str = None): - credentials = EnvironmentService.generate_environment_access_token(environmentUri=environmentUri, groupUri=groupUri) + credentials = EnvironmentService.generate_environment_access_token(uri=environmentUri, groupUri=groupUri) return json.dumps(credentials) @@ -245,31 +242,33 @@ def delete_environment(context: Context, source, environmentUri: str = None, del def enable_subscriptions(context: Context, source, environmentUri: str = None, input: dict = None): - EnvironmentService.enable_subscriptions(environmentUri, input) + EnvironmentService.enable_subscriptions(uri=environmentUri, input=input) StackService.deploy_stack(targetUri=environmentUri) return True def disable_subscriptions(context: Context, source, environmentUri: str = None): - EnvironmentService.disable_subscriptions(environmentUri) + EnvironmentService.disable_subscriptions(uri=environmentUri) StackService.deploy_stack(targetUri=environmentUri) return True def get_pivot_role_template(context: Context, source, organizationUri=None): - return EnvironmentService.get_template_from_resource_bucket(organizationUri, 'pivot_role_prefix') + return EnvironmentService.get_template_from_resource_bucket(uri=organizationUri, template_name='pivot_role_prefix') def get_cdk_exec_policy_template(context: Context, source, organizationUri=None): - return EnvironmentService.get_template_from_resource_bucket(organizationUri, 'cdk_exec_policy_prefix') + return EnvironmentService.get_template_from_resource_bucket( + uri=organizationUri, template_name='cdk_exec_policy_prefix' + ) def get_external_id(context: Context, source, organizationUri=None): - return EnvironmentService.get_external_id(organizationUri) + return EnvironmentService.get_external_id(uri=organizationUri) def get_pivot_role_name(context: Context, source, organizationUri=None): - return EnvironmentService.get_pivot_role(organizationUri) + return 
EnvironmentService.get_pivot_role(uri=organizationUri) def resolve_environment(context, source, **kwargs): diff --git a/backend/dataall/core/environment/services/environment_service.py b/backend/dataall/core/environment/services/environment_service.py index 9e2b5d6a6..febaefd42 100644 --- a/backend/dataall/core/environment/services/environment_service.py +++ b/backend/dataall/core/environment/services/environment_service.py @@ -135,7 +135,7 @@ def validate_org_group(org_uri, group, session): class EnvironmentService: @staticmethod - def validate_permissions(session, uri, g_permissions, group): + def _validate_permissions(session, uri, g_permissions, group): """ g_permissions: coming from frontend = ENVIRONMENT_INVITATION_REQUEST @@ -160,7 +160,7 @@ def validate_permissions(session, uri, g_permissions, group): ) @staticmethod - def get_pivot_role_as_part_of_environment(): + def _get_pivot_role_as_part_of_environment(): ssm_param = ParameterStoreManager.get_parameter_value( region=os.getenv('AWS_REGION', 'eu-west-1'), parameter_path=f"/dataall/{os.getenv('envname', 'local')}/pivotRole/enablePivotRoleAutoCreate", @@ -168,7 +168,7 @@ def get_pivot_role_as_part_of_environment(): return ssm_param == 'True' @staticmethod - def check_cdk_resources(account_id, region, data) -> str: + def _check_cdk_resources(account_id, region, data) -> str: """ Check if all necessary cdk resources exists in the account :return : pivot role name @@ -181,7 +181,7 @@ def check_cdk_resources(account_id, region, data) -> str: log.info('Checking cdk resources for environment.') - pivot_role_as_part_of_environment = EnvironmentService.get_pivot_role_as_part_of_environment() + pivot_role_as_part_of_environment = EnvironmentService._get_pivot_role_as_part_of_environment() log.info(f'Pivot role as part of environment = {pivot_role_as_part_of_environment}') cdk_look_up_role_arn = SessionHelper.get_cdk_look_up_role_arn(accountid=account_id, region=region) @@ -216,6 +216,11 @@ def 
check_cdk_resources(account_id, region, data) -> str: return cdk_role_name + @staticmethod + @ResourcePolicyService.has_resource_permission(LINK_ENVIRONMENT) + def get_trust_account(uri): + return SessionHelper.get_account() + @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS) @ResourcePolicyService.has_resource_permission(LINK_ENVIRONMENT) @@ -223,7 +228,7 @@ def create_environment(uri, data=None): context = get_context() with context.db_engine.scoped_session() as session: EnvironmentRequestValidationService.validate_creation_params(data, uri, session) - cdk_role_name = EnvironmentService.check_cdk_resources(data.get('AwsAccountId'), data.get('region'), data) + cdk_role_name = EnvironmentService._check_cdk_resources(data.get('AwsAccountId'), data.get('region'), data) env = Environment( organizationUri=data.get('organizationUri'), label=data.get('label', 'Unnamed'), @@ -323,7 +328,7 @@ def update_environment(uri, data=None): with get_context().db_engine.scoped_session() as session: environment = EnvironmentService.get_environment_by_uri(session, uri) previous_resource_prefix = environment.resourcePrefix - EnvironmentService.check_cdk_resources( + EnvironmentService._check_cdk_resources( account_id=environment.AwsAccountId, region=environment.region, data=data ) @@ -366,7 +371,7 @@ def invite_group(uri, data=None) -> (Environment, EnvironmentGroup): group: str = data['groupUri'] with get_context().db_engine.scoped_session() as session: - EnvironmentService.validate_permissions(session, uri, data['permissions'], group) + EnvironmentService._validate_permissions(session, uri, data['permissions'], group) environment = EnvironmentService.get_environment_by_uri(session, uri) @@ -493,7 +498,7 @@ def update_group_permissions(uri, data=None): group = data['groupUri'] with get_context().db_engine.scoped_session() as session: - EnvironmentService.validate_permissions(session, uri, data['permissions'], group) + 
EnvironmentService._validate_permissions(session, uri, data['permissions'], group) environment = EnvironmentService.get_environment_by_uri(session, uri) @@ -521,7 +526,7 @@ def update_group_permissions(uri, data=None): @staticmethod @ResourcePolicyService.has_resource_permission(environment_permissions.LIST_ENVIRONMENT_GROUP_PERMISSIONS) - def list_group_permissions(session, uri, group_uri): + def list_group_permissions(uri, group_uri): # the permission checked with get_context().db_engine.scoped_session() as session: return EnvironmentService.list_group_permissions_internal(session, uri, group_uri) @@ -919,7 +924,7 @@ def get_boolean_env_param(session, env: Environment, param: str) -> bool: return param is not None and param.value.lower() == 'true' @staticmethod - def is_user_invited(uri): + def _is_user_invited(uri): context = get_context() with context.db_engine.scoped_session() as session: return EnvironmentRepository.is_user_invited_to_environment(session=session, groups=context.groups, uri=uri) @@ -930,23 +935,17 @@ def resolve_user_role(environment: Environment): return EnvironmentPermission.Owner.value elif environment.SamlGroupName in get_context().groups: return EnvironmentPermission.Admin.value - elif EnvironmentService.is_user_invited(environment.environmentUri): + elif EnvironmentService._is_user_invited(environment.environmentUri): return EnvironmentPermission.Invited.value return EnvironmentPermission.NotInvited.value @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS) - def enable_subscriptions(environmentUri: str = None, input: dict = None): + @ResourcePolicyService.has_resource_permission(ENABLE_ENVIRONMENT_SUBSCRIPTIONS) + def enable_subscriptions(uri, input: dict = None): context = get_context() with context.db_engine.scoped_session() as session: - ResourcePolicyService.check_user_resource_permission( - session=session, - username=context.username, - groups=context.groups, - resource_uri=environmentUri, - 
permission_name=ENABLE_ENVIRONMENT_SUBSCRIPTIONS, - ) - environment = EnvironmentService.get_environment_by_uri(session, environmentUri) + environment = EnvironmentService.get_environment_by_uri(session, uri) if input.get('producersTopicArn'): environment.subscriptionsProducersTopicName = input.get('producersTopicArn') environment.subscriptionsProducersTopicImported = True @@ -972,17 +971,11 @@ def enable_subscriptions(environmentUri: str = None, input: dict = None): @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS) - def disable_subscriptions(environment_uri: str = None): + @ResourcePolicyService.has_resource_permission(ENABLE_ENVIRONMENT_SUBSCRIPTIONS) + def disable_subscriptions(uri): context = get_context() with context.db_engine.scoped_session() as session: - ResourcePolicyService.check_user_resource_permission( - session=session, - username=context.username, - groups=context.groups, - resource_uri=environment_uri, - permission_name=ENABLE_ENVIRONMENT_SUBSCRIPTIONS, - ) - environment = EnvironmentService.get_environment_by_uri(session, environment_uri) + environment = EnvironmentService.get_environment_by_uri(session, uri) environment.subscriptionsConsumersTopicName = None environment.subscriptionsConsumersTopicImported = False @@ -1034,20 +1027,11 @@ def _get_environment_group_aws_session(session, username, groups, environment, g @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS) - def get_environment_assume_role_url( - environmentUri: str = None, - groupUri: str = None, - ): + @ResourcePolicyService.has_resource_permission(CREDENTIALS_ENVIRONMENT) + def get_environment_assume_role_url(uri, groupUri): context = get_context() with context.db_engine.scoped_session() as session: - ResourcePolicyService.check_user_resource_permission( - session=session, - username=context.username, - groups=context.groups, - resource_uri=environmentUri, - permission_name=CREDENTIALS_ENVIRONMENT, - ) - environment = 
EnvironmentService.get_environment_by_uri(session, environmentUri) + environment = EnvironmentService.get_environment_by_uri(session, uri) url = SessionHelper.get_console_access_url( EnvironmentService._get_environment_group_aws_session( session=session, @@ -1062,17 +1046,11 @@ def get_environment_assume_role_url( @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS) - def generate_environment_access_token(environmentUri: str = None, groupUri: str = None): + @ResourcePolicyService.has_resource_permission(CREDENTIALS_ENVIRONMENT) + def generate_environment_access_token(uri, groupUri): context = get_context() with context.db_engine.scoped_session() as session: - ResourcePolicyService.check_user_resource_permission( - session=session, - username=context.username, - groups=context.groups, - resource_uri=environmentUri, - permission_name=CREDENTIALS_ENVIRONMENT, - ) - environment = EnvironmentService.get_environment_by_uri(session, environmentUri) + environment = EnvironmentService.get_environment_by_uri(session, uri) c = EnvironmentService._get_environment_group_aws_session( session=session, username=context.username, @@ -1087,16 +1065,8 @@ def generate_environment_access_token(environmentUri: str = None, groupUri: str } @staticmethod - def get_pivot_role(organization_uri): - context = get_context() - with context.db_engine.scoped_session() as session: - ResourcePolicyService.check_user_resource_permission( - session=session, - username=context.username, - groups=context.groups, - resource_uri=organization_uri, - permission_name=GET_ORGANIZATION, - ) + @ResourcePolicyService.has_resource_permission(LINK_ENVIRONMENT) + def get_pivot_role(uri): pivot_role_name = SessionHelper.get_delegation_role_name(region='') if not pivot_role_name: raise exceptions.AWSResourceNotFound( @@ -1106,47 +1076,31 @@ def get_pivot_role(organization_uri): return pivot_role_name @staticmethod - def get_external_id(organization_uri): - context = get_context() - with 
context.db_engine.scoped_session() as session: - ResourcePolicyService.check_user_resource_permission( - session=session, - username=context.username, - groups=context.groups, - resource_uri=organization_uri, - permission_name=GET_ORGANIZATION, + @ResourcePolicyService.has_resource_permission(LINK_ENVIRONMENT) + def get_external_id(uri): + external_id = SessionHelper.get_external_id_secret() + if not external_id: + raise exceptions.AWSResourceNotFound( + action='GET_EXTERNAL_ID', + message='External Id could not be found on AWS Secretsmanager', ) - external_id = SessionHelper.get_external_id_secret() - if not external_id: - raise exceptions.AWSResourceNotFound( - action='GET_EXTERNAL_ID', - message='External Id could not be found on AWS Secretsmanager', - ) - return external_id + return external_id @staticmethod - def get_template_from_resource_bucket(organization_uri, template_name): - context = get_context() - with context.db_engine.scoped_session() as session: - ResourcePolicyService.check_user_resource_permission( - session=session, - username=context.username, - groups=context.groups, - resource_uri=organization_uri, - permission_name=GET_ORGANIZATION, + @ResourcePolicyService.has_resource_permission(LINK_ENVIRONMENT) + def get_template_from_resource_bucket(uri, template_name): + envname = os.getenv('envname', 'local') + region = os.getenv('AWS_REGION', 'eu-central-1') + + resource_bucket = Parameter().get_parameter(env=envname, path='s3/resources_bucket_name') + template_key = Parameter().get_parameter(env=envname, path=f's3/{template_name}') + if not resource_bucket or not template_key: + raise AWSResourceNotFound( + action='GET_TEMPLATE', + message=f'{template_name} Yaml template file could not be found on Amazon S3 bucket', ) - envname = os.getenv('envname', 'local') - region = os.getenv('AWS_REGION', 'eu-central-1') - - resource_bucket = Parameter().get_parameter(env=envname, path='s3/resources_bucket_name') - template_key = 
Parameter().get_parameter(env=envname, path=f's3/{template_name}') - if not resource_bucket or not template_key: - raise AWSResourceNotFound( - action='GET_TEMPLATE', - message=f'{template_name} Yaml template file could not be found on Amazon S3 bucket', - ) - return S3_client.get_presigned_url(region, resource_bucket, template_key) + return S3_client.get_presigned_url(region, resource_bucket, template_key) @staticmethod @ResourcePolicyService.has_resource_permission(environment_permissions.GET_ENVIRONMENT) diff --git a/backend/dataall/core/groups/api/resolvers.py b/backend/dataall/core/groups/api/resolvers.py index ae7c28323..9434db6b5 100644 --- a/backend/dataall/core/groups/api/resolvers.py +++ b/backend/dataall/core/groups/api/resolvers.py @@ -14,8 +14,7 @@ def resolve_group_environment_permissions(context, source, environmentUri): if not source: return None - with context.engine.scoped_session() as session: - return EnvironmentService.list_group_permissions(session=session, uri=environmentUri, group_uri=source.groupUri) + return EnvironmentService.list_group_permissions(uri=environmentUri, group_uri=source.groupUri) def resolve_group_tenant_permissions(context, source): diff --git a/backend/dataall/modules/dashboards/api/queries.py b/backend/dataall/modules/dashboards/api/queries.py index 9912375b8..a4b005adc 100644 --- a/backend/dataall/modules/dashboards/api/queries.py +++ b/backend/dataall/modules/dashboards/api/queries.py @@ -3,7 +3,6 @@ get_dashboard, get_monitoring_dashboard_id, get_monitoring_vpc_connection_id, - get_quicksight_author_session, get_quicksight_designer_url, get_quicksight_reader_session, get_quicksight_reader_url, @@ -37,14 +36,6 @@ resolver=get_monitoring_vpc_connection_id, ) -getPlatformAuthorSession = gql.QueryField( - name='getPlatformAuthorSession', - args=[ - gql.Argument(name='awsAccount', type=gql.NonNullableType(gql.String)), - ], - type=gql.String, - resolver=get_quicksight_author_session, -) getPlatformReaderSession = 
gql.QueryField( name='getPlatformReaderSession', diff --git a/backend/dataall/modules/dashboards/api/resolvers.py b/backend/dataall/modules/dashboards/api/resolvers.py index 86d0fb671..68d35b3a8 100644 --- a/backend/dataall/modules/dashboards/api/resolvers.py +++ b/backend/dataall/modules/dashboards/api/resolvers.py @@ -124,10 +124,6 @@ def create_quicksight_data_source_set(context, source, vpcConnectionId: str = No return DashboardQuicksightService.create_quicksight_data_source_set(vpcConnectionId) -def get_quicksight_author_session(context, source, awsAccount: str = None): - return DashboardQuicksightService.get_quicksight_author_session(awsAccount) - - def get_quicksight_reader_session(context, source, dashboardId: str = None): return DashboardQuicksightService.get_quicksight_reader_session(dashboardId) diff --git a/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py b/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py index 478a2c8c4..67edc6a19 100644 --- a/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py +++ b/backend/dataall/modules/dashboards/services/dashboard_quicksight_service.py @@ -119,11 +119,6 @@ def create_quicksight_data_source_set(cls, vpc_connection_id): return datasource_id - @classmethod - def get_quicksight_author_session(cls, aws_account): - DashboardQuicksightService._check_user_must_be_admin() - return cls._client(aws_account).get_author_session() - @classmethod def get_quicksight_reader_session(cls, dashboard_uri): cls._check_user_must_be_admin() diff --git a/frontend/src/modules/Administration/components/AdministratorDashboardViewer.js b/frontend/src/modules/Administration/components/AdministratorDashboardViewer.js index 8c660e0cb..d00cef940 100644 --- a/frontend/src/modules/Administration/components/AdministratorDashboardViewer.js +++ b/frontend/src/modules/Administration/components/AdministratorDashboardViewer.js @@ -1,4 +1,4 @@ -import { AddOutlined, 
ArrowRightAlt } from '@mui/icons-material'; +import { AddOutlined } from '@mui/icons-material'; import { LoadingButton } from '@mui/lab'; import { Box, @@ -17,12 +17,11 @@ import * as ReactIf from 'react-if'; import * as Yup from 'yup'; import { useSettings } from 'design'; import { SET_ERROR, useDispatch } from 'globalErrors'; -import { getTrustAccount, useClient } from 'services'; +import { useClient } from 'services'; import { createQuicksightDataSourceSet, getMonitoringDashboardId, getMonitoringVPCConnectionId, - getPlatformAuthorSession, getPlatformReaderSession, updateSSMParameter } from '../services'; @@ -35,21 +34,10 @@ export const DashboardViewer = () => { const { settings } = useSettings(); const [dashboardId, setDashboardId] = useState(''); const [vpcConnectionId, setVpcConnectionId] = useState(''); - const [trustedAccount, setTrustedAccount] = useState(null); const [dashboardRef] = useState(createRef()); const [sessionUrl, setSessionUrl] = useState(null); - const [isOpeningSession, setIsOpeningSession] = useState(false); const [isCreatingDataSource, setIsCreatingDataSource] = useState(false); - const fetchTrustedAccount = useCallback(async () => { - const response = await client.query(getTrustAccount()); - if (!response.errors) { - setTrustedAccount(response.data.getTrustAccount); - } else { - dispatch({ type: SET_ERROR, error: response.errors[0].message }); - } - }, [client, dispatch]); - const fetchMonitoringVPCConnectionId = useCallback(async () => { const response = await client.query(getMonitoringVPCConnectionId()); if (!response.errors) { @@ -99,16 +87,12 @@ export const DashboardViewer = () => { fetchMonitoringVPCConnectionId().catch((e) => dispatch({ type: SET_ERROR, error: e.message }) ); - fetchTrustedAccount().catch((e) => - dispatch({ type: SET_ERROR, error: e.message }) - ); } }, [ client, dispatch, fetchMonitoringDashboardId, - fetchMonitoringVPCConnectionId, - fetchTrustedAccount + fetchMonitoringVPCConnectionId ]); async function 
submitVpc(values, setStatus, setSubmitting, setErrors) { @@ -183,19 +167,6 @@ export const DashboardViewer = () => { setIsCreatingDataSource(false); } - const startAuthorSession = async () => { - setIsOpeningSession(true); - const response = await client.query( - getPlatformAuthorSession(trustedAccount) - ); - if (!response.errors) { - window.open(response.data.getPlatformAuthorSession, '_blank'); - } else { - dispatch({ type: SET_ERROR, error: response.errors[0].message }); - } - setIsOpeningSession(false); - }; - return ( @@ -206,8 +177,9 @@ export const DashboardViewer = () => { - 1. Enable Quicksight Enterprise Edition in AWS Account ={' '} - {trustedAccount}. Check the user guide for more details. + 1. Enable Quicksight Enterprise Edition in the infrastructure + data.all central account. Check the user guide for more + details. @@ -343,20 +315,6 @@ export const DashboardViewer = () => { Dashboard. Check the user guide for more details. - - - } - variant="outlined" - onClick={startAuthorSession} - sx={{ mt: 1, mb: 2, ml: 2 }} - > - Start Quicksight session - - - diff --git a/frontend/src/modules/Administration/services/getPlatformAuthorSession.js b/frontend/src/modules/Administration/services/getPlatformAuthorSession.js deleted file mode 100644 index 391dd9e99..000000000 --- a/frontend/src/modules/Administration/services/getPlatformAuthorSession.js +++ /dev/null @@ -1,12 +0,0 @@ -import { gql } from 'apollo-boost'; - -export const getPlatformAuthorSession = (awsAccount) => ({ - variables: { - awsAccount - }, - query: gql` - query getPlatformAuthorSession($awsAccount: String!) 
{ - getPlatformAuthorSession(awsAccount: $awsAccount) - } - ` -}); diff --git a/frontend/src/modules/Administration/services/index.js b/frontend/src/modules/Administration/services/index.js index a07cd1853..c74d17891 100644 --- a/frontend/src/modules/Administration/services/index.js +++ b/frontend/src/modules/Administration/services/index.js @@ -1,7 +1,6 @@ export * from './createQuicksightDataSourceSet'; export * from './getMonitoringDashboardId'; export * from './getMonitoringVPCConnectionId'; -export * from './getPlatformAuthorSession'; export * from './getPlatformReaderSession'; export * from './listTenantGroups'; export * from './listTenantPermissions'; diff --git a/frontend/src/modules/Environments/services/getTrustAccount.js b/frontend/src/modules/Environments/services/getTrustAccount.js new file mode 100644 index 000000000..2a1f91eb4 --- /dev/null +++ b/frontend/src/modules/Environments/services/getTrustAccount.js @@ -0,0 +1,12 @@ +import { gql } from 'apollo-boost'; + +export const getTrustAccount = (organizationUri) => ({ + variables: { + organizationUri + }, + query: gql` + query GetTrustAccount($organizationUri: String!) 
{ + getTrustAccount(organizationUri: $organizationUri) + } + ` +}); diff --git a/frontend/src/modules/Environments/services/index.js b/frontend/src/modules/Environments/services/index.js index e52b10a61..a4831a20a 100644 --- a/frontend/src/modules/Environments/services/index.js +++ b/frontend/src/modules/Environments/services/index.js @@ -12,6 +12,7 @@ export * from './getPivotRoleExternalId'; export * from './getPivotRoleName'; export * from './getPivotRolePresignedUrl'; export * from './getCDKExecPolicyPresignedUrl.js'; +export * from './getTrustAccount'; export * from './inviteGroup'; export * from './listAllEnvironmentConsumptionRoles'; export * from './listAllEnvironmentGroups'; diff --git a/frontend/src/modules/Environments/views/EnvironmentCreateForm.js b/frontend/src/modules/Environments/views/EnvironmentCreateForm.js index bbe55e522..e2c2a8275 100644 --- a/frontend/src/modules/Environments/views/EnvironmentCreateForm.js +++ b/frontend/src/modules/Environments/views/EnvironmentCreateForm.js @@ -35,7 +35,8 @@ import { getPivotRoleExternalId, getPivotRoleName, getPivotRolePresignedUrl, - getCDKExecPolicyPresignedUrl + getCDKExecPolicyPresignedUrl, + getTrustAccount } from '../services'; import { SanitizedHTML, @@ -45,12 +46,7 @@ import { useSettings } from 'design'; import { SET_ERROR, useDispatch } from 'globalErrors'; -import { - getOrganization, - getTrustAccount, - useClient, - useGroups -} from 'services'; +import { getOrganization, useClient, useGroups } from 'services'; import { AwsRegions, isAnyEnvironmentModuleEnabled, @@ -84,13 +80,13 @@ const EnvironmentCreateForm = (props) => { setLoading(false); }, [client, dispatch, params.uri]); const fetchTrustedAccount = useCallback(async () => { - const response = await client.query(getTrustAccount()); + const response = await client.query(getTrustAccount(params.uri)); if (!response.errors) { setTrustedAccount(response.data.getTrustAccount); } else { dispatch({ type: SET_ERROR, error: 
response.errors[0].message }); } - }, [client, dispatch]); + }, [client, dispatch, params.uri]); const getRoleName = useCallback(async () => { const response = await client.query(getPivotRoleName(params.uri)); if (!response.errors) { diff --git a/frontend/src/services/graphql/Environment/getTrustAccount.js b/frontend/src/services/graphql/Environment/getTrustAccount.js deleted file mode 100644 index 97aba5e40..000000000 --- a/frontend/src/services/graphql/Environment/getTrustAccount.js +++ /dev/null @@ -1,9 +0,0 @@ -import { gql } from 'apollo-boost'; - -export const getTrustAccount = () => ({ - query: gql` - query GetTrustAccount { - getTrustAccount - } - ` -}); diff --git a/frontend/src/services/graphql/Environment/index.js b/frontend/src/services/graphql/Environment/index.js index 96d75039d..6da42b990 100644 --- a/frontend/src/services/graphql/Environment/index.js +++ b/frontend/src/services/graphql/Environment/index.js @@ -1,4 +1,3 @@ -export * from './getTrustAccount'; export * from './listAllGroups'; export * from './listAllConsumptionRoles'; export * from './listEnvironmentConsumptionRoles'; diff --git a/tests/conftest.py b/tests/conftest.py index a7501f6cc..b79afb102 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -187,11 +187,11 @@ def patch_stack_tasks(module_mocker): @pytest.fixture(scope='module', autouse=True) def patch_check_env(module_mocker): module_mocker.patch( - 'dataall.core.environment.services.environment_service.EnvironmentService.check_cdk_resources', + 'dataall.core.environment.services.environment_service.EnvironmentService._check_cdk_resources', return_value='CDKROLENAME', ) module_mocker.patch( - 'dataall.core.environment.services.environment_service.EnvironmentService.get_pivot_role_as_part_of_environment', + 'dataall.core.environment.services.environment_service.EnvironmentService._get_pivot_role_as_part_of_environment', return_value=False, ) diff --git a/tests_new/integration_tests/modules/dashboards/queries.py 
b/tests_new/integration_tests/modules/dashboards/queries.py new file mode 100644 index 000000000..e69de29bb From 5c2b167d744b4fa50e3bae82eeca0c9789851865 Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Tue, 3 Dec 2024 14:15:56 +0100 Subject: [PATCH 28/44] Avoid infinite loop in glossaries checks (#1725) ### Feature or Bugfix - Feature - Bugfix ### Detail In some edge cases where a category and term is orphan and does not have a Glossary as parent we would run into an infinite loop in the glossaries permission check. This PR adds a maximum depth level (which in reality is lower, categories can only host terms, the REAL_MAX_DEPTH=3) ### Relates - #1721 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- .../dataall/modules/catalog/services/glossaries_service.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/dataall/modules/catalog/services/glossaries_service.py b/backend/dataall/modules/catalog/services/glossaries_service.py index d98f85bce..3e8b8f6ea 100644 --- a/backend/dataall/modules/catalog/services/glossaries_service.py +++ b/backend/dataall/modules/catalog/services/glossaries_service.py @@ -29,8 +29,11 @@ def wrapper(*args, **kwargs): context = get_context() with context.db_engine.scoped_session() as session: node = GlossaryRepository.get_node(session=session, uri=uri) - while node.nodeType != 'G': + MAX_GLOSSARY_DEPTH = 10 + depth = 0 + while node.nodeType != 'G' and depth <= MAX_GLOSSARY_DEPTH: node = GlossaryRepository.get_node(session=session, uri=node.parentUri) + depth += 1 if node and (node.admin in context.groups): return f(*args, **kwargs) else: From ab1f6e5802b96bbc6877c1f552fb726e3acbc0b8 Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:04:54 +0100 Subject: [PATCH 29/44] Feed consistent permissions (#1722) - Feature - Bugfix The Feeds module is used in the frontend in several modules. Some restrict access to admins only and some don't. In this PR we unify the behavior. ONLY ADMINS CAN SEE THE FEED in the frontend. - Dashboards: accessible to any user -----> add isAdmin - PIpelines: accessible to any user -----> add isAdmin - Redshift_Datasets: accessible to admin users only - Redshift_Tables : accessible to admin users only - S3_Datasets: accessible to admin users only - Folders: accessible to admin users only - Tables: accessible to admin users only Alongside the frontend changes, the backend should follow the same logic and restrict the API calls with permissions checks. That is what this PR does, it introduces resource permission checks depending on the Feed targetType with GET_X permission checks. 
- [x] Add security-focused tests for unauthorized cases Screenshot 2024-11-26 at 14 49 56 - [X] UI shows chat button for admins (creators or admin team) - verified in Datasets and Dashboards - [X] UI does not show chat button for non-admins - verified in Datasets and Dashboards - [x] Deploy in AWS - Call getFeed, postFeedMessage with resource admin (with GET permissions) and get feed - [X] Dataset - [x] Table - [x] Folder - [X] Redshift Dataset - [X] Redshift Table - [x] Dashboard - Call getFeed, postFeedMessage with another team not the resource admin (with UPDATE permissions) and get unauthorized response: - [X] Dataset - [x] Table - [x] Folder - [x] Redshift Dataset - [x] Redshift Table - [x] Dashboard - Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- .../dataall/modules/dashboards/__init__.py | 4 +- .../dataall/modules/datapipelines/__init__.py | 2 +- backend/dataall/modules/feed/api/registry.py | 5 +++ backend/dataall/modules/feed/api/resolvers.py | 2 +- .../modules/feed/services/feed_service.py | 39 +++++++++++++++---- .../dataall/modules/s3_datasets/__init__.py | 8 ++-- .../modules/Dashboards/views/DashboardView.js | 22 ++++++----- .../modules/Pipelines/views/PipelineView.js | 34 ++++++++++------ 8 files changed, 79 insertions(+), 37 deletions(-) diff --git a/backend/dataall/modules/dashboards/__init__.py b/backend/dataall/modules/dashboards/__init__.py index ffbc8e92d..068a1f97f 100644 --- a/backend/dataall/modules/dashboards/__init__.py +++ b/backend/dataall/modules/dashboards/__init__.py @@ -5,7 +5,6 @@ from dataall.base.loader import ImportMode, ModuleInterface - log = logging.getLogger(__name__) @@ -33,8 +32,9 @@ def __init__(self): from dataall.modules.catalog.indexers.registry import GlossaryRegistry, GlossaryDefinition from dataall.modules.vote.services.vote_service import add_vote_type from dataall.modules.dashboards.indexers.dashboard_indexer import DashboardIndexer + from dataall.modules.dashboards.services.dashboard_permissions import GET_DASHBOARD - FeedRegistry.register(FeedDefinition('Dashboard', Dashboard)) + FeedRegistry.register(FeedDefinition('Dashboard', Dashboard, GET_DASHBOARD)) GlossaryRegistry.register( GlossaryDefinition( diff --git a/backend/dataall/modules/datapipelines/__init__.py b/backend/dataall/modules/datapipelines/__init__.py index 7b1a56334..171a6a311 100644 --- a/backend/dataall/modules/datapipelines/__init__.py +++ b/backend/dataall/modules/datapipelines/__init__.py @@ -35,7 +35,7 @@ def __init__(self): ) import dataall.modules.datapipelines.api - FeedRegistry.register(FeedDefinition('DataPipeline', DataPipeline)) + FeedRegistry.register(FeedDefinition('DataPipeline', DataPipeline, GET_PIPELINE)) TargetType('pipeline', GET_PIPELINE, UPDATE_PIPELINE, MANAGE_PIPELINES) 
TargetType('cdkpipeline', GET_PIPELINE, UPDATE_PIPELINE, MANAGE_PIPELINES) diff --git a/backend/dataall/modules/feed/api/registry.py b/backend/dataall/modules/feed/api/registry.py index 3fe72f245..8db914263 100644 --- a/backend/dataall/modules/feed/api/registry.py +++ b/backend/dataall/modules/feed/api/registry.py @@ -10,6 +10,7 @@ class FeedDefinition: target_type: str model: Type[Resource] + permission: str class FeedRegistry(UnionTypeRegistry): @@ -25,6 +26,10 @@ def register(cls, definition: FeedDefinition): def find_model(cls, target_type: str): return cls._DEFINITIONS[target_type].model + @classmethod + def find_permission(cls, target_type: str): + return cls._DEFINITIONS[target_type].permission + @classmethod def find_target(cls, obj: Resource): for target_type, definition in cls._DEFINITIONS.items(): diff --git a/backend/dataall/modules/feed/api/resolvers.py b/backend/dataall/modules/feed/api/resolvers.py index a3bcca622..e971d90bf 100644 --- a/backend/dataall/modules/feed/api/resolvers.py +++ b/backend/dataall/modules/feed/api/resolvers.py @@ -43,4 +43,4 @@ def resolve_feed_messages(context: Context, source: Feed, filter: dict = None): _required_uri(source.targetUri) if not filter: filter = {} - return FeedService.list_feed_messages(targetUri=source.targetUri, filter=filter) + return FeedService.list_feed_messages(targetUri=source.targetUri, targetType=source.targetType, filter=filter) diff --git a/backend/dataall/modules/feed/services/feed_service.py b/backend/dataall/modules/feed/services/feed_service.py index 364b2a575..69d271186 100644 --- a/backend/dataall/modules/feed/services/feed_service.py +++ b/backend/dataall/modules/feed/services/feed_service.py @@ -6,8 +6,10 @@ import logging from dataall.base.context import get_context +from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from dataall.modules.feed.db.feed_models import FeedMessage from dataall.modules.feed.db.feed_repository import FeedRepository +from 
dataall.modules.feed.api.registry import FeedRegistry logger = logging.getLogger(__name__) @@ -27,10 +29,6 @@ def targetType(self): return self._targetType -def _session(): - return get_context().db_engine.scoped_session() - - class FeedService: """ Encapsulate the logic of interactions with Feeds. @@ -41,6 +39,15 @@ def get_feed( targetUri: str = None, targetType: str = None, ) -> Feed: + context = get_context() + with context.db_engine.scoped_session() as session: + ResourcePolicyService.check_user_resource_permission( + session=session, + username=context.username, + groups=context.groups, + resource_uri=targetUri, + permission_name=FeedRegistry.find_permission(target_type=targetType), + ) return Feed(targetUri=targetUri, targetType=targetType) @staticmethod @@ -49,17 +56,33 @@ def post_feed_message( targetType: str = None, content: str = None, ): - with _session() as session: + context = get_context() + with context.db_engine.scoped_session() as session: + ResourcePolicyService.check_user_resource_permission( + session=session, + username=context.username, + groups=context.groups, + resource_uri=targetUri, + permission_name=FeedRegistry.find_permission(target_type=targetType), + ) m = FeedMessage( targetUri=targetUri, targetType=targetType, - creator=get_context().username, + creator=context.username, content=content, ) session.add(m) return m @staticmethod - def list_feed_messages(targetUri: str, filter: dict = None): - with _session() as session: + def list_feed_messages(targetUri: str, targetType: str, filter: dict = None): + context = get_context() + with context.db_engine.scoped_session() as session: + ResourcePolicyService.check_user_resource_permission( + session=session, + username=context.username, + groups=context.groups, + resource_uri=targetUri, + permission_name=FeedRegistry.find_permission(target_type=targetType), + ) return FeedRepository(session).paginated_feed_messages(uri=targetUri, filter=filter) diff --git 
a/backend/dataall/modules/s3_datasets/__init__.py b/backend/dataall/modules/s3_datasets/__init__.py index dbd4f458c..42872063d 100644 --- a/backend/dataall/modules/s3_datasets/__init__.py +++ b/backend/dataall/modules/s3_datasets/__init__.py @@ -44,14 +44,16 @@ def __init__(self): from dataall.modules.s3_datasets.services.dataset_permissions import ( GET_DATASET, UPDATE_DATASET, + GET_DATASET_TABLE, + GET_DATASET_FOLDER, MANAGE_DATASETS, ) from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, S3Dataset - FeedRegistry.register(FeedDefinition('DatasetStorageLocation', DatasetStorageLocation)) - FeedRegistry.register(FeedDefinition('DatasetTable', DatasetTable)) - FeedRegistry.register(FeedDefinition('Dataset', S3Dataset)) + FeedRegistry.register(FeedDefinition('DatasetStorageLocation', DatasetStorageLocation, GET_DATASET_FOLDER)) + FeedRegistry.register(FeedDefinition('DatasetTable', DatasetTable, GET_DATASET_TABLE)) + FeedRegistry.register(FeedDefinition('Dataset', S3Dataset, GET_DATASET)) GlossaryRegistry.register( GlossaryDefinition( diff --git a/frontend/src/modules/Dashboards/views/DashboardView.js b/frontend/src/modules/Dashboards/views/DashboardView.js index 03abaa021..d093546e7 100644 --- a/frontend/src/modules/Dashboards/views/DashboardView.js +++ b/frontend/src/modules/Dashboards/views/DashboardView.js @@ -227,16 +227,18 @@ const DashboardView = () => { onClick={() => upVoteDashboard(dashboard.dashboardUri)} upVotes={upVotes || 0} /> - + {isAdmin && ( + + )} + {isAdmin && ( + + )}