-
Notifications
You must be signed in to change notification settings - Fork 135
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix several accidentally quadratic functions #800
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,3 +73,6 @@ def __init__( | |
comment: Optional[str] = None, | ||
): | ||
check_types_and_set_values(self, locals()) | ||
|
||
def __hash__(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
return hash((self.spdx_element_id, self.related_spdx_element_id, self.relationship_type)) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
# SPDX-FileCopyrightText: 2022 spdx contributors | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
from beartype.typing import Dict, List, Optional | ||
from beartype.typing import Dict, List, Optional, Set | ||
|
||
from spdx_tools.common.typing.constructor_type_errors import ConstructorTypeErrors | ||
from spdx_tools.spdx.model import Relationship, RelationshipType | ||
|
@@ -35,9 +35,9 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]: | |
document_describes: List[str] = delete_duplicates_from_list(input_doc_dict.get("documentDescribes", [])) | ||
doc_spdx_id: Optional[str] = input_doc_dict.get("SPDXID") | ||
|
||
existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments( | ||
existing_relationships_without_comments: Set[Relationship] = set(self.get_all_relationships_without_comments( | ||
relationships | ||
) | ||
)) | ||
relationships.extend( | ||
parse_field_or_log_error( | ||
self.logger, | ||
|
@@ -52,9 +52,6 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]: | |
) | ||
|
||
package_dicts: List[Dict] = input_doc_dict.get("packages", []) | ||
existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a bit confused by the removal of this code. The semantics will be different between this and previous call right? This one would include the additional relationships found from the relationships.extend call above. To preserve the previous behavior, it seems that you would instead want to simply retrieve the set identified by parser_field_or_log_error, and then extend relationships with that set. But then you could also update the existing_relationships_without_comments from that new set as well. The question is whether you are purposefully changing the semantics here or not. |
||
relationships | ||
) | ||
|
||
relationships.extend( | ||
parse_field_or_log_error( | ||
|
@@ -110,7 +107,7 @@ def parse_relationship_type(relationship_type_str: str) -> RelationshipType: | |
return relationship_type | ||
|
||
def parse_document_describes( | ||
self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: List[Relationship] | ||
self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: Set[Relationship] | ||
) -> List[Relationship]: | ||
logger = Logger() | ||
describes_relationships = [] | ||
|
@@ -131,10 +128,11 @@ def parse_document_describes( | |
return describes_relationships | ||
|
||
def parse_has_files( | ||
self, package_dicts: List[Dict], existing_relationships: List[Relationship] | ||
self, package_dicts: List[Dict], existing_relationships: Set[Relationship] | ||
) -> List[Relationship]: | ||
# assume existing relationships are stripped of comments | ||
logger = Logger() | ||
|
||
contains_relationships = [] | ||
for package in package_dicts: | ||
package_spdx_id: Optional[str] = package.get("SPDXID") | ||
|
@@ -160,7 +158,7 @@ def parse_has_files( | |
return contains_relationships | ||
|
||
def check_if_relationship_exists( | ||
self, relationship: Relationship, existing_relationships: List[Relationship] | ||
self, relationship: Relationship, existing_relationships: Set[Relationship] | ||
) -> bool: | ||
# assume existing relationships are stripped of comments | ||
if relationship in existing_relationships: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
|
||
import re | ||
|
||
from beartype.typing import List | ||
from beartype.typing import List, Set | ||
|
||
from spdx_tools.spdx.document_utils import get_contained_spdx_element_ids | ||
from spdx_tools.spdx.model import Document, File | ||
|
@@ -23,11 +23,15 @@ def is_spdx_id_present_in_files(spdx_id: str, files: List[File]) -> bool: | |
|
||
|
||
def is_spdx_id_present_in_document(spdx_id: str, document: Document) -> bool: | ||
all_spdx_ids_in_document: List[str] = get_list_of_all_spdx_ids(document) | ||
all_spdx_ids_in_document: Set[str] = get_all_spdx_ids(document) | ||
|
||
return spdx_id in all_spdx_ids_in_document | ||
|
||
|
||
def get_all_spdx_ids(document: Document) -> Set[str]: | ||
return set(get_list_of_all_spdx_ids(document)) | ||
|
||
|
||
def get_list_of_all_spdx_ids(document: Document) -> List[str]: | ||
all_spdx_ids_in_document: List[str] = [document.creation_info.spdx_id] | ||
all_spdx_ids_in_document.extend(get_contained_spdx_element_ids(document)) | ||
|
@@ -44,7 +48,7 @@ def is_external_doc_ref_present_in_document(external_ref_id: str, document: Docu | |
|
||
|
||
def validate_spdx_id( | ||
spdx_id: str, document: Document, check_document: bool = False, check_files: bool = False | ||
spdx_id: str, document: Document, all_spdx_ids: Set[str], check_document: bool = False, check_files: bool = False | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should you not also have an files_spdx_ids: Set[str] argument for the check_files case? My SPDX file has over 70K entries. The check_document seems redundant here, since you could simply make all_spdx_ids: Optional[Set[str]] instead and go by whether it was passed as None or not. |
||
) -> List[str]: | ||
"""Test that the given spdx_id (and a potential DocumentRef to an external document) is valid | ||
and, if it is a reference, actually exists in the document. Optionally checks files or the whole document | ||
|
@@ -87,7 +91,7 @@ def validate_spdx_id( | |
) | ||
|
||
if check_document: | ||
if not is_spdx_id_present_in_document(spdx_id, document): | ||
if not spdx_id in all_spdx_ids: | ||
validation_messages.append(f'did not find the referenced spdx_id "{spdx_id}" in the SPDX document') | ||
|
||
if check_files: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is not a correct hash of the relationship. One might instead return the triple and use it for lookups in the cache