Skip to content
This repository has been archived by the owner on Apr 27, 2021. It is now read-only.

Commit

Permalink
Release beta 20
Browse files Browse the repository at this point in the history
  • Loading branch information
hannes-ucsc committed Oct 8, 2019
2 parents 37b621f + 1536ce3 commit 5790e16
Show file tree
Hide file tree
Showing 7 changed files with 1,172 additions and 17 deletions.
10 changes: 10 additions & 0 deletions .github/workflows/label.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# GitHub Actions workflow: automatically apply the `orange` label to issues.
name: Add `orange` label to new issues
# Trigger on issue events; the labeler action presumably filters for the
# relevant event types (e.g. `opened`) itself -- TODO confirm.
on: [issues]
jobs:
  label:
    runs-on: ubuntu-latest
    steps:
      # Project-specific fork of the GitHub labeler action, pinned to the
      # `releases/v1` ref.
      - uses: HumanCellAtlas/azul-github-labeler-action@releases/v1
        with:
          # Token granting the action permission to modify issue labels
          repo-token: "${{secrets.GITHUB_TOKEN}}"
          label: orange
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="hca-metadata-api",
version="1.0b20",
license='MIT',
install_requires=[
'dataclasses >= 0.6'
Expand All @@ -11,7 +11,8 @@
extras_require={
"dss": [
'hca == 5.2.0',
'urllib3 >= 1.23',
'requests >= 2.19.1'
],
"examples": [
'jupyter >= 1.0.0'
Expand Down
2 changes: 2 additions & 0 deletions src/humancellatlas/data/metadata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,13 +588,15 @@ class File(LinkedEntity):
from_processes: MutableMapping[UUID4, Process] = field(repr=False)
to_processes: MutableMapping[UUID4, Process]
manifest_entry: ManifestEntry
content_description: Set[str]

def __init__(self, json: JSON, manifest: Mapping[str, ManifestEntry]):
    """
    Deserialize a file entity from its metadata document.

    :param json: the metadata document for this file; some schema versions
                 nest the actual entity under a 'content' key
    :param manifest: maps each file name to its entry in the bundle manifest
    """
    super().__init__(json)
    # Unwrap the optional 'content' envelope, falling back to the document
    # itself when the key is absent.
    content = json.get('content', json)
    core = content['file_core']
    # `lookup` is a project helper defined elsewhere; presumably it tries the
    # given keys in order ('format' first, then the older 'file_format') to
    # support multiple schema versions -- TODO confirm against its definition.
    self.format = lookup(core, 'format', 'file_format')
    self.manifest_entry = manifest[core['file_name']]
    # 'content_description' is optional; collect the ontology label of each
    # entry into a set.
    self.content_description = {ontology_label(cd) for cd in core.get('content_description', [])}
    # Process links are filled in later, once all entities exist.
    self.from_processes = {}
    self.to_processes = {}

Expand Down
70 changes: 63 additions & 7 deletions src/humancellatlas/data/metadata/helpers/dss.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,37 @@
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache
import logging
from typing import List, Optional, Tuple
import os
from typing import (
List,
Optional,
Tuple,
Mapping,
Any,
)
from unittest.mock import patch

from hca.dss import DSSClient
from requests import Session
from urllib3 import Timeout

from humancellatlas.data.metadata.api import JSON

logger = logging.getLogger(__name__)


@lru_cache(maxsize=1)
def default_num_workers() -> int:
    """
    Return the default size of the thread pool used for downloading metadata
    files in parallel: a small multiple of the number of CPU cores on the
    system executing this function.

    The result is cached so repeated calls (e.g. when used as a default
    argument) are cheap and consistent.
    """
    # os.cpu_count() may return None when the core count cannot be
    # determined; fall back to a single core so we never raise TypeError.
    return (os.cpu_count() or 1) * 5


def download_bundle_metadata(client: DSSClient,
replica: str,
uuid: str,
version: Optional[str] = None,
directurls: bool = False,
presignedurls: bool = False,
num_workers: Optional[int] = default_num_workers()) -> Tuple[str, List[JSON], JSON]:
"""
Download the metadata for a given bundle from the HCA data store (DSS).
Expand All @@ -39,8 +54,8 @@ def download_bundle_metadata(client: DSSClient,
exclusive with the directurls parameter. Note this parameter, similar to the `directurls`,
is a temporary parameter, and it's not guaranteed to stay in this place in the future.
:param num_workers: The size of the thread pool to use for downloading metadata files in parallel. If absent, the
                    default pool size will be used, a small multiple of the number of cores on the system
executing this function. If 0, no thread pool will be used and all files will be downloaded
sequentially by the current thread.
Expand Down Expand Up @@ -110,14 +125,55 @@ def download_file(item):
return bundle['version'], manifest, dict(metadata_files)


def dss_client(deployment: str = 'prod',
               num_workers: Optional[int] = default_num_workers()) -> DSSClient:
    """
    Return a DSS client to DSS production or the specified DSS deployment.

    :param deployment: The name of a DSS deployment like `dev`, `integration`,
                       `staging` or `prod`.

    :param num_workers: The number of threads that will be using this client,
                        or None to use the default connection pool size. This
                        value is used to adequately size the HTTP connection
                        pool, which avoids discarding connections unnecessarily
                        as indicated by the accompanying `Connection pool is
                        full, discarding connection` warning.
    """
    # NOTE: the default for `num_workers` is evaluated once at import time,
    # which is fine because default_num_workers() is cached and deterministic.
    # `prod` is the only deployment whose hostname has no deployment prefix.
    deployment = "" if deployment == "prod" else deployment + "."
    swagger_url = f'https://dss.{deployment}data.humancellatlas.org/v1/swagger.json'
    client = _DSSClient(swagger_url=swagger_url,
                        adapter_args=None if num_workers is None else dict(pool_maxsize=num_workers))
    client.timeout_policy = Timeout(connect=10, read=40)
    return client


class _DSSClient(DSSClient):
    """
    A DSSClient with certain extensions and fixes.
    """

    def __init__(self, *args, adapter_args: Optional[Mapping[str, Any]] = None, **kwargs):
        """
        Pass `adapter_args=dict(pool_maxsize=num_threads)` in order to avoid the resource warnings.

        :param args: positional arguments to pass to DSSClient constructor

        :param adapter_args: optional keyword arguments to requests' HTTPAdapter class

        :param kwargs: keyword arguments to pass to DSSClient constructor
        """
        # Must be assigned before calling the superclass constructor,
        # presumably because DSSClient.__init__ invokes _set_retry_policy,
        # which reads this attribute -- TODO confirm against DSSClient.
        self._adapter_args = adapter_args  # yes, this must come first
        super().__init__(*args, **kwargs)

    def _set_retry_policy(self, session: Session):
        # Without adapter arguments, defer entirely to the stock behavior.
        if self._adapter_args is None:
            super()._set_retry_policy(session)
        else:
            from requests.sessions import HTTPAdapter

            # Subclass that injects our adapter arguments (e.g. pool_maxsize)
            # into every HTTPAdapter the superclass creates. The parameter is
            # deliberately named `self_` so that `self` inside the method
            # still refers to the enclosing _DSSClient instance (closure),
            # giving access to self._adapter_args.
            class MyHTTPAdapter(HTTPAdapter):

                # noinspection PyMethodParameters
                def __init__(self_, *args, **kwargs):
                    kwargs.update(self._adapter_args)
                    super().__init__(*args, **kwargs)

            # Temporarily swap the HTTPAdapter name that hca.util resolves at
            # call time, so the superclass unknowingly instantiates our
            # customized adapter while setting its retry policy.
            with patch('hca.util.HTTPAdapter', new=MyHTTPAdapter):
                super()._set_retry_policy(session)
Loading

0 comments on commit 5790e16

Please sign in to comment.