diff --git a/app.py b/app.py
index 752e0c9..ed593d4 100644
--- a/app.py
+++ b/app.py
@@ -6,9 +6,9 @@
 
 app = FastAPI()
 
-vec : Vectorizer
-meta_config : Meta
-logger = getLogger('uvicorn')
+vec: Vectorizer
+meta_config: Meta
+logger = getLogger("uvicorn")
 
 
 @app.on_event("startup")
@@ -20,16 +20,22 @@ def startup_event():
     cuda_per_process_memory_fraction = 1.0
     if "CUDA_PER_PROCESS_MEMORY_FRACTION" in os.environ:
         try:
-            cuda_per_process_memory_fraction = float(os.getenv("CUDA_PER_PROCESS_MEMORY_FRACTION"))
+            cuda_per_process_memory_fraction = float(
+                os.getenv("CUDA_PER_PROCESS_MEMORY_FRACTION")
+            )
         except ValueError:
-            logger.error(f"Invalid CUDA_PER_PROCESS_MEMORY_FRACTION (should be between 0.0-1.0)")
+            logger.error(
+                f"Invalid CUDA_PER_PROCESS_MEMORY_FRACTION (should be between 0.0-1.0)"
+            )
     if 0.0 <= cuda_per_process_memory_fraction <= 1.0:
-        logger.info(f"CUDA_PER_PROCESS_MEMORY_FRACTION set to {cuda_per_process_memory_fraction}")
-    cuda_support=False
-    cuda_core=""
+        logger.info(
+            f"CUDA_PER_PROCESS_MEMORY_FRACTION set to {cuda_per_process_memory_fraction}"
+        )
+    cuda_support = False
+    cuda_core = ""
 
     if cuda_env is not None and cuda_env == "true" or cuda_env == "1":
-        cuda_support=True
+        cuda_support = True
         cuda_core = os.getenv("CUDA_CORE")
         if cuda_core is None or cuda_core == "":
             cuda_core = "cuda:0"
@@ -40,10 +46,15 @@ def startup_event():
     # Batch text tokenization enabled by default
     direct_tokenize = False
     transformers_direct_tokenize = os.getenv("T2V_TRANSFORMERS_DIRECT_TOKENIZE")
-    if transformers_direct_tokenize is not None and transformers_direct_tokenize == "true" or transformers_direct_tokenize == "1":
+    if (
+        transformers_direct_tokenize is not None
+        and transformers_direct_tokenize == "true"
+        or transformers_direct_tokenize == "1"
+    ):
         direct_tokenize = True
 
     model_dir = "./models/model"
+
     def get_model_directory() -> (str, bool):
         if os.path.exists(f"{model_dir}/model_name"):
             with open(f"{model_dir}/model_name", "r") as f:
@@ -65,17 +76,27 @@ def log_info_about_onnx(onnx_runtime: bool):
             if os.path.exists(f"{model_dir}/onnx_quantization_info"):
                 with open(f"{model_dir}/onnx_quantization_info", "r") as f:
                     onnx_quantization_info = f.read()
-            logger.info(f"Running ONNX vectorizer with quantized model for {onnx_quantization_info}")
+            logger.info(
+                f"Running ONNX vectorizer with quantized model for {onnx_quantization_info}"
+            )
 
     model_name, use_sentence_transformer_vectorizer = get_model_directory()
     onnx_runtime = get_onnx_runtime()
     log_info_about_onnx(onnx_runtime)
 
     meta_config = Meta(model_dir, model_name, use_sentence_transformer_vectorizer)
-    vec = Vectorizer(model_dir, cuda_support, cuda_core, cuda_per_process_memory_fraction,
-                     meta_config.get_model_type(), meta_config.get_architecture(),
-                     direct_tokenize, onnx_runtime, use_sentence_transformer_vectorizer,
-                     model_name)
+    vec = Vectorizer(
+        model_dir,
+        cuda_support,
+        cuda_core,
+        cuda_per_process_memory_fraction,
+        meta_config.get_model_type(),
+        meta_config.get_architecture(),
+        direct_tokenize,
+        onnx_runtime,
+        use_sentence_transformer_vectorizer,
+        model_name,
+    )
 
 
 @app.get("/.well-known/live", response_class=Response)
@@ -96,8 +117,6 @@ async def read_item(item: VectorInput, response: Response):
         vector = await vec.vectorize(item.text, item.config)
         return {"text": item.text, "vector": vector.tolist(), "dim": len(vector)}
     except Exception as e:
-        logger.exception(
-            'Something went wrong while vectorizing data.'
-        )
+        logger.exception("Something went wrong while vectorizing data.")
         response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
         return {"error": str(e)}
diff --git a/custom_prerequisites.py b/custom_prerequisites.py
index 42a13ea..5f19d83 100755
--- a/custom_prerequisites.py
+++ b/custom_prerequisites.py
@@ -2,4 +2,4 @@
 
 import nltk
 
-nltk.download('punkt')
+nltk.download("punkt")
diff --git a/download.py b/download.py
index 3c04630..0b025a6 100755
--- a/download.py
+++ b/download.py
@@ -91,7 +91,9 @@ def quantization_config(onnx_cpu_arch: str):
     os.remove(f"{model_dir}/model.onnx")
 
     # Save information about ONNX runtime
    save_to_file(f"{model_dir}/onnx_runtime", onnx_runtime)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_name, trust_remote_code=trust_remote_code
+    )
     tokenizer.save_pretrained(onnx_path)
diff --git a/requirements-test.txt b/requirements-test.txt
index dfc5234..f8c6a0d 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,13 +1,14 @@
 requests==2.32.3
-transformers==4.42.4
-fastapi==0.112.0
-uvicorn==0.30.5
+transformers==4.44.2
+fastapi==0.115.0
+uvicorn==0.31.0
 nltk==3.9.1
-torch==2.4.0
+torch==2.4.1
 sentencepiece==0.2.0
-sentence-transformers==3.0.1
-optimum==1.21.2
-onnxruntime==1.18.1
-onnx==1.16.2
+sentence-transformers==3.1.1
+optimum==1.22.0
+onnxruntime==1.19.2
+onnx==1.17.0
 numpy==1.26.4
+einops==0.8.0
 pytest
diff --git a/requirements.txt b/requirements.txt
index d2c8e98..d3f99d0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,12 @@
-transformers==4.42.4
-fastapi==0.112.0
-uvicorn==0.30.5
+transformers==4.44.2
+fastapi==0.115.0
+uvicorn==0.31.0
 nltk==3.9.1
-torch==2.4.0
+torch==2.4.1
 sentencepiece==0.2.0
-sentence-transformers==3.0.1
-optimum==1.21.2
-onnxruntime==1.18.1
-onnx==1.16.2
+sentence-transformers==3.1.1
+optimum==1.22.0
+onnxruntime==1.19.2
+onnx==1.17.0
 numpy==1.26.4
+einops==0.8.0
diff --git a/smoke_test.py b/smoke_test.py
index 92a55a7..46f57a4 100755
--- a/smoke_test.py
+++ b/smoke_test.py
@@ -5,11 +5,11 @@
 
 class SmokeTest(unittest.TestCase):
     def setUp(self):
-        self.url = 'http://localhost:8000'
+        self.url = "http://localhost:8000"
 
        for i in range(0, 100):
            try:
-                res = requests.get(self.url + '/.well-known/ready')
+                res = requests.get(self.url + "/.well-known/ready")
                if res.status_code == 204:
                    return
                else:
@@ -21,17 +21,17 @@ def setUp(self):
         raise Exception("did not start up")
 
     def test_well_known_ready(self):
-        res = requests.get(self.url + '/.well-known/ready')
+        res = requests.get(self.url + "/.well-known/ready")
 
         self.assertEqual(res.status_code, 204)
 
     def test_well_known_live(self):
-        res = requests.get(self.url + '/.well-known/live')
+        res = requests.get(self.url + "/.well-known/live")
 
         self.assertEqual(res.status_code, 204)
 
     def test_meta(self):
-        res = requests.get(self.url + '/meta')
+        res = requests.get(self.url + "/meta")
 
         self.assertEqual(res.status_code, 200)
         self.assertIsInstance(res.json(), dict)
@@ -39,7 +39,7 @@ def test_meta(self):
 
     def test_vectorizing(self):
         def try_to_vectorize(url):
             print(f"url: {url}")
-            req_body = {'text': 'The London Eye is a ferris wheel at the River Thames.'}
+            req_body = {"text": "The London Eye is a ferris wheel at the River Thames."}
             res = requests.post(url, json=req_body)
             resBody = res.json()
@@ -49,7 +49,7 @@ def try_to_vectorize(url):
             # below tests that what we deem a reasonable vector is returned. We are
             # aware of 384 and 768 dim vectors, which should both fall in that
             # range
-            self.assertTrue(len(resBody['vector']) > 100)
+            self.assertTrue(len(resBody["vector"]) > 100)
             print(f"vector dimensions are: {len(resBody['vector'])}")
 
         try_to_vectorize(self.url + "/vectors/")
diff --git a/test_app.py b/test_app.py
index f088cb1..656cfb0 100644
--- a/test_app.py
+++ b/test_app.py
@@ -11,7 +11,7 @@
 
 
 def wait_for_uvicorn_start():
-    url = 'http://localhost:8000/.well-known/ready'
+    url = "http://localhost:8000/.well-known/ready"
 
    for i in range(0, 100):
        try:
@@ -19,8 +19,7 @@ def wait_for_uvicorn_start():
             if res.status_code == 204:
                 return
             else:
-                raise Exception(
-                    "status code is {}".format(res.status_code))
+                raise Exception("status code is {}".format(res.status_code))
         except Exception as e:
             print("Attempt {}: {}".format(i, e))
             time.sleep(2)
@@ -32,10 +31,15 @@ def run_server():
     uvicorn.run(app)
 
 
-@pytest.fixture(params=["t5-small",
-                        "distilroberta-base",
-                        "vblagoje/dpr-ctx_encoder-single-lfqa-wiki",
-                        "vblagoje/dpr-question_encoder-single-lfqa-wiki"], scope="function")
+@pytest.fixture(
+    params=[
+        "t5-small",
+        "distilroberta-base",
+        "vblagoje/dpr-ctx_encoder-single-lfqa-wiki",
+        "vblagoje/dpr-question_encoder-single-lfqa-wiki",
+    ],
+    scope="function",
+)
 def server(request):
     os.environ["MODEL_NAME"] = request.param
     subprocess.call("python download.py", shell=True)
@@ -48,12 +52,12 @@ def server(request):
 
 def test_vectorizing(server):
     wait_for_uvicorn_start()
 
-    url = 'http://127.0.0.1:8000/vectors/'
-    req_body = {'text': 'The London Eye is a ferris wheel at the River Thames.'}
+    url = "http://127.0.0.1:8000/vectors/"
+    req_body = {"text": "The London Eye is a ferris wheel at the River Thames."}
 
     res = requests.post(url, json=req_body)
     resBody = res.json()
-    vectorized_text = resBody['vector']
+    vectorized_text = resBody["vector"]
 
     assert 200 == res.status_code
@@ -66,14 +70,15 @@ def test_vectorizing(server):
 
     # now let's try two sentences
-    req_body = {'text': 'The London Eye is a ferris wheel at the River Thames. Here is the second sentence.'}
+    req_body = {
+        "text": "The London Eye is a ferris wheel at the River Thames. Here is the second sentence."
+    }
     res = requests.post(url, json=req_body)
     resBody = res.json()
-    vectorized_text = resBody['vector']
+    vectorized_text = resBody["vector"]
 
     assert 200 == res.status_code
     assert type(vectorized_text) is list
     assert 128 <= len(vectorized_text) <= 1024
-
diff --git a/vectorizer.py b/vectorizer.py
index 75e0a74..b284bde 100644
--- a/vectorizer.py
+++ b/vectorizer.py
@@ -11,16 +11,22 @@
 from optimum.onnxruntime import ORTModelForFeatureExtraction
 from pydantic import BaseModel
 from sentence_transformers import SentenceTransformer
-from transformers import (AutoModel, AutoTokenizer, DPRContextEncoder,
-                          DPRQuestionEncoder, T5ForConditionalGeneration,
-                          T5Tokenizer)
+from transformers import (
+    AutoModel,
+    AutoTokenizer,
+    DPRContextEncoder,
+    DPRQuestionEncoder,
+    T5ForConditionalGeneration,
+    T5Tokenizer,
+)
 
 from config import TRUST_REMOTE_CODE
 
 # limit transformer batch size to limit parallel inference, otherwise we run
 # into memory problems
 MAX_BATCH_SIZE = 25  # TODO: take from config
-DEFAULT_POOL_METHOD="masked_mean"
+DEFAULT_POOL_METHOD = "masked_mean"
+
 
 class VectorInputConfig(BaseModel):
     pooling_strategy: str
@@ -34,20 +40,42 @@ class VectorInput(BaseModel):
 class Vectorizer:
     executor: ThreadPoolExecutor
 
-    def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per_process_memory_fraction: float,
-                 model_type: str, architecture: str, direct_tokenize: bool, onnx_runtime: bool,
-                 use_sentence_transformer_vectorizer: bool, model_name: str):
+    def __init__(
+        self,
+        model_path: str,
+        cuda_support: bool,
+        cuda_core: str,
+        cuda_per_process_memory_fraction: float,
+        model_type: str,
+        architecture: str,
+        direct_tokenize: bool,
+        onnx_runtime: bool,
+        use_sentence_transformer_vectorizer: bool,
+        model_name: str,
+    ):
         self.executor = ThreadPoolExecutor()
         if onnx_runtime:
             self.vectorizer = ONNXVectorizer(model_path)
         else:
-            if model_type == 't5' or use_sentence_transformer_vectorizer:
-                self.vectorizer = SentenceTransformerVectorizer(model_path, model_name, cuda_core)
+            if model_type == "t5" or use_sentence_transformer_vectorizer:
+                self.vectorizer = SentenceTransformerVectorizer(
+                    model_path, model_name, cuda_core
+                )
             else:
-                self.vectorizer = HuggingFaceVectorizer(model_path, cuda_support, cuda_core, cuda_per_process_memory_fraction, model_type, architecture, direct_tokenize)
+                self.vectorizer = HuggingFaceVectorizer(
+                    model_path,
+                    cuda_support,
+                    cuda_core,
+                    cuda_per_process_memory_fraction,
+                    model_type,
+                    architecture,
+                    direct_tokenize,
+                )
 
     async def vectorize(self, text: str, config: VectorInputConfig):
-        return await asyncio.wrap_future(self.executor.submit(self.vectorizer.vectorize, text, config))
+        return await asyncio.wrap_future(
+            self.executor.submit(self.vectorizer.vectorize, text, config)
+        )
 
 
 class SentenceTransformerVectorizer:
@@ -56,8 +84,10 @@ class SentenceTransformerVectorizer:
 
     def __init__(self, model_path: str, model_name: str, cuda_core: str):
         self.cuda_core = cuda_core
-        self.model = SentenceTransformer(model_name, cache_folder=model_path, device=self.get_device())
-        self.model.eval() # make sure we're in inference mode, not training
+        self.model = SentenceTransformer(
+            model_name, cache_folder=model_path, device=self.get_device()
+        )
+        self.model.eval()  # make sure we're in inference mode, not training
 
     def get_device(self) -> Optional[str]:
         if self.cuda_core is not None and self.cuda_core != "":
@@ -65,7 +95,12 @@ def get_device(self) -> Optional[str]:
         return None
 
     def vectorize(self, text: str, config: VectorInputConfig):
-        embedding = self.model.encode([text], device=self.get_device(), convert_to_tensor=False, convert_to_numpy=True)
+        embedding = self.model.encode(
+            [text],
+            device=self.get_device(),
+            convert_to_tensor=False,
+            convert_to_numpy=True,
+        )
 
         return embedding[0]
 
@@ -75,23 +110,38 @@ class ONNXVectorizer:
     def __init__(self, model_path) -> None:
         onnx_path = Path(model_path)
-        self.model = ORTModelForFeatureExtraction.from_pretrained(onnx_path, file_name="model_quantized.onnx",
-                                                                  trust_remote_code=TRUST_REMOTE_CODE)
-        self.tokenizer = AutoTokenizer.from_pretrained(onnx_path, trust_remote_code=TRUST_REMOTE_CODE)
+        self.model = ORTModelForFeatureExtraction.from_pretrained(
+            onnx_path,
+            file_name="model_quantized.onnx",
+            trust_remote_code=TRUST_REMOTE_CODE,
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            onnx_path, trust_remote_code=TRUST_REMOTE_CODE
+        )
 
     def mean_pooling(self, model_output, attention_mask):
-        token_embeddings = model_output[0] #First element of model_output contains all token embeddings
-        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+        token_embeddings = model_output[
+            0
+        ]  # First element of model_output contains all token embeddings
+        input_mask_expanded = (
+            attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+        )
+        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
+            input_mask_expanded.sum(1), min=1e-9
+        )
 
     def vectorize(self, text: str, config: VectorInputConfig):
-        encoded_input = self.tokenizer([text], padding=True, truncation=True, return_tensors='pt')
+        encoded_input = self.tokenizer(
+            [text], padding=True, truncation=True, return_tensors="pt"
+        )
 
         # Compute token embeddings
         with torch.no_grad():
             model_output = self.model(**encoded_input)
 
         # Perform pooling
-        sentence_embeddings = self.mean_pooling(model_output, encoded_input['attention_mask'])
+        sentence_embeddings = self.mean_pooling(
+            model_output, encoded_input["attention_mask"]
+        )
 
         # Normalize embeddings
         sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
@@ -106,29 +156,48 @@ class HuggingFaceVectorizer:
     model_type: str
     direct_tokenize: bool
 
-    def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per_process_memory_fraction: float, model_type: str, architecture: str, direct_tokenize: bool):
+    def __init__(
+        self,
+        model_path: str,
+        cuda_support: bool,
+        cuda_core: str,
+        cuda_per_process_memory_fraction: float,
+        model_type: str,
+        architecture: str,
+        direct_tokenize: bool,
+    ):
         self.cuda = cuda_support
         self.cuda_core = cuda_core
         self.cuda_per_process_memory_fraction = cuda_per_process_memory_fraction
         self.model_type = model_type
         self.direct_tokenize = direct_tokenize
 
-        self.model_delegate: HFModel = ModelFactory.model(model_type, architecture, cuda_support, cuda_core)
+        self.model_delegate: HFModel = ModelFactory.model(
+            model_type, architecture, cuda_support, cuda_core
+        )
         self.model = self.model_delegate.create_model(model_path)
 
         if self.cuda:
             self.model.to(self.cuda_core)
             if self.cuda_per_process_memory_fraction:
-                torch.cuda.set_per_process_memory_fraction(self.cuda_per_process_memory_fraction)
-        self.model.eval() # make sure we're in inference mode, not training
+                torch.cuda.set_per_process_memory_fraction(
+                    self.cuda_per_process_memory_fraction
+                )
+        self.model.eval()  # make sure we're in inference mode, not training
 
         self.tokenizer = self.model_delegate.create_tokenizer(model_path)
-        nltk.data.path.append('./nltk_data')
+        nltk.data.path.append("./nltk_data")
 
-    def tokenize(self, text:str):
-        return self.tokenizer(text, padding=True, truncation=True, max_length=500,
-                              add_special_tokens = True, return_tensors="pt")
+    def tokenize(self, text: str):
+        return self.tokenizer(
+            text,
+            padding=True,
+            truncation=True,
+            max_length=500,
+            add_special_tokens=True,
+            return_tensors="pt",
+        )
 
     def get_embeddings(self, batch_results):
         return self.model_delegate.get_embeddings(batch_results)
@@ -151,7 +220,11 @@ def vectorize(self, text: str, config: VectorInputConfig):
                 return batch_sum_vectors.detach()
             else:
                 # tokenize text
-                sentences = sent_tokenize(' '.join(text.split(),))
+                sentences = sent_tokenize(
+                    " ".join(
+                        text.split(),
+                    )
+                )
                 num_sentences = len(sentences)
                 number_of_batch_vectors = math.ceil(num_sentences / MAX_BATCH_SIZE)
                 batch_sum_vectors = 0
@@ -162,8 +235,12 @@ def vectorize(self, text: str, config: VectorInputConfig):
                     tokens = self.tokenize(sentences[start_index:end_index])
                     if self.cuda:
                         tokens.to(self.cuda_core)
-                    batch_results = self.get_batch_results(tokens, sentences[start_index:end_index])
-                    batch_sum_vectors += self.pool_embedding(batch_results, tokens, config)
+                    batch_results = self.get_batch_results(
+                        tokens, sentences[start_index:end_index]
+                    )
+                    batch_sum_vectors += self.pool_embedding(
+                        batch_results, tokens, config
+                    )
 
                 return batch_sum_vectors.detach() / num_sentences
 
@@ -177,11 +254,15 @@ def __init__(self, cuda_support: bool, cuda_core: str):
         self.cuda_core = cuda_core
 
     def create_tokenizer(self, model_path):
-        self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_path, trust_remote_code=TRUST_REMOTE_CODE
+        )
         return self.tokenizer
 
     def create_model(self, model_path):
-        self.model = AutoModel.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
+        self.model = AutoModel.from_pretrained(
+            model_path, trust_remote_code=TRUST_REMOTE_CODE
+        )
         return self.model
 
     def get_embeddings(self, batch_results):
@@ -195,7 +276,9 @@ def pool_embedding(self, batch_results, tokens, config: VectorInputConfig):
         if pooling_method == "cls":
             return self.get_embeddings(batch_results)[:, 0, :].sum(0)
         elif pooling_method == "masked_mean":
-            return self.pool_sum(self.get_embeddings(batch_results), tokens['attention_mask'])
+            return self.pool_sum(
+                self.get_embeddings(batch_results), tokens["attention_mask"]
+            )
         else:
             raise Exception(f"invalid pooling method '{pooling_method}'")
 
@@ -210,8 +293,12 @@ def pool_method_from_config(self, config: VectorInputConfig):
 
     def get_sum_embeddings_mask(self, embeddings, input_mask_expanded):
         if self.cuda:
-            sum_embeddings = torch.sum(embeddings * input_mask_expanded, 1).to(self.cuda_core)
-            sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9).to(self.cuda_core)
+            sum_embeddings = torch.sum(embeddings * input_mask_expanded, 1).to(
+                self.cuda_core
+            )
+            sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9).to(
+                self.cuda_core
+            )
             return sum_embeddings, sum_mask
         else:
             sum_embeddings = torch.sum(embeddings * input_mask_expanded, 1)
@@ -219,8 +306,12 @@ def get_sum_embeddings_mask(self, embeddings, input_mask_expanded):
             return sum_embeddings, sum_mask
 
     def pool_sum(self, embeddings, attention_mask):
-        input_mask_expanded = attention_mask.unsqueeze(-1).expand(embeddings.size()).float()
-        sum_embeddings, sum_mask = self.get_sum_embeddings_mask(embeddings, input_mask_expanded)
+        input_mask_expanded = (
+            attention_mask.unsqueeze(-1).expand(embeddings.size()).float()
+        )
+        sum_embeddings, sum_mask = self.get_sum_embeddings_mask(
+            embeddings, input_mask_expanded
+        )
         sentences = sum_embeddings / sum_mask
         return sentences.sum(0)
 
@@ -234,13 +325,17 @@ def __init__(self, architecture: str, cuda_support: bool, cuda_core: str):
 
     def create_model(self, model_path):
         if self.architecture == "DPRQuestionEncoder":
-            self.model = DPRQuestionEncoder.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
+            self.model = DPRQuestionEncoder.from_pretrained(
+                model_path, trust_remote_code=TRUST_REMOTE_CODE
+            )
         else:
-            self.model = DPRContextEncoder.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
+            self.model = DPRContextEncoder.from_pretrained(
+                model_path, trust_remote_code=TRUST_REMOTE_CODE
+            )
         return self.model
 
     def get_batch_results(self, tokens, text):
-        return self.model(tokens['input_ids'], tokens['attention_mask'])
+        return self.model(tokens["input_ids"], tokens["attention_mask"])
 
     def pool_embedding(self, batch_results, tokens, config: VectorInputConfig):
         # no pooling needed for DPR
@@ -257,18 +352,22 @@ def __init__(self, cuda_support: bool, cuda_core: str):
         self.cuda_core = cuda_core
 
     def create_model(self, model_path):
-        self.model = T5ForConditionalGeneration.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
+        self.model = T5ForConditionalGeneration.from_pretrained(
+            model_path, trust_remote_code=TRUST_REMOTE_CODE
+        )
         return self.model
 
     def create_tokenizer(self, model_path):
-        self.tokenizer = T5Tokenizer.from_pretrained(model_path, trust_remote_code=TRUST_REMOTE_CODE)
+        self.tokenizer = T5Tokenizer.from_pretrained(
+            model_path, trust_remote_code=TRUST_REMOTE_CODE
+        )
         return self.tokenizer
 
     def get_embeddings(self, batch_results):
         return batch_results["encoder_last_hidden_state"]
 
     def get_batch_results(self, tokens, text):
-        input_ids, attention_mask = tokens['input_ids'], tokens['attention_mask']
+        input_ids, attention_mask = tokens["input_ids"], tokens["attention_mask"]
 
         target_encoding = self.tokenizer(
             text, padding="longest", max_length=500, truncation=True
@@ -279,16 +378,18 @@ def get_batch_results(self, tokens, text):
         else:
             labels = torch.tensor(labels)
 
-        return self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
+        return self.model(
+            input_ids=input_ids, attention_mask=attention_mask, labels=labels
+        )
 
 
 class ModelFactory:
     @staticmethod
     def model(model_type, architecture, cuda_support: bool, cuda_core: str):
-        if model_type == 't5':
+        if model_type == "t5":
             return T5Model(cuda_support, cuda_core)
-        elif model_type == 'dpr':
+        elif model_type == "dpr":
             return DPRModel(architecture, cuda_support, cuda_core)
         else:
             return HFModel(cuda_support, cuda_core)