Fix T5 model embedding generation #67

Merged · 2 commits · Oct 27, 2023

app.py (11 changes: 9 additions & 2 deletions)
@@ -43,8 +43,15 @@ def startup_event():
     if transformers_direct_tokenize is not None and transformers_direct_tokenize == "true" or transformers_direct_tokenize == "1":
         direct_tokenize = True
 
-    meta_config = Meta('./models/model')
-    vec = Vectorizer('./models/model', cuda_support, cuda_core, cuda_per_process_memory_fraction,
+    def get_model_directory() -> str:
+        if os.path.exists("./models/model/model_name"):
+            with open("./models/model/model_name", "r") as f:
+                model_name = f.read()
+                return f"./models/model/{model_name}"
+        return "./models/model"
+
+    meta_config = Meta(get_model_directory())
+    vec = Vectorizer(get_model_directory(), cuda_support, cuda_core, cuda_per_process_memory_fraction,
                      meta_config.getModelType(), meta_config.get_architecture(), direct_tokenize)


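The new get_model_directory helper bridges download time and serve time: download.py drops a model_name marker file only for T5 models, so at startup the server either descends into the sentence-transformers snapshot directory named by the marker or falls back to ./models/model as before. A minimal standalone sketch of that lookup (paths mirror the diff; the example marker value assumes a hub name like sentence-transformers/sentence-t5-base):

import os

def get_model_directory(base: str = "./models/model") -> str:
    # download.py writes this marker only for T5 models; its contents are the
    # hub model name with "/" replaced by "_", which is also the directory
    # name sentence-transformers creates under the cache folder.
    marker = os.path.join(base, "model_name")
    if os.path.exists(marker):
        with open(marker, "r") as f:
            model_name = f.read()  # e.g. "sentence-transformers_sentence-t5-base"
        return os.path.join(base, model_name)
    return base
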
cicd/docker_push.sh (13 changes: 3 additions & 10 deletions)
@@ -8,21 +8,18 @@ set -eou pipefail
 # - any commit is pushed as :<model>-latest
 # - any commit is pushed as :<model>
 git_hash=
-pr=
 remote_repo=${REMOTE_REPO?Variable REMOTE_REPO is required}
 model_name=${MODEL_NAME?Variable MODEL_NAME is required}
+original_model_name=$model_name
 docker_username=${DOCKER_USERNAME?Variable DOCKER_USERNAME is required}
 docker_password=${DOCKER_PASSWORD?Variable DOCKER_PASSWORD is required}
-git_tag=${GITHUB_REF##*/}
-original_model_name=$model_name
+git_tag=$GITHUB_REF_NAME
 
 function main() {
   init
   echo "git ref type is $GITHUB_REF_TYPE"
   echo "git ref name is $GITHUB_REF_NAME"
   echo "git branch is $GIT_BRANCH"
   echo "git tag is $git_tag"
-  echo "pr is $pr"
   push_tag
 }

@@ -35,18 +32,14 @@ function init() {
   fi
 
   git_hash="$(git rev-parse HEAD | head -c 7)"
-  pr=false
-  if [ ! -z "$GIT_PULL_REQUEST" ]; then
-    pr="$GIT_PULL_REQUEST"
-  fi
 
   docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
   docker buildx create --use
   echo "$docker_password" | docker login -u "$docker_username" --password-stdin
 }
 
 function push_tag() {
-  if [ ! -z "$git_tag" ]; then
+  if [ ! -z "$git_tag" ] && [ "$GITHUB_REF_TYPE" == "tag" ]; then
     tag_git="$remote_repo:$model_name-$git_tag"
     tag_latest="$remote_repo:$model_name-latest"
     tag="$remote_repo:$model_name"

cicd/docker_push_custom_base.sh (2 changes: 1 addition & 1 deletion)
@@ -6,7 +6,7 @@ git_hash=
 remote_repo=${REMOTE_REPO?Variable REMOTE_REPO is required}
 docker_username=${DOCKER_USERNAME?Variable DOCKER_USERNAME is required}
 docker_password=${DOCKER_PASSWORD?Variable DOCKER_PASSWORD is required}
-git_tag=${GITHUB_REF##*/}
+git_tag=$GITHUB_REF_NAME
 
 function main() {
   init
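
Both push scripts switch from the parameter expansion ${GITHUB_REF##*/} to the GITHUB_REF_NAME variable that GitHub Actions provides. The two only agree when the ref name contains no slashes; the expansion keeps just the last path segment, so a branch such as feature/t5-support would previously be mangled. A small illustration (hypothetical ref values, with the shell expansion mimicked in Python):

# Hypothetical refs: why $GITHUB_REF_NAME is safer than ${GITHUB_REF##*/}.
examples = {
    "refs/tags/1.2.3": "1.2.3",                             # both approaches agree
    "refs/heads/feature/t5-support": "feature/t5-support",  # they diverge
}

for ref, ref_name in examples.items():
    naive = ref.rsplit("/", 1)[-1]  # what ${GITHUB_REF##*/} produces
    print(f"{ref}: naive={naive!r} GITHUB_REF_NAME={ref_name!r}")

The extra [ "$GITHUB_REF_TYPE" == "tag" ] guard in push_tag has the same flavor: since GITHUB_REF_NAME is also non-empty on branch builds, the emptiness check alone would pass there too, so the push is now gated on the ref actually being a tag.
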
download.py (35 changes: 22 additions & 13 deletions)
@@ -8,8 +8,10 @@
     AutoTokenizer,
     AutoConfig,
 )
+from sentence_transformers import SentenceTransformer
 
 
+model_dir = './models/model'
 model_name = os.getenv('MODEL_NAME', None)
 force_automodel = os.getenv('FORCE_AUTOMODEL', False)
 if not model_name:
@@ -22,21 +24,28 @@
 
 print(f"Downloading model {model_name} from huggingface model hub")
 config = AutoConfig.from_pretrained(model_name)
-if config.architectures and not force_automodel:
-    print(f"Using class {config.architectures[0]} to load model weights")
-    mod = __import__('transformers', fromlist=[config.architectures[0]])
-    try:
-        klass_architecture = getattr(mod, config.architectures[0])
-        model = klass_architecture.from_pretrained(model_name)
-    except AttributeError:
-        print(f"{config.architectures[0]} not found in transformers, fallback to AutoModel")
-        model = AutoModel.from_pretrained(model_name)
+model_type = config.to_dict()['model_type']
+
+if model_type is not None and model_type == "t5":
+    SentenceTransformer(model_name, cache_folder=model_dir)
+    with open(f"{model_dir}/model_name", "w") as f:
+        f.write(model_name.replace("/", "_"))
 else:
-    model = AutoModel.from_pretrained(model_name)
+    if config.architectures and not force_automodel:
+        print(f"Using class {config.architectures[0]} to load model weights")
+        mod = __import__('transformers', fromlist=[config.architectures[0]])
+        try:
+            klass_architecture = getattr(mod, config.architectures[0])
+            model = klass_architecture.from_pretrained(model_name)
+        except AttributeError:
+            print(f"{config.architectures[0]} not found in transformers, fallback to AutoModel")
+            model = AutoModel.from_pretrained(model_name)
+    else:
+        model = AutoModel.from_pretrained(model_name)
 
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-model.save_pretrained('./models/model')
-tokenizer.save_pretrained('./models/model')
+    model.save_pretrained(model_dir)
+    tokenizer.save_pretrained(model_dir)
 
 nltk.download('punkt', download_dir='./nltk_data')
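
For T5 models the script now defers to sentence-transformers entirely: the SentenceTransformer(model_name, cache_folder=model_dir) call downloads the full snapshot, including the pooling and dense modules that the plain AutoModel path drops for sentence-t5 style models, and the marker file records which subdirectory it landed in. A sketch of the resulting layout, assuming MODEL_NAME=sentence-transformers/sentence-t5-base and sentence-transformers 2.2.2, which names the cache directory after the hub name with "/" replaced by "_":

import os

model_dir = './models/model'
model_name = "sentence-transformers/sentence-t5-base"  # assumed example

# Mirrors the marker written by download.py; app.py reads it back at startup
# to locate the snapshot that sentence-transformers cached.
snapshot_dir = os.path.join(model_dir, model_name.replace("/", "_"))
print(snapshot_dir)  # ./models/model/sentence-transformers_sentence-t5-base
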
requirements-test.txt (1 change: 1 addition & 0 deletions)
@@ -5,4 +5,5 @@ uvicorn==0.21.1
 nltk==3.8.1
 torch==2.0.0
 sentencepiece==0.1.97
+sentence-transformers==2.2.2
 pytest

requirements.txt (9 changes: 5 additions & 4 deletions)
@@ -1,6 +1,7 @@
-transformers==4.29.2
-fastapi==0.95.2
-uvicorn==0.22.0
+transformers==4.34.0
+fastapi==0.103.2
+uvicorn==0.23.2
 nltk==3.8.1
-torch==2.0.1
+torch==2.1.0
 sentencepiece==0.1.99
+sentence-transformers==2.2.2

smoke_test.py (1 change: 1 addition & 0 deletions)
@@ -50,6 +50,7 @@ def try_to_vectorize(url):
             # aware of 384 and 768 dim vectors, which should both fall in that
             # range
             self.assertTrue(len(resBody['vector']) > 100)
+            print(f"vector dimensions are: {len(resBody['vector'])}")
 
         try_to_vectorize(self.url + "/vectors/")
         try_to_vectorize(self.url + "/vectors")
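
The added print makes the smoke test log the embedding width, which is handy when switching between 384- and 768-dimensional models. The same check can be run by hand against a local container; a minimal sketch (the payload shape follows the smoke test, while the address and port are assumptions here):

import json
import urllib.request

# POST one text to the vectorizer endpoint and inspect the returned vector.
req = urllib.request.Request(
    "http://localhost:8000/vectors",  # assumed local address
    data=json.dumps({"text": "hello world"}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as res:
    resBody = json.loads(res.read())

print(f"vector dimensions are: {len(resBody['vector'])}")  # e.g. 768 for sentence-t5-base
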
vectorizer.py (43 changes: 36 additions & 7 deletions)
@@ -14,6 +14,7 @@
     DPRContextEncoder,
     DPRQuestionEncoder,
 )
+from sentence_transformers import SentenceTransformer
 
 
 # limit transformer batch size to limit parallel inference, otherwise we run
@@ -29,14 +30,47 @@ class VectorInput(BaseModel):
     text: str
     config: Optional[VectorInputConfig] = None
 
 
 class Vectorizer:
+    executor: ThreadPoolExecutor
+
+    def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per_process_memory_fraction: float, model_type: str, architecture: str, direct_tokenize: bool):
+        self.executor = ThreadPoolExecutor()
+        if model_type == 't5':
+            self.vectorizer = SentenceTransformerVectorizer(model_path, cuda_core)
+        else:
+            self.vectorizer = HuggingFaceVectorizer(model_path, cuda_support, cuda_core, cuda_per_process_memory_fraction, model_type, architecture, direct_tokenize)
+
+    async def vectorize(self, text: str, config: VectorInputConfig):
+        return await asyncio.wrap_future(self.executor.submit(self.vectorizer.vectorize, text, config))
+
+
+class SentenceTransformerVectorizer:
+    model: SentenceTransformer
+    cuda_core: str
+
+    def __init__(self, model_path: str, cuda_core: str):
+        self.cuda_core = cuda_core
+        self.model = SentenceTransformer(model_path, device=self.get_device())
+        self.model.eval()  # make sure we're in inference mode, not training
+
+    def get_device(self) -> Optional[str]:
+        if self.cuda_core is not None and self.cuda_core != "":
+            return self.cuda_core
+        return None
+
+    def vectorize(self, text: str, config: VectorInputConfig):
+        embedding = self.model.encode([text], device=self.get_device(), convert_to_tensor=False, convert_to_numpy=True)
+        return embedding[0]
+
+
+class HuggingFaceVectorizer:
     model: AutoModel
     tokenizer: AutoTokenizer
     cuda: bool
     cuda_core: str
     model_type: str
     direct_tokenize: bool
-    executor: ThreadPoolExecutor
 
     def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per_process_memory_fraction: float, model_type: str, architecture: str, direct_tokenize: bool):
         self.cuda = cuda_support
@@ -56,8 +90,6 @@ def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per
 
         self.tokenizer = self.model_delegate.create_tokenizer(model_path)
 
-        self.executor = ThreadPoolExecutor()
-
         nltk.data.path.append('./nltk_data')
 
     def tokenize(self, text:str):
@@ -73,7 +105,7 @@ def get_batch_results(self, tokens, text):
     def pool_embedding(self, batch_results, tokens, config):
         return self.model_delegate.pool_embedding(batch_results, tokens, config)
 
-    def _vectorize(self, text: str, config: VectorInputConfig):
+    def vectorize(self, text: str, config: VectorInputConfig):
         with torch.no_grad():
             if self.direct_tokenize:
                 # create embeddings without tokenizing text
@@ -100,9 +132,6 @@ def _vectorize(self, text: str, config: VectorInputConfig):
                 batch_sum_vectors += self.pool_embedding(batch_results, tokens, config)
             return batch_sum_vectors.detach() / num_sentences
 
-    async def vectorize(self, text: str, config: VectorInputConfig):
-        return await asyncio.wrap_future(self.executor.submit(self._vectorize, text, config))
-
 
 class HFModel:
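
The net effect of the vectorizer.py changes is that Vectorizer becomes a thin async facade: it picks SentenceTransformerVectorizer for T5 models and HuggingFaceVectorizer for everything else, and off-loads the synchronous vectorize call to its ThreadPoolExecutor via asyncio.wrap_future so the FastAPI event loop never blocks on inference. A hedged usage sketch (the model path and constructor arguments are placeholders, and config is passed as None since the sentence-transformers path ignores it):

import asyncio

from vectorizer import Vectorizer

async def main():
    # Placeholder arguments: a local T5 snapshot, CPU only.
    vec = Vectorizer(
        "./models/model/sentence-transformers_sentence-t5-base",
        cuda_support=False,
        cuda_core="",
        cuda_per_process_memory_fraction=1.0,
        model_type="t5",
        architecture=None,
        direct_tokenize=False,
    )
    embedding = await vec.vectorize("hello world", None)
    print(len(embedding))  # e.g. 768 for sentence-t5-base

asyncio.run(main())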