Skip to content

Commit

Permalink
gwellianau ar gyfer 22.06/CV9 improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
DewiBrynJones committed Jun 16, 2022
1 parent 513be4f commit a286b97
Show file tree
Hide file tree
Showing 20 changed files with 358 additions and 311 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
train/models
train/homedir
train/logs
train/data
*pycache*
*.json
*.pid
*.log
*.lock
gh
18 changes: 1 addition & 17 deletions inference/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,20 +1,4 @@
FROM ubuntu:20.04
MAINTAINER Uned Technolegau Iaith, Prifysgol Bangor University, Language Technologies Unit

LABEL maintainer="techiaith"
LABEL repository="wav2vec2-xlsr-ft-cy"

ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Europe/London

RUN apt update -q \
&& apt install -y -qq tzdata bash build-essential git curl wget software-properties-common \
vim ca-certificates libffi-dev libssl-dev libsndfile1 libbz2-dev liblzma-dev locales \
libboost-all-dev libboost-tools-dev libboost-thread-dev cmake \
python python3 python3-pip python3-setuptools python3-dev curl zip zlib1g-dev vim \
ffmpeg sox alsa-utils \
&& python3 -m pip install --upgrade pip

FROM techiaith/wav2vec2-xlsr-ft-cy-device

# gosod YouTube downloader
RUN wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl \
Expand Down
54 changes: 54 additions & 0 deletions inference/Dockerfile.cpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# CPU base image for the wav2vec2-xlsr-ft-cy inference tooling.
FROM ubuntu:20.04

# MAINTAINER is deprecated; the author is recorded as an image label instead.
LABEL org.opencontainers.image.authors="Uned Technolegau Iaith, Prifysgol Bangor University, Language Technologies Unit"
LABEL maintainer="techiaith"
LABEL repository="wav2vec2-xlsr-ft-cy"

# Keep apt non-interactive during the build only (ARG is not persisted in the image).
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Europe/London

# System packages: build toolchain, Python 3, Boost/cmake and audio utilities.
# apt-get is used because `apt` does not offer a stable command-line interface for scripts.
RUN apt-get update -q \
 && apt-get install -y -qq \
        tzdata bash build-essential git curl wget software-properties-common \
        vim ca-certificates libffi-dev libssl-dev libsndfile1 libbz2-dev liblzma-dev locales \
        libboost-all-dev libboost-tools-dev libboost-thread-dev cmake \
        python python3 python3-pip python3-setuptools python3-dev zip zlib1g-dev \
        ffmpeg sox alsa-utils \
 && python3 -m pip install --upgrade pip


# Install the YouTube downloader
RUN wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl \
 && chmod a+rx /usr/local/bin/youtube-dl

# Build arguments selecting the model; they are empty unless supplied with --build-arg.
ARG MODEL_VERSION
ARG WAV2VEC2_MODEL_NAME

# Persist the build arguments as environment variables in the image.
ENV MODEL_VERSION=${MODEL_VERSION}
ENV WAV2VEC2_MODEL_NAME=${WAV2VEC2_MODEL_NAME}

# Set the locale
RUN locale-gen cy_GB.UTF-8
ENV LANG=cy_GB.UTF-8
ENV LANGUAGE=cy_GB:en
ENV LC_ALL=cy_GB.UTF-8

# Install local Python files and dependencies.
# WORKDIR creates /wav2vec2 if it does not exist, so no separate mkdir is needed.
WORKDIR /wav2vec2

# Copy requirements.txt on its own first so the dependency layer is cached
# independently of changes to the rest of the Python sources.
COPY python/requirements.txt /wav2vec2/
RUN pip3 install -r requirements.txt

ENV PATH="${PATH}:/wav2vec2"
ENV PYTHONPATH="${PYTHONPATH}:/wav2vec2"

# Install ctcdecode from source, then remove the build tree so it does not
# stay in the image layer.
RUN git clone --recursive https://github.com/parlance/ctcdecode.git /tmp/ctcdecode \
 && cd /tmp/ctcdecode && pip3 install . \
 && cd / && rm -rf /tmp/ctcdecode

COPY python /wav2vec2/
COPY speech.wav /wav2vec2/

15 changes: 15 additions & 0 deletions inference/Dockerfile.gpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# GPU base image for the wav2vec2-xlsr-ft-cy inference tooling (CUDA 11.4 / cuDNN 8 on Ubuntu 20.04).
FROM nvidia/cuda:11.4.0-cudnn8-devel-ubuntu20.04

LABEL maintainer="techiaith"
LABEL repository="wav2vec2-xlsr-ft-cy"

# Keep apt non-interactive during the build only (ARG is not persisted in the image).
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Europe/London

# System packages: build toolchain, Python 3, Boost/cmake and audio utilities.
# apt-get is used because `apt` does not offer a stable command-line interface for scripts;
# the duplicated curl and vim entries have been removed from the package list.
RUN apt-get update -q \
 && apt-get install -y -qq \
        tzdata bash build-essential git curl wget software-properties-common \
        vim ca-certificates libffi-dev libssl-dev libsndfile1 libbz2-dev liblzma-dev locales \
        libboost-all-dev libboost-tools-dev libboost-thread-dev cmake \
        python python3 python3-pip python3-setuptools python3-dev zip zlib1g-dev \
        ffmpeg sox alsa-utils \
 && python3 -m pip install --upgrade pip
22 changes: 12 additions & 10 deletions inference/Makefile
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
default: build

$(eval DEVICE = cpu)
#$(eval DEVICE = gpu)
#$(eval DEVICE = cpu)
$(eval DEVICE = gpu)

config:
# To use a local model, provide the full /models/... path as WAV2VEC2_MODEL_NAME and
# set MODEL_VERSION to an empty string.
$(eval WAV2VEC2_MODEL_NAME = techiaith/wav2vec2-xlsr-ft-cy)
$(eval MODEL_VERSION = 21.08)
$(eval WAV2VEC2_MODEL_NAME = techiaith/wav2vec2-xls-r-1b-ft-cy)
$(eval MODEL_VERSION = 22.06)
mkdir -p ${PWD}/data/


build: config
docker build --rm -f Dockerfile.${DEVICE} -t techiaith/wav2vec2-xlsr-ft-cy-device .
docker build --rm -t techiaith/wav2vec2-xlsr-ft-cy \
--build-arg WAV2VEC2_MODEL_NAME=${WAV2VEC2_MODEL_NAME} \
--build-arg MODEL_VERSION=${MODEL_VERSION} \
.


build-user: config
docker build --rm -t techiaith/wav2vec2-xlsr-ft-cy-${USER} \
--build-arg WAV2VEC2_MODEL_NAME=${WAV2VEC2_MODEL_NAME} \
--build-arg MODEL_VERSION=${MODEL_VERSION} \
Expand All @@ -36,12 +44,6 @@ run-cpu:
techiaith/wav2vec2-xlsr-ft-cy-${USER}


fetch-test:
if [ ! -d "data/corpws-profi-adnabod-lleferydd" ]; then \
mkdir -p data; \
cd data && git clone -b fersiwn2 --single-branch https://git.techiaith.bangor.ac.uk/data-porth-technolegau-iaith/corpws-profi-adnabod-lleferydd.git; \
fi

stop: config
-docker stop techiaith-wav2vec2-xlsr-ft-cy-${USER}
-docker rm techiaith-wav2vec2-xlsr-ft-cy-${USER}
Expand Down
29 changes: 19 additions & 10 deletions inference/python/speech_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,31 +28,40 @@ class SpeechToText:

def __init__(self, models_root_dir='', wav2vec2_model_path='', version='', language_model_path='', ctc_with_lm=False):

self.device = "cpu"
if torch.cuda.is_available():
self.device="cuda"

print ("wav2vec loading to device %s" % self.device)

if len(wav2vec2_model_path)==0:
self.wav2vec2_model_path = os.environ["WAV2VEC2_MODEL_NAME"]

# @todo - improve.
if len(language_model_path)==0:
self.language_model_path = os.path.join(os.environ["WAV2VEC2_MODEL_NAME"], "kenlm")

#
if len(version)==0:
self.version=os.environ["MODEL_VERSION"]

#
self.processor, self.model, self.vocab, self.ctcdecoder, self.kenlm_ctcdecoder = models.create(self.wav2vec2_model_path, self.version)


self.device = "cpu"
if torch.cuda.is_available():
self.device="cuda"
self.model.cuda()

def model_name():
print ("wav2vec loaded to device %s" % self.device)



def get_model_name(self):
return self.wav2vec2_model_path

def language_model():
def get_language_model(self):
return self.language_model_path

def model_version():
def get_model_version(self):
return self.version

def get_device(self):
return self.device

def split_frames(self, frames, aggressiveness):

Expand Down
142 changes: 0 additions & 142 deletions inference/python/test.py

This file was deleted.

4 changes: 2 additions & 2 deletions inference/server/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ build: config
run: config
mkdir -p ${PWD}/log/
mkdir -p ${PWD}/recordings/
docker run --name techiaith-wav2vec2-xlsr-ft-server-cy \
docker run --gpus all --name techiaith-wav2vec2-xlsr-ft-server-cy \
--restart=always \
-it -d -p ${PORT_NUMBER}:8008 \
-v ${PWD}/recordings/:/recordings \
-v ${PWD}/../models/:/models \
-v ${PWD}/../recordings/:/recordings \
-v ${PWD}/log/:/var/log/wav2vec2 \
techiaith/wav2vec2-xlsr-ft-server-cy

Expand Down
13 changes: 8 additions & 5 deletions inference/server/python/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ def index(self):
def versions(self):
result = {
'version': 1,
'model_name': self.stt.acoustic_model,
'language_model_name': self.stt.language_model,
'model_version': self.stt.model_version
'model_name': self.stt.get_model_name(),
'language_model_name': self.stt.get_language_model(),
'model_version': self.stt.get_model_version(),
'device': self.stt.get_device()
}
return result

Expand All @@ -56,7 +57,7 @@ def speech_to_text(self, soundfile, max_segment_length=5, max_segment_words=14,
break
wavfile.write(data)

#cherrypy.log("tmp file written to %s" % upload_tmp_filepath)
cherrypy.log("tmp file written to %s" % upload_tmp_filepath)

result = {
'version':1
Expand Down Expand Up @@ -91,7 +92,9 @@ def speech_to_text(self, soundfile, max_segment_length=5, max_segment_words=14,
'transcripts': transcripts
})

Path(upload_tmp_filepath).unlink()
cherrypy.log(str(result))

#Path(upload_tmp_filepath).unlink()

return result

Expand Down
Loading

0 comments on commit a286b97

Please sign in to comment.