From b8c2b6489e363e16785963256ea2c51c51b7ac6d Mon Sep 17 00:00:00 2001
From: David Corvoysier
Date: Thu, 23 Nov 2023 12:18:01 +0000
Subject: [PATCH 1/6] feat(tgi): fetch TGI .proto if not provided

This makes it easier to build the server outside of the Dockerfile.

Note that when the server is built outside of the Dockerfile on
Debian/Ubuntu, only the wheel can be installed. This might be related
to an issue with python-pip.
---
 text-generation-inference/server/Makefile | 28 +++++++++++++------
 .../server/pyproject.toml                 |  3 ++
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/text-generation-inference/server/Makefile b/text-generation-inference/server/Makefile
index 6ae41d4ca..adcaf6488 100644
--- a/text-generation-inference/server/Makefile
+++ b/text-generation-inference/server/Makefile
@@ -13,14 +13,6 @@ src_dir := $(mkfile_dir)/$(pkg_name)
 sources := $(wildcard $(src_dir)/*.py)
 deployed_sources := $(subst $(src_dir), $(pkg_dir), $(sources))
 
-# Three python files are generated for each protobuf
-protobufs := $(wildcard $(PROTODIR)/*.proto)
-pkg_pb_dir := $(pkg_dir)/pb
-generated_sources_base := $(foreach proto, $(protobufs), $(proto:.proto=_pb2.py))
-generated_sources := $(subst $(PROTODIR), $(pkg_pb_dir), $(generated_sources_base))
-generated_sources += $(subst $(PROTODIR), $(pkg_pb_dir), $(generated_sources_base:.py=.pyi))
-generated_sources += $(subst $(PROTODIR), $(pkg_pb_dir), $(generated_sources_base:.py=_grpc.py))
-
 # Static files are just copied
 
 define COPY
@@ -37,6 +29,24 @@ $(pkg_dir)/%.py: $(src_dir)/%.py
 
 # Generated files are produced by grpcio tools
 
+# If not provided, fetch proto files from TGI
+ifndef PROTODIR
+PROTODIR := $(BUILDDIR)/tgi/proto
+endif
+
+$(BUILDDIR)/tgi/proto/%.proto:
+	install -d $(BUILDDIR)/tgi
+	curl -L https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.0.2.tar.gz --output $(BUILDDIR)/tgi/sources.tar.gz
+	tar -C $(BUILDDIR)/tgi -xf $(BUILDDIR)/tgi/sources.tar.gz --strip-components=1
+
+# Three python files are generated for each protobuf
+protobufs := $(PROTODIR)/generate.proto
+pkg_pb_dir := $(pkg_dir)/pb
+generated_sources_base := $(foreach proto, $(protobufs), $(proto:.proto=_pb2.py))
+generated_sources := $(subst $(PROTODIR), $(pkg_pb_dir), $(generated_sources_base))
+generated_sources += $(subst $(PROTODIR), $(pkg_pb_dir), $(generated_sources_base:.py=.pyi))
+generated_sources += $(subst $(PROTODIR), $(pkg_pb_dir), $(generated_sources_base:.py=_grpc.py))
+
 $(pkg_pb_dir)/%_pb2.py $(pkg_pb_dir)/%_pb2.pyi $(pkg_pb_dir)/%_pb2_grpc.py: $(PROTODIR)/%.proto
 	mkdir -p $(pkg_pb_dir)
 	python -m grpc_tools.protoc -I$(PROTODIR) --python_out=$(pkg_pb_dir) \
@@ -44,4 +54,4 @@ $(pkg_pb_dir)/%_pb2.py $(pkg_pb_dir)/%_pb2.pyi $(pkg_pb_dir)/%_pb2_grpc.py: $(PROTODIR)/%.proto
 	sed -i -e 's/^\(import.*pb2\)/from . \1/g' $(pkg_pb_dir)/$*_pb2_grpc.py
 
 gen-server: $(BUILDDIR)/pyproject.toml $(deployed_sources) $(generated_sources)
-	python -m build $(BUILDDIR) --sdist
+	python -m build $(BUILDDIR)

diff --git a/text-generation-inference/server/pyproject.toml b/text-generation-inference/server/pyproject.toml
index 7261e8d44..bc1eed640 100644
--- a/text-generation-inference/server/pyproject.toml
+++ b/text-generation-inference/server/pyproject.toml
@@ -18,5 +18,8 @@ dependencies = [
   'loguru == 0.6.0'
 ]
 
+[tool.setuptools]
+packages = ["text_generation_server", "text_generation_server.pb"]
+
 [project.scripts]
 text-generation-server = 'text_generation_server.cli:app'
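
Note on the generated sources above: after `make gen-server`, grpcio-tools
emits generate_pb2.py, generate_pb2.pyi and generate_pb2_grpc.py under
text_generation_server/pb. A minimal sketch of how the generated message
stubs are consumed (the Request message and its id/inputs fields come from
TGI's generate.proto at v1.0.2; the literal values are illustrative):

    # Sketch: build a Request message from the stubs produced by grpc_tools.protoc.
    from text_generation_server.pb.generate_pb2 import Request

    request = Request(id=0, inputs="It was a bright cold day in April")
    print(request.id, request.inputs)
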
From 289cd827890e2985bab697e2a212cc17b4f4ed61 Mon Sep 17 00:00:00 2001
From: David Corvoysier
Date: Fri, 24 Nov 2023 09:10:34 +0000
Subject: [PATCH 2/6] feat(tgi): align server version to optimum-neuron

---
 text-generation-inference/Dockerfile            | 10 +++++-----
 text-generation-inference/server/Makefile       |  2 ++
 text-generation-inference/server/pyproject.toml |  2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/text-generation-inference/Dockerfile b/text-generation-inference/Dockerfile
index 5b2036c7f..c92479478 100644
--- a/text-generation-inference/Dockerfile
+++ b/text-generation-inference/Dockerfile
@@ -51,6 +51,10 @@ RUN apt-get update -y \
   && apt-get clean
 RUN pip3 --no-cache-dir install --upgrade pip
 
+# VERSION is a mandatory parameter
+ARG VERSION
+RUN test -n ${VERSION:?}
+
 # Python server build image
 FROM base AS pyserver
 
@@ -66,15 +70,11 @@ WORKDIR /pyserver
 COPY text-generation-inference/server server
 COPY --from=tgi /tgi/proto proto
 RUN pip3 install -r server/build-requirements.txt
-RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server
+RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto VERSION=${VERSION} make -C server gen-server
 
 # Neuron base image (used for deployment)
 FROM base AS neuron
 
-# VERSION is a mandatory parameter
-ARG VERSION
-RUN test -n ${VERSION:?}
-
 # Install system prerequisites
 RUN apt-get update -y \
   && apt-get install -y --no-install-recommends \
diff --git a/text-generation-inference/server/Makefile b/text-generation-inference/server/Makefile
index adcaf6488..de1c403f8 100644
--- a/text-generation-inference/server/Makefile
+++ b/text-generation-inference/server/Makefile
@@ -1,6 +1,7 @@
 # Initialize base variables
 pkg_name := text_generation_server
 BUILDDIR ?= $(CURDIR)/build_$(pkg_name)
+VERSION ?= 0.0.1
 mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST)))
 mkfile_dir := $(dir $(mkfile_path))
 pkg_dir := $(BUILDDIR)/$(pkg_name)
@@ -22,6 +23,7 @@ endef
 $(BUILDDIR)/pyproject.toml: $(mkfile_dir)/pyproject.toml
 	mkdir -p $(BUILDDIR)
 	$(COPY)
+	sed -i -e 's/version = "VERSION"/version = \"${VERSION}\"/' $@
 
 $(pkg_dir)/%.py: $(src_dir)/%.py
 	mkdir -p $(pkg_dir)
diff --git a/text-generation-inference/server/pyproject.toml b/text-generation-inference/server/pyproject.toml
index bc1eed640..869d341cb 100644
--- a/text-generation-inference/server/pyproject.toml
+++ b/text-generation-inference/server/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "text-generation-server"
-version = "0.0.1"
+version = "VERSION"
 authors = [{name="David Corvoysier", email="david@huggingface.co" }]
 description = "TGI compatible inference server for AWS Neuronx platforms"
 dependencies = [
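
The version is now injected in two steps: pyproject.toml ships with the
placeholder version = "VERSION", and the Makefile's sed call rewrites it in
the build copy. A rough Python equivalent of that substitution, for
illustration only (0.0.17 is a hypothetical version string):

    # Illustration of the Makefile's sed substitution on the copied pyproject.toml.
    import re

    toml_line = 'version = "VERSION"'
    print(re.sub(r'version = "VERSION"', 'version = "0.0.17"', toml_line))
    # version = "0.0.17"
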
From 83910e99c251c15012cfcc6a8c28540ee3316093 Mon Sep 17 00:00:00 2001
From: David Corvoysier
Date: Fri, 24 Nov 2023 11:05:59 +0000
Subject: [PATCH 3/6] test(Makefile): added tgi_test target

---
 Makefile | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 118b37484..71011ca12 100644
--- a/Makefile
+++ b/Makefile
@@ -24,7 +24,7 @@ clean:
 
 rwildcard=$(wildcard $1) $(foreach d,$1,$(call rwildcard,$(addsuffix /$(notdir $d),$(wildcard $(dir $d)*))))
 
-VERSION := $(shell python -W ignore -c "from optimum.neuron.version import __version__; print(__version__)")
+VERSION := $(shell gawk 'match($$0, /__version__ = "(.*)"/, a) {print a[1]}' optimum/neuron/version.py)
 
 PACKAGE_DIST = dist/optimum-neuron-$(VERSION).tar.gz
 PACKAGE_WHEEL = dist/optimum_neuron-$(VERSION)-py3-none-any.whl
@@ -71,6 +71,20 @@ build_dist: ${PACKAGE_DIST} ${PACKAGE_WHEEL}
 pypi_upload: ${PACKAGE_DIST} ${PACKAGE_WHEEL}
 	python -m twine upload ${PACKAGE_DIST} ${PACKAGE_WHEEL}
 
+# Tests
+
 test_installs:
 	python -m pip install .[tests]
 	python -m pip install git+https://github.com/huggingface/transformers.git
+
+# Stand-alone TGI server for unit tests outside of TGI container
+tgi_server:
+	python -m pip install -r text-generation-inference/server/build-requirements.txt
+	make -C text-generation-inference/server clean
+	VERSION=${VERSION} make -C text-generation-inference/server gen-server
+
+tgi_test: tgi_server
+	python -m pip install .[neuronx] pytest
+	find text-generation-inference -name "text_generation_server-$(VERSION)-py3-none-any.whl" \
+	    -exec python -m pip install --force-reinstall {} \;
+	python -m pytest -s text-generation-inference/tests
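
The gawk one-liner reads the version straight from the source file, so
computing VERSION no longer requires optimum-neuron and its dependencies to
be importable. A Python equivalent, for reference (it assumes
optimum/neuron/version.py contains a line matching __version__ = "..."):

    # Reference equivalent of the Makefile's gawk version extraction.
    import re

    with open("optimum/neuron/version.py") as f:
        version = re.search(r'__version__ = "(.*)"', f.read()).group(1)
    print(version)
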
From 595bf8da35811b9d12733caf9bf749185976f33b Mon Sep 17 00:00:00 2001
From: David Corvoysier
Date: Fri, 24 Nov 2023 11:07:54 +0000
Subject: [PATCH 4/6] test(tgi): test decoding with streamed tokens

---
 .../tests/test_generator_slot.py | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 text-generation-inference/tests/test_generator_slot.py

diff --git a/text-generation-inference/tests/test_generator_slot.py b/text-generation-inference/tests/test_generator_slot.py
new file mode 100644
index 000000000..2f243b5d4
--- /dev/null
+++ b/text-generation-inference/tests/test_generator_slot.py
@@ -0,0 +1,61 @@
+import pytest
+import torch
+from text_generation_server.generator import Slot
+from text_generation_server.pb.generate_pb2 import Request
+from transformers import AutoTokenizer, GenerationConfig
+
+
+TOKENIZERS = ["NousResearch/Llama-2-7b-hf", "gpt2"]
+
+
+@pytest.fixture(params=TOKENIZERS)
+def tokenizer(request):
+    t = AutoTokenizer.from_pretrained(request.param)
+    t.padding_side = "left"
+    t.pad_token_id = t.eos_token_id
+    return t
+
+
+@pytest.mark.parametrize(
+    "input_text, generated_text",
+    [
+        [
+            "It was a bright cold day in April, and the clocks were striking thirteen.",
+            " Winston Smith, his chin nuzzled into his breast in an effort to escape the vile wind,"
+            " slipped quickly through the glass doors of Victory Mansions, though not quickly enough"
+            " to prevent a swirl of gritty dust from entering along with him.",
+        ],
+        ["This sentence is written in chinese:", "我很感谢你的热情"],
+        ["Some text might contain a lot of emojis like 😃", "😍💪 👉 👀"],
+    ],
+    ids=["spaces", "chinese-utf8", "emojis"],
+)
+def test_decode_streaming(tokenizer, input_text, generated_text):
+    slot = Slot(0, tokenizer)
+    request = Request(id=0, inputs=input_text)
+    slot.assign(request, GenerationConfig())
+    assert slot.cached_text == input_text
+
+    inputs = tokenizer(input_text, padding="max_length", max_length=len(input_text) + 1, return_tensors="pt")
+    input_ids = inputs["input_ids"][0]
+    attention_mask = inputs["attention_mask"][0]
+    generated_tokens = tokenizer(generated_text, add_special_tokens=False)["input_ids"]
+
+    # We need to regenerate the full text as the tokenizer might change it (extra spaces might be added)
+    all_input_ids = torch.cat([input_ids, torch.tensor(generated_tokens)])
+    full_text = tokenizer.decode(all_input_ids, skip_special_tokens=True)
+    regenerated_text = full_text[len(input_text) :]
+
+    # Initialize the slot with the inputs
+    slot.reset(input_ids, attention_mask, selector=None)
+
+    assert slot.generated_tokens == 0
+
+    # Simulate an iterative generation (i.e. don't call select and use known tokens instead)
+    decoded_text = ""
+    for i in range(len(generated_tokens)):
+        text = slot.append(generated_tokens[i])
+        assert slot.generated_tokens == i + 1
+        decoded_text += text
+
+    assert decoded_text == regenerated_text

From 667e238eb233a47aefd7dc1533c32edcff2aaa23 Mon Sep 17 00:00:00 2001
From: David Corvoysier
Date: Fri, 24 Nov 2023 14:58:26 +0000
Subject: [PATCH 5/6] ci: added TGI workflow

---
 .github/workflows/test_inf2_tgi.yml | 40 +++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 .github/workflows/test_inf2_tgi.yml

diff --git a/.github/workflows/test_inf2_tgi.yml b/.github/workflows/test_inf2_tgi.yml
new file mode 100644
index 000000000..fe0cb87e6
--- /dev/null
+++ b/.github/workflows/test_inf2_tgi.yml
@@ -0,0 +1,40 @@
+name: Optimum neuron / Test TGI on INF2
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - "setup.py"
+      - "optimum/**.py"
+      - "text-generation-inference/**"
+  pull_request:
+    branches: [ main ]
+    paths:
+      - "setup.py"
+      - "optimum/**.py"
+      - "text-generation-inference/**"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  do-the-job:
+    name: Run TGI tests
+    runs-on: [self-hosted, 1-aws-inf2, 32-cpu, ci] # run the job on the newly created runner
+    env:
+      AWS_REGION: us-east-1
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Install python and create venv
+        run: |
+          sudo apt install python3.8-venv -y
+          python3 -m venv aws_neuron_venv_pytorch
+          source aws_neuron_venv_pytorch/bin/activate
+          python -m pip install -U pip
+          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
+      - name: Run TGI server python tests
+        run: |
+          source aws_neuron_venv_pytorch/bin/activate
+          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} make tgi_test

From 7aefeacee77c2cb317caa1f887c4698e9f8869a4 Mon Sep 17 00:00:00 2001
From: David Corvoysier
Date: Wed, 7 Feb 2024 08:39:28 +0000
Subject: [PATCH 6/6] fix(tgi): use git ignored build dir name

---
 text-generation-inference/server/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text-generation-inference/server/Makefile b/text-generation-inference/server/Makefile
index de1c403f8..da5e38ffb 100644
--- a/text-generation-inference/server/Makefile
+++ b/text-generation-inference/server/Makefile
@@ -1,6 +1,6 @@
 # Initialize base variables
 pkg_name := text_generation_server
-BUILDDIR ?= $(CURDIR)/build_$(pkg_name)
+BUILDDIR ?= $(CURDIR)/build
 VERSION ?= 0.0.1
 mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST)))
 mkfile_dir := $(dir $(mkfile_path))
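
For context on the streaming test added in PATCH 4/6: it verifies that the
text recovered token by token through Slot.append matches a one-shot decode
of the same sequence. Below is a simplified sketch of the kind of
incremental detokenization this exercises; it is an assumed approach for
illustration, not Slot's actual implementation:

    # Re-decode the growing sequence at each step and emit only the stable new
    # suffix, so multi-byte characters (emojis, CJK text) are yielded only once
    # they are complete. Assumed approach, not Slot's actual code.
    from transformers import AutoTokenizer

    def stream_decode(tokenizer, token_ids):
        emitted = ""
        seen = []
        for token_id in token_ids:
            seen.append(token_id)
            text = tokenizer.decode(seen, skip_special_tokens=True)
            # An incomplete UTF-8 sequence decodes to the replacement character.
            if text.endswith("\ufffd"):
                continue
            yield text[len(emitted):]
            emitted = text

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    token_ids = tokenizer("Some emojis: 😃😍", add_special_tokens=False)["input_ids"]
    assert "".join(stream_decode(tokenizer, token_ids)) == "Some emojis: 😃😍"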