diff --git a/docs/hub/_toctree.yml b/docs/hub/_toctree.yml
index f185ff955..1a6e2305d 100644
--- a/docs/hub/_toctree.yml
+++ b/docs/hub/_toctree.yml
@@ -59,6 +59,8 @@
     title: AllenNLP
   - local: asteroid
     title: Asteroid
+  - local: ctransformers
+    title: CTransformers
   - local: espnet
     title: ESPnet
   - local: fastai
diff --git a/docs/hub/ctransformers.md b/docs/hub/ctransformers.md
new file mode 100644
index 000000000..87244d94f
--- /dev/null
+++ b/docs/hub/ctransformers.md
@@ -0,0 +1,40 @@
+# Using `ctransformers` at Hugging Face
+
+`ctransformers` is a library that provides Python bindings for Transformer models implemented in C/C++ using the GGML library. It supports several state-of-the-art language [models](https://github.com/marella/ctransformers#supported-models).
+
+## Exploring `ctransformers` in the Hub
+
+You can find `ctransformers` models by filtering at the left of the [models page](https://huggingface.co/models?library=ctransformers&sort=downloads).
+
+## Installation
+
+```sh
+pip install ctransformers
+```
+
+## Using existing models
+
+Load model directly:
+
+```py
+from ctransformers import AutoModelForCausalLM, AutoTokenizer
+
+model = AutoModelForCausalLM.from_pretrained("marella/gpt-2-ggml", hf=True)
+tokenizer = AutoTokenizer.from_pretrained(model)
+```
+
+Use a pipeline as a high-level helper:
+
+```py
+from transformers import pipeline
+
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+```
+
+[Run in Google Colab](https://colab.research.google.com/drive/1FVSLfTJ2iBbQ1oU2Rqz0MkpJbaB_5Got)
+
+If you want to see how to load a specific model, you can click `Use in CTransformers` and you will be given a working snippet that you can copy and run!
+
+## Additional resources
+
+- CTransformers [library](https://github.com/marella/ctransformers).
diff --git a/docs/hub/models-libraries.md b/docs/hub/models-libraries.md
index db77e2f0d..18c20a52e 100644
--- a/docs/hub/models-libraries.md
+++ b/docs/hub/models-libraries.md
@@ -12,6 +12,7 @@ The table below summarizes the supported libraries and their level of integration
 | [AllenNLP](https://github.com/allenai/allennlp) | An open-source NLP research library, built on PyTorch. | ✅ | ✅ | ✅ | ❌ |
 | [Asteroid](https://github.com/asteroid-team/asteroid) | Pytorch-based audio source separation toolkit | ✅ | ✅ | ✅ | ❌ |
 | [BERTopic](https://github.com/MaartenGr/BERTopic) | BERTopic is a topic modeling library for text and images | ✅ | ✅ | ✅ | ✅ |
+| [CTransformers](https://github.com/marella/ctransformers) | Python bindings for Transformer models implemented in C/C++ using the GGML library | ❌ | ❌ | ✅ | ❌ |
 | [docTR](https://github.com/mindee/doctr) | Models and datasets for OCR-related tasks in PyTorch & TensorFlow | ✅ | ✅ | ✅ | ❌ |
 | [ESPnet](https://github.com/espnet/espnet) | End-to-end speech processing toolkit (e.g. TTS) | ✅ | ✅ | ✅ | ❌ |
 | [fastai](https://github.com/fastai/fastai) | Library to train fast and accurate models with state-of-the-art outputs. | ✅ | ✅ | ✅ | ✅ |
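As a companion to the `hf=True` snippets in `ctransformers.md` above, the library can also generate text directly through its native interface. The sketch below is illustrative and not part of the PR; it reuses the `marella/gpt-2-ggml` checkpoint from the docs and assumes the callable-model interface and `max_new_tokens` argument described in the ctransformers README.

```py
from ctransformers import AutoModelForCausalLM

# Load the GGML checkpoint used in the docs above, without hf=True,
# to get the native ctransformers model object (assumed API).
llm = AutoModelForCausalLM.from_pretrained("marella/gpt-2-ggml")

# The native model object is callable and returns the generated text;
# the prompt and max_new_tokens value are illustrative.
print(llm("AI is going to", max_new_tokens=30))
```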
diff --git a/js/src/lib/interfaces/Libraries.ts b/js/src/lib/interfaces/Libraries.ts
index 252a87908..c89d3f214 100644
--- a/js/src/lib/interfaces/Libraries.ts
+++ b/js/src/lib/interfaces/Libraries.ts
@@ -12,6 +12,7 @@ export enum ModelLibrary {
 	"allennlp" = "allenNLP",
 	"asteroid" = "Asteroid",
 	"bertopic" = "BERTopic",
+	"ctransformers" = "CTransformers",
 	"diffusers" = "Diffusers",
 	"doctr" = "docTR",
 	"espnet" = "ESPnet",
@@ -122,6 +123,19 @@ const bertopic = (model: ModelData) =>
 
 model = BERTopic.load("${model.id}")`];
 
+const ctransformers = (model: ModelData) =>
+	[
+		`# Load model directly
+from ctransformers import AutoModelForCausalLM, AutoTokenizer
+
+model = AutoModelForCausalLM.from_pretrained("${model.id}", hf=True)
+tokenizer = AutoTokenizer.from_pretrained(model)`,
+		`# Use a pipeline as a high-level helper
+from transformers import pipeline
+
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)`,
+	];
+
 const diffusers_default = (model: ModelData) =>
 	[`from diffusers import DiffusionPipeline
 
@@ -577,6 +591,12 @@ export const MODEL_LIBRARIES_UI_ELEMENTS: Partial<Record<ModelLibrary, LibraryUiElement>> = {
 	"tabular-regression": ["sklearn"],
 	"tabular-to-text": ["transformers"],
 	"text-classification": ["adapter-transformers", "spacy", "transformers", "transformers.js"],
-	"text-generation": ["transformers", "transformers.js"],
+	"text-generation": ["ctransformers", "transformers", "transformers.js"],
 	"text-retrieval": [],
 	"text-to-image": [],
 	"text-to-speech": ["espnet", "tensorflowtts"],
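The two strings returned by the `ctransformers` snippet function above mirror the "load + pipeline" example in `ctransformers.md`. For reference, this sketch (illustrative, not part of the PR) shows how the two halves combine end to end; the prompt and `max_new_tokens` value are assumptions.

```py
from ctransformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline

# hf=True wraps the GGML model so it is compatible with the transformers API
model = AutoModelForCausalLM.from_pretrained("marella/gpt-2-ggml", hf=True)
tokenizer = AutoTokenizer.from_pretrained(model)

# Run generation through the high-level transformers pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(pipe("AI is going to", max_new_tokens=30)[0]["generated_text"])
```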