From 53c540ec9aced291fb56fc6122156352a4f0e17d Mon Sep 17 00:00:00 2001
From: Luis
Date: Mon, 11 Mar 2024 18:48:35 +0000
Subject: [PATCH] First commit

---
 .dockerignore | 17 +++++++++
 .gitignore    |  2 ++
 cog.yaml      | 19 ++++++++++
 predict.py    | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 136 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 cog.yaml
 create mode 100644 predict.py

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..4522d57
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,17 @@
+# The .dockerignore file excludes files from the container build process.
+#
+# https://docs.docker.com/engine/reference/builder/#dockerignore-file
+
+# Exclude Git files
+.git
+.github
+.gitignore
+
+# Exclude Python cache files
+__pycache__
+.mypy_cache
+.pytest_cache
+.ruff_cache
+
+# Exclude Python virtual environment
+/venv
diff --git a/.gitignore b/.gitignore
index dda2140..4dbcbaf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -413,3 +413,5 @@ Sessionx.vim
 tags
 # Persistent undo
 [._]*.un~
+
+.cog
diff --git a/cog.yaml b/cog.yaml
new file mode 100644
index 0000000..97a33c3
--- /dev/null
+++ b/cog.yaml
@@ -0,0 +1,19 @@
+# Configuration for Cog ⚙️
+# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
+
+build:
+  gpu: true
+  python_version: "3.9"
+  python_packages:
+    - "accelerate==0.27.2"
+    - "attrdict==2.0.1"
+    - "einops==0.7.0"
+    - "sentencepiece==0.2.0"
+    - "torch==2.0.1"
+    - "torchvision==0.15.2"
+    - "transformers>=4.38.2"
+    - "timm>=0.9.16"
+    - "hf_transfer==0.1.6"
+
+# predict.py defines how predictions are run on your model
+predict: "predict.py:Predictor"
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..ab3115e
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,98 @@
+# Prediction interface for Cog ⚙️
+# https://github.com/replicate/cog/blob/main/docs/python.md
+
+import os
+
+# huggingface_hub reads this flag once, at import time, so it has to be set
+# before transformers (which imports huggingface_hub) is loaded.
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+
+from cog import BasePredictor, Input, Path
+import torch
+from threading import Thread
+from transformers import AutoModelForCausalLM
+from deepseek_vl.utils.io import load_pil_images
+from deepseek_vl.models import VLChatProcessor, MultiModalityCausalLM
+
+# NOTE(review): predict() builds a User/Assistant chat prompt; confirm that the
+# base (non-chat) checkpoint is the intended one.
+MODEL_NAME = "deepseek-ai/deepseek-vl-7b-base"
+CACHE_DIR = "checkpoints"
+# Tag that marks the image position in a DeepSeek-VL user message.
+IMAGE_PLACEHOLDER = "<image_placeholder>"
+
+
+class Predictor(BasePredictor):
+    """Cog predictor that answers a text prompt about one input image."""
+
+    def setup(self) -> None:
+        """Load the model into memory to make running multiple predictions efficient"""
+        self.vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(
+            MODEL_NAME,
+            cache_dir=CACHE_DIR
+        )
+        self.tokenizer = self.vl_chat_processor.tokenizer
+        vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            torch_dtype=torch.bfloat16,
+            cache_dir=CACHE_DIR
+        )
+        self.vl_gpt = vl_gpt.to('cuda')
+
+    @torch.inference_mode()
+    def predict(
+        self,
+        image: Path = Input(description="Input image"),
+        prompt: str = Input(description="Input prompt", default="Describe the image"),
+        max_new_tokens: int = Input(description="Maximum number of tokens to generate", default=512)
+    ) -> str:
+        """Run a single prediction on the model.
+
+        Args:
+            image: Path of the image the prompt refers to.
+            prompt: Instruction or question about the image.
+            max_new_tokens: Upper bound on the number of generated tokens.
+
+        Returns:
+            The decoded answer with special tokens removed.
+        """
+        conversation = [
+            {
+                "role": "User",
+                # The placeholder marks where the image goes; without it the image is unused.
+                "content": IMAGE_PLACEHOLDER + prompt,
+                "images": [str(image)]
+            },
+            {
+                # Empty assistant turn: this is the part the model fills in.
+                "role": "Assistant",
+                "content": ""
+            }
+        ]
+
+        # load images and prepare for inputs
+        pil_images = load_pil_images(conversation)
+        prepare_inputs = self.vl_chat_processor(
+            conversations=conversation,
+            images=pil_images,
+            force_batchify=True
+        ).to('cuda')
+
+        # run image encoder to get the image embeddings
+        inputs_embeds = self.vl_gpt.prepare_inputs_embeds(**prepare_inputs)
+
+        # run the model to get the response
+        outputs = self.vl_gpt.language_model.generate(
+            inputs_embeds=inputs_embeds,
+            attention_mask=prepare_inputs.attention_mask,
+            pad_token_id=self.tokenizer.eos_token_id,
+            bos_token_id=self.tokenizer.bos_token_id,
+            eos_token_id=self.tokenizer.eos_token_id,
+            max_new_tokens=max_new_tokens,
+            do_sample=False,
+            use_cache=True
+        )
+
+        # One conversation in, so the first sequence is the only one.
+        answer = self.tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
+        return answer