Mirror of https://github.com/deepseek-ai/DeepSeek-VL.git, synced 2025-04-19 10:09:09 -04:00

First commit
commit 53c540ec9a (parent f0e10dbb86)
.dockerignore  Normal file  (+17 lines)
@@ -0,0 +1,17 @@
# The .dockerignore file excludes files from the container build process.
#
# https://docs.docker.com/engine/reference/builder/#dockerignore-file

# Exclude Git files
.git
.github
.gitignore

# Exclude Python cache files
__pycache__
.mypy_cache
.pytest_cache
.ruff_cache

# Exclude Python virtual environment
/venv
.gitignore  vendored  (+2 lines)
@@ -413,3 +413,5 @@ Sessionx.vim
tags
# Persistent undo
[._]*.un~

.cog
cog.yaml  Normal file  (+19 lines)
@@ -0,0 +1,19 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
  gpu: true
  python_version: "3.9"
  python_packages:
    - "accelerate==0.27.2"
    - "attrdict==2.0.1"
    - "einops==0.7.0"
    - "sentencepiece==0.2.0"
    - "torch==2.0.1"
    - "torchvision==0.15.2"
    - "transformers>=4.38.2"
    - "timm>=0.9.16"
    - "hf_transfer==0.1.6"

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
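The predict entry wires the build above to the Predictor class in predict.py (shown in the next file). As a rough, non-authoritative sketch of what a "file.py:ClassName" reference like "predict.py:Predictor" amounts to (Cog's real loader also handles input validation and the HTTP server, and is not shown in this commit), the class could be resolved with the standard library; this assumes the repository's dependencies are installed, since executing predict.py imports cog, torch, and deepseek_vl:

# Illustrative only, not Cog's actual loader: resolve a "file.py:ClassName"
# reference using the standard library.
import importlib.util

file_path, class_name = "predict.py:Predictor".rsplit(":", 1)
spec = importlib.util.spec_from_file_location("predict_module", file_path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)               # executes predict.py
predictor_cls = getattr(module, class_name)   # -> the Predictor class below

In normal use the packaged model would instead be built and invoked through the Cog CLI, for example cog predict -i image=@example.jpg -i prompt="Describe the image", where example.jpg is a placeholder for any local test image.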
predict.py  Normal file  (+77 lines)
@@ -0,0 +1,77 @@
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

from cog import BasePredictor, Input, Path
import os
import torch
from threading import Thread
from transformers import AutoModelForCausalLM
from deepseek_vl.utils.io import load_pil_images
from deepseek_vl.models import VLChatProcessor, MultiModalityCausalLM

# Enable faster download speed
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
MODEL_NAME = "deepseek-ai/deepseek-vl-7b-base"
CACHE_DIR = "checkpoints"


class Predictor(BasePredictor):
    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        self.vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(
            MODEL_NAME,
            cache_dir=CACHE_DIR
        )
        self.tokenizer = self.vl_chat_processor.tokenizer
        vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.bfloat16,
            cache_dir=CACHE_DIR
        )
        self.vl_gpt = vl_gpt.to('cuda')

    @torch.inference_mode()
    def predict(
        self,
        image: Path = Input(description="Input image"),
        prompt: str = Input(description="Input prompt", default="Describe the image"),
        max_new_tokens: int = Input(description="Maximum number of tokens to generate", default=512)
    ) -> str:
        """Run a single prediction on the model"""
        conversation = [
            {
                "role": "User",
                "content": "<image_placeholder>" + prompt,
                "images": [str(image)]
            },
            {
                "role": "Assistant",
                "content": ""
            }
        ]

        # load images and prepare for inputs
        pil_images = load_pil_images(conversation)
        prepare_inputs = self.vl_chat_processor(
            conversations=conversation,
            images=pil_images,
            force_batchify=True
        ).to('cuda')

        # run image encoder to get the image embeddings
        inputs_embeds = self.vl_gpt.prepare_inputs_embeds(**prepare_inputs)

        # run the model to get the response
        outputs = self.vl_gpt.language_model.generate(
            inputs_embeds=inputs_embeds,
            attention_mask=prepare_inputs.attention_mask,
            pad_token_id=self.tokenizer.eos_token_id,
            bos_token_id=self.tokenizer.bos_token_id,
            eos_token_id=self.tokenizer.eos_token_id,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            use_cache=True
        )

        answer = self.tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
        return answer
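For a quick local smoke test outside of Cog, the class above can also be driven directly. This is a minimal sketch that is not part of the commit; it assumes the cog package and the model's dependencies are installed, predict.py is importable from the repository root, a CUDA GPU is available, and example.jpg is a placeholder image path:

from predict import Predictor

predictor = Predictor()
predictor.setup()  # downloads deepseek-vl-7b-base into ./checkpoints and moves it to the GPU
answer = predictor.predict(
    image="example.jpg",           # placeholder path, substitute any local image
    prompt="Describe the image",
    max_new_tokens=512,
)
print(answer)

Calling predict() directly like this bypasses Cog's input coercion and HTTP layer, which is fine for a sanity check but not how the deployed model is served.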