Change module name to deepseek_vl2

This diff renames the Python package from deepseek_vl to deepseek_vl2 across the repository: import paths in the README and serve/inference code, asset and log paths, and the packaging metadata and homepage URL in pyproject.toml.
@@ -102,8 +102,8 @@ pip install -e .
 import torch
 from transformers import AutoModelForCausalLM
 
-from deepseek_vl.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
-from deepseek_vl.utils.io import load_pil_images
+from deepseek_vl2.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
+from deepseek_vl2.utils.io import load_pil_images
 
 
 # specify the path to the model
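For context, these imports come from the README quickstart. A minimal sketch of how they are used after the rename (the checkpoint name and the trust_remote_code loading path are illustrative assumptions, not part of this diff):

```python
import torch
from transformers import AutoModelForCausalLM

from deepseek_vl2.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
from deepseek_vl2.utils.io import load_pil_images

# specify the path to the model (illustrative checkpoint name)
model_path = "deepseek-ai/deepseek-vl2-tiny"
vl_chat_processor = DeepseekVLV2Processor.from_pretrained(model_path)
tokenizer = vl_chat_processor.tokenizer

vl_gpt = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
```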
@@ -559,7 +559,7 @@ class DeepseekVLV2Processor(ProcessorMixin):
                 for j in range(0, best_width, self.image_size):
                     images_list.append(
                         self.image_transform(local_view.crop((j, i, j + self.image_size, i + self.image_size))))
 
             """record height / width crop num"""
             num_width_tiles, num_height_tiles = best_width // self.image_size, best_height // self.image_size
             images_spatial_crop.append([num_width_tiles, num_height_tiles])
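This hunk is the local-view tiling step of DeepseekVLV2Processor: a padded image of best_width x best_height pixels is cut into image_size x image_size tiles, row by row, and the tile-grid shape is recorded in images_spatial_crop. A self-contained sketch of the same arithmetic (the function name and PIL-only simplification are mine; the real processor additionally applies self.image_transform to each tile):

```python
from PIL import Image

def tile_image(local_view: Image.Image, image_size: int = 384):
    """Cut a padded image into image_size x image_size tiles, row-major,
    and record the tile-grid shape, mirroring the processor's loop."""
    best_width, best_height = local_view.size
    tiles = []
    for i in range(0, best_height, image_size):
        for j in range(0, best_width, image_size):
            tiles.append(local_view.crop((j, i, j + image_size, i + image_size)))
    num_width_tiles = best_width // image_size
    num_height_tiles = best_height // image_size
    return tiles, (num_width_tiles, num_height_tiles)
```

With a 768 x 1152 local view and image_size = 384, this yields a 2 x 3 grid of six tiles.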
@@ -22,8 +22,8 @@ from __future__ import annotations
 import logging
 from typing import List, Tuple
 
-from deepseek_vl.serve.app_modules.presets import gr
-from deepseek_vl.serve.app_modules.utils import convert_asis, convert_mdtext, detect_converted_mark
+from deepseek_vl2.serve.app_modules.presets import gr
+from deepseek_vl2.serve.app_modules.utils import convert_asis, convert_mdtext, detect_converted_mark
 
 
 def compact_text_chunks(self, prompt, text_chunks: List[str]) -> List[str]:
@@ -58,8 +58,8 @@ def postprocess(
     return temp
 
 
-with open("deepseek_vl/serve/assets/custom.js", "r", encoding="utf-8") as f, open(
-    "deepseek_vl/serve/assets/Kelpy-Codos.js", "r", encoding="utf-8"
+with open("deepseek_vl2/serve/assets/custom.js", "r", encoding="utf-8") as f, open(
+    "deepseek_vl2/serve/assets/Kelpy-Codos.js", "r", encoding="utf-8"
 ) as f2:
     customJS = f.read()
     kelpyCodos = f2.read()
@@ -35,7 +35,7 @@ from pygments import highlight
 from pygments.formatters import HtmlFormatter
 from pygments.lexers import ClassNotFound, get_lexer_by_name, guess_lexer
 
-from deepseek_vl.serve.app_modules.presets import (
+from deepseek_vl2.serve.app_modules.presets import (
     ALREADY_CONVERTED_MARK,
     BOX2COLOR,
     MAX_IMAGE_SIZE,
@@ -50,9 +50,9 @@ def configure_logger():
     logger.setLevel(logging.DEBUG)
 
     timestr = time.strftime("%Y%m%d-%H%M%S")
-    os.makedirs("deepseek_vl/serve/logs", exist_ok=True)
+    os.makedirs("deepseek_vl2/serve/logs", exist_ok=True)
     file_handler = logging.FileHandler(
-        f"deepseek_vl/serve/logs/{timestr}_gradio_log.log"
+        f"deepseek_vl2/serve/logs/{timestr}_gradio_log.log"
     )
     console_handler = logging.StreamHandler()
 
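Only part of configure_logger appears in the diff. A sketch of the full handler wiring it implies, with the renamed log directory (the logger name and formatter string are assumptions):

```python
import logging
import os
import time

def configure_logger():
    # logger name and formatter are assumptions; paths follow the diff
    logger = logging.getLogger("gradio_logger")
    logger.setLevel(logging.DEBUG)

    timestr = time.strftime("%Y%m%d-%H%M%S")
    os.makedirs("deepseek_vl2/serve/logs", exist_ok=True)
    file_handler = logging.FileHandler(
        f"deepseek_vl2/serve/logs/{timestr}_gradio_log.log"
    )
    console_handler = logging.StreamHandler()

    formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
    file_handler.setFormatter(formatter)
    console_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
    return logger
```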
@@ -301,7 +301,7 @@ def parse_ref_bbox(response, image):
         text_x = box[0]
         text_y = box[1] - 20
         text_color = box_color
-        font = ImageFont.truetype('./deepseek_vl/serve/assets/simsun.ttc', size=20)
+        font = ImageFont.truetype('./deepseek_vl2/serve/assets/simsun.ttc', size=20)
         draw.text((text_x, text_y), label, font=font, fill=text_color)
 
     return image
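parse_ref_bbox draws the boxes referenced in a model response onto the image and returns it. A hedged usage sketch (the response string, the grounding-token format, and the file paths are all illustrative assumptions):

```python
from PIL import Image

from deepseek_vl2.serve.app_modules.utils import parse_ref_bbox

# hypothetical grounded response; the exact token format is an assumption
response = "<|ref|>the red cup<|/ref|><|det|>[[120, 80, 360, 290]]<|/det|>"
image = Image.open("./images/example.jpg")  # illustrative path

annotated = parse_ref_bbox(response, image)
if annotated is not None:
    annotated.save("./vg.jpg")
```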
[Eight binary image assets renamed along with the module; contents unchanged (sizes 61 KiB, 15 KiB, 81 KiB, 153 KiB, 266 KiB, 37 KiB, 190 KiB, 56 KiB).]
@@ -30,8 +30,8 @@ from transformers import (
     TextIteratorStreamer,
 )
 
-from deepseek_vl.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
-from deepseek_vl.models.conversation import Conversation
+from deepseek_vl2.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
+from deepseek_vl2.models.conversation import Conversation
 
 
 def load_model(model_path, dtype=torch.bfloat16):
@@ -27,8 +27,8 @@ from transformers import AutoModelForCausalLM
 
 def load_pretrained_model(model_path: str):
 
-    from deepseek_vl.models.processing_deepseek_vl_v2 import DeepseekVLV2Processor
-    from deepseek_vl.models.modeling_deepseek_vl_v2 import DeepseekVLV2ForCausalLM
+    from deepseek_vl2.models.processing_deepseek_vl_v2 import DeepseekVLV2Processor
+    from deepseek_vl2.models.modeling_deepseek_vl_v2 import DeepseekVLV2ForCausalLM
 
     vl_chat_processor = DeepseekVLV2Processor.from_pretrained(model_path)
     tokenizer = vl_chat_processor.tokenizer
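A usage sketch for the helper above; the checkpoint name is illustrative and the (tokenizer, processor, model) return order is an assumption inferred from the variables the hunk creates:

```python
import torch

# load_pretrained_model is the function defined in the hunk above;
# checkpoint name illustrative, return order assumed
tokenizer, vl_chat_processor, vl_gpt = load_pretrained_model("deepseek-ai/deepseek-vl2-tiny")
vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
```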
@@ -24,8 +24,8 @@ from transformers import AutoModelForCausalLM
 
 import PIL.Image
 
-from deepseek_vl.models import DeepseekVLV2ForCausalLM, DeepseekVLV2Processor
-from deepseek_vl.serve.app_modules.utils import parse_ref_bbox
+from deepseek_vl2.models import DeepseekVLV2ForCausalLM, DeepseekVLV2Processor
+from deepseek_vl2.serve.app_modules.utils import parse_ref_bbox
 
 
 def load_pil_images(conversations: List[Dict[str, str]]) -> List[PIL.Image.Image]:
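load_pil_images collects the PIL images referenced by a conversation. A sketch of the expected input shape (the role tokens, prompt, and image path are illustrative; the "images" key is inferred from the function's signature and typical usage):

```python
from deepseek_vl2.utils.io import load_pil_images

# a single-turn conversation; paths and role tokens are illustrative
conversation = [
    {
        "role": "<|User|>",
        "content": "<image>\nDescribe this image.",
        "images": ["./images/example.jpg"],
    },
    {"role": "<|Assistant|>", "content": ""},
]

pil_images = load_pil_images(conversation)
```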
@@ -3,12 +3,12 @@ requires = ["setuptools>=40.6.0", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "deepseek_vl"
+name = "deepseek_vl2"
 version = "1.0.0"
-description = "DeepSeek-VL"
+description = "DeepSeek-VL2"
 authors = [{name = "DeepSeek-AI"}]
 license = {file = "LICENSE-CODE"}
-urls = {homepage = "https://github.com/deepseek-ai/DeepSeek-VL"}
+urls = {homepage = "https://github.com/deepseek-ai/DeepSeek-VL2"}
 readme = "README.md"
 requires-python = ">=3.8"
 dependencies = [
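With the package renamed in pyproject.toml, a quick smoke test after pip install -e . (the submodule list is a sketch drawn from the imports in this diff):

```python
# verify the renamed package and its key submodules resolve
import importlib

for mod in ("deepseek_vl2", "deepseek_vl2.models", "deepseek_vl2.utils.io"):
    importlib.import_module(mod)
print("deepseek_vl2 imports OK")
```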