From cf21982aa5143e9606f2071440abe906a2c45419 Mon Sep 17 00:00:00 2001 From: zxy Date: Wed, 19 Feb 2025 13:46:33 +0800 Subject: [PATCH 1/2] optimize lib dependencies --- deepseek_vl2/models/modeling_deepseek_vl_v2.py | 1 - deepseek_vl2/models/siglip_vit.py | 3 ++- requirements.txt | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/deepseek_vl2/models/modeling_deepseek_vl_v2.py b/deepseek_vl2/models/modeling_deepseek_vl_v2.py index 957464f..97288fd 100644 --- a/deepseek_vl2/models/modeling_deepseek_vl_v2.py +++ b/deepseek_vl2/models/modeling_deepseek_vl_v2.py @@ -1,4 +1,3 @@ -from attrdict import AttrDict from dataclasses import dataclass import logging import gc diff --git a/deepseek_vl2/models/siglip_vit.py b/deepseek_vl2/models/siglip_vit.py index 67f30e8..f06c25f 100644 --- a/deepseek_vl2/models/siglip_vit.py +++ b/deepseek_vl2/models/siglip_vit.py @@ -13,7 +13,6 @@ from timm.layers import ( ) from timm.models._manipulate import named_apply, checkpoint_seq, adapt_input_conv from transformers.modeling_utils import is_flash_attn_2_available -from xformers.ops import memory_efficient_attention from functools import partial @@ -134,6 +133,8 @@ class Attention(nn.Module): self.proj_drop = nn.Dropout(proj_drop) if proj_drop > 0. else nn.Identity() def forward(self, x: torch.Tensor) -> torch.Tensor: + from xformers.ops import memory_efficient_attention + B, N, C = x.shape qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, self.head_dim) diff --git a/requirements.txt b/requirements.txt index b89b147..02d0661 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,6 @@ xformers>=0.0.21 timm>=0.9.16 accelerate sentencepiece -attrdict einops # for gradio demo From b6e557c73e284014e835097478c69bface0a0bde Mon Sep 17 00:00:00 2001 From: zxy Date: Wed, 19 Feb 2025 19:30:58 +0800 Subject: [PATCH 2/2] recover attrdict --- deepseek_vl2/models/modeling_deepseek_vl_v2.py | 1 + requirements.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/deepseek_vl2/models/modeling_deepseek_vl_v2.py b/deepseek_vl2/models/modeling_deepseek_vl_v2.py index 97288fd..957464f 100644 --- a/deepseek_vl2/models/modeling_deepseek_vl_v2.py +++ b/deepseek_vl2/models/modeling_deepseek_vl_v2.py @@ -1,3 +1,4 @@ +from attrdict import AttrDict from dataclasses import dataclass import logging import gc diff --git a/requirements.txt b/requirements.txt index 02d0661..b89b147 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ xformers>=0.0.21 timm>=0.9.16 accelerate sentencepiece +attrdict einops # for gradio demo