From c3b9858a408d63edc9d68cb27848db889541b635 Mon Sep 17 00:00:00 2001
From: stack-heap-overflow
Date: Tue, 7 May 2024 17:51:42 +0800
Subject: [PATCH] Update README.md

---
 README.md | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 9ec28d8..15f23cf 100644
--- a/README.md
+++ b/README.md
@@ -189,7 +189,7 @@ We also provide OpenAI-Compatible API at DeepSeek Platform: [platform.deepseek.c
 ### Inference with Huggingface's Transformers
 You can directly employ [Huggingface's Transformers](https://github.com/huggingface/transformers) for model inference.
 
-### Text Completion
+#### Text Completion
 ```python
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
@@ -210,7 +210,7 @@ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
 print(result)
 ```
 
-### Chat Completion
+#### Chat Completion
 ```python
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
@@ -257,6 +257,33 @@ Assistant: {assistant_message_1}<|end▁of▁sentence|>User: {user_message_2}
 
 Assistant:
 ```
 
+### Inference with vLLM (recommended)
+To utilize [vLLM](https://github.com/vllm-project/vllm) for model inference, please merge this Pull Request into your vLLM codebase: https://github.com/vllm-project/vllm/pull/4650.
+
+```python
+from transformers import AutoTokenizer
+from vllm import LLM, SamplingParams
+
+max_model_len, tp_size = 8192, 8
+model_name = "deepseek-ai/DeepSeek-V2-Chat"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+llm = LLM(model=model_name, tensor_parallel_size=tp_size, max_model_len=max_model_len, trust_remote_code=True, enforce_eager=True)
+sampling_params = SamplingParams(temperature=0.3, max_tokens=256, stop_token_ids=[tokenizer.eos_token_id])
+
+messages_list = [
+    [{"role": "user", "content": "Who are you?"}],
+    [{"role": "user", "content": "Translate the following content into Chinese directly: DeepSeek-V2 adopts innovative architectures to guarantee economical training and efficient inference."}],
+    [{"role": "user", "content": "Write a piece of quicksort code in C++."}],
+]
+
+prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True) for messages in messages_list]
+
+outputs = llm.generate(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params)
+
+generated_text = [output.outputs[0].text for output in outputs]
+print(generated_text)
+```
+
 ## 8. License
 This code repository is licensed under [the MIT License](LICENSE-CODE). The use of DeepSeek-V2 Base/Chat models is subject to [the Model License](LICENSE-MODEL). DeepSeek-V2 series (including Base and Chat) supports commercial use.
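
A note on the snippet this patch adds: `apply_chat_template(..., add_generation_prompt=True)` returns token IDs, which `llm.generate` consumes directly through `prompt_token_ids`. To inspect the rendered prompt text instead, here is a minimal sanity-check sketch; it assumes only the same Hugging Face tokenizer used above, and `tokenize=False` is a standard Transformers option rather than anything introduced by this patch:

```python
# Sanity-check sketch: render the chat template as text instead of token IDs.
# Uses the same tokenizer as the vLLM snippet in this patch; tokenize=False is
# a standard transformers option and is assumed here, not taken from the patch.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-V2-Chat")
messages = [{"role": "user", "content": "Who are you?"}]

# With tokenize=False the template comes back as a string, so the special
# tokens (e.g. the sentence delimiters around "User:" / "Assistant:") are visible.
prompt_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
print(prompt_text)
```

Comparing this output against the template shown in the README's chat-completion section is a quick way to confirm that the tokenizer and template versions match.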
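
The first hunk's context line also mentions the OpenAI-compatible API at DeepSeek Platform. For completeness, a minimal sketch of calling it with the standard `openai` Python client; the base URL `https://api.deepseek.com` and the model name `deepseek-chat` are assumptions for illustration, not part of this patch:

```python
# Hedged sketch of the OpenAI-compatible endpoint referenced in the first hunk.
# Assumes the standard `openai` (v1+) client; the base_url and model name are
# assumptions for illustration, not taken from this patch.
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",               # key issued at platform.deepseek.com
    base_url="https://api.deepseek.com",  # assumed OpenAI-compatible base URL
)

response = client.chat.completions.create(
    model="deepseek-chat",                # assumed model identifier
    messages=[{"role": "user", "content": "Who are you?"}],
    temperature=0.3,
    max_tokens=256,
)
print(response.choices[0].message.content)
```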