Merge a4126eca81 into ef9f91e2b6

2025-07-12 20:19:05 -04:00 · 2025-02-26 18:15:40 +08:00 · 2025-02-26 18:15:40 +08:00 · 2b3a8cba4d
commit 2b3a8cba4d
parent ef9f91e2b6 a4126eca81
3 changed files with 218 additions and 1 deletions
--- a/client.py
+++ b/client.py
@ -0,0 +1,148 @@
 import json
 import time
 import sys
 import os
 from gradio_client import Client, handle_file
 # Address of the Gradio server to query (change if the server runs elsewhere)
 ip, port = "127.0.0.1", "8080"
 # Default prompt (caption request) sent along with every image
 user_prompt = "Thoroughly and carefully describe this image."
 # Input paths to process, and the JSON file the responses are written to
 files = list()
 output_file = "output.json"
 # Sampling hyperparameters
 temperature, top_k, top_p, max_tokens = 0.6, 50, 0.9, 100
 # Index of the first entry of files to process, and index of the first
 # positional (file) argument inside sys.argv
 startAt, argumentStart = 0, 1
 # Parse command-line options. Every recognised flag takes exactly one value,
 # and argumentStart advances by two per flag so that the positional
 # arguments (input files) can be located afterwards; this relies on all
 # flags being given before the positional file arguments.
 valueFlags = ("--ip", "--directory", "--start", "--port", "--prompt",
               "--temperature", "--top_k", "--top_p", "--max_tokens",
               "--output", "-o")
 i = 1
 while i < len(sys.argv):
    flag = sys.argv[i]
    if flag not in valueFlags:
        # Not an option (e.g. a positional file path): nothing to parse
        i += 1
        continue
    if i + 1 >= len(sys.argv):
        # A trailing flag without its value used to crash with IndexError
        print(f"Error: Option '{flag}' requires a value.")
        sys.exit(1)
    value = sys.argv[i + 1]
    if flag == "--ip":
        ip = value
    elif flag == "--directory":
        directory = value
        # Populate files with the images (.jpg, .jpeg, .png) and the text
        # prompts (.txt) found in directory, in sorted order
        if not os.path.isdir(directory):
            print(f"Error: Directory '{directory}' does not exist.")
            sys.exit(1)
        for file in sorted(os.listdir(directory)):
            if file.lower().endswith(('.jpg', '.png', '.jpeg', '.txt')):
                files.append(os.path.join(directory, file))
    elif flag == "--start":
        startAt = int(value)
    elif flag == "--port":
        port = value
    elif flag == "--prompt":
        user_prompt = value
    elif flag == "--temperature":
        temperature = float(value)
    elif flag == "--top_k":
        top_k = int(value)
    elif flag == "--top_p":
        top_p = float(value)
    elif flag == "--max_tokens":
        max_tokens = int(value)
    else:
        # Remaining members of valueFlags: "--output" / "-o"
        output_file = value
    argumentStart += 2
    # Skip the value as well, so a value that looks like a flag is never
    # interpreted as one
    i += 2
 # Connect to the Gradio server at the configured address
 client = Client("http://{}:{}".format(ip, port))
 # The results dictionary starts with the prompt; responses are added per file
 results = dict(prompt=user_prompt)
 # Everything from argumentStart onwards is treated as an input file path
 files.extend(sys.argv[argumentStart:])
 # Keep the processing order deterministic
 files.sort()
 # Query the server once per input, possibly starting at a specific index.
 # On the first failure the loop stops and output_file is renamed so that the
 # results gathered so far are stored as a partial file.
 for i in range(startAt, len(files)):
    # Grab the next input path (an image, or a .txt file holding a prompt)
    image_path = files[i]
    # Count start time
    start = time.time()
    # Make query to the Gradio server
    try:
        imageFile = None
        this_user_prompt = user_prompt
        # Case-insensitive, consistent with the --directory extension filter
        if image_path.lower().endswith('.txt'):
            # The text file content replaces the prompt for this request
            with open(image_path, 'r') as txt_file:
                this_user_prompt = txt_file.read().strip()
        else:
            imageFile = handle_file(image_path)
        # Call the server's /reset_state endpoint before each query
        result = client.predict(api_name="/reset_state")
        # Send the image file and the prompt to the Gradio app for processing
        # NOTE(review): for .txt inputs imageFile stays None, so [None] is
        # sent as input_images - confirm the server accepts this.
        result = client.predict(
            input_images=[imageFile],        # Provide the file directly
            input_text=this_user_prompt,     # Adapted prompt parameter
            api_name="/transfer_input"
        )
        result = client.predict(
            chatbot=[],
            temperature=temperature,
            #top_k=top_k,
            top_p=top_p,
            max_length_tokens=max_tokens,    # Adapted max_tokens parameter
            repetition_penalty=1.1,
            max_context_length_tokens=4096,
            #model_select_dropdown="deepseek-ai/deepseek-vl2-tiny",
            api_name="/predict"
        )
    except Exception as e:
        print(f"Failed to complete job at index {i}: {e}")
        # Prefix only the file name, so that an output path that contains
        # directories (e.g. out/results.json) remains a valid path
        output_file = os.path.join(
            os.path.dirname(output_file),
            f"partial_until_{i}_{os.path.basename(output_file)}"
        )
        break
    # Calculate elapsed time
    seconds = time.time() - start
    # Item i has just finished, so len(files) - i - 1 items are still pending
    remaining = (len(files) - i - 1) * seconds
    # The small constant avoids a division by zero
    hz = 1 / (seconds + 0.0001)
    # The response text is read from the first chatbot entry of the result
    response = result[0][0][1]
    # Print on screen
    print(f"Processing {1 + i}/{len(files)} | {hz:.2f} Hz / remaining {remaining / 60:.2f} minutes")
    print(f"Image: {image_path}\nResponse: {response}")
    # Store each path as the key pointing to each description
    results[image_path] = response
 # Serialise the collected results and write them to the output file
 print(f"\n\n\nStoring results in JSON file {output_file}")
 serialized = json.dumps(results, indent=4)
 with open(output_file, "w") as outfile:
    outfile.write(serialized)
--- a/scripts/linux_setup.sh
+++ b/scripts/linux_setup.sh
@ -0,0 +1,68 @@
 #!/bin/bash 
 # Resolve the directory that contains this script and move to its parent.
 # Abort if a cd fails, so nothing below runs in an unexpected directory.
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 cd "$DIR" || exit 1
 cd .. || exit 1
 # Reuse an existing virtual environment, or install the system
 # prerequisites and create a new one.
 if [ -d venv/ ]
 then
   echo "Found a virtual environment"
   source venv/bin/activate
 else
   echo "Creating a virtual environment"
   #Simple dependency checker that will apt-get stuff if something is missing
   # sudo apt-get install python3-venv python3-pip
   SYSTEM_DEPENDENCIES="python3-venv python3-pip zip libhdf5-dev"
   for REQUIRED_PKG in $SYSTEM_DEPENDENCIES
   do
     # stderr is silenced so that packages unknown to dpkg only yield an empty status
     PKG_OK=$(dpkg-query -W --showformat='${Status}\n' "$REQUIRED_PKG" 2>/dev/null | grep "install ok installed")
     echo "Checking for $REQUIRED_PKG: $PKG_OK"
     if [ -z "$PKG_OK" ]; then
       echo "No $REQUIRED_PKG. Setting up $REQUIRED_PKG."
       #Alternative: install only the package that is missing:
       #sudo apt-get --yes install "$REQUIRED_PKG"
       #Current behavior: as soon as one package is missing, the whole
       #dependency list is installed in one apt-get call and checking stops.
       #SYSTEM_DEPENDENCIES is left unquoted on purpose so that it splits
       #into separate package names.
       sudo apt-get install $SYSTEM_DEPENDENCIES
       break
     fi
   done
   #------------------------------------------------------------------------------
   # Stop here if the environment cannot be created, so the pip commands
   # below never run outside a virtual environment
   python3 -m venv venv || { echo "Error: could not create the virtual environment"; exit 1; }
   source venv/bin/activate
 fi
 #git clone https://github.com/deepseek-ai/DeepSeek-VL2
 #cd DeepSeek-VL2
 #python3 -m venv venv
 #source venv/bin/activate
 #Make sure pip is up to date
 python3 -m pip install --upgrade pip
 #Install the project in editable mode, then again with its gradio extra
 python3 -m pip install -e .
 #Quoted so that the shell cannot treat [gradio] as a glob character class
 python3 -m pip install -e ".[gradio]"
 python3 -m pip install joblib wheel
 python3 -m pip install flash-attn --no-build-isolation
 python3 -m pip install xformers
 python3 -m pip install --upgrade gradio
 #You can now run using :
 #CUDA_VISIBLE_DEVICES=2 python3 web_demo.py --model_name "deepseek-ai/deepseek-vl2-tiny"  --port 8080
 #Print the commands needed to start the web demo later on
 echo "From now on you can run the web demo using: "
 DEMO_DIR=$(pwd)
 echo "cd $DEMO_DIR"
 echo "source venv/bin/activate"
 echo "python3 web_demo.py --model_name \"deepseek-ai/deepseek-vl2-tiny\" --port 8080"
 exit 0
--- a/web_demo.py
+++ b/web_demo.py
@ -663,7 +663,8 @@ if __name__ == "__main__":
    demo.title = "DeepSeek-VL2 Chatbot"
    reload_javascript()
-    demo.queue(concurrency_count=CONCURRENT_COUNT, max_size=MAX_EVENTS).launch(
+    demo.queue(#concurrency_count=CONCURRENT_COUNT, #<- for some reason this emmits an error!
        max_size=MAX_EVENTS).launch(
        # share=False,
        share=True,
        favicon_path="deepseek_vl2/serve/assets/favicon.ico",