add a client utility that can script questions to the web demo

2025-07-11 19:58:51 -04:00 · 2025-01-07 15:56:35 +02:00 · 2025-01-07 15:56:35 +02:00 · db6c917643
commit db6c917643
parent 5c818a06a4
1 changed files with 145 additions and 0 deletions
--- a/client.py
+++ b/client.py
@ -0,0 +1,145 @@
+import json
+import time
+import sys
+import os
+from gradio_client import Client, handle_file
+
+# Replace with the actual server URL if different
+ip = "127.0.0.1"
+port = "8080"
+
+# Define the user prompt (caption)
+user_prompt = "Thoroughly and carefully describe this image."
+
+files = []
+output_file = "output.json"
+
+# Hyperparameters
+temperature = 0.6
+top_k = 50
+top_p = 0.9
+max_tokens = 100
+
+startAt = 0
+
+argumentStart = 1
+if len(sys.argv) > 1:
+    for i in range(0, len(sys.argv)):
+        if sys.argv[i] == "--ip":
+            ip = sys.argv[i + 1]
+            argumentStart += 2
+        if sys.argv[i] == "--directory":
+            directory = sys.argv[i + 1]
+            argumentStart += 2
+            # Populate files with image (.jpg, .png) contents of directory
+            if os.path.isdir(directory):
+                directoryList = os.listdir(directory)
+                directoryList.sort()
+                for file in directoryList:
+                    if file.lower().endswith(('.jpg', '.png', '.jpeg', '.txt')):
+                        files.append(os.path.join(directory, file))
+            else:
+                print(f"Error: Directory '{directory}' does not exist.")
+                sys.exit(1)
+        elif sys.argv[i] == "--start":
+            startAt = int(sys.argv[i + 1])
+            argumentStart += 2
+        elif sys.argv[i] == "--port":
+            port = sys.argv[i + 1]
+            argumentStart += 2
+        elif sys.argv[i] == "--prompt":
+            user_prompt = sys.argv[i + 1]
+            argumentStart += 2
+        elif sys.argv[i] == "--temperature":
+            temperature = float(sys.argv[i + 1])
+            argumentStart += 2
+        elif sys.argv[i] == "--top_k":
+            top_k = int(sys.argv[i + 1])
+            argumentStart += 2
+        elif sys.argv[i] == "--top_p":
+            top_p = float(sys.argv[i + 1])
+            argumentStart += 2
+        elif sys.argv[i] == "--max_tokens":
+            max_tokens = int(sys.argv[i + 1])
+            argumentStart += 2
+        elif sys.argv[i] in ("--output", "-o"):
+            output_file = sys.argv[i + 1]
+            argumentStart += 2
+
+# Initialize the Gradio client with the server URL
+client = Client(f"http://{ip}:{port}")
+
+results = {"prompt": user_prompt}
+
+for i in range(argumentStart, len(sys.argv)):
+    files.append(sys.argv[i])
+
+# Make sure the list is sorted
+files.sort()
+
+# Possibly start at a specific index
+for i in range(startAt, len(files)):
+    # Grab the next image path
+    image_path = files[i]
+
+    # Count start time
+    start = time.time()
+
+    # Make query to VLLM
+    try:
+        imageFile = None
+        this_user_prompt = user_prompt
+        if image_path.endswith('.txt'):
+            with open(image_path, 'r') as txt_file:
+                this_user_prompt = txt_file.read().strip()
+        else:
+            imageFile = handle_file(image_path)
+
+        # Send the image file path and the prompt to the Gradio app for processing
+        result = client.predict(
+            input_images=[imageFile],           # Provide the file path directly
+            input_text=this_user_prompt,     # Adapted prompt parameter
+            api_name="/transfer_input"
+        )
+
+        result = client.predict(
+		    chatbot=[],
+            temperature=temperature,
+            #top_k=top_k,
+            top_p=top_p,
+            max_length_tokens=max_tokens, # Adapted max_tokens parameter
+		    repetition_penalty=1.1,
+		    max_context_length_tokens=4096,
+		    model_select_dropdown="deepseek-ai/deepseek-vl2-tiny",
+            api_name="/predict"
+        )
+
+
+
+    except Exception as e:
+        print(f"Failed to complete job at index {i}: {e}")
+        output_file = f"partial_until_{i}_{output_file}"
+        break
+
+    # Calculate elapsed time
+    seconds = time.time() - start
+    remaining = (len(files) - i) * seconds
+    hz = 1 / (seconds + 0.0001)
+
+    # Output the result
+    print("result[0][0][1] ",result[0][0][1])
+    question = this_user_prompt #Don't try to recover it from the list..
+    response = result[0][0][1]
+
+    # Print on screen
+    print(f"Processing {1 + i}/{len(files)} | {hz:.2f} Hz / remaining {remaining / 60:.2f} minutes")
+    print(f"Image: {image_path}\nResponse: {response}")
+
+    # Store each path as the key pointing to each description
+    results[image_path] = response
+
+# Save results to JSON
+print(f"\n\n\nStoring results in JSON file {output_file}")
+with open(output_file, "w") as outfile:
+    json.dump(results, outfile, indent=4)
+