DeepSeek-V3/inference/test_kernel.py

import torch
from kernel import decode # Assuming kernel.py is in the same folder
from model import DummyTransformer # The dummy transformer we just created
# Instantiate the dummy model
model = DummyTransformer()
# Define a sample input: random token IDs (50257 = GPT-2 vocabulary size)
input_ids = torch.randint(0, 50257, (1, 10))  # batch size 1, sequence length 10
position = 5 # We are generating the next token at position 5
# Call the decode function
logits_or_probs = decode(
    input_ids=input_ids,
    position=position,
    model=model,
    apply_softmax=True,  # toggle softmax to get probabilities instead of raw logits
    top_k=10,            # top-k filtering: keep only the 10 most likely tokens
    top_p=0.9,           # top-p (nucleus) filtering threshold
    device='cpu',        # can switch to 'cuda' if you have a GPU
)
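
# ---------------------------------------------------------------------------
# For reference, a minimal sketch of top-k / top-p (nucleus) filtering as it
# is commonly applied to logits before softmax. This is an assumption about
# what decode() does with its top_k / top_p arguments, not the actual
# kernel.py implementation.
# ---------------------------------------------------------------------------
def filter_logits(logits: torch.Tensor, top_k: int = 0, top_p: float = 1.0) -> torch.Tensor:
    logits = logits.clone()
    if top_k > 0:
        # Mask everything below the k-th largest logit
        kth_value = torch.topk(logits, top_k, dim=-1).values[..., -1, None]
        logits[logits < kth_value] = float('-inf')
    if top_p < 1.0:
        # Keep the smallest set of tokens whose cumulative probability
        # exceeds top_p; always keep at least the most likely token
        sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1)
        cumulative_probs = torch.softmax(sorted_logits, dim=-1).cumsum(dim=-1)
        to_remove = cumulative_probs > top_p
        to_remove[..., 1:] = to_remove[..., :-1].clone()  # shift right so the token crossing the threshold is kept
        to_remove[..., 0] = False
        indices_to_remove = to_remove.scatter(-1, sorted_indices, to_remove)
        logits[indices_to_remove] = float('-inf')
    return logits
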
# Print the output
print("Output probabilities (softmax applied):")
print(logits_or_probs)
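
# ---------------------------------------------------------------------------
# A possible next step, assuming decode() returns a single probability
# distribution over the vocabulary when apply_softmax=True (the output shape
# is an assumption, not a documented contract of kernel.py): sample the next
# token id from the filtered distribution.
# ---------------------------------------------------------------------------
probs = logits_or_probs.reshape(1, -1)  # assumed shape: (1, vocab_size)
next_token = torch.multinomial(probs, num_samples=1)
print("Sampled next-token id:", next_token.item())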