#!/usr/bin/env python3
"""
Auto-generated Python script from markdown code blocks
"""

# --- Code Block 1 ---
import tinker

service_client = tinker.ServiceClient()
print("Available models:")
for item in service_client.get_server_capabilities().supported_models:
    print("- " + item.model_name)

# --- Code Block 2 ---
base_model = "Qwen/Qwen3-VL-30B-A3B-Instruct"
training_client = service_client.create_lora_training_client(
    base_model=base_model
)

# --- Code Block 3 ---
# Create some training examples
examples = [
    {"input": "banana split", "output": "anana-bay plit-say"},
    {"input": "quantum physics", "output": "uantum-qay ysics-phay"},
    {"input": "donut shop", "output": "onut-day op-shay"},
    {"input": "pickle jar", "output": "ickle-pay ar-jay"},
    {"input": "space exploration", "output": "ace-spay exploration-way"},
    {"input": "rubber duck", "output": "ubber-ray uck-day"},
    {"input": "coding wizard", "output": "oding-cay izard-way"},
]

# Convert examples into the format expected by the training client
from tinker import types

# Get the tokenizer from the training client
tokenizer = training_client.get_tokenizer()


def process_example(example: dict, tokenizer) -> types.Datum:
    # Format the input with an Input/Output template. For most real use cases
    # you'll want to use a renderer / chat template (see later docs), but here
    # we'll keep it simple.
    prompt = f"English: {example['input']}\nPig Latin:"
    prompt_tokens = tokenizer.encode(prompt, add_special_tokens=True)
    prompt_weights = [0] * len(prompt_tokens)
    # Add a space before the output string, and finish with a double newline
    completion_tokens = tokenizer.encode(f" {example['output']}\n\n", add_special_tokens=False)
    completion_weights = [1] * len(completion_tokens)
    tokens = prompt_tokens + completion_tokens
    weights = prompt_weights + completion_weights
    input_tokens = tokens[:-1]
    target_tokens = tokens[1:]  # We're predicting the next token, so targets are shifted.
    weights = weights[1:]
    # A Datum is a single training example for the loss function. It has
    # model_input, the input sequence that'll be passed into the LLM, and
    # loss_fn_inputs, a dictionary of extra inputs used by the loss function.
    return types.Datum(
        model_input=types.ModelInput.from_ints(tokens=input_tokens),
        loss_fn_inputs=dict(weights=weights, target_tokens=target_tokens),
    )
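
# --- Added Example: next-token shift on a toy sequence ---
# Illustrative sketch, not part of the original docs: the off-by-one shift in
# process_example above is the standard next-token-prediction setup. On a toy
# list of token ids it looks like this:
_toy_tokens = [101, 102, 103, 104]
assert _toy_tokens[:-1] == [101, 102, 103]  # inputs: the model sees everything but the last token
assert _toy_tokens[1:] == [102, 103, 104]   # targets: each position is scored on the *next* token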
processed_examples = [process_example(ex, tokenizer) for ex in examples]

# Visualize the first example for debugging purposes
datum0 = processed_examples[0]
print(f"{'Input':<20} {'Target':<20} {'Weight':<10}")
print("-" * 50)
for inp, tgt, wgt in zip(
    datum0.model_input.to_ints(),
    datum0.loss_fn_inputs['target_tokens'].tolist(),
    datum0.loss_fn_inputs['weights'].tolist(),
):
    print(f"{repr(tokenizer.decode([inp])):<20} {repr(tokenizer.decode([tgt])):<20} {wgt:<10}")

# --- Code Block 4 ---
import requests  # needed for the image download below

image_data = requests.get("https://thinkingmachines.ai/blog/on-policy-distillation/images/chess.png").content
model_input = tinker.ModelInput(chunks=[
    types.EncodedTextChunk(tokens=tokenizer.encode("<|im_start|>user\n<|vision_start|>")),
    types.ImageChunk(data=image_data, format="png"),
    types.EncodedTextChunk(tokens=tokenizer.encode("<|vision_end|>What is this?<|im_end|>\n<|im_start|>assistant\n")),
])

# --- Code Block 5 ---
import numpy as np

for _ in range(6):
    fwdbwd_future = training_client.forward_backward(processed_examples, "cross_entropy")
    optim_future = training_client.optim_step(types.AdamParams(learning_rate=1e-4))
    # Wait for the results
    fwdbwd_result = fwdbwd_future.result()
    optim_result = optim_future.result()
    # fwdbwd_result contains the logprobs of all the tokens we put in. Now we
    # can compute the weighted average log loss per token.
    logprobs = np.concatenate([output['logprobs'].tolist() for output in fwdbwd_result.loss_fn_outputs])
    weights = np.concatenate([example.loss_fn_inputs['weights'].tolist() for example in processed_examples])
    print(f"Loss per token: {-np.dot(logprobs, weights) / weights.sum():.4f}")

# --- Code Block 6 ---
# First, create a sampling client; this transfers the current weights.
sampling_client = training_client.save_weights_and_get_sampling_client(name='pig-latin-model')

# Now we can sample from the model.
prompt = types.ModelInput.from_ints(tokenizer.encode("English: coffee break\nPig Latin:"))
params = types.SamplingParams(max_tokens=20, temperature=0.0, stop=["\n"])  # Greedy sampling
future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=8)
result = future.result()
print("Responses:")
for i, seq in enumerate(result.sequences):
    print(f"{i}: {repr(tokenizer.decode(seq.tokens))}")

# --- Code Block 7 ---
prompt = types.ModelInput.from_ints(tokenizer.encode("How many r's are in the word strawberry?"))
sample_response = sampling_client.sample(
    prompt=prompt,
    num_samples=1,
    sampling_params=tinker.SamplingParams(max_tokens=1),  # must be at least 1 token; represents the prefill step
    include_prompt_logprobs=True,
).result()
# example: [None, -9.5, -1.6, -8.8, -3.5, -8.3, ...]
print(sample_response.prompt_logprobs)

# --- Code Block 8 ---
print(sampling_client.compute_logprobs(prompt).result())

# --- Code Block 9 ---
sample_response = sampling_client.sample(
    prompt=prompt,
    num_samples=1,
    sampling_params=tinker.SamplingParams(max_tokens=1),
    include_prompt_logprobs=True,
    topk_prompt_logprobs=5,
).result()
# example: [None, [(14924, -1.2), (755, -2.2), ...], [(25, -1.6), (3137, -2.4), ...], ...]
print(sample_response.topk_prompt_logprobs)
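
# --- Added Example: decoding top-k prompt logprobs ---
# Illustrative sketch, not part of the original docs. It assumes the format
# shown in the example comment above: entry 0 is None (no prediction for the
# first token), and each later entry is a list of (token_id, logprob) pairs
# for that prompt position.
for pos, topk in enumerate(sample_response.topk_prompt_logprobs):
    if topk is None:
        continue  # the very first prompt token has no preceding context
    alternatives = ", ".join(
        f"{tokenizer.decode([token_id])!r}: {logprob:.2f}" for token_id, logprob in topk
    )
    print(f"position {pos}: {alternatives}")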
# --- Code Block 10 ---
# Note: this block uses top-level `await`, so it needs an async context such
# as a notebook or `python -m asyncio`; see the wrapper sketch below for a
# plain-script alternative.
import requests
import tinker
from transformers import AutoTokenizer

model_name = "Qwen/Qwen3-VL-30B-A3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
service_client = tinker.ServiceClient()
training_client = await service_client.create_lora_training_client_async(base_model=model_name, rank=32)
sampling_client = await training_client.save_weights_and_get_sampling_client_async(name="sampler")

# Grab an image and ask a question
image_data = requests.get("https://thinkingmachines.ai/blog/on-policy-distillation/images/chess.png").content
model_input = tinker.ModelInput(chunks=[
    tinker.types.EncodedTextChunk(tokens=tokenizer.encode("<|im_start|>user\n<|vision_start|>")),
    tinker.types.ImageChunk(data=image_data, format="png"),
    tinker.types.EncodedTextChunk(tokens=tokenizer.encode("<|vision_end|>What is this?<|im_end|>\n<|im_start|>assistant\n")),
])
result = await sampling_client.sample_async(
    prompt=model_input,
    num_samples=1,
    sampling_params=tinker.types.SamplingParams(max_tokens=100),
)
print(tokenizer.decode(result.sequences[0].tokens))
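
# --- Added Example: running the async calls as a plain script ---
# Illustrative sketch, not part of the original docs. To run Code Block 10
# outside a notebook or async REPL, wrap the awaits in a coroutine and hand it
# to asyncio.run. Only the async APIs already used above are assumed; it
# reuses model_name, model_input, and tokenizer from Code Block 10.
import asyncio


async def main() -> None:
    service_client = tinker.ServiceClient()
    training_client = await service_client.create_lora_training_client_async(
        base_model=model_name, rank=32
    )
    sampling_client = await training_client.save_weights_and_get_sampling_client_async(name="sampler")
    result = await sampling_client.sample_async(
        prompt=model_input,  # the multimodal prompt built in Code Block 10
        num_samples=1,
        sampling_params=tinker.types.SamplingParams(max_tokens=100),
    )
    print(tokenizer.decode(result.sequences[0].tokens))


if __name__ == "__main__":
    asyncio.run(main())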