Output log probabilities
When using logprobs, the LLM outputs the log probabilities of each output token during generation.
There are two relevant parameters for this mode:
- logprobs (default: False) - When set to True, the LLM outputs the log probabilities of each output token during generation.
- top_logprobs (default: None) - When set to an integer value, the LLM outputs the log probabilities of the top top_logprobs most likely tokens at each token position during generation. top_logprobs must be between 0 and 5.
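For a quick orientation before the full examples below, here is a minimal request-body sketch in Python (the model name and messages are placeholders) showing how the two parameters combine:
# Minimal request-body sketch: logprobs switches the feature on,
# top_logprobs asks for the N most likely candidates at each position (0-5).
body = {
    "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "messages": [{"role": "user", "content": "Say 'Test'."}],
    "logprobs": True,    # return a log probability for every generated token
    "top_logprobs": 3    # also return the 3 most likely tokens at each position
}
With these settings, every generated token comes back with its own log probability plus the three highest-probability alternatives at that position.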
warning
Anyscale doesn't support logprobs for meta-llama/Llama-2-70b-chat-hf and meta-llama/Llama-2-13b-chat-hf.
Example
- cURL
- Python
- OpenAI Python SDK
curl "$ANYSCALE_BASE_URL/chat/completions" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $ANYSCALE_API_KEY" \
-d '{
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Say 'Test'."}],
"temperature": 0.7,
"logprobs": true,
"top_logprobs": 1
}'
import os
import requests
s = requests.Session()
api_base = os.getenv("ANYSCALE_BASE_URL")
token = os.getenv("ANYSCALE_API_KEY")
url = f"{api_base}/chat/completions"
body = {
    "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say 'Test'."}
    ],
    "temperature": 0.7,
    "logprobs": True,
    "top_logprobs": 1
}
with s.post(url, headers={"Authorization": f"Bearer {token}"}, json=body) as resp:
    print(resp.json())
import openai
client = openai.OpenAI(
    base_url="https://api.endpoints.anyscale.com/v1",
    api_key="esecret_YOUR_API_KEY"
)
# Note: not all arguments are currently supported and will be ignored by the backend.
chat_completion = client.chat.completions.create(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say 'Test'."}
    ],
    temperature=0.7,
    logprobs=True,
    top_logprobs=1
)
print(chat_completion.model_dump())
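With the OpenAI Python SDK, the per-token log probabilities are already parsed into attributes on the response object. Below is a minimal sketch of reading them back, assuming the openai>=1.x client and the response schema shown under "Example output" below:
# Sketch: walk the per-token log probabilities on the parsed response object.
token_logprobs = chat_completion.choices[0].logprobs.content
for entry in token_logprobs:
    # The chosen token and its log probability.
    print(entry.token, entry.logprob)
    # Up to `top_logprobs` ranked alternatives at this position.
    for alt in entry.top_logprobs:
        print("  candidate:", alt.token, alt.logprob)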
Example output:
{"id":"mistralai/Mixtral-8x7B-Instruct-v0.1",
"object":"text_completion",
"created":1705450393,
"model":"mistralai/Mixtral-8x7B-Instruct-v0.1",
"choices": [{"message":
{"role":"assistant",
"content":"Test.",
"tool_calls":null,
"tool_call_id":null},
"index":0,
"finish_reason":"stop",
"logprobs":{"content":
[
{"token":"Test",
"logprob":-0.12771208584308624,
"bytes":[84,101,115,116],
"top_logprobs": [
{"logprob":-0.12771208584308624,
"token":"Test",
"bytes":[84,101,115,116]
}
]
},
{"token":".",
"logprob":-0.0008685392094776034,
"bytes":[46],
"top_logprobs": [
{"logprob":-0.0008685392094776034,
"token":".",
"bytes":[46]
}
]
},
{"token":"",
"logprob":0.0,
"bytes":[],
"top_logprobs":[
{"logprob":0.0,
"token":"",
"bytes":[]
}
]
}
]
}
}],
"usage": {"prompt_tokens": 26,
"completion_tokens": 3,
"total_tokens":29
}
}
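Log probabilities are natural logarithms, so math.exp converts each one back to a probability, and summing them across the generated tokens gives the log probability of the whole completion. The sketch below applies this to the example output above; response is assumed to be the parsed JSON (for instance, resp.json() from the requests example):
import math

# `response` is assumed to be the parsed JSON response shown above.
content = response["choices"][0]["logprobs"]["content"]

# Per-token probability: exp(logprob). For "Test", exp(-0.1277) is roughly 0.88.
for entry in content:
    print(entry["token"], math.exp(entry["logprob"]))

# The log probability of the full completion is the sum of the per-token values.
sequence_logprob = sum(entry["logprob"] for entry in content)
print("sequence logprob:", sequence_logprob)                  # roughly -0.1286
print("sequence probability:", math.exp(sequence_logprob))    # roughly 0.88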