Streaming chatbot
Changes to Anyscale Endpoints API
Effective August 1, 2024, the Anyscale Endpoints API will be available exclusively through the fully Hosted Anyscale Platform. Multi-tenant access to LLM models will be removed.
With the Hosted Anyscale Platform, you can access the latest GPUs billed by the second, and deploy models on your own dedicated instances. Enjoy full customization to build your end-to-end applications with Anyscale. Get started today.
Add your Anyscale Endpoints token and run the following code to create a local chatbot.
import sys
import openai
ANYSCALE_ENDPOINT_TOKEN = "YOUR_ANYSCALE_TOKEN"
class OpenAIChatAgent:
    """Minimal streaming chat agent backed by the Anyscale Endpoints API.

    Keeps the full conversation in ``self.message_history`` and streams
    replies chunk by chunk via :meth:`process_input`.
    """

    def __init__(self, model: str):
        # This simple example doesn't trim the past conversation. Eventually
        # you run out of context window, but this should be enough for a
        # 30-step conversation. You need to either trim the message history
        # or summarize it for longer conversations.
        self.message_history = []
        self.model = model
        self.oai_client = openai.OpenAI(
            base_url="https://api.endpoints.anyscale.com/v1",
            api_key=ANYSCALE_ENDPOINT_TOKEN,
        )

    def greet(self):
        # Placeholder hook for an opening message; intentionally a no-op.
        return None

    def process_input(self, input: str):
        """Send *input* to the model and yield the reply as streamed chunks.

        Records the user turn before the request and the completed
        assistant turn after the stream finishes, so the next request
        carries the whole conversation.
        """
        self.update_message_history(input)
        response = self.oai_client.chat.completions.create(
            model=self.model,
            messages=self.message_history,
            stream=True,
        )
        words = ''
        for tok in response:
            delta = tok.choices[0].delta
            if not delta:  # defensive: stop early on an empty end token
                break
            if delta.content:
                words += delta.content
                yield delta.content
        # BUGFIX: record the assistant reply once the stream is exhausted.
        # The original appended only when `not delta` was true, but delta
        # objects in the openai v1 SDK are pydantic models and always
        # truthy, so the assistant turn was never saved to the history.
        self.message_history.append({
            'role': 'assistant',
            'content': words,
        })

    def update_message_history(self, inp):
        # Record the user's turn; the assistant's turn is appended by
        # process_input after the streamed reply completes.
        self.message_history.append({
            'role': 'user',
            'content': inp,
        })
# Interactive REPL: read a line, stream the model's reply chunk by chunk.
agent = OpenAIChatAgent("meta-llama/Llama-2-70b-chat-hf")
sys.stdout.write("Let's have a chat. (Enter `quit` to exit)\n")
while True:
    sys.stdout.write('> ')
    sys.stdout.flush()  # make sure the prompt is visible before blocking
    try:
        inp = input()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C: exit cleanly instead of a traceback.
        break
    if inp == 'quit':
        break
    for word in agent.process_input(inp):
        sys.stdout.write(word)
        sys.stdout.flush()  # show each streamed chunk immediately
    sys.stdout.write('\n')