Skip to main content

Using OpenAI API

Compressa provides an integrated API layer compatible with OpenAI's Chat Completions API. This means developers can use the existing OpenAI client library (as well as libraries built on it, such as LangChain) and adapt their current code to work with Compressa with minimal changes.

OpenAI API Without Streaming

from openai import OpenAI

# Point the standard OpenAI client at the Compressa endpoint
# instead of api.openai.com.
client = OpenAI(
    base_url="http://your_address:8080/v1",
    api_key="Your_user_API_key",
)

# Request a single, complete (non-streamed) chat completion
# from the Compressa-hosted model.
response = client.chat.completions.create(
    model="Compressa-LLM",
    messages=[
        {"role": "system", "content": "You can write funny jokes."},
        {"role": "user", "content": "Write a short and funny joke about a programmer."},
    ],
    stream=False,
)

# The full reply is available once the request finishes.
print(response.choices[0].message.content)

You can also enable the token streaming option available in the OpenAI client:

# pip install openai - if you don't have this package yet

from openai import OpenAI

# Point the standard OpenAI client at the Compressa endpoint
# instead of api.openai.com.
client = OpenAI(
    base_url="http://your_address:8080/v1",
    api_key="Your_user_API_key",
)

# stream=True makes the server return the reply incrementally,
# as a sequence of chunks, instead of one final message.
completion = client.chat.completions.create(
    model="Compressa-LLM",
    messages=[
        {"role": "system", "content": "You can write funny jokes."},
        {"role": "user", "content": "Write a short and funny joke about a programmer."},
    ],
    stream=True,
)

# Each chunk carries an incremental delta of the assistant's message.
# NOTE: the loop body must be indented — without it this example is a SyntaxError.
for chunk in completion:
    print(chunk.choices[0].delta)

# Example output:
# ChoiceDelta(content='A', function_call=None, refusal=None, role='assistant', tool_calls=None)
# ChoiceDelta(content=' p', function_call=None, refusal=None, role='assistant', tool_calls=None)
# ChoiceDelta(content='r', function_call=None, refusal=None, role='assistant', tool_calls=None)
# ChoiceDelta(content='o', function_call=None, refusal=None, role='assistant', tool_calls=None)
# ...