Using OpenAI API
Compressa has an integrated API layer that is compatible with OpenAI's Chat Completions API. This means developers can use the existing OpenAI client library (as well as the LangChain library) and adapt their current code to work with Compressa with minimal changes.
OpenAI API Without Streaming
- Python (OpenAI client)
- Python (OpenAI Langchain)
from openai import OpenAI

# Point the standard OpenAI client at the Compressa endpoint.
client = OpenAI(
    api_key="Your_user_API_key",
    base_url="http://your_address:8080/v1",
)

# System prompt first, then the user's request.
conversation = [
    {"role": "system", "content": "You can write funny jokes."},
    {"role": "user", "content": "Write a short and funny joke about a programmer."},
]

# Non-streaming request: the answer is read from a single response object.
result = client.chat.completions.create(
    model="Compressa-LLM",
    messages=conversation,
    stream=False,
)

first_choice = result.choices[0]
print(first_choice.message.content)
# pip install langchain langchain-openai openai
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI

# LangChain chat model configured to talk to the Compressa endpoint.
chat_model = ChatOpenAI(
    model="Compressa-LLM",
    api_key="Your_user_API_key",
    base_url="http://your_address:8080/v1",
)

# Build the conversation: a system instruction followed by the user request.
system_prompt = SystemMessage(content="You can write funny jokes.")
user_prompt = HumanMessage(content="Write a short and funny joke about a programmer.")

ai_msg = chat_model.invoke([system_prompt, user_prompt])
print(f"Model response: {ai_msg.content}")
You can also enable the token streaming option available in the OpenAI client:
# pip install openai - if you don't have this package yet
from openai import OpenAI

# Point the standard OpenAI client at the Compressa endpoint.
client = OpenAI(
    base_url="http://your_address:8080/v1",
    api_key="Your_user_API_key",
)

# With stream=True the result is iterated chunk by chunk instead of being
# read as one finished response.
completion = client.chat.completions.create(
    model="Compressa-LLM",
    messages=[
        {"role": "system", "content": "You can write funny jokes."},
        {"role": "user", "content": "Write a short and funny joke about a programmer."},
    ],
    stream=True,
)

# Print the incremental delta carried by each streamed chunk.
for chunk in completion:
    print(chunk.choices[0].delta)

# Example output:
# ChoiceDelta(content='A', function_call=None, refusal=None, role='assistant', tool_calls=None)
# ChoiceDelta(content=' p', function_call=None, refusal=None, role='assistant', tool_calls=None)
# ChoiceDelta(content='r', function_call=None, refusal=None, role='assistant', tool_calls=None)
# ChoiceDelta(content='o', function_call=None, refusal=None, role='assistant', tool_calls=None)
# ...