```python
import backoff
import dotenv
from openai import AsyncOpenAI, RateLimitError

# Lazily constructed module-level client so the .env file is loaded before
# the client reads OPENAI_API_KEY.
client: AsyncOpenAI | None = None


def get_openai_client() -> AsyncOpenAI:
    global client
    if client is None:
        dotenv.load_dotenv()
        client = AsyncOpenAI()
    return client


# Fixed seed for best-effort reproducibility across identical requests.
seed = 42


@backoff.on_exception(backoff.expo, RateLimitError)
async def create_completion_openai(
    messages: list[tuple[str, str]],
    model: str = "gpt-4o-mini",
    temperature: float = 0,
    max_completion_tokens: int = 2048,
    top_p: float = 0,
    frequency_penalty: float = 0,
    presence_penalty: float = 0,
    store: bool = False,
    logprobs: bool = False,
):
    """Create a chat completion, retrying with exponential backoff on rate limits.

    `messages` is a list of (role, content) pairs, e.g.
    [("system", ...), ("user", ...)]. Returns the response text, or a
    (text, logprobs) tuple when `logprobs` is True. temperature=0 and
    top_p=0 both push the sampler toward deterministic output.
    """
    response = await get_openai_client().chat.completions.create(
        model=model,
        messages=[
            {"role": role, "content": prompt}
            for role, prompt in messages
        ],
        response_format={"type": "text"},
        temperature=temperature,
        max_completion_tokens=max_completion_tokens,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty,
        store=store,
        logprobs=logprobs,
        # Request the 5 most likely alternatives per token when logprobs are on.
        top_logprobs=5 if logprobs else None,
        seed=seed,
    )
    if logprobs:
        return response.choices[0].message.content, response.choices[0].logprobs
    return response.choices[0].message.content
```
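
A minimal usage sketch of the helper above (the `asyncio` entry point and the prompts are illustrative, not part of the module itself):

```python
import asyncio


async def main() -> None:
    # Plain call: returns just the completion text.
    text = await create_completion_openai(
        messages=[
            ("system", "You are a concise assistant."),
            ("user", "Name one prime number greater than 100."),
        ],
    )
    print(text)

    # With logprobs=True the helper returns a (text, logprobs) tuple,
    # and each token carries its top-5 alternatives (top_logprobs=5).
    text, logprobs = await create_completion_openai(
        messages=[("user", "Answer yes or no: is 7 prime?")],
        logprobs=True,
    )
    print(text)
    if logprobs and logprobs.content:
        print(logprobs.content[0].top_logprobs)


asyncio.run(main())
```

Because the `backoff` decorator retries only on `RateLimitError`, other API errors (authentication, invalid requests) still surface immediately to the caller.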