automatic_prompt_engineer/ape.py (10 changes: 5 additions & 5 deletions)
@@ -18,8 +18,8 @@ def simple_ape(dataset,
                eval_template='Instruction: [PROMPT]\nInput: [INPUT]\nOutput: [OUTPUT]',
                prompt_gen_template=None,
                demos_template='Input: [INPUT]\nOutput: [OUTPUT]',
-               eval_model='text-davinci-002',
-               prompt_gen_model='text-davinci-002',
+               eval_model='gpt-3.5-turbo',
+               prompt_gen_model='gpt-3.5-turbo',
                prompt_gen_mode='forward',
                num_prompts=50,
                eval_rounds=20,
@@ -60,7 +60,7 @@ def simple_eval(dataset,
                 prompts,
                 eval_template='Instruction: [PROMPT]\nInput: [INPUT]\nOutput: [OUTPUT]',
                 demos_template='Input: [INPUT]\nOutput: [OUTPUT]',
-                eval_model='text-davinci-002',
+                eval_model='gpt-3.5-turbo',
                 num_samples=50):
     """
     Function that wraps the evaluate_prompts function to make it easier to use.
@@ -87,8 +87,8 @@ def simple_estimate_cost(dataset,
                          eval_template='Instruction: [PROMPT]\nInput: [INPUT]\nOutput: [OUTPUT]',
                          prompt_gen_template=None,
                          demos_template='Input: [INPUT]\nOutput: [OUTPUT]',
-                         eval_model='text-davinci-002',
-                         prompt_gen_model='text-davinci-002',
+                         eval_model='gpt-3.5-turbo',
+                         prompt_gen_model='gpt-3.5-turbo',
                          prompt_gen_mode='forward',
                          num_prompts=50,
                          eval_rounds=20,
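Note: with these new defaults, callers that never pass eval_model or prompt_gen_model are switched to the chat endpoint silently. A minimal usage sketch under the new defaults; the toy dataset and variable names are illustrative, not from this PR:

    from automatic_prompt_engineer import ape

    # Toy induction data: (inputs, outputs) pairs; illustrative only.
    words = (['sane', 'direct'], [['insane'], ['indirect']])

    result, demo_fn = ape.simple_ape(
        dataset=words,
        num_prompts=5,   # keep the search small for a smoke test
        eval_rounds=2,
    )  # eval_model / prompt_gen_model now default to 'gpt-3.5-turbo'
    print(result)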
automatic_prompt_engineer/configs/bandits.yaml (6 changes: 3 additions & 3 deletions)
@@ -6,7 +6,7 @@ generation:
     name: GPT_forward # the name of the model used for prompt generation
     batch_size: 500 # the maximum batch size used for prompt generation
     gpt_config: # the configuration of the GPT model used for prompt generation (these are fed directly to the openai function)
-      model: text-davinci-002
+      model: gpt-3.5-turbo
       temperature: 0.9
       max_tokens: 50
       top_p: 0.9
@@ -27,7 +27,7 @@ evaluation:
     name: GPT_forward
     batch_size: 500
     gpt_config:
-      model: text-davinci-002
+      model: gpt-3.5-turbo
       temperature: 0.7
       max_tokens: 200
       top_p: 1.0
@@ -38,7 +38,7 @@ demo:
     name: GPT_forward
     batch_size: 500
     gpt_config:
-      model: text-davinci-002
+      model: gpt-3.5-turbo
       temperature: 0.7
       max_tokens: 200
       top_p: 1.0
automatic_prompt_engineer/configs/default.yaml (6 changes: 3 additions & 3 deletions)
@@ -6,7 +6,7 @@ generation:
     name: GPT_forward
     batch_size: 500
     gpt_config:
-      model: text-davinci-002
+      model: gpt-3.5-turbo
       temperature: 0.9
       max_tokens: 50
       top_p: 0.9
@@ -20,7 +20,7 @@ evaluation:
     name: GPT_forward
     batch_size: 500
     gpt_config:
-      model: text-davinci-002
+      model: gpt-3.5-turbo
       temperature: 0.7
       max_tokens: 200
       top_p: 1.0
@@ -31,7 +31,7 @@ demo:
     name: GPT_forward
     batch_size: 500
     gpt_config:
-      model: text-davinci-002
+      model: gpt-3.5-turbo
       temperature: 0.7
       max_tokens: 200
       top_p: 1.0
automatic_prompt_engineer/evaluation/likelihood.py (5 changes: 3 additions & 2 deletions)
@@ -80,9 +80,10 @@ def __init__(self, prompts, log_probs, num_samples):
     def _compute_avg_likelihood(self, prompts, log_probs, num_samples):
         i = 0
         prompt_log_probs = []
-        for prompt in prompts:
+        # TODO: prompts and num_samples should be the loop bounds
+        for log_prob in log_probs:
             prompt_log_probs.append([])
-            for _ in range(num_samples):
+            for _ in range(len(log_probs)):
                 lps = log_probs[i]
                 prompt_log_probs[-1].append(sum(lps) / len(lps))
                 i += 1
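Note: as merged, both loops run over log_probs, so i is incremented len(log_probs)**2 times while log_probs[i] is only valid for the first len(log_probs) of them; any run with more than one entry raises IndexError. The TODO points at the intended shape. A sketch, assuming log_probs is a flat prompt-major list with num_samples entries per prompt:

    def _compute_avg_likelihood(self, prompts, log_probs, num_samples):
        i = 0
        prompt_log_probs = []
        for _ in prompts:                  # one bucket per prompt
            prompt_log_probs.append([])
            for _ in range(num_samples):   # num_samples results per prompt
                lps = log_probs[i]         # token logprobs for one sample
                prompt_log_probs[-1].append(sum(lps) / len(lps))
                i += 1
        return prompt_log_probs            # the real method may aggregate further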
automatic_prompt_engineer/llm.py (100 changes: 63 additions & 37 deletions)
@@ -4,8 +4,12 @@
 import time
 from tqdm import tqdm
 from abc import ABC, abstractmethod

-import openai
+from openai import OpenAI
+
+SYSTEM_PROMPT = 'You are a kick-ass prompt engineer. You are given input variables and the output generated by an LLM. Find the right prompt for this batch of inputs and outputs.'
+
+from automatic_prompt_engineer import utils

 gpt_costs_per_thousand = {
     'davinci': 0.0200,
@@ -18,10 +22,11 @@
 def model_from_config(config, disable_tqdm=True):
     """Returns a model based on the config."""
     model_type = config["name"]
+    client = OpenAI()
     if model_type == "GPT_forward":
-        return GPT_Forward(config, disable_tqdm=disable_tqdm)
+        return GPT_Forward(config, client, disable_tqdm=disable_tqdm)
     elif model_type == "GPT_insert":
-        return GPT_Insert(config, disable_tqdm=disable_tqdm)
+        return GPT_Insert(config, client, disable_tqdm=disable_tqdm)
     raise ValueError(f"Unknown model type: {model_type}")


@@ -54,11 +59,12 @@ def log_probs(self, text, log_prob_range):
 class GPT_Forward(LLM):
     """Wrapper for GPT-3."""

-    def __init__(self, config, needs_confirmation=False, disable_tqdm=True):
+    def __init__(self, config, client, needs_confirmation=False, disable_tqdm=True):
         """Initializes the model."""
         self.config = config
         self.needs_confirmation = needs_confirmation
         self.disable_tqdm = disable_tqdm
+        self.client = client

     def confirm_cost(self, texts, n, max_tokens):
         total_estimated_cost = 0
@@ -155,18 +161,21 @@ def __generate_text(self, prompt, n):
         for i in range(len(prompt)):
             prompt[i] = prompt[i].replace('[APE]', '').strip()
         response = None
+
+        messages = utils.get_messages(prompt)
+
         while response is None:
             try:
-                response = openai.Completion.create(
-                    **config, prompt=prompt)
+                response = self.client.chat.completions.create(
+                    **config, messages=messages)
             except Exception as e:
                 if 'is greater than the maximum' in str(e):
                     raise BatchSizeException()
                 print(e)
                 print('Retrying...')
                 time.sleep(5)

-        return [response['choices'][i]['text'] for i in range(len(response['choices']))]
+        return [response.choices[i].message.content for i in range(len(response.choices))]

     def __complete(self, prompt, n):
         """Generates text from the model and returns the log prob data."""
@@ -178,10 +187,13 @@ def __complete(self, prompt, n):
         for i in range(len(prompt)):
             prompt[i] = prompt[i].replace('[APE]', '').strip()
         response = None
+
+        messages = utils.get_messages(prompt)
+
         while response is None:
             try:
-                response = openai.Completion.create(
-                    **config, prompt=prompt)
+                response = self.client.chat.completions.create(
+                    **config, messages=messages)
             except Exception as e:
                 print(e)
                 print('Retrying...')
@@ -199,42 +211,51 @@ def __log_probs(self, text, log_prob_range=None):
                 assert lower_index >= 0
                 assert upper_index - 1 < len(text[i])
         config = self.config['gpt_config'].copy()
-        config['logprobs'] = 1
-        config['echo'] = True
-        config['max_tokens'] = 0
-        if isinstance(text, list):
-            text = [f'\n{text[i]}' for i in range(len(text))]
-        else:
-            text = f'\n{text}'
+        config['logprobs'] = True
+        config['top_logprobs'] = 1
+        # config['echo'] = True
+        config['max_tokens'] = 50
+        # if isinstance(text, list):
+        #     text = [f'\n{text[i]}' for i in range(len(text))]
+        # else:
+        #     text = f'\n{text}'
         response = None
+        messages = utils.get_messages(text)

         while response is None:
             try:
-                response = openai.Completion.create(
-                    **config, prompt=text)
+                response = self.client.chat.completions.create(
+                    **config, messages=messages)
             except Exception as e:
                 print(e)
                 print('Retrying...')
                 time.sleep(5)
-        log_probs = [response['choices'][i]['logprobs']['token_logprobs'][1:]
-                     for i in range(len(response['choices']))]
-        tokens = [response['choices'][i]['logprobs']['tokens'][1:]
-                  for i in range(len(response['choices']))]
-        offsets = [response['choices'][i]['logprobs']['text_offset'][1:]
-                   for i in range(len(response['choices']))]
-
-        # Subtract 1 from the offsets to account for the newline
-        for i in range(len(offsets)):
-            offsets[i] = [offset - 1 for offset in offsets[i]]
-
-        if log_prob_range is not None:
-            # First, we need to find the indices of the tokens in the log probs
-            # that correspond to the tokens in the log_prob_range
-            for i in range(len(log_probs)):
-                lower_index, upper_index = self.get_token_indices(
-                    offsets[i], log_prob_range[i])
-                log_probs[i] = log_probs[i][lower_index:upper_index]
-                tokens[i] = tokens[i][lower_index:upper_index]
+        log_probs = []
+        tokens = []
+        idx = 0
+        jdx = 0
+
+        try:
+            for i in range(len(response.choices)):
+                idx = i
+                this_log_probs = []
+                this_tokens = []
+                choice = response.choices[i]
+                for j in range(len(choice.logprobs.content)):
+                    jdx = j
+                    this_log_probs.append(choice.logprobs.content[j].logprob)
+                    this_tokens.append(choice.logprobs.content[j].token)
+
+                log_probs.append(this_log_probs)  # keep this choice's logprobs alongside its tokens
+                tokens.append(this_tokens)
+
+        except AttributeError:
+            pass  # choice.logprobs is None when the model returns no logprobs
+        finally:
+            print(f"logprobs {response.choices[idx].logprobs.content[jdx]}")

         return log_probs, tokens

     def get_token_indices(self, offsets, log_prob_range):
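Note: two semantic changes hide in this hunk. First, the old code used echo=True with max_tokens=0 to score the logprobs of the input text itself, while the chat endpoint only returns logprobs for generated tokens, so the method now measures something different. Second, the chat response carries no text_offset field, which leaves the log_prob_range slicing (and get_token_indices below) without offsets to work from. If slicing were still needed, offsets would have to be rebuilt from the token strings, e.g. with a helper like this (hypothetical, not in the PR):

    def offsets_from_tokens(tokens):
        """Start offset of each token in the concatenated text."""
        offsets, pos = [], 0
        for tok in tokens:
            offsets.append(pos)
            pos += len(tok)
        return offsets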
@@ -258,11 +279,12 @@ def get_token_indices(self, offsets, log_prob_range):

 class GPT_Insert(LLM):

-    def __init__(self, config, needs_confirmation=False, disable_tqdm=True):
+    def __init__(self, config, client, needs_confirmation=False, disable_tqdm=True):
         """Initializes the model."""
         self.config = config
         self.needs_confirmation = needs_confirmation
         self.disable_tqdm = disable_tqdm
+        self.client = client

     def confirm_cost(self, texts, n, max_tokens):
         total_estimated_cost = 0
@@ -314,10 +336,14 @@ def __generate_text(self, prompt, n):
         prefix = prompt[0].split('[APE]')[0]
         suffix = prompt[0].split('[APE]')[1]
         response = None
+
+        messages = utils.get_messages(prompt)
+
+
         while response is None:
             try:
-                response = openai.Completion.create(
-                    **config, prompt=prefix, suffix=suffix)
+                response = self.client.chat.completions.create(
+                    **config, messages=messages, suffix=suffix)
             except Exception as e:
                 print(e)
                 print('Retrying...')
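Note: chat.completions.create() does not accept a suffix keyword (that parameter belongs to the legacy completions endpoint), so the insert-mode call above should be rejected by the client. One possible workaround, an assumption rather than this PR's code, is to fold both halves into the user message:

    def insert_messages(prefix, suffix):
        """Hypothetical insert-mode emulation for the chat endpoint."""
        return [
            {'role': 'system', 'content': SYSTEM_PROMPT},
            {'role': 'user', 'content': (
                'Write only the text that belongs between BEFORE and AFTER.\n'
                f'BEFORE:\n{prefix}\n\nAFTER:\n{suffix}')},
        ]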
experiments/configs/instruction_induction.yaml (6 changes: 3 additions & 3 deletions)
@@ -6,7 +6,7 @@ generation:
     name: GPT_forward
     batch_size: 500
     gpt_config:
-      model: text-davinci-002
+      model: gpt-3.5-turbo
       temperature: 0.9
       max_tokens: 50
       top_p: 0.9
@@ -20,7 +20,7 @@ evaluation:
     name: GPT_forward
     batch_size: 20
     gpt_config:
-      model: text-davinci-002
+      model: gpt-3.5-turbo
       temperature: 0.7
       max_tokens: 200
       top_p: 1.0
@@ -31,7 +31,7 @@ demo:
     name: GPT_forward
     batch_size: 500
     gpt_config:
-      model: text-davinci-002
+      model: gpt-3.5-turbo
       temperature: 0.7
       max_tokens: 200
       top_p: 1.0
experiments/configs/truthful_qa.yaml (6 changes: 3 additions & 3 deletions)
@@ -6,7 +6,7 @@ generation:
     name: GPT_forward
     batch_size: 500
     gpt_config:
-      model: text-davinci-002
+      model: gpt-3.5-turbo
       temperature: 0.9
       max_tokens: 50
       top_p: 0.9
@@ -19,7 +19,7 @@ evaluation:
     name: GPT_forward
     batch_size: 20
     gpt_config:
-      model: text-davinci-002
+      model: gpt-3.5-turbo
       temperature: 0.7
       max_tokens: 200
       top_p: 1.0
@@ -30,7 +30,7 @@ demo:
     name: GPT_forward
     batch_size: 500
     gpt_config:
-      model: text-davinci-002
+      model: gpt-3.5-turbo
       temperature: 0.7
       max_tokens: 200
       top_p: 1.0