Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
logs/*
*.pyc
.vscode/*
12 changes: 12 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
},
]
}
4 changes: 4 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"python.analysis.autoImportCompletions": true,
"python.analysis.typeCheckingMode": "basic"
}
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,32 @@ After installation, activate the environment with
conda activate llmtime
```
If you prefer not using conda, you can also install the dependencies listed in `install.sh` manually.
Note that an API key is only required for the model(s) you actually use.

Add your OpenAI API key to `~/.bashrc` with
```
echo "export OPENAI_API_KEY=<your key>" >> ~/.bashrc
```
On Windows PowerShell (sets the variable for the current session only):
```
$env:OPENAI_API_KEY='YOUR_API_KEY'
```
Add your Mistral API key to `~/.bashrc` with
```
echo "export MISTRAL_KEY=<your key>" >> ~/.bashrc
```
On Windows PowerShell (sets the variable for the current session only):
```
$env:MISTRAL_KEY='YOUR_API_KEY'
```
Add your Google Gemini Pro API key to `~/.bashrc` with
```
echo "export GEMINI_PRO_KEY=<your key>" >> ~/.bashrc
```
On Windows PowerShell (sets the variable for the current session only):
```
$env:GEMINI_PRO_KEY='YOUR_API_KEY'
```

Finally, if you have a different OpenAI API base, change it in your `~/.bashrc` with
```
Expand Down
7,814 changes: 7,814 additions & 0 deletions data/SPY_max_daily.csv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

135 changes: 113 additions & 22 deletions demo.ipynb

Large diffs are not rendered by default.

91 changes: 57 additions & 34 deletions demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import openai
openai.api_key = os.environ['OPENAI_API_KEY']
openai.api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1")
from time import perf_counter
from data.serialize import SerializerSettings
from models.utils import grid_iter
from models.promptcast import get_promptcast_predictions_data
Expand All @@ -15,35 +15,28 @@
from data.small_context import get_datasets
from models.validation_likelihood_tuning import get_autotuned_predictions_data

def plot_preds(train, test, pred_dict, model_name, show_samples=False):

def plot_prds_ploty(title,train, test, pred_dict, model_name, show_samples=False):
pred = pred_dict['median']
pred = pd.Series(pred, index=test.index)
plt.figure(figsize=(8, 6), dpi=100)
plt.plot(train)
plt.plot(test, label='Truth', color='black')
plt.plot(pred, label=model_name, color='purple')
fig = go.Figure()
fig.add_trace(go.Scatter(x=train.index, y=train, mode='lines', name='Train'))
fig.add_trace(go.Scatter(x=test.index, y=test, mode='lines', name='Truth'))
fig.add_trace(go.Scatter(x=pred.index, y=pred, mode='lines', name=model_name))
# shade 90% confidence interval
samples = pred_dict['samples']
lower = np.quantile(samples, 0.05, axis=0)
upper = np.quantile(samples, 0.95, axis=0)
plt.fill_between(pred.index, lower, upper, alpha=0.3, color='purple')
if show_samples:
samples = pred_dict['samples']
# convert df to numpy array
samples = samples.values if isinstance(samples, pd.DataFrame) else samples
for i in range(min(10, samples.shape[0])):
plt.plot(pred.index, samples[i], color='purple', alpha=0.3, linewidth=1)
plt.legend(loc='upper left')
fig.add_trace(go.Scatter(x=pred.index, y=samples[i], mode='lines', line_color='rgba(0,0,0,0.3)'))
fig.update_layout(title=model_name, xaxis_title='Date', yaxis_title=title, showlegend=True)
if 'NLL/D' in pred_dict:
nll = pred_dict['NLL/D']
if nll is not None:
plt.text(0.03, 0.85, f'NLL/D: {nll:.2f}', transform=plt.gca().transAxes, bbox=dict(facecolor='white', alpha=0.5))
plt.show()



print(torch.cuda.max_memory_allocated())
print()
fig.update_layout(title= f'NLL/D: {nll:.2f}')
fig.show()

gpt4_hypers = dict(
alpha=0.3,
Expand All @@ -54,10 +47,6 @@ def plot_preds(train, test, pred_dict, model_name, show_samples=False):
)

mistral_api_hypers = dict(
alpha=0.3,
basic=True,
temp=1.0,
top_p=0.8,
settings=SerializerSettings(base=10, prec=3, signed=True, time_sep=', ', bit_sep='', minus_sign='-')
)

Expand All @@ -78,6 +67,11 @@ def plot_preds(train, test, pred_dict, model_name, show_samples=False):
settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True)
)

gemini_pro_hypers = dict(
settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True)
)



promptcast_hypers = dict(
temp=0.7,
Expand All @@ -100,35 +94,64 @@ def plot_preds(train, test, pred_dict, model_name, show_samples=False):
'LLMA2': {'model': 'llama-7b', **llma2_hypers},
'mistral': {'model': 'mistral', **llma2_hypers},
'mistral-api-tiny': {'model': 'mistral-api-tiny', **mistral_api_hypers},
'mistral-api-small': {'model': 'mistral-api-tiny', **mistral_api_hypers},
'mistral-api-medium': {'model': 'mistral-api-tiny', **mistral_api_hypers},
'mistral-api-small': {'model': 'mistral-api-small', **mistral_api_hypers},
'mistral-api-medium': {'model': 'mistral-api-medium', **mistral_api_hypers},
'mistral-api-stocks-tiny': {'model': 'mistral-api-tiny', **mistral_api_hypers},
'mistral-api-stocks-small': {'model': 'mistral-api-small', **mistral_api_hypers},
'mistral-api-stocks-medium': {'model': 'mistral-api-stocks-medium', **mistral_api_hypers},
'gemini-pro': {'model': 'gemini-pro', **gemini_pro_hypers},
'ARIMA': arima_hypers,

'fingpt': {'model': 'fingpt', **llma2_hypers},
}


#uncomment to use a model
model_predict_fns = {
#'LLMA2': get_llmtime_predictions_data,
#'mistral': get_llmtime_predictions_data,
#'LLMTime GPT-4': get_llmtime_predictions_data,
'mistral-api-tiny': get_llmtime_predictions_data
#'mistral-api-tiny': get_llmtime_predictions_data,
#'mistral-api-stocks-medium': get_llmtime_predictions_data,
#'gemini-pro': get_llmtime_predictions_data,
'fingpt': get_llmtime_predictions_data
}



model_names = list(model_predict_fns.keys())

datasets = get_datasets()
ds_name = 'AirPassengersDataset'


ds_name = 'SPY Index Daily'
df = pd.read_csv('data/SPY_max_daily.csv')


dfTrain = df.iloc[0:int(len(df)*0.8*0.5)]
dfTrain = dfTrain.set_index('Date')
train = dfTrain.iloc[:,1]
dfTest = df[int(len(df)*0.8*0.5):int(len(df)*0.5)]
dfTest = dfTest.set_index('Date')
test = dfTest.iloc[:,1]

"""""
datasets = get_datasets()
ds_name = 'AirPassengersDataset'
data = datasets[ds_name]
train, test = data # or change to your own data
out = {}
"""""



for model in model_names: # GPT-4 takes a about a minute to run
model_hypers[model].update({'dataset_name': ds_name}) # for promptcast
out = {}
start_time = perf_counter()
for model in model_names:
model_hypers[model].update({'dataset_name': ds_name})
hypers = list(grid_iter(model_hypers[model]))
num_samples = 10
pred_dict = get_autotuned_predictions_data(train, test, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False)
pred_dict = get_llmtime_predictions_data(train, test, model, model_hypers[model]['settings'],num_samples)
out[model] = pred_dict
plot_preds(train, test, pred_dict, model, show_samples=True)
plot_prds_ploty(ds_name,train, test, pred_dict, model, show_samples=True)
passed_time = perf_counter() - start_time
print(f"Execution time {passed_time}")


28 changes: 28 additions & 0 deletions finbert_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from typing import Tuple
# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load the "ProsusAI/finbert" tokenizer and sequence-classification model once,
# at import time; the model is moved to `device`.
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert").to(device)
# Class names used to translate an output index into a sentiment label.
# NOTE(review): presumably this order matches the model's logit order -- confirm
# against the model's config (id2label).
labels = ["positive", "negative", "neutral"]

def estimate_sentiment(news):
    """Estimate one aggregate sentiment for a batch of news snippets.

    The snippets are tokenized together, the per-snippet logits are summed
    across the batch, and a softmax over the summed logits gives a single
    class distribution for the whole batch.

    Args:
        news: The text(s) to score, passed straight to the tokenizer
            (the caller in this file passes a list of strings). Any falsy
            value (``None``, ``""``, ``[]``) skips the model entirely.

    Returns:
        A ``(probability, sentiment)`` tuple. ``sentiment`` is the entry of
        ``labels`` with the highest probability and ``probability`` is that
        probability, as an element of a torch tensor. For falsy input the
        result is ``(0, labels[-1])``.
    """
    if not news:
        # Nothing to score: report the last label with zero confidence.
        return 0, labels[-1]

    tokens = tokenizer(news, return_tensors="pt", padding=True).to(device)

    # Pure inference: disable autograd so no graph is built and the returned
    # probability does not keep intermediate activations alive.
    with torch.no_grad():
        logits = model(
            tokens["input_ids"], attention_mask=tokens["attention_mask"]
        )["logits"]
        # Sum logits over the batch dimension so all snippets vote together.
        result = torch.nn.functional.softmax(torch.sum(logits, 0), dim=-1)

    best = int(torch.argmax(result))  # computed once, reused for both outputs
    probability = result[best]
    sentiment = labels[best]
    return probability, sentiment


if __name__ == "__main__":
    # Manual smoke test: score two sample headlines and report whether CUDA
    # is available on this machine.
    sample_headlines = [
        'markets responded negatively to the news!',
        'traders were displeased!',
    ]
    probability, sentiment = estimate_sentiment(sample_headlines)
    print(probability, sentiment)
    print(torch.cuda.is_available())
4 changes: 3 additions & 1 deletion install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,7 @@ pip install multiprocess
pip install SentencePiece
pip install accelerate
pip install gdown
pip install mistralai #for mistral models
pip install mistralai
pip install plotly
pip install google-generativeai  # for Google Gemini models
conda deactivate
Loading