ngruver · zaizou · Feb 20, 2024 · Feb 20, 2024 · Feb 20, 2024 · Feb 20, 2024
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+logs/*
+*.pyc
+.vscode/*
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,12 @@
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "Python Debugger: Current File",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "${file}",
+      "console": "integratedTerminal"
+    }
+  ]
+}
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,4 @@
+{
+    "python.analysis.autoImportCompletions": true,
+    "python.analysis.typeCheckingMode": "basic"
+}
diff --git a/README.md b/README.md
@@ -19,15 +19,32 @@ After installation, activate the environment with
 conda activate llmtime
 ```
 If you prefer not using conda, you can also install the dependencies listed in `install.sh` manually. 
+Please note that an API key is only required for the model(s) you actually use.
 
 Add your openai api key to `~/.bashrc` with
 ```
 echo "export OPENAI_API_KEY=<your key>" >> ~/.bashrc
 ```
+On Windows (PowerShell), set the key for the current session with
+```
+$env:OPENAI_API_KEY='YOUR_API_KEY'
+```
 Add your mistral api key to `~/.bashrc` with
 ```
 echo "export MISTRAL_KEY=<your key>" >> ~/.bashrc
 ```
+On Windows (PowerShell), set the key for the current session with
+```
+$env:MISTRAL_KEY='YOUR_API_KEY'
+```
+Add your Google Gemini Pro api key to `~/.bashrc` with
+```
+echo "export GEMINI_PRO_KEY=<your key>" >> ~/.bashrc
+```
+On Windows (PowerShell), set the key for the current session with
+```
+$env:GEMINI_PRO_KEY='YOUR_API_KEY'
+```
 
 Finally, if you have a different OpenAI API base, change it in your `~/.bashrc` with
 ```

diff --git a/data/SPY_max_daily.csv b/data/SPY_max_daily.csv
diff --git a/data/__init__.py b/data/__init__.py
@@ -0,0 +1 @@
+
diff --git a/demo.ipynb b/demo.ipynb
diff --git a/demo.py b/demo.py
@@ -4,9 +4,9 @@
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
+import plotly.graph_objects as go
 import openai
-openai.api_key = os.environ['OPENAI_API_KEY']
-openai.api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1")
+from time import perf_counter
 from data.serialize import SerializerSettings
 from models.utils import grid_iter
 from models.promptcast import get_promptcast_predictions_data
@@ -15,35 +15,28 @@
 from data.small_context import get_datasets
 from models.validation_likelihood_tuning import get_autotuned_predictions_data
 
-def plot_preds(train, test, pred_dict, model_name, show_samples=False):
+
+def plot_prds_ploty(title,train, test, pred_dict, model_name, show_samples=False):
     pred = pred_dict['median']
     pred = pd.Series(pred, index=test.index)
-    plt.figure(figsize=(8, 6), dpi=100)
-    plt.plot(train)
-    plt.plot(test, label='Truth', color='black')
-    plt.plot(pred, label=model_name, color='purple')
+    fig = go.Figure()  # one Plotly figure holding train, truth, median prediction and optional samples
+    fig.add_trace(go.Scatter(x=train.index, y=train, mode='lines', name='Train'))
+    fig.add_trace(go.Scatter(x=test.index, y=test, mode='lines', name='Truth'))
+    fig.add_trace(go.Scatter(x=pred.index, y=pred, mode='lines', name=model_name))
     # shade 90% confidence interval
     samples = pred_dict['samples']
-    lower = np.quantile(samples, 0.05, axis=0)
-    upper = np.quantile(samples, 0.95, axis=0)
-    plt.fill_between(pred.index, lower, upper, alpha=0.3, color='purple')
     if show_samples:
         samples = pred_dict['samples']
         # convert df to numpy array
         samples = samples.values if isinstance(samples, pd.DataFrame) else samples
         for i in range(min(10, samples.shape[0])):
-            plt.plot(pred.index, samples[i], color='purple', alpha=0.3, linewidth=1)
-    plt.legend(loc='upper left')
+            fig.add_trace(go.Scatter(x=pred.index, y=samples[i], mode='lines', line_color='rgba(0,0,0,0.3)', showlegend=False))  # keep sample paths out of the legend
+    fig.update_layout(title=model_name, xaxis_title='Date', yaxis_title=title, showlegend=True)  # `title` labels the y-axis; the figure title is the model name
     if 'NLL/D' in pred_dict:
         nll = pred_dict['NLL/D']
         if nll is not None:
-            plt.text(0.03, 0.85, f'NLL/D: {nll:.2f}', transform=plt.gca().transAxes, bbox=dict(facecolor='white', alpha=0.5))
-    plt.show()
-
-
-
-print(torch.cuda.max_memory_allocated())
-print()
+            fig.update_layout(title=f'{model_name} (NLL/D: {nll:.2f})')  # append the score instead of overwriting the model name
+    fig.show()
 
 gpt4_hypers = dict(
     alpha=0.3,
@@ -54,10 +47,6 @@ def plot_preds(train, test, pred_dict, model_name, show_samples=False):
 )
 
 mistral_api_hypers = dict(
-    alpha=0.3,
-    basic=True,
-    temp=1.0,
-    top_p=0.8,
     settings=SerializerSettings(base=10, prec=3, signed=True, time_sep=', ', bit_sep='', minus_sign='-')
 )
 
@@ -78,6 +67,11 @@ def plot_preds(train, test, pred_dict, model_name, show_samples=False):
     settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True)
 )
 
+gemini_pro_hypers = dict(
+    settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True)
+)
+
+
 
 promptcast_hypers = dict(
     temp=0.7,
@@ -100,35 +94,64 @@ def plot_preds(train, test, pred_dict, model_name, show_samples=False):
      'LLMA2': {'model': 'llama-7b', **llma2_hypers},
      'mistral': {'model': 'mistral', **llma2_hypers},
      'mistral-api-tiny': {'model': 'mistral-api-tiny', **mistral_api_hypers},
-     'mistral-api-small': {'model': 'mistral-api-tiny', **mistral_api_hypers},
-     'mistral-api-medium': {'model': 'mistral-api-tiny', **mistral_api_hypers},
+     'mistral-api-small': {'model': 'mistral-api-small', **mistral_api_hypers},
+     'mistral-api-medium': {'model': 'mistral-api-medium', **mistral_api_hypers},
+     'mistral-api-stocks-tiny': {'model': 'mistral-api-tiny', **mistral_api_hypers},
+     'mistral-api-stocks-small': {'model': 'mistral-api-small', **mistral_api_hypers},
+     'mistral-api-stocks-medium': {'model': 'mistral-api-stocks-medium', **mistral_api_hypers},
+     'gemini-pro': {'model': 'gemini-pro', **gemini_pro_hypers},
      'ARIMA': arima_hypers,
-
+     'fingpt': {'model': 'fingpt', **llma2_hypers},
  }
 
 
+#uncomment to use a model
 model_predict_fns = {
     #'LLMA2': get_llmtime_predictions_data,
     #'mistral': get_llmtime_predictions_data,
     #'LLMTime GPT-4': get_llmtime_predictions_data,
-    'mistral-api-tiny': get_llmtime_predictions_data
+    #'mistral-api-tiny': get_llmtime_predictions_data,
+    #'mistral-api-stocks-medium': get_llmtime_predictions_data,
+    #'gemini-pro': get_llmtime_predictions_data,
+    'fingpt': get_llmtime_predictions_data
 }
 
 
+
 model_names = list(model_predict_fns.keys())
 
-datasets = get_datasets()
-ds_name = 'AirPassengersDataset'
 
 
+ds_name = 'SPY Index Daily'  # used as the plot's y-axis label and stored in the model hypers as 'dataset_name'
+df = pd.read_csv('data/SPY_max_daily.csv')
+
+
+dfTrain = df.iloc[0:int(len(df)*0.8*0.5)]  # training window: roughly the first 40% of the rows
+dfTrain = dfTrain.set_index('Date')
+train = dfTrain.iloc[:,1]  # second column once 'Date' is the index -- presumably a price column; verify against the CSV header
+dfTest = df[int(len(df)*0.8*0.5):int(len(df)*0.5)]  # test window: rows from the ~40% mark up to the 50% mark; later rows are unused
+dfTest = dfTest.set_index('Date')
+test = dfTest.iloc[:,1]  # same column as `train`
+
+"""""
+datasets = get_datasets()
+ds_name = 'AirPassengersDataset'
 data = datasets[ds_name]
 train, test = data # or change to your own data
-out = {}
+"""""
+
+
 
-for model in model_names: # GPT-4 takes a about a minute to run
-    model_hypers[model].update({'dataset_name': ds_name}) # for promptcast
+out = {}
+start_time = perf_counter()
+for model in model_names:
+    model_hypers[model].update({'dataset_name': ds_name})
     hypers = list(grid_iter(model_hypers[model]))
     num_samples = 10
-    pred_dict = get_autotuned_predictions_data(train, test, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False)
+    pred_dict = get_llmtime_predictions_data(train, test, model, model_hypers[model]['settings'],num_samples)
     out[model] = pred_dict
-    plot_preds(train, test, pred_dict, model, show_samples=True)
+    plot_prds_ploty(ds_name,train, test, pred_dict, model, show_samples=True)
+passed_time = perf_counter() - start_time
+print(f"Execution time  {passed_time}")
+
+
diff --git a/finbert_utils.py b/finbert_utils.py
@@ -0,0 +1,28 @@
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+from typing import Tuple 
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
+model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert").to(device)
+labels = ["positive", "negative", "neutral"]
+
+def estimate_sentiment(news):
+    """Return (probability, label) for the dominant sentiment of `news`.
+
+    Returns (0, "neutral") when `news` is empty or None."""
+    if not news:
+        return 0, labels[-1]
+    tokens = tokenizer(news, return_tensors="pt", padding=True).to(device)
+    with torch.no_grad():  # inference only: do not build an autograd graph
+        logits = model(tokens["input_ids"], attention_mask=tokens["attention_mask"])["logits"]
+    # Sum logits over all inputs so the softmax yields one distribution for the whole batch.
+    probs = torch.nn.functional.softmax(torch.sum(logits, 0), dim=-1)
+    best = torch.argmax(probs)  # computed once, reused for both return values
+    return probs[best], labels[best]
+
+
+if __name__ == "__main__":
+    tensor, sentiment = estimate_sentiment(['markets responded negatively to the news!','traders were displeased!'])
+    print(tensor, sentiment)
+    print(torch.cuda.is_available())
diff --git a/install.sh b/install.sh
@@ -16,5 +16,7 @@ pip install multiprocess
 pip install SentencePiece
 pip install accelerate
 pip install gdown
-pip install mistralai #for mistral models
+pip install mistralai
+pip install plotly
+pip install google-generativeai # for gemini-pro
 conda deactivate