|
| 1 | +"""Upload a batch of inferences to the Openlayer platform.""" |
| 2 | + |
| 3 | +import os |
| 4 | +import tarfile |
| 5 | +import tempfile |
| 6 | +import time |
| 7 | +from typing import Optional |
| 8 | +import httpx |
| 9 | + |
| 10 | +import pandas as pd |
| 11 | + |
| 12 | +from ... import Openlayer |
| 13 | +from ..._utils import maybe_transform |
| 14 | +from ...types.inference_pipelines import data_stream_params |
| 15 | +from .. import utils |
| 16 | +from . import StorageType, _upload |
| 17 | + |
| 18 | + |
def upload_batch_inferences(
    client: Openlayer,
    inference_pipeline_id: str,
    dataset_df: pd.DataFrame,
    config: data_stream_params.Config,
    storage_type: Optional[StorageType] = None,
) -> None:
    """Uploads a batch of inferences to the Openlayer platform.

    Writes ``dataset_df`` and its config to a temp directory, tars them,
    uploads the tarball to the storage backend via a presigned URL, and then
    notifies the backend so the data is ingested.

    Args:
        client: Configured Openlayer client used for API calls.
        inference_pipeline_id: ID of the inference pipeline receiving the data.
        dataset_df: Batch of inference rows to upload.
        config: Data-stream configuration. Its ``label`` is forced to
            ``"production"`` in the uploaded copy; the caller's dict is not
            modified.
        storage_type: Optional storage backend override (defaults to the
            uploader's default when ``None``).
    """
    uploader = _upload.Uploader(client, storage_type)
    object_name = f"batch_data_{time.time()}_{inference_pipeline_id}.tar.gz"

    # Fetch presigned url
    presigned_url_response = client.storage.presigned_url.create(
        object_name=object_name,
    )

    # Write dataset and config to temp directory
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_file_path = f"{tmp_dir}/dataset.csv"
        dataset_df.to_csv(temp_file_path, index=False)

        # Work on a shallow copy so the caller's config dict is not mutated.
        config = dict(config)
        config["label"] = "production"
        utils.write_yaml(
            maybe_transform(config, data_stream_params.Config),
            f"{tmp_dir}/dataset_config.yaml",
        )

        # Build the tarball in a *separate* temp directory: if it lived inside
        # tmp_dir, tar.add(tmp_dir) below would recurse into the directory and
        # try to archive the partially written tarball into itself.
        with tempfile.TemporaryDirectory() as tar_dir:
            tar_file_path = os.path.join(tar_dir, object_name)
            with tarfile.open(tar_file_path, mode="w:gz") as tar:
                tar.add(tmp_dir, arcname="monitoring_data")

            # Upload to storage
            uploader.upload(
                file_path=tar_file_path,
                object_name=object_name,
                presigned_url_response=presigned_url_response,
            )

    # Notify the backend
    client.post(
        f"/inference-pipelines/{inference_pipeline_id}/data",
        cast_to=httpx.Response,
        body={
            "storageUri": presigned_url_response.storage_uri,
            "performDataMerge": False,
        },
    )
0 commit comments