diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..dfe5ddb
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,6 @@
+FROM python:3.10-slim
+RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
\ No newline at end of file
diff --git a/api.py b/api.py
new file mode 100644
index 0000000..feb3fca
--- /dev/null
+++ b/api.py
@@ -0,0 +1,144 @@
+# api.py
+
+import os
+import pickle
+import json
+import time
+from functools import lru_cache
+import requests  # Importante: para hacer peticiones HTTP
+import pandas as pd
+from fastapi import FastAPI, HTTPException, Response
+from pydantic import BaseModel
+from typing import Dict, Any, List
+
+from prometheus_client import Histogram, generate_latest
+
+# --- Modelos Pydantic (se mantienen igual) ---
+class PredictionRequest(BaseModel):
+    features: Dict[str, float]
+
+class PredictionResponse(BaseModel):
+    prediction: Any
+    model_details: Dict[str, str]
+
+# --- Métricas de Prometheus (se mantienen igual) ---
+INFERENCE_LATENCY = Histogram(
+    'inference_latency_seconds',
+    'Latencia de las peticiones de inferencia (en segundos)',
+    ['dataset', 'model_type']
+)
+
+# --- Aplicación FastAPI (SIN LIFESPAN) ---
+app = FastAPI(
+    title="API de Inferencia (v3 - Desacoplada)",
+    version="3.0.0",
+    description="Provee acceso a modelos de ML consumiendo la API de Registro."
+)
+
+# URL del servicio de gestión, leído del entorno
+MANAGEMENT_API_URL = os.environ.get("MANAGEMENT_API_URL", "http://localhost:9000")
+
+# --- NUEVAS Funciones de Ayuda ---
+@lru_cache(maxsize=32)
+def get_artifact_from_management_api(dataset: str, model_type: str, artifact_name: str):
+    """
+    Llama a la management-api para obtener un artefacto, lo deserializa y lo cachea.
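+
+    Note (caching behaviour, not exercised elsewhere in this PR): lru_cache(maxsize=32) keeps up
+    to 32 deserialized artifacts per worker process, so each api-service replica holds its own
+    cache; it is dropped on restart and could be cleared manually with
+    get_artifact_from_management_api.cache_clear() if stale artifacts ever become a concern.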
+ """ + print(f"CACHE MISS: Pidiendo a management-api: {dataset}/{model_type}/{artifact_name}") + try: + url = f"{MANAGEMENT_API_URL}/registry/artifacts/{dataset}/{model_type}/{artifact_name}" + response = requests.get(url, timeout=10) + + response.raise_for_status() # Lanza una excepción para códigos de error (4xx o 5xx) + + serialized_obj_bytes = response.content + + # Deserializar según el tipo + if artifact_name == "metrics": + return json.loads(serialized_obj_bytes.decode('utf-8')) + else: + return pickle.loads(serialized_obj_bytes) + + except requests.exceptions.HTTPError as e: + status_code = e.response.status_code if e.response else 503 + try: + # Intentar parsear el detalle del error que viene de management-api + detail = e.response.json().get("detail", e.response.text) + except json.JSONDecodeError: + detail = e.response.text + raise HTTPException(status_code=status_code, detail=f"Error desde management-api: {detail}") + except requests.exceptions.RequestException as e: + raise HTTPException(status_code=503, detail=f"No se pudo contactar a management-api: {e}") + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error inesperado al procesar artefacto: {e}") + +# --- Endpoints de la API (REESCRITOS PARA USAR LA NUEVA LÓGICA) --- + +@app.get("/models", summary="Listar todos los modelos disponibles") +def list_models(): + """Consulta a la management-api para obtener la lista de modelos.""" + try: + url = f"{MANAGEMENT_API_URL}/registry/models" + response = requests.get(url, timeout=10) + response.raise_for_status() + return response.json() + except Exception as e: + raise HTTPException(status_code=503, detail=f"No se pudo conectar a management-api para listar modelos: {e}") + +@app.get("/models/{dataset}/{model_type}/features", summary="Obtener características de un modelo") +def get_model_features(dataset: str, model_type: str): + """Obtiene la lista de características desde la management-api.""" + try: + features = get_artifact_from_management_api(dataset, model_type, "feature_names") + return {"features": features} + except HTTPException as e: + raise e # Re-lanzar la excepción que ya viene formateada + +@app.get("/models/{dataset}/{model_type}/metrics", summary="Obtener métricas de entrenamiento") +def get_model_metrics(dataset: str, model_type: str): + """Obtiene las métricas del modelo desde la management-api.""" + try: + metrics = get_artifact_from_management_api(dataset, model_type, "metrics") + return metrics + except HTTPException as e: + raise e + +@app.post("/predict/{dataset}/{model_type}", response_model=PredictionResponse, summary="Realizar una predicción") +def predict(dataset: str, model_type: str, request: PredictionRequest): + """Realiza una predicción usando un modelo y registra la latencia.""" + start_time = time.time() + required_features = [] # Para el mensaje de error + try: + pipeline = get_artifact_from_management_api(dataset, model_type, "pipeline") + required_features = get_artifact_from_management_api(dataset, model_type, "feature_names") + + input_df = pd.DataFrame([request.features])[required_features] + prediction = pipeline.predict(input_df) + + prediction_value = prediction[0].item() if hasattr(prediction[0], 'item') else prediction[0] + + return PredictionResponse(prediction=prediction_value, model_details={"dataset": dataset, "model_type": model_type}) + + except KeyError as e: + raise HTTPException(status_code=400, detail=f"Falta la característica requerida: {e}. 
Se requieren: {required_features}") + except HTTPException as e: + raise e + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error durante la predicción: {e}") + finally: + latency = time.time() - start_time + INFERENCE_LATENCY.labels(dataset=dataset, model_type=model_type).observe(latency) + print(f"PREDICT LATENCY: {dataset}/{model_type} -> {latency:.4f}s") + + + +@app.get("/health", summary="Comprobación de Salud", status_code=200) +def health_check(): + """ + Endpoint simple para que Docker Healthcheck pueda verificar que el servicio está vivo. + """ + return {"status": "ok"} + +@app.get("/metrics", summary="Exponer métricas para Prometheus") +def get_prometheus_metrics(): + return Response(content=generate_latest(), media_type="text/plain") \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..6172849 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,78 @@ +# docker-compose.yml (VERSIÓN FINAL - RESILIENTE) + +services: + # Los servicios de Ray no cambian. ray-head sigue siendo el SPOF principal por diseño. + ray-head: + build: . + # Ya no usamos container_name para facilitar el escalado, aunque en head no es estrictamente necesario. + ports: + - "8265:8265" + - "10001:10001" + command: | + bash -c " + ray start --head --num-cpus=1 --port=6379 --dashboard-host=0.0.0.0 --include-dashboard=true --ray-client-server-port=10001 --resources='{\"is_head_node\": 1}' && \ + echo 'Nodo HEAD en marcha...' && \ + tail -f /dev/null + " + shm_size: 2.5gb + + ray-worker: # Unificamos los workers para facilitar el escalado + build: . + depends_on: + - ray-head + deploy: + replicas: 2 # Podemos escalar los workers de Ray fácilmente + command: | + bash -c " + ray start --address=ray-head:6379 --num-cpus=2 && \ + echo 'Worker conectado al HEAD' && \ + tail -f /dev/null + " + shm_size: 2.5gb + + # SOLUCIÓN #1: Replicación del servicio crítico + management-api: + build: . + # Ya no se usa container_name para permitir la replicación. + # Ya no se exponen puertos, la comunicación es interna a través de la GUI. + deploy: + replicas: 3 # Creamos un grupo de 3 réplicas para alta disponibilidad. + environment: + - RAY_ADDRESS=ray://ray-head:10001 + - SERVICE_NAME=management-api # Para que el cliente resiliente sepa a quién buscar + command: uvicorn management_api:app --host 0.0.0.0 --port 9000 + # Quitamos el healthcheck porque la GUI ahora es responsable de manejar los fallos. + + # Hacemos lo mismo para el servicio de inferencia para consistencia + api-service: + build: . + deploy: + replicas: 3 # También replicamos el servicio de inferencia. + # ports: + # Exponemos este puerto para que sea más fácil de probar externamente (ej. con curl) + # Pero idealmente, la GUI también usaría un cliente resiliente para este. + # - "8000:8000" + environment: + # El api-service también se beneficia del balanceo de carga interno de Docker + # al hablar con las réplicas de management-api. + - MANAGEMENT_API_URL=http://management-api:9000 + command: uvicorn api:app --host 0.0.0.0 --port 8000 + + # SOLUCIÓN #2: La GUI ahora es el cliente inteligente + gui-service: + build: . + container_name: gui-service # La GUI es única, así que puede tener nombre. + depends_on: + - management-api + - api-service + ports: + - "8501:8501" + environment: + # Pasamos la información necesaria para que el ResilientClient haga su trabajo. 
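+      # For illustration only (addresses are hypothetical): with 3 management-api replicas, the
+      # GUI's ResilientClient resolves the service name via socket.getaddrinfo("management-api", 9000),
+      # obtaining e.g. 172.18.0.5 / 172.18.0.6 / 172.18.0.7, and then round-robins requests over
+      # http://<ip>:9000, falling back to the next replica whenever one fails.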
+ - MANAGEMENT_API_SERVICE_NAME=management-api + - MANAGEMENT_API_PORT=9000 + # La URL de inferencia puede apuntar al nombre del servicio; Docker hará un balanceo de carga simple (round-robin). + - INFERENCE_API_URL=http://api-service:8000 + - RAY_DASHBOARD_URL=http://ray-head:8265 + - RAY_ADDRESS=ray://ray-head:10001 + command: streamlit run gui.py --server.port=8501 --server.address=0.0.0.0 \ No newline at end of file diff --git a/gui.py b/gui.py new file mode 100644 index 0000000..c0d3cdc --- /dev/null +++ b/gui.py @@ -0,0 +1,406 @@ +# gui.py (Versión 6.1 - Funcionalidad Completa Restaurada) + +import streamlit as st +import requests +import os +import pandas as pd +import json +import time +import matplotlib.pyplot as plt +import seaborn as sns +import numpy as np +import ray +import socket +from itertools import cycle +from monitoring_utils import get_cluster_nodes_status, get_actor_status, get_aggregated_inference_stats +# ============================================================================== +# --- CLASE DE CLIENTE RESILIENTE (SIN CAMBIOS) --- +# ============================================================================== + +class ResilientClient: + """ + Un cliente HTTP que realiza descubrimiento de servicios, balanceo de carga + round-robin y reintentos del lado del cliente. + """ + def __init__(self, service_name, port): + self.service_name = service_name + self.port = port + self.servers = [] + self.server_cycler = cycle([]) + self.discover() + + def discover(self): + """Descubre o redescubre los servidores. Puede ser llamado para refrescar.""" + try: + ips = [info[4][0] for info in socket.getaddrinfo(self.service_name, self.port, socket.AF_INET)] + unique_ips = sorted(list(set(ips))) + self.servers = [f"http://{ip}:{self.port}" for ip in unique_ips] + if self.servers: + print(f"Servidores descubiertos para '{self.service_name}': {self.servers}") + self.server_cycler = cycle(self.servers) + else: + st.error(f"CRÍTICO: No se pudo descubrir ninguna réplica para el servicio '{self.service_name}'.") + self.server_cycler = cycle([]) + except socket.gaierror: + st.error(f"CRÍTICO: El nombre de servicio '{self.service_name}' no se pudo resolver. ¿Está corriendo?") + self.servers = [] + self.server_cycler = cycle([]) + + def make_request(self, method, path, **kwargs): + """ + Realiza una petición HTTP, probando cada servidor en orden hasta que + uno responda o todos fallen. + """ + if not self.servers: + raise Exception(f"No hay servidores disponibles para el servicio '{self.service_name}'.") + for _ in range(len(self.servers)): + server_url = next(self.server_cycler) + full_url = f"{server_url}{path}" + try: + print(f"Cliente Resiliente: Intentando petición a: {full_url}") + response = requests.request(method, full_url, **kwargs) + response.raise_for_status() + print(f"Cliente Resiliente: Petición exitosa a: {full_url}") + return response + except requests.exceptions.RequestException as e: + print(f"Cliente Resiliente: Fallo al contactar a {server_url}: {e}. Probando siguiente réplica...") + st.toast(f"Réplica {server_url} no responde, reintentando...", icon="⚠️") + st.error(f"El servicio '{self.service_name}' no está disponible. Todas las réplicas han fallado.") + self.discover() + raise Exception(f"El servicio '{self.service_name}' no está disponible. 
Todas las réplicas fallaron.") + +# ============================================================================== +# --- INICIALIZACIÓN Y CONFIGURACIÓN --- +# ============================================================================== + +# --- Intenta importar las utilidades de monitoreo --- +try: + ### CORREGIDO ### - Asegúrate de que esta función también sea resiliente si depende de una API + from monitoring_utils import get_cluster_nodes_status, get_actor_status, get_inference_stats + MONITORING_ENABLED = True +except ImportError: + MONITORING_ENABLED = False + print("ADVERTENCIA: monitoring_utils.py no encontrado. La pestaña de monitoreo estará desactivada.") + +# --- Configuración de la Página y Clientes Resilientes --- +st.set_page_config(layout="wide", page_title="Plataforma Distribuida de ML") + +# Cliente para el servicio de Gestión +MANAGEMENT_API_SERVICE_NAME = os.environ.get("MANAGEMENT_API_SERVICE_NAME", "management-api") +MANAGEMENT_API_PORT = int(os.environ.get("MANAGEMENT_API_PORT", 9000)) +management_client = ResilientClient(MANAGEMENT_API_SERVICE_NAME, MANAGEMENT_API_PORT) + +# Cliente para el servicio de Inferencia +INFERENCE_API_SERVICE_NAME = os.environ.get("INFERENCE_API_SERVICE_NAME", "api-service") +INFERENCE_API_PORT = int(os.environ.get("INFERENCE_API_PORT", 8000)) +inference_client = ResilientClient(INFERENCE_API_SERVICE_NAME, INFERENCE_API_PORT) + +RAY_DASHBOARD_URL = os.environ.get("RAY_DASHBOARD_URL", f"http://localhost:8265") + +# --- Conexión a Ray (solo para la pestaña de monitoreo) --- +if MONITORING_ENABLED and 'ray_initialized' not in st.session_state: + try: + ray.init(address="auto", namespace="mi_plataforma", ignore_reinit_error=True) + st.session_state['ray_initialized'] = True + except Exception as e: + st.session_state['ray_initialized'] = False + st.session_state['ray_init_error'] = e + +# --- Inicialización del Estado de la Sesión --- +if 'analyzed_files' not in st.session_state: + st.session_state['analyzed_files'] = {} +if 'configs' not in st.session_state: + st.session_state['configs'] = {} + +# ============================================================================== +# --- FUNCIONES DE AYUDA (USANDO CLIENTES RESILIENTES) --- +# ============================================================================== + +@st.cache_data(ttl=10) +def get_available_models(): + try: + response = inference_client.make_request("GET", "/models", timeout=5) + return response.json() + except Exception: return {} + +@st.cache_data(ttl=30) +def get_model_metrics(dataset, model_type): + try: + response = inference_client.make_request("GET", f"/models/{dataset}/{model_type}/metrics", timeout=10) + return response.json() + except Exception: return {} + +@st.cache_data(ttl=60) +def fetch_features_for_model(dataset, model): + try: + response = inference_client.make_request("GET", f"/models/{dataset}/{model}/features", timeout=10) + return response.json().get("features", []) + except Exception: return None + +# ============================================================================== +# --- DISEÑO DE LA INTERFAZ --- +# ============================================================================== + +st.title("📊 Plataforma de Aprendizaje Supervisado Distribuido") +st.markdown("---") + +tab_definitions = ["🗂️ Gestión y Entrenamiento", "🤖 Realizar Predicción", "📉 Métricas y Comparativa"] +if MONITORING_ENABLED: + tab_definitions.append("📡 Dashboard de Monitoreo") + +created_tabs = st.tabs(tab_definitions) +tab_gestion = created_tabs[0] +tab_prediccion = 
created_tabs[1] +tab_metricas = created_tabs[2] +tab_monitoreo = created_tabs[3] if len(created_tabs) > 3 else None + +# ============================================================================== +# --- PESTAÑA 1: GESTIÓN Y ENTRENAMIENTO --- +# ============================================================================== +with tab_gestion: + st.header("Flujo de Trabajo de Entrenamiento") + st.subheader("1. Sube tus Datasets") + uploaded_files = st.file_uploader( + "Selecciona uno o más archivos CSV", type=["csv"], accept_multiple_files=True, label_visibility="collapsed" + ) + + if uploaded_files: + for file in uploaded_files: + if file.name not in st.session_state.analyzed_files: + with st.spinner(f"Analizando `{file.name}`..."): + files_payload = {'file': (file.name, file, 'text/csv')} + try: + response = management_client.make_request("POST", "/datasets/analyze", files=files_payload, timeout=30) + st.session_state.analyzed_files[file.name] = response.json() + st.session_state.configs[file.name] = { + 'dataset_name': os.path.splitext(file.name)[0], + 'target_column': response.json().get('columns', [''])[0], + 'selected_models': ["logistic_regression", "decision_tree"] + } + except Exception as e: + st.error(f"Error de conexión al analizar `{file.name}`: {e}") + st.session_state.analyzed_files[file.name] = {"error": str(e)} + + st.markdown("---") + st.subheader("2. Configura los Parámetros de Entrenamiento") + + if not st.session_state.analyzed_files: + st.info("Sube un archivo CSV para empezar a configurar.") + else: + files_to_configure = list(st.session_state.analyzed_files.keys()) + for filename in files_to_configure: + if filename not in st.session_state.analyzed_files: continue + file_info = st.session_state.analyzed_files[filename] + config = st.session_state.configs.get(filename) + if not config: continue + + with st.container(border=True): + st.markdown(f"**Configuración para `{filename}`**") + if "error" in file_info: + st.error(f"No se pudo procesar: {file_info['error']}") + continue + + col_ds, col_target = st.columns(2) + config['dataset_name'] = col_ds.text_input("Nombre del Dataset", value=config.get('dataset_name', ''), key=f"ds_{filename}") + columns_list = file_info.get("columns", []) + try: + current_index = columns_list.index(config.get('target_column')) + except (ValueError, TypeError): current_index = 0 + config['target_column'] = col_target.selectbox("Columna Objetivo", options=columns_list, index=current_index, key=f"target_{filename}") + config['selected_models'] = st.multiselect("Modelos a Entrenar", options=["logistic_regression", "decision_tree", "random_forest"], default=config.get('selected_models', []), key=f"models_{filename}") + + if st.button(f"🗑️ Eliminar Dataset y Modelos", key=f"delete_{filename}", type="secondary"): + with st.spinner(f"Eliminando '{config['dataset_name']}'..."): + try: + management_client.make_request("DELETE", f"/models/{config['dataset_name']}", timeout=30) + st.session_state.analyzed_files.pop(filename, None) + st.session_state.configs.pop(filename, None) + st.success(f"Dataset '{config['dataset_name']}' eliminado.") + st.cache_data.clear(); st.rerun() + except Exception as e: st.error(f"Error al eliminar: {e}") + + st.markdown("---") + st.subheader("3. 
Elige tu Modo de Entrenamiento") + + ### RESTAURADO ### - Lógica de entrenamiento individual y por lotes + if not st.session_state.configs: + st.warning("Primero configura al menos un dataset en el paso 2.") + else: + # MODO GRANULAR + with st.container(border=True): + st.markdown("##### Entrenamiento Individual") + dataset_names = [cfg['dataset_name'] for cfg in st.session_state.configs.values() if 'dataset_name' in cfg] + dataset_to_train_individually = st.selectbox("Selecciona un dataset para entrenar:", options=dataset_names) + + if st.button("🚀 Entrenar este Dataset Individualmente"): + filename_to_train, config_to_train = None, None + for fname, cfg in st.session_state.configs.items(): + if cfg.get('dataset_name') == dataset_to_train_individually: + filename_to_train = fname + config_to_train = cfg + break + + if filename_to_train and config_to_train: + if not config_to_train.get('selected_models'): + st.error("Este dataset no tiene modelos seleccionados.") + else: + original_file = next((f for f in uploaded_files if f.name == filename_to_train), None) + if original_file: + with st.spinner(f"Lanzando entrenamiento para `{config_to_train['dataset_name']}`..."): + files_payload = [('files', (filename_to_train, original_file.getvalue(), 'text/csv'))] + config_payload = {'configs': json.dumps([{"dataset_name": config_to_train['dataset_name'], "target_column": config_to_train['target_column'], "filename": filename_to_train}]), 'models_to_train': config_to_train['selected_models']} + try: + management_client.make_request("POST", "/datasets/train_batch", files=files_payload, data=config_payload, timeout=120) + st.success(f"¡Trabajo para '{config_to_train['dataset_name']}' lanzado!"); st.balloons(); st.cache_data.clear() + except Exception as e: st.error(f"Error al lanzar entrenamiento: {e}") + else: st.error(f"El archivo '{filename_to_train}' ya no está disponible.") + + # MODO POR LOTES + with st.container(border=True): + st.markdown("##### Entrenamiento por Lotes") + if st.button("🚀 Entrenar TODOS los Datasets Configurados", type="primary"): + with st.spinner(f"Lanzando lote de entrenamiento..."): + success_count = 0 + for filename, config in st.session_state.configs.items(): + if not config.get('selected_models'): continue + original_file = next((f for f in uploaded_files if f.name == filename), None) + if original_file: + files_payload = [('files', (filename, original_file.getvalue(), 'text/csv'))] + config_payload = {'configs': json.dumps([{"dataset_name": config['dataset_name'], "target_column": config['target_column'], "filename": filename}]), 'models_to_train': config['selected_models']} + try: + management_client.make_request("POST", "/datasets/train_batch", files=files_payload, data=config_payload, timeout=120) + success_count += 1 + st.toast(f"Trabajo para '{config['dataset_name']}' lanzado.", icon="🚀") + except Exception as e: st.error(f"Fallo al lanzar '{config['dataset_name']}': {e}") + st.success(f"¡Lote completado! {success_count} trabajos lanzados."); st.balloons(); st.cache_data.clear() + +# ============================================================================== +# --- PESTAÑA 2: REALIZAR PREDICCIÓN --- +# ============================================================================== +with tab_prediccion: + st.header("🔮 Probar un Modelo Desplegado") + models_data = get_available_models() + if not models_data: + st.warning("No hay modelos disponibles. 
Entrena uno en la pestaña de 'Gestión'.") + else: + col1, col2 = st.columns(2) + with col1: + dataset_options = list(models_data.keys()) + selected_dataset = st.selectbox("1. Selecciona un Dataset", dataset_options, key="p_ds") + if selected_dataset and models_data.get(selected_dataset): + with col2: + model_options = models_data[selected_dataset].get("available_models", []) + selected_model = st.selectbox("2. Selecciona un Modelo", model_options, key="p_model") + if selected_model: + st.subheader(f"3. Ingresa los valores de las características") + feature_names = fetch_features_for_model(selected_dataset, selected_model) + if feature_names is not None: + with st.form("prediction_form"): + # Se corrigió la lógica para usar un diccionario directamente + feature_inputs = {feat: st.number_input(label=feat, key=f"feat_{feat}", value=0.0, format="%.4f") for feat in feature_names} + if st.form_submit_button("Realizar Predicción", type="primary"): + with st.spinner("Enviando predicción..."): + try: + # El backend espera un diccionario de features, no una lista + response = inference_client.make_request("POST", f"/predict/{selected_dataset}/{selected_model}", json={"features": feature_inputs}, timeout=15) + st.success("Predicción recibida:") + st.json(response.json()) + except Exception as e: st.error(f"Error en la predicción: {e}") + +# ============================================================================== +# --- PESTAÑA 3: MÉTRICAS Y COMPARATIVA --- +# ============================================================================== +with tab_metricas: + st.header("📉 Métricas y Comparativa de Modelos") + if st.button("Refrescar Datos de Métricas"): st.cache_data.clear(); st.rerun() + + models_data_metrics = get_available_models() + if not models_data_metrics: + st.info("No hay modelos entrenados para mostrar métricas.") + else: + ### RESTAURADO ### - Lógica completa de métricas y visualización + plot_data = []; all_metrics_data = [] + for dataset, models in models_data_metrics.items(): + for model_type in models.get("available_models", []): + metrics = get_model_metrics(dataset, model_type) + if metrics and 'accuracy' in metrics: + all_metrics_data.append({'dataset': dataset, 'model_type': model_type, **metrics}) + plot_data.append({'dataset': dataset, 'model_type': model_type, 'accuracy': metrics['accuracy'], 'f1_score_macro': metrics.get('classification_report', {}).get('macro avg', {}).get('f1-score', 0)}) + + if plot_data: + df_plot = pd.DataFrame(plot_data) + st.subheader("Comparativa de Rendimiento General") + col1, col2 = st.columns(2) + with col1: + fig1, ax1 = plt.subplots() + sns.barplot(data=df_plot, x="accuracy", y="dataset", hue="model_type", ax=ax1, orient='h') + ax1.set_title("Precisión (Accuracy)"); ax1.set_xlabel("Accuracy"); ax1.set_ylabel("Dataset") + st.pyplot(fig1) + + with col2: + fig2, ax2 = plt.subplots() + sns.barplot(data=df_plot, x="f1_score_macro", y="dataset", hue="model_type", ax=ax2, orient='h', palette="viridis") + ax2.set_title("F1-Score (Macro Avg)"); ax2.set_xlabel("F1-Score"); ax2.set_ylabel("") + st.pyplot(fig2) + + st.markdown("---") + st.subheader("Análisis Detallado por Modelo") + df_all_metrics = pd.DataFrame(all_metrics_data) + if not df_all_metrics.empty: + selected_idx = st.selectbox("Selecciona un modelo para inspeccionar:", options=df_all_metrics.index, format_func=lambda i: f"{df_all_metrics.loc[i, 'dataset']} / {df_all_metrics.loc[i, 'model_type']}") + if selected_idx is not None: + row = df_all_metrics.loc[selected_idx] + c1, c2 = 
st.columns(2) + with c1: + st.text("Reporte de Clasificación:") + st.json(row.get('classification_report', {})) + with c2: + cm = row.get('confusion_matrix') + class_report = row.get('classification_report', {}) + class_names = [k for k in class_report.keys() if k not in ['accuracy', 'macro avg', 'weighted avg']] + if cm and class_names: + fig_cm, ax_cm = plt.subplots() + sns.heatmap(np.array(cm), annot=True, fmt='d', cmap='Blues', ax=ax_cm, xticklabels=class_names, yticklabels=class_names) + ax_cm.set_title("Matriz de Confusión"); ax_cm.set_ylabel("Verdadero"); ax_cm.set_xlabel("Predicho") + st.pyplot(fig_cm) + +# ============================================================================== +# --- PESTAÑA 4: DASHBOARD DE MONITOREO --- +# ============================================================================== +if tab_monitoreo is not None: + with tab_monitoreo: + st.header("Dashboard de Monitoreo del Sistema") + if st.button("Refrescar Datos del Dashboard", key="refresh_monitor"): st.rerun() + + if not st.session_state.get('ray_initialized', False): + st.error("Fallo al conectar con el clúster de Ray.") + if 'ray_init_error' in st.session_state: st.exception(st.session_state.get('ray_init_error')) + else: + with st.expander("Estado del Clúster Ray", expanded=True): + st.subheader("Nodos del Clúster") + try: + node_data = get_cluster_nodes_status() + if node_data: st.table(pd.DataFrame(node_data)) + else: st.warning("No se pudieron obtener datos de los nodos.") + except Exception as e: st.error(f"Error obteniendo estado de nodos: {e}") + + with st.expander("Estadísticas de Inferencia (API)", expanded=True): + st.subheader("Rendimiento del Servicio de Predicción") + try: + # --- LLAMADA A LA NUEVA FUNCIÓN DE AGREGACIÓN --- + # Le pasamos el inference_client para que sepa a quiénes preguntar + inference_stats = get_aggregated_inference_stats(inference_client) + + if "error" in inference_stats: + st.error(inference_stats["error"]) + else: + col1, col2 = st.columns(2) + col1.metric("Total de Peticiones", f"{inference_stats.get('total_requests', 0)}") + col2.metric("Latencia Promedio", f"{inference_stats.get('average_latency_ms', 0):.2f} ms") + details_df = pd.DataFrame(inference_stats.get("details_by_model", [])) + if not details_df.empty: + st.markdown("**Desglose por Modelo:**") + st.table(details_df) + except Exception as e: + st.error(f"No se pudieron obtener las estadísticas de inferencia: {e}") + diff --git a/management_api.py b/management_api.py new file mode 100644 index 0000000..3cf1ea4 --- /dev/null +++ b/management_api.py @@ -0,0 +1,223 @@ +# management_api.py + +from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Response +import pandas as pd +import ray +import os +import train +import traceback +from contextlib import asynccontextmanager +from typing import List +import json +import time +import pickle + +# --- Lifespan Manager (Con Carga de Modelos Persistentes) --- +@asynccontextmanager +async def lifespan(app: FastAPI): + print(">>> LIFESPAN (Management API): Iniciando...") + try: + ray_address = os.environ.get("RAY_ADDRESS", "auto") + if not ray.is_initialized(): + ray.init(address=ray_address, namespace="mi_plataforma", ignore_reinit_error=True) + + train.ModelRegistryActor.options( + name="model_registry", + get_if_exists=True, + namespace="mi_plataforma", + lifetime="detached", + max_restarts=-1, + ).remote() + print(">>> LIFESPAN (Management API): Solicitud de creación de ModelRegistryActor enviada.") + + actor_ready = False + for i in range(30): + 
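+            # ray.get_actor raises ValueError while the named actor is not yet registered,
+            # so this loop polls up to 30 times (one second apart) before giving up.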
try: + ray.get_actor("model_registry", namespace="mi_plataforma") + print(f">>> LIFESPAN (Management API): ¡ModelRegistryActor está listo! (Intento {i+1})") + actor_ready = True + break + except ValueError: + print(f">>> LIFESPAN (Management API): Esperando que ModelRegistryActor esté disponible... (Intento {i+1})") + time.sleep(1) + + if not actor_ready: + raise RuntimeError("!!! ERROR CRÍTICO: El ModelRegistryActor no estuvo disponible.") + + # --- NUEVO: Cargar modelos desde el disco al arrancar --- + print(">>> LIFESPAN: Cargando modelos persistentes al registro en memoria...") + actor = ray.get_actor("model_registry", namespace="mi_plataforma") + base_model_dir = "/app/persistent_models" + if os.path.isdir(base_model_dir): + for filename in os.listdir(base_model_dir): + if filename.endswith(".pkl"): + try: + parts = filename.replace(".pkl", "").rsplit("_", 1) + if len(parts) == 2: + dataset_name, model_type = parts + file_path = os.path.join(base_model_dir, filename) + with open(file_path, "rb") as f: + result_dictionary = pickle.load(f) + + actor.register_model.remote(dataset_name, model_type, result_dictionary) + print(f"--- Modelo persistente cargado: {dataset_name}/{model_type}") + except Exception as e: + print(f"--- Error cargando modelo persistente {filename}: {e}") + + except Exception as e: + print(f"!!! LIFESPAN (Management API) ERROR DURANTE EL ARRANQUE: {e} !!!") + traceback.print_exc() + raise e + + yield + + if ray.is_initialized(): + ray.shutdown() + +app = FastAPI(title="API de Gestión y Registro de Modelos", version="2.1.0", lifespan=lifespan) + +# --- Endpoints de Gestión (Sin cambios) --- +@app.post("/datasets/train_batch", summary="Subir y Entrenar un Lote de Datasets") +async def upload_and_train_batch( + configs: str = Form(...), + models_to_train: List[str] = Form(...), + files: List[UploadFile] = File(...) 
+): + # (El código de este endpoint es correcto, no se necesita cambiar) + print(f"API: Recibiendo petición de entrenamiento en lote para modelos: {models_to_train}") + + try: + dataset_configs = json.loads(configs) + uploaded_files_map = {file.filename: file for file in files} + except Exception as e: + raise HTTPException(status_code=400, detail=f"Error procesando la entrada: {e}") + + launched_jobs_info = [] + errors = [] + + for config in dataset_configs: + dataset_name = config.get("dataset_name") + target_column = config.get("target_column") + filename = config.get("filename") + + if not all([dataset_name, target_column, filename]) or filename not in uploaded_files_map: + errors.append({"dataset_config": config, "error": "Configuración incompleta o archivo no encontrado."}) + continue + + file = uploaded_files_map[filename] + + try: + df = pd.read_csv(file.file) + await file.close() + + if target_column not in df.columns: + errors.append({"dataset_config": config, "error": f"Columna objetivo '{target_column}' no encontrada."}) + continue + + result_message = train.run_complete_training_job( + dataset_name, df, target_column, models_to_train + ) + launched_jobs_info.append({"dataset_name": dataset_name, "status": result_message}) + + except Exception as e: + error_msg = f"Error al lanzar el job para '{dataset_name}': {e}" + errors.append({"dataset_config": config, "error": error_msg}) + traceback.print_exc() + + return { + "message": "Procesamiento de lote completado.", + "launched_jobs": launched_jobs_info, + "errors": errors + } + +@app.delete("/models/{dataset_name}", summary="Eliminar Modelos de un Dataset") +async def delete_models(dataset_name: str): + # (El código de este endpoint es correcto, no se necesita cambiar) + try: + registry_actor = ray.get_actor("model_registry", namespace="mi_plataforma") + success = await registry_actor.delete_dataset_models.remote(dataset_name) + if success: + # Opcional: Borrar también los archivos del disco + base_model_dir = "/app/persistent_models" + for filename in os.listdir(base_model_dir): + if filename.startswith(f"{dataset_name}_") and filename.endswith(".pkl"): + os.remove(os.path.join(base_model_dir, filename)) + return {"message": f"Modelos para '{dataset_name}' eliminados del registro y del disco."} + else: + raise HTTPException(status_code=404, detail=f"No se encontraron modelos para el dataset '{dataset_name}'.") + except ValueError: + raise HTTPException(status_code=503, detail="ModelRegistryActor no disponible.") + +# --- Endpoint de Salud (Sin cambios) --- +@app.get("/health", summary="Comprobación de Salud", status_code=200) +async def health_check(): + return {"status": "ok"} + +# --- Endpoints de Registro (MODIFICADOS) --- +@app.get("/registry/models", summary="[REGISTRY] Listar todos los modelos disponibles") +async def registry_list_models(): + try: + registry_actor = ray.get_actor("model_registry", namespace="mi_plataforma") + models_details = await registry_actor.list_models_details.remote() + return models_details + except ValueError: + raise HTTPException(status_code=503, detail="ModelRegistryActor no disponible.") + +@app.get("/registry/artifacts/{dataset_name}/{model_type}/{artifact_name}", summary="[REGISTRY] Obtener un artefacto de modelo") +async def registry_get_artifact(dataset_name: str, model_type: str, artifact_name: str): + try: + # 1. 
Intenta obtener el artefacto del actor en memoria (rápido) + registry_actor = ray.get_actor("model_registry", namespace="mi_plataforma") + artifacts_dict = await registry_actor.get_model_artifacts.remote(dataset_name, model_type) + + if not artifacts_dict: + # 2. Si no está en memoria, intenta cargarlo desde el disco (fallback) + model_path = f"/app/persistent_models/{dataset_name}_{model_type}.pkl" + print(f"Artefacto no en memoria. Intentando cargar desde disco: {model_path}") + if not os.path.exists(model_path): + raise HTTPException(status_code=404, detail="Modelo no encontrado ni en memoria ni en disco.") + with open(model_path, "rb") as f: + artifacts_dict = pickle.load(f) + + serialized_artifact = artifacts_dict.get(artifact_name) + + if serialized_artifact is None: + raise HTTPException(status_code=404, detail=f"Artefacto '{artifact_name}' no encontrado en el modelo.") + + return Response(content=serialized_artifact, media_type="application/octet-stream") + + except ValueError: + raise HTTPException(status_code=503, detail="ModelRegistryActor no disponible.") + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error interno obteniendo el artefacto: {e}") + +# management_api.py + +# ... (todos tus otros imports y endpoints se mantienen igual) ... + + +# === NUEVO ENDPOINT PARA ANÁLISIS DE ARCHIVOS === +@app.post("/datasets/analyze", summary="Subir y Analizar un CSV para obtener sus columnas") +async def analyze_dataset(file: UploadFile = File(...)): + """ + Recibe un archivo CSV, lo lee con Pandas y devuelve información básica + como los nombres de las columnas y una previsualización de los datos. + """ + try: + # Lee el contenido del archivo en un DataFrame + df = pd.read_csv(file.file) + + # Obtiene los nombres de las columnas + columns = df.columns.tolist() + + # Obtiene una previsualización de las primeras 5 filas en formato JSON + preview = df.head().to_dict(orient="records") + + return { + "filename": file.filename, + "columns": columns, + "preview": preview, + "rows": len(df) + } + except Exception as e: + raise HTTPException(status_code=400, detail=f"No se pudo procesar el archivo. 
Error: {e}") \ No newline at end of file diff --git a/models_output/breast_cancer/decision_tree/best_pipeline.joblib b/models_output/breast_cancer/decision_tree/best_pipeline.joblib new file mode 100644 index 0000000..caf767f Binary files /dev/null and b/models_output/breast_cancer/decision_tree/best_pipeline.joblib differ diff --git a/models_output/breast_cancer/decision_tree/best_pipeline_20250622_215006.joblib b/models_output/breast_cancer/decision_tree/best_pipeline_20250622_215006.joblib new file mode 100644 index 0000000..caf767f Binary files /dev/null and b/models_output/breast_cancer/decision_tree/best_pipeline_20250622_215006.joblib differ diff --git a/models_output/breast_cancer/decision_tree/best_pipeline_20250622_223716.joblib b/models_output/breast_cancer/decision_tree/best_pipeline_20250622_223716.joblib new file mode 100644 index 0000000..caf767f Binary files /dev/null and b/models_output/breast_cancer/decision_tree/best_pipeline_20250622_223716.joblib differ diff --git a/models_output/breast_cancer/decision_tree/best_pipeline_20250622_223907.joblib b/models_output/breast_cancer/decision_tree/best_pipeline_20250622_223907.joblib new file mode 100644 index 0000000..caf767f Binary files /dev/null and b/models_output/breast_cancer/decision_tree/best_pipeline_20250622_223907.joblib differ diff --git a/models_output/breast_cancer/decision_tree/best_pipeline_20250623_013736.joblib b/models_output/breast_cancer/decision_tree/best_pipeline_20250623_013736.joblib new file mode 100644 index 0000000..caf767f Binary files /dev/null and b/models_output/breast_cancer/decision_tree/best_pipeline_20250623_013736.joblib differ diff --git a/models_output/breast_cancer/decision_tree/best_pipeline_20250623_014435.joblib b/models_output/breast_cancer/decision_tree/best_pipeline_20250623_014435.joblib new file mode 100644 index 0000000..caf767f Binary files /dev/null and b/models_output/breast_cancer/decision_tree/best_pipeline_20250623_014435.joblib differ diff --git a/models_output/breast_cancer/decision_tree/best_pipeline_20250623_021722.joblib b/models_output/breast_cancer/decision_tree/best_pipeline_20250623_021722.joblib new file mode 100644 index 0000000..caf767f Binary files /dev/null and b/models_output/breast_cancer/decision_tree/best_pipeline_20250623_021722.joblib differ diff --git a/models_output/breast_cancer/decision_tree/feature_names.joblib b/models_output/breast_cancer/decision_tree/feature_names.joblib new file mode 100644 index 0000000..4866d0d Binary files /dev/null and b/models_output/breast_cancer/decision_tree/feature_names.joblib differ diff --git a/models_output/breast_cancer/decision_tree/feature_names_20250623_021722.joblib b/models_output/breast_cancer/decision_tree/feature_names_20250623_021722.joblib new file mode 100644 index 0000000..4866d0d Binary files /dev/null and b/models_output/breast_cancer/decision_tree/feature_names_20250623_021722.joblib differ diff --git a/models_output/breast_cancer/decision_tree/imputer_20250622_215006.joblib b/models_output/breast_cancer/decision_tree/imputer_20250622_215006.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/decision_tree/imputer_20250622_215006.joblib differ diff --git a/models_output/breast_cancer/decision_tree/imputer_20250622_223716.joblib b/models_output/breast_cancer/decision_tree/imputer_20250622_223716.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/decision_tree/imputer_20250622_223716.joblib 
differ diff --git a/models_output/breast_cancer/decision_tree/imputer_20250622_223907.joblib b/models_output/breast_cancer/decision_tree/imputer_20250622_223907.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/decision_tree/imputer_20250622_223907.joblib differ diff --git a/models_output/breast_cancer/decision_tree/imputer_20250623_013736.joblib b/models_output/breast_cancer/decision_tree/imputer_20250623_013736.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/decision_tree/imputer_20250623_013736.joblib differ diff --git a/models_output/breast_cancer/decision_tree/imputer_20250623_014435.joblib b/models_output/breast_cancer/decision_tree/imputer_20250623_014435.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/decision_tree/imputer_20250623_014435.joblib differ diff --git a/models_output/breast_cancer/decision_tree/imputer_20250623_021722.joblib b/models_output/breast_cancer/decision_tree/imputer_20250623_021722.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/decision_tree/imputer_20250623_021722.joblib differ diff --git a/models_output/breast_cancer/decision_tree/metrics.json b/models_output/breast_cancer/decision_tree/metrics.json new file mode 100644 index 0000000..832f314 --- /dev/null +++ b/models_output/breast_cancer/decision_tree/metrics.json @@ -0,0 +1,45 @@ +{ + "accuracy": 0.9210526315789473, + "classification_report": { + "malignant": { + "precision": 0.8666666666666667, + "recall": 0.9285714285714286, + "f1-score": 0.896551724137931, + "support": 42.0 + }, + "benign": { + "precision": 0.9565217391304348, + "recall": 0.9166666666666666, + "f1-score": 0.9361702127659575, + "support": 72.0 + }, + "accuracy": 0.9210526315789473, + "macro avg": { + "precision": 0.9115942028985508, + "recall": 0.9226190476190477, + "f1-score": 0.9163609684519443, + "support": 114.0 + }, + "weighted avg": { + "precision": 0.9234172387490467, + "recall": 0.9210526315789473, + "f1-score": 0.9215739274819479, + "support": 114.0 + } + }, + "confusion_matrix": [ + [ + 39, + 3 + ], + [ + 6, + 66 + ] + ], + "best_hyperparameters": { + "classifier__max_depth": 5, + "classifier__min_samples_split": 5 + }, + "training_duration_sec": 0.2545912265777588 +} \ No newline at end of file diff --git a/models_output/breast_cancer/decision_tree/scaler_20250622_215006.joblib b/models_output/breast_cancer/decision_tree/scaler_20250622_215006.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/decision_tree/scaler_20250622_215006.joblib differ diff --git a/models_output/breast_cancer/decision_tree/scaler_20250622_223716.joblib b/models_output/breast_cancer/decision_tree/scaler_20250622_223716.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/decision_tree/scaler_20250622_223716.joblib differ diff --git a/models_output/breast_cancer/decision_tree/scaler_20250622_223907.joblib b/models_output/breast_cancer/decision_tree/scaler_20250622_223907.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/decision_tree/scaler_20250622_223907.joblib differ diff --git a/models_output/breast_cancer/decision_tree/scaler_20250623_013736.joblib b/models_output/breast_cancer/decision_tree/scaler_20250623_013736.joblib new file mode 100644 index 
0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/decision_tree/scaler_20250623_013736.joblib differ diff --git a/models_output/breast_cancer/decision_tree/scaler_20250623_014435.joblib b/models_output/breast_cancer/decision_tree/scaler_20250623_014435.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/decision_tree/scaler_20250623_014435.joblib differ diff --git a/models_output/breast_cancer/decision_tree/scaler_20250623_021722.joblib b/models_output/breast_cancer/decision_tree/scaler_20250623_021722.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/decision_tree/scaler_20250623_021722.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/best_pipeline.joblib b/models_output/breast_cancer/logistic_regression/best_pipeline.joblib new file mode 100644 index 0000000..635c535 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/best_pipeline.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/best_pipeline_20250622_215006.joblib b/models_output/breast_cancer/logistic_regression/best_pipeline_20250622_215006.joblib new file mode 100644 index 0000000..304793c Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/best_pipeline_20250622_215006.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/best_pipeline_20250622_223716.joblib b/models_output/breast_cancer/logistic_regression/best_pipeline_20250622_223716.joblib new file mode 100644 index 0000000..304793c Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/best_pipeline_20250622_223716.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/best_pipeline_20250622_223907.joblib b/models_output/breast_cancer/logistic_regression/best_pipeline_20250622_223907.joblib new file mode 100644 index 0000000..304793c Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/best_pipeline_20250622_223907.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/best_pipeline_20250623_013736.joblib b/models_output/breast_cancer/logistic_regression/best_pipeline_20250623_013736.joblib new file mode 100644 index 0000000..304793c Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/best_pipeline_20250623_013736.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/best_pipeline_20250623_014435.joblib b/models_output/breast_cancer/logistic_regression/best_pipeline_20250623_014435.joblib new file mode 100644 index 0000000..304793c Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/best_pipeline_20250623_014435.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/best_pipeline_20250623_021721.joblib b/models_output/breast_cancer/logistic_regression/best_pipeline_20250623_021721.joblib new file mode 100644 index 0000000..eaeaa09 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/best_pipeline_20250623_021721.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/feature_names.joblib b/models_output/breast_cancer/logistic_regression/feature_names.joblib new file mode 100644 index 0000000..4866d0d Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/feature_names.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/feature_names_20250623_021721.joblib 
b/models_output/breast_cancer/logistic_regression/feature_names_20250623_021721.joblib new file mode 100644 index 0000000..4866d0d Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/feature_names_20250623_021721.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/imputer_20250622_215006.joblib b/models_output/breast_cancer/logistic_regression/imputer_20250622_215006.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/imputer_20250622_215006.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/imputer_20250622_223716.joblib b/models_output/breast_cancer/logistic_regression/imputer_20250622_223716.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/imputer_20250622_223716.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/imputer_20250622_223907.joblib b/models_output/breast_cancer/logistic_regression/imputer_20250622_223907.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/imputer_20250622_223907.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/imputer_20250623_013736.joblib b/models_output/breast_cancer/logistic_regression/imputer_20250623_013736.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/imputer_20250623_013736.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/imputer_20250623_014435.joblib b/models_output/breast_cancer/logistic_regression/imputer_20250623_014435.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/imputer_20250623_014435.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/imputer_20250623_021721.joblib b/models_output/breast_cancer/logistic_regression/imputer_20250623_021721.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/imputer_20250623_021721.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/metrics.json b/models_output/breast_cancer/logistic_regression/metrics.json new file mode 100644 index 0000000..f37f21a --- /dev/null +++ b/models_output/breast_cancer/logistic_regression/metrics.json @@ -0,0 +1,44 @@ +{ + "accuracy": 0.9824561403508771, + "classification_report": { + "malignant": { + "precision": 0.9761904761904762, + "recall": 0.9761904761904762, + "f1-score": 0.9761904761904762, + "support": 42.0 + }, + "benign": { + "precision": 0.9861111111111112, + "recall": 0.9861111111111112, + "f1-score": 0.9861111111111112, + "support": 72.0 + }, + "accuracy": 0.9824561403508771, + "macro avg": { + "precision": 0.9811507936507937, + "recall": 0.9811507936507937, + "f1-score": 0.9811507936507937, + "support": 114.0 + }, + "weighted avg": { + "precision": 0.9824561403508771, + "recall": 0.9824561403508771, + "f1-score": 0.9824561403508771, + "support": 114.0 + } + }, + "confusion_matrix": [ + [ + 41, + 1 + ], + [ + 1, + 71 + ] + ], + "best_hyperparameters": { + "classifier__C": 1.0 + }, + "training_duration_sec": 0.07887697219848633 +} \ No newline at end of file diff --git a/models_output/breast_cancer/logistic_regression/scaler_20250622_215006.joblib 
b/models_output/breast_cancer/logistic_regression/scaler_20250622_215006.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/scaler_20250622_215006.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/scaler_20250622_223716.joblib b/models_output/breast_cancer/logistic_regression/scaler_20250622_223716.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/scaler_20250622_223716.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/scaler_20250622_223907.joblib b/models_output/breast_cancer/logistic_regression/scaler_20250622_223907.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/scaler_20250622_223907.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/scaler_20250623_013736.joblib b/models_output/breast_cancer/logistic_regression/scaler_20250623_013736.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/scaler_20250623_013736.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/scaler_20250623_014435.joblib b/models_output/breast_cancer/logistic_regression/scaler_20250623_014435.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/scaler_20250623_014435.joblib differ diff --git a/models_output/breast_cancer/logistic_regression/scaler_20250623_021721.joblib b/models_output/breast_cancer/logistic_regression/scaler_20250623_021721.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/logistic_regression/scaler_20250623_021721.joblib differ diff --git a/models_output/breast_cancer/random_forest/best_pipeline.joblib b/models_output/breast_cancer/random_forest/best_pipeline.joblib new file mode 100644 index 0000000..c7a16ac Binary files /dev/null and b/models_output/breast_cancer/random_forest/best_pipeline.joblib differ diff --git a/models_output/breast_cancer/random_forest/best_pipeline_20250622_215007.joblib b/models_output/breast_cancer/random_forest/best_pipeline_20250622_215007.joblib new file mode 100644 index 0000000..da598e8 Binary files /dev/null and b/models_output/breast_cancer/random_forest/best_pipeline_20250622_215007.joblib differ diff --git a/models_output/breast_cancer/random_forest/best_pipeline_20250622_223717.joblib b/models_output/breast_cancer/random_forest/best_pipeline_20250622_223717.joblib new file mode 100644 index 0000000..da598e8 Binary files /dev/null and b/models_output/breast_cancer/random_forest/best_pipeline_20250622_223717.joblib differ diff --git a/models_output/breast_cancer/random_forest/best_pipeline_20250622_223908.joblib b/models_output/breast_cancer/random_forest/best_pipeline_20250622_223908.joblib new file mode 100644 index 0000000..da598e8 Binary files /dev/null and b/models_output/breast_cancer/random_forest/best_pipeline_20250622_223908.joblib differ diff --git a/models_output/breast_cancer/random_forest/best_pipeline_20250623_013737.joblib b/models_output/breast_cancer/random_forest/best_pipeline_20250623_013737.joblib new file mode 100644 index 0000000..da598e8 Binary files /dev/null and b/models_output/breast_cancer/random_forest/best_pipeline_20250623_013737.joblib differ diff --git 
a/models_output/breast_cancer/random_forest/best_pipeline_20250623_014438.joblib b/models_output/breast_cancer/random_forest/best_pipeline_20250623_014438.joblib new file mode 100644 index 0000000..da598e8 Binary files /dev/null and b/models_output/breast_cancer/random_forest/best_pipeline_20250623_014438.joblib differ diff --git a/models_output/breast_cancer/random_forest/best_pipeline_20250623_021737.joblib b/models_output/breast_cancer/random_forest/best_pipeline_20250623_021737.joblib new file mode 100644 index 0000000..e06cf7c Binary files /dev/null and b/models_output/breast_cancer/random_forest/best_pipeline_20250623_021737.joblib differ diff --git a/models_output/breast_cancer/random_forest/feature_names.joblib b/models_output/breast_cancer/random_forest/feature_names.joblib new file mode 100644 index 0000000..4866d0d Binary files /dev/null and b/models_output/breast_cancer/random_forest/feature_names.joblib differ diff --git a/models_output/breast_cancer/random_forest/feature_names_20250623_021737.joblib b/models_output/breast_cancer/random_forest/feature_names_20250623_021737.joblib new file mode 100644 index 0000000..4866d0d Binary files /dev/null and b/models_output/breast_cancer/random_forest/feature_names_20250623_021737.joblib differ diff --git a/models_output/breast_cancer/random_forest/imputer_20250622_215007.joblib b/models_output/breast_cancer/random_forest/imputer_20250622_215007.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/random_forest/imputer_20250622_215007.joblib differ diff --git a/models_output/breast_cancer/random_forest/imputer_20250622_223717.joblib b/models_output/breast_cancer/random_forest/imputer_20250622_223717.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/random_forest/imputer_20250622_223717.joblib differ diff --git a/models_output/breast_cancer/random_forest/imputer_20250622_223908.joblib b/models_output/breast_cancer/random_forest/imputer_20250622_223908.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/random_forest/imputer_20250622_223908.joblib differ diff --git a/models_output/breast_cancer/random_forest/imputer_20250623_013737.joblib b/models_output/breast_cancer/random_forest/imputer_20250623_013737.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/random_forest/imputer_20250623_013737.joblib differ diff --git a/models_output/breast_cancer/random_forest/imputer_20250623_014438.joblib b/models_output/breast_cancer/random_forest/imputer_20250623_014438.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/random_forest/imputer_20250623_014438.joblib differ diff --git a/models_output/breast_cancer/random_forest/imputer_20250623_021737.joblib b/models_output/breast_cancer/random_forest/imputer_20250623_021737.joblib new file mode 100644 index 0000000..d6bf8b4 Binary files /dev/null and b/models_output/breast_cancer/random_forest/imputer_20250623_021737.joblib differ diff --git a/models_output/breast_cancer/random_forest/metrics.json b/models_output/breast_cancer/random_forest/metrics.json new file mode 100644 index 0000000..569b679 --- /dev/null +++ b/models_output/breast_cancer/random_forest/metrics.json @@ -0,0 +1,45 @@ +{ + "accuracy": 0.956140350877193, + "classification_report": { + "malignant": { + "precision": 0.9512195121951219, + "recall": 
0.9285714285714286, + "f1-score": 0.9397590361445783, + "support": 42.0 + }, + "benign": { + "precision": 0.958904109589041, + "recall": 0.9722222222222222, + "f1-score": 0.9655172413793104, + "support": 72.0 + }, + "accuracy": 0.956140350877193, + "macro avg": { + "precision": 0.9550618108920814, + "recall": 0.9503968253968254, + "f1-score": 0.9526381387619444, + "support": 114.0 + }, + "weighted avg": { + "precision": 0.9560729421281235, + "recall": 0.956140350877193, + "f1-score": 0.9560273762928302, + "support": 114.0 + } + }, + "confusion_matrix": [ + [ + 39, + 3 + ], + [ + 2, + 70 + ] + ], + "best_hyperparameters": { + "classifier__max_depth": 5, + "classifier__n_estimators": 100 + }, + "training_duration_sec": 0.9391100406646729 +} \ No newline at end of file diff --git a/models_output/breast_cancer/random_forest/scaler_20250622_215007.joblib b/models_output/breast_cancer/random_forest/scaler_20250622_215007.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/random_forest/scaler_20250622_215007.joblib differ diff --git a/models_output/breast_cancer/random_forest/scaler_20250622_223717.joblib b/models_output/breast_cancer/random_forest/scaler_20250622_223717.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/random_forest/scaler_20250622_223717.joblib differ diff --git a/models_output/breast_cancer/random_forest/scaler_20250622_223908.joblib b/models_output/breast_cancer/random_forest/scaler_20250622_223908.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/random_forest/scaler_20250622_223908.joblib differ diff --git a/models_output/breast_cancer/random_forest/scaler_20250623_013737.joblib b/models_output/breast_cancer/random_forest/scaler_20250623_013737.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/random_forest/scaler_20250623_013737.joblib differ diff --git a/models_output/breast_cancer/random_forest/scaler_20250623_014438.joblib b/models_output/breast_cancer/random_forest/scaler_20250623_014438.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/random_forest/scaler_20250623_014438.joblib differ diff --git a/models_output/breast_cancer/random_forest/scaler_20250623_021737.joblib b/models_output/breast_cancer/random_forest/scaler_20250623_021737.joblib new file mode 100644 index 0000000..2c00951 Binary files /dev/null and b/models_output/breast_cancer/random_forest/scaler_20250623_021737.joblib differ diff --git a/models_output/iris/decision_tree/best_pipeline.joblib b/models_output/iris/decision_tree/best_pipeline.joblib new file mode 100644 index 0000000..dcc4a2e Binary files /dev/null and b/models_output/iris/decision_tree/best_pipeline.joblib differ diff --git a/models_output/iris/decision_tree/best_pipeline_20250622_215006.joblib b/models_output/iris/decision_tree/best_pipeline_20250622_215006.joblib new file mode 100644 index 0000000..dcc4a2e Binary files /dev/null and b/models_output/iris/decision_tree/best_pipeline_20250622_215006.joblib differ diff --git a/models_output/iris/decision_tree/best_pipeline_20250622_223716.joblib b/models_output/iris/decision_tree/best_pipeline_20250622_223716.joblib new file mode 100644 index 0000000..dcc4a2e Binary files /dev/null and b/models_output/iris/decision_tree/best_pipeline_20250622_223716.joblib differ diff --git 
a/models_output/iris/decision_tree/best_pipeline_20250622_223907.joblib b/models_output/iris/decision_tree/best_pipeline_20250622_223907.joblib new file mode 100644 index 0000000..dcc4a2e Binary files /dev/null and b/models_output/iris/decision_tree/best_pipeline_20250622_223907.joblib differ diff --git a/models_output/iris/decision_tree/best_pipeline_20250623_013736.joblib b/models_output/iris/decision_tree/best_pipeline_20250623_013736.joblib new file mode 100644 index 0000000..dcc4a2e Binary files /dev/null and b/models_output/iris/decision_tree/best_pipeline_20250623_013736.joblib differ diff --git a/models_output/iris/decision_tree/best_pipeline_20250623_014435.joblib b/models_output/iris/decision_tree/best_pipeline_20250623_014435.joblib new file mode 100644 index 0000000..dcc4a2e Binary files /dev/null and b/models_output/iris/decision_tree/best_pipeline_20250623_014435.joblib differ diff --git a/models_output/iris/decision_tree/best_pipeline_20250623_021721.joblib b/models_output/iris/decision_tree/best_pipeline_20250623_021721.joblib new file mode 100644 index 0000000..134be1f Binary files /dev/null and b/models_output/iris/decision_tree/best_pipeline_20250623_021721.joblib differ diff --git a/models_output/iris/decision_tree/feature_names.joblib b/models_output/iris/decision_tree/feature_names.joblib new file mode 100644 index 0000000..8cd8382 Binary files /dev/null and b/models_output/iris/decision_tree/feature_names.joblib differ diff --git a/models_output/iris/decision_tree/feature_names_20250623_021721.joblib b/models_output/iris/decision_tree/feature_names_20250623_021721.joblib new file mode 100644 index 0000000..8cd8382 Binary files /dev/null and b/models_output/iris/decision_tree/feature_names_20250623_021721.joblib differ diff --git a/models_output/iris/decision_tree/imputer_20250622_215006.joblib b/models_output/iris/decision_tree/imputer_20250622_215006.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/decision_tree/imputer_20250622_215006.joblib differ diff --git a/models_output/iris/decision_tree/imputer_20250622_223716.joblib b/models_output/iris/decision_tree/imputer_20250622_223716.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/decision_tree/imputer_20250622_223716.joblib differ diff --git a/models_output/iris/decision_tree/imputer_20250622_223907.joblib b/models_output/iris/decision_tree/imputer_20250622_223907.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/decision_tree/imputer_20250622_223907.joblib differ diff --git a/models_output/iris/decision_tree/imputer_20250623_013736.joblib b/models_output/iris/decision_tree/imputer_20250623_013736.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/decision_tree/imputer_20250623_013736.joblib differ diff --git a/models_output/iris/decision_tree/imputer_20250623_014435.joblib b/models_output/iris/decision_tree/imputer_20250623_014435.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/decision_tree/imputer_20250623_014435.joblib differ diff --git a/models_output/iris/decision_tree/imputer_20250623_021721.joblib b/models_output/iris/decision_tree/imputer_20250623_021721.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/decision_tree/imputer_20250623_021721.joblib differ diff --git a/models_output/iris/decision_tree/metrics.json 
b/models_output/iris/decision_tree/metrics.json new file mode 100644 index 0000000..ad4c955 --- /dev/null +++ b/models_output/iris/decision_tree/metrics.json @@ -0,0 +1,58 @@ +{ + "accuracy": 0.9666666666666667, + "classification_report": { + "setosa": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 10.0 + }, + "versicolor": { + "precision": 1.0, + "recall": 0.9, + "f1-score": 0.9473684210526315, + "support": 10.0 + }, + "virginica": { + "precision": 0.9090909090909091, + "recall": 1.0, + "f1-score": 0.9523809523809523, + "support": 10.0 + }, + "accuracy": 0.9666666666666667, + "macro avg": { + "precision": 0.9696969696969697, + "recall": 0.9666666666666667, + "f1-score": 0.9665831244778612, + "support": 30.0 + }, + "weighted avg": { + "precision": 0.9696969696969696, + "recall": 0.9666666666666667, + "f1-score": 0.9665831244778613, + "support": 30.0 + } + }, + "confusion_matrix": [ + [ + 10, + 0, + 0 + ], + [ + 0, + 9, + 1 + ], + [ + 0, + 0, + 10 + ] + ], + "best_hyperparameters": { + "classifier__max_depth": 3, + "classifier__min_samples_split": 2 + }, + "training_duration_sec": 0.14702844619750977 +} \ No newline at end of file diff --git a/models_output/iris/decision_tree/scaler_20250622_215006.joblib b/models_output/iris/decision_tree/scaler_20250622_215006.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/decision_tree/scaler_20250622_215006.joblib differ diff --git a/models_output/iris/decision_tree/scaler_20250622_223716.joblib b/models_output/iris/decision_tree/scaler_20250622_223716.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/decision_tree/scaler_20250622_223716.joblib differ diff --git a/models_output/iris/decision_tree/scaler_20250622_223907.joblib b/models_output/iris/decision_tree/scaler_20250622_223907.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/decision_tree/scaler_20250622_223907.joblib differ diff --git a/models_output/iris/decision_tree/scaler_20250623_013736.joblib b/models_output/iris/decision_tree/scaler_20250623_013736.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/decision_tree/scaler_20250623_013736.joblib differ diff --git a/models_output/iris/decision_tree/scaler_20250623_014435.joblib b/models_output/iris/decision_tree/scaler_20250623_014435.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/decision_tree/scaler_20250623_014435.joblib differ diff --git a/models_output/iris/decision_tree/scaler_20250623_021721.joblib b/models_output/iris/decision_tree/scaler_20250623_021721.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/decision_tree/scaler_20250623_021721.joblib differ diff --git a/models_output/iris/logistic_regression/best_pipeline.joblib b/models_output/iris/logistic_regression/best_pipeline.joblib new file mode 100644 index 0000000..d1a3c0f Binary files /dev/null and b/models_output/iris/logistic_regression/best_pipeline.joblib differ diff --git a/models_output/iris/logistic_regression/best_pipeline_20250622_215006.joblib b/models_output/iris/logistic_regression/best_pipeline_20250622_215006.joblib new file mode 100644 index 0000000..1d58fd5 Binary files /dev/null and b/models_output/iris/logistic_regression/best_pipeline_20250622_215006.joblib differ diff --git 
a/models_output/iris/logistic_regression/best_pipeline_20250622_223716.joblib b/models_output/iris/logistic_regression/best_pipeline_20250622_223716.joblib new file mode 100644 index 0000000..1d58fd5 Binary files /dev/null and b/models_output/iris/logistic_regression/best_pipeline_20250622_223716.joblib differ diff --git a/models_output/iris/logistic_regression/best_pipeline_20250622_223907.joblib b/models_output/iris/logistic_regression/best_pipeline_20250622_223907.joblib new file mode 100644 index 0000000..1d58fd5 Binary files /dev/null and b/models_output/iris/logistic_regression/best_pipeline_20250622_223907.joblib differ diff --git a/models_output/iris/logistic_regression/best_pipeline_20250623_013736.joblib b/models_output/iris/logistic_regression/best_pipeline_20250623_013736.joblib new file mode 100644 index 0000000..1d58fd5 Binary files /dev/null and b/models_output/iris/logistic_regression/best_pipeline_20250623_013736.joblib differ diff --git a/models_output/iris/logistic_regression/best_pipeline_20250623_014435.joblib b/models_output/iris/logistic_regression/best_pipeline_20250623_014435.joblib new file mode 100644 index 0000000..1d58fd5 Binary files /dev/null and b/models_output/iris/logistic_regression/best_pipeline_20250623_014435.joblib differ diff --git a/models_output/iris/logistic_regression/best_pipeline_20250623_021721.joblib b/models_output/iris/logistic_regression/best_pipeline_20250623_021721.joblib new file mode 100644 index 0000000..7a78b71 Binary files /dev/null and b/models_output/iris/logistic_regression/best_pipeline_20250623_021721.joblib differ diff --git a/models_output/iris/logistic_regression/feature_names.joblib b/models_output/iris/logistic_regression/feature_names.joblib new file mode 100644 index 0000000..8cd8382 Binary files /dev/null and b/models_output/iris/logistic_regression/feature_names.joblib differ diff --git a/models_output/iris/logistic_regression/feature_names_20250623_021721.joblib b/models_output/iris/logistic_regression/feature_names_20250623_021721.joblib new file mode 100644 index 0000000..8cd8382 Binary files /dev/null and b/models_output/iris/logistic_regression/feature_names_20250623_021721.joblib differ diff --git a/models_output/iris/logistic_regression/imputer_20250622_215006.joblib b/models_output/iris/logistic_regression/imputer_20250622_215006.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/logistic_regression/imputer_20250622_215006.joblib differ diff --git a/models_output/iris/logistic_regression/imputer_20250622_223716.joblib b/models_output/iris/logistic_regression/imputer_20250622_223716.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/logistic_regression/imputer_20250622_223716.joblib differ diff --git a/models_output/iris/logistic_regression/imputer_20250622_223907.joblib b/models_output/iris/logistic_regression/imputer_20250622_223907.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/logistic_regression/imputer_20250622_223907.joblib differ diff --git a/models_output/iris/logistic_regression/imputer_20250623_013736.joblib b/models_output/iris/logistic_regression/imputer_20250623_013736.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/logistic_regression/imputer_20250623_013736.joblib differ diff --git a/models_output/iris/logistic_regression/imputer_20250623_014435.joblib 
b/models_output/iris/logistic_regression/imputer_20250623_014435.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/logistic_regression/imputer_20250623_014435.joblib differ diff --git a/models_output/iris/logistic_regression/imputer_20250623_021721.joblib b/models_output/iris/logistic_regression/imputer_20250623_021721.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/logistic_regression/imputer_20250623_021721.joblib differ diff --git a/models_output/iris/logistic_regression/metrics.json b/models_output/iris/logistic_regression/metrics.json new file mode 100644 index 0000000..0ff9a16 --- /dev/null +++ b/models_output/iris/logistic_regression/metrics.json @@ -0,0 +1,57 @@ +{ + "accuracy": 0.9, + "classification_report": { + "setosa": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 10.0 + }, + "versicolor": { + "precision": 0.8888888888888888, + "recall": 0.8, + "f1-score": 0.8421052631578947, + "support": 10.0 + }, + "virginica": { + "precision": 0.8181818181818182, + "recall": 0.9, + "f1-score": 0.8571428571428571, + "support": 10.0 + }, + "accuracy": 0.9, + "macro avg": { + "precision": 0.9023569023569024, + "recall": 0.9, + "f1-score": 0.899749373433584, + "support": 30.0 + }, + "weighted avg": { + "precision": 0.9023569023569025, + "recall": 0.9, + "f1-score": 0.8997493734335839, + "support": 30.0 + } + }, + "confusion_matrix": [ + [ + 10, + 0, + 0 + ], + [ + 0, + 8, + 2 + ], + [ + 0, + 1, + 9 + ] + ], + "best_hyperparameters": { + "classifier__C": 10.0 + }, + "training_duration_sec": 0.06540918350219727 +} \ No newline at end of file diff --git a/models_output/iris/logistic_regression/scaler_20250622_215006.joblib b/models_output/iris/logistic_regression/scaler_20250622_215006.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/logistic_regression/scaler_20250622_215006.joblib differ diff --git a/models_output/iris/logistic_regression/scaler_20250622_223716.joblib b/models_output/iris/logistic_regression/scaler_20250622_223716.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/logistic_regression/scaler_20250622_223716.joblib differ diff --git a/models_output/iris/logistic_regression/scaler_20250622_223907.joblib b/models_output/iris/logistic_regression/scaler_20250622_223907.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/logistic_regression/scaler_20250622_223907.joblib differ diff --git a/models_output/iris/logistic_regression/scaler_20250623_013736.joblib b/models_output/iris/logistic_regression/scaler_20250623_013736.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/logistic_regression/scaler_20250623_013736.joblib differ diff --git a/models_output/iris/logistic_regression/scaler_20250623_014435.joblib b/models_output/iris/logistic_regression/scaler_20250623_014435.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/logistic_regression/scaler_20250623_014435.joblib differ diff --git a/models_output/iris/logistic_regression/scaler_20250623_021721.joblib b/models_output/iris/logistic_regression/scaler_20250623_021721.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/logistic_regression/scaler_20250623_021721.joblib differ diff --git 
a/models_output/iris/random_forest/best_pipeline.joblib b/models_output/iris/random_forest/best_pipeline.joblib new file mode 100644 index 0000000..70c8648 Binary files /dev/null and b/models_output/iris/random_forest/best_pipeline.joblib differ diff --git a/models_output/iris/random_forest/best_pipeline_20250622_215007.joblib b/models_output/iris/random_forest/best_pipeline_20250622_215007.joblib new file mode 100644 index 0000000..81ec420 Binary files /dev/null and b/models_output/iris/random_forest/best_pipeline_20250622_215007.joblib differ diff --git a/models_output/iris/random_forest/best_pipeline_20250622_223717.joblib b/models_output/iris/random_forest/best_pipeline_20250622_223717.joblib new file mode 100644 index 0000000..81ec420 Binary files /dev/null and b/models_output/iris/random_forest/best_pipeline_20250622_223717.joblib differ diff --git a/models_output/iris/random_forest/best_pipeline_20250622_223907.joblib b/models_output/iris/random_forest/best_pipeline_20250622_223907.joblib new file mode 100644 index 0000000..81ec420 Binary files /dev/null and b/models_output/iris/random_forest/best_pipeline_20250622_223907.joblib differ diff --git a/models_output/iris/random_forest/best_pipeline_20250623_013737.joblib b/models_output/iris/random_forest/best_pipeline_20250623_013737.joblib new file mode 100644 index 0000000..81ec420 Binary files /dev/null and b/models_output/iris/random_forest/best_pipeline_20250623_013737.joblib differ diff --git a/models_output/iris/random_forest/best_pipeline_20250623_014438.joblib b/models_output/iris/random_forest/best_pipeline_20250623_014438.joblib new file mode 100644 index 0000000..81ec420 Binary files /dev/null and b/models_output/iris/random_forest/best_pipeline_20250623_014438.joblib differ diff --git a/models_output/iris/random_forest/best_pipeline_20250623_021731.joblib b/models_output/iris/random_forest/best_pipeline_20250623_021731.joblib new file mode 100644 index 0000000..fe3cd42 Binary files /dev/null and b/models_output/iris/random_forest/best_pipeline_20250623_021731.joblib differ diff --git a/models_output/iris/random_forest/feature_names.joblib b/models_output/iris/random_forest/feature_names.joblib new file mode 100644 index 0000000..8cd8382 Binary files /dev/null and b/models_output/iris/random_forest/feature_names.joblib differ diff --git a/models_output/iris/random_forest/feature_names_20250623_021731.joblib b/models_output/iris/random_forest/feature_names_20250623_021731.joblib new file mode 100644 index 0000000..8cd8382 Binary files /dev/null and b/models_output/iris/random_forest/feature_names_20250623_021731.joblib differ diff --git a/models_output/iris/random_forest/imputer_20250622_215007.joblib b/models_output/iris/random_forest/imputer_20250622_215007.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/random_forest/imputer_20250622_215007.joblib differ diff --git a/models_output/iris/random_forest/imputer_20250622_223717.joblib b/models_output/iris/random_forest/imputer_20250622_223717.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/random_forest/imputer_20250622_223717.joblib differ diff --git a/models_output/iris/random_forest/imputer_20250622_223907.joblib b/models_output/iris/random_forest/imputer_20250622_223907.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/random_forest/imputer_20250622_223907.joblib differ diff --git 
a/models_output/iris/random_forest/imputer_20250623_013737.joblib b/models_output/iris/random_forest/imputer_20250623_013737.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/random_forest/imputer_20250623_013737.joblib differ diff --git a/models_output/iris/random_forest/imputer_20250623_014438.joblib b/models_output/iris/random_forest/imputer_20250623_014438.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/random_forest/imputer_20250623_014438.joblib differ diff --git a/models_output/iris/random_forest/imputer_20250623_021731.joblib b/models_output/iris/random_forest/imputer_20250623_021731.joblib new file mode 100644 index 0000000..cdab68d Binary files /dev/null and b/models_output/iris/random_forest/imputer_20250623_021731.joblib differ diff --git a/models_output/iris/random_forest/metrics.json b/models_output/iris/random_forest/metrics.json new file mode 100644 index 0000000..e64178a --- /dev/null +++ b/models_output/iris/random_forest/metrics.json @@ -0,0 +1,58 @@ +{ + "accuracy": 0.9, + "classification_report": { + "setosa": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 10.0 + }, + "versicolor": { + "precision": 0.8181818181818182, + "recall": 0.9, + "f1-score": 0.8571428571428571, + "support": 10.0 + }, + "virginica": { + "precision": 0.8888888888888888, + "recall": 0.8, + "f1-score": 0.8421052631578947, + "support": 10.0 + }, + "accuracy": 0.9, + "macro avg": { + "precision": 0.9023569023569024, + "recall": 0.9, + "f1-score": 0.899749373433584, + "support": 30.0 + }, + "weighted avg": { + "precision": 0.9023569023569024, + "recall": 0.9, + "f1-score": 0.8997493734335839, + "support": 30.0 + } + }, + "confusion_matrix": [ + [ + 10, + 0, + 0 + ], + [ + 0, + 9, + 1 + ], + [ + 0, + 2, + 8 + ] + ], + "best_hyperparameters": { + "classifier__max_depth": 5, + "classifier__n_estimators": 50 + }, + "training_duration_sec": 0.6021177768707275 +} \ No newline at end of file diff --git a/models_output/iris/random_forest/scaler_20250622_215007.joblib b/models_output/iris/random_forest/scaler_20250622_215007.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/random_forest/scaler_20250622_215007.joblib differ diff --git a/models_output/iris/random_forest/scaler_20250622_223717.joblib b/models_output/iris/random_forest/scaler_20250622_223717.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/random_forest/scaler_20250622_223717.joblib differ diff --git a/models_output/iris/random_forest/scaler_20250622_223907.joblib b/models_output/iris/random_forest/scaler_20250622_223907.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/random_forest/scaler_20250622_223907.joblib differ diff --git a/models_output/iris/random_forest/scaler_20250623_013737.joblib b/models_output/iris/random_forest/scaler_20250623_013737.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/random_forest/scaler_20250623_013737.joblib differ diff --git a/models_output/iris/random_forest/scaler_20250623_014438.joblib b/models_output/iris/random_forest/scaler_20250623_014438.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/random_forest/scaler_20250623_014438.joblib differ diff --git a/models_output/iris/random_forest/scaler_20250623_021731.joblib 
b/models_output/iris/random_forest/scaler_20250623_021731.joblib new file mode 100644 index 0000000..dad2258 Binary files /dev/null and b/models_output/iris/random_forest/scaler_20250623_021731.joblib differ diff --git a/models_output/training_graphs/accuracy_by_dataset_model.png b/models_output/training_graphs/accuracy_by_dataset_model.png new file mode 100644 index 0000000..b867629 Binary files /dev/null and b/models_output/training_graphs/accuracy_by_dataset_model.png differ diff --git a/models_output/training_graphs/confusion_matrix_breast_cancer_decision_tree.png b/models_output/training_graphs/confusion_matrix_breast_cancer_decision_tree.png new file mode 100644 index 0000000..e2f7ebf Binary files /dev/null and b/models_output/training_graphs/confusion_matrix_breast_cancer_decision_tree.png differ diff --git a/models_output/training_graphs/confusion_matrix_breast_cancer_logistic_regression.png b/models_output/training_graphs/confusion_matrix_breast_cancer_logistic_regression.png new file mode 100644 index 0000000..52ecdfa Binary files /dev/null and b/models_output/training_graphs/confusion_matrix_breast_cancer_logistic_regression.png differ diff --git a/models_output/training_graphs/confusion_matrix_breast_cancer_random_forest.png b/models_output/training_graphs/confusion_matrix_breast_cancer_random_forest.png new file mode 100644 index 0000000..0502c44 Binary files /dev/null and b/models_output/training_graphs/confusion_matrix_breast_cancer_random_forest.png differ diff --git a/models_output/training_graphs/confusion_matrix_iris_decision_tree.png b/models_output/training_graphs/confusion_matrix_iris_decision_tree.png new file mode 100644 index 0000000..2c9bd8f Binary files /dev/null and b/models_output/training_graphs/confusion_matrix_iris_decision_tree.png differ diff --git a/models_output/training_graphs/confusion_matrix_iris_logistic_regression.png b/models_output/training_graphs/confusion_matrix_iris_logistic_regression.png new file mode 100644 index 0000000..65cfdbb Binary files /dev/null and b/models_output/training_graphs/confusion_matrix_iris_logistic_regression.png differ diff --git a/models_output/training_graphs/confusion_matrix_iris_random_forest.png b/models_output/training_graphs/confusion_matrix_iris_random_forest.png new file mode 100644 index 0000000..e3dbecb Binary files /dev/null and b/models_output/training_graphs/confusion_matrix_iris_random_forest.png differ diff --git a/models_output/training_graphs/confusion_matrix_wine_decision_tree.png b/models_output/training_graphs/confusion_matrix_wine_decision_tree.png new file mode 100644 index 0000000..55dd5c2 Binary files /dev/null and b/models_output/training_graphs/confusion_matrix_wine_decision_tree.png differ diff --git a/models_output/training_graphs/confusion_matrix_wine_logistic_regression.png b/models_output/training_graphs/confusion_matrix_wine_logistic_regression.png new file mode 100644 index 0000000..f216359 Binary files /dev/null and b/models_output/training_graphs/confusion_matrix_wine_logistic_regression.png differ diff --git a/models_output/training_graphs/confusion_matrix_wine_random_forest.png b/models_output/training_graphs/confusion_matrix_wine_random_forest.png new file mode 100644 index 0000000..e4fce33 Binary files /dev/null and b/models_output/training_graphs/confusion_matrix_wine_random_forest.png differ diff --git a/models_output/training_graphs/f1_score_by_dataset_model.png b/models_output/training_graphs/f1_score_by_dataset_model.png new file mode 100644 index 0000000..0d05a7b Binary files 
/dev/null and b/models_output/training_graphs/f1_score_by_dataset_model.png differ diff --git a/models_output/wine/decision_tree/best_pipeline.joblib b/models_output/wine/decision_tree/best_pipeline.joblib new file mode 100644 index 0000000..ea88092 Binary files /dev/null and b/models_output/wine/decision_tree/best_pipeline.joblib differ diff --git a/models_output/wine/decision_tree/best_pipeline_20250622_215006.joblib b/models_output/wine/decision_tree/best_pipeline_20250622_215006.joblib new file mode 100644 index 0000000..ea88092 Binary files /dev/null and b/models_output/wine/decision_tree/best_pipeline_20250622_215006.joblib differ diff --git a/models_output/wine/decision_tree/best_pipeline_20250622_223716.joblib b/models_output/wine/decision_tree/best_pipeline_20250622_223716.joblib new file mode 100644 index 0000000..ea88092 Binary files /dev/null and b/models_output/wine/decision_tree/best_pipeline_20250622_223716.joblib differ diff --git a/models_output/wine/decision_tree/best_pipeline_20250622_223907.joblib b/models_output/wine/decision_tree/best_pipeline_20250622_223907.joblib new file mode 100644 index 0000000..ea88092 Binary files /dev/null and b/models_output/wine/decision_tree/best_pipeline_20250622_223907.joblib differ diff --git a/models_output/wine/decision_tree/best_pipeline_20250623_013736.joblib b/models_output/wine/decision_tree/best_pipeline_20250623_013736.joblib new file mode 100644 index 0000000..ea88092 Binary files /dev/null and b/models_output/wine/decision_tree/best_pipeline_20250623_013736.joblib differ diff --git a/models_output/wine/decision_tree/best_pipeline_20250623_014435.joblib b/models_output/wine/decision_tree/best_pipeline_20250623_014435.joblib new file mode 100644 index 0000000..ea88092 Binary files /dev/null and b/models_output/wine/decision_tree/best_pipeline_20250623_014435.joblib differ diff --git a/models_output/wine/decision_tree/best_pipeline_20250623_021721.joblib b/models_output/wine/decision_tree/best_pipeline_20250623_021721.joblib new file mode 100644 index 0000000..ea88092 Binary files /dev/null and b/models_output/wine/decision_tree/best_pipeline_20250623_021721.joblib differ diff --git a/models_output/wine/decision_tree/feature_names.joblib b/models_output/wine/decision_tree/feature_names.joblib new file mode 100644 index 0000000..0a66a20 Binary files /dev/null and b/models_output/wine/decision_tree/feature_names.joblib differ diff --git a/models_output/wine/decision_tree/feature_names_20250623_021721.joblib b/models_output/wine/decision_tree/feature_names_20250623_021721.joblib new file mode 100644 index 0000000..0a66a20 Binary files /dev/null and b/models_output/wine/decision_tree/feature_names_20250623_021721.joblib differ diff --git a/models_output/wine/decision_tree/imputer_20250622_215006.joblib b/models_output/wine/decision_tree/imputer_20250622_215006.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/decision_tree/imputer_20250622_215006.joblib differ diff --git a/models_output/wine/decision_tree/imputer_20250622_223716.joblib b/models_output/wine/decision_tree/imputer_20250622_223716.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/decision_tree/imputer_20250622_223716.joblib differ diff --git a/models_output/wine/decision_tree/imputer_20250622_223907.joblib b/models_output/wine/decision_tree/imputer_20250622_223907.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and 
b/models_output/wine/decision_tree/imputer_20250622_223907.joblib differ diff --git a/models_output/wine/decision_tree/imputer_20250623_013736.joblib b/models_output/wine/decision_tree/imputer_20250623_013736.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/decision_tree/imputer_20250623_013736.joblib differ diff --git a/models_output/wine/decision_tree/imputer_20250623_014435.joblib b/models_output/wine/decision_tree/imputer_20250623_014435.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/decision_tree/imputer_20250623_014435.joblib differ diff --git a/models_output/wine/decision_tree/imputer_20250623_021721.joblib b/models_output/wine/decision_tree/imputer_20250623_021721.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/decision_tree/imputer_20250623_021721.joblib differ diff --git a/models_output/wine/decision_tree/metrics.json b/models_output/wine/decision_tree/metrics.json new file mode 100644 index 0000000..4155351 --- /dev/null +++ b/models_output/wine/decision_tree/metrics.json @@ -0,0 +1,58 @@ +{ + "accuracy": 0.9444444444444444, + "classification_report": { + "class_0": { + "precision": 1.0, + "recall": 0.9166666666666666, + "f1-score": 0.9565217391304348, + "support": 12.0 + }, + "class_1": { + "precision": 0.875, + "recall": 1.0, + "f1-score": 0.9333333333333333, + "support": 14.0 + }, + "class_2": { + "precision": 1.0, + "recall": 0.9, + "f1-score": 0.9473684210526315, + "support": 10.0 + }, + "accuracy": 0.9444444444444444, + "macro avg": { + "precision": 0.9583333333333334, + "recall": 0.9388888888888888, + "f1-score": 0.9457411645054665, + "support": 36.0 + }, + "weighted avg": { + "precision": 0.9513888888888888, + "recall": 0.9444444444444444, + "f1-score": 0.9449614374099499, + "support": 36.0 + } + }, + "confusion_matrix": [ + [ + 11, + 1, + 0 + ], + [ + 0, + 14, + 0 + ], + [ + 0, + 1, + 9 + ] + ], + "best_hyperparameters": { + "classifier__max_depth": 3, + "classifier__min_samples_split": 2 + }, + "training_duration_sec": 0.17041397094726562 +} \ No newline at end of file diff --git a/models_output/wine/decision_tree/scaler_20250622_215006.joblib b/models_output/wine/decision_tree/scaler_20250622_215006.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/decision_tree/scaler_20250622_215006.joblib differ diff --git a/models_output/wine/decision_tree/scaler_20250622_223716.joblib b/models_output/wine/decision_tree/scaler_20250622_223716.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/decision_tree/scaler_20250622_223716.joblib differ diff --git a/models_output/wine/decision_tree/scaler_20250622_223907.joblib b/models_output/wine/decision_tree/scaler_20250622_223907.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/decision_tree/scaler_20250622_223907.joblib differ diff --git a/models_output/wine/decision_tree/scaler_20250623_013736.joblib b/models_output/wine/decision_tree/scaler_20250623_013736.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/decision_tree/scaler_20250623_013736.joblib differ diff --git a/models_output/wine/decision_tree/scaler_20250623_014435.joblib b/models_output/wine/decision_tree/scaler_20250623_014435.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and 
b/models_output/wine/decision_tree/scaler_20250623_014435.joblib differ diff --git a/models_output/wine/decision_tree/scaler_20250623_021721.joblib b/models_output/wine/decision_tree/scaler_20250623_021721.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/decision_tree/scaler_20250623_021721.joblib differ diff --git a/models_output/wine/logistic_regression/best_pipeline.joblib b/models_output/wine/logistic_regression/best_pipeline.joblib new file mode 100644 index 0000000..afa9ae8 Binary files /dev/null and b/models_output/wine/logistic_regression/best_pipeline.joblib differ diff --git a/models_output/wine/logistic_regression/best_pipeline_20250622_215006.joblib b/models_output/wine/logistic_regression/best_pipeline_20250622_215006.joblib new file mode 100644 index 0000000..30b3d39 Binary files /dev/null and b/models_output/wine/logistic_regression/best_pipeline_20250622_215006.joblib differ diff --git a/models_output/wine/logistic_regression/best_pipeline_20250622_223716.joblib b/models_output/wine/logistic_regression/best_pipeline_20250622_223716.joblib new file mode 100644 index 0000000..30b3d39 Binary files /dev/null and b/models_output/wine/logistic_regression/best_pipeline_20250622_223716.joblib differ diff --git a/models_output/wine/logistic_regression/best_pipeline_20250622_223907.joblib b/models_output/wine/logistic_regression/best_pipeline_20250622_223907.joblib new file mode 100644 index 0000000..30b3d39 Binary files /dev/null and b/models_output/wine/logistic_regression/best_pipeline_20250622_223907.joblib differ diff --git a/models_output/wine/logistic_regression/best_pipeline_20250623_013736.joblib b/models_output/wine/logistic_regression/best_pipeline_20250623_013736.joblib new file mode 100644 index 0000000..30b3d39 Binary files /dev/null and b/models_output/wine/logistic_regression/best_pipeline_20250623_013736.joblib differ diff --git a/models_output/wine/logistic_regression/best_pipeline_20250623_014435.joblib b/models_output/wine/logistic_regression/best_pipeline_20250623_014435.joblib new file mode 100644 index 0000000..30b3d39 Binary files /dev/null and b/models_output/wine/logistic_regression/best_pipeline_20250623_014435.joblib differ diff --git a/models_output/wine/logistic_regression/best_pipeline_20250623_021721.joblib b/models_output/wine/logistic_regression/best_pipeline_20250623_021721.joblib new file mode 100644 index 0000000..af40c54 Binary files /dev/null and b/models_output/wine/logistic_regression/best_pipeline_20250623_021721.joblib differ diff --git a/models_output/wine/logistic_regression/feature_names.joblib b/models_output/wine/logistic_regression/feature_names.joblib new file mode 100644 index 0000000..0a66a20 Binary files /dev/null and b/models_output/wine/logistic_regression/feature_names.joblib differ diff --git a/models_output/wine/logistic_regression/feature_names_20250623_021721.joblib b/models_output/wine/logistic_regression/feature_names_20250623_021721.joblib new file mode 100644 index 0000000..0a66a20 Binary files /dev/null and b/models_output/wine/logistic_regression/feature_names_20250623_021721.joblib differ diff --git a/models_output/wine/logistic_regression/imputer_20250622_215006.joblib b/models_output/wine/logistic_regression/imputer_20250622_215006.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/logistic_regression/imputer_20250622_215006.joblib differ diff --git a/models_output/wine/logistic_regression/imputer_20250622_223716.joblib 
b/models_output/wine/logistic_regression/imputer_20250622_223716.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/logistic_regression/imputer_20250622_223716.joblib differ diff --git a/models_output/wine/logistic_regression/imputer_20250622_223907.joblib b/models_output/wine/logistic_regression/imputer_20250622_223907.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/logistic_regression/imputer_20250622_223907.joblib differ diff --git a/models_output/wine/logistic_regression/imputer_20250623_013736.joblib b/models_output/wine/logistic_regression/imputer_20250623_013736.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/logistic_regression/imputer_20250623_013736.joblib differ diff --git a/models_output/wine/logistic_regression/imputer_20250623_014435.joblib b/models_output/wine/logistic_regression/imputer_20250623_014435.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/logistic_regression/imputer_20250623_014435.joblib differ diff --git a/models_output/wine/logistic_regression/imputer_20250623_021721.joblib b/models_output/wine/logistic_regression/imputer_20250623_021721.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/logistic_regression/imputer_20250623_021721.joblib differ diff --git a/models_output/wine/logistic_regression/metrics.json b/models_output/wine/logistic_regression/metrics.json new file mode 100644 index 0000000..2979a5d --- /dev/null +++ b/models_output/wine/logistic_regression/metrics.json @@ -0,0 +1,57 @@ +{ + "accuracy": 1.0, + "classification_report": { + "class_0": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 12.0 + }, + "class_1": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 14.0 + }, + "class_2": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 10.0 + }, + "accuracy": 1.0, + "macro avg": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 36.0 + }, + "weighted avg": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 36.0 + } + }, + "confusion_matrix": [ + [ + 12, + 0, + 0 + ], + [ + 0, + 14, + 0 + ], + [ + 0, + 0, + 10 + ] + ], + "best_hyperparameters": { + "classifier__C": 1.0 + }, + "training_duration_sec": 0.06276130676269531 +} \ No newline at end of file diff --git a/models_output/wine/logistic_regression/scaler_20250622_215006.joblib b/models_output/wine/logistic_regression/scaler_20250622_215006.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/logistic_regression/scaler_20250622_215006.joblib differ diff --git a/models_output/wine/logistic_regression/scaler_20250622_223716.joblib b/models_output/wine/logistic_regression/scaler_20250622_223716.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/logistic_regression/scaler_20250622_223716.joblib differ diff --git a/models_output/wine/logistic_regression/scaler_20250622_223907.joblib b/models_output/wine/logistic_regression/scaler_20250622_223907.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/logistic_regression/scaler_20250622_223907.joblib differ diff --git a/models_output/wine/logistic_regression/scaler_20250623_013736.joblib b/models_output/wine/logistic_regression/scaler_20250623_013736.joblib new file mode 
100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/logistic_regression/scaler_20250623_013736.joblib differ diff --git a/models_output/wine/logistic_regression/scaler_20250623_014435.joblib b/models_output/wine/logistic_regression/scaler_20250623_014435.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/logistic_regression/scaler_20250623_014435.joblib differ diff --git a/models_output/wine/logistic_regression/scaler_20250623_021721.joblib b/models_output/wine/logistic_regression/scaler_20250623_021721.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/logistic_regression/scaler_20250623_021721.joblib differ diff --git a/models_output/wine/random_forest/best_pipeline.joblib b/models_output/wine/random_forest/best_pipeline.joblib new file mode 100644 index 0000000..2d5be4f Binary files /dev/null and b/models_output/wine/random_forest/best_pipeline.joblib differ diff --git a/models_output/wine/random_forest/best_pipeline_20250622_215007.joblib b/models_output/wine/random_forest/best_pipeline_20250622_215007.joblib new file mode 100644 index 0000000..e0b6466 Binary files /dev/null and b/models_output/wine/random_forest/best_pipeline_20250622_215007.joblib differ diff --git a/models_output/wine/random_forest/best_pipeline_20250622_223717.joblib b/models_output/wine/random_forest/best_pipeline_20250622_223717.joblib new file mode 100644 index 0000000..e0b6466 Binary files /dev/null and b/models_output/wine/random_forest/best_pipeline_20250622_223717.joblib differ diff --git a/models_output/wine/random_forest/best_pipeline_20250622_223907.joblib b/models_output/wine/random_forest/best_pipeline_20250622_223907.joblib new file mode 100644 index 0000000..e0b6466 Binary files /dev/null and b/models_output/wine/random_forest/best_pipeline_20250622_223907.joblib differ diff --git a/models_output/wine/random_forest/best_pipeline_20250623_013737.joblib b/models_output/wine/random_forest/best_pipeline_20250623_013737.joblib new file mode 100644 index 0000000..e0b6466 Binary files /dev/null and b/models_output/wine/random_forest/best_pipeline_20250623_013737.joblib differ diff --git a/models_output/wine/random_forest/best_pipeline_20250623_014438.joblib b/models_output/wine/random_forest/best_pipeline_20250623_014438.joblib new file mode 100644 index 0000000..e0b6466 Binary files /dev/null and b/models_output/wine/random_forest/best_pipeline_20250623_014438.joblib differ diff --git a/models_output/wine/random_forest/best_pipeline_20250623_021731.joblib b/models_output/wine/random_forest/best_pipeline_20250623_021731.joblib new file mode 100644 index 0000000..0eaee2a Binary files /dev/null and b/models_output/wine/random_forest/best_pipeline_20250623_021731.joblib differ diff --git a/models_output/wine/random_forest/feature_names.joblib b/models_output/wine/random_forest/feature_names.joblib new file mode 100644 index 0000000..0a66a20 Binary files /dev/null and b/models_output/wine/random_forest/feature_names.joblib differ diff --git a/models_output/wine/random_forest/feature_names_20250623_021731.joblib b/models_output/wine/random_forest/feature_names_20250623_021731.joblib new file mode 100644 index 0000000..0a66a20 Binary files /dev/null and b/models_output/wine/random_forest/feature_names_20250623_021731.joblib differ diff --git a/models_output/wine/random_forest/imputer_20250622_215007.joblib b/models_output/wine/random_forest/imputer_20250622_215007.joblib new file mode 100644 
index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/random_forest/imputer_20250622_215007.joblib differ diff --git a/models_output/wine/random_forest/imputer_20250622_223717.joblib b/models_output/wine/random_forest/imputer_20250622_223717.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/random_forest/imputer_20250622_223717.joblib differ diff --git a/models_output/wine/random_forest/imputer_20250622_223907.joblib b/models_output/wine/random_forest/imputer_20250622_223907.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/random_forest/imputer_20250622_223907.joblib differ diff --git a/models_output/wine/random_forest/imputer_20250623_013737.joblib b/models_output/wine/random_forest/imputer_20250623_013737.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/random_forest/imputer_20250623_013737.joblib differ diff --git a/models_output/wine/random_forest/imputer_20250623_014438.joblib b/models_output/wine/random_forest/imputer_20250623_014438.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/random_forest/imputer_20250623_014438.joblib differ diff --git a/models_output/wine/random_forest/imputer_20250623_021731.joblib b/models_output/wine/random_forest/imputer_20250623_021731.joblib new file mode 100644 index 0000000..494d5a8 Binary files /dev/null and b/models_output/wine/random_forest/imputer_20250623_021731.joblib differ diff --git a/models_output/wine/random_forest/metrics.json b/models_output/wine/random_forest/metrics.json new file mode 100644 index 0000000..77c22e9 --- /dev/null +++ b/models_output/wine/random_forest/metrics.json @@ -0,0 +1,58 @@ +{ + "accuracy": 1.0, + "classification_report": { + "class_0": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 12.0 + }, + "class_1": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 14.0 + }, + "class_2": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 10.0 + }, + "accuracy": 1.0, + "macro avg": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 36.0 + }, + "weighted avg": { + "precision": 1.0, + "recall": 1.0, + "f1-score": 1.0, + "support": 36.0 + } + }, + "confusion_matrix": [ + [ + 12, + 0, + 0 + ], + [ + 0, + 14, + 0 + ], + [ + 0, + 0, + 10 + ] + ], + "best_hyperparameters": { + "classifier__max_depth": 5, + "classifier__n_estimators": 50 + }, + "training_duration_sec": 0.6352250576019287 +} \ No newline at end of file diff --git a/models_output/wine/random_forest/scaler_20250622_215007.joblib b/models_output/wine/random_forest/scaler_20250622_215007.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/random_forest/scaler_20250622_215007.joblib differ diff --git a/models_output/wine/random_forest/scaler_20250622_223717.joblib b/models_output/wine/random_forest/scaler_20250622_223717.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/random_forest/scaler_20250622_223717.joblib differ diff --git a/models_output/wine/random_forest/scaler_20250622_223907.joblib b/models_output/wine/random_forest/scaler_20250622_223907.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/random_forest/scaler_20250622_223907.joblib differ diff --git a/models_output/wine/random_forest/scaler_20250623_013737.joblib 
b/models_output/wine/random_forest/scaler_20250623_013737.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/random_forest/scaler_20250623_013737.joblib differ diff --git a/models_output/wine/random_forest/scaler_20250623_014438.joblib b/models_output/wine/random_forest/scaler_20250623_014438.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/random_forest/scaler_20250623_014438.joblib differ diff --git a/models_output/wine/random_forest/scaler_20250623_021731.joblib b/models_output/wine/random_forest/scaler_20250623_021731.joblib new file mode 100644 index 0000000..e7d1eb4 Binary files /dev/null and b/models_output/wine/random_forest/scaler_20250623_021731.joblib differ diff --git a/monitoring_utils.py b/monitoring_utils.py new file mode 100644 index 0000000..03f10f2 --- /dev/null +++ b/monitoring_utils.py @@ -0,0 +1,160 @@ +# monitoring_utils.py (VERSIÓN CORREGIDA Y SIMPLIFICADA) +import ray +import requests +import pandas as pd +from prometheus_client.parser import text_string_to_metric_families + +def get_cluster_nodes_status(): + """Consulta el estado de los nodos del clúster de Ray.""" + if not ray.is_initialized(): return None + + nodes = ray.nodes() + status_list = [] + for node in nodes: + resources = node.get("Resources", {}) + status_list.append({ + "Node ID": node.get("NodeID", "N/A")[:12], + "Estado": "VIVO" if node.get("Alive") else "MUERTO", + "IP": node.get("NodeManagerAddress", "N/A"), + "CPUs (Total)": resources.get("CPU", 0), + "Memoria (GB)": f"{node.get('memory_total_bytes', 0) / (1024**3):.2f}", + "Object Store (GB)": f"{node.get('object_store_memory_bytes', 0) / (1024**3):.2f}", + }) + return status_list + +def get_actor_status(actor_name="model_registry", namespace="mi_plataforma"): + """ + Verifica el estado de un actor nombrado de una manera más robusta, + sin usar ray.state. + """ + if not ray.is_initialized(): + return {"Estado": "Ray no inicializado", "Vivo": "❓"} + + try: + # El simple hecho de poder obtener el actor significa que está vivo. + # Si está muerto o no existe, ray.get_actor() lanzará un ValueError. + actor = ray.get_actor(actor_name, namespace=namespace) + + # Podemos hacer una llamada simple a un método (ping) para confirmar que responde. + # Añadamos un método 'ping' al actor para esto. + # Por ahora, asumimos que si get_actor tiene éxito, está vivo. + return { + "Nombre": actor_name, + "Estado": "ALIVE", + # No podemos obtener fácilmente el Node ID o los reinicios sin ray.state, + # así que simplificamos la salida. + "Reinicios": "N/A (API simplificada)", + "Vivo": "✅" + } + except ValueError: + # Esto ocurre si el actor no se encuentra (puede estar muerto o nunca se creó). + return {"Estado": "❌ No Encontrado / Muerto", "Vivo": "❌"} + +# La función get_inference_stats se mantiene igual +def get_inference_stats(client): + """ + Obtiene las estadísticas de inferencia desde el endpoint /metrics + utilizando un ResilientClient para alta disponibilidad. + """ + try: + # En lugar de requests.get(metrics_url), usamos nuestro cliente inteligente. + # El endpoint para las métricas de Prometheus es siempre '/metrics'. + response = client.make_request("GET", "/metrics", timeout=5) + + stats = { + "total_requests": 0, + "average_latency_ms": 0.0, + "details_by_model": [] + } + + model_data = {} + # El resto de la lógica para parsear las métricas es la misma. 
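+        # For reference, the raw /metrics text parsed below contains Prometheus exposition
+        # lines emitted by the inference API's Histogram, roughly like the following
+        # (label values are hypothetical; they depend on which models have served traffic):
+        #   inference_latency_seconds_count{dataset="iris",model_type="random_forest"} 42.0
+        #   inference_latency_seconds_sum{dataset="iris",model_type="random_forest"} 1.37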
+ for family in text_string_to_metric_families(response.text): + if family.name == "inference_latency_seconds_count": + for sample in family.samples: + model_key = tuple(sorted(sample.labels.items())) + if model_key not in model_data: + model_data[model_key] = {"count": 0, "sum": 0.0} + model_data[model_key]["count"] += sample.value + elif family.name == "inference_latency_seconds_sum": + for sample in family.samples: + model_key = tuple(sorted(sample.labels.items())) + if model_key not in model_data: + model_data[model_key] = {"count": 0, "sum": 0.0} + model_data[model_key]["sum"] += sample.value + + total_latency_sum = sum(data["sum"] for data in model_data.values()) + stats["total_requests"] = int(sum(data["count"] for data in model_data.values())) + + for model_key, data in model_data.items(): + labels_dict = dict(model_key) + model_name = f"{labels_dict.get('dataset', 'N/A')}/{labels_dict.get('model_type', 'N/A')}" + stats["details_by_model"].append({ + "Modelo": model_name, "Peticiones": int(data["count"]), + "Latencia Media (ms)": (data["sum"] / data["count"]) * 1000 if data["count"] > 0 else 0 + }) + if stats["total_requests"] > 0: + stats["average_latency_ms"] = (total_latency_sum / stats["total_requests"]) * 1000 + return stats + except Exception as e: + return {"error": f"No se pudo obtener las métricas de inferencia: {e}"} + +def get_aggregated_inference_stats(client): + """ + Obtiene métricas de TODAS las réplicas de un servicio y las agrega. + """ + # Usamos la lista de servidores del cliente resiliente + all_servers = client.servers + if not all_servers: + return {"error": "No se encontraron servidores para el servicio de inferencia."} + + aggregated_stats = { + "total_requests": 0, + "total_latency_sum": 0.0, + "details_by_model": {} # Usamos un dict para agregar por modelo + } + + # Iteramos sobre cada servidor descubierto + for server_url in all_servers: + try: + # Hacemos una petición directa a cada réplica + response = requests.get(f"{server_url}/metrics", timeout=2) + response.raise_for_status() + + # Parseamos la respuesta de esta réplica específica + for family in text_string_to_metric_families(response.text): + if family.name == "inference_latency_seconds_count": + for sample in family.samples: + # Creamos una clave única para cada modelo/dataset + model_key = f"{sample.labels.get('dataset', 'N/A')}/{sample.labels.get('model_type', 'N/A')}" + if model_key not in aggregated_stats["details_by_model"]: + aggregated_stats["details_by_model"][model_key] = {"count": 0, "sum": 0.0} + aggregated_stats["details_by_model"][model_key]["count"] += sample.value + elif family.name == "inference_latency_seconds_sum": + for sample in family.samples: + model_key = f"{sample.labels.get('dataset', 'N/A')}/{sample.labels.get('model_type', 'N/A')}" + if model_key not in aggregated_stats["details_by_model"]: + aggregated_stats["details_by_model"][model_key] = {"count": 0, "sum": 0.0} + aggregated_stats["details_by_model"][model_key]["sum"] += sample.value + + except requests.exceptions.RequestException as e: + print(f"No se pudo obtener métricas de la réplica {server_url}: {e}") + # Continuamos con la siguiente réplica si una falla + + # Ahora calculamos los totales a partir de los datos agregados + total_reqs = sum(data["count"] for data in aggregated_stats["details_by_model"].values()) + total_latency = sum(data["sum"] for data in aggregated_stats["details_by_model"].values()) + + final_stats = { + "total_requests": int(total_reqs), + "average_latency_ms": (total_latency / total_reqs) 
* 1000 if total_reqs > 0 else 0, + "details_by_model": [ + { + "Modelo": model_key, + "Peticiones": int(data["count"]), + "Latencia Media (ms)": (data["sum"] / data["count"]) * 1000 if data["count"] > 0 else 0 + } for model_key, data in aggregated_stats["details_by_model"].items() + ] + } + + return final_stats \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d52d906 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,83 @@ +aiohappyeyeballs==2.6.1 +aiohttp==3.12.13 +aiohttp-cors==0.8.1 +python-multipart +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +async-timeout==5.0.1 +attrs==25.3.0 +cachetools==5.5.2 +certifi==2025.6.15 +charset-normalizer==3.4.2 +click==8.2.1 +colorama==0.4.6 +colorful==0.5.6 +distlib==0.3.9 +exceptiongroup==1.3.0 +fastapi==0.115.13 +filelock==3.18.0 +frozenlist==1.7.0 +google-api-core==2.25.1 +google-auth==2.40.3 +googleapis-common-protos==1.70.0 +grpcio==1.73.0 +h11==0.16.0 +httptools==0.6.4 +idna==3.10 +importlib_metadata==8.7.0 +joblib==1.5.1 +jsonschema==4.24.0 +jsonschema-specifications==2025.4.1 +msgpack==1.1.1 +multidict==6.5.0 +numpy==2.2.6 +opencensus==0.11.4 +opencensus-context==0.1.3 +opentelemetry-api==1.34.1 +opentelemetry-exporter-prometheus==0.55b1 +opentelemetry-proto==1.34.1 +opentelemetry-sdk==1.34.1 +opentelemetry-semantic-conventions==0.55b1 +packaging==25.0 +pandas==2.3.0 +platformdirs==4.3.8 +prometheus_client==0.22.1 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==5.29.5 +py-spy==0.4.0 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pydantic==2.11.7 +pydantic_core==2.33.2 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.0 +pytz==2025.2 +PyYAML==6.0.2 +ray==2.47.1 +referencing==0.36.2 +requests==2.32.4 +rpds-py==0.25.1 +rsa==4.9.1 +scikit-learn==1.7.0 +scipy==1.15.3 +six==1.17.0 +smart-open==7.1.0 +sniffio==1.3.1 +starlette==0.46.2 +threadpoolctl==3.6.0 +typing-inspection==0.4.1 +typing_extensions==4.14.0 +tzdata==2025.2 +urllib3==2.5.0 +uvicorn==0.34.3 +virtualenv==20.31.2 +watchfiles==1.1.0 +websockets==15.0.1 +wrapt==1.17.2 +yarl==1.20.1 +zipp==3.23.0 +matplotlib +seaborn +streamlit diff --git a/train.py b/train.py new file mode 100644 index 0000000..4b8a6af --- /dev/null +++ b/train.py @@ -0,0 +1,205 @@ +# train.py + +import ray +import os +import pickle +import json +import traceback +import pandas as pd +import socket +import time +from typing import List + +from sklearn.ensemble import RandomForestClassifier +from sklearn.impute import SimpleImputer +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import accuracy_score, classification_report, confusion_matrix +from sklearn.model_selection import train_test_split, GridSearchCV +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.tree import DecisionTreeClassifier + +# --- Actor de Registro de Modelos (Guarda artefactos en memoria) --- +@ray.remote +class ModelRegistryActor: + def __init__(self): + # Almacena el diccionario completo de artefactos serializados (bytes) + self.registered_models = {} + print(f"[{self.__class__.__name__}] Actor de Registro de Modelos (en memoria) inicializado.") + + def register_model(self, dataset_name, model_type, result_dict): + """ + Registra un modelo bajo un dataset específico. 
+ """ + + if dataset_name not in self.registered_models: + self.registered_models[dataset_name] = {} + + self.registered_models[dataset_name][model_type] = result_dict + print(f"Registro en memoria para {dataset_name}/{model_type} completado.") + return True + + def get_model_artifacts(self, dataset_name, model_type): + """ + Devuelve el diccionario de artefactos serializados desde la memoria. + """ + return self.registered_models.get(dataset_name, {}).get(model_type) + + def list_models_details(self): + details = {} + for dataset, models in self.registered_models.items(): + model_types = list(models.keys()) + if model_types: + details[dataset] = {"available_models": model_types} + return details + + def delete_dataset_models(self, dataset_name): + if dataset_name in self.registered_models: + del self.registered_models[dataset_name] + # Nota: Esto no borra el archivo del disco. Se podría añadir esa lógica si se desea. + return True + return False + +# --- Tarea de Entrenamiento Remota (Sin Cambios) --- +@ray.remote(num_cpus=1) +def train_and_serialize_model( + data_df_input, + target_column_name: str, + model_config: dict, + param_grid_config: dict, + dataset_id: str, + feature_names: List[str], + class_names_for_metrics: List[str] +): + hostname_str = socket.gethostname() + log_prefix = f"Worker [{hostname_str}]" + data_df = data_df_input + + model_type = model_config['type'] + print(f"{log_prefix} - Iniciando entrenamiento para {model_type} en {dataset_id}...") + + try: + X = data_df.drop(columns=[target_column_name]) + y = data_df[target_column_name] + + stratify_option = y if len(y.unique()) > 1 and all(y.value_counts() >= 3) else None + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42, stratify=stratify_option + ) + + preprocessor = Pipeline([ + ('imputer', SimpleImputer(strategy='mean')), + ('scaler', StandardScaler()) + ]) + + base_model_params = model_config.get('params', {}) + if model_type == "logistic_regression": + model_instance = LogisticRegression(**base_model_params) + elif model_type == "decision_tree": + model_instance = DecisionTreeClassifier(**base_model_params) + elif model_type == "random_forest": + model_instance = RandomForestClassifier(**base_model_params) + else: + raise ValueError(f"Tipo de modelo '{model_type}' no reconocido.") + + full_pipeline = Pipeline([ + ('preprocessor', preprocessor), + ('classifier', model_instance) + ]) + + param_grid = {f'classifier__{k}': v for k, v in param_grid_config.items()} + grid_search = GridSearchCV(estimator=full_pipeline, param_grid=param_grid, cv=3, n_jobs=1) + + start_fit_time = time.time() + grid_search.fit(X_train, y_train) + fit_duration = time.time() - start_fit_time + + best_pipeline_obj = grid_search.best_estimator_ + y_pred = best_pipeline_obj.predict(X_test) + + metrics = { + 'accuracy': accuracy_score(y_test, y_pred), + 'classification_report': classification_report(y_test, y_pred, target_names=class_names_for_metrics, output_dict=True, zero_division=0), + 'confusion_matrix': confusion_matrix(y_test, y_pred).tolist(), + 'best_hyperparameters': grid_search.best_params_, + 'training_duration_sec': fit_duration + } + + print(f"{log_prefix} - Entrenamiento para {model_type} en {dataset_id} completado. 
Accuracy: {metrics['accuracy']:.4f}") + + result_dict = { + "pipeline": pickle.dumps(best_pipeline_obj), + "feature_names": pickle.dumps(feature_names), + "metrics": json.dumps(metrics).encode('utf-8') + } + return result_dict + + except Exception as e: + print(f"{log_prefix} - ERROR en la tarea de entrenamiento para {dataset_id}/{model_config['type']}: {e}") + traceback.print_exc() + return None + +# --- Función Orquestadora Principal (Con Persistencia) --- +def run_complete_training_job(dataset_name: str, df: pd.DataFrame, target_column: str, models_to_train: List[str]): + print(f"ORQUESTADOR: Iniciando trabajo para '{dataset_name}'. Modelos: {models_to_train}") + + registry_actor = ray.get_actor("model_registry", namespace="mi_plataforma") + + feature_names = [col for col in df.columns if col != target_column] + class_names = [str(c) for c in sorted(df[target_column].unique())] + + all_model_configurations = { + 'logistic_regression': {'params': {'max_iter': 200, 'random_state': 42}, 'param_grid': {'C': [0.1, 1.0]}}, + 'decision_tree': {'params': {'random_state': 42}, 'param_grid': {'max_depth': [5], 'min_samples_split': [5]}}, + 'random_forest': {'params': {'n_jobs': 1, 'random_state': 42}, 'param_grid': {'n_estimators': [50], 'max_depth': [10]}} + } + + selected_model_configs = { + model_type: config for model_type, config in all_model_configurations.items() if model_type in models_to_train + } + + if not selected_model_configs: + return f"Error: Ninguno de los modelos solicitados {models_to_train} es válido." + + print(f"ORQUESTADOR: Lanzando {len(selected_model_configs)} tareas para '{dataset_name}'...") + + task_refs = [] + for model_type, model_config in selected_model_configs.items(): + task_ref = train_and_serialize_model.remote( + df, + target_column, + {'type': model_type, 'params': model_config['params']}, + model_config['param_grid'], + dataset_name, + feature_names, + class_names + ) + task_refs.append((model_type, task_ref)) + + print(f"ORQUESTADOR: Esperando la finalización de {len(task_refs)} tareas...") + + base_model_dir = "/app/persistent_models" + os.makedirs(base_model_dir, exist_ok=True) + + for model_type, ref in task_refs: + try: + result_dictionary = ray.get(ref) + if result_dictionary: + # 1. Registrar en el actor en memoria para acceso rápido + registry_actor.register_model.remote(dataset_name, model_type, result_dictionary) + + # 2. Guardar en disco para persistencia a largo plazo + model_file_path = os.path.join(base_model_dir, f"{dataset_name}_{model_type}.pkl") + with open(model_file_path, "wb") as f: + pickle.dump(result_dictionary, f) + + print(f"ORQUESTADOR: Modelo guardado en disco en: {model_file_path}") + else: + print(f"ORQUESTADOR: Tarea para {model_type} en {dataset_name} falló (devolvió None).") + except Exception as e: + print(f"ORQUESTADOR: Excepción al obtener resultado para {model_type}: {e}") + traceback.print_exc() + + final_message = f"Trabajo para '{dataset_name}' completado." 
+ print(f"ORQUESTADOR: {final_message}") + return final_message \ No newline at end of file diff --git a/wait_for_ray_head.py b/wait_for_ray_head.py new file mode 100644 index 0000000..e69de29 diff --git a/wait_for_services.py b/wait_for_services.py new file mode 100644 index 0000000..3850021 --- /dev/null +++ b/wait_for_services.py @@ -0,0 +1,37 @@ +# wait_for_services.py +import socket +import time +import sys + +def wait_for_service(host, port, service_name): + """Intenta conectarse a un servicio host:port hasta que tenga éxito o se agote el tiempo.""" + print(f'WAIT_SCRIPT: Esperando a {service_name} en {host}:{port}...') + + # Intentar por ~90 segundos (45 intentos * 2s de espera + timeout) + for i in range(45): + try: + # Usar 'with' asegura que el socket se cierre automáticamente + with socket.create_connection((host, port), timeout=2): + print(f'WAIT_SCRIPT: ¡{service_name} está accesible!') + return True + except (socket.timeout, ConnectionRefusedError, socket.gaierror) as e: + # ConnectionRefusedError es común si el servidor aún no está escuchando + print(f'WAIT_SCRIPT: Intento {i+1}/45, {service_name} aún no está listo ({type(e).__name__})...') + time.sleep(2) + except Exception as e_generic: + print(f'WAIT_SCRIPT: Intento {i+1}/45: Error de socket inesperado ({type(e_generic).__name__}: {e_generic})...') + time.sleep(2) + + print(f'WAIT_SCRIPT ERROR: {service_name} no estuvo disponible después del tiempo de espera.') + return False + +if __name__ == "__main__": + # Verificar Management API + if not wait_for_service('management-api', 9000, 'Management API'): + sys.exit(1) # Salir con error si no se puede conectar + + # Verificar Inference API + if not wait_for_service('api-service', 8000, 'Inference API'): + sys.exit(1) + + print("WAIT_SCRIPT: Todas las APIs dependientes están listas.") \ No newline at end of file