From 27c8592ea2d8096a3a53e16dd9cf889dc248aba7 Mon Sep 17 00:00:00 2001 From: Arpit Jain <3242828+arpitjain099@users.noreply.github.com> Date: Sat, 8 Mar 2025 22:53:17 +0900 Subject: [PATCH 1/8] Code scanning fixes Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- .../Advanced_RAG/src/session_manager/app.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Solution_Accelerators/Advanced_RAG/src/session_manager/app.py b/Solution_Accelerators/Advanced_RAG/src/session_manager/app.py index e39fe1dc..7882f3c1 100644 --- a/Solution_Accelerators/Advanced_RAG/src/session_manager/app.py +++ b/Solution_Accelerators/Advanced_RAG/src/session_manager/app.py @@ -108,7 +108,11 @@ async def health_check(request: web.Request): @routes.get("/assets/{rest_of_path}") async def assets(request: web.Request): rest_of_path = request.match_info.get("rest_of_path", None) - return web.FileResponse(f"assets/{rest_of_path}") + base_path = os.path.join("assets") + full_path = os.path.normpath(os.path.join(base_path, rest_of_path)) + if not full_path.startswith(base_path): + raise web.HTTPForbidden(reason="Invalid path") + return web.FileResponse(full_path) # Serve content files from blob storage from within the app to keep the example self-contained. From f79838e73d949b8effd1652e7a4a12b9111a915c Mon Sep 17 00:00:00 2001 From: Arpit Jain <3242828+arpitjain099@users.noreply.github.com> Date: Sat, 8 Mar 2025 22:55:13 +0900 Subject: [PATCH 2/8] Code scanning fixes Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- .../AOAISearchDemo/scripts/prepopulate/prepopulate.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/End_to_end_Solutions/AOAISearchDemo/scripts/prepopulate/prepopulate.py b/End_to_end_Solutions/AOAISearchDemo/scripts/prepopulate/prepopulate.py index a2d95f7e..42757d04 100644 --- a/End_to_end_Solutions/AOAISearchDemo/scripts/prepopulate/prepopulate.py +++ b/End_to_end_Solutions/AOAISearchDemo/scripts/prepopulate/prepopulate.py @@ -9,9 +9,16 @@ from data.managers.permissions.manager import PermissionsManager from typing import List, Set +import os + +SAFE_ROOT = os.path.abspath("./entries") + def read_yaml(file_path, verbose) -> dict: if verbose: print(f"Reading YAML file: {file_path}") - with open(file_path, "r") as f: + normalized_path = os.path.normpath(os.path.join(SAFE_ROOT, file_path)) + if not normalized_path.startswith(SAFE_ROOT): + raise Exception(f"Invalid file path: {file_path}") + with open(normalized_path, "r") as f: try: return yaml.safe_load(f) except yaml.YAMLError as exc: From bca746e1ee394ba6c959d0e2c69ae7f53e2f5767 Mon Sep 17 00:00:00 2001 From: Arpit Jain <3242828+arpitjain099@users.noreply.github.com> Date: Sat, 8 Mar 2025 23:01:06 +0900 Subject: [PATCH 3/8] Code scanning fixes Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- .../Advanced_RAG/src/evals/rag_eval/__main__.py | 7 ++++--- .../Advanced_RAG/src/evals/rag_eval/requirements.txt | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py index 0b8c56f2..03240582 100644 --- a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py +++ b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py @@ -9,6 +9,7 @@ import sys import time import uuid +from werkzeug.utils import secure_filename import pandas as pd import requests @@ -174,7 +175,7 @@ def get_arguments(): current_time = pd.Timestamp.now().strftime("%Y-%m-%d_%H%M%S") aml_dataset = parsed_config.aml_dataset experiment_id = f"RAG-Bot-Eval_Dataset_eq_{aml_dataset}_Start_eq_{current_time}" - parsed_config.experiment_id = experiment_id + parsed_config.experiment_id = secure_filename(experiment_id) base_path = os.path.join(os.path.dirname(__file__), "results") save_path = os.path.normpath(os.path.join(base_path, experiment_id)) @@ -719,8 +720,8 @@ def evaluate(config: argparse.Namespace): combined_results = { "config": config.__dict__, - "metrics": json.load(open(os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "run_metrics.json")))), - "answers": pd.read_csv(os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "run_details.csv"))).to_dict(), + "metrics": json.load(open(os.path.join(current_dir, "results", config.experiment_id, "run_metrics.json"))), + "answers": pd.read_csv(os.path.join(current_dir, "results", config.experiment_id, "run_details.csv")).to_dict(), } metrics_path = os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "combined_results.json")) if not metrics_path.startswith(os.path.join(current_dir, "results")): diff --git a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt index b1be9e53..163b0ff3 100644 --- a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt +++ b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt @@ -21,4 +21,5 @@ build==1.2.2.post1 marshmallow==3.23.2 azure-keyvault-secrets==4.7.0 opencensus-ext-azure==1.1.9 -opencensus==0.11.2 \ No newline at end of file +opencensus==0.11.2 +werkzeug==3.1.3 \ No newline at end of file From b10d9f7b2a6ddc2ff462821a29cd48f26242ddae Mon Sep 17 00:00:00 2001 From: Arpit Jain <3242828+arpitjain099@users.noreply.github.com> Date: Sat, 8 Mar 2025 23:01:19 +0900 Subject: [PATCH 4/8] Code scanning fixes Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- .../Advanced_RAG/src/evals/rag_eval/evaluation_params.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/evaluation_params.py b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/evaluation_params.py index f092edae..041d75ae 100644 --- a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/evaluation_params.py +++ b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/evaluation_params.py @@ -186,10 +186,13 @@ def get_arguments(): experiment_id = f"RAG-Eval_Dataset_eq_{dataset_name}_Start_eq_{current_time}" parsed_config.experiment_id = experiment_id - save_path = os.path.join(os.path.dirname(__file__), f"results/{experiment_id}") + base_path = os.path.join(os.path.dirname(__file__), "results") + save_path = os.path.normpath(os.path.join(base_path, experiment_id)) + if not save_path.startswith(base_path): + raise Exception("Invalid experiment ID resulting in unsafe path") parsed_config.save_path = save_path os.makedirs(save_path, exist_ok=True) - with open(f"{save_path}/config.json", "w") as f: + with open(os.path.join(save_path, "config.json"), "w") as f: json.dump(vars(parsed_config), f, indent=4) return parsed_config From 1c3f34f1e7c13c10898ec11d4d76b3260ae54809 Mon Sep 17 00:00:00 2001 From: Arpit Jain <3242828+arpitjain099@users.noreply.github.com> Date: Sat, 8 Mar 2025 23:01:58 +0900 Subject: [PATCH 5/8] Code scanning fixes Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- Solution_Accelerators/Retail/src/session_manager/app.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Solution_Accelerators/Retail/src/session_manager/app.py b/Solution_Accelerators/Retail/src/session_manager/app.py index bec8a6a4..0abd3d0a 100644 --- a/Solution_Accelerators/Retail/src/session_manager/app.py +++ b/Solution_Accelerators/Retail/src/session_manager/app.py @@ -108,7 +108,11 @@ async def health_check(request: web.Request): @routes.get("/assets/{rest_of_path}") async def assets(request: web.Request): rest_of_path = request.match_info.get("rest_of_path", None) - return web.FileResponse(f"assets/{rest_of_path}") + base_path = "assets" + full_path = os.path.normpath(os.path.join(base_path, rest_of_path)) + if not full_path.startswith(base_path): + raise web.HTTPForbidden(reason="Invalid path") + return web.FileResponse(full_path) # Serve content files from blob storage from within the app to keep the example self-contained. From eb7c85237d6cedc00c87faf8a80512d091b8408e Mon Sep 17 00:00:00 2001 From: Arpit Jain <3242828+arpitjain099@users.noreply.github.com> Date: Sat, 8 Mar 2025 23:09:16 +0900 Subject: [PATCH 6/8] Code scanning fixes Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- .../AOAISearchDemo/scripts/indexing/prepdocs.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/End_to_end_Solutions/AOAISearchDemo/scripts/indexing/prepdocs.py b/End_to_end_Solutions/AOAISearchDemo/scripts/indexing/prepdocs.py index 5c87789c..7993caf0 100644 --- a/End_to_end_Solutions/AOAISearchDemo/scripts/indexing/prepdocs.py +++ b/End_to_end_Solutions/AOAISearchDemo/scripts/indexing/prepdocs.py @@ -69,6 +69,14 @@ parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output") args = parser.parse_args() +# Define a safe root directory +SAFE_ROOT = os.path.abspath(os.getcwd()) + +# Normalize and validate the user-provided path +user_path = os.path.abspath(os.path.normpath(args.files)) +if not user_path.startswith(SAFE_ROOT): + raise ValueError("The provided path is not allowed.") + # Use the current user identity to connect to Azure services unless a key is explicitly set for any of them azd_credential = AzureDeveloperCliCredential() if args.tenantid == None else AzureDeveloperCliCredential( tenant_id=args.tenantid) @@ -360,7 +368,7 @@ def remove_from_index(filename): create_search_index() print(f"Processing files...") - for root, dirs, files in os.walk(args.files): + for root, dirs, files in os.walk(user_path): for file in files: filename = os.path.join(root, file) if args.verbose: print(f"Processing '{filename}'") From 0e5798d43888f2fcefa1fdde16910f3c4c01e0c8 Mon Sep 17 00:00:00 2001 From: Arpit Jain <3242828+arpitjain099@users.noreply.github.com> Date: Sat, 8 Mar 2025 23:22:59 +0900 Subject: [PATCH 7/8] Revert back --- .../src/evals/rag_eval/__main__.py | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py index 03240582..67d2f8c4 100644 --- a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py +++ b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py @@ -9,7 +9,6 @@ import sys import time import uuid -from werkzeug.utils import secure_filename import pandas as pd import requests @@ -175,14 +174,11 @@ def get_arguments(): current_time = pd.Timestamp.now().strftime("%Y-%m-%d_%H%M%S") aml_dataset = parsed_config.aml_dataset experiment_id = f"RAG-Bot-Eval_Dataset_eq_{aml_dataset}_Start_eq_{current_time}" - parsed_config.experiment_id = secure_filename(experiment_id) + parsed_config.experiment_id = experiment_id - base_path = os.path.join(os.path.dirname(__file__), "results") - save_path = os.path.normpath(os.path.join(base_path, experiment_id)) - if not save_path.startswith(base_path): - raise Exception("Invalid experiment ID resulting in unsafe path.") + save_path = os.path.join(os.path.dirname(__file__), f"results/{experiment_id}") os.makedirs(save_path, exist_ok=True) - with open(os.path.join(save_path, "config.json"), "w") as f: + with open(f"{save_path}/config.json", "w") as f: json.dump(vars(parsed_config), f, indent=4) return parsed_config @@ -204,10 +200,7 @@ def load_previous_run_config(): default=None, ) parsed_config = arg_parser.parse_args() - base_path = os.path.join(os.path.dirname(__file__), "results") - config_path = os.path.normpath(os.path.join(base_path, parsed_config.resume_run_id, "config.json")) - if not config_path.startswith(base_path): - raise Exception("Invalid resume run ID resulting in unsafe path.") + config_path = f"{os.path.dirname(__file__)}/results/{parsed_config.resume_run_id}/config.json" parsed_config = json.load(open(config_path, "r")) print(f"Resuming run with ID: {parsed_config['experiment_id']}") @@ -720,14 +713,11 @@ def evaluate(config: argparse.Namespace): combined_results = { "config": config.__dict__, - "metrics": json.load(open(os.path.join(current_dir, "results", config.experiment_id, "run_metrics.json"))), - "answers": pd.read_csv(os.path.join(current_dir, "results", config.experiment_id, "run_details.csv")).to_dict(), + "metrics": json.load(open(f"{current_dir}/results/{config.experiment_id}/run_metrics.json")), + "answers": pd.read_csv(f"{current_dir}/results/{config.experiment_id}/run_details.csv").to_dict(), } - metrics_path = os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "combined_results.json")) - if not metrics_path.startswith(os.path.join(current_dir, "results")): - raise Exception("Invalid experiment ID resulting in unsafe path.") json.dump( - combined_results, open(metrics_path, "w"), indent=4 + combined_results, open(f"{current_dir}/results/{config.experiment_id}/combined_results.json", "w"), indent=4 ) return combined_results From a651fcbceebaac8b70537ea270fc1d1cb24e2d1a Mon Sep 17 00:00:00 2001 From: Arpit Jain <3242828+arpitjain099@users.noreply.github.com> Date: Sat, 8 Mar 2025 23:23:17 +0900 Subject: [PATCH 8/8] Update requirements.txt --- .../Advanced_RAG/src/evals/rag_eval/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt index 163b0ff3..01fa7a6c 100644 --- a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt +++ b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt @@ -22,4 +22,3 @@ marshmallow==3.23.2 azure-keyvault-secrets==4.7.0 opencensus-ext-azure==1.1.9 opencensus==0.11.2 -werkzeug==3.1.3 \ No newline at end of file