arpitjain099 · arpitjain099 · Mar 8, 2025 · Mar 8, 2025 · Mar 8, 2025 · Mar 8, 2025
diff --git a/End_to_end_Solutions/AOAISearchDemo/scripts/indexing/prepdocs.py b/End_to_end_Solutions/AOAISearchDemo/scripts/indexing/prepdocs.py
@@ -69,6 +69,14 @@
 parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
 args = parser.parse_args()
 
+# Define a safe root directory
+SAFE_ROOT = os.path.abspath(os.getcwd())
+
+# Normalize the user path and require it to be SAFE_ROOT or below it; commonpath compares whole components, so a sibling like "<root>_other" is rejected (a startswith() prefix test would accept it)
+user_path = os.path.abspath(os.path.normpath(args.files))
+if os.path.commonpath([SAFE_ROOT, user_path]) != SAFE_ROOT:
+    raise ValueError("The provided path is not allowed.")
+
 # Use the current user identity to connect to Azure services unless a key is explicitly set for any of them
 azd_credential = AzureDeveloperCliCredential() if args.tenantid == None else AzureDeveloperCliCredential(
     tenant_id=args.tenantid)
@@ -360,7 +368,7 @@ def remove_from_index(filename):
         create_search_index()
 
     print(f"Processing files...")
-    for root, dirs, files in os.walk(args.files):
+    for root, dirs, files in os.walk(user_path):
         for file in files:
             filename = os.path.join(root, file)
             if args.verbose: print(f"Processing '{filename}'")

diff --git a/End_to_end_Solutions/AOAISearchDemo/scripts/prepopulate/prepopulate.py b/End_to_end_Solutions/AOAISearchDemo/scripts/prepopulate/prepopulate.py
@@ -9,9 +9,16 @@
 from data.managers.permissions.manager import PermissionsManager
 from typing import List, Set
 
+import os
+
+SAFE_ROOT = os.path.abspath("./entries")
+
 def read_yaml(file_path, verbose) -> dict:
     if verbose: print(f"Reading YAML file: {file_path}")
-    with open(file_path, "r") as f:
+    normalized_path = os.path.normpath(os.path.join(SAFE_ROOT, file_path))
+    if os.path.commonpath([SAFE_ROOT, normalized_path]) != SAFE_ROOT:  # component-wise check; a startswith() prefix test would accept "<SAFE_ROOT>_other"
+        raise Exception(f"Invalid file path: {file_path}")
+    with open(normalized_path, "r") as f:
         try:
             return yaml.safe_load(f)
         except yaml.YAMLError as exc:

diff --git a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py
@@ -176,12 +176,9 @@
     experiment_id = f"RAG-Bot-Eval_Dataset_eq_{aml_dataset}_Start_eq_{current_time}"
     parsed_config.experiment_id = experiment_id
 
-    base_path = os.path.join(os.path.dirname(__file__), "results")
-    save_path = os.path.normpath(os.path.join(base_path, experiment_id))
-    if not save_path.startswith(base_path):
-        raise Exception("Invalid experiment ID resulting in unsafe path.")
+    save_path = os.path.join(os.path.dirname(__file__), f"results/{experiment_id}")
     os.makedirs(save_path, exist_ok=True)
-    with open(os.path.join(save_path, "config.json"), "w") as f:
+    with open(f"{save_path}/config.json", "w") as f:
@@ -174,3 +174,5 @@
    current_time = pd.Timestamp.now().strftime("%Y-%m-%d_%H%M%S")
-    aml_dataset = parsed_config.aml_dataset
+    aml_dataset = os.path.normpath(parsed_config.aml_dataset)
+    if os.path.isabs(aml_dataset) or ".." in aml_dataset.split(os.path.sep):
+        raise ValueError("Invalid aml_dataset value.")
    experiment_id = f"RAG-Bot-Eval_Dataset_eq_{aml_dataset}_Start_eq_{current_time}"
@@ -174,3 +174,5 @@
    current_time = pd.Timestamp.now().strftime("%Y-%m-%d_%H%M%S")
-    aml_dataset = parsed_config.aml_dataset
+    aml_dataset = os.path.normpath(parsed_config.aml_dataset)
+    if os.path.isabs(aml_dataset) or ".." in aml_dataset.split(os.path.sep):
+        raise ValueError("Invalid aml_dataset value.")
    experiment_id = f"RAG-Bot-Eval_Dataset_eq_{aml_dataset}_Start_eq_{current_time}"
         json.dump(vars(parsed_config), f, indent=4)
 
     return parsed_config
@@ -203,10 +200,7 @@
         default=None,
     )
     parsed_config = arg_parser.parse_args()
-    base_path = os.path.join(os.path.dirname(__file__), "results")
-    config_path = os.path.normpath(os.path.join(base_path, parsed_config.resume_run_id, "config.json"))
-    if not config_path.startswith(base_path):
-        raise Exception("Invalid resume run ID resulting in unsafe path.")
+    config_path = f"{os.path.dirname(__file__)}/results/{parsed_config.resume_run_id}/config.json"
     parsed_config = json.load(open(config_path, "r"))
 
     print(f"Resuming run with ID: {parsed_config['experiment_id']}")
@@ -719,14 +713,11 @@
 
     combined_results = {
         "config": config.__dict__,
-        "metrics": json.load(open(os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "run_metrics.json")))),
-        "answers": pd.read_csv(os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "run_details.csv"))).to_dict(),
+        "metrics": json.load(open(f"{current_dir}/results/{config.experiment_id}/run_metrics.json")),
+        "answers": pd.read_csv(f"{current_dir}/results/{config.experiment_id}/run_details.csv").to_dict(),
     }
-    metrics_path = os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "combined_results.json"))
-    if not metrics_path.startswith(os.path.join(current_dir, "results")):
-        raise Exception("Invalid experiment ID resulting in unsafe path.")
     json.dump(
-        combined_results, open(metrics_path, "w"), indent=4
+        combined_results, open(f"{current_dir}/results/{config.experiment_id}/combined_results.json", "w"), indent=4
     )
     return combined_results
 

diff --git a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/evaluation_params.py b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/evaluation_params.py
@@ -186,10 +186,13 @@ def get_arguments():
     experiment_id = f"RAG-Eval_Dataset_eq_{dataset_name}_Start_eq_{current_time}"
     parsed_config.experiment_id = experiment_id
 
-    save_path = os.path.join(os.path.dirname(__file__), f"results/{experiment_id}")
+    base_path = os.path.normpath(os.path.join(os.path.dirname(__file__), "results"))  # normalized so it is comparable with the normalized save_path
+    save_path = os.path.normpath(os.path.join(base_path, experiment_id))
+    if os.path.commonpath([base_path, save_path]) != base_path:  # component-wise check; a startswith() prefix test would accept a sibling such as "results_other"
+        raise Exception("Invalid experiment ID resulting in unsafe path")
     parsed_config.save_path = save_path
     os.makedirs(save_path, exist_ok=True)
-    with open(f"{save_path}/config.json", "w") as f:
+    with open(os.path.join(save_path, "config.json"), "w") as f:
         json.dump(vars(parsed_config), f, indent=4)
 
     return parsed_config
diff --git a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt
@@ -21,4 +21,4 @@ build==1.2.2.post1
 marshmallow==3.23.2
 azure-keyvault-secrets==4.7.0
 opencensus-ext-azure==1.1.9
-opencensus==0.11.2
+opencensus==0.11.2
diff --git a/Solution_Accelerators/Advanced_RAG/src/session_manager/app.py b/Solution_Accelerators/Advanced_RAG/src/session_manager/app.py
@@ -108,7 +108,11 @@ async def health_check(request: web.Request):
 @routes.get("/assets/{rest_of_path}")
 async def assets(request: web.Request):
     rest_of_path = request.match_info.get("rest_of_path", None)
-    return web.FileResponse(f"assets/{rest_of_path}")
+    base_path = "assets"
+    full_path = os.path.normpath(os.path.join(base_path, rest_of_path))
+    if not full_path.startswith(base_path + os.sep):  # require the separator so sibling directories such as "assets_private" are rejected
+        raise web.HTTPForbidden(reason="Invalid path")
+    return web.FileResponse(full_path)
 
 
 # Serve content files from blob storage from within the app to keep the example self-contained.

diff --git a/Solution_Accelerators/Retail/src/session_manager/app.py b/Solution_Accelerators/Retail/src/session_manager/app.py
@@ -108,7 +108,11 @@ async def health_check(request: web.Request):
 @routes.get("/assets/{rest_of_path}")
 async def assets(request: web.Request):
     rest_of_path = request.match_info.get("rest_of_path", None)
-    return web.FileResponse(f"assets/{rest_of_path}")
+    base_path = "assets"
+    full_path = os.path.normpath(os.path.join(base_path, rest_of_path))
+    if not full_path.startswith(base_path + os.sep):  # require the separator so sibling directories such as "assets_private" are rejected
+        raise web.HTTPForbidden(reason="Invalid path")
+    return web.FileResponse(full_path)
 
 
 # Serve content files from blob storage from within the app to keep the example self-contained.