Skip to content

Codescanning fix march8 #59

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
args = parser.parse_args()

# Define a safe root directory
SAFE_ROOT = os.path.abspath(os.getcwd())

# Normalize and validate the user-provided path
user_path = os.path.abspath(os.path.normpath(args.files))
if not user_path.startswith(SAFE_ROOT):
raise ValueError("The provided path is not allowed.")

# Use the current user identity to connect to Azure services unless a key is explicitly set for any of them
azd_credential = AzureDeveloperCliCredential() if args.tenantid == None else AzureDeveloperCliCredential(
tenant_id=args.tenantid)
Expand Down Expand Up @@ -360,7 +368,7 @@ def remove_from_index(filename):
create_search_index()

print(f"Processing files...")
for root, dirs, files in os.walk(args.files):
for root, dirs, files in os.walk(user_path):
for file in files:
filename = os.path.join(root, file)
if args.verbose: print(f"Processing '{filename}'")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,16 @@
from data.managers.permissions.manager import PermissionsManager
from typing import List, Set

import os

SAFE_ROOT = os.path.abspath("./entries")

def read_yaml(file_path, verbose) -> dict:
if verbose: print(f"Reading YAML file: {file_path}")
with open(file_path, "r") as f:
normalized_path = os.path.normpath(os.path.join(SAFE_ROOT, file_path))
if not normalized_path.startswith(SAFE_ROOT):
raise Exception(f"Invalid file path: {file_path}")
with open(normalized_path, "r") as f:
try:
return yaml.safe_load(f)
except yaml.YAMLError as exc:
Expand Down
21 changes: 6 additions & 15 deletions Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,12 +176,9 @@
experiment_id = f"RAG-Bot-Eval_Dataset_eq_{aml_dataset}_Start_eq_{current_time}"
parsed_config.experiment_id = experiment_id

base_path = os.path.join(os.path.dirname(__file__), "results")
save_path = os.path.normpath(os.path.join(base_path, experiment_id))
if not save_path.startswith(base_path):
raise Exception("Invalid experiment ID resulting in unsafe path.")
save_path = os.path.join(os.path.dirname(__file__), f"results/{experiment_id}")
os.makedirs(save_path, exist_ok=True)
with open(os.path.join(save_path, "config.json"), "w") as f:
with open(f"{save_path}/config.json", "w") as f:

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a user-provided value.

Copilot Autofix

AI 5 months ago

To fix the problem, we need to validate the user-provided aml_dataset before using it to construct the file path. os.path.normpath only collapses redundant separators and "." / ".." segments; it does not by itself make a path safe. We therefore normalize the value first and then explicitly reject it if it is an absolute path or still contains a ".." component, so that the resulting save path cannot escape the results directory.

  1. Normalize the aml_dataset value using os.path.normpath.
  2. Reject the normalized value if it is an absolute path (os.path.isabs) or if any of its components is "..", since either could lead to directory traversal.
  3. Construct the experiment_id and save_path using the sanitized aml_dataset.
Suggested changeset 1
Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py
--- a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py
+++ b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py
@@ -174,3 +174,5 @@
     current_time = pd.Timestamp.now().strftime("%Y-%m-%d_%H%M%S")
-    aml_dataset = parsed_config.aml_dataset
+    aml_dataset = os.path.normpath(parsed_config.aml_dataset)
+    if os.path.isabs(aml_dataset) or ".." in aml_dataset.split(os.path.sep):
+        raise ValueError("Invalid aml_dataset value.")
     experiment_id = f"RAG-Bot-Eval_Dataset_eq_{aml_dataset}_Start_eq_{current_time}"
EOF
@@ -174,3 +174,5 @@
current_time = pd.Timestamp.now().strftime("%Y-%m-%d_%H%M%S")
aml_dataset = parsed_config.aml_dataset
aml_dataset = os.path.normpath(parsed_config.aml_dataset)
if os.path.isabs(aml_dataset) or ".." in aml_dataset.split(os.path.sep):
raise ValueError("Invalid aml_dataset value.")
experiment_id = f"RAG-Bot-Eval_Dataset_eq_{aml_dataset}_Start_eq_{current_time}"
Copilot is powered by AI and may make mistakes. Always verify output.
json.dump(vars(parsed_config), f, indent=4)

return parsed_config
Expand All @@ -203,10 +200,7 @@
default=None,
)
parsed_config = arg_parser.parse_args()
base_path = os.path.join(os.path.dirname(__file__), "results")
config_path = os.path.normpath(os.path.join(base_path, parsed_config.resume_run_id, "config.json"))
if not config_path.startswith(base_path):
raise Exception("Invalid resume run ID resulting in unsafe path.")
config_path = f"{os.path.dirname(__file__)}/results/{parsed_config.resume_run_id}/config.json"
parsed_config = json.load(open(config_path, "r"))

print(f"Resuming run with ID: {parsed_config['experiment_id']}")
Expand Down Expand Up @@ -719,14 +713,11 @@

combined_results = {
"config": config.__dict__,
"metrics": json.load(open(os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "run_metrics.json")))),
"answers": pd.read_csv(os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "run_details.csv"))).to_dict(),
"metrics": json.load(open(f"{current_dir}/results/{config.experiment_id}/run_metrics.json")),

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a user-provided value.
"answers": pd.read_csv(f"{current_dir}/results/{config.experiment_id}/run_details.csv").to_dict(),
}
metrics_path = os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "combined_results.json"))
if not metrics_path.startswith(os.path.join(current_dir, "results")):
raise Exception("Invalid experiment ID resulting in unsafe path.")
json.dump(
combined_results, open(metrics_path, "w"), indent=4
combined_results, open(f"{current_dir}/results/{config.experiment_id}/combined_results.json", "w"), indent=4

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a user-provided value.
)
return combined_results

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,13 @@ def get_arguments():
experiment_id = f"RAG-Eval_Dataset_eq_{dataset_name}_Start_eq_{current_time}"
parsed_config.experiment_id = experiment_id

save_path = os.path.join(os.path.dirname(__file__), f"results/{experiment_id}")
base_path = os.path.join(os.path.dirname(__file__), "results")
save_path = os.path.normpath(os.path.join(base_path, experiment_id))
if not save_path.startswith(base_path):
raise Exception("Invalid experiment ID resulting in unsafe path")
parsed_config.save_path = save_path
os.makedirs(save_path, exist_ok=True)
with open(f"{save_path}/config.json", "w") as f:
with open(os.path.join(save_path, "config.json"), "w") as f:
json.dump(vars(parsed_config), f, indent=4)

return parsed_config
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ build==1.2.2.post1
marshmallow==3.23.2
azure-keyvault-secrets==4.7.0
opencensus-ext-azure==1.1.9
opencensus==0.11.2
opencensus==0.11.2
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ async def health_check(request: web.Request):
@routes.get("/assets/{rest_of_path}")
async def assets(request: web.Request):
rest_of_path = request.match_info.get("rest_of_path", None)
return web.FileResponse(f"assets/{rest_of_path}")
base_path = os.path.join("assets")
full_path = os.path.normpath(os.path.join(base_path, rest_of_path))
if not full_path.startswith(base_path):
raise web.HTTPForbidden(reason="Invalid path")
return web.FileResponse(full_path)


# Serve content files from blob storage from within the app to keep the example self-contained.
Expand Down
6 changes: 5 additions & 1 deletion Solution_Accelerators/Retail/src/session_manager/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ async def health_check(request: web.Request):
@routes.get("/assets/{rest_of_path}")
async def assets(request: web.Request):
rest_of_path = request.match_info.get("rest_of_path", None)
return web.FileResponse(f"assets/{rest_of_path}")
base_path = "assets"
full_path = os.path.normpath(os.path.join(base_path, rest_of_path))
if not full_path.startswith(base_path):
raise web.HTTPForbidden(reason="Invalid path")
return web.FileResponse(full_path)


# Serve content files from blob storage from within the app to keep the example self-contained.
Expand Down