adjustments.py is now complete and has been integrated with analysis.py

NishilAmin1213 · NishilAmin1213 · commit cd1a7135fc4c · 2025-01-05T18:12:51.000Z
diff --git a/src/analysis/adjustments.py b/src/analysis/adjustments.py
@@ -1,7 +1,50 @@
 from src.infrastructure import file_utils
 
-def make_adjustments():
-    pass
+def _generate_array(existing_data, new_headers):
+    """Flatten dictionary rows into a 2D array with the header row first.
+
+    Every data row holds that row's values in the order given by new_headers.
+    """
 
-# REMOVE THIS CALL
-make_adjustments()
+    # Start the table with the header row
+    table = [new_headers]
+
+    # Add one list per existing row, picking the values out in header order
+    for record in existing_data:
+        values = [record[column] for column in new_headers]
+        table.append(values)
+
+    return table
+
+
+def make_adjustments(filename):
+    """Apportion each row's 'Test During' count to before/after and write filename + '_adjusted'.
+
+    The during count is split by the row's before / (before + after) ratio, or all to after if that is 0/0.
+    """
+    existing_data = file_utils.read_csv(filename)
+
+    for row in existing_data:
+        # Parse each count once instead of re-converting it in every expression
+        before, after, during = int(row['Test Before']), int(row['Test After']), int(row['Test During'])
+
+        # Calculate the tdd percentage (no before/after commits means a percentage of 0)
+        tdd_percentage = before / (before + after) if before + after else 0
+
+        # Truncate only the 'before' share and give the remainder to 'after'; truncating both
+        # shares independently could drop a commit so the adjusted totals no longer added up
+        during_before = int(tdd_percentage * during)
+        row['Adjusted Test Before'] = before + during_before
+        row['Adjusted Test After'] = after + (during - during_before)
+
+    # Get the headers of the relevant file
+    headers = []
+    if filename == 'author_data':
+        headers = ["Author", "Test Before", "Test After", "Test During"]
+    elif filename == 'repo_data':
+        headers = ["Repo Name", "Language", "Commit Count", "Test Before", "Test After", "Test During", "Duration (s)",
+                "Avg Before Commit Size", "Avg After Commit Size", "Avg During Commit Size", "Avg Commit Size"]
+
+    # Write the original columns plus the two adjusted columns to a new csv
+    new_data = _generate_array(existing_data, headers + ["Adjusted Test Before", "Adjusted Test After"])
+    file_utils.write_csv(new_data, filename+'_adjusted')
diff --git a/src/analysis/analysis.py b/src/analysis/analysis.py
@@ -360,17 +360,13 @@ def _create_tdd_repo_categories_pie():
 
 
 def create_plots():
-    #make_adjustments()
+    for filename in ('author_data', 'repo_data'):
+        make_adjustments(filename)
     _create_size_impact_scatter()
     _create_tdd_usage_box_plot()
     _create_avg_commit_size_bar_graph()
     _create_tdd_languages_bar_graph()
     _create_raw_tdd_percentage_pie()
     _create_overall_tdd_percentage_pie()
     _create_tdd_author_categories_pie()
-    _create_tdd_repo_categories_pie()
-
-'''
-todo - 
-write the adjustments/estimates code in python
-'''
+    _create_tdd_repo_categories_pie()