11
11
class DiffGenerator :
12
12
"""
13
13
A class for generating custom diffs between two pieces of content.
14
+ It enhances the standard unified diff by adding function/class context to hunk headers,
15
+ similar to `git diff`, in a fail-safe manner.
14
16
"""
15
17
18
# Pre-compiled regex patterns used to recognise function/class definition lines.
# They approximate Git's `xfuncname` feature for a range of common languages.
#
# Keyword alternatives are terminated with `\b` so that identifiers which merely
# start with a keyword (`finally:`, `classes = []`, `asyncio.run(...)`) are not
# mistaken for definitions. The C-style pattern rejects control statements
# (`if (...) {`, `while (...) {`, ...) for the same reason.
_FUNC_CONTEXT_PATTERNS = [
    # Python
    re.compile(r'^\s*(def|class)\s+.*', re.IGNORECASE),
    # Java, JS, TS, PHP, C# (a leading `@` covers annotations/decorators)
    re.compile(
        r'^\s*(?:@|(?:public|private|protected|static|final|native|synchronized'
        r'|abstract|transient|volatile|strictfp|async|function|class|interface'
        r'|enum|implements|extends)\b)'
    ),
    # Go, Rust
    re.compile(r'^\s*(func|fn|impl|trait|struct|enum|mod)\s+.*', re.IGNORECASE),
    # Ruby
    re.compile(r'^\s*(def|class|module)\s+.*', re.IGNORECASE),
    # C, C++ style function definitions (control-flow statements excluded)
    re.compile(
        r'^\s*(?!(?:if|else|for|while|switch|catch|do|return)\b)'
        r'([a-zA-Z_][a-zA-Z0-9_]*\s+)*[a-zA-Z_][a-zA-Z0-9_]*\s*\(.*\)\s*\{'
    ),
    # Perl
    re.compile(r'^sub\s+.*'),
]
29
+
30
@staticmethod
def _find_context(line_index: int, lines: List[str]) -> str:
    """
    Search upwards from a given line index to find the nearest function/class context.

    The starting index is clamped to the last valid index of ``lines``, so an
    empty ``lines`` list or an index past the end returns "" instead of raising
    IndexError. A negative ``line_index`` also returns "".

    Args:
        line_index (int): The 0-based index to start searching upwards from.
        lines (List[str]): The content of the file, as a list of lines.

    Returns:
        str: The first line at or above ``line_index`` that matches one of
            ``DiffGenerator._FUNC_CONTEXT_PATTERNS``, stripped of surrounding
            whitespace, or an empty string if no line matches.
    """
    # Clamp so that out-of-range starts (e.g. index 0 on an empty file) are safe.
    start = min(line_index, len(lines) - 1)

    # Walk from the starting line up to the beginning of the file.
    for i in range(start, -1, -1):
        line = lines[i]
        if any(pattern.search(line) for pattern in DiffGenerator._FUNC_CONTEXT_PATTERNS):
            return line.strip()

    # No definition-like line was found above the starting point.
    return ""
50
+
16
51
@staticmethod
17
52
def generate_custom_diff (base_content : str , head_content : str , context_lines : int ) -> str :
18
53
"""
19
- Generate a custom diff between two pieces of content with specified context lines.
54
+ Generate a custom diff between two pieces of content with specified context lines,
55
+ and automatically add function/class context to hunk headers, similar to `git diff`.
56
+ This method is designed to be fail-safe; if context addition fails, it returns the standard diff.
20
57
21
58
Args:
22
59
base_content (str): The original content.
23
60
head_content (str): The new content to compare against the base.
24
61
context_lines (int): The number of context lines to include in the diff.
25
62
26
63
Returns:
27
- str: A string representation of the unified diff.
64
+ str: A string representation of the unified diff, preferably with hunk headers .
28
65
29
66
Raises:
30
67
ValueError: If context_lines is negative.
@@ -40,15 +77,69 @@ def generate_custom_diff(base_content: str, head_content: str, context_lines: in
40
77
# File is deleted
41
78
return "" .join (f"- { line } \n " for line in base_content .splitlines ())
42
79
80
+ # Use empty strings for None content to ensure difflib handles them correctly
81
+ # as file additions or deletions. This is more robust and aligns with difflib's expectations.
82
+ base_content = base_content or ""
83
+ head_content = head_content or ""
84
+
43
85
base_lines : List [str ] = base_content .splitlines ()
44
86
head_lines : List [str ] = head_content .splitlines ()
45
87
88
+ # Generate the standard unified diff. This part is considered stable.
89
+ diff : List [str ] = list (difflib .unified_diff (
90
+ base_lines ,
91
+ head_lines ,
92
+ n = context_lines ,
93
+ lineterm = ''
94
+ ))
95
+
96
+ if not diff :
97
+ return "" # No differences found, return early.
98
+
99
+ # --- Start of the fail-safe enhancement logic ---
100
+ # This entire block attempts to add context to hunk headers.
101
+ # If any exception occurs here, we catch it and return the original, un-enhanced diff.
102
+ # This ensures the function is always reliable (Pareto improvement).
46
103
try :
47
- diff : List [str ] = list (difflib .unified_diff (base_lines , head_lines , n = context_lines , lineterm = '' ))
48
- return '\n ' .join (diff )
104
+ enhanced_diff = []
105
+ # Regex to parse the original line number from a hunk header.
106
+ # e.g., from "@@ -35,7 +35,7 @@" it captures "35".
107
+ hunk_header_re = re .compile (r'^@@ -(\d+)(?:,\d+)? .*' )
108
+
109
+ for line in diff :
110
+ match = hunk_header_re .match (line )
111
+ if match :
112
+ # This is a hunk header line.
113
+ # The line number from the regex is 1-based.
114
+ start_line_num = int (match .group (1 ))
115
+
116
+ # The index is 0-based, so we subtract 1.
117
+ # We search from the line where the change starts, or the line before it.
118
+ context_line_index = max (0 , start_line_num - 1 )
119
+ context = DiffGenerator ._find_context (context_line_index , base_lines )
120
+
121
+ if context :
122
+ # If context was found, append it to the hunk header.
123
+ enhanced_diff .append (f"{ line } { context } " )
124
+ else :
125
+ # Otherwise, use the original hunk header.
126
+ enhanced_diff .append (line )
127
+ else :
128
+ # This is not a hunk header, just a regular diff line (+, -, ' ').
129
+ enhanced_diff .append (line )
130
+
131
+ # If the enhancement process completes successfully, return the result.
132
+ return '\n ' .join (enhanced_diff )
133
+
49
134
except Exception as e :
50
- logger .exception (f"Error generating diff: { str (e )} " )
51
- return ""
135
+ # If any error occurred during the enhancement, log a warning and fall back.
136
+ logger .warning (
137
+ f"Could not add hunk header context due to an unexpected error: { str (e )} . "
138
+ "Falling back to standard diff output."
139
+ )
140
+ # --- Fallback mechanism ---
141
+ # Return the original, unmodified diff generated by difflib.
142
+ return '\n ' .join (diff )
52
143
53
144
54
145
class DataAnonymizer :
0 commit comments