Skip to content

Commit dec5b25

Browse files
author
Jet Xu
committed
Enhance diff hunk header
1 parent acb41bd commit dec5b25

File tree

4 files changed

+104
-8
lines changed

4 files changed

+104
-8
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.3.3] - 2025-08-24
9+
10+
### Optimized
11+
- Enhance diff hunk headers with function/class context, similar to `git diff`
12+
813
## [0.3.2] - 2025-06-23
914

1015
### Optimized

llama_github/utils.py

Lines changed: 97 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,57 @@
1111
class DiffGenerator:
1212
"""
1313
A class for generating custom diffs between two pieces of content.
14+
It enhances the standard unified diff by adding function/class context to hunk headers,
15+
similar to `git diff`, in a fail-safe manner.
1416
"""
1517

18+
# A pre-compiled list of regex patterns to find function/class definitions.
19+
# This is the core mechanism that mimics Git's `xfuncname` feature.
20+
# It covers a wide range of common languages to provide broad, out-of-the-box support.
21+
_FUNC_CONTEXT_PATTERNS = [
22+
re.compile(r'^\s*(def|class)\s+.*', re.IGNORECASE), # Python
23+
re.compile(r'^\s*(public|private|protected|static|final|native|synchronized|abstract|transient|volatile|strictfp|async|function|class|interface|enum|@|implements|extends)'), # Java, JS, TS, PHP, C#
24+
re.compile(r'^\s*(func|fn|impl|trait|struct|enum|mod)\s+.*', re.IGNORECASE), # Go, Rust
25+
re.compile(r'^\s*(def|class|module)\s+.*', re.IGNORECASE), # Ruby
26+
re.compile(r'^\s*([a-zA-Z_][a-zA-Z0-9_]*\s+)*[a-zA-Z_][a-zA-Z0-9_]*\s*\(.*\)\s*\{'), # C, C++ style function definitions
27+
re.compile(r'^sub\s+.*'), # Perl
28+
]
29+
30+
@staticmethod
def _find_context(line_index: int, lines: List[str]) -> str:
    """
    Search upwards from a given line index to find the nearest function/class context.

    The search is bounds-safe: an index past the end of ``lines`` is clamped to the
    last line, and an empty ``lines`` list (e.g. a hunk header of ``@@ -0,0 +1,N @@``
    produced when the base file is empty) simply yields no context instead of
    raising ``IndexError``.

    Args:
        line_index (int): The 0-based index to start searching upwards from.
        lines (List[str]): The content of the file, as a list of lines.

    Returns:
        str: The found context line, stripped of whitespace, or an empty string if not found.
    """
    # Clamp the starting point to the last valid index. For an empty list this
    # becomes -1, which makes the range below empty and falls through to "".
    start_index = min(line_index, len(lines) - 1)

    # Walk from the starting line back to the beginning of the file and return
    # the first line that looks like a function/class definition.
    for i in range(start_index, -1, -1):
        candidate = lines[i]
        if any(pattern.search(candidate) for pattern in DiffGenerator._FUNC_CONTEXT_PATTERNS):
            return candidate.strip()

    return ""  # No enclosing definition was found above the starting line.
50+
1651
@staticmethod
1752
def generate_custom_diff(base_content: str, head_content: str, context_lines: int) -> str:
1853
"""
19-
Generate a custom diff between two pieces of content with specified context lines.
54+
Generate a custom diff between two pieces of content with specified context lines,
55+
and automatically add function/class context to hunk headers, similar to `git diff`.
56+
This method is designed to be fail-safe; if context addition fails, it returns the standard diff.
2057
2158
Args:
2259
base_content (str): The original content.
2360
head_content (str): The new content to compare against the base.
2461
context_lines (int): The number of context lines to include in the diff.
2562
2663
Returns:
27-
str: A string representation of the unified diff.
64+
str: A string representation of the unified diff, with function/class context appended to hunk headers when it can be determined.
2865
2966
Raises:
3067
ValueError: If context_lines is negative.
@@ -40,15 +77,69 @@ def generate_custom_diff(base_content: str, head_content: str, context_lines: in
4077
# File is deleted
4178
return "".join(f"- {line}\n" for line in base_content.splitlines())
4279

80+
# Use empty strings for None content to ensure difflib handles them correctly
81+
# as file additions or deletions. This is more robust and aligns with difflib's expectations.
82+
base_content = base_content or ""
83+
head_content = head_content or ""
84+
4385
base_lines: List[str] = base_content.splitlines()
4486
head_lines: List[str] = head_content.splitlines()
4587

88+
# Generate the standard unified diff. This part is considered stable.
89+
diff: List[str] = list(difflib.unified_diff(
90+
base_lines,
91+
head_lines,
92+
n=context_lines,
93+
lineterm=''
94+
))
95+
96+
if not diff:
97+
return "" # No differences found, return early.
98+
99+
# --- Start of the fail-safe enhancement logic ---
100+
# This entire block attempts to add context to hunk headers.
101+
# If any exception occurs here, we catch it and return the original, un-enhanced diff.
102+
# This ensures the enhancement can never produce a worse result than the standard diff.
46103
try:
47-
diff: List[str] = list(difflib.unified_diff(base_lines, head_lines, n=context_lines, lineterm=''))
48-
return '\n'.join(diff)
104+
enhanced_diff = []
105+
# Regex to parse the original line number from a hunk header.
106+
# e.g., from "@@ -35,7 +35,7 @@" it captures "35".
107+
hunk_header_re = re.compile(r'^@@ -(\d+)(?:,\d+)? .*')
108+
109+
for line in diff:
110+
match = hunk_header_re.match(line)
111+
if match:
112+
# This is a hunk header line.
113+
# The line number from the regex is 1-based.
114+
start_line_num = int(match.group(1))
115+
116+
# The index is 0-based, so we subtract 1.
117+
# We search upwards starting at the first line of the hunk (clamped to 0 for headers such as "-0,0").
118+
context_line_index = max(0, start_line_num - 1)
119+
context = DiffGenerator._find_context(context_line_index, base_lines)
120+
121+
if context:
122+
# If context was found, append it to the hunk header.
123+
enhanced_diff.append(f"{line} {context}")
124+
else:
125+
# Otherwise, use the original hunk header.
126+
enhanced_diff.append(line)
127+
else:
128+
# This is not a hunk header, just a regular diff line (+, -, ' ').
129+
enhanced_diff.append(line)
130+
131+
# If the enhancement process completes successfully, return the result.
132+
return '\n'.join(enhanced_diff)
133+
49134
except Exception as e:
50-
logger.exception(f"Error generating diff: {str(e)}")
51-
return ""
135+
# If any error occurred during the enhancement, log a warning and fall back.
136+
logger.warning(
137+
f"Could not add hunk header context due to an unexpected error: {str(e)}. "
138+
"Falling back to standard diff output."
139+
)
140+
# --- Fallback mechanism ---
141+
# Return the original, unmodified diff generated by difflib.
142+
return '\n'.join(diff)
52143

53144

54145
class DataAnonymizer:

llama_github/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.3.2'
1+
__version__ = '0.3.3'

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = llama-github
3-
version = 0.3.2
3+
version = 0.3.3
44
author = Jet Xu
55
author_email = [email protected]
66
description = Llama-github is an open-source Python library that empowers LLM Chatbots, AI Agents, and Auto-dev Agents to conduct Retrieval from actively selected GitHub public projects. It Augments through LLMs and Generates context for any coding question, in order to streamline the development of sophisticated AI-driven applications.

0 commit comments

Comments
 (0)