Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions codebase_rag/graph_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,12 +387,62 @@ def _collect_eligible_files(self) -> list[Path]:
eligible.append(filepath)
return eligible

def _should_force_full_reindex(
self, force: bool, old_hashes: FileHashCache
) -> bool:
if force or not old_hashes:
return False

fetch_all = getattr(self.ingestor, "fetch_all", None)
if not callable(fetch_all):
return False

try:
results = fetch_all(
(
"MATCH (n) "
"WHERE toString(n.qualified_name) STARTS WITH $prefix "
"RETURN count(n) AS c"
),
{"prefix": f"{self.project_name}."},
)
except Exception as e:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Catching a broad Exception can mask underlying issues and make debugging harder. While fetch_all might be an external call, it's best practice to either catch more specific exceptions that are expected from the IngestorProtocol or add a comment explaining why a broad Exception is necessary here due to an unpredictable variety of errors.

Consider replacing Exception with a more specific exception type or a tuple of expected exceptions if possible. If not, please add a comment explaining the rationale for catching a generic Exception.

References
  1. When handling errors from external libraries that may raise multiple types of exceptions for configuration issues (e.g., ValueError for invalid formats and AssertionError for missing keys), catch a tuple of these specific exceptions, such as (ValueError, AssertionError), instead of a generic Exception.
  2. If catching a broad Exception is a pragmatic choice due to a wide and unpredictable variety of exceptions from external libraries, add a comment explaining the rationale to improve code maintainability.

logger.debug(
"Incremental reindex graph-state probe failed for {name}: {error}",
name=self.project_name,
error=e,
)
return False

if not results:
logger.info(
"No graph-state probe results for {name}; forcing full reindex",
name=self.project_name,
)
return True

symbol_count = results[0].get("c", 0)
if not isinstance(symbol_count, int):
return False

if symbol_count == 0:
logger.info(
"No existing graph symbols found for {name}; ignoring hash cache and forcing full reindex",
name=self.project_name,
)
return True
Comment on lines +409 to +433
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Inline log message strings not in logs.py

Three new log-message strings are hardcoded directly inside _should_force_full_reindex rather than defined in logs.py:

  • "Incremental reindex graph-state probe failed for {name}: {error}"
  • "No graph-state probe results for {name}; forcing full reindex"
  • "No existing graph symbols found for {name}; ignoring hash cache and forcing full reindex"

The project's coding standard says files that are not logs.py should have almost no string literals, and all log messages should live in logs.py. These three should be moved there and referenced via the `ls.` namespace.

Rule Used: ## Technical Requirements

Agentic Framework

-... (source)

Prompt To Fix With AI
This is a comment left during a code review.
Path: codebase_rag/graph_updater.py
Line: 409-433

Comment:
**Inline log message strings not in `logs.py`**

Three new log-message strings are hardcoded directly inside `_should_force_full_reindex` rather than defined in `logs.py`:

- `"Incremental reindex graph-state probe failed for {name}: {error}"`
- `"No graph-state probe results for {name}; forcing full reindex"`
- `"No existing graph symbols found for {name}; ignoring hash cache and forcing full reindex"`

The project's coding standard says files that are not `logs.py` should have almost no string literals, and all log messages should live in `logs.py`. These three should be moved there and referenced via `ls.`.

**Rule Used:** ## Technical Requirements

### Agentic Framework
-... ([source](https://app.greptile.com/review/custom-context?memory=d4240b05-b763-467a-a6bf-94f73e8b6859))

How can I resolve this? If you propose a fix, please make it concise.


return False

def _process_files(self, force: bool = False) -> None:
cache_path = self.repo_path / cs.HASH_CACHE_FILENAME
old_hashes = _load_hash_cache(cache_path) if not force else {}
if force:
logger.info(ls.INCREMENTAL_FORCE)

if self._should_force_full_reindex(force, old_hashes):
old_hashes = {}

eligible_files = self._collect_eligible_files()
new_hashes: FileHashCache = {}
skipped_count = 0
Expand Down
70 changes: 41 additions & 29 deletions codebase_rag/parsers/call_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

_SEPARATOR_PATTERN = re.compile(r"[.:]|::")
_CHAINED_METHOD_PATTERN = re.compile(r"\.([^.()]+)$")
_JS_INSTANCE_PREFIXES = {cs.KEYWORD_SELF, "this"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Hardcoded "this" string literal

The project already has JAVA_KEYWORD_THIS = "this" in constants.py (line 1961). Using a bare "this" string here violates the "No Hardcoded Strings" rule. Either reuse the existing constant with a rename, or introduce a new KEYWORD_THIS constant.

Suggested change
_JS_INSTANCE_PREFIXES = {cs.KEYWORD_SELF, "this"}
_JS_INSTANCE_PREFIXES = {cs.KEYWORD_SELF, cs.JAVA_KEYWORD_THIS}

Rule Used: ## Technical Requirements

Agentic Framework

-... (source)

Prompt To Fix With AI
This is a comment left during a code review.
Path: codebase_rag/parsers/call_resolver.py
Line: 18

Comment:
**Hardcoded `"this"` string literal**

The project already has `JAVA_KEYWORD_THIS = "this"` in `constants.py` (line 1961). Using a bare `"this"` string here violates the "No Hardcoded Strings" rule. Either reuse the existing constant with a rename, or introduce a new `KEYWORD_THIS` constant.

```suggestion
_JS_INSTANCE_PREFIXES = {cs.KEYWORD_SELF, cs.JAVA_KEYWORD_THIS}
```

**Rule Used:** ## Technical Requirements

### Agentic Framework
-... ([source](https://app.greptile.com/review/custom-context?memory=d4240b05-b763-467a-a6bf-94f73e8b6859))

How can I resolve this? If you propose a fix, please make it concise.



class CallResolver:
Expand Down Expand Up @@ -52,6 +53,15 @@ def _try_resolve_method(
method_qn = f"{class_qn}{separator}{method_name}"
if method_qn in self.function_registry:
return self.function_registry[method_qn], method_qn

class_name = class_qn.split(cs.SEPARATOR_DOT)[-1]
suffix_matches = self.function_registry.find_ending_with(
f"{class_name}{separator}{method_name}"
)
if len(suffix_matches) == 1:
matched_qn = suffix_matches[0]
return self.function_registry[matched_qn], matched_qn
Comment on lines +57 to +63
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Suffix-match fallback can resolve to wrong namespace

The single-suffix-match fallback searches the entire function registry for any entry ending with ClassName.method_name. If there is exactly one such entry globally, it is returned — even if it belongs to a completely unrelated namespace.

Example: class_qn = "proj.a.MyClass" with method "doWork". If "proj.b.MyClass.doWork" exists (same short class name, different package) and is the only match, this fallback silently returns it instead of falling through to _resolve_inherited_method. This can produce the very kind of incorrect cross-module edge the PR aims to eliminate.

The safeguard should at minimum verify that matched_qn shares the same module prefix as class_qn before trusting the single match:

if len(suffix_matches) == 1:
    matched_qn = suffix_matches[0]
    expected_prefix = cs.SEPARATOR_DOT.join(class_qn.split(cs.SEPARATOR_DOT)[:-1])
    if matched_qn.startswith(expected_prefix):
        return self.function_registry[matched_qn], matched_qn
Prompt To Fix With AI
This is a comment left during a code review.
Path: codebase_rag/parsers/call_resolver.py
Line: 57-63

Comment:
**Suffix-match fallback can resolve to wrong namespace**

The single-suffix-match fallback searches the entire function registry for any entry ending with `ClassName.method_name`. If there is exactly one such entry globally, it is returned — even if it belongs to a completely unrelated namespace.

Example: `class_qn = "proj.a.MyClass"` with method `"doWork"`. If `"proj.b.MyClass.doWork"` exists (same short class name, different package) and is the only match, this fallback silently returns it instead of falling through to `_resolve_inherited_method`. This can produce the very kind of incorrect cross-module edge the PR aims to eliminate.

The safeguard should at minimum verify that `matched_qn` shares the same module prefix as `class_qn` before trusting the single match:

```python
if len(suffix_matches) == 1:
    matched_qn = suffix_matches[0]
    expected_prefix = cs.SEPARATOR_DOT.join(class_qn.split(cs.SEPARATOR_DOT)[:-1])
    if matched_qn.startswith(expected_prefix):
        return self.function_registry[matched_qn], matched_qn
```

How can I resolve this? If you propose a fix, please make it concise.


return self._resolve_inherited_method(class_qn, method_name)

def resolve_function_call(
Expand All @@ -71,14 +81,17 @@ def resolve_function_call(
return self._resolve_chained_call(call_name, module_qn, local_var_types)

if result := self._try_resolve_via_imports(
call_name, module_qn, local_var_types
call_name, module_qn, local_var_types, class_context
):
return result

if result := self._try_resolve_same_module(call_name, module_qn):
return result
if not self._has_separator(call_name):
if result := self._try_resolve_same_module(call_name, module_qn):
return result
return self._try_resolve_via_trie(call_name, module_qn)

return self._try_resolve_via_trie(call_name, module_qn)
logger.debug(ls.CALL_UNRESOLVED, call_name=call_name)
return None

def _try_resolve_iife(
self, call_name: str, module_qn: str
Expand Down Expand Up @@ -107,21 +120,21 @@ def _try_resolve_via_imports(
call_name: str,
module_qn: str,
local_var_types: dict[str, str] | None,
class_context: str | None = None,
) -> tuple[str, str] | None:
if module_qn not in self.import_processor.import_mapping:
return None

import_map = self.import_processor.import_mapping[module_qn]
import_map = self.import_processor.import_mapping.get(module_qn, {})

if result := self._try_resolve_direct_import(call_name, import_map):
return result

if result := self._try_resolve_qualified_call(
call_name, import_map, module_qn, local_var_types
call_name, import_map, module_qn, local_var_types, class_context
):
return result

return self._try_resolve_wildcard_imports(call_name, import_map)
if import_map:
return self._try_resolve_wildcard_imports(call_name, import_map)
return None

def _try_resolve_direct_import(
self, call_name: str, import_map: dict[str, str]
Expand All @@ -140,6 +153,7 @@ def _try_resolve_qualified_call(
import_map: dict[str, str],
module_qn: str,
local_var_types: dict[str, str] | None,
class_context: str | None = None,
) -> tuple[str, str] | None:
if not self._has_separator(call_name):
return None
Expand All @@ -149,11 +163,17 @@ def _try_resolve_qualified_call(

if len(parts) == 2:
if result := self._resolve_two_part_call(
parts, call_name, separator, import_map, module_qn, local_var_types
parts,
call_name,
separator,
import_map,
module_qn,
local_var_types,
class_context,
):
return result

if len(parts) >= 3 and parts[0] == cs.KEYWORD_SELF:
if len(parts) >= 3 and parts[0] in _JS_INSTANCE_PREFIXES:
return self._resolve_self_attribute_call(
parts, call_name, import_map, module_qn, local_var_types
)
Expand Down Expand Up @@ -235,9 +255,14 @@ def _resolve_two_part_call(
import_map: dict[str, str],
module_qn: str,
local_var_types: dict[str, str] | None,
class_context: str | None = None,
) -> tuple[str, str] | None:
object_name, method_name = parts

if object_name in _JS_INSTANCE_PREFIXES and class_context:
if result := self._try_resolve_method(class_context, method_name, separator):
return result

if result := self._try_resolve_via_local_type(
object_name,
method_name,
Expand All @@ -254,7 +279,7 @@ def _resolve_two_part_call(
):
return result

return self._try_resolve_module_method(method_name, call_name, module_qn)
return None

def _try_resolve_via_local_type(
self,
Expand Down Expand Up @@ -401,28 +426,15 @@ def _resolve_self_attribute_call(
if class_qn := self._resolve_class_qn_from_type(
var_type, import_map, module_qn
):
method_qn = f"{class_qn}.{method_name}"
if method_qn in self.function_registry:
if resolved_method := self._try_resolve_method(class_qn, method_name):
logger.debug(
ls.CALL_INSTANCE_ATTR,
call_name=call_name,
method_qn=method_qn,
method_qn=resolved_method[1],
attr_ref=attribute_ref,
var_type=var_type,
)
return self.function_registry[method_qn], method_qn

if inherited_method := self._resolve_inherited_method(
class_qn, method_name
):
logger.debug(
ls.CALL_INSTANCE_ATTR_INHERITED,
call_name=call_name,
method_qn=inherited_method[1],
attr_ref=attribute_ref,
var_type=var_type,
)
return inherited_method
return resolved_method

return None

Expand Down
139 changes: 136 additions & 3 deletions codebase_rag/parsers/js_ts/type_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,14 @@ def build_local_variable_type_map(
) -> dict[str, str]:
local_var_types: dict[str, str] = {}

stack: list[ASTNode] = [caller_node]
if class_node := self._find_enclosing_class_node(caller_node):
self._collect_constructor_injected_types(
class_node, module_qn, local_var_types
)

self._collect_parameter_types(caller_node, module_qn, local_var_types)

stack: list[ASTNode] = [caller_node]
declarator_count = 0

while stack:
Expand All @@ -59,7 +65,7 @@ def build_local_variable_type_map(
)

if var_type := self._infer_js_variable_type_from_value(
value_node, module_qn
value_node, module_qn, local_var_types
):
local_var_types[var_name] = var_type
logger.debug(
Expand All @@ -79,11 +85,138 @@ def build_local_variable_type_map(
)
return local_var_types

def _find_enclosing_class_node(self, node: ASTNode) -> ASTNode | None:
    """Return the nearest enclosing class declaration of *node*.

    Walks ancestors (including *node* itself) and stops at the first
    ``TS_CLASS_DECLARATION``; returns ``None`` when *node* is not nested
    inside any class declaration.
    """
    ancestor: ASTNode | None = node
    # Climb until we either hit a class declaration or run off the root.
    while ancestor is not None and ancestor.type != cs.TS_CLASS_DECLARATION:
        ancestor = ancestor.parent
    return ancestor

def _collect_constructor_injected_types(
    self,
    class_node: ASTNode,
    module_qn: str,
    local_var_types: dict[str, str],
) -> None:
    """Record constructor parameter-property types for *class_node*.

    Scans the class body for the first method named ``constructor`` and
    forwards each of its parameters to
    ``_collect_constructor_parameter_type``, which populates
    *local_var_types* in place. Processing stops after the first
    constructor is handled.
    """
    body_node = class_node.child_by_field_name(cs.FIELD_BODY)
    if body_node is None:
        return

    for member in body_node.children:
        if member.type != cs.TS_METHOD_DEFINITION:
            continue

        name_node = member.child_by_field_name(cs.FIELD_NAME)
        if name_node is None or name_node.text is None:
            continue
        if safe_decode_text(name_node) != cs.KEYWORD_CONSTRUCTOR:
            continue

        # Found the constructor: harvest its parameters (if any) and stop.
        params_node = member.child_by_field_name(cs.TS_FIELD_PARAMETERS)
        if params_node is not None:
            for param in params_node.children:
                self._collect_constructor_parameter_type(
                    param, module_qn, local_var_types
                )
        return

def _collect_constructor_parameter_type(
self,
param_node: ASTNode,
module_qn: str,
local_var_types: dict[str, str],
) -> None:
if param_node.type not in {
"required_parameter",
"optional_parameter",
cs.TS_FORMAL_PARAMETER,
}:
return

has_accessibility_modifier = any(
child.type == "accessibility_modifier" for child in param_node.children
)
if not has_accessibility_modifier:
return
Comment on lines +134 to +145
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Hardcoded tree-sitter node-type string literals

"required_parameter", "optional_parameter", and "accessibility_modifier" appear as raw string literals in _collect_constructor_parameter_type (and also in _collect_parameter_types at lines 169-174). No corresponding constants exist in constants.py for these three node types. Per the project's "No Hardcoded Strings" rule, they should each be added as constants (e.g. TS_REQUIRED_PARAMETER, TS_OPTIONAL_PARAMETER, TS_ACCESSIBILITY_MODIFIER) and referenced via cs..

The same applies to "type_annotation" used in _extract_type_annotation_name (line 195).

All four new literals should be centralised in constants.py.

Rule Used: ## Technical Requirements

Agentic Framework

-... (source)

Prompt To Fix With AI
This is a comment left during a code review.
Path: codebase_rag/parsers/js_ts/type_inference.py
Line: 134-145

Comment:
**Hardcoded tree-sitter node-type string literals**

`"required_parameter"`, `"optional_parameter"`, and `"accessibility_modifier"` appear as raw string literals in `_collect_constructor_parameter_type` (and also in `_collect_parameter_types` at lines 169-174). No corresponding constants exist in `constants.py` for these three node types. Per the project's "No Hardcoded Strings" rule, they should each be added as constants (e.g. `TS_REQUIRED_PARAMETER`, `TS_OPTIONAL_PARAMETER`, `TS_ACCESSIBILITY_MODIFIER`) and referenced via `cs.`.

The same applies to `"type_annotation"` used in `_extract_type_annotation_name` (line 195).

All four new literals should be centralised in `constants.py`.

**Rule Used:** ## Technical Requirements

### Agentic Framework
-... ([source](https://app.greptile.com/review/custom-context?memory=d4240b05-b763-467a-a6bf-94f73e8b6859))

How can I resolve this? If you propose a fix, please make it concise.


param_name = self._extract_parameter_name(param_node)
if not param_name:
return

if not (param_type := self._extract_type_annotation_name(param_node)):
return

resolved_type = self._resolve_js_class_name(param_type, module_qn) or param_type
local_var_types[param_name] = resolved_type
local_var_types[f"this.{param_name}"] = resolved_type

def _collect_parameter_types(
    self,
    caller_node: ASTNode,
    module_qn: str,
    local_var_types: dict[str, str],
) -> None:
    """Populate *local_var_types* from the caller's annotated parameters.

    For each typed parameter of *caller_node*, resolves the annotation to
    a qualified class name when possible (falling back to the raw
    annotation text) and records it under the parameter name. Entries
    already present — e.g. constructor-injected types collected earlier —
    are never overwritten.
    """
    params_node = caller_node.child_by_field_name(cs.TS_FIELD_PARAMETERS)
    if params_node is None:
        return

    # Hoisted out of the loop: the original rebuilt this set on every
    # iteration. TODO(review): these tree-sitter node-type literals
    # should be centralised in constants.py per the project's
    # no-hardcoded-strings rule.
    parameter_node_types = {
        "required_parameter",
        "optional_parameter",
        cs.TS_FORMAL_PARAMETER,
    }

    for param in params_node.children:
        if param.type not in parameter_node_types:
            continue

        param_name = self._extract_parameter_name(param)
        if not param_name or param_name in local_var_types:
            continue

        if not (param_type := self._extract_type_annotation_name(param)):
            continue

        # Prefer the fully-qualified class name; keep the annotation
        # text when resolution fails so callers still get a hint.
        local_var_types[param_name] = (
            self._resolve_js_class_name(param_type, module_qn) or param_type
        )

def _extract_parameter_name(self, param_node: ASTNode) -> str | None:
    """Return the decoded identifier text of a parameter node, if any.

    Scans the node's children for the first ``TS_IDENTIFIER`` child and
    decodes it; returns ``None`` when no identifier child exists.
    """
    for child in param_node.children:
        if child.type == cs.TS_IDENTIFIER:
            return safe_decode_text(child)
    return None

def _extract_type_annotation_name(self, node: ASTNode) -> str | None:
    """Return the type name from a node's annotation (``x: Foo`` -> ``Foo``).

    Looks for the first ``type_annotation`` child, decodes its text, and
    strips the leading colon plus surrounding whitespace. Returns ``None``
    when the annotation is missing or its text cannot be decoded.
    """
    for child in node.children:
        if child.type != "type_annotation":
            continue
        if child.text is None:
            return None
        decoded = safe_decode_text(child)
        if not decoded:
            return None
        # Annotation text includes the ':' separator — drop it.
        return decoded.lstrip(":").strip()
    return None

def _infer_js_variable_type_from_value(
self, value_node: ASTNode, module_qn: str
self,
value_node: ASTNode,
module_qn: str,
local_var_types: dict[str, str],
) -> str | None:
logger.debug(ls.JS_INFER_VALUE_NODE, node_type=value_node.type)

if value_node.type == cs.TS_MEMBER_EXPRESSION:
expr_text = safe_decode_text(value_node)
if expr_text and expr_text in local_var_types:
return local_var_types[expr_text]

if value_node.type == cs.TS_NEW_EXPRESSION:
if class_name := ut.extract_constructor_name(value_node):
class_qn = self._resolve_js_class_name(class_name, module_qn)
Expand Down
Loading