-
-
Notifications
You must be signed in to change notification settings - Fork 371
Fix JS/TS service call resolution and empty-graph reindex #455
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -387,12 +387,62 @@ def _collect_eligible_files(self) -> list[Path]: | |
| eligible.append(filepath) | ||
| return eligible | ||
|
|
||
| def _should_force_full_reindex( | ||
| self, force: bool, old_hashes: FileHashCache | ||
| ) -> bool: | ||
| if force or not old_hashes: | ||
| return False | ||
|
|
||
| fetch_all = getattr(self.ingestor, "fetch_all", None) | ||
| if not callable(fetch_all): | ||
| return False | ||
|
|
||
| try: | ||
| results = fetch_all( | ||
| ( | ||
| "MATCH (n) " | ||
| "WHERE toString(n.qualified_name) STARTS WITH $prefix " | ||
| "RETURN count(n) AS c" | ||
| ), | ||
| {"prefix": f"{self.project_name}."}, | ||
| ) | ||
| except Exception as e: | ||
| logger.debug( | ||
| "Incremental reindex graph-state probe failed for {name}: {error}", | ||
| name=self.project_name, | ||
| error=e, | ||
| ) | ||
| return False | ||
|
|
||
| if not results: | ||
| logger.info( | ||
| "No graph-state probe results for {name}; forcing full reindex", | ||
| name=self.project_name, | ||
| ) | ||
| return True | ||
|
|
||
| symbol_count = results[0].get("c", 0) | ||
| if not isinstance(symbol_count, int): | ||
| return False | ||
|
|
||
| if symbol_count == 0: | ||
| logger.info( | ||
| "No existing graph symbols found for {name}; ignoring hash cache and forcing full reindex", | ||
| name=self.project_name, | ||
| ) | ||
| return True | ||
|
Comment on lines
+409
to
+433
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Three new log-message strings are hardcoded directly inside `_should_force_full_reindex` rather than defined in `logs.py`.
The project's coding standard says files that are not `logs.py` should have almost no string literals, and all log messages should live in `logs.py`. These three should be moved there and referenced via `ls.`. Rule Used: ## Technical Requirements — Agentic Framework (source). Prompt To Fix With AI: This is a comment left during a code review.
Path: codebase_rag/graph_updater.py
Line: 409-433
Comment:
**Inline log message strings not in `logs.py`**
Three new log-message strings are hardcoded directly inside `_should_force_full_reindex` rather than defined in `logs.py`:
- `"Incremental reindex graph-state probe failed for {name}: {error}"`
- `"No graph-state probe results for {name}; forcing full reindex"`
- `"No existing graph symbols found for {name}; ignoring hash cache and forcing full reindex"`
The project's coding standard says files that are not `logs.py` should have almost no string literals, and all log messages should live in `logs.py`. These three should be moved there and referenced via `ls.`.
**Rule Used:** ## Technical Requirements
### Agentic Framework
-... ([source](https://app.greptile.com/review/custom-context?memory=d4240b05-b763-467a-a6bf-94f73e8b6859))
How can I resolve this? If you propose a fix, please make it concise. |
||
|
|
||
| return False | ||
|
|
||
| def _process_files(self, force: bool = False) -> None: | ||
| cache_path = self.repo_path / cs.HASH_CACHE_FILENAME | ||
| old_hashes = _load_hash_cache(cache_path) if not force else {} | ||
| if force: | ||
| logger.info(ls.INCREMENTAL_FORCE) | ||
|
|
||
| if self._should_force_full_reindex(force, old_hashes): | ||
| old_hashes = {} | ||
|
|
||
| eligible_files = self._collect_eligible_files() | ||
| new_hashes: FileHashCache = {} | ||
| skipped_count = 0 | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -15,6 +15,7 @@ | |||||
|
|
||||||
| _SEPARATOR_PATTERN = re.compile(r"[.:]|::") | ||||||
| _CHAINED_METHOD_PATTERN = re.compile(r"\.([^.()]+)$") | ||||||
| _JS_INSTANCE_PREFIXES = {cs.KEYWORD_SELF, "this"} | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The project already has `JAVA_KEYWORD_THIS = "this"` in `constants.py` (line 1961). Using a bare `"this"` string here violates the "No Hardcoded Strings" rule. Either reuse the existing constant with a rename, or introduce a new `KEYWORD_THIS` constant.
Suggested change
Rule Used: ## Technical Requirements — Agentic Framework (source). Prompt To Fix With AI: This is a comment left during a code review.
Path: codebase_rag/parsers/call_resolver.py
Line: 18
Comment:
**Hardcoded `"this"` string literal**
The project already has `JAVA_KEYWORD_THIS = "this"` in `constants.py` (line 1961). Using a bare `"this"` string here violates the "No Hardcoded Strings" rule. Either reuse the existing constant with a rename, or introduce a new `KEYWORD_THIS` constant.
```suggestion
_JS_INSTANCE_PREFIXES = {cs.KEYWORD_SELF, cs.JAVA_KEYWORD_THIS}
```
**Rule Used:** ## Technical Requirements
### Agentic Framework
-... ([source](https://app.greptile.com/review/custom-context?memory=d4240b05-b763-467a-a6bf-94f73e8b6859))
How can I resolve this? If you propose a fix, please make it concise. |
||||||
|
|
||||||
|
|
||||||
| class CallResolver: | ||||||
|
|
@@ -52,6 +53,15 @@ def _try_resolve_method( | |||||
| method_qn = f"{class_qn}{separator}{method_name}" | ||||||
| if method_qn in self.function_registry: | ||||||
| return self.function_registry[method_qn], method_qn | ||||||
|
|
||||||
| class_name = class_qn.split(cs.SEPARATOR_DOT)[-1] | ||||||
| suffix_matches = self.function_registry.find_ending_with( | ||||||
| f"{class_name}{separator}{method_name}" | ||||||
| ) | ||||||
| if len(suffix_matches) == 1: | ||||||
| matched_qn = suffix_matches[0] | ||||||
| return self.function_registry[matched_qn], matched_qn | ||||||
|
Comment on lines
+57
to
+63
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The single-suffix-match fallback searches the entire function registry for any entry ending with `ClassName.method_name`. If there is exactly one such entry globally, it is returned — even if it belongs to a completely unrelated namespace.

Example: `class_qn = "proj.a.MyClass"` with method `"doWork"`. If `"proj.b.MyClass.doWork"` exists (same short class name, different package) and is the only match, this fallback silently returns it instead of falling through to `_resolve_inherited_method`.

The safeguard should at minimum verify that `matched_qn` shares the same module prefix as `class_qn` before trusting the single match:

```python
if len(suffix_matches) == 1:
    matched_qn = suffix_matches[0]
    expected_prefix = cs.SEPARATOR_DOT.join(class_qn.split(cs.SEPARATOR_DOT)[:-1])
    if matched_qn.startswith(expected_prefix):
        return self.function_registry[matched_qn], matched_qn
```

Prompt To Fix With AI: This is a comment left during a code review.
Path: codebase_rag/parsers/call_resolver.py
Line: 57-63
Comment:
**Suffix-match fallback can resolve to wrong namespace**
The single-suffix-match fallback searches the entire function registry for any entry ending with `ClassName.method_name`. If there is exactly one such entry globally, it is returned — even if it belongs to a completely unrelated namespace.
Example: `class_qn = "proj.a.MyClass"` with method `"doWork"`. If `"proj.b.MyClass.doWork"` exists (same short class name, different package) and is the only match, this fallback silently returns it instead of falling through to `_resolve_inherited_method`. This can produce the very kind of incorrect cross-module edge the PR aims to eliminate.
The safeguard should at minimum verify that `matched_qn` shares the same module prefix as `class_qn` before trusting the single match:
```python
if len(suffix_matches) == 1:
matched_qn = suffix_matches[0]
expected_prefix = cs.SEPARATOR_DOT.join(class_qn.split(cs.SEPARATOR_DOT)[:-1])
if matched_qn.startswith(expected_prefix):
return self.function_registry[matched_qn], matched_qn
```
How can I resolve this? If you propose a fix, please make it concise. |
||||||
|
|
||||||
| return self._resolve_inherited_method(class_qn, method_name) | ||||||
|
|
||||||
| def resolve_function_call( | ||||||
|
|
@@ -71,14 +81,17 @@ def resolve_function_call( | |||||
| return self._resolve_chained_call(call_name, module_qn, local_var_types) | ||||||
|
|
||||||
| if result := self._try_resolve_via_imports( | ||||||
| call_name, module_qn, local_var_types | ||||||
| call_name, module_qn, local_var_types, class_context | ||||||
| ): | ||||||
| return result | ||||||
|
|
||||||
| if result := self._try_resolve_same_module(call_name, module_qn): | ||||||
| return result | ||||||
| if not self._has_separator(call_name): | ||||||
| if result := self._try_resolve_same_module(call_name, module_qn): | ||||||
| return result | ||||||
| return self._try_resolve_via_trie(call_name, module_qn) | ||||||
|
|
||||||
| return self._try_resolve_via_trie(call_name, module_qn) | ||||||
| logger.debug(ls.CALL_UNRESOLVED, call_name=call_name) | ||||||
| return None | ||||||
|
|
||||||
| def _try_resolve_iife( | ||||||
| self, call_name: str, module_qn: str | ||||||
|
|
@@ -107,21 +120,21 @@ def _try_resolve_via_imports( | |||||
| call_name: str, | ||||||
| module_qn: str, | ||||||
| local_var_types: dict[str, str] | None, | ||||||
| class_context: str | None = None, | ||||||
| ) -> tuple[str, str] | None: | ||||||
| if module_qn not in self.import_processor.import_mapping: | ||||||
| return None | ||||||
|
|
||||||
| import_map = self.import_processor.import_mapping[module_qn] | ||||||
| import_map = self.import_processor.import_mapping.get(module_qn, {}) | ||||||
|
|
||||||
| if result := self._try_resolve_direct_import(call_name, import_map): | ||||||
| return result | ||||||
|
|
||||||
| if result := self._try_resolve_qualified_call( | ||||||
| call_name, import_map, module_qn, local_var_types | ||||||
| call_name, import_map, module_qn, local_var_types, class_context | ||||||
| ): | ||||||
| return result | ||||||
|
|
||||||
| return self._try_resolve_wildcard_imports(call_name, import_map) | ||||||
| if import_map: | ||||||
| return self._try_resolve_wildcard_imports(call_name, import_map) | ||||||
| return None | ||||||
|
|
||||||
| def _try_resolve_direct_import( | ||||||
| self, call_name: str, import_map: dict[str, str] | ||||||
|
|
@@ -140,6 +153,7 @@ def _try_resolve_qualified_call( | |||||
| import_map: dict[str, str], | ||||||
| module_qn: str, | ||||||
| local_var_types: dict[str, str] | None, | ||||||
| class_context: str | None = None, | ||||||
| ) -> tuple[str, str] | None: | ||||||
| if not self._has_separator(call_name): | ||||||
| return None | ||||||
|
|
@@ -149,11 +163,17 @@ def _try_resolve_qualified_call( | |||||
|
|
||||||
| if len(parts) == 2: | ||||||
| if result := self._resolve_two_part_call( | ||||||
| parts, call_name, separator, import_map, module_qn, local_var_types | ||||||
| parts, | ||||||
| call_name, | ||||||
| separator, | ||||||
| import_map, | ||||||
| module_qn, | ||||||
| local_var_types, | ||||||
| class_context, | ||||||
| ): | ||||||
| return result | ||||||
|
|
||||||
| if len(parts) >= 3 and parts[0] == cs.KEYWORD_SELF: | ||||||
| if len(parts) >= 3 and parts[0] in _JS_INSTANCE_PREFIXES: | ||||||
| return self._resolve_self_attribute_call( | ||||||
| parts, call_name, import_map, module_qn, local_var_types | ||||||
| ) | ||||||
|
|
@@ -235,9 +255,14 @@ def _resolve_two_part_call( | |||||
| import_map: dict[str, str], | ||||||
| module_qn: str, | ||||||
| local_var_types: dict[str, str] | None, | ||||||
| class_context: str | None = None, | ||||||
| ) -> tuple[str, str] | None: | ||||||
| object_name, method_name = parts | ||||||
|
|
||||||
| if object_name in _JS_INSTANCE_PREFIXES and class_context: | ||||||
| if result := self._try_resolve_method(class_context, method_name, separator): | ||||||
| return result | ||||||
|
|
||||||
| if result := self._try_resolve_via_local_type( | ||||||
| object_name, | ||||||
| method_name, | ||||||
|
|
@@ -254,7 +279,7 @@ def _resolve_two_part_call( | |||||
| ): | ||||||
| return result | ||||||
|
|
||||||
| return self._try_resolve_module_method(method_name, call_name, module_qn) | ||||||
| return None | ||||||
|
|
||||||
| def _try_resolve_via_local_type( | ||||||
| self, | ||||||
|
|
@@ -401,28 +426,15 @@ def _resolve_self_attribute_call( | |||||
| if class_qn := self._resolve_class_qn_from_type( | ||||||
| var_type, import_map, module_qn | ||||||
| ): | ||||||
| method_qn = f"{class_qn}.{method_name}" | ||||||
| if method_qn in self.function_registry: | ||||||
| if resolved_method := self._try_resolve_method(class_qn, method_name): | ||||||
| logger.debug( | ||||||
| ls.CALL_INSTANCE_ATTR, | ||||||
| call_name=call_name, | ||||||
| method_qn=method_qn, | ||||||
| method_qn=resolved_method[1], | ||||||
| attr_ref=attribute_ref, | ||||||
| var_type=var_type, | ||||||
| ) | ||||||
| return self.function_registry[method_qn], method_qn | ||||||
|
|
||||||
| if inherited_method := self._resolve_inherited_method( | ||||||
| class_qn, method_name | ||||||
| ): | ||||||
| logger.debug( | ||||||
| ls.CALL_INSTANCE_ATTR_INHERITED, | ||||||
| call_name=call_name, | ||||||
| method_qn=inherited_method[1], | ||||||
| attr_ref=attribute_ref, | ||||||
| var_type=var_type, | ||||||
| ) | ||||||
| return inherited_method | ||||||
| return resolved_method | ||||||
|
|
||||||
| return None | ||||||
|
|
||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,8 +35,14 @@ def build_local_variable_type_map( | |
| ) -> dict[str, str]: | ||
| local_var_types: dict[str, str] = {} | ||
|
|
||
| stack: list[ASTNode] = [caller_node] | ||
| if class_node := self._find_enclosing_class_node(caller_node): | ||
| self._collect_constructor_injected_types( | ||
| class_node, module_qn, local_var_types | ||
| ) | ||
|
|
||
| self._collect_parameter_types(caller_node, module_qn, local_var_types) | ||
|
|
||
| stack: list[ASTNode] = [caller_node] | ||
| declarator_count = 0 | ||
|
|
||
| while stack: | ||
|
|
@@ -59,7 +65,7 @@ def build_local_variable_type_map( | |
| ) | ||
|
|
||
| if var_type := self._infer_js_variable_type_from_value( | ||
| value_node, module_qn | ||
| value_node, module_qn, local_var_types | ||
| ): | ||
| local_var_types[var_name] = var_type | ||
| logger.debug( | ||
|
|
@@ -79,11 +85,138 @@ def build_local_variable_type_map( | |
| ) | ||
| return local_var_types | ||
|
|
||
| def _find_enclosing_class_node(self, node: ASTNode) -> ASTNode | None: | ||
| current = node | ||
| while current is not None: | ||
| if current.type == cs.TS_CLASS_DECLARATION: | ||
| return current | ||
| current = current.parent | ||
| return None | ||
|
|
||
| def _collect_constructor_injected_types( | ||
| self, | ||
| class_node: ASTNode, | ||
| module_qn: str, | ||
| local_var_types: dict[str, str], | ||
| ) -> None: | ||
| body_node = class_node.child_by_field_name(cs.FIELD_BODY) | ||
| if body_node is None: | ||
| return | ||
|
|
||
| for child in body_node.children: | ||
| if child.type != cs.TS_METHOD_DEFINITION: | ||
| continue | ||
|
|
||
| name_node = child.child_by_field_name(cs.FIELD_NAME) | ||
| if ( | ||
| name_node is None | ||
| or name_node.text is None | ||
| or safe_decode_text(name_node) != cs.KEYWORD_CONSTRUCTOR | ||
| ): | ||
| continue | ||
|
|
||
| params_node = child.child_by_field_name(cs.TS_FIELD_PARAMETERS) | ||
| if params_node is None: | ||
| return | ||
|
|
||
| for param in params_node.children: | ||
| self._collect_constructor_parameter_type( | ||
| param, module_qn, local_var_types | ||
| ) | ||
| return | ||
|
|
||
| def _collect_constructor_parameter_type( | ||
| self, | ||
| param_node: ASTNode, | ||
| module_qn: str, | ||
| local_var_types: dict[str, str], | ||
| ) -> None: | ||
| if param_node.type not in { | ||
| "required_parameter", | ||
| "optional_parameter", | ||
| cs.TS_FORMAL_PARAMETER, | ||
| }: | ||
| return | ||
|
|
||
| has_accessibility_modifier = any( | ||
| child.type == "accessibility_modifier" for child in param_node.children | ||
| ) | ||
| if not has_accessibility_modifier: | ||
| return | ||
|
Comment on lines
+134
to
+145
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The same applies to `"type_annotation"` used in `_extract_type_annotation_name` (line 195). All four new literals should be centralised in `constants.py`. Rule Used: ## Technical Requirements — Agentic Framework (source). Prompt To Fix With AI: This is a comment left during a code review.
Path: codebase_rag/parsers/js_ts/type_inference.py
Line: 134-145
Comment:
**Hardcoded tree-sitter node-type string literals**
`"required_parameter"`, `"optional_parameter"`, and `"accessibility_modifier"` appear as raw string literals in `_collect_constructor_parameter_type` (and also in `_collect_parameter_types` at lines 169-174). No corresponding constants exist in `constants.py` for these three node types. Per the project's "No Hardcoded Strings" rule, they should each be added as constants (e.g. `TS_REQUIRED_PARAMETER`, `TS_OPTIONAL_PARAMETER`, `TS_ACCESSIBILITY_MODIFIER`) and referenced via `cs.`.
The same applies to `"type_annotation"` used in `_extract_type_annotation_name` (line 195).
All four new literals should be centralised in `constants.py`.
**Rule Used:** ## Technical Requirements
### Agentic Framework
-... ([source](https://app.greptile.com/review/custom-context?memory=d4240b05-b763-467a-a6bf-94f73e8b6859))
How can I resolve this? If you propose a fix, please make it concise. |
||
|
|
||
| param_name = self._extract_parameter_name(param_node) | ||
| if not param_name: | ||
| return | ||
|
|
||
| if not (param_type := self._extract_type_annotation_name(param_node)): | ||
| return | ||
|
|
||
| resolved_type = self._resolve_js_class_name(param_type, module_qn) or param_type | ||
| local_var_types[param_name] = resolved_type | ||
| local_var_types[f"this.{param_name}"] = resolved_type | ||
|
|
||
| def _collect_parameter_types( | ||
| self, | ||
| caller_node: ASTNode, | ||
| module_qn: str, | ||
| local_var_types: dict[str, str], | ||
| ) -> None: | ||
| params_node = caller_node.child_by_field_name(cs.TS_FIELD_PARAMETERS) | ||
| if params_node is None: | ||
| return | ||
|
|
||
| for param in params_node.children: | ||
| if param.type not in { | ||
| "required_parameter", | ||
| "optional_parameter", | ||
| cs.TS_FORMAL_PARAMETER, | ||
| }: | ||
| continue | ||
|
|
||
| param_name = self._extract_parameter_name(param) | ||
| if not param_name or param_name in local_var_types: | ||
| continue | ||
|
|
||
| if not (param_type := self._extract_type_annotation_name(param)): | ||
| continue | ||
|
|
||
| resolved_type = self._resolve_js_class_name(param_type, module_qn) or param_type | ||
| local_var_types[param_name] = resolved_type | ||
|
|
||
| def _extract_parameter_name(self, param_node: ASTNode) -> str | None: | ||
| identifier_node = next( | ||
| (child for child in param_node.children if child.type == cs.TS_IDENTIFIER), | ||
| None, | ||
| ) | ||
| return safe_decode_text(identifier_node) if identifier_node is not None else None | ||
|
|
||
| def _extract_type_annotation_name(self, node: ASTNode) -> str | None: | ||
| type_node = next( | ||
| (child for child in node.children if child.type == "type_annotation"), | ||
| None, | ||
| ) | ||
| if type_node is None or type_node.text is None: | ||
| return None | ||
|
|
||
| type_text = safe_decode_text(type_node) | ||
| if not type_text: | ||
| return None | ||
|
|
||
| return type_text.lstrip(":").strip() | ||
|
|
||
| def _infer_js_variable_type_from_value( | ||
| self, value_node: ASTNode, module_qn: str | ||
| self, | ||
| value_node: ASTNode, | ||
| module_qn: str, | ||
| local_var_types: dict[str, str], | ||
| ) -> str | None: | ||
| logger.debug(ls.JS_INFER_VALUE_NODE, node_type=value_node.type) | ||
|
|
||
| if value_node.type == cs.TS_MEMBER_EXPRESSION: | ||
| expr_text = safe_decode_text(value_node) | ||
| if expr_text and expr_text in local_var_types: | ||
| return local_var_types[expr_text] | ||
|
|
||
| if value_node.type == cs.TS_NEW_EXPRESSION: | ||
| if class_name := ut.extract_constructor_name(value_node): | ||
| class_qn = self._resolve_js_class_name(class_name, module_qn) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Catching a broad `Exception` can mask underlying issues and make debugging harder. While `fetch_all` might be an external call, it's best practice to either catch more specific exceptions that are expected from the `IngestorProtocol` or add a comment explaining why a broad `Exception` is necessary here due to an unpredictable variety of errors.

Consider replacing `Exception` with a more specific exception type or a tuple of expected exceptions if possible. If not, please add a comment explaining the rationale for catching a generic `Exception`.

References:
- If the call is expected to raise specific exceptions (e.g. `ValueError` for invalid formats and `AssertionError` for missing keys), catch a tuple of these specific exceptions, such as `(ValueError, AssertionError)`, instead of a generic `Exception`.
- If catching `Exception` is a pragmatic choice due to a wide and unpredictable variety of exceptions from external libraries, add a comment explaining the rationale to improve code maintainability.