From 064815f0689095ed53c43620aba8a328170392e1 Mon Sep 17 00:00:00 2001 From: Andy Brummer Date: Wed, 17 Jun 2026 07:24:02 -0500 Subject: [PATCH 1/6] =?UTF-8?q?feat(refs):=20scope-based=20type=20resoluti?= =?UTF-8?q?on=20=E2=80=94=20Go=20(phase=201)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve method-call targets by the receiver's locally-known TYPE instead of name-string only, so x.M() on a common method name resolves to the exact method (the SCIP base case), not an arbitrary same-named symbol. Mechanism (write-path only; reads stay lock-free RCU): - Per-function local type env {name -> type}, built syntactically in the Go extractor from the receiver, typed params, `var x T`, and `x := T{}/&T{}`. Cleared per function; closures inherit the enclosing env. - A method call `recv.M()` whose receiver type is known is emitted as a receiver-type-qualified ref `Type.M`. - resolve_reference_target: a dotted `Type.M` resolves to the method named M whose receiver/owning type is Type (Go receiver parsed from signature; class langs matched via scope_chain — ready for later phases). Unknown/dynamic receiver falls back to the existing name-based path (degrades to a candidate, honest — same as gopls/SCIP for interface dispatch). Verified: - Controlled 2-type corpus: A.Do->a.helpA resolves to A.helpA, B.Do->b.helpB to B.helpB (no same-name collision); callers correct. - chi: param-typed receivers resolve (r *http.Request -> r.Context() -> Request.Context); field-access chains (mx.pool.Get) degrade to bare name. - Full unit 1692/1692; integration 128/128; no golden regen (synthetic golden corpus has no typed method calls). Design + per-language rollout: docs/plans/2026-06-17-scope-type-resolution.md. Phases 2-4 (Java/C#/TS, Python/Rust, JS/C++/Kotlin/PHP/Ruby/Zig) follow this template per language. 
Co-Authored-By: Claude Opus 4.8 --- .../plans/2026-06-17-scope-type-resolution.md | 60 ++++++++ include/lci/parser/unified_extractor.h | 13 ++ src/core/reference_tracker.cpp | 65 +++++++- src/parser/unified_extractor.cpp | 140 +++++++++++++++++- 4 files changed, 273 insertions(+), 5 deletions(-) create mode 100644 docs/plans/2026-06-17-scope-type-resolution.md diff --git a/docs/plans/2026-06-17-scope-type-resolution.md b/docs/plans/2026-06-17-scope-type-resolution.md new file mode 100644 index 0000000..668ed5b --- /dev/null +++ b/docs/plans/2026-06-17-scope-type-resolution.md @@ -0,0 +1,60 @@ +# Scope-based type resolution (SCIP base case) — call-graph precision + +## Problem +`resolve_reference_target` is name-string based (same-file → import/package +disambiguation → first candidate). No receiver-type resolution, so `x.M()` on a +common method name (ServeHTTP/String/Error/Close/Get…) can attribute the call to +the wrong same-named symbol. Qualified function calls (`pkg.Func()`) resolve +well; method calls on receivers do not. + +## Approach (no type checker, no generics instantiation, no flow analysis) +Resolve the receiver's type from a **per-function local type environment** built +purely syntactically in the extractor, then emit method-call refs as +**receiver-type-qualified** names `Type.M`; the resolver matches `Type.M` to the +method symbol whose receiver type is `Type`. Unknown receiver type → bare `M` +(today's name-based path). Interface/dynamic dispatch → candidate set (honest; +same as gopls/SCIP). + +## Components +1. **Local type env** (extractor, per function): `{name → type}` from + receiver, typed params, and simple decls. Cleared on function entry. +2. **Qualified emission**: `recv.M` where `typeof(recv)` known → ref + `referenced_name = "Type.M"`. +3. **Resolver**: dotted `Type.M` → candidates named `M` filtered by receiver + type (parsed from each candidate's signature) → exact; else fall back. 
+ +## Per-language base case (env population rules) +| Lang | receiver/self | local decls that yield a type | +|---|---|---| +| Go | `(r *T)` | `x := T{}`, `x := &T{}`, `var x T`, typed params, `x := NewT()` (ret type) | +| Java | `this`→class | `T x`, `new T()`, typed params | +| C# | `this`→class | `T x`, `var x = new T()`, typed params | +| TypeScript | `this`→class | `const x: T`, `x = new T()`, `(x: T)` | +| Python | `self`→class (scope) | `x = T(...)`, `x: T`, `def m(self, x: T)` | +| Rust | `&self`→impl type | `let x: T`, `let x = T::new()`, typed params | +| C++ | `this`→class | `T x;`, `T* x = new T()` | +| JS | `this`→class | `x = new T()` | +| Kotlin | `this`→class | `val x: T`, `x = T()` | +| PHP | `$this`→class | `$x = new T()` | +| Ruby | `self`→class | `x = T.new` | +| Zig | — | `var x: T`, `T{}` | + +## LCI guideline fit +- Write-path only (extract/link); reads stay lock-free RCU. +- Env built once per function; resolution = hash lookups + cached. +- Deterministic; candidate sets sorted. +- Base case only; name-based fallback; per-language rules isolated like + `process_<lang>_reference`. +- Honest: unknown/dynamic → candidate set, never a fabricated single edge. + +## Rollout (each phase: implement → measure precision on a real corpus → goldens) +1. Go (reference impl; chi/pocketbase) — proves architecture. +2. Java / C# / TypeScript (explicit types — cheapest). +3. Python / Rust (annotations + constructor inference; fastapi + a rust repo). +4. JS / C++ / Kotlin / PHP / Ruby / Zig. 
+ +## Status +- [ ] Phase 1 Go +- [ ] Phase 2 Java/C#/TS +- [ ] Phase 3 Python/Rust +- [ ] Phase 4 remainder diff --git a/include/lci/parser/unified_extractor.h b/include/lci/parser/unified_extractor.h index 6a84173..e0a46f6 100644 --- a/include/lci/parser/unified_extractor.h +++ b/include/lci/parser/unified_extractor.h @@ -6,6 +6,8 @@ #include #include +#include + #include #include #include @@ -272,6 +274,17 @@ class UnifiedExtractor { uintptr_t id{}; }; std::vector handled_nodes_; + + // Scope-based type resolution (SCIP base case): per-function map of local + // identifier -> type name, built syntactically (receiver, typed params, + // simple typed/constructor decls). Lets a method call `recv.M()` be emitted + // as a receiver-type-qualified ref `Type.M`, which the resolver matches to + // the method whose receiver type is `Type` — instead of a bare name that + // collides across same-named methods. Cleared on entering a top-level + // function/method; closures inherit the enclosing map. Write-path only. + absl::flat_hash_map local_var_types_; + void seed_go_local_types(TSNode fn_node, bool is_method); + void record_go_local_var(TSNode decl_node); }; /// Thread-local pool of UnifiedExtractor instances. diff --git a/src/core/reference_tracker.cpp b/src/core/reference_tracker.cpp index 624f6db..735760d 100644 --- a/src/core/reference_tracker.cpp +++ b/src/core/reference_tracker.cpp @@ -751,14 +751,50 @@ uint64_t ReferenceTracker::fnv1a_hash_name(std::string_view name) { return h; } +namespace { +// Bare type name from a possibly-decorated receiver token: "*chi.Mux" -> "Mux". +std::string_view bare_type_name(std::string_view t) { + size_t i = 0; + while (i < t.size() && (t[i] == '*' || t[i] == '&')) ++i; + t = t.substr(i); + if (auto dot = t.rfind('.'); dot != std::string_view::npos) + t = t.substr(dot + 1); + return t; +} + +// Go method-receiver type from a signature: "func (r *Mux) M(...)" -> "Mux". 
+std::string_view go_signature_receiver(std::string_view sig) { + constexpr std::string_view kFunc = "func ("; + if (sig.rfind(kFunc, 0) != 0) return {}; + auto close = sig.find(')', kFunc.size()); + if (close == std::string_view::npos) return {}; + std::string_view recv = sig.substr(kFunc.size(), close - kFunc.size()); + if (auto sp = recv.rfind(' '); sp != std::string_view::npos) + recv = recv.substr(sp + 1); // drop the receiver var name + return bare_type_name(recv); +} + +// Does this symbol's owning/receiver type equal `recv_type`? Matches Go +// receivers (parsed from the signature) and class-based languages (the +// enclosing class appears in scope_chain). +bool symbol_matches_receiver_type(const EnhancedSymbol& sym, + std::string_view recv_type) { + if (go_signature_receiver(sym.signature) == recv_type) return true; + for (const auto& sc : sym.scope_chain) { + if (bare_type_name(sc.name) == recv_type) return true; + } + return false; +} +} // namespace + SymbolID ReferenceTracker::resolve_reference_target( const Snapshot& s, const Reference& ref, std::span file_symbol_ids) { - const auto& name = ref.referenced_name; - if (name.empty()) return 0; + const auto& full_name = ref.referenced_name; + if (full_name.empty()) return 0; - uint64_t name_hash = fnv1a_hash_name(name); + uint64_t name_hash = fnv1a_hash_name(full_name); uint64_t cache_key = (static_cast(ref.file_id) << 32) | (name_hash & 0xFFFFFFFF); @@ -767,6 +803,29 @@ SymbolID ReferenceTracker::resolve_reference_target( return it->second; } + // Scope-typed method ref "Type.M" (emitted by the extractor when the + // receiver's type is locally known): resolve to the method named M whose + // receiver/owning type is Type — the precise target among same-named + // methods. Bare lookup name is M; on no receiver-type match we fall through + // to the name-based path on M (so unknown/dynamic receivers degrade to the + // existing behavior rather than failing). 
+ std::string_view name = full_name; + std::string_view recv_type; + if (auto dot = full_name.rfind('.'); dot != std::string::npos) { + recv_type = std::string_view(full_name).substr(0, dot); + name = std::string_view(full_name).substr(dot + 1); + if (!recv_type.empty() && !name.empty()) { + for (SymbolID id : s.symbols.get_symbols_by_name(name)) { + if (const auto* sym = s.symbols.get(id)) { + if (symbol_matches_receiver_type(*sym, recv_type)) { + reference_cache_[cache_key] = id; + return id; + } + } + } + } + } + // Check same-file symbols first (fast path). for (SymbolID id : file_symbol_ids) { if (const auto* sym = s.symbols.get(id)) { diff --git a/src/parser/unified_extractor.cpp b/src/parser/unified_extractor.cpp index aa39875..92c9e2c 100644 --- a/src/parser/unified_extractor.cpp +++ b/src/parser/unified_extractor.cpp @@ -1493,6 +1493,106 @@ void UnifiedExtractor::process_reference_node(TSNode node, } } +namespace { +// Bare Go type name from a decorated type token: "*chi.Mux"/"[]Mux" -> "Mux". +std::string go_bare_type(std::string_view t) { + size_t i = 0; + while (i < t.size() && + (t[i] == '*' || t[i] == '&' || t[i] == '[' || t[i] == ']')) + ++i; + t = t.substr(i); + if (auto d = t.rfind('.'); d != std::string_view::npos) t = t.substr(d + 1); + return std::string(t); +} +} // namespace + +// Seed the per-function local type env from the receiver (methods) and typed +// parameters. Cleared each function/method so types don't leak across funcs. 
+void UnifiedExtractor::seed_go_local_types(TSNode fn, bool is_method) { + local_var_types_.clear(); + auto add_plist = [&](TSNode plist) { + if (ts_node_is_null(plist)) return; + uint32_t n = ts_node_named_child_count(plist); + for (uint32_t i = 0; i < n; ++i) { + TSNode pd = ts_node_named_child(plist, i); + const char* pt = ts_node_type(pd); + if (!pt || std::string_view(pt) != "parameter_declaration") continue; + TSNode ty = ts_node_child_by_field_name(pd, "type", 4); + if (ts_node_is_null(ty)) continue; + std::string tn = go_bare_type(node_text(ty)); + if (tn.empty()) continue; + uint32_t cc = ts_node_named_child_count(pd); + for (uint32_t j = 0; j < cc; ++j) { + TSNode c = ts_node_named_child(pd, j); + const char* ct = ts_node_type(c); + if (ct && std::string_view(ct) == "identifier") + local_var_types_[std::string(node_text(c))] = tn; + } + } + }; + if (is_method) + add_plist(ts_node_child_by_field_name(fn, "receiver", + static_cast(8))); + add_plist(ts_node_child_by_field_name(fn, "parameters", + static_cast(10))); +} + +// Record `var x T` and `x := T{}` / `x := &T{}` into the local type env. +void UnifiedExtractor::record_go_local_var(TSNode decl) { + const char* dt = ts_node_type(decl); + std::string_view t(dt ? 
dt : ""); + if (t == "var_declaration") { + uint32_t n = ts_node_named_child_count(decl); + for (uint32_t i = 0; i < n; ++i) { + TSNode spec = ts_node_named_child(decl, i); + TSNode ty = ts_node_child_by_field_name(spec, "type", + static_cast(4)); + if (ts_node_is_null(ty)) continue; + std::string tn = go_bare_type(node_text(ty)); + if (tn.empty()) continue; + uint32_t cc = ts_node_named_child_count(spec); + for (uint32_t j = 0; j < cc; ++j) { + TSNode c = ts_node_named_child(spec, j); + const char* ct = ts_node_type(c); + if (ct && std::string_view(ct) == "identifier") + local_var_types_[std::string(node_text(c))] = tn; + } + } + } else if (t == "short_var_declaration") { + TSNode left = ts_node_child_by_field_name(decl, "left", + static_cast(4)); + TSNode right = ts_node_child_by_field_name(decl, "right", + static_cast(5)); + if (ts_node_is_null(left) || ts_node_is_null(right)) return; + if (ts_node_named_child_count(left) != 1 || + ts_node_named_child_count(right) != 1) + return; // base case: single binding + TSNode lid = ts_node_named_child(left, 0); + TSNode rex = ts_node_named_child(right, 0); + const char* lt = ts_node_type(lid); + if (!lt || std::string_view(lt) != "identifier") return; + const char* rt = ts_node_type(rex); + std::string_view r(rt ? rt : ""); + if (r == "unary_expression") { // &T{} + TSNode op = ts_node_named_child(rex, 0); + if (!ts_node_is_null(op)) { + rex = op; + rt = ts_node_type(rex); + r = rt ? 
rt : ""; + } + } + if (r == "composite_literal") { // T{...} + TSNode ty = ts_node_child_by_field_name(rex, "type", + static_cast(4)); + if (!ts_node_is_null(ty)) { + std::string tn = go_bare_type(node_text(ty)); + if (!tn.empty()) + local_var_types_[std::string(node_text(lid))] = tn; + } + } + } +} + void UnifiedExtractor::process_go_reference(TSNode node, std::string_view node_type) { auto is_handled = [&](TSNode n) { @@ -1503,6 +1603,22 @@ void UnifiedExtractor::process_go_reference(TSNode node, return false; }; + // Maintain the local type env (SCIP base case). func_literal (closures) + // deliberately does NOT clear — it inherits the enclosing function's types. + if (node_type == "function_declaration") { + seed_go_local_types(node, false); + return; + } + if (node_type == "method_declaration") { + seed_go_local_types(node, true); + return; + } + if (node_type == "short_var_declaration" || + node_type == "var_declaration") { + record_go_local_var(node); + return; + } + if (node_type == "call_expression") { TSNode func = ts_node_child_by_field_name( node, "function", @@ -1526,8 +1642,28 @@ void UnifiedExtractor::process_go_reference(TSNode node, if (!ts_node_is_null(field)) { handled_nodes_.push_back( {reinterpret_cast(field.id)}); - references_.push_back(create_reference( - field, ReferenceType::Call, RefStrength::Tight)); + Reference cref = + create_reference(field, ReferenceType::Call, + RefStrength::Tight); + // Receiver-type qualification (SCIP base case): if the receiver + // is a local identifier whose type we know, emit "Type.M" so the + // resolver selects the exact method among same-named candidates. 
+ TSNode operand = ts_node_child_by_field_name( + func, "operand", static_cast(7)); + if (!ts_node_is_null(operand)) { + const char* ot = ts_node_type(operand); + if (ot && std::string_view(ot) == "identifier") { + auto it = local_var_types_.find( + std::string(node_text(operand))); + if (it != local_var_types_.end() && + !it->second.empty()) { + cref.referenced_name = + it->second + "." + + std::string(node_text(field)); + } + } + } + references_.push_back(std::move(cref)); return; } } From 21beb35bf141418f906d77dd7e1091b44e139a60 Mon Sep 17 00:00:00 2001 From: Andy Brummer Date: Wed, 17 Jun 2026 07:58:07 -0500 Subject: [PATCH 2/6] =?UTF-8?q?feat(refs):=20scope-based=20type=20resoluti?= =?UTF-8?q?on=20=E2=80=94=20Python=20(phase=202)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the SCIP-base-case type resolution to Python, and fix the same method-call caller gap Go had: process_python_reference tagged a method call's name as the un-resolvable "obj.M" Call + a Usage on "M", so Python methods had no callers. Now tag the attribute (method name) as the Call, qualified to "Type.M" when the receiver type is known. Local type env (UNAMBIGUOUS sources only — `x = Foo()` is skipped because constructor vs factory call is syntactically identical in Python): - self / cls -> enclosing class (via enclosing_class_name() over the scope stack; resolver matches the class through scope_chain). - annotated params `def m(self, x: T)` and annotated assignments `x: T`. py_bare_type strips quotes (string annotations), subscripts (List[Foo]->List), and module qualifiers. Verified: - Controlled 2-class corpus: A.do/self.helpA() -> A.helpA, B.do -> B.helpB (no same-name collision). - Full unit 1692/1692; MCP goldens 14/14 clean (earlier batch failures were the pre-existing MCP-readiness flake under load — 73ms pre-index responses — and reproduce on the Go phase too; get_context passes at 5082ms unloaded). 
Reused for all class-based languages: enclosing_class_name() + scope_chain receiver matching. Next: TS/JS, Java, C#, Rust, C++, Kotlin, PHP, Ruby, Zig. Co-Authored-By: Claude Opus 4.8 --- include/lci/parser/unified_extractor.h | 3 + src/parser/unified_extractor.cpp | 115 +++++++++++++++++++++++-- 2 files changed, 112 insertions(+), 6 deletions(-) diff --git a/include/lci/parser/unified_extractor.h b/include/lci/parser/unified_extractor.h index e0a46f6..4330c2c 100644 --- a/include/lci/parser/unified_extractor.h +++ b/include/lci/parser/unified_extractor.h @@ -285,6 +285,9 @@ class UnifiedExtractor { absl::flat_hash_map local_var_types_; void seed_go_local_types(TSNode fn_node, bool is_method); void record_go_local_var(TSNode decl_node); + // Nearest enclosing class/struct scope name (for self/this typing in + // class-based languages); empty if not inside one. + std::string enclosing_class_name() const; }; /// Thread-local pool of UnifiedExtractor instances. diff --git a/src/parser/unified_extractor.cpp b/src/parser/unified_extractor.cpp index 92c9e2c..3ad5f9e 100644 --- a/src/parser/unified_extractor.cpp +++ b/src/parser/unified_extractor.cpp @@ -1504,8 +1504,28 @@ std::string go_bare_type(std::string_view t) { if (auto d = t.rfind('.'); d != std::string_view::npos) t = t.substr(d + 1); return std::string(t); } + +// Bare Python type from an annotation: strips quotes (string annotations), +// subscripts (List[Foo] -> List), and module qualifier (mod.Foo -> Foo). 
+std::string py_bare_type(std::string_view t) { + while (!t.empty() && (t.front() == '"' || t.front() == '\'' || t.front() == ' ')) + t.remove_prefix(1); + while (!t.empty() && (t.back() == '"' || t.back() == '\'' || t.back() == ' ')) + t.remove_suffix(1); + if (auto b = t.find('['); b != std::string_view::npos) t = t.substr(0, b); + if (auto d = t.rfind('.'); d != std::string_view::npos) t = t.substr(d + 1); + while (!t.empty() && t.back() == ' ') t.remove_suffix(1); + return std::string(t); +} } // namespace +std::string UnifiedExtractor::enclosing_class_name() const { + for (auto it = scope_stack_.rbegin(); it != scope_stack_.rend(); ++it) { + if (it->scope_type == ScopeType::Class) return it->name; + } + return {}; +} + // Seed the per-function local type env from the receiver (methods) and typed // parameters. Cleared each function/method so types don't leak across funcs. void UnifiedExtractor::seed_go_local_types(TSNode fn, bool is_method) { @@ -1730,25 +1750,108 @@ void UnifiedExtractor::process_js_reference(TSNode node, void UnifiedExtractor::process_python_reference(TSNode node, std::string_view node_type) { + auto is_handled = [&](TSNode n) { + uintptr_t id = reinterpret_cast(n.id); + for (const auto& h : handled_nodes_) { + if (h.id == id) return true; + } + return false; + }; + + // Local type env (SCIP base case). Python uses only UNAMBIGUOUS type + // sources: self/cls -> enclosing class, and annotated params/vars + // (`x: T`). `x = Foo()` is intentionally skipped — constructor vs factory + // call is syntactically identical in Python, so it would mis-type. 
+ if (node_type == "function_definition") { + local_var_types_.clear(); + std::string cls = enclosing_class_name(); + if (!cls.empty()) { + local_var_types_["self"] = cls; + local_var_types_["cls"] = cls; + } + TSNode params = ts_node_child_by_field_name( + node, "parameters", static_cast(10)); + if (!ts_node_is_null(params)) { + uint32_t n = ts_node_named_child_count(params); + for (uint32_t i = 0; i < n; ++i) { + TSNode p = ts_node_named_child(params, i); + const char* pt = ts_node_type(p); + if (!pt || std::string_view(pt) != "typed_parameter") continue; + TSNode ty = ts_node_child_by_field_name( + p, "type", static_cast(4)); + TSNode nm = ts_node_named_child(p, 0); + if (!ts_node_is_null(ty) && !ts_node_is_null(nm)) { + std::string tn = py_bare_type(node_text(ty)); + if (!tn.empty()) + local_var_types_[std::string(node_text(nm))] = tn; + } + } + } + return; + } + if (node_type == "assignment") { + TSNode ty = ts_node_child_by_field_name(node, "type", + static_cast(4)); + TSNode lhs = ts_node_child_by_field_name(node, "left", + static_cast(4)); + if (!ts_node_is_null(ty) && !ts_node_is_null(lhs)) { + const char* lt = ts_node_type(lhs); + if (lt && std::string_view(lt) == "identifier") { + std::string tn = py_bare_type(node_text(ty)); + if (!tn.empty()) + local_var_types_[std::string(node_text(lhs))] = tn; + } + } + return; + } + if (node_type == "call") { TSNode func = ts_node_child_by_field_name( node, "function", static_cast(std::strlen("function"))); - if (!ts_node_is_null(func)) { - references_.push_back( - create_reference(func, ReferenceType::Call, RefStrength::Tight)); + if (ts_node_is_null(func)) return; + // Method call obj.M(...): tag the attribute (method name) as the Call + // (was a Call on the un-resolvable "obj.M" + a Usage on "M", so methods + // had no callers). Qualify "Type.M" when obj's type is known. 
+ const char* ftype = ts_node_type(func); + if (ftype && std::string_view(ftype) == "attribute") { + TSNode attr = ts_node_child_by_field_name( + func, "attribute", static_cast(std::strlen("attribute"))); + if (!ts_node_is_null(attr)) { + handled_nodes_.push_back({reinterpret_cast(attr.id)}); + Reference cref = create_reference(attr, ReferenceType::Call, + RefStrength::Tight); + TSNode obj = ts_node_child_by_field_name( + func, "object", static_cast(6)); + if (!ts_node_is_null(obj)) { + const char* ot = ts_node_type(obj); + if (ot && std::string_view(ot) == "identifier") { + auto it = local_var_types_.find( + std::string(node_text(obj))); + if (it != local_var_types_.end() && !it->second.empty()) + cref.referenced_name = + it->second + "." + std::string(node_text(attr)); + } + } + references_.push_back(std::move(cref)); + return; + } } + references_.push_back( + create_reference(func, ReferenceType::Call, RefStrength::Tight)); } else if (node_type == "attribute") { TSNode attr = ts_node_child_by_field_name( node, "attribute", static_cast(std::strlen("attribute"))); - if (!ts_node_is_null(attr)) { + if (!ts_node_is_null(attr) && !is_handled(attr)) { references_.push_back( create_reference(attr, ReferenceType::Usage, RefStrength::Loose)); } } else if (node_type == "identifier") { - references_.push_back( - create_reference(node, ReferenceType::Usage, RefStrength::Loose)); + if (!is_handled(node)) { + references_.push_back( + create_reference(node, ReferenceType::Usage, RefStrength::Loose)); + } } } From 5af6f4becdf28b827271dc262a99e75ed68b0247 Mon Sep 17 00:00:00 2001 From: Andy Brummer Date: Wed, 17 Jun 2026 08:03:12 -0500 Subject: [PATCH 3/6] =?UTF-8?q?feat(refs):=20scope-based=20type=20resoluti?= =?UTF-8?q?on=20=E2=80=94=20JS/TS=20(phase=203)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend SCIP-base-case type resolution to JavaScript/TypeScript and fix the same method-call caller gap: a call obj.M() tagged the 
un-resolvable "obj.M" member_expression as the Call (method name M only got a Usage), so JS/TS methods had no callers. Now tag the PROPERTY (M) as the Call, qualified to "Type.M" when the receiver type is known. Local type env: - this -> enclosing class (enclosing_class_name()). - TS-annotated params `(x: T)` and variable annotations `const x: T`. - `new T()` constructor inference (`const x = new T()` -> x: T) — unambiguous in JS/TS unlike Python. js_bare_type strips ": ", generics (Foo<T>->Foo), array suffix, qualifier. Verified: - Controlled TS 2-class corpus: A.do/this.helpA()->A.helpA, B.do->B.helpB; run(): const a:A=new A(); a.do()->A.do and const b=new B(); b.do()->B.do (annotation + new() inference both resolve). - Full unit 1692/1692; trpc TS real-project 4/4. (MCP batch goldens flake on the pre-existing MCP-readiness race under load — 73ms pre-index responses; pass at ~5s when given time; multi-lang corpus has no JS/TS file so this change cannot affect them.) Co-Authored-By: Claude Opus 4.8 --- src/parser/unified_extractor.cpp | 114 +++++++++++++++++++++++++++++-- 1 file changed, 108 insertions(+), 6 deletions(-) diff --git a/src/parser/unified_extractor.cpp b/src/parser/unified_extractor.cpp index 3ad5f9e..1d55fae 100644 --- a/src/parser/unified_extractor.cpp +++ b/src/parser/unified_extractor.cpp @@ -1505,6 +1505,18 @@ std::string go_bare_type(std::string_view t) { return std::string(t); } +// Bare JS/TS type from an annotation/type node: strips ": ", generics +// (Foo<T> -> Foo), array suffix (Foo[] -> Foo), and qualifier (ns.Foo -> Foo). 
+std::string js_bare_type(std::string_view t) { + while (!t.empty() && (t.front() == ':' || t.front() == ' ')) + t.remove_prefix(1); + if (auto a = t.find('<'); a != std::string_view::npos) t = t.substr(0, a); + if (auto b = t.find('['); b != std::string_view::npos) t = t.substr(0, b); + while (!t.empty() && (t.back() == ' ')) t.remove_suffix(1); + if (auto d = t.rfind('.'); d != std::string_view::npos) t = t.substr(d + 1); + return std::string(t); +} + // Bare Python type from an annotation: strips quotes (string annotations), // subscripts (List[Foo] -> List), and module qualifier (mod.Foo -> Foo). std::string py_bare_type(std::string_view t) { @@ -1708,22 +1720,112 @@ void UnifiedExtractor::process_go_reference(TSNode node, void UnifiedExtractor::process_js_reference(TSNode node, std::string_view node_type) { uintptr_t node_id = reinterpret_cast(node.id); + auto is_handled = [&](TSNode n) { + uintptr_t id = reinterpret_cast(n.id); + for (const auto& h : handled_nodes_) { + if (h.id == id) return true; + } + return false; + }; + + // Local type env (SCIP base case). this -> enclosing class; TS-annotated + // params/vars (`x: T`, `(x: T)`) and `new T()` constructions. 
+ if (node_type == "method_definition" || + node_type == "function_declaration" || + node_type == "function_expression" || node_type == "arrow_function" || + node_type == "generator_function_declaration") { + local_var_types_.clear(); + std::string cls = enclosing_class_name(); + if (!cls.empty()) local_var_types_["this"] = cls; + TSNode params = ts_node_child_by_field_name( + node, "parameters", static_cast(10)); + if (!ts_node_is_null(params)) { + uint32_t n = ts_node_named_child_count(params); + for (uint32_t i = 0; i < n; ++i) { + TSNode p = ts_node_named_child(params, i); + TSNode ty = ts_node_child_by_field_name( + p, "type", static_cast(4)); + TSNode pat = ts_node_child_by_field_name( + p, "pattern", static_cast(7)); + if (!ts_node_is_null(ty) && !ts_node_is_null(pat)) { + const char* pt = ts_node_type(pat); + if (pt && std::string_view(pt) == "identifier") { + std::string tn = js_bare_type(node_text(ty)); + if (!tn.empty()) + local_var_types_[std::string(node_text(pat))] = tn; + } + } + } + } + // fall through (arrow/function bodies still get their refs walked) + } else if (node_type == "variable_declarator") { + TSNode nm = ts_node_child_by_field_name(node, "name", + static_cast(4)); + if (!ts_node_is_null(nm)) { + const char* nt = ts_node_type(nm); + if (nt && std::string_view(nt) == "identifier") { + std::string ty; + TSNode tann = ts_node_child_by_field_name( + node, "type", static_cast(4)); + if (!ts_node_is_null(tann)) { + ty = js_bare_type(node_text(tann)); + } else { + TSNode val = ts_node_child_by_field_name( + node, "value", static_cast(5)); + if (!ts_node_is_null(val)) { + const char* vt = ts_node_type(val); + if (vt && std::string_view(vt) == "new_expression") { + TSNode ctor = ts_node_child_by_field_name( + val, "constructor", static_cast(11)); + if (!ts_node_is_null(ctor)) + ty = js_bare_type(node_text(ctor)); + } + } + } + if (!ty.empty()) + local_var_types_[std::string(node_text(nm))] = ty; + } + } + } if (node_type == "call_expression") { 
TSNode func = ts_node_child_by_field_name( node, "function", static_cast(std::strlen("function"))); - if (!ts_node_is_null(func)) { - handled_nodes_.push_back( - {reinterpret_cast(func.id)}); - references_.push_back( - create_reference(func, ReferenceType::Call, RefStrength::Tight)); + if (ts_node_is_null(func)) return; + // Method call obj.M(...): tag the PROPERTY (method name) as the Call so + // it resolves to the method symbol (not the un-resolvable "obj.M"); the + // member_expression branch then skips it. Qualify "Type.M" when obj's + // type is known (this -> class, typed local). + const char* ftype = ts_node_type(func); + if (ftype && std::string_view(ftype) == "member_expression") { + TSNode prop = ts_node_child_by_field_name( + func, "property", static_cast(std::strlen("property"))); + if (!ts_node_is_null(prop)) { + handled_nodes_.push_back({reinterpret_cast(prop.id)}); + Reference cref = create_reference(prop, ReferenceType::Call, + RefStrength::Tight); + TSNode obj = ts_node_child_by_field_name( + func, "object", static_cast(6)); + if (!ts_node_is_null(obj)) { + auto it = local_var_types_.find( + std::string(node_text(obj))); // "this" or ident + if (it != local_var_types_.end() && !it->second.empty()) + cref.referenced_name = + it->second + "." 
+ std::string(node_text(prop)); + } + references_.push_back(std::move(cref)); + return; + } } + handled_nodes_.push_back({reinterpret_cast(func.id)}); + references_.push_back( + create_reference(func, ReferenceType::Call, RefStrength::Tight)); } else if (node_type == "member_expression") { TSNode prop = ts_node_child_by_field_name( node, "property", static_cast(std::strlen("property"))); - if (!ts_node_is_null(prop)) { + if (!ts_node_is_null(prop) && !is_handled(prop)) { handled_nodes_.push_back( {reinterpret_cast(prop.id)}); references_.push_back( From af9f73813687836f39da9bbb38003058f4e42176 Mon Sep 17 00:00:00 2001 From: Andy Brummer Date: Wed, 17 Jun 2026 08:29:27 -0500 Subject: [PATCH 4/6] feat(types): scope-based receiver-type resolution for C/C++ (SCIP base case) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 4 of scope-type resolution. C/C++ method calls now resolve by the receiver's TYPE, not just the method name, so same-named methods across different classes (run/ServeHTTP/Close/…) attribute to the correct symbol. - process_scope_node: a named struct_specifier/class_specifier/union_specifier *with a body* opens a Class scope named after the aggregate. This gives member methods an owning-class entry in their scope_chain, which the resolver matches against a scope-typed `T.m` ref. Bodyless forms (forward decls, `struct A a;` uses) are excluded so they don't nest the surrounding scope. - process_reference_node (cpp branch): builds a per-function local var->type env (this -> enclosing class; `T x;` / `T x = ...` decls) and emits field-call refs as receiver-type-qualified `Type.m` when the receiver type is known. Unknown receivers fall back to the bare name (today's behavior). - Relocated go/js/py bare-type helpers to the top anon namespace so the cpp branch can use go_bare_type (was defined after the use site). 
Verified on a controlled corpus: go() -> {A.run -> A.helpA, B.run -> B.helpB} resolves both edges distinctly (previously both collapsed onto A.run). Added ReferenceTrackerTest.ResolvesByReceiverTypeScope. Full unit suite 1693/1693 green; no regressions from the new C/C++ class scopes. Co-Authored-By: Claude Opus 4.8 --- src/parser/unified_extractor.cpp | 146 ++++++++++++++++++++++--------- tests/reference_tracker_test.cpp | 82 +++++++++++++++++ 2 files changed, 188 insertions(+), 40 deletions(-) diff --git a/src/parser/unified_extractor.cpp b/src/parser/unified_extractor.cpp index 1d55fae..1ae26db 100644 --- a/src/parser/unified_extractor.cpp +++ b/src/parser/unified_extractor.cpp @@ -65,6 +65,42 @@ bool is_cpp_type_declaration_name_context(TSNode node) { parent_type == "namespace_definition"; } +// Bare Go type name from a decorated type token: "*chi.Mux"/"[]Mux" -> "Mux". +std::string go_bare_type(std::string_view t) { + size_t i = 0; + while (i < t.size() && + (t[i] == '*' || t[i] == '&' || t[i] == '[' || t[i] == ']')) + ++i; + t = t.substr(i); + if (auto d = t.rfind('.'); d != std::string_view::npos) t = t.substr(d + 1); + return std::string(t); +} + +// Bare JS/TS type from an annotation/type node: strips ": ", generics +// (Foo<T> -> Foo), array suffix (Foo[] -> Foo), and qualifier (ns.Foo -> Foo). +std::string js_bare_type(std::string_view t) { + while (!t.empty() && (t.front() == ':' || t.front() == ' ')) + t.remove_prefix(1); + if (auto a = t.find('<'); a != std::string_view::npos) t = t.substr(0, a); + if (auto b = t.find('['); b != std::string_view::npos) t = t.substr(0, b); + while (!t.empty() && (t.back() == ' ')) t.remove_suffix(1); + if (auto d = t.rfind('.'); d != std::string_view::npos) t = t.substr(d + 1); + return std::string(t); +} + +// Bare Python type from an annotation: strips quotes (string annotations), +// subscripts (List[Foo] -> List), and module qualifier (mod.Foo -> Foo). 
+std::string py_bare_type(std::string_view t) { + while (!t.empty() && (t.front() == '"' || t.front() == '\'' || t.front() == ' ')) + t.remove_prefix(1); + while (!t.empty() && (t.back() == '"' || t.back() == '\'' || t.back() == ' ')) + t.remove_suffix(1); + if (auto b = t.find('['); b != std::string_view::npos) t = t.substr(0, b); + if (auto d = t.rfind('.'); d != std::string_view::npos) t = t.substr(d + 1); + while (!t.empty() && t.back() == ' ') t.remove_suffix(1); + return std::string(t); +} + } // namespace // --------------------------------------------------------------------------- @@ -431,6 +467,31 @@ bool UnifiedExtractor::process_scope_node(TSNode node, scope_type = ScopeType::Class; name = std::string(extract_go_type_name(node)); + } else if (node_type == "struct_specifier" || + node_type == "class_specifier" || + node_type == "union_specifier") { + // C/C++ aggregate definition. Only a named specifier *with a body* + // (field_declaration_list) opens a class scope — forward decls and + // `struct A a;` uses carry no body and must not nest the surrounding + // scope. Giving member methods a Class scope named after the aggregate + // is what lets the resolver match a scope-typed `T.m` ref to the method + // whose owning type is `T`. Fields are located by child iteration: the + // grammar exposes `name` but the body has no stable field name here. 
+ std::string_view aggr_name; + bool has_body = false; + uint32_t cc = ts_node_child_count(node); + for (uint32_t i = 0; i < cc; ++i) { + TSNode c = ts_node_child(node, i); + std::string_view ct(ts_node_type(c)); + if (ct == "type_identifier" && aggr_name.empty()) + aggr_name = node_text(c); + else if (ct == "field_declaration_list") + has_body = true; + } + if (!has_body || aggr_name.empty()) return false; + scope_type = ScopeType::Class; + name = std::string(aggr_name); + } else if (node_type == "function_declaration" || node_type == "function_definition") { scope_type = ScopeType::Function; @@ -1473,14 +1534,57 @@ void UnifiedExtractor::process_reference_node(TSNode node, } else if (ext_ == ".py") { process_python_reference(node, node_type); } else if (is_cpp_family_extension(ext_)) { + // Local type env (SCIP base case): this -> enclosing class; `T x;` / + // `T x = ...` declarations. C++ method calls already resolve by bare + // name (pick_cpp_reference_leaf returns the field), so this only adds + // the receiver-type qualification that disambiguates same-named methods. 
+ if (node_type == "function_definition") { + local_var_types_.clear(); + std::string cls = enclosing_class_name(); + if (!cls.empty()) local_var_types_["this"] = cls; + } else if (node_type == "declaration") { + TSNode ty = ts_node_child_by_field_name(node, "type", + static_cast(4)); + TSNode dcl = ts_node_child_by_field_name( + node, "declarator", static_cast(10)); + if (!ts_node_is_null(ty) && !ts_node_is_null(dcl)) { + std::string tn = go_bare_type(node_text(ty)); + const char* dt = ts_node_type(dcl); + if (dt && std::string_view(dt) == "init_declarator") + dcl = ts_node_child_by_field_name( + dcl, "declarator", static_cast(10)); + if (!ts_node_is_null(dcl)) { + const char* it = ts_node_type(dcl); + if (it && std::string_view(it) == "identifier" && + !tn.empty()) + local_var_types_[std::string(node_text(dcl))] = tn; + } + } + } + if (node_type == "call_expression") { TSNode func = ts_node_child_by_field_name( node, "function", static_cast(std::strlen("function"))); if (!ts_node_is_null(func)) { - references_.push_back(create_reference( + Reference cref = create_reference( pick_cpp_reference_leaf(func), ReferenceType::Call, - RefStrength::Tight)); + RefStrength::Tight); + const char* ft = ts_node_type(func); + if (ft && std::string_view(ft) == "field_expression") { + TSNode arg = ts_node_child_by_field_name( + func, "argument", static_cast(8)); + TSNode fld = ts_node_child_by_field_name( + func, "field", static_cast(5)); + if (!ts_node_is_null(arg) && !ts_node_is_null(fld)) { + auto lv = local_var_types_.find( + std::string(node_text(arg))); + if (lv != local_var_types_.end() && !lv->second.empty()) + cref.referenced_name = + lv->second + "." 
+ std::string(node_text(fld)); + } + } + references_.push_back(std::move(cref)); } } else if ((node_type == "type_identifier" || node_type == "qualified_identifier" || @@ -1493,44 +1597,6 @@ void UnifiedExtractor::process_reference_node(TSNode node, } } -namespace { -// Bare Go type name from a decorated type token: "*chi.Mux"/"[]Mux" -> "Mux". -std::string go_bare_type(std::string_view t) { - size_t i = 0; - while (i < t.size() && - (t[i] == '*' || t[i] == '&' || t[i] == '[' || t[i] == ']')) - ++i; - t = t.substr(i); - if (auto d = t.rfind('.'); d != std::string_view::npos) t = t.substr(d + 1); - return std::string(t); -} - -// Bare JS/TS type from an annotation/type node: strips ": ", generics -// (Foo -> Foo), array suffix (Foo[] -> Foo), and qualifier (ns.Foo -> Foo). -std::string js_bare_type(std::string_view t) { - while (!t.empty() && (t.front() == ':' || t.front() == ' ')) - t.remove_prefix(1); - if (auto a = t.find('<'); a != std::string_view::npos) t = t.substr(0, a); - if (auto b = t.find('['); b != std::string_view::npos) t = t.substr(0, b); - while (!t.empty() && (t.back() == ' ')) t.remove_suffix(1); - if (auto d = t.rfind('.'); d != std::string_view::npos) t = t.substr(d + 1); - return std::string(t); -} - -// Bare Python type from an annotation: strips quotes (string annotations), -// subscripts (List[Foo] -> List), and module qualifier (mod.Foo -> Foo). 
-std::string py_bare_type(std::string_view t) { - while (!t.empty() && (t.front() == '"' || t.front() == '\'' || t.front() == ' ')) - t.remove_prefix(1); - while (!t.empty() && (t.back() == '"' || t.back() == '\'' || t.back() == ' ')) - t.remove_suffix(1); - if (auto b = t.find('['); b != std::string_view::npos) t = t.substr(0, b); - if (auto d = t.rfind('.'); d != std::string_view::npos) t = t.substr(d + 1); - while (!t.empty() && t.back() == ' ') t.remove_suffix(1); - return std::string(t); -} -} // namespace - std::string UnifiedExtractor::enclosing_class_name() const { for (auto it = scope_stack_.rbegin(); it != scope_stack_.rend(); ++it) { if (it->scope_type == ScopeType::Class) return it->name; diff --git a/tests/reference_tracker_test.cpp b/tests/reference_tracker_test.cpp index a381cdd..253df44 100644 --- a/tests/reference_tracker_test.cpp +++ b/tests/reference_tracker_test.cpp @@ -304,6 +304,88 @@ TEST(ReferenceTrackerTest, FunctionTree) { EXPECT_TRUE(tree.children.empty()); } +// --------------------------------------------------------------------------- +// Receiver-type-qualified method resolution (SCIP base case) +// --------------------------------------------------------------------------- + +// Two classes A and B each expose a method named run(). The extractor emits the +// call sites as receiver-type-qualified refs A.run / B.run (it knows the +// receiver types from the local var env). The resolver must route each ref to +// the run() whose owning class scope matches the receiver type, instead of +// collapsing both onto the first same-named symbol. 
+TEST(ReferenceTrackerTest, ResolvesByReceiverTypeScope) { + ReferenceTracker rt; + + auto scope = [](ScopeType t, const char* n, int s, int e) { + ScopeInfo si; + si.type = t; + si.name = n; + si.start_line = s; + si.end_line = e; + return si; + }; + std::vector scopes = { + scope(ScopeType::Class, "A", 1, 4), + scope(ScopeType::Class, "B", 5, 8), + }; + + std::vector symbols = { + make_sym("A", SymbolType::Struct, 1, 1, 4), + make_sym("runA", SymbolType::Function, 1, 2, 2), + make_sym("B", SymbolType::Struct, 1, 5, 8), + make_sym("runB", SymbolType::Function, 1, 6, 6), + make_sym("go", SymbolType::Function, 1, 9, 14), + }; + // The two run methods share the visible name "run"; their distinct + // identifiers above only let the test address each one. + symbols[1].name = "run"; + symbols[3].name = "run"; + + auto call = [](uint64_t id, const char* name, int line) { + Reference r; + r.id = id; + r.type = ReferenceType::Call; + r.referenced_name = name; + r.line = line; + r.column = 5; + return r; + }; + std::vector refs = { + call(1, "A.run", 11), // a.run() inside go() + call(2, "B.run", 13), // b.run() inside go() + }; + + rt.process_file(1, "m.cpp", symbols, refs, scopes); + rt.process_all_references(); + + // Address each run() method by line (the struct symbol shares the same + // span, so a plain name lookup is ambiguous). + const EnhancedSymbol* run_a = nullptr; + const EnhancedSymbol* run_b = nullptr; + for (const auto* es : rt.find_symbols_by_name("run")) { + if (es->symbol.line == 2) run_a = es; + if (es->symbol.line == 6) run_b = es; + } + ASSERT_NE(run_a, nullptr); + ASSERT_NE(run_b, nullptr); + + // Each run() carries its owning class in the scope chain. 
+ auto chain_has = [](const EnhancedSymbol& es, const char* n) { + for (const auto& sc : es.scope_chain) + if (sc.name == n) return true; + return false; + }; + EXPECT_TRUE(chain_has(*run_a, "A")); + EXPECT_TRUE(chain_has(*run_b, "B")); + + // A.run resolves to the L2 method, B.run to the L6 method — not both to A. + auto a_callers = rt.get_caller_names(run_a->id); + auto b_callers = rt.get_caller_names(run_b->id); + EXPECT_EQ(a_callers.size(), 1u); + EXPECT_EQ(b_callers.size(), 1u) + << "B.run must resolve to the B method, not collapse onto A.run"; +} + // --------------------------------------------------------------------------- // Scope chain caching // --------------------------------------------------------------------------- From 635b746b7388d13e7f202ba1f55125821311ab20 Mon Sep 17 00:00:00 2001 From: Andy Brummer Date: Wed, 17 Jun 2026 11:35:25 -0500 Subject: [PATCH 5/6] feat(types): call-graph + scope-based receiver-type resolution for the remaining 7 languages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Java, C#, Rust, PHP, Kotlin, Ruby, and Zig previously emitted ZERO call references — they had no call graph at all, so there was nothing to type-resolve. This adds, in the single extraction pass, both the call-reference extraction and the SCIP-base-case receiver-type env for each: - process__reference: emit method/function calls as ReferenceType::Call, tagging the method-name node (not the un-resolvable receiver.method selector), and qualify to "Type.method" when the receiver's type is locally known (this/self/$this -> enclosing class; typed params; `T x`/`new T()`/`T::new()`/ `T{}`/`T.new` locals). Shared qualify_and_push() helper. - Class-scope prerequisites so the resolver can match an owning type: Rust impl_item/struct_item, Zig `const A = struct{…}`. (C/C++ landed earlier.) 
- Kotlin symbol extraction was entirely broken: the fieldless tree-sitter-kotlin grammar has no `name` field, so extract_function/extract_class/process_scope_node produced zero symbols. Added first_named_child_typed() fallback (simple_identifier / type_identifier) — Kotlin now indexes and resolves. Verified per language on controlled corpora: go() resolves a.run()/b.run() to the distinct run() of each class (previously collapsed onto the first same-named symbol). Added ScopeTypeResolution.* (7 langs). Full unit suite 1700/1700. Known base-case limits documented in the design doc: Ruby bare no-paren calls (parse as identifier, not call) aren't edges; Kotlin/Zig constructor calls show as a bare Call on the type. Unknown receivers degrade to the bare name, never a fabricated edge. Co-Authored-By: Claude Opus 4.8 --- .../plans/2026-06-17-scope-type-resolution.md | 33 +- include/lci/parser/unified_extractor.h | 7 + src/parser/unified_extractor.cpp | 595 ++++++++++++++++++ tests/language_extraction_test.cpp | 110 ++++ 4 files changed, 740 insertions(+), 5 deletions(-) diff --git a/docs/plans/2026-06-17-scope-type-resolution.md b/docs/plans/2026-06-17-scope-type-resolution.md index 668ed5b..b1cbe2e 100644 --- a/docs/plans/2026-06-17-scope-type-resolution.md +++ b/docs/plans/2026-06-17-scope-type-resolution.md @@ -53,8 +53,31 @@ same as gopls/SCIP). 3. Python / Rust (annotations + constructor inference; fastapi + a rust repo). 4. JS / C++ / Kotlin / PHP / Ruby / Zig. 
-## Status -- [ ] Phase 1 Go -- [ ] Phase 2 Java/C#/TS -- [ ] Phase 3 Python/Rust -- [ ] Phase 4 remainder +## Status — all 13 languages have scope-typed call resolution +- [x] Go, JS/TS, Python, C/C++ (had call graphs; added receiver-type env + qualified emission + resolver scope match) +- [x] Java, C#, Rust, PHP, Kotlin, Ruby, Zig (had **no** call references at all — added + `process_<lang>_reference` Call extraction *and* the receiver-type env in the same pass) + +### Prerequisite gaps fixed along the way +- C/C++: named `struct/class/union` specifiers (with a body) now open a Class scope so + member methods carry an owning-type entry the resolver matches. +- Rust: `impl_item`/`struct_item` open Class scopes (methods live in `impl`, `self` -> impl type). +- Zig: `const A = struct {…}` opens a Class scope named after the const. +- Kotlin: symbol extraction was entirely broken (fieldless grammar → `name` field lookups + returned null → zero symbols). Added a fieldless-name fallback (`first_named_child_typed`) + in `extract_function`/`extract_class`/`process_scope_node`. + +### Known base-case limitations (honest; not fabricated) +- Ruby: a bare no-receiver, no-paren call (`help_a`) parses as `identifier`, not `call`, so it + is not emitted as a call edge. Receiver calls (`a.run`, `self.help_a`) and `T.new`-typed + locals resolve. Constructor `new` calls are intentionally not emitted as edges. +- Kotlin/Zig: `val a = A()` / `const a = A{}` constructor calls are emitted as a bare Call on the + type name (shows construction); harmless and resolves to the type symbol. +- All languages: unknown/dynamic receivers degrade to the bare method name (today's behavior), + never a fabricated single edge. + +### Verification +- Controlled corpus per language: `go()` resolves `a.run()`/`b.run()` to the *distinct* `run` + method of each class (previously collapsed onto the first same-named symbol).
+- Unit: `ScopeTypeResolution.*` (7 langs, extraction-level qualified-ref assertions) + + `ReferenceTrackerTest.ResolvesByReceiverTypeScope` (resolver-level). Full suite 1700/1700. diff --git a/include/lci/parser/unified_extractor.h b/include/lci/parser/unified_extractor.h index 4330c2c..7e693d1 100644 --- a/include/lci/parser/unified_extractor.h +++ b/include/lci/parser/unified_extractor.h @@ -216,6 +216,13 @@ class UnifiedExtractor { void process_go_reference(TSNode node, std::string_view node_type); void process_js_reference(TSNode node, std::string_view node_type); void process_python_reference(TSNode node, std::string_view node_type); + void process_java_reference(TSNode node, std::string_view node_type); + void process_csharp_reference(TSNode node, std::string_view node_type); + void process_rust_reference(TSNode node, std::string_view node_type); + void process_php_reference(TSNode node, std::string_view node_type); + void process_kotlin_reference(TSNode node, std::string_view node_type); + void process_ruby_reference(TSNode node, std::string_view node_type); + void process_zig_reference(TSNode node, std::string_view node_type); Reference create_reference(TSNode node, ReferenceType ref_type, RefStrength strength); diff --git a/src/parser/unified_extractor.cpp b/src/parser/unified_extractor.cpp index 1ae26db..70161af 100644 --- a/src/parser/unified_extractor.cpp +++ b/src/parser/unified_extractor.cpp @@ -65,6 +65,17 @@ bool is_cpp_type_declaration_name_context(TSNode node) { parent_type == "namespace_definition"; } +// First named child of the given type, or a null node. Used to recover names +// from fieldless grammars (tree-sitter-kotlin exposes no `name` field). 
+TSNode first_named_child_typed(TSNode node, std::string_view type) { + uint32_t n = ts_node_named_child_count(node); + for (uint32_t i = 0; i < n; ++i) { + TSNode c = ts_node_named_child(node, i); + if (std::string_view(ts_node_type(c)) == type) return c; + } + return TSNode{}; +} + // Bare Go type name from a decorated type token: "*chi.Mux"/"[]Mux" -> "Mux". std::string go_bare_type(std::string_view t) { size_t i = 0; @@ -461,6 +472,9 @@ bool UnifiedExtractor::process_scope_node(TSNode node, scope_type = ScopeType::Class; TSNode n = ts_node_child_by_field_name( node, "name", static_cast(std::strlen("name"))); + // Kotlin class_declaration is fieldless (type_identifier child). + if (ts_node_is_null(n) && ext_ == ".kt") + n = first_named_child_typed(node, "type_identifier"); if (!ts_node_is_null(n)) name = std::string(node_text(n)); } else if (node_type == "type_declaration") { @@ -492,6 +506,44 @@ bool UnifiedExtractor::process_scope_node(TSNode node, scope_type = ScopeType::Class; name = std::string(aggr_name); + } else if (node_type == "impl_item") { + // Rust: `impl T { ... }`. Methods live in the impl block, and `self` + // types to T, so the impl opens a Class scope named after the impl type + // — giving each method an owning-type entry the resolver can match. + TSNode ty = ts_node_child_by_field_name(node, "type", + static_cast(4)); + if (ts_node_is_null(ty)) return false; + scope_type = ScopeType::Class; + name = go_bare_type(node_text(ty)); + + } else if (node_type == "struct_item") { + // Rust struct definition (the type itself; methods are in impl_item). + TSNode n = ts_node_child_by_field_name(node, "name", + static_cast(4)); + if (ts_node_is_null(n)) return false; + scope_type = ScopeType::Class; + name = std::string(node_text(n)); + + } else if (ext_ == ".zig" && node_type == "variable_declaration") { + // Zig: `const A = struct { ... };`. 
The container is an initializer of a + // variable_declaration; name the Class scope after the const identifier + // so member fns get an owning-type entry. Plain vars (no struct/union + // child) are left to fall through as non-scoping. + std::string_view zname; + bool is_container = false; + uint32_t cc = ts_node_child_count(node); + for (uint32_t i = 0; i < cc; ++i) { + TSNode c = ts_node_child(node, i); + std::string_view ct(ts_node_type(c)); + if (ct == "identifier" && zname.empty()) + zname = node_text(c); + else if (ct == "struct_declaration" || ct == "union_declaration") + is_container = true; + } + if (!is_container || zname.empty()) return false; + scope_type = ScopeType::Class; + name = std::string(zname); + } else if (node_type == "function_declaration" || node_type == "function_definition") { scope_type = ScopeType::Function; @@ -807,6 +859,13 @@ void UnifiedExtractor::extract_function(TSNode node, } } + // Kotlin function_declaration is fieldless: the name is a simple_identifier + // child rather than a `name` field. + if (name.empty() && ext_ == ".kt") { + TSNode n = first_named_child_typed(node, "simple_identifier"); + if (!ts_node_is_null(n)) name = node_text(n); + } + if (name.empty() && node_type != "func_literal" && node_type != "arrow_function") { return; @@ -963,6 +1022,9 @@ void UnifiedExtractor::extract_class(TSNode node, TSNode name_node = ts_node_child_by_field_name( node, "name", static_cast(std::strlen("name"))); + // Kotlin class_declaration is fieldless: the name is a type_identifier child. 
+ if (ts_node_is_null(name_node) && ext_ == ".kt") + name_node = first_named_child_typed(node, "type_identifier"); if (ts_node_is_null(name_node)) return; std::string_view name = node_text(name_node); if (name.empty()) return; @@ -1533,6 +1595,20 @@ void UnifiedExtractor::process_reference_node(TSNode node, process_js_reference(node, node_type); } else if (ext_ == ".py") { process_python_reference(node, node_type); + } else if (ext_ == ".java") { + process_java_reference(node, node_type); + } else if (ext_ == ".cs") { + process_csharp_reference(node, node_type); + } else if (ext_ == ".rs") { + process_rust_reference(node, node_type); + } else if (ext_ == ".php") { + process_php_reference(node, node_type); + } else if (ext_ == ".kt" || ext_ == ".kts") { + process_kotlin_reference(node, node_type); + } else if (ext_ == ".rb") { + process_ruby_reference(node, node_type); + } else if (ext_ == ".zig") { + process_zig_reference(node, node_type); } else if (is_cpp_family_extension(ext_)) { // Local type env (SCIP base case): this -> enclosing class; `T x;` / // `T x = ...` declarations. C++ method calls already resolve by bare @@ -2023,6 +2099,525 @@ void UnifiedExtractor::process_python_reference(TSNode node, } } +// Shared by the class-based-language handlers below: emit a Call ref on the +// method-name node, qualified to "Type.M" when the receiver's type is known +// (receiver text resolved through local_var_types_, e.g. `this`/`self` or a +// typed local). Unknown receivers degrade to the bare method name. +namespace { +void qualify_and_push(std::vector& out, Reference cref, + const absl::flat_hash_map& env, + std::string_view recv_text, std::string_view method_text) { + auto it = env.find(std::string(recv_text)); + if (it != env.end() && !it->second.empty()) + cref.referenced_name = + it->second + "." 
+ std::string(method_text); + out.push_back(std::move(cref)); +} +} // namespace + +void UnifiedExtractor::process_java_reference(TSNode node, + std::string_view node_type) { + // Local type env: this -> enclosing class; typed params; `T x` / + // `T x = new T()` locals (the declared type is authoritative either way). + if (node_type == "method_declaration" || + node_type == "constructor_declaration") { + local_var_types_.clear(); + std::string cls = enclosing_class_name(); + if (!cls.empty()) local_var_types_["this"] = cls; + TSNode params = ts_node_child_by_field_name( + node, "parameters", static_cast(10)); + if (!ts_node_is_null(params)) { + uint32_t n = ts_node_named_child_count(params); + for (uint32_t i = 0; i < n; ++i) { + TSNode p = ts_node_named_child(params, i); + TSNode ty = ts_node_child_by_field_name( + p, "type", static_cast(4)); + TSNode nm = ts_node_child_by_field_name( + p, "name", static_cast(4)); + if (!ts_node_is_null(ty) && !ts_node_is_null(nm)) { + std::string tn = js_bare_type(node_text(ty)); + if (!tn.empty()) + local_var_types_[std::string(node_text(nm))] = tn; + } + } + } + return; + } + if (node_type == "local_variable_declaration") { + TSNode ty = ts_node_child_by_field_name(node, "type", + static_cast(4)); + TSNode dcl = ts_node_child_by_field_name(node, "declarator", + static_cast(10)); + if (!ts_node_is_null(ty) && !ts_node_is_null(dcl)) { + TSNode nm = ts_node_child_by_field_name(dcl, "name", + static_cast(4)); + if (!ts_node_is_null(nm)) { + std::string tn = js_bare_type(node_text(ty)); + if (!tn.empty()) + local_var_types_[std::string(node_text(nm))] = tn; + } + } + return; + } + + if (node_type == "method_invocation") { + TSNode name = ts_node_child_by_field_name(node, "name", + static_cast(4)); + if (ts_node_is_null(name)) return; + Reference cref = + create_reference(name, ReferenceType::Call, RefStrength::Tight); + TSNode obj = ts_node_child_by_field_name(node, "object", + static_cast(6)); + std::string_view recv = 
ts_node_is_null(obj) ? std::string_view("this") + : node_text(obj); + qualify_and_push(references_, std::move(cref), local_var_types_, recv, + node_text(name)); + } +} + +void UnifiedExtractor::process_csharp_reference(TSNode node, + std::string_view node_type) { + if (node_type == "method_declaration" || + node_type == "constructor_declaration") { + local_var_types_.clear(); + std::string cls = enclosing_class_name(); + if (!cls.empty()) local_var_types_["this"] = cls; + TSNode params = ts_node_child_by_field_name( + node, "parameters", static_cast(10)); + if (!ts_node_is_null(params)) { + uint32_t n = ts_node_named_child_count(params); + for (uint32_t i = 0; i < n; ++i) { + TSNode p = ts_node_named_child(params, i); + TSNode ty = ts_node_child_by_field_name( + p, "type", static_cast(4)); + TSNode nm = ts_node_child_by_field_name( + p, "name", static_cast(4)); + if (!ts_node_is_null(ty) && !ts_node_is_null(nm)) { + std::string tn = js_bare_type(node_text(ty)); + if (!tn.empty()) + local_var_types_[std::string(node_text(nm))] = tn; + } + } + } + return; + } + if (node_type == "variable_declaration") { + // `T x = ...;` — the declared type is authoritative; for `var x = new + // T()` fall to the object_creation type on the declarator value. 
+ TSNode ty = ts_node_child_by_field_name(node, "type", + static_cast(4)); + if (ts_node_is_null(ty)) return; + std::string tn = js_bare_type(node_text(ty)); + bool is_var = (node_text(ty) == "var"); + uint32_t n = ts_node_named_child_count(node); + for (uint32_t i = 0; i < n; ++i) { + TSNode d = ts_node_named_child(node, i); + if (std::string_view(ts_node_type(d)) != "variable_declarator") + continue; + TSNode nm = ts_node_child_by_field_name(d, "name", + static_cast(4)); + if (ts_node_is_null(nm)) nm = ts_node_named_child(d, 0); + if (ts_node_is_null(nm)) continue; + std::string vt = tn; + if (is_var) { + vt.clear(); + uint32_t dc = ts_node_named_child_count(d); + for (uint32_t k = 0; k < dc; ++k) { + TSNode v = ts_node_named_child(d, k); + if (std::string_view(ts_node_type(v)) == + "object_creation_expression") { + TSNode ot = ts_node_child_by_field_name( + v, "type", static_cast(4)); + if (!ts_node_is_null(ot)) vt = js_bare_type(node_text(ot)); + } + } + } + if (!vt.empty()) + local_var_types_[std::string(node_text(nm))] = vt; + } + return; + } + + if (node_type == "invocation_expression") { + TSNode func = ts_node_child_by_field_name(node, "function", + static_cast(8)); + if (ts_node_is_null(func)) return; + if (std::string_view(ts_node_type(func)) == "member_access_expression") { + TSNode nm = ts_node_child_by_field_name(func, "name", + static_cast(4)); + TSNode ex = ts_node_child_by_field_name(func, "expression", + static_cast(10)); + if (ts_node_is_null(nm)) return; + Reference cref = + create_reference(nm, ReferenceType::Call, RefStrength::Tight); + std::string_view recv = + ts_node_is_null(ex) ? 
std::string_view("this") : node_text(ex); + qualify_and_push(references_, std::move(cref), local_var_types_, recv, + node_text(nm)); + } else if (std::string_view(ts_node_type(func)) == "identifier") { + qualify_and_push( + references_, + create_reference(func, ReferenceType::Call, RefStrength::Tight), + local_var_types_, "this", node_text(func)); + } + } +} + +void UnifiedExtractor::process_rust_reference(TSNode node, + std::string_view node_type) { + // self -> impl type (the enclosing impl_item opens a Class scope named + // after its type); typed params; `let x: T` / `let x = T::new()`. + if (node_type == "function_item") { + local_var_types_.clear(); + std::string cls = enclosing_class_name(); + if (!cls.empty()) local_var_types_["self"] = cls; + TSNode params = ts_node_child_by_field_name( + node, "parameters", static_cast(10)); + if (!ts_node_is_null(params)) { + uint32_t n = ts_node_named_child_count(params); + for (uint32_t i = 0; i < n; ++i) { + TSNode p = ts_node_named_child(params, i); + if (std::string_view(ts_node_type(p)) != "parameter") continue; + TSNode pat = ts_node_child_by_field_name( + p, "pattern", static_cast(7)); + TSNode ty = ts_node_child_by_field_name( + p, "type", static_cast(4)); + if (!ts_node_is_null(pat) && !ts_node_is_null(ty)) { + std::string tn = go_bare_type(node_text(ty)); + if (!tn.empty()) + local_var_types_[std::string(node_text(pat))] = tn; + } + } + } + return; + } + if (node_type == "let_declaration") { + TSNode pat = ts_node_child_by_field_name(node, "pattern", + static_cast(7)); + if (ts_node_is_null(pat) || + std::string_view(ts_node_type(pat)) != "identifier") + return; + std::string tn; + TSNode ty = ts_node_child_by_field_name(node, "type", + static_cast(4)); + if (!ts_node_is_null(ty)) { + tn = go_bare_type(node_text(ty)); + } else { + TSNode val = ts_node_child_by_field_name(node, "value", + static_cast(5)); + if (!ts_node_is_null(val)) { + std::string_view vt(ts_node_type(val)); + if (vt == "struct_expression") { 
+ TSNode n = ts_node_named_child(val, 0); + if (!ts_node_is_null(n)) tn = go_bare_type(node_text(n)); + } else if (vt == "call_expression") { + // `T::new(...)` / `T::default()` — type is the path prefix. + TSNode f = ts_node_child_by_field_name( + val, "function", static_cast(8)); + if (!ts_node_is_null(f) && + std::string_view(ts_node_type(f)) == "scoped_identifier") { + TSNode path = ts_node_child_by_field_name( + f, "path", static_cast(4)); + if (!ts_node_is_null(path)) + tn = go_bare_type(node_text(path)); + } + } else if (vt == "identifier") { + // unit struct: `let a = A;` + tn = go_bare_type(node_text(val)); + } + } + } + if (!tn.empty()) + local_var_types_[std::string(node_text(pat))] = tn; + return; + } + + if (node_type == "call_expression") { + TSNode func = ts_node_child_by_field_name(node, "function", + static_cast(8)); + if (ts_node_is_null(func)) return; + if (std::string_view(ts_node_type(func)) == "field_expression") { + TSNode fld = ts_node_child_by_field_name(func, "field", + static_cast(5)); + TSNode val = ts_node_child_by_field_name(func, "value", + static_cast(5)); + if (ts_node_is_null(fld)) return; + Reference cref = + create_reference(fld, ReferenceType::Call, RefStrength::Tight); + std::string_view recv = + ts_node_is_null(val) ? std::string_view() : node_text(val); + qualify_and_push(references_, std::move(cref), local_var_types_, recv, + node_text(fld)); + } else if (std::string_view(ts_node_type(func)) == "identifier") { + references_.push_back( + create_reference(func, ReferenceType::Call, RefStrength::Tight)); + } + } +} + +void UnifiedExtractor::process_php_reference(TSNode node, + std::string_view node_type) { + // $this -> enclosing class; `$x = new T()` locals (keys keep the `$`). 
+ if (node_type == "method_declaration" || + node_type == "function_definition") { + local_var_types_.clear(); + std::string cls = enclosing_class_name(); + if (!cls.empty()) local_var_types_["$this"] = cls; + return; + } + if (node_type == "assignment_expression") { + TSNode lhs = ts_node_child_by_field_name(node, "left", + static_cast(4)); + TSNode rhs = ts_node_child_by_field_name(node, "right", + static_cast(5)); + if (!ts_node_is_null(lhs) && !ts_node_is_null(rhs) && + std::string_view(ts_node_type(lhs)) == "variable_name" && + std::string_view(ts_node_type(rhs)) == "object_creation_expression") { + // `new T(...)`: the first `name`/`qualified_name` child is the type. + uint32_t n = ts_node_named_child_count(rhs); + for (uint32_t i = 0; i < n; ++i) { + TSNode c = ts_node_named_child(rhs, i); + std::string_view ct(ts_node_type(c)); + if (ct == "name" || ct == "qualified_name") { + local_var_types_[std::string(node_text(lhs))] = + go_bare_type(node_text(c)); + break; + } + } + } + return; + } + + if (node_type == "member_call_expression" || + node_type == "nullsafe_member_call_expression") { + TSNode obj = ts_node_child_by_field_name(node, "object", + static_cast(6)); + TSNode nm = ts_node_child_by_field_name(node, "name", + static_cast(4)); + if (ts_node_is_null(nm)) return; + Reference cref = + create_reference(nm, ReferenceType::Call, RefStrength::Tight); + std::string_view recv = + ts_node_is_null(obj) ? 
std::string_view() : node_text(obj);
+    qualify_and_push(references_, std::move(cref), local_var_types_, recv,
+                     node_text(nm));
+  } else if (node_type == "function_call_expression") {
+    TSNode func = ts_node_child_by_field_name(node, "function",
+                                              static_cast<uint32_t>(8));
+    if (!ts_node_is_null(func) &&
+        std::string_view(ts_node_type(func)) == "name") {
+      references_.push_back(
+          create_reference(func, ReferenceType::Call, RefStrength::Tight));
+    }
+  }
+}
+
+// Kotlin reference handler.
+//
+// Keeps `local_var_types_` (the per-function {name -> type} env) current and
+// emits Call references for the calls it visits:
+//   * function_declaration — clears the env and, when enclosing_class_name()
+//     is non-empty, binds "this" to that class.
+//   * property_declaration — records `val a: A` / `val a = A()` as a -> A.
+//   * call_expression      — pushes a Call ref for `recv.m()` (receiver text
+//     is handed to qualify_and_push) or for a bare `m()` (receiver "this").
+// Any other node type is ignored.
+//
+// The ScopeTypeResolution tests expect the emitted name to be `Type.m` when
+// the receiver is a key of the env; that rewriting happens inside
+// qualify_and_push, which is defined outside this function.
+//
+// `node` is the node being visited; `node_type` is its grammar type name.
+void UnifiedExtractor::process_kotlin_reference(TSNode node,
+                                                std::string_view node_type) {
+  // Kotlin's grammar is largely fieldless; navigate by ordered children.
+  if (node_type == "function_declaration") {
+    local_var_types_.clear();
+    std::string cls = enclosing_class_name();
+    if (!cls.empty()) local_var_types_["this"] = cls;
+    return;
+  }
+  if (node_type == "property_declaration") {
+    // `val a: A` or `val a = A()`. The variable_declaration child holds the
+    // name (+ optional user_type); a call_expression sibling yields the
+    // constructed type for `= A()`.
+    TSNode vd{};
+    uint32_t n = ts_node_named_child_count(node);
+    for (uint32_t i = 0; i < n; ++i) {
+      TSNode c = ts_node_named_child(node, i);
+      if (std::string_view(ts_node_type(c)) == "variable_declaration") {
+        vd = c;
+        break;
+      }
+    }
+    if (ts_node_is_null(vd)) return;
+    std::string_view name;
+    std::string type;
+    uint32_t vc = ts_node_named_child_count(vd);
+    for (uint32_t i = 0; i < vc; ++i) {
+      TSNode c = ts_node_named_child(vd, i);
+      std::string_view ct(ts_node_type(c));
+      // First simple_identifier is the variable name; an explicit user_type
+      // annotation, when present, supplies the type.
+      if (ct == "simple_identifier" && name.empty())
+        name = node_text(c);
+      else if (ct == "user_type")
+        type = js_bare_type(node_text(c));
+    }
+    if (type.empty()) {
+      // No annotation: fall back to the callee of a `= A()` initializer.
+      for (uint32_t i = 0; i < n; ++i) {
+        TSNode c = ts_node_named_child(node, i);
+        if (std::string_view(ts_node_type(c)) == "call_expression") {
+          TSNode callee = ts_node_named_child(c, 0);
+          if (!ts_node_is_null(callee) &&
+              std::string_view(ts_node_type(callee)) ==
+                  "simple_identifier")
+            type = std::string(node_text(callee));
+        }
+      }
+    }
+    if (!name.empty() && !type.empty())
+      local_var_types_[std::string(name)] = type;
+    return;
+  }
+
+  if (node_type == "call_expression") {
+    TSNode first = ts_node_named_child(node, 0);
+    if (ts_node_is_null(first)) return;
+    std::string_view ft(ts_node_type(first));
+    if (ft == "navigation_expression") {
+      // receiver . method — first child is the receiver, then a
+      // navigation_suffix whose simple_identifier is the method.
+      TSNode recv = ts_node_named_child(first, 0);
+      TSNode suffix{};
+      uint32_t nc = ts_node_named_child_count(first);
+      for (uint32_t i = 0; i < nc; ++i) {
+        TSNode c = ts_node_named_child(first, i);
+        if (std::string_view(ts_node_type(c)) == "navigation_suffix")
+          suffix = c;
+      }
+      if (ts_node_is_null(suffix)) return;
+      TSNode m = ts_node_named_child(suffix, 0);
+      if (ts_node_is_null(m)) return;
+      Reference cref =
+          create_reference(m, ReferenceType::Call, RefStrength::Tight);
+      std::string_view rt =
+          ts_node_is_null(recv) ? std::string_view() : node_text(recv);
+      qualify_and_push(references_, std::move(cref), local_var_types_, rt,
+                       node_text(m));
+    } else if (ft == "simple_identifier") {
+      // Bare `m()`: looked up as a call on "this", which is only bound when
+      // the enclosing function sits inside a class.
+      qualify_and_push(
+          references_,
+          create_reference(first, ReferenceType::Call, RefStrength::Tight),
+          local_var_types_, "this", node_text(first));
+    }
+  }
+}
+
+// Ruby reference handler.
+//
+//   * method / singleton_method — clears the env and binds "self" to
+//     enclosing_class_name() when that is non-empty.
+//   * assignment — records `x = T.new` (an `identifier` on the left, a `call`
+//     on the right whose receiver is a `constant` and whose method is `new`)
+//     as x -> T.
+//   * call — pushes a Call ref for the method identifier via
+//     qualify_and_push; a call without a receiver is treated as a call on
+//     "self". `new` itself is skipped.
+// Any other node type is ignored.
+//
+// `node` is the node being visited; `node_type` is its grammar type name.
+void UnifiedExtractor::process_ruby_reference(TSNode node,
+                                              std::string_view node_type) {
+  // self -> enclosing class; `x = T.new` locals.
+  if (node_type == "method" || node_type == "singleton_method") {
+    local_var_types_.clear();
+    std::string cls = enclosing_class_name();
+    if (!cls.empty()) local_var_types_["self"] = cls;
+    return;
+  }
+  if (node_type == "assignment") {
+    // The integer passed with each field name is that name's length.
+    TSNode lhs = ts_node_child_by_field_name(node, "left",
+                                             static_cast<uint32_t>(4));
+    TSNode rhs = ts_node_child_by_field_name(node, "right",
+                                             static_cast<uint32_t>(5));
+    if (!ts_node_is_null(lhs) && !ts_node_is_null(rhs) &&
+        std::string_view(ts_node_type(lhs)) == "identifier" &&
+        std::string_view(ts_node_type(rhs)) == "call") {
+      // `T.new` — receiver is the class constant.
+      TSNode rc = ts_node_child_by_field_name(rhs, "receiver",
+                                              static_cast<uint32_t>(8));
+      TSNode mm = ts_node_child_by_field_name(rhs, "method",
+                                              static_cast<uint32_t>(6));
+      if (!ts_node_is_null(rc) && !ts_node_is_null(mm) &&
+          std::string_view(ts_node_type(rc)) == "constant" &&
+          node_text(mm) == "new")
+        local_var_types_[std::string(node_text(lhs))] =
+            std::string(node_text(rc));
+    }
+    return;
+  }
+
+  if (node_type == "call") {
+    TSNode mm = ts_node_child_by_field_name(node, "method",
+                                            static_cast<uint32_t>(6));
+    if (ts_node_is_null(mm) ||
+        std::string_view(ts_node_type(mm)) != "identifier")
+      return;
+    if (node_text(mm) == "new") return;  // constructor, not a call edge
+    TSNode recv = ts_node_child_by_field_name(node, "receiver",
+                                              static_cast<uint32_t>(8));
+    Reference cref =
+        create_reference(mm, ReferenceType::Call, RefStrength::Tight);
+    std::string_view rt =
+        ts_node_is_null(recv) ?
std::string_view("self") : node_text(recv);
+    qualify_and_push(references_, std::move(cref), local_var_types_, rt,
+                     node_text(mm));
+  }
+}
+
+// Zig reference handler.
+//
+//   * function_declaration — clears the env, then records every `parameter`
+//     that has both a `name` and a `type` field as
+//     name -> go_bare_type(type); this is how an explicit `self: T` first
+//     parameter becomes a known receiver.
+//   * variable_declaration — records `const a = T{}`-style locals: the first
+//     `identifier` child is the name, and the type is the leading identifier
+//     of a struct_initializer / call_expression / field_expression child.
+//   * call_expression — `obj.member(...)` pushes a Call ref through
+//     qualify_and_push with the receiver text; a bare `f(...)` pushes an
+//     unqualified Call ref.
+// Any other node type is ignored.
+//
+// `node` is the node being visited; `node_type` is its grammar type name.
+void UnifiedExtractor::process_zig_reference(TSNode node,
+                                             std::string_view node_type) {
+  // Methods take an explicit `self: T` first param; `const a = T{}` locals.
+  if (node_type == "function_declaration") {
+    local_var_types_.clear();
+    uint32_t n = ts_node_child_count(node);
+    for (uint32_t i = 0; i < n; ++i) {
+      TSNode c = ts_node_child(node, i);
+      if (std::string_view(ts_node_type(c)) != "parameters") continue;
+      uint32_t pc = ts_node_named_child_count(c);
+      for (uint32_t k = 0; k < pc; ++k) {
+        TSNode p = ts_node_named_child(c, k);
+        if (std::string_view(ts_node_type(p)) != "parameter") continue;
+        // The integer passed with each field name is that name's length.
+        TSNode nm = ts_node_child_by_field_name(
+            p, "name", static_cast<uint32_t>(4));
+        TSNode ty = ts_node_child_by_field_name(
+            p, "type", static_cast<uint32_t>(4));
+        if (!ts_node_is_null(nm) && !ts_node_is_null(ty)) {
+          std::string tn = go_bare_type(node_text(ty));
+          if (!tn.empty())
+            local_var_types_[std::string(node_text(nm))] = tn;
+        }
+      }
+    }
+    return;
+  }
+  if (node_type == "variable_declaration") {
+    // `const a = T{};` — identifier name + a struct_initializer / call whose
+    // leading identifier is the type.
+    // NOTE(review): for call_expression / field_expression initializers the
+    // leading identifier may be a function or namespace rather than a type
+    // (e.g. `const x = make();` records x -> make) — confirm this is the
+    // intended heuristic.
+    std::string_view name;
+    std::string type;
+    uint32_t n = ts_node_child_count(node);
+    for (uint32_t i = 0; i < n; ++i) {
+      TSNode c = ts_node_child(node, i);
+      std::string_view ct(ts_node_type(c));
+      if (ct == "identifier" && name.empty()) {
+        name = node_text(c);
+      } else if (ct == "struct_initializer" || ct == "call_expression" ||
+                 ct == "field_expression") {
+        TSNode lead = ts_node_named_child(c, 0);
+        if (!ts_node_is_null(lead) &&
+            std::string_view(ts_node_type(lead)) == "identifier")
+          type = std::string(node_text(lead));
+      }
+    }
+    // Skip self-referential bindings where the "type" is the variable itself.
+    if (!name.empty() && !type.empty() && type != name)
+      local_var_types_[std::string(name)] = type;
+    return;
+  }
+
+  if (node_type == "call_expression") {
+    TSNode func = ts_node_child_by_field_name(node, "function",
+                                              static_cast<uint32_t>(8));
+    if (ts_node_is_null(func)) return;
+    if (std::string_view(ts_node_type(func)) == "field_expression") {
+      TSNode obj = ts_node_child_by_field_name(func, "object",
+                                               static_cast<uint32_t>(6));
+      TSNode mem = ts_node_child_by_field_name(func, "member",
+                                               static_cast<uint32_t>(6));
+      if (ts_node_is_null(mem)) return;
+      Reference cref =
+          create_reference(mem, ReferenceType::Call, RefStrength::Tight);
+      std::string_view recv =
+          ts_node_is_null(obj) ? std::string_view() : node_text(obj);
+      qualify_and_push(references_, std::move(cref), local_var_types_, recv,
+                       node_text(mem));
+    } else if (std::string_view(ts_node_type(func)) == "identifier") {
+      references_.push_back(
+          create_reference(func, ReferenceType::Call, RefStrength::Tight));
+    }
+  }
+}
+
 Reference UnifiedExtractor::create_reference(TSNode node,
                                              ReferenceType ref_type,
                                              RefStrength strength) {
diff --git a/tests/language_extraction_test.cpp b/tests/language_extraction_test.cpp
index e320f3b..5a9254a 100644
--- a/tests/language_extraction_test.cpp
+++ b/tests/language_extraction_test.cpp
@@ -861,5 +861,115 @@ TEST(LanguageExtractionTest, AllParsersCreate) {
   }
 }
 
+// ---------------------------------------------------------------------------
+// Scope-based receiver-type resolution (SCIP base case): each language's
+// reference handler must emit method calls as receiver-type-qualified
+// `Type.method` Call refs when the receiver's type is locally known.
+// ---------------------------------------------------------------------------
+
+// True if a Call reference named exactly `name` was extracted.
+bool has_call_ref(const ExtractionResults& r, std::string_view name) {
+  // Linear scan: only references of type Call count, and the name must match
+  // exactly (e.g. "A.run", not "run").
+  for (const auto& ref : r.references) {
+    if (ref.type == ReferenceType::Call && ref.referenced_name == name)
+      return true;
+  }
+  return false;
+}
+
+TEST(ScopeTypeResolution, JavaQualifiesReceiverAndThis) {
+  constexpr std::string_view src = R"(class A {
+  void run() { helpA(); }
+  void helpA() {}
+}
+class Main {
+  void go() { A a = new A(); a.run(); }
+})";
+  auto r = extract(Language::Java, ".java", src, "M.java");
+  EXPECT_TRUE(has_call_ref(r, "A.helpA"));  // this-qualified bare call
+  EXPECT_TRUE(has_call_ref(r, "A.run"));    // typed-local receiver
+}
+
+TEST(ScopeTypeResolution, CSharpQualifiesReceiverAndThis) {
+  constexpr std::string_view src = R"(class A {
+  void run() { helpA(); }
+  void helpA() {}
+}
+class Main {
+  void go() { A a = new A(); a.run(); }
+})";
+  auto r = extract(Language::CSharp, ".cs", src, "M.cs");
+  EXPECT_TRUE(has_call_ref(r, "A.helpA"));
+  EXPECT_TRUE(has_call_ref(r, "A.run"));
+}
+
+TEST(ScopeTypeResolution, RustQualifiesSelfAndLet) {
+  constexpr std::string_view src = R"(struct A;
+impl A {
+    fn run(&self) { self.help_a(); }
+    fn help_a(&self) {}
+}
+fn go() { let a = A; a.run(); })";
+  auto r = extract(Language::Rust, ".rs", src, "m.rs");
+  EXPECT_TRUE(has_call_ref(r, "A.help_a"));  // self -> impl type
+  EXPECT_TRUE(has_call_ref(r, "A.run"));     // let a = A
+}
+
+TEST(ScopeTypeResolution, PhpQualifiesThisAndNew) {
+  // The fixture needs the `<?php` open tag and the class A wrapper around
+  // run(); without them there is no class for `$this` to resolve to.
+  constexpr std::string_view src = R"(<?php
+class A {
+  function run() { $this->helpA(); }
+  function helpA() {}
+}
+function go() { $a = new A(); $a->run(); })";
+  auto r = extract(Language::PHP, ".php", src, "m.php");
+  EXPECT_TRUE(has_call_ref(r, "A.helpA"));  // $this -> class
+  EXPECT_TRUE(has_call_ref(r, "A.run"));    // $a = new A()
+}
+
+TEST(ScopeTypeResolution, KotlinQualifiesThisAndVal) {
+  constexpr std::string_view src = R"(class A {
+  fun run() { helpA() }
+  fun helpA() {}
+}
+fun go() {
+  val a = A()
+  a.run()
+})";
+  auto r = extract(Language::Kotlin, ".kt", src, "m.kt");
+  EXPECT_TRUE(has_call_ref(r, "A.helpA"));  // this -> class
+  EXPECT_TRUE(has_call_ref(r, "A.run"));    // val a = A()
+}
+
+TEST(ScopeTypeResolution, RubyQualifiesReceiverFromNew) {
+  constexpr std::string_view src = R"(class A
+  def run
+    self.help_a
+  end
+  def help_a
+  end
+end
+def go
+  a = A.new
+  a.run
+end)";
+  auto r = extract(Language::Ruby, ".rb", src, "m.rb");
+  EXPECT_TRUE(has_call_ref(r, "A.run"));     // a = A.new ; a.run
+  EXPECT_TRUE(has_call_ref(r, "A.help_a"));  // self.help_a
+}
+
+TEST(ScopeTypeResolution, ZigQualifiesSelfAndConst) {
+  constexpr std::string_view src = R"(const A = struct {
+    fn run(self: A) void { self.helpA(); }
+    fn helpA(self: A) void {}
+};
+fn go() void {
+    const a = A{};
+    a.run();
+})";
+  auto r = extract(Language::Zig, ".zig", src, "m.zig");
+  EXPECT_TRUE(has_call_ref(r, "A.helpA"));  // self: A param
+  EXPECT_TRUE(has_call_ref(r, "A.run"));    // const a = A{}
+}
+
 }  // namespace
 }  // namespace lci::parser

From 9f3bf52716604b9b5bbdae68a0e2e9ec92131b8a Mon Sep 17 00:00:00 2001
From: Andy Brummer
Date: Wed, 17 Jun 2026 14:43:35 -0500
Subject: [PATCH 6/6] fix(types): record C++ pointer/reference/array
 declarators in the type env
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`A* a = new A();` is init_declarator > pointer_declarator > identifier, so the
prior identifier-only unwrap never recorded `a:A` and `a->run()` stayed
unqualified — the common C++ receiver shape. Peel init/pointer/reference/array
declarators down to the identifier; the `*`/`&` live on the declarator, not
the type token, so the recorded type stays "A".

Added ScopeTypeResolution.CppQualifiesPointerAndValueReceivers. 1701/1701.

Co-Authored-By: Claude Opus 4.8 --- src/parser/unified_extractor.cpp | 26 +++++++++++++++++--------- tests/language_extraction_test.cpp | 16 ++++++++++++++++ 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/src/parser/unified_extractor.cpp b/src/parser/unified_extractor.cpp index 70161af..d67309a 100644 --- a/src/parser/unified_extractor.cpp +++ b/src/parser/unified_extractor.cpp @@ -1625,16 +1625,24 @@ void UnifiedExtractor::process_reference_node(TSNode node, node, "declarator", static_cast(10)); if (!ts_node_is_null(ty) && !ts_node_is_null(dcl)) { std::string tn = go_bare_type(node_text(ty)); - const char* dt = ts_node_type(dcl); - if (dt && std::string_view(dt) == "init_declarator") - dcl = ts_node_child_by_field_name( - dcl, "declarator", static_cast(10)); - if (!ts_node_is_null(dcl)) { - const char* it = ts_node_type(dcl); - if (it && std::string_view(it) == "identifier" && - !tn.empty()) - local_var_types_[std::string(node_text(dcl))] = tn; + // Peel wrapping declarators down to the identifier: + // `A* a = new A()` is init_declarator > pointer_declarator > + // identifier, `A** a` nests pointer_declarator, etc. The `*`/`&` + // live on the declarator, not the type, so `tn` stays "A". 
+ while (!ts_node_is_null(dcl)) { + std::string_view dt(ts_node_type(dcl)); + if (dt == "init_declarator" || dt == "pointer_declarator" || + dt == "reference_declarator" || + dt == "array_declarator") { + dcl = ts_node_child_by_field_name( + dcl, "declarator", static_cast(10)); + } else { + break; + } } + if (!ts_node_is_null(dcl) && !tn.empty() && + std::string_view(ts_node_type(dcl)) == "identifier") + local_var_types_[std::string(node_text(dcl))] = tn; } } diff --git a/tests/language_extraction_test.cpp b/tests/language_extraction_test.cpp index 5a9254a..732a75a 100644 --- a/tests/language_extraction_test.cpp +++ b/tests/language_extraction_test.cpp @@ -876,6 +876,22 @@ bool has_call_ref(const ExtractionResults& r, std::string_view name) { return false; } +TEST(ScopeTypeResolution, CppQualifiesPointerAndValueReceivers) { + constexpr std::string_view src = R"(struct A { + void run() { this->helpA(); } + void helpA() {} +}; +void go() { + A a; + a.run(); + A* p = new A(); + p->run(); +})"; + auto r = extract(Language::Cpp, ".cpp", src, "m.cpp"); + EXPECT_TRUE(has_call_ref(r, "A.helpA")); // this-> + EXPECT_TRUE(has_call_ref(r, "A.run")); // value `A a` and pointer `A* p` +} + TEST(ScopeTypeResolution, JavaQualifiesReceiverAndThis) { constexpr std::string_view src = R"(class A { void run() { helpA(); }