Skip to content

Make go to relevant constant in time using an index #3526

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions benchmark.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Benchmarked in Buk main repository having ~25K ruby files.

user system total real
old-glob 1.019865 0.292442 1.312307 ( 1.312549)
new-index 0.000049 0.000007 0.000056 ( 0.000056)

Beign N = number of files
Beign K = number of files matching the subject/test
And should be very low (Almost constant)

Space Complexity O(N)
Time complexity is O(K)

The feature become almost instant
89 changes: 89 additions & 0 deletions lib/ruby_indexer/lib/ruby_indexer/file_index.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# typed: false
# frozen_string_literal: true

# Index:
# {
# "c": ["a/b/c.rb", "a/b/c_test.rb", "a/b/c_integration_test.rb", "a/b/test_c.rb", "a/b/sepc_c.rb", "a/b/integration_test_c.rb"]
# }

# API:
# index.search("c") -> ["a/b/c.rb", "a/b/c_test.rb", "a/b/c_integration_test.rb", "a/b/test_c.rb", "a/b/sepc_c.rb", "a/b/integration_test_c.rb"]
# index.insert("a/b/d.rb")
# index.insert("a/b/d_test.rb")
# index.search("d") -> ["a/b/d.rb", "a/b/d_test.rb"]
# index.delete("a/b/d.rb")
# index.search("d") -> ["a/b/d_test.rb"]
# index.delete("a/b/d_test.rb")
# index.search("d") -> []

module RubyIndexer
# The FileIndex class maintains an in-memory index of files for fast lookup.
# It allows searching for files based on their base name, handling various test file patterns.
class FileIndex # TODO: Provide better naming for just subject/test file index.
TEST_REGEX = /^(test_|spec_|integration_test_)|((_test|_spec|_integration_test)$)/
def initialize
@index = {}
end

# Extract the base name from a file path, removing test prefixes and suffixes
def extract_base_name(path)
basename = File.basename(path, ".*")

# Handle patterns like test_hello, spec_hello, integration_test_hello
basename = basename.sub(/^(test_|spec_|integration_test_)/, "")

# Handle patterns like hello_test, hello_spec, hello_integration_test
basename = basename.sub(/((_test|_spec|_integration_test)$)/, "")

basename
end

# Insert a file path into the index
def insert(path)
base_name = extract_base_name(path)
@index[base_name] ||= []
@index[base_name] << path
end

# Delete a file path from the index
def delete(path)
base_name = extract_base_name(path)
if @index.key?(base_name)
@index[base_name].delete(path)
@index.delete(base_name) if @index[base_name].empty?
end
end

# Search for files that match the given base name
def search(base_name)
@index[base_name] || []
end

# Search for files that can be tests of the given base_name
def search_test(base_name)
return [] if @index[base_name].nil?

search(base_name).select do |file|
File.basename(file, ".*").match?(TEST_REGEX)
end
end

def search_subject(base_name)
return [] if @index[base_name].nil?

search(base_name).reject do |file|
File.basename(file, ".*").match?(TEST_REGEX)
end
end

# Bulk load multiple file paths into the index
def bulk_load(paths)
paths.each { |path| insert(path) }
end

# Clear the entire index
def clear
@index.clear
end
end
end
17 changes: 17 additions & 0 deletions lib/ruby_indexer/lib/ruby_indexer/index.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# typed: strict
# frozen_string_literal: true

require_relative "file_index"

module RubyIndexer
class Index
class UnresolvableAliasError < StandardError; end
Expand All @@ -13,6 +15,9 @@ class IndexNotEmptyError < StandardError; end
#: Configuration
attr_reader :configuration

#: RubyIndexer::FileIndex
attr_reader :file_index

#: bool
attr_reader :initial_indexing_completed

Expand Down Expand Up @@ -80,6 +85,9 @@ def initialize
@configuration = RubyIndexer::Configuration.new #: Configuration

@initial_indexing_completed = false #: bool

# File index for tracking files and their relationships
@file_index = FileIndex.new #: FileIndex
end

# Register an included `hook` that will be executed when `module_name` is included into any namespace
Expand All @@ -93,6 +101,7 @@ def delete(uri, skip_require_paths_tree: false)
key = uri.to_s
# For each constant discovered in `path`, delete the associated entry from the index. If there are no entries
# left, delete the constant from the index.
# TODO: Adjust file index on delete.
@uris_to_entries[key]&.each do |entry|
name = entry.name
entries = @entries[name]
Expand Down Expand Up @@ -348,6 +357,9 @@ def index_all(uris: @configuration.indexable_uris, &block)
"The index is not empty. To prevent invalid entries, `index_all` can only be called once."
end

# Clear the file index at the start of a full indexing
@file_index.clear

RBSIndexer.new(self).index_ruby_core
# Calculate how many paths are worth 1% of progress
progress_step = (uris.length / 100.0).ceil
Expand All @@ -361,6 +373,9 @@ def index_all(uris: @configuration.indexable_uris, &block)
index_file(uri, collect_comments: false)
end

# Bulk load all file paths into the file index
@file_index.bulk_load(Dir.glob("**/*.rb"))

@initial_indexing_completed = true
end

Expand All @@ -377,6 +392,8 @@ def index_single(uri, source, collect_comments: true)

indexing_errors = listener.indexing_errors.uniq
indexing_errors.each { |error| $stderr.puts(error) } if indexing_errors.any?

# TODO: Add file to file index
rescue SystemStackError => e
if e.backtrace&.first&.include?("prism")
$stderr.puts "Prism error indexing #{uri}: #{e.message}"
Expand Down
1 change: 1 addition & 0 deletions lib/ruby_lsp/internal.rb
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
require "ruby_lsp/requests/folding_ranges"
require "ruby_lsp/requests/formatting"
require "ruby_lsp/requests/go_to_relevant_file"
require "ruby_lsp/requests/go_to_relevant_file_v2"
require "ruby_lsp/requests/hover"
require "ruby_lsp/requests/inlay_hints"
require "ruby_lsp/requests/on_type_formatting"
Expand Down
115 changes: 115 additions & 0 deletions lib/ruby_lsp/requests/go_to_relevant_file_v2.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# typed: strict
# frozen_string_literal: true

module RubyLsp
module Requests
# GoTo Relevant File is a custom [LSP
# request](https://microsoft.github.io/language-server-protocol/specification#requestMessage)
# that navigates to the relevant file for the current document.
# Currently, it supports source code file <> test file navigation.
class GoToRelevantFileV2 < Request
TEST_KEYWORDS = ["test", "spec", "integration_test"]

TEST_PREFIX_PATTERN = /^(#{TEST_KEYWORDS.join("_|")}_)/
TEST_SUFFIX_PATTERN = /(_#{TEST_KEYWORDS.join("|_")})$/
TEST_PATTERN = /#{TEST_PREFIX_PATTERN}|#{TEST_SUFFIX_PATTERN}/

TEST_PREFIX_GLOB = "#{TEST_KEYWORDS.join("_,")}_" #: String
TEST_SUFFIX_GLOB = "_#{TEST_KEYWORDS.join(",_")}" #: String

TEST_REGEX = /^(test_|spec_|integration_test_)|((_test|_spec|_integration_test)$)/

#: (String path, String workspace_path, RubyIndexer::FileIndex file_index) -> void
def initialize(path, workspace_path, file_index)
super()

@file_index = file_index
@workspace_path = workspace_path
@path = path.delete_prefix(workspace_path) #: String
end

# @override
#: -> Array[String]
def perform
find_relevant_paths
end

private

#: -> Array[String]
def find_relevant_paths
# Extract the base name from the current file (without the extension)
input_basename = File.basename(@path, File.extname(@path))

base_name = if input_basename.match?(TEST_PATTERN)
# If it's a test file, remove the test prefix/suffix
input_basename.gsub(TEST_PATTERN, "")
else
# If it's a source file, use its base name
input_basename
end

# Use the FileIndex to find relevant files via the index
candidate_paths = if input_basename.match?(TEST_REGEX)
@file_index.search_subject(base_name)
else
@file_index.search_test(base_name)
end

return [] if candidate_paths.empty?

# Filter out the current file from the candidates
full_path = File.join(@workspace_path, @path)
candidates = candidate_paths.reject do |path|
path == full_path
end

# Filter to include only files with matching extensions
extension = File.extname(@path)
candidates = candidates.select do |path|
File.extname(path) == extension
end

candidates = candidates.map { |path| File.join(@workspace_path, path) }

return [] if candidates.empty?

# Find the most similar paths using Jaccard similarity
find_most_similar_with_jaccard(candidates)
end

# Using the Jaccard algorithm to determine the similarity between the
# input path and the candidate relevant file paths.
# Ref: https://en.wikipedia.org/wiki/Jaccard_index
# The main idea of this algorithm is to take the size of interaction and divide
# it by the size of union between two sets (in our case the elements in each set
# would be the parts of the path separated by path divider.)
#: (Array[String] candidates) -> Array[String]
def find_most_similar_with_jaccard(candidates)
dirs = get_dir_parts(@path)

_, results = candidates
.group_by do |other_path|
# For consistency with the current path, convert absolute paths to relative if needed
relative_path = if other_path.start_with?(@workspace_path)
other_path.delete_prefix(@workspace_path)
else
other_path
end

other_dirs = get_dir_parts(relative_path)
# Similarity score between the two directories
(dirs & other_dirs).size.to_f / (dirs | other_dirs).size
end
.max_by(&:first)

results || []
end

#: (String path) -> Set[String]
def get_dir_parts(path)
Set.new(File.dirname(path).split(File::SEPARATOR))
end
end
end
end
24 changes: 23 additions & 1 deletion lib/ruby_lsp/server.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# typed: strict
# frozen_string_literal: true

require "benchmark"

module RubyLsp
class Server < BaseServer
# Only for testing
Expand Down Expand Up @@ -1181,8 +1183,28 @@ def experimental_go_to_relevant_file(message)
return
end

$stdout = File.new("benchmark.log", "w")
$stdout.sync = true

Benchmark.bm(15) do |x|
x.report("old-glob") do
Requests::GoToRelevantFile.new(path, @global_state.workspace_path).perform
end
x.report("new-index") do
Requests::GoToRelevantFileV2.new(
path,
@global_state.workspace_path,
@global_state.index.file_index,
).perform
end
end

response = {
locations: Requests::GoToRelevantFile.new(path, @global_state.workspace_path).perform,
locations: Requests::GoToRelevantFileV2.new(
path,
@global_state.workspace_path,
@global_state.index.file_index,
).perform,
}
send_message(Result.new(id: message[:id], response: response))
end
Expand Down
12 changes: 10 additions & 2 deletions test/requests/go_to_relevant_file_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ def test_when_input_is_test_file_returns_array_of_implementation_file_locations
test_file_path = "/workspace/test/requests/go_to_relevant_file_test.rb"
expected = ["/workspace/lib/ruby_lsp/requests/go_to_relevant_file.rb"]

result = RubyLsp::Requests::GoToRelevantFile.new(test_file_path, "/workspace").perform
file_index = RubyIndexer::FileIndex.new
file_index.insert("/workspace/lib/ruby_lsp/requests/go_to_relevant_file.rb")
file_index.insert("/workspace/test/requests/go_to_relevant_file_test.rb")

result = RubyLsp::Requests::GoToRelevantFile.new(test_file_path, "/workspace", file_index).perform
assert_equal(expected, result)
end

Expand All @@ -22,7 +26,11 @@ def test_when_input_is_implementation_file_returns_array_of_test_file_locations
impl_path = "/workspace/lib/ruby_lsp/requests/go_to_relevant_file.rb"
expected = ["/workspace/test/requests/go_to_relevant_file_test.rb"]

result = RubyLsp::Requests::GoToRelevantFile.new(impl_path, "/workspace").perform
file_index = RubyIndexer::FileIndex.new
file_index.insert("/workspace/lib/ruby_lsp/requests/go_to_relevant_file.rb")
file_index.insert("/workspace/test/requests/go_to_relevant_file_test.rb")

result = RubyLsp::Requests::GoToRelevantFile.new(impl_path, "/workspace", file_index).perform
assert_equal(expected, result)
end

Expand Down
Loading