From aecef8d8f8561c513ae887e8f4531b30b07f723a Mon Sep 17 00:00:00 2001 From: Abdelkader Boudih Date: Mon, 21 Jul 2025 00:00:33 +0100 Subject: [PATCH 1/2] refactor: convert SQL queries to Arel where feasible - Add ArelHelpers module for building Arel queries - Convert simple queries in numeric_deterministic_ordering.rb - Convert queries in finders.rb and hash_tree_support.rb - Fix table access patterns to use model context properly - Apply RuboCop corrections to all modified files All multi-database tests passing --- lib/closure_tree/arel_helpers.rb | 83 ++++++++++++++ lib/closure_tree/finders.rb | 102 ++++++++++++------ lib/closure_tree/hash_tree_support.rb | 45 ++++++-- lib/closure_tree/hierarchy_maintenance.rb | 14 +-- .../numeric_deterministic_ordering.rb | 95 +++++++++++----- lib/closure_tree/support.rb | 2 + 6 files changed, 258 insertions(+), 83 deletions(-) create mode 100644 lib/closure_tree/arel_helpers.rb diff --git a/lib/closure_tree/arel_helpers.rb b/lib/closure_tree/arel_helpers.rb new file mode 100644 index 00000000..92f4aa33 --- /dev/null +++ b/lib/closure_tree/arel_helpers.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +module ClosureTree + module ArelHelpers + # Get model's arel table + def model_table + @model_table ||= model_class.arel_table + end + + # Get hierarchy table from a model class + # This method should be called from instance methods where hierarchy_class is available + def hierarchy_table_for(model) + if model.respond_to?(:hierarchy_class) + model.hierarchy_class.arel_table + elsif model.class.respond_to?(:hierarchy_class) + model.class.hierarchy_class.arel_table + else + raise ArgumentError, "Cannot find hierarchy_class for #{model}" + end + end + + # Get hierarchy table using the model_class + # This is for Support class methods + def hierarchy_table + @hierarchy_table ||= begin + hierarchy_class_name = options[:hierarchy_class_name] || "#{model_class}Hierarchy" + hierarchy_class_name.constantize.arel_table + end + end + + # Helper to create an Arel node for a table with an alias + def aliased_table(table, alias_name) + table.alias(alias_name) + end + + # Build Arel queries for hierarchy operations + def build_hierarchy_insert_query(hierarchy_table, node_id, parent_id) + x = aliased_table(hierarchy_table, 'x') + + # Build the SELECT subquery - use SelectManager + select_query = Arel::SelectManager.new(x) + select_query.project( + x[:ancestor_id], + Arel.sql(quote(node_id)), + x[:generations] + 1 + ) + select_query.where(x[:descendant_id].eq(parent_id)) + + # Build the INSERT statement + insert_manager = Arel::InsertManager.new + insert_manager.into(hierarchy_table) + insert_manager.columns << hierarchy_table[:ancestor_id] + insert_manager.columns << hierarchy_table[:descendant_id] + insert_manager.columns << hierarchy_table[:generations] + insert_manager.select(select_query) + + insert_manager + end + + def build_hierarchy_delete_query(hierarchy_table, id) + # Build the innermost subquery + inner_subquery_manager = Arel::SelectManager.new(hierarchy_table) + inner_subquery_manager.project(hierarchy_table[:descendant_id]) + inner_subquery_manager.where( + hierarchy_table[:ancestor_id].eq(id) + .or(hierarchy_table[:descendant_id].eq(id)) + ) + inner_subquery = inner_subquery_manager.as('x') + + # Build the middle subquery with DISTINCT + middle_subquery = Arel::SelectManager.new + middle_subquery.from(inner_subquery) + middle_subquery.project(Arel.sql('DISTINCT descendant_id')) + + # Build the DELETE statement + delete_manager = Arel::DeleteManager.new + delete_manager.from(hierarchy_table) + delete_manager.where(hierarchy_table[:descendant_id].in(middle_subquery)) + + delete_manager + end + end +end diff --git a/lib/closure_tree/finders.rb b/lib/closure_tree/finders.rb index 7c8574b3..ab38fca8 100644 --- a/lib/closure_tree/finders.rb +++ b/lib/closure_tree/finders.rb @@ -37,15 +37,24 @@ def find_or_create_by_path(path, attributes = {}) end def find_all_by_generation(generation_level) - s = _ct.base_class.joins(<<-SQL.squish) - INNER JOIN ( - SELECT descendant_id - FROM #{_ct.quoted_hierarchy_table_name} - WHERE ancestor_id = #{_ct.quote(id)} - GROUP BY descendant_id - HAVING MAX(#{_ct.quoted_hierarchy_table_name}.generations) = #{generation_level.to_i} - ) #{_ct.t_alias_keyword} descendants ON (#{_ct.quoted_table_name}.#{_ct.base_class.primary_key} = descendants.descendant_id) - SQL + hierarchy_table = self.class.hierarchy_class.arel_table + model_table = self.class.arel_table + + # Build the subquery + descendants_subquery = hierarchy_table + .project(hierarchy_table[:descendant_id]) + .where(hierarchy_table[:ancestor_id].eq(id)) + .group(hierarchy_table[:descendant_id]) + .having(hierarchy_table[:generations].maximum.eq(generation_level.to_i)) + .as('descendants') + + # Build the join + join_source = model_table + .join(descendants_subquery) + .on(model_table[_ct.base_class.primary_key].eq(descendants_subquery[:descendant_id])) + .join_sources + + s = _ct.base_class.joins(join_source) _ct.scope_with_order(s) end @@ -72,14 +81,23 @@ def root end def leaves - s = joins(<<-SQL.squish) - INNER JOIN ( - SELECT ancestor_id - FROM #{_ct.quoted_hierarchy_table_name} - GROUP BY ancestor_id - HAVING MAX(#{_ct.quoted_hierarchy_table_name}.generations) = 0 - ) #{_ct.t_alias_keyword} leaves ON (#{_ct.quoted_table_name}.#{primary_key} = leaves.ancestor_id) - SQL + hierarchy_table = hierarchy_class.arel_table + model_table = arel_table + + # Build the subquery for leaves (nodes with no children) + leaves_subquery = hierarchy_table + .project(hierarchy_table[:ancestor_id]) + .group(hierarchy_table[:ancestor_id]) + .having(hierarchy_table[:generations].maximum.eq(0)) + .as('leaves') + + # Build the join + join_source = model_table + .join(leaves_subquery) + .on(model_table[primary_key].eq(leaves_subquery[:ancestor_id])) + .join_sources + + s = joins(join_source) _ct.scope_with_order(s.readonly(false)) end @@ -123,22 +141,38 @@ def lowest_common_ancestor(*descendants) end def find_all_by_generation(generation_level) - s = joins(<<-SQL.squish) - INNER JOIN ( - SELECT #{primary_key} as root_id - FROM #{_ct.quoted_table_name} - WHERE #{_ct.quoted_parent_column_name} IS NULL - ) #{_ct.t_alias_keyword} roots ON (1 = 1) - INNER JOIN ( - SELECT ancestor_id, descendant_id - FROM #{_ct.quoted_hierarchy_table_name} - GROUP BY ancestor_id, descendant_id - HAVING MAX(generations) = #{generation_level.to_i} - ) #{_ct.t_alias_keyword} descendants ON ( - #{_ct.quoted_table_name}.#{primary_key} = descendants.descendant_id - AND roots.root_id = descendants.ancestor_id - ) - SQL + hierarchy_table = hierarchy_class.arel_table + model_table = arel_table + + # Build the roots subquery + roots_subquery = model_table + .project(model_table[primary_key].as('root_id')) + .where(model_table[_ct.parent_column_sym].eq(nil)) + .as('roots') + + # Build the descendants subquery + descendants_subquery = hierarchy_table + .project( + hierarchy_table[:ancestor_id], + hierarchy_table[:descendant_id] + ) + .group(hierarchy_table[:ancestor_id], hierarchy_table[:descendant_id]) + .having(hierarchy_table[:generations].maximum.eq(generation_level.to_i)) + .as('descendants') + + # Build the joins + join_roots = model_table + .join(roots_subquery) + .on(Arel.sql('1 = 1')) + + join_descendants = join_roots + .join(descendants_subquery) + .on( + model_table[primary_key].eq(descendants_subquery[:descendant_id]) + .and(roots_subquery[:root_id].eq(descendants_subquery[:ancestor_id])) + ) + + s = joins(join_descendants.join_sources) _ct.scope_with_order(s) end @@ -151,6 +185,7 @@ def find_by_path(path, attributes = {}, parent_id = nil) scope = where(path.pop) last_joined_table = _ct.table_name + path.reverse.each_with_index do |ea, idx| next_joined_table = "p#{idx}" scope = scope.joins(<<-SQL.squish) @@ -161,6 +196,7 @@ def find_by_path(path, attributes = {}, parent_id = nil) scope = _ct.scoped_attributes(scope, ea, next_joined_table) last_joined_table = next_joined_table end + scope.where("#{last_joined_table}.#{_ct.parent_column_name}" => parent_id).readonly(false).first end diff --git a/lib/closure_tree/hash_tree_support.rb b/lib/closure_tree/hash_tree_support.rb index 6b2a3d45..4d493d3f 100644 --- a/lib/closure_tree/hash_tree_support.rb +++ b/lib/closure_tree/hash_tree_support.rb @@ -5,17 +5,40 @@ module HashTreeSupport def default_tree_scope(scope, limit_depth = nil) # Deepest generation, within limit, for each descendant # NOTE: Postgres requires HAVING clauses to always contains aggregate functions (!!) - having_clause = limit_depth ? "HAVING MAX(generations) <= #{limit_depth - 1}" : '' - generation_depth = <<-SQL.squish - INNER JOIN ( - SELECT descendant_id, MAX(generations) as depth - FROM #{quoted_hierarchy_table_name} - GROUP BY descendant_id - #{having_clause} - ) #{t_alias_keyword} generation_depth - ON #{quoted_table_name}.#{model_class.primary_key} = generation_depth.descendant_id - SQL - scope_with_order(scope.joins(generation_depth), 'generation_depth.depth') + + # Get the hierarchy table for the scope's model class + hierarchy_table_arel = if scope.respond_to?(:hierarchy_class) + scope.hierarchy_class.arel_table + elsif scope.klass.respond_to?(:hierarchy_class) + scope.klass.hierarchy_class.arel_table + else + hierarchy_table + end + + model_table_arel = scope.klass.arel_table + + # Build the subquery using Arel + subquery = hierarchy_table_arel + .project( + hierarchy_table_arel[:descendant_id], + hierarchy_table_arel[:generations].maximum.as('depth') + ) + .group(hierarchy_table_arel[:descendant_id]) + + # Add HAVING clause if limit_depth is specified + subquery = subquery.having(hierarchy_table_arel[:generations].maximum.lteq(limit_depth - 1)) if limit_depth + + generation_depth_alias = subquery.as('generation_depth') + + # Build the join + join_condition = model_table_arel[scope.klass.primary_key].eq(generation_depth_alias[:descendant_id]) + + join_source = model_table_arel + .join(generation_depth_alias) + .on(join_condition) + .join_sources + + scope_with_order(scope.joins(join_source), 'generation_depth.depth') end def hash_tree(tree_scope, limit_depth = nil) diff --git a/lib/closure_tree/hierarchy_maintenance.rb b/lib/closure_tree/hierarchy_maintenance.rb index 029286a9..566e4661 100644 --- a/lib/closure_tree/hierarchy_maintenance.rb +++ b/lib/closure_tree/hierarchy_maintenance.rb @@ -90,16 +90,10 @@ def delete_hierarchy_references # It shouldn't affect performance of postgresql. # See http://dev.mysql.com/doc/refman/5.0/en/subquery-errors.html # Also: PostgreSQL doesn't support INNER JOIN on DELETE, so we can't use that. - _ct.connection.execute <<-SQL.squish - DELETE FROM #{_ct.quoted_hierarchy_table_name} - WHERE descendant_id IN ( - SELECT DISTINCT descendant_id - FROM (SELECT descendant_id - FROM #{_ct.quoted_hierarchy_table_name} - WHERE ancestor_id = #{_ct.quote(id)} - OR descendant_id = #{_ct.quote(id)} - ) #{_ct.t_alias_keyword} x ) - SQL + + hierarchy_table = hierarchy_class.arel_table + delete_query = _ct.build_hierarchy_delete_query(hierarchy_table, id) + _ct.connection.execute(delete_query.to_sql) end end diff --git a/lib/closure_tree/numeric_deterministic_ordering.rb b/lib/closure_tree/numeric_deterministic_ordering.rb index caaefe75..6d56acb0 100644 --- a/lib/closure_tree/numeric_deterministic_ordering.rb +++ b/lib/closure_tree/numeric_deterministic_ordering.rb @@ -29,17 +29,32 @@ def _ct_reorder_children(minimum_sort_order_value = nil) def self_and_descendants_preordered # TODO: raise NotImplementedError if sort_order is not numeric and not null? - join_sql = <<-SQL - JOIN #{_ct.quoted_hierarchy_table_name} anc_hier - ON anc_hier.descendant_id = #{_ct.quoted_hierarchy_table_name}.descendant_id - JOIN #{_ct.quoted_table_name} anc - ON anc.#{_ct.quoted_id_column_name} = anc_hier.ancestor_id - JOIN #{_ct.quoted_hierarchy_table_name} depths - ON depths.ancestor_id = #{_ct.quote(id)} AND depths.descendant_id = anc.#{_ct.quoted_id_column_name} - SQL + hierarchy_table = self.class.hierarchy_class.arel_table + model_table = self.class.arel_table + + # Create aliased tables for the joins + anc_hier = _ct.aliased_table(hierarchy_table, 'anc_hier') + anc = _ct.aliased_table(model_table, 'anc') + depths = _ct.aliased_table(hierarchy_table, 'depths') + + # Build the join conditions using Arel + join_anc_hier = hierarchy_table + .join(anc_hier) + .on(anc_hier[:descendant_id].eq(hierarchy_table[:descendant_id])) + + join_anc = join_anc_hier + .join(anc) + .on(anc[self.class.primary_key].eq(anc_hier[:ancestor_id])) + + join_depths = join_anc + .join(depths) + .on( + depths[:ancestor_id].eq(id) + .and(depths[:descendant_id].eq(anc[self.class.primary_key])) + ) self_and_descendants - .joins(join_sql) + .joins(join_depths.join_sources) .group("#{_ct.quoted_table_name}.#{_ct.quoted_id_column_name}") .reorder(self.class._ct_sum_order_by(self)) end @@ -47,14 +62,18 @@ def self_and_descendants_preordered class_methods do # If node is nil, order the whole tree. def _ct_sum_order_by(node = nil) - stats_sql = <<-SQL.squish - SELECT - count(*) as total_descendants, - max(generations) as max_depth - FROM #{_ct.quoted_hierarchy_table_name} - SQL - stats_sql += " WHERE ancestor_id = #{_ct.quote(node.id)}" if node - h = _ct.connection.select_one(stats_sql) + # Build the stats query using Arel + hierarchy_table = hierarchy_class.arel_table + + query = hierarchy_table + .project( + hierarchy_table[:ancestor_id].count.as('total_descendants'), + hierarchy_table[:generations].maximum.as('max_depth') + ) + + query = query.where(hierarchy_table[:ancestor_id].eq(node.id)) if node + + h = _ct.connection.select_one(query.to_sql) depth_column = node ? 'depths.generations' : 'depths.max_depth' @@ -68,18 +87,36 @@ def _ct_sum_order_by(node = nil) def roots_and_descendants_preordered raise ClosureTree::RootOrderingDisabledError, 'Root ordering is disabled on this model' if _ct.dont_order_roots - join_sql = <<-SQL.squish - JOIN #{_ct.quoted_hierarchy_table_name} anc_hier - ON anc_hier.descendant_id = #{_ct.quoted_table_name}.#{_ct.quoted_id_column_name} - JOIN #{_ct.quoted_table_name} anc - ON anc.#{_ct.quoted_id_column_name} = anc_hier.ancestor_id - JOIN ( - SELECT descendant_id, max(generations) AS max_depth - FROM #{_ct.quoted_hierarchy_table_name} - GROUP BY descendant_id - ) #{_ct.t_alias_keyword} depths ON depths.descendant_id = anc.#{_ct.quoted_id_column_name} - SQL - joins(join_sql) + hierarchy_table = hierarchy_class.arel_table + model_table = arel_table + + # Create aliased tables + anc_hier = _ct.aliased_table(hierarchy_table, 'anc_hier') + anc = _ct.aliased_table(model_table, 'anc') + + # Build the subquery for depths + depths_subquery = hierarchy_table + .project( + hierarchy_table[:descendant_id], + hierarchy_table[:generations].maximum.as('max_depth') + ) + .group(hierarchy_table[:descendant_id]) + .as('depths') + + # Build the join conditions + join_anc_hier = model_table + .join(anc_hier) + .on(anc_hier[:descendant_id].eq(model_table[primary_key])) + + join_anc = join_anc_hier + .join(anc) + .on(anc[primary_key].eq(anc_hier[:ancestor_id])) + + join_depths = join_anc + .join(depths_subquery) + .on(depths_subquery[:descendant_id].eq(anc[primary_key])) + + joins(join_depths.join_sources) .group("#{_ct.quoted_table_name}.#{_ct.quoted_id_column_name}") .reorder(_ct_sum_order_by) end diff --git a/lib/closure_tree/support.rb b/lib/closure_tree/support.rb index f57ef530..4ffefc8e 100644 --- a/lib/closure_tree/support.rb +++ b/lib/closure_tree/support.rb @@ -5,6 +5,7 @@ require 'closure_tree/numeric_order_support' require 'closure_tree/active_record_support' require 'closure_tree/hash_tree_support' +require 'closure_tree/arel_helpers' # This class and mixins are an effort to reduce the namespace pollution to models that act_as_tree. module ClosureTree @@ -13,6 +14,7 @@ class Support include ClosureTree::SupportAttributes include ClosureTree::ActiveRecordSupport include ClosureTree::HashTreeSupport + include ClosureTree::ArelHelpers attr_reader :model_class, :options From 763fedb22057661158ed3ebe14ebf3991536c2cb Mon Sep 17 00:00:00 2001 From: Abdelkader Boudih Date: Mon, 21 Jul 2025 01:00:09 +0100 Subject: [PATCH 2/2] Address Copilot code review comments - Use Arel.star.count instead of counting specific column for SQL COUNT(*) - Replace raw SQL 'DISTINCT' with Arel's .distinct method - Document intentional use of cartesian product join in find_all_by_generation These changes improve code consistency while maintaining the same functionality. All tests pass with these modifications. --- lib/closure_tree/arel_helpers.rb | 2 +- lib/closure_tree/finders.rb | 3 +++ lib/closure_tree/numeric_deterministic_ordering.rb | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/closure_tree/arel_helpers.rb b/lib/closure_tree/arel_helpers.rb index 92f4aa33..2a7b3cf0 100644 --- a/lib/closure_tree/arel_helpers.rb +++ b/lib/closure_tree/arel_helpers.rb @@ -70,7 +70,7 @@ def build_hierarchy_delete_query(hierarchy_table, id) # Build the middle subquery with DISTINCT middle_subquery = Arel::SelectManager.new middle_subquery.from(inner_subquery) - middle_subquery.project(Arel.sql('DISTINCT descendant_id')) + middle_subquery.project(inner_subquery[:descendant_id]).distinct # Build the DELETE statement delete_manager = Arel::DeleteManager.new diff --git a/lib/closure_tree/finders.rb b/lib/closure_tree/finders.rb index ab38fca8..eb7a6968 100644 --- a/lib/closure_tree/finders.rb +++ b/lib/closure_tree/finders.rb @@ -161,6 +161,9 @@ def find_all_by_generation(generation_level) .as('descendants') # Build the joins + # Note: We intentionally use a cartesian product join (CROSS JOIN) here. + # This allows us to find all nodes at a specific generation level across all root nodes. + # The 1=1 condition creates this cartesian product in a database-agnostic way. join_roots = model_table .join(roots_subquery) .on(Arel.sql('1 = 1')) diff --git a/lib/closure_tree/numeric_deterministic_ordering.rb b/lib/closure_tree/numeric_deterministic_ordering.rb index 6d56acb0..1ee48981 100644 --- a/lib/closure_tree/numeric_deterministic_ordering.rb +++ b/lib/closure_tree/numeric_deterministic_ordering.rb @@ -67,7 +67,7 @@ def _ct_sum_order_by(node = nil) query = hierarchy_table .project( - hierarchy_table[:ancestor_id].count.as('total_descendants'), + Arel.star.count.as('total_descendants'), hierarchy_table[:generations].maximum.as('max_depth') )