diff --git a/app/models/proxy_exercise.rb b/app/models/proxy_exercise.rb index 67b641d7b..4bd0a3bde 100644 --- a/app/models/proxy_exercise.rb +++ b/app/models/proxy_exercise.rb @@ -78,13 +78,18 @@ def get_matching_exercise(user) end def find_matching_exercise(user) - exercises_user_has_accessed = user.submissions.where(cause: %w[submit assess remoteSubmit remoteAssess]).map(&:exercise).uniq.compact - tags_user_has_seen = exercises_user_has_accessed.map(&:tags).uniq.flatten + exercises_user_has_accessed = + Exercise + .joins(:submissions) + .merge(user.submissions.where(cause: %w[submit assess remoteSubmit remoteAssess])) + .includes(:tags, :files) + .distinct + tags_user_has_seen = exercises_user_has_accessed.flat_map(&:tags).uniq Rails.logger.debug { "exercises_user_has_accessed #{exercises_user_has_accessed.map(&:id).join(',')}" } # find exercises potential_recommended_exercises = [] - exercises.where('expected_difficulty >= 1').find_each do |ex| + exercises.where('expected_difficulty >= 1').includes(:tags, exercise_tags: :tag).find_each do |ex| ## find exercises which have only tags the user has already seen if (ex.tags - tags_user_has_seen).empty? potential_recommended_exercises << ex @@ -119,9 +124,9 @@ def select_best_matching_exercise(user, exercises_user_has_accessed, potential_r relative_knowledge_improvement[potex] = 0.0 Rails.logger.debug { "review potential exercise #{potex.id}" } tags.each do |tag| - tag_ratio = potex.exercise_tags.find_by(tag:).factor.to_f / potex.exercise_tags.inject(0) do |sum, et| - sum + et.factor - end + tag_ratio = potex.exercise_tags.find {|et| et.tag == tag }.factor.to_f / potex.exercise_tags.inject(0) do |sum, et| + sum + et.factor + end max_topic_knowledge_ratio = potex.expected_difficulty * tag_ratio old_relative_loss_tag = topic_knowledge_user[tag] / topic_knowledge_max[tag] new_relative_loss_tag = topic_knowledge_user[tag] / (topic_knowledge_max[tag] + max_topic_knowledge_ratio) diff --git a/spec/factories/tag.rb b/spec/factories/tag.rb new file mode 100644 index 000000000..15b1630b4 --- /dev/null +++ b/spec/factories/tag.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +FactoryBot.define do + factory :tag do + name { 'tagName' } + end +end diff --git a/spec/models/proxy_exercise_performance_spec.rb b/spec/models/proxy_exercise_performance_spec.rb new file mode 100644 index 000000000..b2ba29fbc --- /dev/null +++ b/spec/models/proxy_exercise_performance_spec.rb @@ -0,0 +1,177 @@ +# frozen_string_literal: true + +require 'rails_helper' + +# AI generated to capture performance characteristics of ProxyExercise +# rubocop:disable FactoryBot/ExcessiveCreateList +RSpec.describe ProxyExercise do + # Simple SQL capture helper using ActiveSupport notifications + def capture_sql(&) + events = [] + callback = ->(_name, _start, _finish, _id, payload) { events << payload[:sql] } + ActiveSupport::Notifications.subscribed(callback, 'sql.active_record', &) + events + end + + def query_count(&) + capture_sql(&).size + end + + describe 'performance characteristics' do + describe '#count_files' do + it 'uses an efficient COUNT query instead of loading all exercises' do + pe = create(:proxy_exercise) + create_list(:dummy, 20).each {|ex| pe.exercises << ex } + + # Warm-up (avoid counting queries from lazy loading of associations) + pe.reload + + sqls = capture_sql { expect(pe.count_files).to eq(20) } + + # We expect a single COUNT(*) style query (plus, in some adapters, an implicit PRAGMA/SHOW if any) + # Be tolerant: assert that at least one of the queries contains COUNT and total number is small + has_count = sqls.any? {|sql| sql =~ /COUNT\s*\(/i } + expect(has_count).to be(true), "Expected a COUNT query, got: #{sqls.inspect}" + expect(sqls.size).to be <= 3 # 1 main COUNT + possible noise + end + end + + describe "#get_matching_exercise when algorithm is 'random'" do + let(:user) { create(:learner) } + + it 'does not increase the number of SQL queries with a larger exercise pool' do + pe_small = create(:proxy_exercise, algorithm: 'random') + chosen_small = create(:dummy) + others_small = create_list(:dummy, 9) + pe_small.exercises << ([chosen_small] + others_small) + allow(pe_small.exercises.target).to receive(:sample).and_return(chosen_small) + + pe_large = create(:proxy_exercise, algorithm: 'random') + chosen_large = create(:dummy) + others_large = create_list(:dummy, 99) + pe_large.exercises << ([chosen_large] + others_large) + allow(pe_large.exercises.target).to receive(:sample).and_return(chosen_large) + + # Measure queries for small pool + small_queries = query_count { pe_small.get_matching_exercise(user) } + + # Measure queries for large pool + large_queries = query_count { pe_large.get_matching_exercise(user) } + + # The random algorithm should not need additional queries proportional to the pool size. + # Allow a tiny slack for adapter differences and inserts of assignment rows. + expect(large_queries - small_queries).to be <= 2, + "Expected roughly constant query count, got small=#{small_queries}, large=#{large_queries}" + end + end + + describe "#get_matching_exercise when algorithm is 'best_match'" do + it 'reports current SQL query counts across pool sizes (opt-in via PERF=1)' do + ActiveRecord.verbose_query_logs = true + + # Helper to build an exercise with given tags and difficulty + def build_tagged_exercise(difficulty:, tags:) + ex = create(:dummy, expected_difficulty: difficulty) + tags.each do |tag| + ExerciseTag.create!(exercise: ex, tag:, factor: 1) + end + ex + end + + # Prepare a fixed tag universe to be stable across runs + tags = (1..5).map {|i| Tag.create!(name: "perf-tag-#{i}-#{SecureRandom.hex(2)}") } + + # Build a small history of exercises the user has accessed, covering all tags + user = create(:learner) + seen_exercises = Array.new(5) {|i| build_tagged_exercise(difficulty: 2, tags: [tags[i]]) } + # Create real submissions for the user to reflect accessed exercises (no stubbing) + seen_exercises.each do |ex| + Submission.create!(exercise: ex, contributor: user, cause: 'submit') + end + + def build_pool(size:, tags:) + pe = create(:proxy_exercise, algorithm: 'best_match') + # Distribute tags and difficulties deterministically + exercises = (1..size).map do |i| + difficulty = 1 + (i % 3) # 1..3 + assigned_tags = [tags[i % tags.size], tags[(i + 1) % tags.size]].uniq + build_tagged_exercise(difficulty:, tags: assigned_tags) + end + pe.exercises << exercises + pe + end + + # Measure across multiple pool sizes + sizes = [10, 30, 60] + # sizes = [1] + results = {} + sizes.each do |n| + pe = build_pool(size: n, tags:) + # Warm up: ensure associations are loaded enough to avoid counting lazy first-time loads unrelated to algorithm + pe.reload + queries = query_count { pe.get_matching_exercise(user) } + results[n] = queries + end + + puts "BEST_MATCH query counts by pool size: #{results.inspect}" + + # Very loose sanity checks — this spec primarily reports the current optimization level. + expect(results.values.all?(&:positive?)).to be(true) + end + end + + describe "#get_matching_exercise 'best_match' with varying tag counts" do + it 'reports current SQL query counts across different tags per exercise (opt-in via PERF=1)' do + # skip('Set PERF=1 to run performance specs') unless ENV['PERF'] + + # Helper to build an exercise with given tags and difficulty + def build_tagged_exercise(difficulty:, tags:) + ex = create(:dummy, expected_difficulty: difficulty) + tags.each do |tag| + ExerciseTag.create!(exercise: ex, tag: tag, factor: 1) + end + ex + end + + user = create(:learner) + + # Build a small history of exercises the user has accessed, covering some tags + base_tags = (1..8).map {|i| Tag.create!(name: "var-tags-base-#{i}-#{SecureRandom.hex(2)}") } + seen_exercises = base_tags.first(4).map {|t| build_tagged_exercise(difficulty: 2, tags: [t]) } + # Create real submissions for the user to reflect accessed exercises (no stubbing) + seen_exercises.each do |ex| + Submission.create!(exercise: ex, contributor: user, cause: 'submit') + end + + def build_pool_with_tag_count(size:, tag_universe:, tags_per_ex:) + pe = create(:proxy_exercise, algorithm: 'best_match') + exercises = (1..size).map do |i| + difficulty = 1 + (i % 3) # 1..3 + assigned = (0...tags_per_ex).map {|k| tag_universe[(i + k) % tag_universe.size] }.uniq + build_tagged_exercise(difficulty: difficulty, tags: assigned) + end + pe.exercises << exercises + pe + end + + # Vary the number of tags per exercise while keeping pool size constant + tags_per_exercise = [1, 2, 3, 5] + tag_universe = (1..10).map {|i| Tag.create!(name: "var-tags-univ-#{i}-#{SecureRandom.hex(2)}") } + results = {} + tags_per_exercise.each do |k| + pe = build_pool_with_tag_count(size: 30, tag_universe: tag_universe, tags_per_ex: k) + # Warm up: ensure associations are loaded enough to avoid counting lazy first-time loads unrelated to algorithm + pe.reload + queries = query_count { pe.get_matching_exercise(user) } + results[k] = queries + end + + puts "BEST_MATCH query counts by tags per exercise: #{results.inspect}" + + # Loose sanity check + expect(results.values.all?(&:positive?)).to be(true) + end + end + end +end +# rubocop:enable FactoryBot/ExcessiveCreateList diff --git a/spec/models/proxy_exercise_spec.rb b/spec/models/proxy_exercise_spec.rb new file mode 100644 index 000000000..7e29699d3 --- /dev/null +++ b/spec/models/proxy_exercise_spec.rb @@ -0,0 +1,150 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe ProxyExercise do + describe 'defaults and validations' do + let(:pe) { described_class.new } + + it 'sets public to false by default (after_initialize)' do + expect(pe.public).to be(false) + end + + it 'generates a token if none is present (after_initialize)' do + expect(pe.token).to be_present + expect(pe.token).to be_a(String) + expect(pe.token.size).to eq(8) # SecureRandom.hex(4) + end + + context 'with a token' do + let(:pe) { build(:proxy_exercise, token: 'fixedtoken') } + + it 'does not overwrite an existing token' do + expect(pe.token).to eq('fixedtoken') + end + end + end + + describe '#count_files' do + let(:pe) { create(:proxy_exercise, exercises: create_list(:dummy, 7)) } + + it 'returns the number of associated exercises' do + expect(pe.count_files).to eq(7) + end + end + + describe '#duplicate' do + subject(:copy) { original.duplicate(title: 'Copy') } + + let(:original) { create(:proxy_exercise, title: 'Original', token: 'tok12345') } + + it 'returns a duplicated instance with overridden attributes' do + expect(copy).to be_a(described_class) + expect(copy).not_to be_persisted + expect(copy.title).to eq('Copy') + expect(copy.token).to eq('tok12345') + end + end + + describe '#get_matching_exercise' do + let(:user) { create(:learner) } + + context 'when an assignment already exists for the user' do + subject(:get_matching_exercise) { pe.get_matching_exercise(user) } + + let(:pe) { create(:proxy_exercise) } + let(:ex) { create(:dummy) } + + before { UserProxyExerciseExercise.create!(user:, exercise: ex, proxy_exercise: pe) } + + it { is_expected.to eql ex } + + it 'returns the previously assigned exercise without creating another one' do + expect { get_matching_exercise }.not_to change(UserProxyExerciseExercise, :count) + end + end + + context "when algorithm is 'random'" do + subject(:get_matching_exercise) { pe.get_matching_exercise(user) } + + let(:pe) { create(:proxy_exercise, algorithm: 'random', exercises: [chosen, other]) } + let(:chosen) { create(:dummy) } + let(:other) { create(:dummy) } + + before { allow(pe.exercises.target).to receive(:sample).and_return(chosen) } + + it { is_expected.to eql chosen } + + it 'uses exercises.sample and persists the assignment with a reason' do + expect { get_matching_exercise }.to change(UserProxyExerciseExercise, :count).by(1) + + assignment = UserProxyExerciseExercise.last + reason = JSON.parse(assignment.reason) + expect(reason['reason']).to eq('random exercise requested') + expect(assignment).to have_attributes(user: user, exercise: chosen, proxy_exercise: pe) + end + end + + context "when algorithm is 'best_match'" do + subject(:get_matching_exercise) { pe.get_matching_exercise(user) } + + let(:pe) { create(:proxy_exercise, algorithm: 'best_match', exercises: [easy, hard]) } + let(:easy) { create(:dummy, expected_difficulty: 1) } + let(:hard) { create(:dummy, expected_difficulty: 2) } + + context 'when an error occurs' do + before do + allow(pe).to receive(:find_matching_exercise).and_raise(StandardError, 'boom') + allow(pe.exercises.target).to receive(:sample).and_return(hard) + end + + it { is_expected.to eql hard } + + it 'falls back to a random exercise with expected_difficulty > 1 and records the error in the reason' do + expect { get_matching_exercise }.to change(UserProxyExerciseExercise, :count).by(1) + + reason = JSON.parse(UserProxyExerciseExercise.last.reason) + expect(reason['reason']).to eq('fallback because of error') + expect(reason['error']).to include('boom') + end + end + + context 'when the user has not seen any tags' do + let(:easy) { create(:dummy, expected_difficulty: 1, tags: [build(:tag, name: 'arrays')]) } + let(:hard) { create(:dummy, expected_difficulty: 2, tags: [build(:tag, name: 'hashes')]) } + + it { is_expected.to eql easy } + + it 'selects the easiest exercise and records the reason' do + expect { get_matching_exercise }.to change(UserProxyExerciseExercise, :count).by(1) + + reason = JSON.parse(UserProxyExerciseExercise.last.reason) + expect(reason['reason']).to eq('easiest exercise in pool. empty potential exercises') + end + end + + context 'when user has submission for exercise' do + let(:tag) { build(:tag, name: 'loops') } + let(:hard) { create(:dummy, expected_difficulty: 2, tags: [tag]) } + + before do + create(:dummy, expected_difficulty: 1, tags: [tag]) + create(:dummy, expected_difficulty: 3, tags: [tag]) + create(:submission, cause: 'submit', exercise: build(:dummy, expected_difficulty: 2, tags: [tag]), contributor: user) + end + + it { is_expected.to eql hard } + + it 'chooses the best matching exercise within the difficulty constraint and stores a detailed reason' do + expect { get_matching_exercise }.to change(UserProxyExerciseExercise, :count).by(1) + + reason = JSON.parse(UserProxyExerciseExercise.last.reason) + expect(reason['reason']).to eq('best matching exercise') + expect(reason['highest_difficulty_user_has_accessed']).to eq(2) + expect(reason).to have_key('current_users_knowledge_lack') + expect(reason).to have_key('relative_knowledge_improvement') + end + end + end + end +end