Skip to content

Commit 7456113

Browse files
committed
optimed best_match algorithm by adding eagerloading to reduce number of required db-queries
1 parent 16384dc commit 7456113

File tree

4 files changed

+343
-6
lines changed

4 files changed

+343
-6
lines changed

app/models/proxy_exercise.rb

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,18 @@ def get_matching_exercise(user)
7878
end
7979

8080
def find_matching_exercise(user)
81-
exercises_user_has_accessed = user.submissions.where(cause: %w[submit assess remoteSubmit remoteAssess]).map(&:exercise).uniq.compact
82-
tags_user_has_seen = exercises_user_has_accessed.map(&:tags).uniq.flatten
81+
exercises_user_has_accessed =
82+
Exercise
83+
.joins(:submissions)
84+
.merge(user.submissions.where(cause: %w[submit assess remoteSubmit remoteAssess]))
85+
.includes(:tags, :files)
86+
.distinct
87+
tags_user_has_seen = exercises_user_has_accessed.flat_map(&:tags).uniq
8388
Rails.logger.debug { "exercises_user_has_accessed #{exercises_user_has_accessed.map(&:id).join(',')}" }
8489

8590
# find exercises
8691
potential_recommended_exercises = []
87-
exercises.where('expected_difficulty >= 1').find_each do |ex|
92+
exercises.where('expected_difficulty >= 1').includes(:tags, exercise_tags: :tag).find_each do |ex|
8893
## find exercises which have only tags the user has already seen
8994
if (ex.tags - tags_user_has_seen).empty?
9095
potential_recommended_exercises << ex
@@ -119,9 +124,9 @@ def select_best_matching_exercise(user, exercises_user_has_accessed, potential_r
119124
relative_knowledge_improvement[potex] = 0.0
120125
Rails.logger.debug { "review potential exercise #{potex.id}" }
121126
tags.each do |tag|
122-
tag_ratio = potex.exercise_tags.find_by(tag:).factor.to_f / potex.exercise_tags.inject(0) do |sum, et|
123-
sum + et.factor
124-
end
127+
tag_ratio = potex.exercise_tags.find {|et| et.tag == tag }.factor.to_f / potex.exercise_tags.inject(0) do |sum, et|
128+
sum + et.factor
129+
end
125130
max_topic_knowledge_ratio = potex.expected_difficulty * tag_ratio
126131
old_relative_loss_tag = topic_knowledge_user[tag] / topic_knowledge_max[tag]
127132
new_relative_loss_tag = topic_knowledge_user[tag] / (topic_knowledge_max[tag] + max_topic_knowledge_ratio)

spec/factories/tag.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# frozen_string_literal: true
2+
3+
FactoryBot.define do
4+
factory :tag do
5+
name { 'tagName' }
6+
end
7+
end
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
# frozen_string_literal: true
2+
3+
require 'rails_helper'
4+
5+
# AI generated to capture performance characteristics of ProxyExercise
6+
RSpec.describe ProxyExercise, skip: 'Slow tests, activate manually' do
7+
# Simple SQL capture helper using ActiveSupport notifications
8+
def capture_sql(&)
9+
events = []
10+
callback = ->(_name, _start, _finish, _id, payload) { events << payload[:sql] }
11+
ActiveSupport::Notifications.subscribed(callback, 'sql.active_record', &)
12+
events
13+
end
14+
15+
def query_count(&)
16+
capture_sql(&).size
17+
end
18+
19+
describe 'performance characteristics' do
20+
describe '#count_files' do
21+
it 'uses an efficient COUNT query instead of loading all exercises' do
22+
pe = create(:proxy_exercise)
23+
create_list(:dummy, 20).each {|ex| pe.exercises << ex }
24+
25+
# Warm-up (avoid counting queries from lazy loading of associations)
26+
pe.reload
27+
28+
sqls = capture_sql { expect(pe.count_files).to eq(20) }
29+
30+
# We expect a single COUNT(*) style query (plus, in some adapters, an implicit PRAGMA/SHOW if any)
31+
# Be tolerant: assert that at least one of the queries contains COUNT and total number is small
32+
has_count = sqls.any? {|sql| sql =~ /COUNT\s*\(/i }
33+
expect(has_count).to be(true), "Expected a COUNT query, got: #{sqls.inspect}"
34+
expect(sqls.size).to be <= 3 # 1 main COUNT + possible noise
35+
end
36+
end
37+
38+
describe "#get_matching_exercise when algorithm is 'random'" do
39+
let(:user) { create(:learner) }
40+
41+
it 'does not increase the number of SQL queries with a larger exercise pool' do
42+
pe_small = create(:proxy_exercise, algorithm: 'random')
43+
chosen_small = create(:dummy)
44+
others_small = create_list(:dummy, 9)
45+
pe_small.exercises << ([chosen_small] + others_small)
46+
allow(pe_small).to receive_message_chain(:exercises, :sample).and_return(chosen_small)
47+
48+
pe_large = create(:proxy_exercise, algorithm: 'random')
49+
chosen_large = create(:dummy)
50+
others_large = create_list(:dummy, 99)
51+
pe_large.exercises << ([chosen_large] + others_large)
52+
allow(pe_large).to receive_message_chain(:exercises, :sample).and_return(chosen_large)
53+
54+
# Measure queries for small pool
55+
small_queries = query_count { pe_small.get_matching_exercise(user) }
56+
57+
# Measure queries for large pool
58+
large_queries = query_count { pe_large.get_matching_exercise(user) }
59+
60+
# The random algorithm should not need additional queries proportional to the pool size.
61+
# Allow a tiny slack for adapter differences and inserts of assignment rows.
62+
expect(large_queries - small_queries).to be <= 2,
63+
"Expected roughly constant query count, got small=#{small_queries}, large=#{large_queries}"
64+
end
65+
end
66+
67+
describe "#get_matching_exercise when algorithm is 'best_match'" do
68+
it 'reports current SQL query counts across pool sizes (opt-in via PERF=1)' do
69+
ActiveRecord.verbose_query_logs = true
70+
71+
# Helper to build an exercise with given tags and difficulty
72+
def build_tagged_exercise(difficulty:, tags:)
73+
ex = create(:dummy, expected_difficulty: difficulty)
74+
tags.each do |tag|
75+
ExerciseTag.create!(exercise: ex, tag:, factor: 1)
76+
end
77+
ex
78+
end
79+
80+
# Prepare a fixed tag universe to be stable across runs
81+
tags = (1..5).map {|i| Tag.create!(name: "perf-tag-#{i}-#{SecureRandom.hex(2)}") }
82+
83+
# Build a small history of exercises the user has accessed, covering all tags
84+
user = create(:learner)
85+
seen_exercises = Array.new(5) {|i| build_tagged_exercise(difficulty: 2, tags: [tags[i]]) }
86+
# Create real submissions for the user to reflect accessed exercises (no stubbing)
87+
seen_exercises.each do |ex|
88+
Submission.create!(exercise: ex, contributor: user, cause: 'submit')
89+
end
90+
91+
def build_pool(size:, tags:)
92+
pe = create(:proxy_exercise, algorithm: 'best_match')
93+
# Distribute tags and difficulties deterministically
94+
exercises = (1..size).map do |i|
95+
difficulty = 1 + (i % 3) # 1..3
96+
assigned_tags = [tags[i % tags.size], tags[(i + 1) % tags.size]].uniq
97+
build_tagged_exercise(difficulty:, tags: assigned_tags)
98+
end
99+
pe.exercises << exercises
100+
pe
101+
end
102+
103+
# Measure across multiple pool sizes
104+
sizes = [10, 30, 60]
105+
# sizes = [1]
106+
results = {}
107+
sizes.each do |n|
108+
pe = build_pool(size: n, tags:)
109+
# Warm up: ensure associations are loaded enough to avoid counting lazy first-time loads unrelated to algorithm
110+
pe.reload
111+
queries = query_count { pe.get_matching_exercise(user) }
112+
results[n] = queries
113+
end
114+
115+
puts "BEST_MATCH query counts by pool size: #{results.inspect}"
116+
117+
# Very loose sanity checks — this spec primarily reports the current optimization level.
118+
expect(results.values.all? {|q| q.positive? }).to be(true)
119+
end
120+
end
121+
122+
describe "#get_matching_exercise 'best_match' with varying tag counts" do
123+
it 'reports current SQL query counts across different tags per exercise (opt-in via PERF=1)' do
124+
# skip('Set PERF=1 to run performance specs') unless ENV['PERF']
125+
126+
# Helper to build an exercise with given tags and difficulty
127+
def build_tagged_exercise(difficulty:, tags:)
128+
ex = create(:dummy, expected_difficulty: difficulty)
129+
tags.each do |tag|
130+
ExerciseTag.create!(exercise: ex, tag: tag, factor: 1)
131+
end
132+
ex
133+
end
134+
135+
user = create(:learner)
136+
137+
# Build a small history of exercises the user has accessed, covering some tags
138+
base_tags = (1..8).map {|i| Tag.create!(name: "var-tags-base-#{i}-#{SecureRandom.hex(2)}") }
139+
seen_exercises = base_tags.first(4).map {|t| build_tagged_exercise(difficulty: 2, tags: [t]) }
140+
# Create real submissions for the user to reflect accessed exercises (no stubbing)
141+
seen_exercises.each do |ex|
142+
Submission.create!(exercise: ex, contributor: user, cause: 'submit')
143+
end
144+
145+
def build_pool_with_tag_count(size:, tag_universe:, tags_per_ex:)
146+
pe = create(:proxy_exercise, algorithm: 'best_match')
147+
exercises = (1..size).map do |i|
148+
difficulty = 1 + (i % 3) # 1..3
149+
assigned = (0...tags_per_ex).map {|k| tag_universe[(i + k) % tag_universe.size] }.uniq
150+
build_tagged_exercise(difficulty: difficulty, tags: assigned)
151+
end
152+
pe.exercises << exercises
153+
pe
154+
end
155+
156+
# Vary the number of tags per exercise while keeping pool size constant
157+
tags_per_exercise = [1, 2, 3, 5]
158+
tag_universe = (1..10).map {|i| Tag.create!(name: "var-tags-univ-#{i}-#{SecureRandom.hex(2)}") }
159+
results = {}
160+
tags_per_exercise.each do |k|
161+
pe = build_pool_with_tag_count(size: 30, tag_universe: tag_universe, tags_per_ex: k)
162+
# Warm up: ensure associations are loaded enough to avoid counting lazy first-time loads unrelated to algorithm
163+
pe.reload
164+
queries = query_count { pe.get_matching_exercise(user) }
165+
results[k] = queries
166+
end
167+
168+
puts "BEST_MATCH query counts by tags per exercise: #{results.inspect}"
169+
170+
# Loose sanity check
171+
expect(results.values.all? {|q| q.positive? }).to be(true)
172+
end
173+
end
174+
end
175+
end

spec/models/proxy_exercise_spec.rb

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
# frozen_string_literal: true
2+
3+
require 'rails_helper'
4+
5+
RSpec.describe ProxyExercise do
6+
describe 'defaults and validations' do
7+
let(:pe) { described_class.new }
8+
9+
it 'sets public to false by default (after_initialize)' do
10+
expect(pe.public).to be(false)
11+
end
12+
13+
it 'generates a token if none is present (after_initialize)' do
14+
expect(pe.token).to be_present
15+
expect(pe.token).to be_a(String)
16+
expect(pe.token.size).to eq(8) # SecureRandom.hex(4)
17+
end
18+
19+
context 'with a token' do
20+
let(:pe) { build(:proxy_exercise, token: 'fixedtoken') }
21+
22+
it 'does not overwrite an existing token' do
23+
expect(pe.token).to eq('fixedtoken')
24+
end
25+
end
26+
end
27+
28+
describe '#count_files' do
29+
let(:pe) { create(:proxy_exercise, exercises: create_list(:dummy, 7)) }
30+
31+
it 'returns the number of associated exercises' do
32+
expect(pe.count_files).to eq(7)
33+
end
34+
end
35+
36+
describe '#duplicate' do
37+
subject(:copy) { original.duplicate(title: 'Copy') }
38+
39+
let(:original) { create(:proxy_exercise, title: 'Original', token: 'tok12345') }
40+
41+
it 'returns a duplicated instance with overridden attributes' do
42+
expect(copy).to be_a(ProxyExercise)
43+
expect(copy).not_to be_persisted
44+
expect(copy.title).to eq('Copy')
45+
expect(copy.token).to eq('tok12345')
46+
end
47+
end
48+
49+
describe '#get_matching_exercise' do
50+
let(:user) { create(:learner) }
51+
52+
context 'when an assignment already exists for the user' do
53+
subject(:get_matching_exercise) { pe.get_matching_exercise(user) }
54+
55+
let(:pe) { create(:proxy_exercise) }
56+
let(:ex) { create(:dummy) }
57+
58+
before { UserProxyExerciseExercise.create!(user:, exercise: ex, proxy_exercise: pe) }
59+
60+
it { is_expected.to eql ex }
61+
62+
it 'returns the previously assigned exercise without creating another one' do
63+
expect { get_matching_exercise }.not_to change(UserProxyExerciseExercise, :count)
64+
end
65+
end
66+
67+
context "when algorithm is 'random'" do
68+
subject(:get_matching_exercise) { pe.get_matching_exercise(user) }
69+
70+
let(:pe) { create(:proxy_exercise, algorithm: 'random', exercises: [chosen, other]) }
71+
let(:chosen) { create(:dummy) }
72+
let(:other) { create(:dummy) }
73+
74+
before { allow(pe.exercises.target).to receive(:sample).and_return(chosen) }
75+
76+
it { is_expected.to eql chosen }
77+
78+
it 'uses exercises.sample and persists the assignment with a reason' do
79+
expect { get_matching_exercise }.to change(UserProxyExerciseExercise, :count).by(1)
80+
81+
assignment = UserProxyExerciseExercise.last
82+
reason = JSON.parse(assignment.reason)
83+
expect(reason['reason']).to eq('random exercise requested')
84+
expect(assignment).to have_attributes(user: user, exercise: chosen, proxy_exercise: pe)
85+
end
86+
end
87+
88+
context "when algorithm is 'best_match'" do
89+
subject(:get_matching_exercise) { pe.get_matching_exercise(user) }
90+
91+
let(:pe) { create(:proxy_exercise, algorithm: 'best_match', exercises: [easy, hard]) }
92+
let(:easy) { create(:dummy, expected_difficulty: 1) }
93+
let(:hard) { create(:dummy, expected_difficulty: 2) }
94+
95+
context 'when an error occurs' do
96+
before do
97+
allow(pe).to receive(:find_matching_exercise).and_raise(StandardError, 'boom')
98+
allow(pe.exercises.target).to receive(:sample).and_return(hard)
99+
end
100+
101+
it { is_expected.to eql hard }
102+
103+
it 'falls back to a random exercise with expected_difficulty > 1 and records the error in the reason' do
104+
expect { get_matching_exercise }.to change(UserProxyExerciseExercise, :count).by(1)
105+
106+
reason = JSON.parse(UserProxyExerciseExercise.last.reason)
107+
expect(reason['reason']).to eq('fallback because of error')
108+
expect(reason['error']).to include('boom')
109+
end
110+
end
111+
112+
context 'when the user has not seen any tags' do
113+
let(:easy) { create(:dummy, expected_difficulty: 1, tags: [build(:tag, name: 'arrays')]) }
114+
let(:hard) { create(:dummy, expected_difficulty: 2, tags: [build(:tag, name: 'hashes')]) }
115+
116+
it { is_expected.to eql easy }
117+
118+
it 'selects the easiest exercise and records the reason' do
119+
expect { get_matching_exercise }.to change(UserProxyExerciseExercise, :count).by(1)
120+
121+
reason = JSON.parse(UserProxyExerciseExercise.last.reason)
122+
expect(reason['reason']).to eq('easiest exercise in pool. empty potential exercises')
123+
end
124+
end
125+
126+
context 'when user has submission for exercise' do
127+
let(:tag) { build(:tag, name: 'loops') }
128+
let(:hard) { create(:dummy, expected_difficulty: 2, tags: [tag]) }
129+
130+
before do
131+
create(:dummy, expected_difficulty: 1, tags: [tag])
132+
create(:dummy, expected_difficulty: 3, tags: [tag])
133+
create(:submission, cause: 'submit', exercise: build(:dummy, expected_difficulty: 2, tags: [tag]), contributor: user)
134+
end
135+
136+
it { is_expected.to eql hard }
137+
138+
it 'chooses the best matching exercise within the difficulty constraint and stores a detailed reason' do
139+
expect { get_matching_exercise }.to change(UserProxyExerciseExercise, :count).by(1)
140+
141+
reason = JSON.parse(UserProxyExerciseExercise.last.reason)
142+
expect(reason['reason']).to eq('best matching exercise')
143+
expect(reason['highest_difficulty_user_has_accessed']).to eq(2)
144+
expect(reason).to have_key('current_users_knowledge_lack')
145+
expect(reason).to have_key('relative_knowledge_improvement')
146+
end
147+
end
148+
end
149+
end
150+
end

0 commit comments

Comments
 (0)