Skip to content

Commit 143d467

Browse files
Merge pull request #162 from MITLibraries/tco-110-journals
Split Detector::Journals model into two, extending BulkChecker into the detection model
2 parents 351e5e3 + ac9a31f commit 143d467

File tree

11 files changed

+123
-68
lines changed

11 files changed

+123
-68
lines changed

app/models/detector/journal.rb

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,16 @@
11
# frozen_string_literal: true
22

3-
# == Schema Information
4-
#
5-
# Table name: detector_journals
6-
#
7-
# id :integer not null, primary key
8-
# name :string
9-
# additional_info :json
10-
# created_at :datetime not null
11-
# updated_at :datetime not null
12-
#
133
class Detector
14-
# Detector::Journal stores information about academic journals loaded from external sources to allow us to check our
15-
# incoming Terms against this information
16-
class Journal < ApplicationRecord
17-
before_save :downcase_fields!
4+
# Detector::Journal handles the comparison between incoming Term records and our known list of academic journals
5+
# (which are managed by the separate Journal model).
6+
class Journal
7+
attr_reader :detections
188

19-
def self.table_name_prefix
20-
'detector_'
9+
# shared singleton methods
10+
extend Detector::BulkChecker
11+
12+
def initialize(phrase)
13+
@detections = Detector::Journal.full_term_match(phrase)
2114
end
2215

2316
# Identify journals in which the incoming phrase matches a Journal.name exactly
@@ -30,9 +23,9 @@ def self.table_name_prefix
3023
#
3124
# @param phrase [String] A string representation of a search term (not an actual Term object!)
3225
#
33-
# @return [Set of Detector::Journal] A set of ActiveRecord Detector::Journal relations.
26+
# @return [ActiveRecord::Relation<Journal>] Journal records whose name equals the downcased phrase.
3427
def self.full_term_match(phrase)
35-
Journal.where(name: phrase.downcase)
28+
::Journal.where(name: phrase.downcase)
3629
end
3730

3831
# Identify journals in which the incoming phrase contains one or more Journal names
@@ -41,12 +34,12 @@ def self.full_term_match(phrase)
4134
#
4235
# @param phrase [String] A string representation of a search term (not an actual Term object!)
4336
#
44-
# @return [Set of Detector::Journal] A set of ActiveRecord Detector::Journal relations.
37+
# @return [Array<Journal>] Journal records whose name is contained in the downcased phrase.
4538
def self.partial_term_match(phrase)
46-
Journal.all.select { |journal| phrase.downcase.include?(journal.name) }
39+
::Journal.all.select { |journal| phrase.downcase.include?(journal.name) }
4740
end
4841

49-
# Look up any matching Detector::Journal records, building on the full_term_match method. If a match is found, a
42+
# Look up any matching Journal records, building on the full_term_match method. If a match is found, a
5043
# Detection record is created to indicate this success.
5144
#
5245
# @note This does not care whether multiple matching journals are detected. If _any_ match is found, a Detection
@@ -65,13 +58,5 @@ def self.record(term)
6558

6659
nil
6760
end
68-
69-
private
70-
71-
# Downcasing all names before saving allows for more efficient matching by ensuring our index is lowercase.
72-
# If we find we need the non-lowercase Journal name in the future, we could store that as `additional_info` json
73-
def downcase_fields!
74-
name.downcase!
75-
end
7661
end
7762
end

app/models/journal.rb

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# frozen_string_literal: true
2+
3+
# == Schema Information
4+
#
5+
# Table name: journals
6+
#
7+
# id :integer not null, primary key
8+
# name :string
9+
# additional_info :json
10+
# created_at :datetime not null
11+
# updated_at :datetime not null
12+
#
13+
14+
# Journal is the list of academic journals which are known to TACOS. This list of records is referred to by the
15+
# Detector::Journal model in order to determine whether a given term matches a known journal. The names of these
16+
# journals are stored in lowercase, which matches how the Detector::Journal processes incoming terms, in order to
17+
# prevent capitalization differences resulting in a false negative.
18+
class Journal < ApplicationRecord
19+
before_save :downcase_fields!
20+
21+
private
22+
23+
# Downcasing all names before saving allows for more efficient matching by ensuring our index is lowercase.
24+
# If we find we need the non-lowercase Journal name in the future, we could store that in the `additional_info` JSON.
25+
def downcase_fields!
26+
name.downcase!
27+
end
28+
end
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
class CreateJournals < ActiveRecord::Migration[7.1]
2+
def change
3+
create_table :journals do |t|
4+
t.string :name
5+
t.json :additional_info
6+
7+
t.timestamps
8+
end
9+
add_index :journals, :name
10+
end
11+
end
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
class DropDetectorJournals < ActiveRecord::Migration[7.1]
2+
def up
3+
drop_table :detector_journals
4+
end
5+
6+
def down
7+
create_table :detector_journals do |t|
8+
t.string :name
9+
t.json :additional_info
10+
11+
t.timestamps
12+
end
13+
add_index :detector_journals, :name
14+
end
15+
end

db/schema.rb

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/classes.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ classDiagram
3030
3131
Detector "1" --> "0..*" DetectorCategory
3232
33+
DetectorJournal -- Journal : references
34+
3335
Confirmation --> Term
3436
Confirmation --> Category
3537
User --> Confirmation : provides many
@@ -110,6 +112,11 @@ classDiagram
110112
DetectorSuggestedResource: record()
111113
DetectorSuggestedResource: update_fingerprint()
112114
115+
class Journal
116+
Journal: +Integer id
117+
Journal: +String name
118+
Journal: +JSON additional_info
119+
113120
class Confirmation
114121
Confirmation: +Integer id
115122
Confirmation: +Integer user_id
@@ -134,6 +141,7 @@ classDiagram
134141
class DetectorLcsh["Detector::Lcsh"]
135142
class DetectorStandardIdentifier["Detector::StandardIdentifiers"]
136143
class DetectorSuggestedResource["Detector::SuggestedResource"]
144+
class Journal
137145
}
138146
139147
namespace UserActivity {
@@ -153,6 +161,7 @@ classDiagram
153161
style DetectorLcsh fill:#000,stroke:#fc8d62,color:#fc8d62
154162
style DetectorStandardIdentifier fill:#000,stroke:#fc8d62,color:#fc8d62
155163
style DetectorSuggestedResource fill:#000,stroke:#fc8d62,color:#fc8d62
164+
style Journal fill:#000,stroke:#fc8d62,color:#fc8d62
156165
157166
style Categorization fill:#000,stroke:#8da0cb,color:#8da0cb,stroke-dasharray: 3 5;
158167
style Detection fill:#000,stroke:#8da0cb,color:#8da0cb,stroke-dasharray: 3 5;

lib/tasks/journals.rake

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,18 +93,18 @@ namespace :journals do
9393
end
9494

9595
# Delete all journals. We do this to simplify the loader process to avoid consideration of updates/deletes.
96-
Detector::Journal.delete_all
96+
Journal.delete_all
9797

9898
# Not ideal: we should consider streaming the file rather than loading it fully into memory.
9999
json = JSON.parse(data)
100100

101101
json['core'].each do |journal|
102-
Detector::Journal.create(name: journal['title'],
103-
additional_info: { issns: journal['issns'],
104-
publisher: journal['publisher'],
105-
alternate_titles: journal['alternate_titles'],
106-
type: journal['type'],
107-
abbreviated_title: journal['abbreviated_title'] })
102+
Journal.create(name: journal['title'],
103+
additional_info: { issns: journal['issns'],
104+
publisher: journal['publisher'],
105+
alternate_titles: journal['alternate_titles'],
106+
type: journal['type'],
107+
abbreviated_title: journal['abbreviated_title'] })
108108
end
109109
end
110110
end
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# == Schema Information
22
#
3-
# Table name: detector_journals
3+
# Table name: journals
44
#
55
# id :integer not null, primary key
66
# name :string

test/models/detector/bulk_checker_test.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ class CitationTest < ActiveSupport::TestCase
1111
end
1212

1313
test 'journal_bulk_checker' do
14-
skip 'Detector::Journal does not yet support bulk_checker'
14+
bulk = Detector::Journal.check_all_matches(output: true)
15+
16+
assert_equal(1, bulk.count)
1517
end
1618

1719
test 'lcsh_bulk_checker' do

test/models/detector/journal_test.rb

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,19 @@
11
# frozen_string_literal: true
22

3-
# == Schema Information
4-
#
5-
# Table name: detector_journals
6-
#
7-
# id :integer not null, primary key
8-
# name :string
9-
# additional_info :json
10-
# created_at :datetime not null
11-
# updated_at :datetime not null
12-
#
133
require 'test_helper'
144

155
class Detector
166
class JournalTest < ActiveSupport::TestCase
177
test 'exact term match on journal name' do
18-
expected = detector_journals('the_new_england_journal_of_medicine')
8+
expected = journals('the_new_england_journal_of_medicine')
199
actual = Detector::Journal.full_term_match('the new england journal of medicine')
2010

2111
assert_equal 1, actual.count
2212
assert_equal(expected, actual.first)
2313
end
2414

2515
test 'mixed case exact term match on journal name' do
26-
expected = detector_journals('the_new_england_journal_of_medicine')
16+
expected = journals('the_new_england_journal_of_medicine')
2717
actual = Detector::Journal.full_term_match('The New England Journal of Medicine')
2818

2919
assert_equal 1, actual.count
@@ -48,15 +38,6 @@ class JournalTest < ActiveSupport::TestCase
4838
assert_equal 2, actual.count
4939
end
5040

51-
test 'mixed titles are downcased when saved' do
52-
mixed_case = 'ThIs Is A tItLe'
53-
actual = Detector::Journal.create(name: mixed_case)
54-
actual.reload
55-
56-
assert_not_equal(mixed_case, actual.name)
57-
assert_equal(mixed_case.downcase, actual.name)
58-
end
59-
6041
test 'record does relevant work' do
6142
detection_count = Detection.count
6243
t = terms('journal_nature_medicine')

0 commit comments

Comments
 (0)