Skip to content

Commit c61d590

Browse files
authored
Merge pull request #778 from CDLUC3/ror-version-update
Updates to use ROR v2 format
2 parents 0aa549f + 0ce24a0 commit c61d590

File tree

4 files changed

+120
-77
lines changed

4 files changed

+120
-77
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -166,3 +166,5 @@ storage/
166166
/config/credentials/stage.yaml.enc
167167
/config/credentials/production.key
168168
/config/credentials/production.yaml.enc
169+
.nvmrc
170+

app/services/external_apis/ror_service.rb

Lines changed: 61 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ def fetch(force: false)
6666
if old_checksum_val == metadata[:checksum]
6767
log_message(method: method, message: 'There is no new ROR file to process.')
6868
else
69-
download_file = download_file = metadata['key']
69+
download_file = metadata['key']
7070
download_url = metadata.fetch('links', {}).fetch('download', metadata.fetch('links', {})['self'])
7171
log_message(method: method, message: "New ROR file detected - checksum #{metadata[:checksum]}")
7272
log_message(method: method, message: "Downloading #{download_file}")
@@ -174,7 +174,7 @@ def process_ror_file(zip_file:, file:)
174174

175175
log_message(
176176
method: method,
177-
message: "Unable to process record for: '#{hash&.fetch('name', 'unknown')}'",
177+
message: "Unable to process record for: '#{hash.fetch('names', []).first&.fetch('value', 'unknown')}'",
178178
info: false
179179
)
180180
end
@@ -204,14 +204,16 @@ def process_ror_record(record:, time:)
204204

205205
registry_org = RegistryOrg.find_or_create_by(ror_id: record['id'])
206206
registry_org.name = safe_string(value: org_name(item: record))
207-
registry_org.acronyms = record['acronyms']
208-
registry_org.aliases = record['aliases']
209-
registry_org.country = record['country']
207+
registry_org.acronyms = extract_names(item: record, type: 'acronym')
208+
registry_org.aliases = extract_names(item: record, type: 'alias')
209+
registry_org.country = extract_country(item: record)
210210
registry_org.types = record['types']
211211
registry_org.language = org_language(item: record)
212212
registry_org.file_timestamp = time.strftime('%Y-%m-%d %H:%M:%S')
213213
registry_org.fundref_id = fundref_id(item: record)
214-
registry_org.home_page = safe_string(value: record.fetch('links', []).first)
214+
215+
website = record.fetch('links', []).find { |l| l['type'] == 'website' }
216+
registry_org.home_page = safe_string(value: website ? website['value'] : nil)
215217

216218
# Attempt to find a matching Org record
217219
registry_org.org_id = check_for_org_association(registry_org: registry_org)
@@ -250,54 +252,85 @@ def check_for_org_association(registry_org:)
250252
# "Example College (example.edu)"
251253
# "Example College (Brazil)"
252254
def org_name(item:)
253-
return '' unless item.present? && item['name'].present?
255+
return '' unless item.present? && item['names'].present?
256+
257+
# Find ror_display name
258+
name_obj = item['names'].find { |n| n['types']&.include?('ror_display') }
259+
name = name_obj ? name_obj['value'] : item['names'].first['value']
260+
261+
return '' if name.blank?
262+
263+
country = extract_country(item: item)&.fetch('country_name', '')
264+
265+
# Try to get the domain from the 'domains' array first
266+
website = item.fetch('domains', []).first
267+
# Fallback to extracting it from the website link
268+
website = org_website(item: item) if website.blank?
254269

255-
country = item.fetch('country', {}).fetch('country_name', '')
256-
website = org_website(item: item)
257270
# If no website or country then just return the name
258-
return item['name'] unless website.present? || country.present?
271+
return name unless website.present? || country.present?
259272

260273
# Otherwise return the contextualized name
261-
"#{item['name']} (#{website || country})"
274+
"#{name} (#{website || country})"
262275
end
263276

264277
# Extracts the org's ISO639 if available
265278
def org_language(item:)
266279
dflt = I18n.default_locale || 'en'
267280
return dflt if item.blank?
268281

269-
country = item.fetch('country', {}).fetch('country_code', '')
270-
labels = case country
271-
when 'US'
272-
[{ iso639: 'en' }]
273-
else
274-
item.fetch('labels', [{ iso639: dflt }])
275-
end
276-
labels.first&.fetch('iso639', I18n.default_locale) || dflt
282+
# Try to get language from ror_display name
283+
name_obj = item.fetch('names', []).find { |n| n['types']&.include?('ror_display') }
284+
return name_obj['lang'] if name_obj.present? && name_obj['lang'].present?
285+
286+
dflt
277287
end
278288

279289
# Extracts the website domain from the item
280290
def org_website(item:)
281291
return nil unless item.present? && item.fetch('links', [])&.any?
282-
return nil if item['links'].first.blank?
292+
293+
website_obj = item['links'].find { |l| l['type'] == 'website' }
294+
return nil unless website_obj.present? && website_obj['value'].present?
283295

284296
# A website was found, so extract just the domain without the www
285297
domain_regex = %r{^(?:http://|www\.|https://)([^/]+)}
286-
website = item['links'].first.scan(domain_regex).last.first
287-
website.gsub('www.', '')
298+
website = website_obj['value'].scan(domain_regex).last&.first
299+
website&.gsub('www.', '')
288300
end
289301

290302
# Extracts the FundRef Id if available
291303
def fundref_id(item:)
292304
return '' unless item.present? && item['external_ids'].present?
293-
return '' unless item['external_ids'].fetch('FundRef', {}).any?
305+
306+
fundref = item['external_ids'].find { |id| id['type'] == 'fundref' }
307+
return '' unless fundref.present?
308+
309+
return fundref['preferred'] if fundref['preferred'].present?
310+
311+
fundref.fetch('all', []).first
312+
end
294313

295-
# If a preferred Id was specified then use it
296-
ret = item['external_ids'].fetch('FundRef', {}).fetch('preferred', '')
297-
return ret if ret.present?
314+
# Helper to extract names by type
315+
def extract_names(item:, type:)
316+
return [] unless item.present? && item['names'].present?
317+
318+
item['names'].select { |n| n['types']&.include?(type) }.map { |n| n['value'] }
319+
end
298320

299-
# Otherwise take the first one listed
300-
item['external_ids'].fetch('FundRef', {}).fetch('all', []).first
321+
# Helper to extract country
322+
def extract_country(item:)
323+
return nil unless item.present? && item['locations'].present?
324+
325+
# Assuming we take the first location
326+
loc = item['locations'].first
327+
return nil unless loc.present? && loc['geonames_details'].present?
328+
329+
details = loc['geonames_details']
330+
{
331+
'country_name' => details['country_name'],
332+
'country_code' => details['country_code']
333+
}
301334
end
302335
end
303336
end

spec/services/external_apis/ror_service_spec.rb

Lines changed: 51 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@
4242
xit 'returns an empty array' do
4343
expect(described_class.search(term: @term)).to eql([])
4444
end
45-
xit 'logs the response as an error' do
45+
it 'logs the response as an error' do
4646
described_class.expects(:handle_http_failure).at_least(1)
4747
described_class.search(term: @term)
4848
end
@@ -70,28 +70,36 @@
7070
items: [
7171
{
7272
id: 'https://ror.org/1234567890',
73-
name: 'Example University',
73+
names: [
74+
{ types: ['ror_display'], value: 'Example University' },
75+
{ types: ['alias'], value: 'Example' },
76+
{ types: ['acronym'], value: 'EU' }
77+
],
7478
types: ['Education'],
75-
links: ['http://example.edu/'],
76-
aliases: ['Example'],
77-
acronyms: ['EU'],
79+
links: [{ type: 'website', value: 'http://example.edu/' }],
7880
status: 'active',
79-
country: { country_name: 'United States', country_code: 'US' },
80-
external_ids: {
81-
GRID: { preferred: 'grid.12345.1', all: 'grid.12345.1' }
82-
}
81+
locations: [
82+
{ geonames_details: { country_name: 'United States', country_code: 'US' } }
83+
],
84+
external_ids: [
85+
{ type: 'grid', preferred: 'grid.12345.1', all: ['grid.12345.1'] }
86+
]
8387
}, {
8488
id: 'https://ror.org/0987654321',
85-
name: 'Universidade de Example',
89+
names: [
90+
{ types: ['ror_display'], value: 'Universidade de Example' },
91+
{ types: ['alias'], value: 'Example' },
92+
{ types: ['acronym'], value: 'EU' }
93+
],
8694
types: ['Education'],
8795
links: [],
88-
aliases: ['Example'],
89-
acronyms: ['EU'],
9096
status: 'active',
91-
country: { country_name: 'Mexico', country_code: 'MX' },
92-
external_ids: {
93-
GRID: { preferred: 'grid.98765.8', all: 'grid.98765.8' }
94-
}
97+
locations: [
98+
{ geonames_details: { country_name: 'Mexico', country_code: 'MX' } }
99+
],
100+
external_ids: [
101+
{ type: 'grid', preferred: 'grid.98765.8', all: ['grid.98765.8'] }
102+
]
95103
}
96104
]
97105
}
@@ -132,8 +140,8 @@
132140
time_taken: 5,
133141
items: [{
134142
id: Faker::Internet.url,
135-
name: Faker::Lorem.word,
136-
country: { country_name: Faker::Lorem.word }
143+
names: [{ types: ['ror_display'], value: Faker::Lorem.word }],
144+
locations: [{ geonames_details: { country_name: Faker::Lorem.word } }]
137145
}]
138146
}
139147
@term = Faker::Lorem.word
@@ -206,8 +214,8 @@
206214
items = Array.new(4).map do
207215
{
208216
id: Faker::Internet.unique.url,
209-
name: Faker::Lorem.word,
210-
country: { country_name: Faker::Lorem.word }
217+
names: [{ types: ['ror_display'], value: Faker::Lorem.word }],
218+
locations: [{ geonames_details: { country_name: Faker::Lorem.word } }]
211219
}
212220
end
213221
results1 = { number_of_results: 4, items: items }
@@ -225,8 +233,8 @@
225233
items = Array.new(7).map do
226234
{
227235
id: Faker::Internet.unique.url,
228-
name: Faker::Lorem.word,
229-
country: { country_name: Faker::Lorem.word }
236+
names: [{ types: ['ror_display'], value: Faker::Lorem.word }],
237+
locations: [{ geonames_details: { country_name: Faker::Lorem.word } }]
230238
}
231239
end
232240
results1 = { number_of_results: 7, items: items[0..4] }
@@ -247,8 +255,8 @@
247255
items = Array.new(12).map do
248256
{
249257
id: Faker::Internet.unique.url,
250-
name: Faker::Lorem.word,
251-
country: { country_name: Faker::Lorem.word }
258+
names: [{ types: ['ror_display'], value: Faker::Lorem.word }],
259+
locations: [{ geonames_details: { country_name: Faker::Lorem.word } }]
252260
}
253261
end
254262
results1 = { number_of_results: 12, items: items[0..4] }
@@ -273,17 +281,17 @@
273281
end
274282
xit 'ignores items with no name or id' do
275283
json = { items: [
276-
{ id: Faker::Internet.url, name: Faker::Lorem.word },
284+
{ id: Faker::Internet.url, names: [{ types: ['ror_display'], value: Faker::Lorem.word }] },
277285
{ id: Faker::Internet.url },
278-
{ name: Faker::Lorem.word }
286+
{ names: [{ types: ['ror_display'], value: Faker::Lorem.word }] }
279287
] }.to_json
280288
items = described_class.send(:parse_results, json: JSON.parse(json))
281289
expect(items.length).to eql(1)
282290
end
283291
xit 'returns the correct number of results' do
284292
json = { items: [
285-
{ id: Faker::Internet.url, name: Faker::Lorem.word },
286-
{ id: Faker::Internet.url, name: Faker::Lorem.word }
293+
{ id: Faker::Internet.url, names: [{ types: ['ror_display'], value: Faker::Lorem.word }] },
294+
{ id: Faker::Internet.url, names: [{ types: ['ror_display'], value: Faker::Lorem.word }] }
287295
] }.to_json
288296
items = described_class.send(:parse_results, json: JSON.parse(json))
289297
expect(items.length).to eql(2)
@@ -292,31 +300,31 @@
292300

293301
describe '#org_name' do
294302
xit 'returns nil if there is no name' do
295-
json = { country: { country_name: 'Nowhere' } }.to_json
303+
json = { locations: [{ geonames_details: { country_name: 'Nowhere' } }] }.to_json
296304
expect(described_class.send(:org_name, item: JSON.parse(json))).to eql('')
297305
end
298306
xit 'properly appends the website if available' do
299307
json = {
300-
name: 'Example College',
301-
links: ['https://example.edu'],
302-
country: { country_name: 'Nowhere' }
308+
names: [{ types: ['ror_display'], value: 'Example College' }],
309+
links: [{ type: 'website', value: 'https://example.edu' }],
310+
locations: [{ geonames_details: { country_name: 'Nowhere' } }]
303311
}.to_json
304312
expected = 'Example College (example.edu)'
305313
expect(described_class.send(:org_name, item: JSON.parse(json))).to eql(expected)
306314
end
307-
xit 'properly appends the country if available and no website is available' do
315+
it 'properly appends the country if available and no website is available' do
308316
json = {
309-
name: 'Example College',
310-
country: { country_name: 'Nowhere' }
317+
names: [{ types: ['ror_display'], value: 'Example College' }],
318+
locations: [{ geonames_details: { country_name: 'Nowhere' } }]
311319
}.to_json
312320
expected = 'Example College (Nowhere)'
313321
expect(described_class.send(:org_name, item: JSON.parse(json))).to eql(expected)
314322
end
315323
xit 'properly handles an item with no website or country' do
316324
json = {
317-
name: 'Example College',
325+
names: [{ types: ['ror_display'], value: 'Example College' }],
318326
links: [],
319-
country: {}
327+
locations: []
320328
}.to_json
321329
expected = 'Example College'
322330
expect(described_class.send(:org_name, item: JSON.parse(json))).to eql(expected)
@@ -332,35 +340,35 @@
332340
expect(described_class.send(:org_website, item: nil)).to eql(nil)
333341
end
334342
xit 'returns the domain only' do
335-
item = JSON.parse({ links: ['https://example.org/path?a=b'] }.to_json)
343+
item = JSON.parse({ links: [{ type: 'website', value: 'https://example.org/path?a=b' }] }.to_json)
336344
expect(described_class.send(:org_website, item: item)).to eql('example.org')
337345
end
338346
xit 'removes the www prefix' do
339-
item = JSON.parse({ links: ['www.example.org'] }.to_json)
347+
item = JSON.parse({ links: [{ type: 'website', value: 'www.example.org' }] }.to_json)
340348
expect(described_class.send(:org_website, item: item)).to eql('example.org')
341349
end
342350
end
343351

344352
describe '#fundref_id' do
345353
before(:each) do
346-
@hash = { external_ids: {} }
354+
@hash = { external_ids: [] }
347355
end
348356
xit 'returns a blank if no external_ids are present' do
349357
json = JSON.parse(@hash.to_json)
350358
expect(described_class.send(:fundref_id, item: json)).to eql('')
351359
end
352360
xit 'returns a blank if no FundRef ids are present' do
353-
@hash['external_ids'] = { FundRef: {} }
361+
@hash['external_ids'] = [{ type: 'grid', preferred: '1', all: %w[2 1] }]
354362
json = JSON.parse(@hash.to_json)
355363
expect(described_class.send(:fundref_id, item: json)).to eql('')
356364
end
357365
xit 'returns the preferred id when specified' do
358-
@hash['external_ids'] = { FundRef: { preferred: '1', all: %w[2 1] } }
366+
@hash['external_ids'] = [{ type: 'fundref', preferred: '1', all: %w[2 1] }]
359367
json = JSON.parse(@hash.to_json)
360368
expect(described_class.send(:fundref_id, item: json)).to eql('1')
361369
end
362370
xit 'returns the firstid if no preferred is specified' do
363-
@hash['external_ids'] = { FundRef: { preferred: nil, all: %w[2 1] } }
371+
@hash['external_ids'] = [{ type: 'fundref', preferred: nil, all: %w[2 1] }]
364372
json = JSON.parse(@hash.to_json)
365373
expect(described_class.send(:fundref_id, item: json)).to eql('2')
366374
end

spec/support/helpers/webmocks.rb

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -49,12 +49,12 @@ def mocked_ror_response
4949
10.times.each do
5050
body[:items] << {
5151
id: Faker::Internet.url(host: 'ror.org'),
52-
name: Faker::Company.unique.name,
53-
links: [[Faker::Internet.url, nil].sample],
54-
country: { country_name: Faker::Books::Dune.planet },
55-
external_ids: {
56-
FundRef: { preferred: nil, all: [Faker::Number.number(digits: 6)] }
57-
}
52+
names: [{ types: ['ror_display'], value: Faker::Company.unique.name }],
53+
links: [{ type: 'website', value: Faker::Internet.url }],
54+
locations: [{ geonames_details: { country_name: Faker::Books::Dune.planet } }],
55+
external_ids: [
56+
{ type: 'fundref', preferred: nil, all: [Faker::Number.number(digits: 6)] }
57+
]
5858
}
5959
end
6060
body.to_json

0 commit comments

Comments
 (0)