Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,5 @@ storage/
/config/credentials/stage.yaml.enc
/config/credentials/production.key
/config/credentials/production.yaml.enc
.nvmrc
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

L169: I don't think you want `.nvmrc` in your `.gitignore` file; committing it is how you make sure that everyone on your team uses the same Node version.


89 changes: 61 additions & 28 deletions app/services/external_apis/ror_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def fetch(force: false)
if old_checksum_val == metadata[:checksum]
log_message(method: method, message: 'There is no new ROR file to process.')
else
download_file = download_file = metadata['key']
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh wow. 🤣 Good catch. I wonder how long that's been like that.

download_file = metadata['key']
download_url = metadata.fetch('links', {}).fetch('download', metadata.fetch('links', {})['self'])
log_message(method: method, message: "New ROR file detected - checksum #{metadata[:checksum]}")
log_message(method: method, message: "Downloading #{download_file}")
Expand Down Expand Up @@ -174,7 +174,7 @@ def process_ror_file(zip_file:, file:)

log_message(
method: method,
message: "Unable to process record for: '#{hash&.fetch('name', 'unknown')}'",
message: "Unable to process record for: '#{hash.fetch('names', []).first&.fetch('value', 'unknown')}'",
info: false
)
end
Expand Down Expand Up @@ -204,14 +204,16 @@ def process_ror_record(record:, time:)

registry_org = RegistryOrg.find_or_create_by(ror_id: record['id'])
registry_org.name = safe_string(value: org_name(item: record))
registry_org.acronyms = record['acronyms']
registry_org.aliases = record['aliases']
registry_org.country = record['country']
registry_org.acronyms = extract_names(item: record, type: 'acronym')
registry_org.aliases = extract_names(item: record, type: 'alias')
registry_org.country = extract_country(item: record)
registry_org.types = record['types']
registry_org.language = org_language(item: record)
registry_org.file_timestamp = time.strftime('%Y-%m-%d %H:%M:%S')
registry_org.fundref_id = fundref_id(item: record)
registry_org.home_page = safe_string(value: record.fetch('links', []).first)

website = record.fetch('links', []).find { |l| l['type'] == 'website' }
registry_org.home_page = safe_string(value: website ? website['value'] : nil)

# Attempt to find a matching Org record
registry_org.org_id = check_for_org_association(registry_org: registry_org)
Expand Down Expand Up @@ -250,54 +252,85 @@ def check_for_org_association(registry_org:)
# "Example College (example.edu)"
# "Example College (Brazil)"
def org_name(item:)
# NOTE(review): this text comes from a rendered diff without +/- markers, so
# statements that appear twice in slightly different forms (the 'name' vs
# 'names' guards, the two `country =` assignments, the two final strings) are
# presumably the removed and added versions of the same line - confirm against
# the repository before reading this as one method body.

# Guard keyed on the singular 'name' field
return '' unless item.present? && item['name'].present?
# Guard keyed on the 'names' array
return '' unless item.present? && item['names'].present?

# Find ror_display name; when no entry carries that type, use the first entry's value
name_obj = item['names'].find { |n| n['types']&.include?('ror_display') }
name = name_obj ? name_obj['value'] : item['names'].first['value']

return '' if name.blank?

# extract_country can return nil, hence the safe navigation before fetch
country = extract_country(item: item)&.fetch('country_name', '')

# Try to get the domain from the 'domains' array first
website = item.fetch('domains', []).first
# Fallback to extracting it from the website link
website = org_website(item: item) if website.blank?

# Reads the country name from a top-level 'country' hash
country = item.fetch('country', {}).fetch('country_name', '')
# Derives the website from the links only
website = org_website(item: item)
# If no website or country then just return the name
return item['name'] unless website.present? || country.present?
return name unless website.present? || country.present?

# Otherwise return the contextualized name (website wins over country when both exist)
"#{item['name']} (#{website || country})"
"#{name} (#{website || country})"
end

# Extracts the org's ISO639 if available
def org_language(item:)
# NOTE(review): this text comes from a rendered diff without +/- markers; the
# country/labels lookup and the 'names' lookup below are presumably the removed
# and added implementations shown together - confirm against the repository.

# Fallback language: the I18n default locale, or 'en' when none is configured
dflt = I18n.default_locale || 'en'
return dflt if item.blank?

# Country-code based lookup: 'US' maps straight to 'en', anything else reads
# the record's 'labels' (defaulting to a single entry holding dflt)
country = item.fetch('country', {}).fetch('country_code', '')
labels = case country
when 'US'
[{ iso639: 'en' }]
else
item.fetch('labels', [{ iso639: dflt }])
end
# Takes the 'iso639' of the first label, falling back to dflt
labels.first&.fetch('iso639', I18n.default_locale) || dflt
# Try to get language from ror_display name
name_obj = item.fetch('names', []).find { |n| n['types']&.include?('ror_display') }
return name_obj['lang'] if name_obj.present? && name_obj['lang'].present?

# No ror_display entry with a 'lang' value was found
dflt
end

# Extracts the website domain from the item
def org_website(item:)
# NOTE(review): this text comes from a rendered diff without +/- markers; the
# statements that call `scan` on `item['links'].first` and those that call it on
# `website_obj['value']` are presumably the removed and added versions of the
# same logic - confirm against the repository.

# Nothing to extract when the item is blank or has no links
return nil unless item.present? && item.fetch('links', [])&.any?
# Bails out when the first link entry is blank
return nil if item['links'].first.blank?

# Pick the first link entry whose 'type' is 'website' and require a non-blank 'value'
website_obj = item['links'].find { |l| l['type'] == 'website' }
return nil unless website_obj.present? && website_obj['value'].present?

# A website was found, so extract just the domain without the www
# The pattern captures everything after a leading http://, https:// or www. up to the next '/'
domain_regex = %r{^(?:http://|www\.|https://)([^/]+)}
# Scans the first link entry directly (i.e. treats the entry itself as a String)
website = item['links'].first.scan(domain_regex).last.first
# gsub with a String pattern removes every literal 'www.' occurrence, not only a leading one
website.gsub('www.', '')
# Scans the 'value' of the website entry; &. yields nil when the pattern does not match
website = website_obj['value'].scan(domain_regex).last&.first
website&.gsub('www.', '')
end

# Extracts the FundRef Id if available
def fundref_id(item:)
# NOTE(review): this text comes from a rendered diff without +/- markers; the
# hash-style `fetch('FundRef', {})` guard and the array-style `find` below are
# presumably the removed and added versions of the lookup - confirm against the
# repository.

# Returns an empty String when the item or its 'external_ids' is blank
return '' unless item.present? && item['external_ids'].present?
# Hash-style lookup: expects 'external_ids' to be keyed by 'FundRef'
return '' unless item['external_ids'].fetch('FundRef', {}).any?

# Array-style lookup: first entry whose 'type' is 'fundref'
fundref = item['external_ids'].find { |id| id['type'] == 'fundref' }
return '' unless fundref.present?

# Use the preferred id when one is set
return fundref['preferred'] if fundref['preferred'].present?

# Otherwise take the first id listed under 'all' (nil when 'all' is absent or empty)
fundref.fetch('all', []).first
end

# If a preferred Id was specified then use it
ret = item['external_ids'].fetch('FundRef', {}).fetch('preferred', '')
return ret if ret.present?
# Helper to extract names by type
#
# Collects the 'value' of every entry in item['names'] whose 'types' list
# includes the requested type, preserving the order of the entries.
#
# @param item [Hash, nil] record with string keys; its 'names' entry is read
# @param type [String] the name type to match, e.g. 'alias' or 'acronym'
# @return [Array] the matching values; empty when item or its 'names' is blank
def extract_names(item:, type:)
  entries = item['names'] if item.present?
  return [] unless entries.present?

  entries.each_with_object([]) do |entry, values|
    # Entries without a 'types' list never match
    values << entry['value'] if entry['types']&.include?(type)
  end
end

# Otherwise take the first one listed
item['external_ids'].fetch('FundRef', {}).fetch('all', []).first
# Helper to extract country
#
# Builds a two-key country hash from the 'geonames_details' of the first entry
# in item['locations'].
#
# @param item [Hash, nil] record with string keys; its 'locations' entry is read
# @return [Hash, nil] { 'country_name' => ..., 'country_code' => ... } (a key
#   missing from the details maps to nil), or nil when the item, its locations,
#   the first location or that location's 'geonames_details' is blank
def extract_country(item:)
  locations = item['locations'] if item.present?
  return nil unless locations.present?

  # Only the first listed location is consulted
  primary = locations.first
  return nil unless primary.present?

  geonames = primary['geonames_details']
  return nil unless geonames.present?

  %w[country_name country_code].to_h { |key| [key, geonames[key]] }
end
end
end
Expand Down
94 changes: 51 additions & 43 deletions spec/services/external_apis/ror_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
xit 'returns an empty array' do
expect(described_class.search(term: @term)).to eql([])
end
xit 'logs the response as an error' do
it 'logs the response as an error' do
described_class.expects(:handle_http_failure).at_least(1)
described_class.search(term: @term)
end
Expand Down Expand Up @@ -70,28 +70,36 @@
items: [
{
id: 'https://ror.org/1234567890',
name: 'Example University',
names: [
{ types: ['ror_display'], value: 'Example University' },
{ types: ['alias'], value: 'Example' },
{ types: ['acronym'], value: 'EU' }
],
types: ['Education'],
links: ['http://example.edu/'],
aliases: ['Example'],
acronyms: ['EU'],
links: [{ type: 'website', value: 'http://example.edu/' }],
status: 'active',
country: { country_name: 'United States', country_code: 'US' },
external_ids: {
GRID: { preferred: 'grid.12345.1', all: 'grid.12345.1' }
}
locations: [
{ geonames_details: { country_name: 'United States', country_code: 'US' } }
],
external_ids: [
{ type: 'grid', preferred: 'grid.12345.1', all: ['grid.12345.1'] }
]
}, {
id: 'https://ror.org/0987654321',
name: 'Universidade de Example',
names: [
{ types: ['ror_display'], value: 'Universidade de Example' },
{ types: ['alias'], value: 'Example' },
{ types: ['acronym'], value: 'EU' }
],
types: ['Education'],
links: [],
aliases: ['Example'],
acronyms: ['EU'],
status: 'active',
country: { country_name: 'Mexico', country_code: 'MX' },
external_ids: {
GRID: { preferred: 'grid.98765.8', all: 'grid.98765.8' }
}
locations: [
{ geonames_details: { country_name: 'Mexico', country_code: 'MX' } }
],
external_ids: [
{ type: 'grid', preferred: 'grid.98765.8', all: ['grid.98765.8'] }
]
}
]
}
Expand Down Expand Up @@ -132,8 +140,8 @@
time_taken: 5,
items: [{
id: Faker::Internet.url,
name: Faker::Lorem.word,
country: { country_name: Faker::Lorem.word }
names: [{ types: ['ror_display'], value: Faker::Lorem.word }],
locations: [{ geonames_details: { country_name: Faker::Lorem.word } }]
}]
}
@term = Faker::Lorem.word
Expand Down Expand Up @@ -206,8 +214,8 @@
items = Array.new(4).map do
{
id: Faker::Internet.unique.url,
name: Faker::Lorem.word,
country: { country_name: Faker::Lorem.word }
names: [{ types: ['ror_display'], value: Faker::Lorem.word }],
locations: [{ geonames_details: { country_name: Faker::Lorem.word } }]
}
end
results1 = { number_of_results: 4, items: items }
Expand All @@ -225,8 +233,8 @@
items = Array.new(7).map do
{
id: Faker::Internet.unique.url,
name: Faker::Lorem.word,
country: { country_name: Faker::Lorem.word }
names: [{ types: ['ror_display'], value: Faker::Lorem.word }],
locations: [{ geonames_details: { country_name: Faker::Lorem.word } }]
}
end
results1 = { number_of_results: 7, items: items[0..4] }
Expand All @@ -247,8 +255,8 @@
items = Array.new(12).map do
{
id: Faker::Internet.unique.url,
name: Faker::Lorem.word,
country: { country_name: Faker::Lorem.word }
names: [{ types: ['ror_display'], value: Faker::Lorem.word }],
locations: [{ geonames_details: { country_name: Faker::Lorem.word } }]
}
end
results1 = { number_of_results: 12, items: items[0..4] }
Expand All @@ -273,17 +281,17 @@
end
xit 'ignores items with no name or id' do
json = { items: [
{ id: Faker::Internet.url, name: Faker::Lorem.word },
{ id: Faker::Internet.url, names: [{ types: ['ror_display'], value: Faker::Lorem.word }] },
{ id: Faker::Internet.url },
{ name: Faker::Lorem.word }
{ names: [{ types: ['ror_display'], value: Faker::Lorem.word }] }
] }.to_json
items = described_class.send(:parse_results, json: JSON.parse(json))
expect(items.length).to eql(1)
end
xit 'returns the correct number of results' do
json = { items: [
{ id: Faker::Internet.url, name: Faker::Lorem.word },
{ id: Faker::Internet.url, name: Faker::Lorem.word }
{ id: Faker::Internet.url, names: [{ types: ['ror_display'], value: Faker::Lorem.word }] },
{ id: Faker::Internet.url, names: [{ types: ['ror_display'], value: Faker::Lorem.word }] }
] }.to_json
items = described_class.send(:parse_results, json: JSON.parse(json))
expect(items.length).to eql(2)
Expand All @@ -292,31 +300,31 @@

describe '#org_name' do
xit 'returns nil if there is no name' do
json = { country: { country_name: 'Nowhere' } }.to_json
json = { locations: [{ geonames_details: { country_name: 'Nowhere' } }] }.to_json
expect(described_class.send(:org_name, item: JSON.parse(json))).to eql('')
end
xit 'properly appends the website if available' do
json = {
name: 'Example College',
links: ['https://example.edu'],
country: { country_name: 'Nowhere' }
names: [{ types: ['ror_display'], value: 'Example College' }],
links: [{ type: 'website', value: 'https://example.edu' }],
locations: [{ geonames_details: { country_name: 'Nowhere' } }]
}.to_json
expected = 'Example College (example.edu)'
expect(described_class.send(:org_name, item: JSON.parse(json))).to eql(expected)
end
xit 'properly appends the country if available and no website is available' do
it 'properly appends the country if available and no website is available' do
json = {
name: 'Example College',
country: { country_name: 'Nowhere' }
names: [{ types: ['ror_display'], value: 'Example College' }],
locations: [{ geonames_details: { country_name: 'Nowhere' } }]
}.to_json
expected = 'Example College (Nowhere)'
expect(described_class.send(:org_name, item: JSON.parse(json))).to eql(expected)
end
xit 'properly handles an item with no website or country' do
json = {
name: 'Example College',
names: [{ types: ['ror_display'], value: 'Example College' }],
links: [],
country: {}
locations: []
}.to_json
expected = 'Example College'
expect(described_class.send(:org_name, item: JSON.parse(json))).to eql(expected)
Expand All @@ -332,35 +340,35 @@
expect(described_class.send(:org_website, item: nil)).to eql(nil)
end
xit 'returns the domain only' do
item = JSON.parse({ links: ['https://example.org/path?a=b'] }.to_json)
item = JSON.parse({ links: [{ type: 'website', value: 'https://example.org/path?a=b' }] }.to_json)
expect(described_class.send(:org_website, item: item)).to eql('example.org')
end
xit 'removes the www prefix' do
item = JSON.parse({ links: ['www.example.org'] }.to_json)
item = JSON.parse({ links: [{ type: 'website', value: 'www.example.org' }] }.to_json)
expect(described_class.send(:org_website, item: item)).to eql('example.org')
end
end

describe '#fundref_id' do
before(:each) do
@hash = { external_ids: {} }
@hash = { external_ids: [] }
end
xit 'returns a blank if no external_ids are present' do
json = JSON.parse(@hash.to_json)
expect(described_class.send(:fundref_id, item: json)).to eql('')
end
xit 'returns a blank if no FundRef ids are present' do
@hash['external_ids'] = { FundRef: {} }
@hash['external_ids'] = [{ type: 'grid', preferred: '1', all: %w[2 1] }]
json = JSON.parse(@hash.to_json)
expect(described_class.send(:fundref_id, item: json)).to eql('')
end
xit 'returns the preferred id when specified' do
@hash['external_ids'] = { FundRef: { preferred: '1', all: %w[2 1] } }
@hash['external_ids'] = [{ type: 'fundref', preferred: '1', all: %w[2 1] }]
json = JSON.parse(@hash.to_json)
expect(described_class.send(:fundref_id, item: json)).to eql('1')
end
xit 'returns the firstid if no preferred is specified' do
@hash['external_ids'] = { FundRef: { preferred: nil, all: %w[2 1] } }
@hash['external_ids'] = [{ type: 'fundref', preferred: nil, all: %w[2 1] }]
json = JSON.parse(@hash.to_json)
expect(described_class.send(:fundref_id, item: json)).to eql('2')
end
Expand Down
12 changes: 6 additions & 6 deletions spec/support/helpers/webmocks.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ def mocked_ror_response
10.times.each do
body[:items] << {
id: Faker::Internet.url(host: 'ror.org'),
name: Faker::Company.unique.name,
links: [[Faker::Internet.url, nil].sample],
country: { country_name: Faker::Books::Dune.planet },
external_ids: {
FundRef: { preferred: nil, all: [Faker::Number.number(digits: 6)] }
}
names: [{ types: ['ror_display'], value: Faker::Company.unique.name }],
links: [{ type: 'website', value: Faker::Internet.url }],
locations: [{ geonames_details: { country_name: Faker::Books::Dune.planet } }],
external_ids: [
{ type: 'fundref', preferred: nil, all: [Faker::Number.number(digits: 6)] }
]
}
end
body.to_json
Expand Down
Loading