diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f12f55a..c15ea29 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -6,15 +6,15 @@ jobs: strategy: matrix: ruby-version: - - 2.0 + - 2.0.0 - 2.7 - - 3.0 + - 3.0.0 - 3.4 - ruby-head - jruby-head image: - ubuntu-24.04 - - ubuntu-24.04-arm +# - ubuntu-24.04-arm name: Ruby ${{ matrix.ruby-version }} on ${{ matrix.image }} steps: - uses: actions/checkout@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..a553184 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,28 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + + +## v2.0.0 + +### Added +- `detect_batch` method for batch detections + +### Changed +- Switched to v3 API which uses an updated language detection model +- ⚠️ `detect` method result fields are `language` and `score` +- ⚠️ Proxy URL configured using `config.proxy` +- Client connection is reused. If you change configuration after the client is initialized, you need to reset client using `DetectLanguage.client = nil`. + +### Deprecated +- Calling `detect` with array argument. Use `detect_batch` instead. +- `simple_detect` - Use `detect_code` instead. +- `user_status` - Use `account_status` instead. +- `configuration` - Use `config` instead. + +### Removed +- Secure mode configuration. HTTPS is always used. +- Ruby 1.x support diff --git a/Gemfile b/Gemfile index 5b8f682..d9d312b 100644 --- a/Gemfile +++ b/Gemfile @@ -6,5 +6,5 @@ gem 'rake' gem 'json' group :test do - gem "rspec" -end \ No newline at end of file + gem 'rspec' +end diff --git a/README.md b/README.md index 63a0efa..4a299cf 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,6 @@ Detect Language API Ruby Client Detects language of the given text. 
Returns detected language codes and scores. -Before using Detect Language API client you have to setup your personal API key. -You can get it by signing up at https://detectlanguage.com ## Installation @@ -17,62 +15,56 @@ Add this line to your application's Gemfile: gem 'detect_language' ``` -Or install it yourself as: +### Upgrading -``` -gem install detect_language -``` +When upgrading please check [changelog](CHANGELOG.md) for breaking changes. ### Configuration -If you are using Rails, create initializer `config/initializers/detect_language.rb` and add following code there. -Otherwise just integrate following code into your apps configuration. +Get your personal API key by signing up at https://detectlanguage.com ```ruby DetectLanguage.configure do |config| - config.api_key = "YOUR API KEY" - - # enable secure mode (SSL) if you are passing sensitive data - # config.secure = true + config.api_key = 'YOUR API KEY' end ``` ## Usage -### Language detection +### Detect language ```ruby -DetectLanguage.detect("Buenos dias señor") +DetectLanguage.detect('Dolce far niente') ``` #### Result ```ruby -[{"language"=>"es", "isReliable"=>true, "confidence"=>6.62}] +[{"language" => "it", "score" => 0.5074}] ``` -### Simple language detection +### Detect single code -If you need just a language code you can use `simple_detect`. It returns just the language code. +If you need just a language code you can use `detect_code`. ```ruby -DetectLanguage.simple_detect("Buenos dias señor") +DetectLanguage.detect_code('Dolce far niente') ``` #### Result ```ruby -"es" +"it" ``` ### Batch detection It is possible to detect language of several texts with one request. This method is significantly faster than doing one request per text. -To use batch detection just pass array of texts to `detect` method. +To use batch detection just pass array of texts to `detect_batch` method. 
```ruby -DetectLanguage.detect(["Buenos dias señor", "Labas rytas"]) +DetectLanguage.detect_batch(['Dolce far niente', 'Labas rytas']) ``` #### Result @@ -80,14 +72,13 @@ DetectLanguage.detect(["Buenos dias señor", "Labas rytas"]) Result is array of detections in the same order as the texts were passed. ```ruby -[ [{"language"=>"es", "isReliable"=>true, "confidence"=>6.62}], - [{"language"=>"lt", "isReliable"=>true, "confidence"=>6.82}] ] +[[{"language" => "it", "score" => 0.5074}], [{"language" => "lt", "score" => 0.3063}]] ``` -### Getting your account status +### Get your account status ```ruby -DetectLanguage.user_status +DetectLanguage.account_status ``` #### Result @@ -97,7 +88,7 @@ DetectLanguage.user_status "daily_requests_limit"=>5000, "daily_bytes_limit"=>1048576, "status"=>"ACTIVE"} ``` -### Getting list supported languages +### Get list of supported languages ```ruby DetectLanguage.languages diff --git a/lib/detect_language.rb b/lib/detect_language.rb index c02471a..ebafea8 100644 --- a/lib/detect_language.rb +++ b/lib/detect_language.rb @@ -5,44 +5,81 @@ module DetectLanguage class << self - attr_writer :configuration + attr_writer :config def configure - yield(configuration) + yield(config) end - # The configuration object. - # @see DetectLanguage.configure - def configuration - @configuration ||= Configuration.new + def config + @config ||= Configuration.new end def client - Thread.current[:detect_language_client] ||= Client.new(configuration) + Thread.current[:detect_language_client] ||= Client.new(config) end - def detect(data) - key = data.is_a?(Array) ? 'q[]' : 'q' - result = client.post(:detect, key => data) - result['data']['detections'] + def client=(client) + Thread.current[:detect_language_client] = client end - def simple_detect(text) + # @param query String + # @return [Array] Array of language detections + def detect(query) + if query.is_a?(Array) + warn '[DEPRECATED] `DetectLanguage.detect` with an array of queries is deprecated. 
Use `DetectLanguage.detect_batch` instead.' + return detect_batch(query) + end + + client.post('detect', q: query) + end + + # @param queries Array<String> Array of queries to detect languages for + # @return [Array<Array<Hash>>] Array of language detections for each query + def detect_batch(queries) + raise(ArgumentError, 'Expected an Array of queries') unless queries.is_a?(Array) + + client.post('detect-batch', q: queries) + end + + # @param text String + # @return [String, nil] Detected language code or nil if no detection found + def detect_code(text) detections = detect(text) - if detections.empty? - nil - else - detections[0]['language'] - end + return if detections.empty? + + detections[0]['language'] end - def user_status - client.get('user/status') + # @return [Hash] Account status information + def account_status + client.get('account/status') end + # @return [Array<Hash>] List of supported languages def languages client.get('languages') end + + ### DEPRECATED METHODS + + # @deprecated Use `DetectLanguage.config` instead + def configuration + warn '[DEPRECATED] `DetectLanguage.configuration` is deprecated. Use `DetectLanguage.config` instead.' + config + end + + # @deprecated Use `detect_code` instead + def simple_detect(text) + warn '[DEPRECATED] `DetectLanguage.simple_detect` is deprecated. Use `DetectLanguage.detect_code` instead.' + detect_code(text) + end + + # @deprecated Use `DetectLanguage.account_status` instead + def user_status + warn '[DEPRECATED] `DetectLanguage.user_status` is deprecated. Use `DetectLanguage.account_status` instead.'
+ account_status + end end end diff --git a/lib/detect_language/client.rb b/lib/detect_language/client.rb index d26b25d..ab69be9 100644 --- a/lib/detect_language/client.rb +++ b/lib/detect_language/client.rb @@ -1,44 +1,37 @@ -require 'cgi' require 'net/http' require 'net/https' require 'json' module DetectLanguage class Client - attr_reader :configuration + attr_reader :config - def initialize(configuration) - @configuration = configuration + def initialize(config) + @config = config end - def post(method, params = {}) - execute(method, params, :http_method => Net::HTTP::Post) + def post(path, payload = {}) + execute(Net::HTTP::Post, path, body: payload.to_json) end - def get(method, params = {}) - execute(method, params, :http_method => Net::HTTP::Get) + def get(path) + execute(Net::HTTP::Get, path) end private - def execute(method, params, options) - http = setup_http_connection - http_method = options[:http_method] - request = http_method.new(request_uri(method)) + def execute(method, path, body: nil) + request = method.new(base_uri.path + path) + request.body = body - if RUBY_VERSION == '1.8.7' - set_form_data_18(request, params) - else - request.set_form_data(params) - end - - request['Authorization'] = 'Bearer ' + configuration.api_key.to_s - request['User-Agent'] = configuration.user_agent + request['Content-Type'] = 'application/json' + request['Authorization'] = 'Bearer ' + config.api_key.to_s + request['User-Agent'] = config.user_agent - response = http.request(request) + response = connection.request(request) case response - when Net::HTTPSuccess, Net::HTTPUnauthorized then + when Net::HTTPSuccess, Net::HTTPUnauthorized parse_response(response.body) else raise(Error, "Failure: #{response.class}") @@ -55,44 +48,28 @@ def parse_response(response_body) end end - def request_uri(method) - "/#{configuration.api_version}/#{method}" + def base_uri + @base_uri ||= URI(config.base_url) end - def setup_http_connection - http = - 
Net::HTTP::Proxy(configuration.proxy_host, configuration.proxy_port, configuration.proxy_user, - configuration.proxy_pass). - new(configuration.host, configuration.port) - - http.read_timeout = configuration.http_read_timeout - http.open_timeout = configuration.http_open_timeout + def connection + @connection ||= setup_connection + end - if configuration.secure? - http.use_ssl = true - http.verify_mode = OpenSSL::SSL::VERIFY_PEER + def setup_connection + http = if config.proxy + proxy = URI(config.proxy) + Net::HTTP.new(base_uri.hostname, base_uri.port, proxy.hostname, proxy.port, proxy.user, proxy.password) else - http.use_ssl = false + Net::HTTP.new(base_uri.hostname, base_uri.port) end - http - end - - def set_form_data_18(request, params, sep = '&') - request.body = params.map {|k,v| - if v.instance_of?(Array) - v.map {|e| "#{urlencode(k.to_s)}=#{urlencode(e.to_s)}"}.join(sep) - else - "#{urlencode(k.to_s)}=#{urlencode(v.to_s)}" - end - }.join(sep) - - request.content_type = 'application/x-www-form-urlencoded' - end + http.use_ssl = base_uri.scheme == 'https' + http.verify_mode = OpenSSL::SSL::VERIFY_PEER if http.use_ssl? + http.read_timeout = config.http_read_timeout + http.open_timeout = config.http_open_timeout - def urlencode(str) - CGI::escape(str) + http end - end end diff --git a/lib/detect_language/configuration.rb b/lib/detect_language/configuration.rb index ce4992f..6db4c88 100644 --- a/lib/detect_language/configuration.rb +++ b/lib/detect_language/configuration.rb @@ -1,83 +1,28 @@ module DetectLanguage class Configuration - # The API key for your project, found on your homepage after you login into detectlanguage.com website - # Defaults to 'demo', which has a limited number of requests. + # The API key for your project, found on your homepage after you log in to the + # https://detectlanguage.com website attr_accessor :api_key - # The API version you are using (defaults to 0.2).
- attr_accessor :api_version + # API base URL + attr_accessor :base_url # HTTP request user agent (defaults to 'Detect Language API ruby gem'). attr_accessor :user_agent - # The host to connect to (defaults to ws.detectlanguage.com). - attr_accessor :host - - # The port on which your DetectLanguage server runs (defaults to 443 for secure - # connections, 80 for insecure connections). - attr_accessor :port - - # +true+ for https connections, +false+ for http connections. - attr_accessor :secure - # The HTTP open timeout in seconds. attr_accessor :http_open_timeout # The HTTP read timeout in seconds. attr_accessor :http_read_timeout - # The hostname of your proxy server (if using a proxy). - attr_accessor :proxy_host - - # The port of your proxy server (if using a proxy). - attr_accessor :proxy_port - - # The username to use when logging into your proxy server (if using a proxy). - attr_accessor :proxy_user - - # The password to use when logging into your proxy server (if using a proxy). - attr_accessor :proxy_pass - - alias_method :secure?, :secure + # The HTTP proxy to use for requests. Example: 'http://my-proxy:8080' + attr_accessor :proxy def initialize - @api_key = nil - @api_version = "0.2" - @host = "ws.detectlanguage.com" - @user_agent = "detectlanguage-ruby/#{VERSION}" - end - - def protocol - if secure? - 'https' - else - 'http' - end + @api_key = nil + @base_url = "https://ws.detectlanguage.com/v3/" + @user_agent = "detectlanguage-ruby/#{VERSION}" end - - def port - @port || default_port - end - - # Allows config options to be read like a hash - # - # @param [Symbol] option Key for a given attribute - def [](option) - send(option) - end - - private - - # Determines what port should we use for sending requests. - # @return [Fixnum] Returns 443 if you've set secure to true in your - # configuration, and 80 otherwise. - def default_port - if secure? 
- 443 - else - 80 - end - end - end end diff --git a/lib/detect_language/version.rb b/lib/detect_language/version.rb index 1b7e14c..e6711fd 100644 --- a/lib/detect_language/version.rb +++ b/lib/detect_language/version.rb @@ -1,3 +1,3 @@ module DetectLanguage - VERSION = '1.1.2' + VERSION = '2.0.0' end diff --git a/spec/detect_language_spec.rb b/spec/detect_language_spec.rb deleted file mode 100644 index ee86c9b..0000000 --- a/spec/detect_language_spec.rb +++ /dev/null @@ -1,102 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe DetectLanguage do - let(:api_key) { ENV['DETECTLANGUAGE_API_KEY'] } - let(:secure) { true } - - before do - described_class.configuration.api_key = api_key - described_class.configuration.secure = secure - end - - describe '.configuration' do - subject { described_class.configuration } - - it 'has default configuration values' do - expect(subject.api_version).to eq('0.2') - expect(subject.host).to eq('ws.detectlanguage.com') - expect(subject.user_agent).to eq("detectlanguage-ruby/#{DetectLanguage::VERSION}") - end - end - - describe '.detect' do - subject { described_class.detect(query) } - - let(:query) { 'Hello world' } - - it 'detects language' do - expect(subject).to be_an(Array) - expect(subject.first).to be_a(Hash) - expect(subject.first['language']).to eq('en') - end - - context 'with unicode characters' do - let(:query) { 'Jau saulelė vėl atkopdama budino svietą' } - - it 'detects language with unicode characters' do - expect(subject.first['language']).to eq('lt') - end - end - - context 'with batch requests' do - let(:query) { ['', 'Hello world', 'Jau saulelė vėl atkopdama budino svietą'] } - - it 'detects languages in batch' do - expect(subject).to be_an(Array) - expect(subject.size).to eq(3) - expect(subject[0]).to be_empty - expect(subject[1][0]['language']).to eq('en') - expect(subject[2][0]['language']).to eq('lt') - end - end - - context 'invalid api key' do - let(:api_key) { 'invalid' } - - it "should raise exception 
for invalid key" do - expect { subject }.to raise_error(DetectLanguage::Error) - end - end - end - - describe '.simple_detect' do - subject { described_class.simple_detect(query) } - - let(:query) { 'Hello world' } - - it 'detects language' do - expect(subject).to eq('en') - end - end - - describe '.user_status' do - subject { DetectLanguage.user_status } - - it 'fetches user status' do - expect(subject).to include( - 'date' => kind_of(String), - 'requests' => kind_of(Integer), - 'bytes' => kind_of(Integer), - 'plan' => kind_of(String), - 'daily_requests_limit' => kind_of(Integer), - 'daily_bytes_limit' => kind_of(Integer), - ) - end - end - - describe '.languages' do - subject { DetectLanguage.languages } - - it 'fetches list of detectable languages' do - expect(subject).to include('code' => 'en', 'name' => 'ENGLISH') - end - - context 'with http' do - let(:secure) { false } - - it 'fetches languages over http' do - expect(subject).to include('code' => 'en', 'name' => 'ENGLISH') - end - end - end -end diff --git a/spec/lib/detect_language/configuratino_spec.rb b/spec/lib/detect_language/configuratino_spec.rb new file mode 100644 index 0000000..7c57fb6 --- /dev/null +++ b/spec/lib/detect_language/configuratino_spec.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +RSpec.describe DetectLanguage::Configuration do + describe '#api_key' do + it 'returns the api key' do + expect(subject.api_key).to be_nil + end + end + + describe '#base_url' do + it 'returns the base url' do + expect(subject.base_url).to eq('https://ws.detectlanguage.com/v3/') + end + end + + describe '#user_agent' do + it 'returns the user agent' do + expect(subject.user_agent).to eq("detectlanguage-ruby/#{DetectLanguage::VERSION}") + end + end +end diff --git a/spec/lib/detect_language_spec.rb b/spec/lib/detect_language_spec.rb new file mode 100644 index 0000000..f8bcd29 --- /dev/null +++ b/spec/lib/detect_language_spec.rb @@ -0,0 +1,156 @@ +# frozen_string_literal: true + +RSpec.describe 
DetectLanguage do + let(:api_key) { ENV['DETECTLANGUAGE_API_KEY'] } + let(:proxy) { nil } + + before do + described_class.client = nil + described_class.config.api_key = api_key + described_class.config.proxy = proxy + end + + describe '.config' do + subject { described_class.config } + + it { is_expected.to be_a(DetectLanguage::Configuration) } + end + + describe '.configuration' do + subject { described_class.configuration } + + it 'delegates to config' do + expect(described_class).to receive(:config) + expect { subject }.to output(/DEPRECATED/).to_stderr + end + end + + describe '.detect' do + subject { described_class.detect(query) } + + let(:query) { 'Hello world' } + + it 'detects language' do + expect(subject).to be_an(Array) + expect(subject.first).to be_a(Hash) + expect(subject.first['language']).to eq('en') + end + + context 'with unicode characters' do + let(:query) { 'Jau saulelė vėl atkopdama budino svietą' } + + it 'detects language with unicode characters' do + expect(subject.first['language']).to eq('lt') + end + end + + context 'with array of queries' do + let(:query) { ['Hello world', 'Bonjour le monde'] } + let(:result) { double } + + before do + allow(described_class).to receive(:detect_batch).with(query).and_return(result) + end + + it 'delegates to detect_batch' do + expect(subject).to be(result) + end + + it 'issues a deprecation warning' do + expect { described_class.detect(query) }.to output(/DEPRECATED/).to_stderr + end + end + + context 'invalid api key' do + let(:api_key) { 'invalid' } + + it "should raise exception for invalid key" do + expect { subject }.to raise_error(DetectLanguage::Error) + end + end + + context 'with proxy' do + let(:proxy) { 'http://dummy:pass@my-proxy:8080' } + + it 'uses the proxy for requests' do + allow(Net::HTTP).to receive(:new) + .with('ws.detectlanguage.com', 443, 'my-proxy', 8080, 'dummy', 'pass') + .and_call_original + + expect { subject }.to raise_error(SocketError) + end + end + end + + describe 
'.detect_code' do + subject { described_class.detect_code(query) } + + let(:query) { 'Hello world' } + + it 'detects language' do + expect(subject).to eq('en') + end + + context 'with empty query' do + let(:query) { ' ' } + + it 'returns nil for empty query' do + expect(subject).to be_nil + end + end + end + + describe '.detect_batch' do + subject { described_class.detect_batch(queries) } + + let(:queries) { ['', 'Hello world', 'Jau saulelė vėl atkopdama budino svietą'] } + + it 'detects languages in batch' do + expect(subject).to be_an(Array) + expect(subject.size).to eq(3) + expect(subject[0]).to be_empty + expect(subject[1][0]['language']).to eq('en') + expect(subject[2][0]['language']).to eq('lt') + end + + context 'when queries is not an array' do + let(:queries) { 'Hello world' } + + it 'raises an ArgumentError' do + expect { subject }.to raise_error(ArgumentError, "Expected an Array of queries") + end + end + end + + describe '.account_status' do + subject { described_class.account_status } + + it 'fetches account status' do + expect(subject).to include( + 'date' => kind_of(String), + 'requests' => kind_of(Integer), + 'bytes' => kind_of(Integer), + 'plan' => kind_of(String), + 'daily_requests_limit' => kind_of(Integer), + 'daily_bytes_limit' => kind_of(Integer), + ) + end + end + + describe '.user_status' do + subject { described_class.user_status } + + it 'delegates to account_status' do + expect(described_class).to receive(:account_status) + expect { subject }.to output(/DEPRECATED/).to_stderr + end + end + + describe '.languages' do + subject { DetectLanguage.languages } + + it 'fetches list of detectable languages' do + expect(subject).to include('code' => 'en', 'name' => 'English') + end + end +end