Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8,061 changes: 8,061 additions & 0 deletions data/rules-ascii.txt

Large diffs are not rendered by default.

8,061 changes: 8,061 additions & 0 deletions data/rules-unicode.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions lib/public_suffix.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
require_relative "public_suffix/errors"
require_relative "public_suffix/rule"
require_relative "public_suffix/list"
require_relative "public_suffix/trie"

# PublicSuffix is a Ruby domain name parser based on the Public Suffix List.
#
Expand Down
19 changes: 17 additions & 2 deletions lib/public_suffix/list.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def self.parse(input, private_domains: true)
# @yieldparam [PublicSuffix::List] self The newly created instance.
def initialize
  # The trie is a second index over the same rules held in the hash;
  # both start out empty.
  @trie = PublicSuffix::Trie.new
  @rules = {}
  return unless block_given?

  yield self
end

Expand Down Expand Up @@ -137,7 +138,9 @@ def each(&block)
# @param rule [PublicSuffix::Rule::*] the rule to add to the list
# @return [self]
def add(rule)
  # Register the rule in both indexes: the hash keyed by rule value and the trie.
  rule_to_entry(rule).tap do |entry|
    key = rule.value
    @rules[key] = entry
    @trie.insert(key, type: entry.type, private: entry.private)
  end
  self
end
alias << add
Expand Down Expand Up @@ -170,14 +173,26 @@ def clear
# @param default [PublicSuffix::Rule::*] the default rule to return in case no rule matches
# @return [PublicSuffix::Rule::*]
def find(name, default: default_rule, **options)
  # The WHAT environment variable selects the lookup backend:
  # "hash" uses the hash-based search, anything else uses the trie.
  return find_hash(name, default: default, **options) if ENV["WHAT"] == "hash"

  find_trie(name, default: default, **options)
end

# Finds the best matching rule using the hash-based search.
#
# Candidates come from #select. The longest candidate wins (the later one on a tie),
# except that an exception rule met after the first candidate is returned immediately.
#
# @param name [#to_s] the hostname
# @param default [PublicSuffix::Rule::*] the rule to return when nothing matches
# @return [PublicSuffix::Rule::*]
def find_hash(name, default: default_rule, **options)
  winner = nil
  select(name, **options).each_with_index do |candidate, position|
    if position.zero?
      # The first candidate only seeds the comparison; it is never type-checked.
      winner = candidate
      next
    end

    return candidate if candidate.class == Rule::Exception

    winner = candidate unless winner.length > candidate.length
  end
  winner || default
end

# Finds the best matching rule using the trie.
#
# @param name [String] the hostname
# @param default [PublicSuffix::Rule::*] the rule to return when the trie has no match
# @param ignore_private [Boolean] forwarded to the trie lookup
# @return [PublicSuffix::Rule::*]
def find_trie(name, default: default_rule, ignore_private: false)
  match = @trie.longest_prefix(name, ignore_private: ignore_private)
  return match if match

  default
end

# Selects all the rules matching given hostname.
#
# If `ignore_private` is set to true, the algorithm will skip the rules that are flagged as
# private domain. Note that the rules will still be part of the loop.
Expand Down
97 changes: 97 additions & 0 deletions lib/public_suffix/trie.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
module PublicSuffix

  # Implements a Trie data structure used to store the List.
  #
  # Rule values are split on DOT and stored label by label starting from the
  # rightmost label, so a lookup walks a hostname from right to left.
  class Trie

    # @return [PublicSuffix::Trie::Node] the root node of the trie
    attr_reader :root

    def initialize
      @root = self.class::Node.new
    end

    # Inserts a rule into the trie, creating the intermediate nodes as needed.
    #
    # @param word [String] the rule value, e.g. "co.uk"
    # @param type [Class] the rule class instantiated on lookup
    # @param private [Boolean] whether the rule is flagged as private
    # @return [PublicSuffix::Trie::Node] the node marked as the end of the rule
    def insert(word, type:, private:)
      node = word.split(DOT).reverse_each.inject(@root) { |parent, token| parent.put(token) }
      node.end!(type: type, private: private)
      # Always return the node: the result of #end! is the private flag,
      # which is false for every non-private rule.
      node
    end

    # Finds the rule matching the longest run of rightmost labels of the word.
    #
    # The deepest exception rule on the path, if any, takes precedence over a
    # longer non-exception match.
    #
    # @param word [String] the hostname to look up
    # @param ignore_private [Boolean] when truthy, rules flagged as private are skipped
    # @return [PublicSuffix::Rule::*, nil] a new rule instance, or nil if nothing matches
    def longest_prefix(word, ignore_private: false)
      labels = word.split(DOT).reverse
      node = @root
      leaf = nil
      excp = nil

      labels.each_with_index do |label, index|
        node = node.get(label)
        break if node.nil?
        next unless node.end?
        next if ignore_private && node.private

        leaf = [node, index + 1]
        excp = leaf if node.type == Rule::Exception
      end

      matched, depth = excp || leaf
      return nil if matched.nil?

      # Labels were walked right to left; restore hostname order for the rule value.
      matched.type.new(value: labels.first(depth).reverse.join(DOT), private: matched.private)
    end


    # Node is a node of the Trie and contains references to all the children nodes.
    #
    # A node marked as "end" represents the final part of a rule. It contains the rule information
    # such as the rule type and whether it belongs to PRIVATE.
    class Node
      attr_accessor :children
      attr_accessor :type
      attr_accessor :private

      def initialize
        # Children are allocated lazily by #put.
        @children = nil
        @type = nil
        @private = nil
      end

      # @param key [String] a label
      # @return [Boolean] true if a child exists for the key
      def contains?(key)
        !get(key).nil?
      end

      # Returns the child for the key, creating it if missing.
      #
      # @param key [String] a label
      # @return [PublicSuffix::Trie::Node]
      def put(key)
        @children ||= {}
        @children[index(key)] ||= self.class.new
      end

      # @param key [String] a label
      # @return [PublicSuffix::Trie::Node, nil] the child for the key, if any
      def get(key)
        return nil if @children.nil?

        @children[index(key)]
      end

      # @return [Boolean] true if a rule ends at this node
      def end?
        !@type.nil?
      end

      # Marks this node as the end of a rule.
      #
      # @param type [Class] the rule class
      # @param private [Boolean] whether the rule is flagged as private
      def end!(type:, private:)
        @type = type
        @private = private
      end

      private

      # Maps a label to the key used in the children hash (currently the label itself).
      def index(key)
        key
      end
    end

  end
end
3 changes: 2 additions & 1 deletion test/profilers/list_profsize.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@

# Report on the default list as a whole, then on its two internal indexes.
# NOTE(review): ObjectBinsize is defined elsewhere; presumably it measures object size — confirm.
prof = ObjectBinsize.new
prof.report(PublicSuffix::List.default, label: "PublicSuffix::List size")
# NOTE(review): the next two calls report the same @rules object and differ only in label —
# looks like one of them is a leftover; confirm which label is intended.
prof.report(PublicSuffix::List.default.instance_variable_get(:@rules), label: "Size of rules")
prof.report(PublicSuffix::List.default.instance_variable_get(:@rules), label: "Size of @rules")
prof.report(PublicSuffix::List.default.instance_variable_get(:@trie), label: "Size of @trie")
15 changes: 15 additions & 0 deletions test/profilers/tries_profiler.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
$LOAD_PATH.unshift File.expand_path("../../lib", __dir__)

require "memory_profiler"
require "public_suffix"
require "public_suffix/trie"

list = PublicSuffix::List.default
puts "#{list.size} rules:"

# Read the entries straight from the list internals: each value responds to
# #type and #private, which is what Trie#insert requires.
rules = list.instance_variable_get(:@rules)

# Profile the memory allocated while building a fresh trie from every rule.
report = MemoryProfiler.report do
  trie = PublicSuffix::Trie.new
  # Hash#keys ignores a block, so iterate explicitly. Trie#insert reverses the
  # labels itself, so the word is passed as is.
  rules.each do |word, entry|
    trie.insert(word, type: entry.type, private: entry.private)
  end
end

report.pretty_print
15 changes: 15 additions & 0 deletions test/profilers/tries_prosize.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
$LOAD_PATH.unshift File.expand_path("../../lib", __dir__)

require_relative "object_binsize"
require "public_suffix"
require "public_suffix/trie"

list = PublicSuffix::List.default
rules = list.instance_variable_get(:@rules)

# Build a standalone trie from the same entries held in @rules.
# Trie#insert requires the rule type and private flag and reverses the labels
# itself, so the word is passed as is.
@trie = PublicSuffix::Trie.new
rules.each do |word, entry|
  @trie.insert(word, type: entry.type, private: entry.private)
end

# Report on both structures so they can be compared.
prof = ObjectBinsize.new
prof.report(rules, label: "@rules")
prof.report(@trie, label: "@trie")