Skip to content
This repository was archived by the owner on Aug 26, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ matrix:
env: TEST_SUITE=py.test
- python: "3.5"
env: TEST_SUITE=py.test
- python: "3.6"
env: TEST_SUITE=py.test
- python: "pypy"
env: TEST_SUITE=py.test
- python: "2.6"
Expand All @@ -18,6 +20,7 @@ matrix:
install:
- pip install pytest pycodestyle
- if [ $TRAVIS_PYTHON_VERSION != 2.6 -a $TRAVIS_PYTHON_VERSION != "pypy3" ]; then pip install hypothesis; fi;
- if [ $TRAVIS_PYTHON_VERSION = 3.3 ]; then pip install enum34; fi;
script:
- $TEST_SUITE
notifications:
Expand Down
23 changes: 21 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,11 +1,30 @@
.. image:: https://travis-ci.org/seatgeek/fuzzywuzzy.svg?branch=master
:target: https://travis-ci.org/seatgeek/fuzzywuzzy
.. image:: https://travis-ci.org/ZhensongQian/fuzzywuzzy.svg?branch=master
:target: https://travis-ci.org/ZhensongQian/fuzzywuzzy

FuzzyWuzzy
==========

Fuzzy string matching like a boss. It uses `Levenshtein Distance <https://en.wikipedia.org/wiki/Levenshtein_distance>`_ to calculate the differences between sequences in a simple-to-use package.

Always Case Sensitive
=====================
.. code:: python

>>> fuzz.ratio("this is a test", "this is a test!")
97
>>> fuzz.ratio("this is a test", "this is a TEST!")
69
>>> fuzz.ratio("this is a test".lower(), "this is a TEST!".lower())
97

>>> fuzz.partial_ratio("this is a test", "this is a test!")
100
>>> fuzz.partial_ratio("this is a test", "this is a TEST!")
71
>>> fuzz.partial_ratio("this is a test".lower(), "this is a TEST!".lower())
100



Requirements
============

Expand Down
2 changes: 1 addition & 1 deletion fuzzywuzzy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def full_process(s, force_ascii=False):
# Keep only Letters and Numbers (see Unicode docs).
string_out = StringProcessor.replace_non_letters_non_numbers_with_whitespace(s)
# Force into lowercase.
string_out = StringProcessor.to_lower_case(string_out)
# string_out = StringProcessor.to_lower_case(string_out)
# Remove leading and trailing whitespaces.
string_out = StringProcessor.strip(string_out)
return string_out
Expand Down
22 changes: 15 additions & 7 deletions test_fuzzywuzzy.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ def setUp(self):
self.s4 = "new york mets vs atlanta braves"
self.s5 = "atlanta braves vs new york mets"
self.s6 = "new york mets - atlanta braves"
self.s7 = 'new york city mets - atlanta braves'
self.s7 = "new york city mets - atlanta braves"
self.s8 = "fuzzy wuzzy was a bear"
self.s9 = "fuzzy wuzzy was a BEAR"

self.cirque_strings = [
"cirque du soleil - zarkana - las vegas",
Expand All @@ -113,40 +115,46 @@ def tearDown(self):
def testEqual(self):
self.assertEqual(fuzz.ratio(self.s1, self.s1a), 100)

def testCaseInsensitive(self):
def testCaseSensitive(self):
self.assertNotEqual(fuzz.ratio(self.s1, self.s2), 100)
self.assertEqual(fuzz.ratio(utils.full_process(self.s1), utils.full_process(self.s2)), 100)
self.assertEqual(fuzz.ratio(utils.full_process(self.s1), utils.full_process(self.s2)), 69)

def testPartialRatio(self):
self.assertEqual(fuzz.partial_ratio(self.s1, self.s3), 100)

def testTokenSortRatio(self):
self.assertEqual(fuzz.token_sort_ratio(self.s1, self.s1a), 100)

def testTokenSortRatioCaseSensitive(self):
self.assertEqual(fuzz.token_sort_ratio(self.s8, self.s9), 77)

def testPartialTokenSortRatio(self):
self.assertEqual(fuzz.partial_token_sort_ratio(self.s1, self.s1a), 100)
self.assertEqual(fuzz.partial_token_sort_ratio(self.s4, self.s5), 100)

def testTokenSetRatio(self):
self.assertEqual(fuzz.token_set_ratio(self.s4, self.s5), 100)

def testTokenSetRatioCaseSensitive(self):
self.assertEqual(fuzz.token_set_ratio(self.s8, self.s9), 87)

def testPartialTokenSetRatio(self):
self.assertEqual(fuzz.partial_token_set_ratio(self.s4, self.s7), 100)

def testQuickRatioEqual(self):
self.assertEqual(fuzz.QRatio(self.s1, self.s1a), 100)

def testQuickRatioCaseInsensitive(self):
self.assertEqual(fuzz.QRatio(self.s1, self.s2), 100)
def testQuickRatioCaseSensitive(self):
self.assertEqual(fuzz.QRatio(self.s1, self.s2), 69)

def testQuickRatioNotEqual(self):
self.assertNotEqual(fuzz.QRatio(self.s1, self.s3), 100)

def testWRatioEqual(self):
self.assertEqual(fuzz.WRatio(self.s1, self.s1a), 100)

def testWRatioCaseInsensitive(self):
self.assertEqual(fuzz.WRatio(self.s1, self.s2), 100)
def testWRatioCaseSensitive(self):
self.assertEqual(fuzz.WRatio(self.s1, self.s2), 72)

def testWRatioPartialMatch(self):
# a partial match is scaled by .9
Expand Down