-
Notifications
You must be signed in to change notification settings - Fork 124
Open
Description
This seems to work properly on a large dataset (tested on a 40k-sentence dataset; last checked Oct 2024).
This may not provide all the features offered by the original library.
- You may need to implement a feature that skips a sentence after a few rejections, as the API refuses to spell-check certain sentences.
import requests
import json
import re
class SpellChecker:
    """Minimal client for the spell checker embedded in Naver search results.

    The checker endpoint and its ``passportKey`` are not fixed: both are
    scraped from a Naver search results page and cached on the instance, so
    the page is fetched only while either value is still unknown.

    Attributes are documented inline in ``__init__``.
    """

    # Search page whose HTML embeds the checker endpoint URL.
    _SEARCH_URL = "https://search.naver.com/search.naver?query=%EB%A7%9E%EC%B6%A9%EB%B2%95%20%EA%B2%80%EC%82%AC%EA%B8%B0"

    # One header set shared by both requests, so the page fetch and the
    # checker call present the same browser identity.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
        'Referer': 'https://search.naver.com/',
    }

    # Matches the quoted checker URL (including its query string) inside the
    # inline ``new SpellingCheck({API:{checker:"..."},selector`` call.
    _CHECKER_URL_PATTERN = re.compile(
        r'(?<={new SpellingCheck\({API:{checker:").*?(?="},selector)'
    )

    # Separates the endpoint URL from the key inside the scraped value.
    _KEY_SEPARATOR = "?passportKey="

    # Returned (not raised) by fetch_passport_key for backward compatibility.
    _FETCH_ERROR = "Error: Unable to retrieve passport key"

    def __init__(self, timeout=None):
        """Create a checker with no cached endpoint or key.

        Args:
            timeout: Optional timeout forwarded to every ``requests.get``
                call. ``None`` (the default) means no timeout is applied.
        """
        self.passport_key = None  # key sent as the 'passportKey' parameter
        self.base_url = None  # checker endpoint URL without its query string
        self.timeout = timeout

    @staticmethod
    def _parse_checker_endpoint(page_html):
        """Extract ``(base_url, passport_key)`` from the search page HTML.

        Returns:
            A 2-tuple of strings, or ``None`` when the checker URL is absent
            or does not contain a ``?passportKey=`` part.
        """
        match = SpellChecker._CHECKER_URL_PATTERN.search(page_html)
        if not match:
            return None
        # partition() never raises, unlike unpacking the result of split(),
        # which fails when the separator is missing or repeated.
        base_url, separator, passport_key = match.group(0).partition(
            SpellChecker._KEY_SEPARATOR
        )
        if not separator:
            return None
        return base_url, passport_key

    def fetch_passport_key(self):
        """Scrape the checker endpoint and passport key and cache them.

        Returns:
            ``None`` on success. On failure the string
            ``"Error: Unable to retrieve passport key"`` is returned and the
            cached values are left untouched.
        """
        response = requests.get(
            self._SEARCH_URL, headers=self._HEADERS, timeout=self.timeout
        )
        endpoint = self._parse_checker_endpoint(response.text)
        if endpoint is None:
            return self._FETCH_ERROR
        self.base_url, self.passport_key = endpoint
        return None

    def spell_check(self, text):
        """Send ``text`` to the checker and return the ``notag_html`` result.

        Args:
            text: The sentence to check; sent as the ``q`` parameter.

        Returns:
            The value at ``['message']['result']['notag_html']`` of the JSON
            response (presumably the corrected text without markup; confirm
            against a live response).

        Raises:
            requests.exceptions.RequestException: If the endpoint or key
                could not be scraped, or if the HTTP request itself fails.
            KeyError: If the response JSON lacks the expected fields.
        """
        if self.passport_key is None or self.base_url is None:
            error = self.fetch_passport_key()
            if error is not None:
                # Fail here with the real cause instead of letting requests
                # complain about the URL 'None' a few lines further down.
                raise requests.exceptions.RequestException(error)
        payload = {
            'passportKey': self.passport_key,
            'where': 'nexearch',
            'color_blindness': 0,
            'q': text
        }
        result_response = requests.get(
            self.base_url,
            headers=self._HEADERS,
            params=payload,
            timeout=self.timeout,
        )
        return json.loads(result_response.text)['message']['result']['notag_html']
# Usage: create a checker, spell-check one sentence and print the result.
# The first spell_check() call also fetches the passport key over the network.
checker = SpellChecker()
result = checker.spell_check("sentence for spell check")
print(result)
# Trailing page text that was fused onto the line above (not code):
# yeong-hwan, cho104 and junyeong-nero
Metadata
Metadata
Assignees
Labels
No labels