Skip to content

Commit ceb2994

Browse files
Clean up some imports
1 parent 01ebb16 commit ceb2994

File tree

1 file changed

+3
-22
lines changed

1 file changed

+3
-22
lines changed

formfyxer/lit_explorer.py

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
FieldType,
2626
unlock_pdf_in_place,
2727
is_tagged,
28+
get_original_text_with_fields,
2829
)
2930

3031
import math
@@ -112,7 +113,7 @@ def _truncate_to_token_limit(
112113
return text
113114
return encoding.decode(tokens[:max_tokens])
114115

115-
stop_words = {
116+
STOP_WORDS = {
116117
'a','about','above','after','again','against','all','am','an','and','any','are','aren','as','at',
117118
'be','because','been','before','being','below','between','both','but','by',
118119
'could','did','do','does','doing','down','during',
@@ -311,24 +312,7 @@ def reformat_field(text: str, max_length: int = 30, tools_token: Optional[str] =
311312
if word not in deduped_sentence:
312313
deduped_sentence.append(word)
313314
# Drop stop words using the module-level STOP_WORDS list (exported from passive voice detection)
314-
local_stop_words = {
315-
'a','about','above','after','again','against','all','am','an','and','any','are','aren','as','at',
316-
'be','because','been','before','being','below','between','both','but','by',
317-
'could','did','do','does','doing','down','during',
318-
'each','few','for','from','further',
319-
'had','has','have','having','he','her','here','hers','herself','him','himself','his','how',
320-
'i','if','in','into','is','it','its','itself',
321-
'just',
322-
'me','more','most','my','myself',
323-
'no','nor','not',
324-
'of','off','on','once','only','or','other','our','ours','ourselves','out','over','own',
325-
'same','she','should','so','some','such',
326-
'than','that','the','their','theirs','them','themselves','then','there','these','they','this','those','through','to','too',
327-
'under','until','up','very',
328-
'was','we','were','what','when','where','which','while','who','whom','why','will','with','you','your','yours','yourself','yourselves'
329-
}
330-
331-
filtered_sentence = [w for w in deduped_sentence if w.lower() not in local_stop_words]
315+
filtered_sentence = [w for w in deduped_sentence if w.lower() not in STOP_WORDS]
332316
candidate_words = filtered_sentence or deduped_sentence
333317

334318
sanitized_words: List[str] = []
@@ -569,9 +553,6 @@ def rename_pdf_fields_with_context(
569553
return {}
570554

571555
try:
572-
# Import here to avoid circular imports
573-
from .pdf_wrangling import get_original_text_with_fields
574-
575556
# Get PDF text with field markers
576557
with tempfile.NamedTemporaryFile(mode='w+', suffix='.txt', delete=False) as temp_file:
577558
try:

0 commit comments

Comments
 (0)