Skip to content

Commit f44849c

Browse files
Merge pull request #639 from Crozzers/fix-em-issue637
Fix middle-word-em interfering with strongs (#637)
2 parents 6621827 + 1acbf8f commit f44849c

File tree

6 files changed

+33
-28
lines changed

6 files changed

+33
-28
lines changed

CHANGES.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
## python-markdown2 2.5.5 (not yet released)
44

5-
(nothing yet)
5+
- [pull #639] Fix middle-word-em interfering with strongs (#637)
66

77

88
## python-markdown2 2.5.4

lib/markdown2.py

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3309,42 +3309,42 @@ def __init__(self, md: Markdown, options: Union[dict, bool, None]):
33093309
options.setdefault('allowed', True)
33103310
super().__init__(md, options)
33113311

3312-
self.liberal_em_re = self.em_re
3313-
if not options['allowed']:
3314-
self.em_re = re.compile(r'(?<=\b)%s(?=\b)' % self.em_re.pattern, self.em_re.flags)
3315-
self.liberal_em_re = re.compile(
3316-
r'''
3317-
( # \1 - must be a single em char in the middle of a word
3318-
(?<![*_\s]) # cannot be preceded by em character or whitespace (must be in middle of word)
3319-
[*_] # em character
3320-
(?![*_]) # cannot be followed by another em char
3321-
)
3322-
(?=\S) # em opening must be followed by non-whitespace text
3323-
(.*?\S) # the emphasized text
3324-
\1 # closing char
3325-
(?!\s|$) # must not be followed by whitespace (middle of word) or EOF
3326-
'''
3327-
, re.S | re.X)
3312+
self.middle_word_em_re = re.compile(
3313+
r'''
3314+
(?<!^) # To be in the middle of a word, it cannot be at the start of the input or of a line (re.M)
3315+
(?<![*_\s]) # cannot be preceded by em character or whitespace (must be in middle of word)
3316+
([*_]) # em char
3317+
(?=\S) # must be followed by non-whitespace char
3318+
(?![*_]|$|\W) # cannot be followed by another em char, EOF or a non-word char
3319+
''', re.X | re.M
3320+
)
3321+
3322+
# add a prefix to it so we don't interfere with escaped/hashed chars from other stages
3323+
self.hash_table['_'] = _hash_text(self.name + '_')
3324+
self.hash_table['*'] = _hash_text(self.name + '*')
33283325

33293326
def run(self, text):
33303327
if self.options['allowed']:
33313328
# if middle word em is allowed, do nothing. This extra's only use is to prevent them
33323329
return text
33333330

3334-
# run strong and whatnot first
3335-
# this also will process all strict ems
3336-
text = super().run(text)
3331+
# hash all em chars in the middle of words to prevent em_re from picking up on them
33373332
if self.md.order < self.md.stage:
3338-
# hash all non-valid ems
3339-
text = self.liberal_em_re.sub(self.sub_hash, text)
3333+
text = self.middle_word_em_re.sub(self.sub, text)
3334+
3335+
# put all the em chars back
3336+
if self.md.order > self.md.stage:
3337+
text = text.replace(self.hash_table['_'], '_')
3338+
text = text.replace(self.hash_table['*'], '*')
3339+
33403340
return text
33413341

3342-
def sub(self, match: re.Match) -> str:
3343-
syntax = match.group(1)
3344-
if len(syntax) != 1:
3345-
# strong syntax
3342+
def sub(self, match: re.Match):
3343+
if match.re != self.middle_word_em_re:
33463344
return super().sub(match)
3347-
return '<em>%s</em>' % match.group(2)
3345+
3346+
syntax = match.group(1)
3347+
return self.hash_table[syntax]
33483348

33493349

33503350
class Numbering(Extra):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<p>Visit <a href="https://github.com"><strong>GitHub</strong></a> for code repositories and
2+
<a href="https://stackoverflow.com"><strong>Stack Overflow</strong></a> for programming help.</p>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{'extras': {'middle-word-em': False}}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Visit [**GitHub**](https://github.com) for code repositories and
2+
[**Stack Overflow**](https://stackoverflow.com) for programming help.

test/tm-cases/middle_word_em_with_extra_ems.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@
1616

1717
<p><em>one*two*three</em></p>
1818

19-
<p><em>one<em>two</em>three</em></p>
19+
<p><em>one*two*three</em></p>

0 commit comments

Comments
 (0)