3030 Mapping ,
3131 MutableSequence ,
3232)
33+ from dataclasses import dataclass
3334from functools import lru_cache
3435from os .path import relpath
3536from textwrap import dedent
@@ -99,6 +100,15 @@ def tell(self) -> int: ...
99100FSTRING_END = getattr (tokenize , "FSTRING_END" , None )
100101
101102
103+ @dataclass
104+ class FunctionStackItem :
105+ function_lineno : int
106+ function_name : str
107+ message_lineno : int | None
108+ messages : list [str | None ]
109+ translator_comments : list [tuple [int , str ]]
110+
111+
102112def _strip_comment_tags (comments : MutableSequence [str ], tags : Iterable [str ]):
103113 """Helper function for `extract` that strips comment tags from strings
104114 in a list of comment lines. This function operates in-place.
@@ -507,14 +517,6 @@ def extract_python(
507517 :param options: a dictionary of additional options (optional)
508518 :rtype: ``iterator``
509519 """
510- funcname = lineno = message_lineno = None
511- call_stack = - 1
512- buf = []
513- messages = []
514- translator_comments = []
515- in_def = in_translator_comments = False
516- comment_tag = None
517-
518520 encoding = parse_encoding (fileobj ) or options .get ('encoding' , 'UTF-8' )
519521 future_flags = parse_future_flags (fileobj , encoding )
520522 next_line = lambda : fileobj .readline ().decode (encoding )
@@ -525,108 +527,148 @@ def extract_python(
525527 # currently parsing one.
526528 current_fstring_start = None
527529
530+ # Keep the stack of all function calls and their related contextual variables,
531+ # so we can handle nested gettext calls.
532+ function_stack : list [FunctionStackItem ] = []
533+ # Keep the last encountered function/variable name for when we encounter
534+ # an opening parenthesis
535+ last_name = None
536+ # Keep track of whether we're in a class or function definition
537+ in_def = False
538+ # Keep track of whether we're in a block of translator comments
539+ in_translator_comments = False
540+ # Keep track of the last encountered translator comments
541+ translator_comments = []
542+ # Keep track of the (split) strings encountered
543+ message_buffer = []
544+
528545 for tok , value , (lineno , _ ), _ , _ in tokens :
529- if call_stack == - 1 and tok == NAME and value in ('def' , 'class' ):
546+ if tok == NAME and value in ('def' , 'class' ):
547+ # We're entering a class or function definition
530548 in_def = True
531- elif tok == OP and value == '(' :
532- if in_def :
533- # Avoid false positives for declarations such as:
534- # def gettext(arg='message'):
535- in_def = False
536- continue
537- if funcname :
538- call_stack += 1
539- elif in_def and tok == OP and value == ':' :
540- # End of a class definition without parens
549+ continue
550+
551+ elif in_def and tok == OP and value in ('(' , ':' ):
552+ # We're in a class or function definition and should not do anything
541553 in_def = False
542554 continue
543- elif call_stack == - 1 and tok == COMMENT :
555+
556+ elif tok == OP and value == '(' and last_name :
557+ # We're entering a function call
558+ cur_translator_comments = translator_comments
559+ if function_stack and function_stack [- 1 ].function_lineno == lineno :
560+ # If our current function call is on the same line as the previous one,
561+ # copy their translator comments, since they also apply to us.
562+ cur_translator_comments = function_stack [- 1 ].translator_comments
563+
564+ # We add all information needed later for the current function call
565+ function_stack .append (FunctionStackItem (
566+ function_lineno = lineno ,
567+ function_name = last_name ,
568+ message_lineno = None ,
569+ messages = [],
570+ translator_comments = cur_translator_comments ,
571+ ))
572+ translator_comments = []
573+ message_buffer .clear ()
574+
575+ elif tok == COMMENT :
544576 # Strip the comment token from the line
545577 value = value [1 :].strip ()
546- if in_translator_comments and \
547- translator_comments [- 1 ][0 ] == lineno - 1 :
578+ if in_translator_comments and translator_comments [- 1 ][0 ] == lineno - 1 :
548579 # We're already inside a translator comment, continue appending
549580 translator_comments .append ((lineno , value ))
550581 continue
551- # If execution reaches this point, let's see if comment line
552- # starts with one of the comment tags
582+
553583 for comment_tag in comment_tags :
554584 if value .startswith (comment_tag ):
585+ # Comment starts with one of the comment tags,
586+ # so let's start capturing it
555587 in_translator_comments = True
556588 translator_comments .append ((lineno , value ))
557589 break
558- elif funcname and call_stack == 0 :
559- nested = (tok == NAME and value in keywords )
560- if (tok == OP and value == ')' ) or nested :
561- if buf :
562- messages .append ('' .join (buf ))
563- del buf [:]
590+
591+ elif function_stack and function_stack [- 1 ].function_name in keywords :
592+ # We're inside a translation function call
593+ if tok == OP and value == ')' :
594+ # The call has ended, so we yield the translatable term(s)
595+ messages = function_stack [- 1 ].messages
596+ lineno = (
597+ function_stack [- 1 ].message_lineno
598+ or function_stack [- 1 ].function_lineno
599+ )
600+ cur_translator_comments = function_stack [- 1 ].translator_comments
601+
602+ if message_buffer :
603+ messages .append ('' .join (message_buffer ))
604+ message_buffer .clear ()
564605 else :
565606 messages .append (None )
566607
567608 messages = tuple (messages ) if len (messages ) > 1 else messages [0 ]
568- # Comments don't apply unless they immediately
569- # precede the message
570- if translator_comments and \
571- translator_comments [- 1 ][0 ] < message_lineno - 1 :
572- translator_comments = []
609+ if (
610+ cur_translator_comments
611+ and cur_translator_comments [- 1 ][0 ] < lineno - 1
612+ ):
613+ # The translator comments are not immediately preceding the current
614+ # term, so we skip them.
615+ cur_translator_comments = []
616+
617+ yield (
618+ lineno ,
619+ function_stack [- 1 ].function_name ,
620+ messages ,
621+ [comment [1 ] for comment in cur_translator_comments ],
622+ )
623+
624+ function_stack .pop ()
573625
574- yield (message_lineno , funcname , messages ,
575- [comment [1 ] for comment in translator_comments ])
576-
577- funcname = lineno = message_lineno = None
578- call_stack = - 1
579- messages = []
580- translator_comments = []
581- in_translator_comments = False
582- if nested :
583- funcname = value
584626 elif tok == STRING :
585- val = _parse_python_string (value , encoding , future_flags )
586- if val is not None :
587- if not message_lineno :
588- message_lineno = lineno
589- buf .append (val )
627+ # We've encountered a string inside a translation function call
628+ string_value = _parse_python_string (value , encoding , future_flags )
629+ if not function_stack [- 1 ].message_lineno :
630+ function_stack [- 1 ].message_lineno = lineno
631+ if string_value is not None :
632+ message_buffer .append (string_value )
590633
591634 # Python 3.12+, see https://peps.python.org/pep-0701/#new-tokens
592635 elif tok == FSTRING_START :
593636 current_fstring_start = value
594- if not message_lineno :
595- message_lineno = lineno
596637 elif tok == FSTRING_MIDDLE :
597638 if current_fstring_start is not None :
598639 current_fstring_start += value
599640 elif tok == FSTRING_END :
600641 if current_fstring_start is not None :
601642 fstring = current_fstring_start + value
602- val = _parse_python_string (fstring , encoding , future_flags )
603- if val is not None :
604- buf .append (val )
643+ string_value = _parse_python_string (fstring , encoding , future_flags )
644+ if string_value is not None :
645+ message_buffer .append (string_value )
605646
606647 elif tok == OP and value == ',' :
607- if buf :
608- messages .append ('' .join (buf ))
609- del buf [:]
648+ # End of a function call argument
649+ if message_buffer :
650+ function_stack [- 1 ].messages .append ('' .join (message_buffer ))
651+ message_buffer .clear ()
610652 else :
611- messages .append (None )
612- if translator_comments :
613- # We have translator comments, and since we're on a
614- # comma(,) user is allowed to break into a new line
615- # Let's increase the last comment's lineno in order
616- # for the comment to still be a valid one
617- old_lineno , old_comment = translator_comments .pop ()
618- translator_comments .append ((old_lineno + 1 , old_comment ))
619-
620- elif tok != NL and not message_lineno :
621- message_lineno = lineno
622- elif call_stack > 0 and tok == OP and value == ')' :
623- call_stack -= 1
624- elif funcname and call_stack == - 1 :
625- funcname = None
626- elif tok == NAME and value in keywords :
627- funcname = value
653+ function_stack [- 1 ].messages .append (None )
628654
629- if current_fstring_start is not None and tok not in {FSTRING_START , FSTRING_MIDDLE }:
655+ elif function_stack and tok == OP and value == ')' :
656+ function_stack .pop ()
657+
658+ if in_translator_comments and translator_comments [- 1 ][0 ] < lineno :
659+ # We have a newline in between the comments, so they don't belong
660+ # together anymore
661+ in_translator_comments = False
662+
663+ if tok == NAME :
664+ last_name = value
665+ if function_stack and not function_stack [- 1 ].message_lineno :
666+ function_stack [- 1 ].message_lineno = lineno
667+
668+ if (
669+ current_fstring_start is not None
670+ and tok not in {FSTRING_START , FSTRING_MIDDLE }
671+ ):
630672 # In Python 3.12, tokens other than FSTRING_* mean the
631673 # f-string is dynamic, so we don't want to extract it.
632674 # And if it's FSTRING_END, we've already handled it above.
0 commit comments