@@ -502,14 +502,6 @@ def extract_python(
502502 :param options: a dictionary of additional options (optional)
503503 :rtype: ``iterator``
504504 """
505- funcname = lineno = message_lineno = None
506- call_stack = - 1
507- buf = []
508- messages = []
509- translator_comments = []
510- in_def = in_translator_comments = False
511- comment_tag = None
512-
513505 encoding = parse_encoding (fileobj ) or options .get ('encoding' , 'UTF-8' )
514506 future_flags = parse_future_flags (fileobj , encoding )
515507 next_line = lambda : fileobj .readline ().decode (encoding )
@@ -520,103 +512,147 @@ def extract_python(
520512 # currently parsing one.
521513 current_fstring_start = None
522514
523- for tok , value , (lineno , _ ), _ , _ in tokens :
524- if call_stack == - 1 and tok == NAME and value in ('def' , 'class' ):
515+ # Keep the stack of all function calls and its related contextual variables,
516+ # so we can handle nested gettext calls.
517+ function_stack = []
518+ # Keep the last encountered function/variable name for when we encounter
519+ # an opening parenthesis
520+ last_name = None
521+ # Keep track of whether we're in a class or function definition
522+ in_def = False
523+ # Keep track of whether we're in a block of translator comments
524+ in_translator_comments = False
525+ # Keep track of the last encountered translator comments
526+ translator_comments = []
527+ # Keep track of the (split) strings encountered
528+ message_buffer = []
529+
530+ for token , value , (line_no , _ ), _ , _ in tokens :
531+ if token == NAME and value in ('def' , 'class' ):
532+ # We're entering a class or function definition
525533 in_def = True
526- elif tok == OP and value == '(' :
527- if in_def :
528- # Avoid false positives for declarations such as:
529- # def gettext(arg='message'):
530- in_def = False
531- continue
532- if funcname :
533- message_lineno = lineno
534- call_stack += 1
535- elif in_def and tok == OP and value == ':' :
536- # End of a class definition without parens
534+ continue
535+
536+ elif in_def and token == OP and value in ('(' , ':' ):
537+ # We're in a class or function definition and should not do anything
537538 in_def = False
538539 continue
539- elif call_stack == - 1 and tok == COMMENT :
540+
541+ elif token == OP and value == '(' and last_name :
542+ # We're entering a function call
543+ cur_translator_comments = translator_comments
544+ if function_stack and function_stack [- 1 ]['function_line_no' ] == line_no :
545+ # If our current function call is on the same line as the previous one,
546+ # copy their translator comments, since they also apply to us.
547+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
548+
549+ # We add all information needed later for the current function call
550+ function_stack .append ({
551+ 'function_line_no' : line_no ,
552+ 'function_name' : last_name ,
553+ 'message_line_no' : None ,
554+ 'messages' : [],
555+ 'translator_comments' : cur_translator_comments ,
556+ })
557+ translator_comments = []
558+ message_buffer .clear ()
559+
560+ elif token == COMMENT :
540561 # Strip the comment token from the line
541562 value = value [1 :].strip ()
542- if in_translator_comments and \
543- translator_comments [- 1 ][0 ] == lineno - 1 :
563+ if in_translator_comments and translator_comments [- 1 ][0 ] == line_no - 1 :
544564 # We're already inside a translator comment, continue appending
545- translator_comments .append ((lineno , value ))
565+ translator_comments .append ((line_no , value ))
546566 continue
547- # If execution reaches this point, let's see if comment line
548- # starts with one of the comment tags
567+
549568 for comment_tag in comment_tags :
550569 if value .startswith (comment_tag ):
570+ # Comment starts with one of the comment tags,
571+ # so let's start capturing it
551572 in_translator_comments = True
552- translator_comments .append ((lineno , value ))
573+ translator_comments .append ((line_no , value ))
553574 break
554- elif funcname and call_stack == 0 :
555- nested = (tok == NAME and value in keywords )
556- if (tok == OP and value == ')' ) or nested :
557- if buf :
558- messages .append ('' .join (buf ))
559- del buf [:]
575+
576+ elif function_stack and function_stack [- 1 ]['function_name' ] in keywords :
577+ # We're inside a translation function call
578+ if token == OP and value == ')' :
579+ # The call has ended, so we yield the translatable term(s)
580+ messages = function_stack [- 1 ]['messages' ]
581+ line_no = (
582+ function_stack [- 1 ]['message_line_no' ]
583+ or function_stack [- 1 ]['function_line_no' ]
584+ )
585+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
586+
587+ if message_buffer :
588+ messages .append ('' .join (message_buffer ))
589+ message_buffer .clear ()
560590 else :
561591 messages .append (None )
562592
563593 messages = tuple (messages ) if len (messages ) > 1 else messages [0 ]
564- # Comments don't apply unless they immediately
565- # precede the message
566- if translator_comments and \
567- translator_comments [- 1 ][0 ] < message_lineno - 1 :
568- translator_comments = []
569-
570- yield (message_lineno , funcname , messages ,
571- [comment [1 ] for comment in translator_comments ])
572-
573- funcname = lineno = message_lineno = None
574- call_stack = - 1
575- messages = []
576- translator_comments = []
577- in_translator_comments = False
578- if nested :
579- funcname = value
580- elif tok == STRING :
581- val = _parse_python_string (value , encoding , future_flags )
582- if val is not None :
583- buf .append (val )
594+ if (
595+ cur_translator_comments
596+ and cur_translator_comments [- 1 ][0 ] < line_no - 1
597+ ):
598+ # The translator comments are not immediately preceding the current
599+ # term, so we skip them.
600+ cur_translator_comments = []
601+
602+ yield (
603+ line_no ,
604+ function_stack [- 1 ]['function_name' ],
605+ messages ,
606+ [comment [1 ] for comment in cur_translator_comments ],
607+ )
608+
609+ function_stack .pop ()
610+
611+ elif token == STRING :
612+ # We've encountered a string inside a translation function call
613+ string_value = _parse_python_string (value , encoding , future_flags )
614+ if not function_stack [- 1 ]['message_line_no' ]:
615+ function_stack [- 1 ]['message_line_no' ] = line_no
616+ if string_value is not None :
617+ message_buffer .append (string_value )
584618
585619 # Python 3.12+, see https://peps.python.org/pep-0701/#new-tokens
586- elif tok == FSTRING_START :
620+ elif token == FSTRING_START :
587621 current_fstring_start = value
588- elif tok == FSTRING_MIDDLE :
622+ elif token == FSTRING_MIDDLE :
589623 if current_fstring_start is not None :
590624 current_fstring_start += value
591- elif tok == FSTRING_END :
625+ elif token == FSTRING_END :
592626 if current_fstring_start is not None :
593627 fstring = current_fstring_start + value
594- val = _parse_python_string (fstring , encoding , future_flags )
595- if val is not None :
596- buf .append (val )
597-
598- elif tok == OP and value == ',' :
599- if buf :
600- messages .append ('' .join (buf ))
601- del buf [:]
628+ string_value = _parse_python_string (fstring , encoding , future_flags )
629+ if string_value is not None :
630+ message_buffer .append (string_value )
631+
632+ elif token == OP and value == ',' :
633+ # End of a function call argument
634+ if message_buffer :
635+ function_stack [- 1 ]['messages' ].append ('' .join (message_buffer ))
636+ message_buffer .clear ()
602637 else :
603- messages .append (None )
604- if translator_comments :
605- # We have translator comments, and since we're on a
606- # comma(,) user is allowed to break into a new line
607- # Let's increase the last comment's lineno in order
608- # for the comment to still be a valid one
609- old_lineno , old_comment = translator_comments . pop ()
610- translator_comments . append (( old_lineno + 1 , old_comment ))
611- elif call_stack > 0 and tok == OP and value == ')' :
612- call_stack -= 1
613- elif funcname and call_stack == - 1 :
614- funcname = None
615- elif tok == NAME and value in keywords :
616- funcname = value
638+ function_stack [ - 1 ][ ' messages' ] .append (None )
639+
640+ elif function_stack and token == OP and value == ')' :
641+ function_stack . pop ()
642+
643+ if in_translator_comments and translator_comments [ - 1 ][ 0 ] < line_no :
644+ # We have a newline in between the comments, so they don't belong
645+ # together anymore
646+ in_translator_comments = False
647+
648+ if token == NAME :
649+ last_name = value
650+ if function_stack and not function_stack [ - 1 ][ 'message_line_no' ] :
651+ function_stack [ - 1 ][ 'message_line_no' ] = line_no
617652
618- if (current_fstring_start is not None
619- and tok not in {FSTRING_START , FSTRING_MIDDLE }
653+ if (
654+ current_fstring_start is not None
655+ and token not in {FSTRING_START , FSTRING_MIDDLE }
620656 ):
621657 # In Python 3.12, tokens other than FSTRING_* mean the
622658 # f-string is dynamic, so we don't want to extract it.
0 commit comments