@@ -502,14 +502,6 @@ def extract_python(
502502 :param options: a dictionary of additional options (optional)
503503 :rtype: ``iterator``
504504 """
505- funcname = lineno = message_lineno = None
506- call_stack = - 1
507- buf = []
508- messages = []
509- translator_comments = []
510- in_def = in_translator_comments = False
511- comment_tag = None
512-
513505 encoding = parse_encoding (fileobj ) or options .get ('encoding' , 'UTF-8' )
514506 future_flags = parse_future_flags (fileobj , encoding )
515507 next_line = lambda : fileobj .readline ().decode (encoding )
@@ -520,103 +512,145 @@ def extract_python(
520512 # currently parsing one.
521513 current_fstring_start = None
522514
523- for tok , value , (lineno , _ ), _ , _ in tokens :
524- if call_stack == - 1 and tok == NAME and value in ('def' , 'class' ):
515+ # Keep the stack of all function calls and their related contextual variables,
516+ # so we can handle nested gettext calls.
517+ function_stack = []
518+ # Keep the last encountered function name for when we encounter
519+ # an opening parenthesis
520+ last_function_name = None
521+ # Keep track of whether we're in a class or function definition
522+ in_def = False
523+ # Keep track of whether we're in a block of translator comments
524+ in_translator_comments = False
525+ # Keep track of the last encountered translator comments
526+ translator_comments = []
527+ # Keep track of the (split) strings encountered
528+ message_buffer = []
529+
530+ for token , value , (line_no , _ ), _ , _ in tokens :
531+ if not function_stack and token == NAME and value in ('def' , 'class' ):
532+ # We're entering a class or function definition
525533 in_def = True
526- elif tok == OP and value == '(' :
527- if in_def :
528- # Avoid false positives for declarations such as:
529- # def gettext(arg='message'):
530- in_def = False
531- continue
532- if funcname :
533- message_lineno = lineno
534- call_stack += 1
535- elif in_def and tok == OP and value == ':' :
536- # End of a class definition without parens
534+
535+ elif in_def and token == OP and value in ('(' , ':' ):
536+ # We're in a class or function definition and should not do anything
537537 in_def = False
538538 continue
539- elif call_stack == - 1 and tok == COMMENT :
539+
540+ elif token == OP and value == '(' and last_function_name :
541+ # We're entering a function call
542+ cur_translator_comments = translator_comments
543+ if function_stack and function_stack [- 1 ]['function_line_no' ] == line_no :
544+ # If our current function call is on the same line as the previous one,
545+ # copy their translator comments, since they also apply to us.
546+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
547+
548+ # We add all information needed later for the current function call
549+ function_stack .append ({
550+ 'function_line_no' : line_no ,
551+ 'function_name' : last_function_name ,
552+ 'message_line_no' : None ,
553+ 'messages' : [],
554+ 'translator_comments' : cur_translator_comments ,
555+ })
556+ translator_comments = []
557+
558+ elif token == COMMENT :
540559 # Strip the comment token from the line
541560 value = value [1 :].strip ()
542- if in_translator_comments and \
543- translator_comments [- 1 ][0 ] == lineno - 1 :
561+ if in_translator_comments and translator_comments [- 1 ][0 ] == line_no - 1 :
544562 # We're already inside a translator comment, continue appending
545- translator_comments .append ((lineno , value ))
563+ translator_comments .append ((line_no , value ))
546564 continue
547- # If execution reaches this point, let's see if comment line
548- # starts with one of the comment tags
565+
549566 for comment_tag in comment_tags :
550567 if value .startswith (comment_tag ):
568+ # Comment starts with one of the comment tags,
569+ # so let's start capturing it
551570 in_translator_comments = True
552- translator_comments .append ((lineno , value ))
571+ translator_comments .append ((line_no , value ))
553572 break
554- elif funcname and call_stack == 0 :
555- nested = (tok == NAME and value in keywords )
556- if (tok == OP and value == ')' ) or nested :
557- if buf :
558- messages .append ('' .join (buf ))
559- del buf [:]
573+
574+ elif function_stack and function_stack [- 1 ]['function_name' ] in keywords :
575+ # We're inside a translation function call
576+ if token == OP and value == ')' :
577+ # The call has ended, so we yield the translatable term(s)
578+ messages = function_stack [- 1 ]['messages' ]
579+ line_no = (
580+ function_stack [- 1 ]['message_line_no' ]
581+ or function_stack [- 1 ]['function_line_no' ]
582+ )
583+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
584+
585+ if message_buffer :
586+ messages .append ('' .join (message_buffer ))
587+ message_buffer .clear ()
560588 else :
561589 messages .append (None )
562590
563591 messages = tuple (messages ) if len (messages ) > 1 else messages [0 ]
564- # Comments don't apply unless they immediately
565- # precede the message
566- if translator_comments and \
567- translator_comments [- 1 ][0 ] < message_lineno - 1 :
568- translator_comments = []
569-
570- yield (message_lineno , funcname , messages ,
571- [comment [1 ] for comment in translator_comments ])
572-
573- funcname = lineno = message_lineno = None
574- call_stack = - 1
575- messages = []
576- translator_comments = []
577- in_translator_comments = False
578- if nested :
579- funcname = value
580- elif tok == STRING :
581- val = _parse_python_string (value , encoding , future_flags )
582- if val is not None :
583- buf .append (val )
592+ if (
593+ cur_translator_comments
594+ and cur_translator_comments [- 1 ][0 ] < line_no - 1
595+ ):
596+ # The translator comments are not immediately preceding the current
597+ # term, so we skip them.
598+ cur_translator_comments = []
599+
600+ yield (
601+ line_no ,
602+ function_stack [- 1 ]['function_name' ],
603+ messages ,
604+ [comment [1 ] for comment in cur_translator_comments ],
605+ )
606+
607+ function_stack .pop ()
608+
609+ elif token == STRING :
610+ # We've encountered a string inside a translation function call
611+ string_value = _parse_python_string (value , encoding , future_flags )
612+ if not function_stack [- 1 ]['message_line_no' ]:
613+ function_stack [- 1 ]['message_line_no' ] = line_no
614+ if string_value is not None :
615+ message_buffer .append (string_value )
584616
585617 # Python 3.12+, see https://peps.python.org/pep-0701/#new-tokens
586- elif tok == FSTRING_START :
618+ elif token == FSTRING_START :
587619 current_fstring_start = value
588- elif tok == FSTRING_MIDDLE :
620+ elif token == FSTRING_MIDDLE :
589621 if current_fstring_start is not None :
590622 current_fstring_start += value
591- elif tok == FSTRING_END :
623+ elif token == FSTRING_END :
592624 if current_fstring_start is not None :
593625 fstring = current_fstring_start + value
594- val = _parse_python_string (fstring , encoding , future_flags )
595- if val is not None :
596- buf .append (val )
597-
598- elif tok == OP and value == ',' :
599- if buf :
600- messages .append ('' .join (buf ))
601- del buf [:]
626+ string_value = _parse_python_string (fstring , encoding , future_flags )
627+ if string_value is not None :
628+ message_buffer .append (string_value )
629+
630+ elif token == OP and value == ',' :
631+ # End of a function call argument
632+ if message_buffer :
633+ function_stack [- 1 ]['messages' ].append ('' .join (message_buffer ))
634+ message_buffer .clear ()
602635 else :
603- messages .append (None )
604- if translator_comments :
605- # We have translator comments, and since we're on a
606- # comma(,) user is allowed to break into a new line
607- # Let's increase the last comment's lineno in order
608- # for the comment to still be a valid one
609- old_lineno , old_comment = translator_comments . pop ()
610- translator_comments . append (( old_lineno + 1 , old_comment ))
611- elif call_stack > 0 and tok == OP and value == ')' :
612- call_stack -= 1
613- elif funcname and call_stack == - 1 :
614- funcname = None
615- elif tok == NAME and value in keywords :
616- funcname = value
636+ function_stack [ - 1 ][ ' messages' ] .append (None )
637+
638+ elif function_stack and token == OP and value == ')' :
639+ function_stack . pop ()
640+
641+ if in_translator_comments and translator_comments [ - 1 ][ 0 ] < line_no :
642+ # We have a newline in between the comments, so they don't belong
643+ # together anymore
644+ in_translator_comments = False
645+
646+ if token == NAME :
647+ last_function_name = value
648+ if function_stack and not function_stack [ - 1 ][ 'message_line_no' ] :
649+ function_stack [ - 1 ][ 'message_line_no' ] = line_no
617650
618- if (current_fstring_start is not None
619- and tok not in {FSTRING_START , FSTRING_MIDDLE }
651+ if (
652+ current_fstring_start is not None
653+ and token not in {FSTRING_START , FSTRING_MIDDLE }
620654 ):
621655 # In Python 3.12, tokens other than FSTRING_* mean the
622656 # f-string is dynamic, so we don't want to extract it.
0 commit comments