@@ -721,54 +721,109 @@ def extract_javascript(
721721 :param lineno: line number offset (for parsing embedded fragments)
722722 """
723723 from babel .messages .jslexer import Token , tokenize , unquote_string
724- funcname = message_lineno = None
725- messages = []
726- last_argument = None
727- translator_comments = []
728- concatenate_next = False
724+
729725 encoding = options .get ('encoding' , 'utf-8' )
730- last_token = None
731- call_stack = - 1
732726 dotted = any ('.' in kw for kw in keywords )
727+ last_token = None
728+ # Keep the stack of all function calls and their related contextual variables,
729+ # so we can handle nested gettext calls.
730+ function_stack : list [FunctionStackItem ] = []
731+ # Keep track of whether we're in a class or function definition
732+ in_def = False
733+ # Keep track of whether we're in a block of translator comments
734+ in_translator_comments = False
735+ # Keep track of the last encountered translator comments
736+ translator_comments = []
737+ # Keep track of the (split) strings encountered
738+ message_buffer = []
739+
733740 for token in tokenize (
734741 fileobj .read ().decode (encoding ),
735742 jsx = options .get ("jsx" , True ),
736743 template_string = options .get ("template_string" , True ),
737744 dotted = dotted ,
738745 lineno = lineno ,
739746 ):
740- if ( # Turn keyword`foo` expressions into keyword("foo") calls:
741- funcname and # have a keyword...
742- (last_token and last_token .type == 'name' ) and # we've seen nothing after the keyword...
743- token .type == 'template_string' # this is a template string
747+ if token .type == 'name' and token .value in ('class' , 'function' ):
748+ # We're entering a class or function definition
749+ in_def = True
750+
751+ elif in_def and token .type == 'operator' and token .value in ('(' , '{' ):
752+ # We're in a class or function definition and should not do anything
753+ in_def = False
754+ continue
755+
756+ elif (
757+ last_token
758+ and last_token .type == 'name'
759+ and last_token .value in keywords
760+ and token .type == 'template_string'
744761 ):
745- message_lineno = token .lineno
746- messages = [unquote_string (token .value )]
747- call_stack = 0
762+ # Turn keyword`foo` expressions into keyword("foo") function calls
763+ string_value = unquote_string (token .value )
764+ cur_translator_comments = translator_comments
765+ if function_stack and function_stack [- 1 ].function_lineno == last_token .lineno :
766+ # If our current function call is on the same line as the previous one,
767+ # copy their translator comments, since they also apply to us.
768+ cur_translator_comments = function_stack [- 1 ].translator_comments
769+
770+ # We add all information needed later for the current function call
771+ function_stack .append (FunctionStackItem (
772+ function_lineno = last_token .lineno ,
773+ function_name = last_token .value ,
774+ message_lineno = token .lineno ,
775+ messages = [string_value ],
776+ translator_comments = cur_translator_comments ,
777+ ))
778+ translator_comments = []
779+
780+ # We act as if we are closing the function call now
748781 token = Token ('operator' , ')' , token .lineno )
749782
750- if options .get ('parse_template_string' ) and not funcname and token .type == 'template_string' :
783+ if (
784+ options .get ('parse_template_string' )
785+ and (not last_token or last_token .type != 'name' or last_token .value not in keywords )
786+ and token .type == 'template_string'
787+ ):
751788 yield from parse_template_string (token .value , keywords , comment_tags , options , token .lineno )
752789
753790 elif token .type == 'operator' and token .value == '(' :
754- if funcname :
755- message_lineno = token .lineno
756- call_stack += 1
791+ if last_token .type == 'name' :
792+ # We're entering a function call
793+ cur_translator_comments = translator_comments
794+ if function_stack and function_stack [- 1 ].function_lineno == token .lineno :
795+ # If our current function call is on the same line as the previous one,
796+ # copy their translator comments, since they also apply to us.
797+ cur_translator_comments = function_stack [- 1 ].translator_comments
798+
799+ # We add all information needed later for the current function call
800+ function_stack .append (FunctionStackItem (
801+ function_lineno = token .lineno ,
802+ function_name = last_token .value ,
803+ message_lineno = None ,
804+ messages = [],
805+ translator_comments = cur_translator_comments ,
806+ ))
807+ translator_comments = []
757808
758- elif call_stack == - 1 and token .type == 'linecomment' :
809+ elif token .type == 'linecomment' :
810+ # Strip the comment token from the line
759811 value = token .value [2 :].strip ()
760- if translator_comments and \
761- translator_comments [ - 1 ][ 0 ] == token . lineno - 1 :
812+ if in_translator_comments and translator_comments [ - 1 ][ 0 ] == token . lineno - 1 :
813+ # We're already inside a translator comment, continue appending
762814 translator_comments .append ((token .lineno , value ))
763815 continue
764816
765817 for comment_tag in comment_tags :
766818 if value .startswith (comment_tag ):
767- translator_comments .append ((token .lineno , value .strip ()))
819+ # Comment starts with one of the comment tags,
820+ # so let's start capturing it
821+ in_translator_comments = True
822+ translator_comments .append ((token .lineno , value ))
768823 break
769824
770825 elif token .type == 'multilinecomment' :
771- # only one multi-line comment may precede a translation
826+ # Only one multi-line comment may precede a translation
772827 translator_comments = []
773828 value = token .value [2 :- 2 ].strip ()
774829 for comment_tag in comment_tags :
@@ -778,68 +833,67 @@ def extract_javascript(
778833 lines [0 ] = lines [0 ].strip ()
779834 lines [1 :] = dedent ('\n ' .join (lines [1 :])).splitlines ()
780835 for offset , line in enumerate (lines ):
781- translator_comments .append ((token .lineno + offset ,
782- line ))
836+ translator_comments .append ((token .lineno + offset , line ))
783837 break
784838
785- elif funcname and call_stack == 0 :
839+ elif function_stack and function_stack [- 1 ].function_name in keywords :
840+ # We're inside a translation function call
786841 if token .type == 'operator' and token .value == ')' :
787- if last_argument is not None :
788- messages .append (last_argument )
789- if len (messages ) > 1 :
790- messages = tuple (messages )
791- elif messages :
792- messages = messages [0 ]
842+ # The call has ended, so we yield the translatable term(s)
843+ messages = function_stack [- 1 ].messages
844+ lineno = (
845+ function_stack [- 1 ].message_lineno
846+ or function_stack [- 1 ].function_lineno
847+ )
848+ cur_translator_comments = function_stack [- 1 ].translator_comments
849+
850+ if message_buffer :
851+ messages .append ('' .join (message_buffer ))
852+ message_buffer .clear ()
793853 else :
794- messages = None
854+ messages . append ( None )
795855
796- # Comments don't apply unless they immediately precede the
797- # message
798- if translator_comments and \
799- translator_comments [- 1 ][0 ] < message_lineno - 1 :
800- translator_comments = []
856+ messages = tuple (messages ) if len (messages ) > 1 else messages [0 ]
857+ if (
858+ cur_translator_comments
859+ and cur_translator_comments [- 1 ][0 ] < lineno - 1
860+ ):
861+ # The translator comments are not immediately preceding the current
862+ # term, so we skip them.
863+ cur_translator_comments = []
801864
802- if messages is not None :
803- yield (message_lineno , funcname , messages ,
804- [comment [1 ] for comment in translator_comments ])
865+ yield (
866+ lineno ,
867+ function_stack [- 1 ].function_name ,
868+ messages ,
869+ [comment [1 ] for comment in cur_translator_comments ],
870+ )
805871
806- funcname = message_lineno = last_argument = None
807- concatenate_next = False
808- translator_comments = []
809- messages = []
810- call_stack = - 1
872+ function_stack .pop ()
811873
812874 elif token .type in ('string' , 'template_string' ):
813- new_value = unquote_string (token .value )
814- if concatenate_next :
815- last_argument = (last_argument or '' ) + new_value
816- concatenate_next = False
875+ # We've encountered a string inside a translation function call
876+ string_value = unquote_string (token .value )
877+ if not function_stack [- 1 ].message_lineno :
878+ function_stack [- 1 ].message_lineno = token .lineno
879+ if string_value is not None :
880+ message_buffer .append (string_value )
881+
882+ elif token .type == 'operator' and token .value == ',' :
883+ # End of a function call argument
884+ if message_buffer :
885+ function_stack [- 1 ].messages .append ('' .join (message_buffer ))
886+ message_buffer .clear ()
817887 else :
818- last_argument = new_value
819-
820- elif token .type == 'operator' :
821- if token .value == ',' :
822- if last_argument is not None :
823- messages .append (last_argument )
824- last_argument = None
825- else :
826- messages .append (None )
827- concatenate_next = False
828- elif token .value == '+' :
829- concatenate_next = True
830-
831- elif call_stack > 0 and token .type == 'operator' \
832- and token .value == ')' :
833- call_stack -= 1
834-
835- elif funcname and call_stack == - 1 :
836- funcname = None
837-
838- elif call_stack == - 1 and token .type == 'name' and \
839- token .value in keywords and \
840- (last_token is None or last_token .type != 'name' or
841- last_token .value != 'function' ):
842- funcname = token .value
888+ function_stack [- 1 ].messages .append (None )
889+
890+ elif function_stack and token .type == 'operator' and token .value == ')' :
891+ function_stack .pop ()
892+
893+ if in_translator_comments and translator_comments [- 1 ][0 ] < token .lineno :
894+ # We have a newline in between the comments, so they don't belong
895+ # together anymore
896+ in_translator_comments = False
843897
844898 last_token = token
845899
0 commit comments