@@ -704,54 +704,109 @@ def extract_javascript(
704704 :param lineno: line number offset (for parsing embedded fragments)
705705 """
706706 from babel .messages .jslexer import Token , tokenize , unquote_string
707- funcname = message_lineno = None
708- messages = []
709- last_argument = None
710- translator_comments = []
711- concatenate_next = False
707+
712708 encoding = options .get ('encoding' , 'utf-8' )
713- last_token = None
714- call_stack = - 1
715709 dotted = any ('.' in kw for kw in keywords )
710+ last_token = None
711+ # Keep the stack of all function calls and its related contextual variables,
712+ # so we can handle nested gettext calls.
713+ function_stack = []
714+ # Keep track of whether we're in a class or function definition
715+ in_def = False
716+ # Keep track of whether we're in a block of translator comments
717+ in_translator_comments = False
718+ # Keep track of the last encountered translator comments
719+ translator_comments = []
720+ # Keep track of the (split) strings encountered
721+ message_buffer = []
722+
716723 for token in tokenize (
717724 fileobj .read ().decode (encoding ),
718- jsx = options .get (" jsx" , True ),
719- template_string = options .get (" template_string" , True ),
725+ jsx = options .get (' jsx' , True ),
726+ template_string = options .get (' template_string' , True ),
720727 dotted = dotted ,
721728 lineno = lineno ,
722729 ):
723- if ( # Turn keyword`foo` expressions into keyword("foo") calls:
724- funcname and # have a keyword...
725- (last_token and last_token .type == 'name' ) and # we've seen nothing after the keyword...
726- token .type == 'template_string' # this is a template string
730+ if token .type == 'name' and token .value in ('class' , 'function' ):
731+ # We're entering a class or function definition
732+ in_def = True
733+
734+ elif in_def and token .type == 'operator' and token .value in ('(' , '{' ):
735+ # We're in a class or function definition and should not do anything
736+ in_def = False
737+ continue
738+
739+ elif (
740+ last_token
741+ and last_token .type == 'name'
742+ and last_token .value in keywords
743+ and token .type == 'template_string'
727744 ):
728- message_lineno = token .lineno
729- messages = [unquote_string (token .value )]
730- call_stack = 0
745+ # Turn keyword`foo` expressions into keyword("foo") function calls
746+ string_value = unquote_string (token .value )
747+ cur_translator_comments = translator_comments
748+ if function_stack and function_stack [- 1 ]['function_line_no' ] == last_token .lineno :
749+ # If our current function call is on the same line as the previous one,
750+ # copy their translator comments, since they also apply to us.
751+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
752+
753+ # We add all information needed later for the current function call
754+ function_stack .append ({
755+ 'function_line_no' : last_token .lineno ,
756+ 'function_name' : last_token .value ,
757+ 'message_line_no' : token .lineno ,
758+ 'messages' : [string_value ],
759+ 'translator_comments' : cur_translator_comments ,
760+ })
761+ translator_comments = []
762+
763+ # We act as if we are closing the function call now
731764 token = Token ('operator' , ')' , token .lineno )
732765
733- if options .get ('parse_template_string' ) and not funcname and token .type == 'template_string' :
766+ if (
767+ options .get ('parse_template_string' )
768+ and (not last_token or last_token .type != 'name' or last_token .value not in keywords )
769+ and token .type == 'template_string'
770+ ):
734771 yield from parse_template_string (token .value , keywords , comment_tags , options , token .lineno )
735772
736773 elif token .type == 'operator' and token .value == '(' :
737- if funcname :
738- message_lineno = token .lineno
739- call_stack += 1
774+ if last_token .type == 'name' :
775+ # We're entering a function call
776+ cur_translator_comments = translator_comments
777+ if function_stack and function_stack [- 1 ]['function_line_no' ] == token .lineno :
778+ # If our current function call is on the same line as the previous one,
779+ # copy their translator comments, since they also apply to us.
780+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
781+
782+ # We add all information needed later for the current function call
783+ function_stack .append ({
784+ 'function_line_no' : token .lineno ,
785+ 'function_name' : last_token .value ,
786+ 'message_line_no' : None ,
787+ 'messages' : [],
788+ 'translator_comments' : cur_translator_comments ,
789+ })
790+ translator_comments = []
740791
741- elif call_stack == - 1 and token .type == 'linecomment' :
792+ elif token .type == 'linecomment' :
793+ # Strip the comment token from the line
742794 value = token .value [2 :].strip ()
743- if translator_comments and \
744- translator_comments [ - 1 ][ 0 ] == token . lineno - 1 :
795+ if in_translator_comments and translator_comments [ - 1 ][ 0 ] == token . lineno - 1 :
796+ # We're already inside a translator comment, continue appending
745797 translator_comments .append ((token .lineno , value ))
746798 continue
747799
748800 for comment_tag in comment_tags :
749801 if value .startswith (comment_tag ):
750- translator_comments .append ((token .lineno , value .strip ()))
802+ # Comment starts with one of the comment tags,
803+ # so let's start capturing it
804+ in_translator_comments = True
805+ translator_comments .append ((token .lineno , value ))
751806 break
752807
753808 elif token .type == 'multilinecomment' :
754- # only one multi-line comment may precede a translation
809+ # Only one multi-line comment may precede a translation
755810 translator_comments = []
756811 value = token .value [2 :- 2 ].strip ()
757812 for comment_tag in comment_tags :
@@ -761,68 +816,67 @@ def extract_javascript(
761816 lines [0 ] = lines [0 ].strip ()
762817 lines [1 :] = dedent ('\n ' .join (lines [1 :])).splitlines ()
763818 for offset , line in enumerate (lines ):
764- translator_comments .append ((token .lineno + offset ,
765- line ))
819+ translator_comments .append ((token .lineno + offset , line ))
766820 break
767821
768- elif funcname and call_stack == 0 :
822+ elif function_stack and function_stack [- 1 ]['function_name' ] in keywords :
823+ # We're inside a translation function call
769824 if token .type == 'operator' and token .value == ')' :
770- if last_argument is not None :
771- messages .append (last_argument )
772- if len (messages ) > 1 :
773- messages = tuple (messages )
774- elif messages :
775- messages = messages [0 ]
825+ # The call has ended, so we yield the translatable term(s)
826+ messages = function_stack [- 1 ]['messages' ]
827+ line_no = (
828+ function_stack [- 1 ]['message_line_no' ]
829+ or function_stack [- 1 ]['function_line_no' ]
830+ )
831+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
832+
833+ if message_buffer :
834+ messages .append ('' .join (message_buffer ))
835+ message_buffer .clear ()
776836 else :
777- messages = None
837+ messages . append ( None )
778838
779- # Comments don't apply unless they immediately precede the
780- # message
781- if translator_comments and \
782- translator_comments [- 1 ][0 ] < message_lineno - 1 :
783- translator_comments = []
839+ messages = tuple (messages ) if len (messages ) > 1 else messages [0 ]
840+ if (
841+ cur_translator_comments
842+ and cur_translator_comments [- 1 ][0 ] < line_no - 1
843+ ):
844+ # The translator comments are not immediately preceding the current
845+ # term, so we skip them.
846+ cur_translator_comments = []
784847
785- if messages is not None :
786- yield (message_lineno , funcname , messages ,
787- [comment [1 ] for comment in translator_comments ])
848+ yield (
849+ line_no ,
850+ function_stack [- 1 ]['function_name' ],
851+ messages ,
852+ [comment [1 ] for comment in cur_translator_comments ],
853+ )
788854
789- funcname = message_lineno = last_argument = None
790- concatenate_next = False
791- translator_comments = []
792- messages = []
793- call_stack = - 1
855+ function_stack .pop ()
794856
795857 elif token .type in ('string' , 'template_string' ):
796- new_value = unquote_string (token .value )
797- if concatenate_next :
798- last_argument = (last_argument or '' ) + new_value
799- concatenate_next = False
858+ # We've encountered a string inside a translation function call
859+ string_value = unquote_string (token .value )
860+ if not function_stack [- 1 ]['message_line_no' ]:
861+ function_stack [- 1 ]['message_line_no' ] = token .lineno
862+ if string_value is not None :
863+ message_buffer .append (string_value )
864+
865+ elif token .type == 'operator' and token .value == ',' :
866+ # End of a function call argument
867+ if message_buffer :
868+ function_stack [- 1 ]['messages' ].append ('' .join (message_buffer ))
869+ message_buffer .clear ()
800870 else :
801- last_argument = new_value
802-
803- elif token .type == 'operator' :
804- if token .value == ',' :
805- if last_argument is not None :
806- messages .append (last_argument )
807- last_argument = None
808- else :
809- messages .append (None )
810- concatenate_next = False
811- elif token .value == '+' :
812- concatenate_next = True
813-
814- elif call_stack > 0 and token .type == 'operator' \
815- and token .value == ')' :
816- call_stack -= 1
817-
818- elif funcname and call_stack == - 1 :
819- funcname = None
820-
821- elif call_stack == - 1 and token .type == 'name' and \
822- token .value in keywords and \
823- (last_token is None or last_token .type != 'name' or
824- last_token .value != 'function' ):
825- funcname = token .value
871+ function_stack [- 1 ]['messages' ].append (None )
872+
873+ elif function_stack and token .type == 'operator' and token .value == ')' :
874+ function_stack .pop ()
875+
876+ if in_translator_comments and translator_comments [- 1 ][0 ] < token .lineno :
877+ # We have a newline in between the comments, so they don't belong
878+ # together anymore
879+ in_translator_comments = False
826880
827881 last_token = token
828882
0 commit comments