@@ -706,54 +706,109 @@ def extract_javascript(
706706 :param lineno: line number offset (for parsing embedded fragments)
707707 """
708708 from babel .messages .jslexer import Token , tokenize , unquote_string
709- funcname = message_lineno = None
710- messages = []
711- last_argument = None
712- translator_comments = []
713- concatenate_next = False
709+
714710 encoding = options .get ('encoding' , 'utf-8' )
715- last_token = None
716- call_stack = - 1
717711 dotted = any ('.' in kw for kw in keywords )
712+ last_token = None
713+ # Keep the stack of all function calls and its related contextual variables,
714+ # so we can handle nested gettext calls.
715+ function_stack = []
716+ # Keep track of whether we're in a class or function definition
717+ in_def = False
718+ # Keep track of whether we're in a block of translator comments
719+ in_translator_comments = False
720+ # Keep track of the last encountered translator comments
721+ translator_comments = []
722+ # Keep track of the (split) strings encountered
723+ message_buffer = []
724+
718725 for token in tokenize (
719726 fileobj .read ().decode (encoding ),
720- jsx = options .get (" jsx" , True ),
721- template_string = options .get (" template_string" , True ),
727+ jsx = options .get (' jsx' , True ),
728+ template_string = options .get (' template_string' , True ),
722729 dotted = dotted ,
723730 lineno = lineno ,
724731 ):
725- if ( # Turn keyword`foo` expressions into keyword("foo") calls:
726- funcname and # have a keyword...
727- (last_token and last_token .type == 'name' ) and # we've seen nothing after the keyword...
728- token .type == 'template_string' # this is a template string
732+ if token .type == 'name' and token .value in ('class' , 'function' ):
733+ # We're entering a class or function definition
734+ in_def = True
735+
736+ elif in_def and token .type == 'operator' and token .value in ('(' , '{' ):
737+ # We're in a class or function definition and should not do anything
738+ in_def = False
739+ continue
740+
741+ elif (
742+ last_token
743+ and last_token .type == 'name'
744+ and last_token .value in keywords
745+ and token .type == 'template_string'
729746 ):
730- message_lineno = token .lineno
731- messages = [unquote_string (token .value )]
732- call_stack = 0
747+ # Turn keyword`foo` expressions into keyword("foo") function calls
748+ string_value = unquote_string (token .value )
749+ cur_translator_comments = translator_comments
750+ if function_stack and function_stack [- 1 ]['function_line_no' ] == last_token .lineno :
751+ # If our current function call is on the same line as the previous one,
752+ # copy their translator comments, since they also apply to us.
753+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
754+
755+ # We add all information needed later for the current function call
756+ function_stack .append ({
757+ 'function_line_no' : last_token .lineno ,
758+ 'function_name' : last_token .value ,
759+ 'message_line_no' : token .lineno ,
760+ 'messages' : [string_value ],
761+ 'translator_comments' : cur_translator_comments ,
762+ })
763+ translator_comments = []
764+
765+ # We act as if we are closing the function call now
733766 token = Token ('operator' , ')' , token .lineno )
734767
735- if options .get ('parse_template_string' ) and not funcname and token .type == 'template_string' :
768+ if (
769+ options .get ('parse_template_string' )
770+ and (not last_token or last_token .type != 'name' or last_token .value not in keywords )
771+ and token .type == 'template_string'
772+ ):
736773 yield from parse_template_string (token .value , keywords , comment_tags , options , token .lineno )
737774
738775 elif token .type == 'operator' and token .value == '(' :
739- if funcname :
740- message_lineno = token .lineno
741- call_stack += 1
776+ if last_token .type == 'name' :
777+ # We're entering a function call
778+ cur_translator_comments = translator_comments
779+ if function_stack and function_stack [- 1 ]['function_line_no' ] == token .lineno :
780+ # If our current function call is on the same line as the previous one,
781+ # copy their translator comments, since they also apply to us.
782+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
783+
784+ # We add all information needed later for the current function call
785+ function_stack .append ({
786+ 'function_line_no' : token .lineno ,
787+ 'function_name' : last_token .value ,
788+ 'message_line_no' : None ,
789+ 'messages' : [],
790+ 'translator_comments' : cur_translator_comments ,
791+ })
792+ translator_comments = []
742793
743- elif call_stack == - 1 and token .type == 'linecomment' :
794+ elif token .type == 'linecomment' :
795+ # Strip the comment token from the line
744796 value = token .value [2 :].strip ()
745- if translator_comments and \
746- translator_comments [ - 1 ][ 0 ] == token . lineno - 1 :
797+ if in_translator_comments and translator_comments [ - 1 ][ 0 ] == token . lineno - 1 :
798+ # We're already inside a translator comment, continue appending
747799 translator_comments .append ((token .lineno , value ))
748800 continue
749801
750802 for comment_tag in comment_tags :
751803 if value .startswith (comment_tag ):
752- translator_comments .append ((token .lineno , value .strip ()))
804+ # Comment starts with one of the comment tags,
805+ # so let's start capturing it
806+ in_translator_comments = True
807+ translator_comments .append ((token .lineno , value ))
753808 break
754809
755810 elif token .type == 'multilinecomment' :
756- # only one multi-line comment may precede a translation
811+ # Only one multi-line comment may precede a translation
757812 translator_comments = []
758813 value = token .value [2 :- 2 ].strip ()
759814 for comment_tag in comment_tags :
@@ -763,68 +818,67 @@ def extract_javascript(
763818 lines [0 ] = lines [0 ].strip ()
764819 lines [1 :] = dedent ('\n ' .join (lines [1 :])).splitlines ()
765820 for offset , line in enumerate (lines ):
766- translator_comments .append ((token .lineno + offset ,
767- line ))
821+ translator_comments .append ((token .lineno + offset , line ))
768822 break
769823
770- elif funcname and call_stack == 0 :
824+ elif function_stack and function_stack [- 1 ]['function_name' ] in keywords :
825+ # We're inside a translation function call
771826 if token .type == 'operator' and token .value == ')' :
772- if last_argument is not None :
773- messages .append (last_argument )
774- if len (messages ) > 1 :
775- messages = tuple (messages )
776- elif messages :
777- messages = messages [0 ]
827+ # The call has ended, so we yield the translatable term(s)
828+ messages = function_stack [- 1 ]['messages' ]
829+ line_no = (
830+ function_stack [- 1 ]['message_line_no' ]
831+ or function_stack [- 1 ]['function_line_no' ]
832+ )
833+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
834+
835+ if message_buffer :
836+ messages .append ('' .join (message_buffer ))
837+ message_buffer .clear ()
778838 else :
779- messages = None
839+ messages . append ( None )
780840
781- # Comments don't apply unless they immediately precede the
782- # message
783- if translator_comments and \
784- translator_comments [- 1 ][0 ] < message_lineno - 1 :
785- translator_comments = []
841+ messages = tuple (messages ) if len (messages ) > 1 else messages [0 ]
842+ if (
843+ cur_translator_comments
844+ and cur_translator_comments [- 1 ][0 ] < line_no - 1
845+ ):
846+ # The translator comments are not immediately preceding the current
847+ # term, so we skip them.
848+ cur_translator_comments = []
786849
787- if messages is not None :
788- yield (message_lineno , funcname , messages ,
789- [comment [1 ] for comment in translator_comments ])
850+ yield (
851+ line_no ,
852+ function_stack [- 1 ]['function_name' ],
853+ messages ,
854+ [comment [1 ] for comment in cur_translator_comments ],
855+ )
790856
791- funcname = message_lineno = last_argument = None
792- concatenate_next = False
793- translator_comments = []
794- messages = []
795- call_stack = - 1
857+ function_stack .pop ()
796858
797859 elif token .type in ('string' , 'template_string' ):
798- new_value = unquote_string (token .value )
799- if concatenate_next :
800- last_argument = (last_argument or '' ) + new_value
801- concatenate_next = False
860+ # We've encountered a string inside a translation function call
861+ string_value = unquote_string (token .value )
862+ if not function_stack [- 1 ]['message_line_no' ]:
863+ function_stack [- 1 ]['message_line_no' ] = token .lineno
864+ if string_value is not None :
865+ message_buffer .append (string_value )
866+
867+ elif token .type == 'operator' and token .value == ',' :
868+ # End of a function call argument
869+ if message_buffer :
870+ function_stack [- 1 ]['messages' ].append ('' .join (message_buffer ))
871+ message_buffer .clear ()
802872 else :
803- last_argument = new_value
804-
805- elif token .type == 'operator' :
806- if token .value == ',' :
807- if last_argument is not None :
808- messages .append (last_argument )
809- last_argument = None
810- else :
811- messages .append (None )
812- concatenate_next = False
813- elif token .value == '+' :
814- concatenate_next = True
815-
816- elif call_stack > 0 and token .type == 'operator' \
817- and token .value == ')' :
818- call_stack -= 1
819-
820- elif funcname and call_stack == - 1 :
821- funcname = None
822-
823- elif call_stack == - 1 and token .type == 'name' and \
824- token .value in keywords and \
825- (last_token is None or last_token .type != 'name' or
826- last_token .value != 'function' ):
827- funcname = token .value
873+ function_stack [- 1 ]['messages' ].append (None )
874+
875+ elif function_stack and token .type == 'operator' and token .value == ')' :
876+ function_stack .pop ()
877+
878+ if in_translator_comments and translator_comments [- 1 ][0 ] < token .lineno :
879+ # We have a newline in between the comments, so they don't belong
880+ # together anymore
881+ in_translator_comments = False
828882
829883 last_token = token
830884
0 commit comments