| 
 | 1 | +# A prototype for converting JuliaSyntax data structures into CSTParser.EXPR.  | 
 | 2 | + | 
 | 3 | +using CSTParser  | 
 | 4 | + | 
 | 5 | +using JuliaSyntax  | 
 | 6 | +using JuliaSyntax: GreenNode, SyntaxHead, SourceFile, TaggedRange,  | 
 | 7 | +    @K_str, @KSet_cmd,  | 
 | 8 | +    haschildren, is_syntax_kind, is_keyword, is_operator, is_identifier, head, kind, span,  | 
 | 9 | +    is_infix, is_trivia, untokenize, TzTokens, children  | 
 | 10 | + | 
 | 11 | +# See CSTParser.tokenkindtoheadmap  | 
 | 12 | +function tokenkindtoheadmap(k::TzTokens.Kind)  | 
 | 13 | +    k === TzTokens.COMMA      ? :COMMA   :  | 
 | 14 | +    k === TzTokens.LPAREN     ? :LPAREN  :  | 
 | 15 | +    k === TzTokens.RPAREN     ? :RPAREN  :  | 
 | 16 | +    k === TzTokens.LSQUARE    ? :LSQUARE :  | 
 | 17 | +    k === TzTokens.RSQUARE    ? :RSQUARE :  | 
 | 18 | +    k === TzTokens.LBRACE     ? :LBRACE  :  | 
 | 19 | +    k === TzTokens.RBRACE     ? :RBRACE  :  | 
 | 20 | +    k === TzTokens.AT_SIGN    ? :ATSIGN  :  | 
 | 21 | +    k === TzTokens.DOT        ? :DOT     :  | 
 | 22 | +    k === TzTokens.ABSTRACT   ? :ABSTRACT :  | 
 | 23 | +    k === TzTokens.BAREMODULE ? :BAREMODULE :  | 
 | 24 | +    k === TzTokens.BEGIN      ? :BEGIN :  | 
 | 25 | +    k === TzTokens.BREAK      ? :BREAK :  | 
 | 26 | +    k === TzTokens.CATCH      ? :CATCH :  | 
 | 27 | +    k === TzTokens.CONST      ? :CONST :  | 
 | 28 | +    k === TzTokens.CONTINUE   ? :CONTINUE :  | 
 | 29 | +    k === TzTokens.DO         ? :DO :  | 
 | 30 | +    k === TzTokens.ELSE       ? :ELSE :  | 
 | 31 | +    k === TzTokens.ELSEIF     ? :ELSEIF :  | 
 | 32 | +    k === TzTokens.END        ? :END :  | 
 | 33 | +    k === TzTokens.EXPORT     ? :EXPORT :  | 
 | 34 | +    k === TzTokens.FINALLY    ? :FINALLY :  | 
 | 35 | +    k === TzTokens.FOR        ? :FOR :  | 
 | 36 | +    k === TzTokens.FUNCTION   ? :FUNCTION :  | 
 | 37 | +    k === TzTokens.GLOBAL     ? :GLOBAL :  | 
 | 38 | +    k === TzTokens.IF         ? :IF :  | 
 | 39 | +    k === TzTokens.IMPORT     ? :IMPORT :  | 
 | 40 | +    k === TzTokens.LET        ? :LET :  | 
 | 41 | +    k === TzTokens.LOCAL      ? :LOCAL :  | 
 | 42 | +    k === TzTokens.MACRO      ? :MACRO :  | 
 | 43 | +    k === TzTokens.MODULE     ? :MODULE :  | 
 | 44 | +    k === TzTokens.MUTABLE    ? :MUTABLE :  | 
 | 45 | +    k === TzTokens.OUTER      ? :OUTER :  | 
 | 46 | +    k === TzTokens.PRIMITIVE  ? :PRIMITIVE :  | 
 | 47 | +    k === TzTokens.QUOTE      ? :QUOTE :  | 
 | 48 | +    k === TzTokens.RETURN     ? :RETURN :  | 
 | 49 | +    k === TzTokens.STRUCT     ? :STRUCT :  | 
 | 50 | +    k === TzTokens.TRY        ? :TRY :  | 
 | 51 | +    k === TzTokens.TYPE       ? :TYPE :  | 
 | 52 | +    k === TzTokens.USING      ? :USING :  | 
 | 53 | +    k === TzTokens.WHILE      ? :WHILE :  | 
 | 54 | +    k === TzTokens.INTEGER    ? :INTEGER :  | 
 | 55 | +    k === TzTokens.BIN_INT    ? :BININT :  | 
 | 56 | +    k === TzTokens.HEX_INT    ? :HEXINT :  | 
 | 57 | +    k === TzTokens.OCT_INT    ? :OCTINT :  | 
 | 58 | +    k === TzTokens.FLOAT      ? :FLOAT :  | 
 | 59 | +    k === TzTokens.STRING     ? :STRING :  | 
 | 60 | +    # k === TzTokens.TRIPLE_STRING ? :TRIPLESTRING :  | 
 | 61 | +    k === TzTokens.CHAR       ? :CHAR :  | 
 | 62 | +    k === TzTokens.CMD        ? :CMD :  | 
 | 63 | +    # k === TzTokens.TRIPLE_CMD ? :TRIPLECMD :  | 
 | 64 | +    k === TzTokens.TRUE       ? :TRUE :  | 
 | 65 | +    k === TzTokens.FALSE      ? :FALSE :  | 
 | 66 | +    k === TzTokens.ENDMARKER  ? :errortoken :  | 
 | 67 | +        error("Unknown token $k")  | 
 | 68 | +end  | 
 | 69 | + | 
 | 70 | +# Things which are "trailing trivia" according to CSTParser  | 
 | 71 | +#  | 
 | 72 | +# "Trailing trivia" is trivia which will be attached to the end of a node.  | 
 | 73 | +is_cst_trailing_trivia(x) = kind(x) in KSet`Whitespace NewlineWs Comment ;`  | 
 | 74 | + | 
 | 75 | +# Convert GreenNode into CSTParser.EXPR  | 
 | 76 | +function cst(source::SourceFile, raw_node::GreenNode{SyntaxHead}, position::Integer=1)  | 
 | 77 | +    node_start = position  | 
 | 78 | +    cs = children(raw_node)  | 
 | 79 | +    i = 1  | 
 | 80 | +    args   = CSTParser.EXPR[]  | 
 | 81 | +    trivia = CSTParser.EXPR[]  | 
 | 82 | +    last_trivia_span = 0  | 
 | 83 | +    while i <= length(cs)  | 
 | 84 | +        raw = cs[i]  | 
 | 85 | +        if haschildren(raw)  | 
 | 86 | +            c = cst(source, raw, position)  | 
 | 87 | +            push!(args, c)  | 
 | 88 | +            last_trivia_span = c.fullspan - c.span  | 
 | 89 | +            position += span(raw)  | 
 | 90 | +        else  | 
 | 91 | +            start_pos = position  | 
 | 92 | +            token_start = i  | 
 | 93 | +            inner_span = span(raw)  | 
 | 94 | +            position += span(raw)  | 
 | 95 | +            # Here we append any trailing trivia tokens to the node.  | 
 | 96 | +            while i < length(cs) && is_cst_trailing_trivia(cs[i+1])  | 
 | 97 | +                position += span(cs[i+1])  | 
 | 98 | +                i += 1  | 
 | 99 | +            end  | 
 | 100 | +            full_span = position - start_pos  | 
 | 101 | +            last_trivia_span = full_span - inner_span  | 
 | 102 | + | 
 | 103 | +            # Leaf node  | 
 | 104 | +            k = kind(raw)  | 
 | 105 | +            val_range = start_pos:(start_pos + inner_span - 1)  | 
 | 106 | +            val = source[val_range]  | 
 | 107 | + | 
 | 108 | +            if kind(raw) == K"nothing"  | 
 | 109 | +                # First `nothing` token in file seems to require this. Why I don't know.  | 
 | 110 | +                inner_span = full_span  | 
 | 111 | +            end  | 
 | 112 | + | 
 | 113 | +            # See CSTParser.literalmap. Which we can't use directly because we've  | 
 | 114 | +            # customized Tokenize.jl :-(  | 
 | 115 | +            cst_head = k === TzTokens.NOTHING    ? :NOTHING :  | 
 | 116 | +                       # FIXME: Following probably need special handling  | 
 | 117 | +                       k === TzTokens.MACRO_NAME ? :IDENTIFIER :  | 
 | 118 | +                       k === TzTokens.CMD_MACRO_NAME ? :IDENTIFIER :  | 
 | 119 | +                       k === TzTokens.STRING_MACRO_NAME ? :IDENTIFIER :  | 
 | 120 | +                       k === TzTokens.DQUOTE     ? :DQUOTE :  | 
 | 121 | +                       k === TzTokens.BACKTICK   ? :BACKTICK :  | 
 | 122 | +                       is_operator(k)            ? :OPERATOR :  | 
 | 123 | +                       is_identifier(k)          ? :IDENTIFIER :  | 
 | 124 | +                       tokenkindtoheadmap(k)  | 
 | 125 | +            # FIXME: STRING, TRIPLE_STRING, CMD, TRIPLE_CMD, need special handling:  | 
 | 126 | +            #  * STRING        doesn't incude delimiters (DQUOTE tokens)  | 
 | 127 | +            #  * CMD           doesn't include delimiters (BACKTICK tokens)  | 
 | 128 | +            #  * TRIPLE_STRING is a composite of STRING and TRIPLE_DQUOTE  | 
 | 129 | +            #  * TRIPLE_CMD    is a composite of CMD and TRIPLE_BACKTICK  | 
 | 130 | +            # They don't exist anymore as individual tokens  | 
 | 131 | + | 
 | 132 | +            push!(is_trivia(raw) ? trivia : args,  | 
 | 133 | +                  CSTParser.EXPR(cst_head, nothing, nothing, full_span, inner_span, val,  | 
 | 134 | +                                 nothing, nothing))  | 
 | 135 | +        end  | 
 | 136 | +        i += 1  | 
 | 137 | +    end  | 
 | 138 | + | 
 | 139 | +    if is_infix(raw_node)  | 
 | 140 | +        args[1], args[2] = args[2], args[1]  | 
 | 141 | +        # TODO: Other argument swizzling, as done in SyntaxNode -> Expr conversions  | 
 | 142 | +    end  | 
 | 143 | + | 
 | 144 | +    full_span = position - node_start  | 
 | 145 | +    inner_span = full_span - last_trivia_span  | 
 | 146 | +    k = kind(raw_node)  | 
 | 147 | +    cst_head = k == K"toplevel" ? :file :  | 
 | 148 | +               is_operator(k)   ? popfirst!(trivia) :  | 
 | 149 | +               Symbol(lowercase(string(kind(raw_node))))  | 
 | 150 | +    x = CSTParser.EXPR(cst_head, args,  | 
 | 151 | +                       isempty(trivia) ? nothing : trivia,  | 
 | 152 | +                       full_span, inner_span, nothing, nothing, nothing)  | 
 | 153 | +    for a in args  | 
 | 154 | +        a.parent = x  | 
 | 155 | +    end  | 
 | 156 | +    for a in trivia  | 
 | 157 | +        a.parent = x  | 
 | 158 | +    end  | 
 | 159 | +    return x  | 
 | 160 | +end  | 
 | 161 | + | 
 | 162 | + | 
 | 163 | +# Some steps of conversion to CSTParser.EXPR is most conveniently done on the  | 
 | 164 | +# raw ParseStream representation. In particular, CSTParser.EXPR attaches  | 
 | 165 | +# some types of trivia to the end of nontrivia or trivia tokens.  | 
 | 166 | +#  | 
 | 167 | +# This function reassociates trivia with nonterminal nodes to make converting  | 
 | 168 | +# to CSTParser.EXPR a *local* operation on green tree nodes.  | 
 | 169 | +function parse_for_cst(text)  | 
 | 170 | +    stream = JuliaSyntax.ParseStream(text)  | 
 | 171 | + | 
 | 172 | +    # Insert initial nothing node if necessary to anchor trailing whitespace.  | 
 | 173 | +    if is_cst_trailing_trivia(peek(stream, skip_whitespace=false))  | 
 | 174 | +        JuliaSyntax.bump_invisible(stream, K"nothing")  | 
 | 175 | +    end  | 
 | 176 | +    JuliaSyntax.parse(stream, rule=:toplevel)  | 
 | 177 | + | 
 | 178 | +    # Fix up start of stream  | 
 | 179 | +    ranges = stream.ranges  | 
 | 180 | +    @assert kind(ranges[end]) == K"toplevel"  | 
 | 181 | +    ranges[end] = let r = ranges[end]  | 
 | 182 | +        TaggedRange(r.head, 1, r.last_token)  | 
 | 183 | +    end  | 
 | 184 | + | 
 | 185 | +    # Rearrange whitespace trivia tokens so that they're always *trailing*  | 
 | 186 | +    # siblings of non-whitespace trivia tokens.  | 
 | 187 | +    #  | 
 | 188 | +    # This is required for later conversion to CSTParser.EXPR  | 
 | 189 | +    tokens = stream.tokens  | 
 | 190 | +    for (i,range) in enumerate(ranges)  | 
 | 191 | +        first_token = range.first_token  | 
 | 192 | +        while first_token < length(tokens) &&  | 
 | 193 | +                is_cst_trailing_trivia(tokens[first_token])  | 
 | 194 | +            first_token += 1  | 
 | 195 | +        end  | 
 | 196 | +        last_token = range.last_token  | 
 | 197 | +        while last_token < length(tokens) &&   | 
 | 198 | +                is_cst_trailing_trivia(tokens[last_token+1])  | 
 | 199 | +            last_token += 1  | 
 | 200 | +        end  | 
 | 201 | +        ranges[i] = TaggedRange(head(range), first_token, last_token)  | 
 | 202 | +    end  | 
 | 203 | + | 
 | 204 | +    return JuliaSyntax.build_tree(JuliaSyntax.GreenNode, stream)  | 
 | 205 | +end  | 
 | 206 | + | 
 | 207 | +# CSTParser.EXPR equality; should be in CSTParser...  | 
 | 208 | +function Base.:(==)(x::CSTParser.EXPR, y::CSTParser.EXPR)  | 
 | 209 | +    # Debugging hacks:  | 
 | 210 | +    if x.head != y.head  | 
 | 211 | +        @info "Trivia mismatch" x.head y.head  | 
 | 212 | +    end  | 
 | 213 | +    if x.trivia != y.trivia  | 
 | 214 | +        @info "Trivia mismatch" x.trivia y.trivia  | 
 | 215 | +    end  | 
 | 216 | +    if x.fullspan != y.fullspan  | 
 | 217 | +        @info "Fullspan mismatch" x y x.fullspan y.fullspan  | 
 | 218 | +    end  | 
 | 219 | +    if x.span != y.span  | 
 | 220 | +        @info "Span mismatch" x y x.span y.span  | 
 | 221 | +    end  | 
 | 222 | +    if x.val != y.val  | 
 | 223 | +        @info "Trivia mismatch" x.val y.val  | 
 | 224 | +    end  | 
 | 225 | + | 
 | 226 | +    return x.head == y.head &&  | 
 | 227 | +           x.args == y.args &&  | 
 | 228 | +           x.trivia == y.trivia &&  | 
 | 229 | +           x.fullspan == y.fullspan &&  | 
 | 230 | +           x.span == y.span &&  | 
 | 231 | +           x.val == y.val &&  | 
 | 232 | +           x.meta == y.meta  | 
 | 233 | +end  | 
 | 234 | + | 
# Driver script: parse one sample with both parsers and compare the results.
#
# Some things which work
# (uncomment one of these to use it instead of the default sample below)
#text = " 1 + 2 * 3 "
#text = "[ 1 ; 2 ;]"
#text = "for i=1:10\nx\ny\nend"
#text = "100.00"
text = """
function f(x,y)
    s = 0
    for i = 1:10
        s += x - i^y
    end
end
"""

# Some things which don't yet work
#
# Macro names
# text = "@A.asdf x y"
#
# Bracket nodes don't exist yet in JuliaSyntax
# text = "(a + b)"
#
# Strings have separate delimiters. Will need to put them back together.
# text = "\"str\""

# Wrap the sample so `cst` can index it to recover token text.
source = SourceFile(text)

# Green tree with trivia rearranged for CSTParser-style conversion.
ex = parse_for_cst(text)
# show(stdout, MIME"text/plain"(), ex, text)

# Reference tree produced by CSTParser itself.
# NOTE(review): the `true` flag presumably asks CSTParser to parse the whole
# input as a file -- confirm against the CSTParser documentation.
y = CSTParser.parse(text, true)
# Tree converted from the JuliaSyntax green tree.
x = cst(source, ex)
# Compare with the EXPR `==` method defined above, which logs any mismatches.
x == y
 | 268 | + | 
0 commit comments