Skip to content

Commit a6bc954

Browse files
authored
[Parser] Parse folded instructions that contain parentheses (#6196)
To parse folded instructions in the right order, we need to defer parsing each instruction until we have parsed each of its children and found its closing parenthesis. Previously we naively looked for parentheses to determine where instructions began and ended before we parsed them, but that scheme did not correctly handle instructions that can contain parentheses in their immediates, such as call_indirect. Fix the problem by using the actual instruction parser functions with a placeholder context to find the end of the instructions, including any kind of immediates they might have.
1 parent 260fdfc commit a6bc954

File tree

3 files changed

+310
-155
lines changed

3 files changed

+310
-155
lines changed

src/parser/contexts.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,14 @@ struct NullInstrParserCtx {
517517
Result<> makeStringSliceIter(Index) { return Ok{}; }
518518
};
519519

520+
// Placeholder parser context that combines the null type context and the null
// instruction context. foldedinstr runs plaininstr over a NullCtx to consume an
// instruction's span (including its immediates) before that instruction is
// parsed for real with the actual context.
struct NullCtx : NullTypeParserCtx, NullInstrParserCtx {
521+
// This context's own copy of the parse input; the caller copies it back to
// adopt the position reached by the placeholder parse.
ParseInput in;
522+
// Initializes `in` as a copy of the given input.
NullCtx(const ParseInput& in) : in(in) {}
523+
// Ignores all of its arguments and unconditionally reports success.
Result<> makeTypeUse(Index, std::optional<HeapTypeT>, ParamsT*, ResultsT*) {
524+
return Ok{};
525+
}
526+
};
527+
520528
// Phase 1: Parse definition spans for top-level module elements and determine
521529
// their indices and names.
522530
struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx {

src/parser/parsers.h

Lines changed: 43 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#define parser_parsers_h
1919

2020
#include "common.h"
21+
#include "contexts.h"
2122
#include "input.h"
2223

2324
namespace wasm::WATParser {
@@ -706,62 +707,66 @@ template<typename Ctx> MaybeResult<> instr(Ctx& ctx) {
706707
}
707708

708709
template<typename Ctx> MaybeResult<> foldedinstr(Ctx& ctx) {
709-
// Check for valid strings that are not instructions.
710-
if (ctx.in.peekSExprStart("then"sv) || ctx.in.peekSExprStart("else")) {
710+
// We must have an '(' to start a folded instruction.
711+
if (auto tok = ctx.in.peek(); !tok || !tok->isLParen()) {
711712
return {};
712713
}
713-
if (auto inst = foldedBlockinstr(ctx)) {
714-
return inst;
715-
}
716-
if (!ctx.in.takeLParen()) {
714+
715+
// Check for valid strings that look like folded instructions but are not.
716+
if (ctx.in.peekSExprStart("then"sv) || ctx.in.peekSExprStart("else")) {
717717
return {};
718718
}
719719

720720
// A stack of (start, end) position pairs defining the positions of
721721
// instructions that need to be parsed after their folded children.
722722
std::vector<std::pair<Index, std::optional<Index>>> foldedInstrs;
723723

724-
// Begin a folded instruction. Push its start position and a placeholder
725-
// end position.
726-
foldedInstrs.push_back({ctx.in.getPos(), {}});
727-
while (!foldedInstrs.empty()) {
728-
// Consume everything up to the next paren. This span will be parsed as
729-
// an instruction later after its folded children have been parsed.
730-
if (!ctx.in.takeUntilParen()) {
731-
return ctx.in.err(foldedInstrs.back().first,
732-
"unterminated folded instruction");
733-
}
724+
do {
725+
if (ctx.in.takeRParen()) {
726+
// We've reached the end of a folded instruction. Parse it for real.
727+
auto [start, end] = foldedInstrs.back();
728+
if (!end) {
729+
return ctx.in.err("unexpected end of folded instruction");
730+
}
731+
foldedInstrs.pop_back();
734732

735-
if (!foldedInstrs.back().second) {
736-
// The folded instruction we just started should end here.
737-
foldedInstrs.back().second = ctx.in.getPos();
733+
WithPosition with(ctx, start);
734+
auto inst = plaininstr(ctx);
735+
assert(inst && "unexpectedly failed to parse instruction");
736+
CHECK_ERR(inst);
737+
assert(ctx.in.getPos() == *end && "expected end of instruction");
738+
continue;
738739
}
739740

740-
// We have either the start of a new folded child or the end of the last
741-
// one.
741+
// We're not ending an instruction, so we must be starting a new one. Maybe
742+
// it is a block instruction.
742743
if (auto blockinst = foldedBlockinstr(ctx)) {
743744
CHECK_ERR(blockinst);
744-
} else if (ctx.in.takeLParen()) {
745-
foldedInstrs.push_back({ctx.in.getPos(), {}});
746-
} else if (ctx.in.takeRParen()) {
747-
auto [start, end] = foldedInstrs.back();
748-
assert(end && "Should have found end of instruction");
749-
foldedInstrs.pop_back();
745+
continue;
746+
}
750747

751-
WithPosition with(ctx, start);
752-
if (auto inst = plaininstr(ctx)) {
753-
CHECK_ERR(inst);
754-
} else {
755-
return ctx.in.err(start, "expected folded instruction");
756-
}
748+
// We must be starting a new plain instruction.
749+
if (!ctx.in.takeLParen()) {
750+
return ctx.in.err("expected folded instruction");
751+
}
752+
foldedInstrs.push_back({ctx.in.getPos(), {}});
757753

758-
if (ctx.in.getPos() != *end) {
759-
return ctx.in.err("expected end of instruction");
760-
}
754+
// Consume the span for the instruction without meaningfully parsing it yet.
755+
// It will be parsed for real using the real context after its s-expression
756+
// children have been found and parsed.
757+
NullCtx nullCtx(ctx.in);
758+
if (auto inst = plaininstr(nullCtx)) {
759+
CHECK_ERR(inst);
760+
ctx.in = nullCtx.in;
761761
} else {
762-
WASM_UNREACHABLE("expected paren");
762+
return ctx.in.err("expected instruction");
763763
}
764-
}
764+
765+
// The folded instruction we just started ends here.
766+
assert(!foldedInstrs.back().second);
767+
foldedInstrs.back().second = ctx.in.getPos();
768+
} while (!foldedInstrs.empty());
769+
765770
return Ok{};
766771
}
767772

0 commit comments

Comments
 (0)