diff --git a/include/wabt/common.h b/include/wabt/common.h index e1411b4870..692c622bd6 100644 --- a/include/wabt/common.h +++ b/include/wabt/common.h @@ -186,6 +186,13 @@ void Destruct(T& placement) { placement.~T(); } +template +struct Overload: Fs... { + using Fs::operator()...; +}; +template +Overload(Fs...)->Overload; + enum class LabelType { Func, InitExpr, @@ -297,9 +304,162 @@ enum class RelocType { First = FuncIndexLEB, Last = FuncIndexI32, + None = -1, // Used internally as a sentinel value +}; + +enum class RelocDataType { + I32, I64, + LEB, LEB64, + SLEB, SLEB64, +}; + +constexpr size_t kRelocDataTypeSize[] { + 4, 8, + 5, 10, + 5, 10 +}; + +constexpr RelocDataType kRelocDataType[] { + RelocDataType::LEB, // FuncIndexLEB = 0 + RelocDataType::SLEB, // TableIndexSLEB = 1 + RelocDataType::I32, // TableIndexI32 = 2 + RelocDataType::LEB, // MemoryAddressLEB = 3 + RelocDataType::SLEB, // MemoryAddressSLEB = 4 + RelocDataType::I32, // MemoryAddressI32 = 5 + RelocDataType::LEB, // TypeIndexLEB = 6 + RelocDataType::LEB, // GlobalIndexLEB = 7 + RelocDataType::I32, // FunctionOffsetI32 = 8 + RelocDataType::I32, // SectionOffsetI32 = 9 + RelocDataType::LEB, // TagIndexLEB = 10 + RelocDataType::SLEB, // MemoryAddressRelSLEB = 11 + RelocDataType::SLEB, // TableIndexRelSLEB = 12 + RelocDataType::I32, // GlobalIndexI32 = 13 + RelocDataType::LEB64, // MemoryAddressLEB64 = 14 + RelocDataType::SLEB64, // MemoryAddressSLEB64 = 15 + RelocDataType::I64, // MemoryAddressI64 = 16 + RelocDataType::SLEB64, // MemoryAddressRelSLEB64 = 17 + RelocDataType::SLEB64, // TableIndexSLEB64 = 18 + RelocDataType::I64, // TableIndexI64 = 19 + RelocDataType::LEB, // TableNumberLEB = 20 + RelocDataType::SLEB, // MemoryAddressTLSSLEB = 21 + RelocDataType::I64, // FunctionOffsetI64 = 22 + RelocDataType::I32, // MemoryAddressLocRelI32 = 23 + RelocDataType::SLEB64, // TableIndexRelSLEB64 = 24 + RelocDataType::SLEB64, // MemoryAddressTLSSLEB64 = 25 + RelocDataType::I32, // FuncIndexI32 = 26 +}; + 
+enum class RelocKind { + Function, + FunctionTbl, + Data, + Global, + Table, + Tag, + Type, + Section, + Text, +}; + +constexpr RelocKind kRelocSymbolType[] { + RelocKind::Function, // FuncIndexLEB = 0 + RelocKind::FunctionTbl, // TableIndexSLEB = 1 + RelocKind::FunctionTbl, // TableIndexI32 = 2 + RelocKind::Data, // MemoryAddressLEB = 3 + RelocKind::Data, // MemoryAddressSLEB = 4 + RelocKind::Data, // MemoryAddressI32 = 5 + RelocKind::Type, // TypeIndexLEB = 6 + RelocKind::Global, // GlobalIndexLEB = 7 + RelocKind::Text, // FunctionOffsetI32 = 8 + RelocKind::Section, // SectionOffsetI32 = 9 + RelocKind::Tag, // TagIndexLEB = 10 + RelocKind::Data, // MemoryAddressRelSLEB = 11 + RelocKind::Table, // TableIndexRelSLEB = 12 + RelocKind::Global, // GlobalIndexI32 = 13 + RelocKind::Data, // MemoryAddressLEB64 = 14 + RelocKind::Data, // MemoryAddressSLEB64 = 15 + RelocKind::Data, // MemoryAddressI64 = 16 + RelocKind::Data, // MemoryAddressRelSLEB64 = 17 + RelocKind::FunctionTbl, // TableIndexSLEB64 = 18 + RelocKind::FunctionTbl, // TableIndexI64 = 19 + RelocKind::Table, // TableNumberLEB = 20 + RelocKind::Data, // MemoryAddressTLSSLEB = 21 + RelocKind::Text, // FunctionOffsetI64 = 22 + RelocKind::Data, // MemoryAddressLocRelI32 = 23 + RelocKind::FunctionTbl, // TableIndexRelSLEB64 = 24 + RelocKind::Data, // MemoryAddressTLSSLEB64 = 25 + RelocKind::Function, // FuncIndexI32 = 26 +}; + +enum class RelocModifiers { + None = 0, + TLS = 1, + PIC = 2, }; + +inline RelocModifiers operator|(RelocModifiers a, RelocModifiers b) { + using U = std::underlying_type_t; + return RelocModifiers(U(a) | U(b)); +} + +inline RelocModifiers operator&(RelocModifiers a, RelocModifiers b) { + using U = std::underlying_type_t; + return RelocModifiers(U(a) & U(b)); +} + +inline RelocModifiers operator~(RelocModifiers a) { + using U = std::underlying_type_t; + return RelocModifiers(~U(a)); +} + +constexpr RelocModifiers kRelocModifiers[] { + RelocModifiers::None, // FuncIndexLEB = 0 + 
RelocModifiers::None, // TableIndexSLEB = 1 + RelocModifiers::None, // TableIndexI32 = 2 + RelocModifiers::None, // MemoryAddressLEB = 3 + RelocModifiers::None, // MemoryAddressSLEB = 4 + RelocModifiers::None, // MemoryAddressI32 = 5 + RelocModifiers::None, // TypeIndexLEB = 6 + RelocModifiers::None, // GlobalIndexLEB = 7 + RelocModifiers::None, // FunctionOffsetI32 = 8 + RelocModifiers::None, // SectionOffsetI32 = 9 + RelocModifiers::None, // TagIndexLEB = 10 + RelocModifiers::PIC, // MemoryAddressRelSLEB = 11 + RelocModifiers::PIC, // TableIndexRelSLEB = 12 + RelocModifiers::None, // GlobalIndexI32 = 13 + RelocModifiers::None, // MemoryAddressLEB64 = 14 + RelocModifiers::None, // MemoryAddressSLEB64 = 15 + RelocModifiers::None, // MemoryAddressI64 = 16 + RelocModifiers::PIC, // MemoryAddressRelSLEB64 = 17 + RelocModifiers::None, // TableIndexSLEB64 = 18 + RelocModifiers::None, // TableIndexI64 = 19 + RelocModifiers::None, // TableNumberLEB = 20 + RelocModifiers::TLS, // MemoryAddressTLSSLEB = 21 + RelocModifiers::None, // FunctionOffsetI64 = 22 + RelocModifiers::PIC, // MemoryAddressLocRelI32 = 23 + RelocModifiers::PIC, // TableIndexRelSLEB64 = 24 + RelocModifiers::TLS, // MemoryAddressTLSSLEB64 = 25 + RelocModifiers::None, // FuncIndexI32 = 26 +}; + + constexpr int kRelocTypeCount = WABT_ENUM_COUNT(RelocType); +constexpr RelocType RecognizeReloc(RelocKind kind, + RelocDataType type, + RelocModifiers mod) { + for (int i = 0; i < kRelocTypeCount; ++i) { + if (kind != kRelocSymbolType[i]) + continue; + if (type != kRelocDataType[i]) + continue; + if (mod != kRelocModifiers[i]) + continue; + return RelocType(i); + } + return RelocType::None; +} + struct Reloc { Reloc(RelocType, size_t offset, Index index, int32_t addend = 0); diff --git a/include/wabt/ir.h b/include/wabt/ir.h index 5fa4439a0c..66d89cabd1 100644 --- a/include/wabt/ir.h +++ b/include/wabt/ir.h @@ -21,11 +21,13 @@ #include #include #include +#include #include #include #include #include #include 
+#include #include "wabt/binding-hash.h" #include "wabt/common.h" @@ -203,6 +205,83 @@ struct Const { }; using ConstVector = std::vector; +struct IrReloc { + IrReloc(): type(RelocType::None), addend(0) {} /* RelocType::None marks "no relocation"; addend is zeroed so a default IrReloc can be copied without reading an indeterminate value */ + IrReloc(RelocType type, Var symbol, int32_t addend = 0) + : type(type), symbol(symbol), addend(addend) { + static constexpr RelocType addend_allowed[] = { + RelocType::MemoryAddressI32, RelocType::MemoryAddressI64, + RelocType::MemoryAddressLEB, RelocType::MemoryAddressLEB64, + RelocType::MemoryAddressSLEB, RelocType::MemoryAddressSLEB64, + RelocType::MemoryAddressTLSSLEB, RelocType::MemoryAddressTLSSLEB64, + RelocType::MemoryAddressRelSLEB, RelocType::MemoryAddressRelSLEB64, + RelocType::MemoryAddressLocRelI32, RelocType::SectionOffsetI32, + RelocType::FunctionOffsetI32, RelocType::FunctionOffsetI64, + }; + if (addend) { /* a non-zero addend is only accepted for the relocation types listed above */ + for (auto allowed_type: addend_allowed) + if (allowed_type == this->type) + return; + assert(!"Forbidden addend for relocation type"); + } + } + RelocType type; + Var symbol; + int32_t addend; +}; + +class SymbolCommon { +public: + std::string name_; + uint32_t flags_; + SymbolCommon(uint32_t flags = 0, std::string name = "") + : name_(name), flags_(flags) {} + const std::string& name() const { return name_; } + uint32_t flags() const { return flags_; } + + SymbolVisibility visibility() const { + return static_cast(flags() & WABT_SYMBOL_MASK_VISIBILITY); + } + SymbolBinding binding() const { + return static_cast(flags() & WABT_SYMBOL_MASK_BINDING); + } + bool undefined() const { return flags() & WABT_SYMBOL_FLAG_UNDEFINED; } + bool defined() const { return !undefined(); } + bool exported() const { return flags() & WABT_SYMBOL_FLAG_EXPORTED; } + bool explicit_name() const { + return flags() & WABT_SYMBOL_FLAG_EXPLICIT_NAME; + } + bool no_strip() const { return flags() & WABT_SYMBOL_FLAG_NO_STRIP; } + bool non_default(bool imported) const { + uint32_t flags = + flags_ & ~WABT_SYMBOL_FLAG_EXPORTED & ~WABT_SYMBOL_FLAG_UNDEFINED; + if (!undefined() && 
!exported() && name().empty()) + flags &= ~WABT_SYMBOL_MASK_BINDING & ~WABT_SYMBOL_MASK_VISIBILITY; + return flags != 0; + } +}; + +struct DataSym: SymbolCommon { + static DataSym MakeForSearch(Index segment, Index idx) { + return {{0}, "", segment, idx, 0}; + } + bool imported() const { + return segment == kInvalidIndex; + } + std::string name; + Index segment; + Address offset; + Address size; + bool operator<(const DataSym& other) const { + if (imported() && other.imported()) + return offset < other.offset; + if (!imported() && !other.imported()) + return std::tuple(segment, offset) < + std::tuple(other.segment, other.offset); + return !imported() < !other.imported(); + }; +}; + enum class ExpectationType { Values, Either, @@ -789,6 +868,7 @@ class ConstExpr : public ExprMixin { : ExprMixin(loc), const_(c) {} Const const_; + IrReloc reloc; }; // TODO(binji): Rename this, it is used for more than loads/stores now. @@ -808,6 +888,7 @@ class LoadStoreExpr : public MemoryExpr { Opcode opcode; Address align; Address offset; + IrReloc reloc; }; using LoadExpr = LoadStoreExpr; @@ -832,7 +913,7 @@ class AtomicFenceExpr : public ExprMixin { uint32_t consistency_model; }; -struct Tag { +struct Tag: SymbolCommon { explicit Tag(std::string_view name) : name(name) {} std::string name; @@ -900,7 +981,7 @@ inline bool operator!=(const LocalTypes::const_iterator& lhs, return !operator==(lhs, rhs); } -struct Func { +struct Func: SymbolCommon { explicit Func(std::string_view name) : name(name) {} Type GetParamType(Index index) const { return decl.GetParamType(index); } @@ -928,9 +1009,12 @@ struct Func { struct { bool tailcall = false; } features_used; + + // For relocatable binaries, if a function is an init function, its priority + std::optional priority = {}; }; -struct Global { +struct Global: SymbolCommon { explicit Global(std::string_view name) : name(name) {} std::string name; @@ -939,7 +1023,7 @@ struct Global { ExprList init_expr; }; -struct Table { +struct Table: 
SymbolCommon { explicit Table(std::string_view name) : name(name), elem_type(Type::FuncRef) {} @@ -979,6 +1063,8 @@ struct DataSegment { Var memory_var; ExprList offset; std::vector data; + std::vector> relocs; + std::pair symbol_range = {}; }; class Import { @@ -1211,6 +1297,191 @@ struct Custom { Location loc; }; +class Symbol: public SymbolCommon { + public: + struct Function { + static const SymbolType type = SymbolType::Function; + Index index; + }; + struct Data { + static const SymbolType type = SymbolType::Data; + Index index; + Offset offset; + Address size; + }; + struct Global { + static const SymbolType type = SymbolType::Global; + Index index; + }; + struct Section { + static const SymbolType type = SymbolType::Section; + Index section; + }; + struct Tag { + static const SymbolType type = SymbolType::Tag; + Index index; + }; + struct Table { + static const SymbolType type = SymbolType::Table; + Index index; + }; + + private: + SymbolType type_; + union { + Function function_; + Data data_; + Global global_; + Section section_; + Tag tag_; + Table table_; + }; + + public: + Symbol(const std::string& name, uint32_t flags, const Function& f) + : SymbolCommon{flags, name}, type_(Function::type), function_(f) {} + Symbol(const std::string& name, uint32_t flags, const Data& d) + : SymbolCommon{flags, name}, type_(Data::type), data_(d) {} + Symbol(const std::string& name, uint32_t flags, const Global& g) + : SymbolCommon{flags, name}, type_(Global::type), global_(g) {} + Symbol(const std::string& name, uint32_t flags, const Section& s) + : SymbolCommon{flags, name}, type_(Section::type), section_(s) {} + Symbol(const std::string& name, uint32_t flags, const Tag& e) + : SymbolCommon{flags, name}, type_(Tag::type), tag_(e) {} + Symbol(const std::string& name, uint32_t flags, const Table& t) + : SymbolCommon{flags, name}, type_(Table::type), table_(t) {} + + template + auto visit(F f) { + switch (type()) { + case Function::type: + return f(AsFunction()); + case 
Data::type: + return f(AsData()); + case Global::type: + return f(AsGlobal()); + case Section::type: + return f(AsSection()); + case Tag::type: + return f(AsTag()); + case Table::type: + return f(AsTable()); + } + } + + SymbolType type() const { return type_; } + + bool IsFunction() const { return type() == Function::type; } + bool IsData() const { return type() == Data::type; } + bool IsGlobal() const { return type() == Global::type; } + bool IsSection() const { return type() == Section::type; } + bool IsTag() const { return type() == Tag::type; } + bool IsTable() const { return type() == Table::type; } + + const Function& AsFunction() const { + assert(IsFunction()); + return function_; + } + const Data& AsData() const { + assert(IsData()); + return data_; + } + const Global& AsGlobal() const { + assert(IsGlobal()); + return global_; + } + const Section& AsSection() const { + assert(IsSection()); + return section_; + } + const Tag& AsTag() const { + assert(IsTag()); + return tag_; + } + const Table& AsTable() const { + assert(IsTable()); + return table_; + } +}; + +class SymbolTable { + std::vector symbols_; + + // Maps from wasm entities to symbol entry indices + std::vector functions_; + std::vector tables_; + std::vector globals_; + std::vector tags_; + std::vector datas_; + + std::set seen_names_; + + Result EnsureUnique(const std::string_view& name) { /* records name in seen_names_; reports an error and returns Result::Error if it was already recorded */ + if (seen_names_.count(name)) { + fprintf(stderr, + "error: duplicate symbol when writing relocatable " + "binary: %.*s\n", + static_cast<int>(name.size()), name.data()); /* a string_view need not be NUL-terminated and may be empty, so print it with an explicit length */ + return Result::Error; + } + seen_names_.insert(name); + return Result::Ok; + }; + + template + std::vector& GetTable() = delete; + + template + auto GetTable() const + -> const decltype(std::declval().GetTable())& { + return const_cast(this)->GetTable(); + } + + public: + SymbolTable() {} + + Result Populate(const Module* module); + + Result AddSymbol(Symbol sym); + + std::vector& symbols() { return symbols_; } + const std::vector& symbols() const { return symbols_; } + + 
template + Index SymbolIndex(Index index) const { + // For well-formed modules, an index into (e.g.) functions_ will always be + // within bounds; the out-of-bounds case here is just to allow --relocatable + // to write known-invalid modules. + return index < GetTable().size() ? GetTable()[index] : kInvalidIndex; + } + + Index FunctionSymbolIndex(Index index) const { + return SymbolIndex(index); + } + Index TableSymbolIndex(Index index) const { + return SymbolIndex(index); + } + Index GlobalSymbolIndex(Index index) const { + return SymbolIndex(index); + } + Index TagSymbolIndex(Index index) const { + return SymbolIndex(index); + } + Index DataSymbolIndex(Index index) const { + return SymbolIndex(index); + } +}; +template<> +std::vector& SymbolTable::GetTable(); +template<> +std::vector& SymbolTable::GetTable(); +template<> +std::vector& SymbolTable::GetTable(); +template<> +std::vector& SymbolTable::GetTable(); +template<> +std::vector& SymbolTable::GetTable(); + struct Module { Index GetFuncTypeIndex(const Var&) const; Index GetFuncTypeIndex(const FuncDeclaration&) const; @@ -1238,6 +1509,8 @@ struct Module { const ElemSegment* GetElemSegment(const Var&) const; ElemSegment* GetElemSegment(const Var&); Index GetElemSegmentIndex(const Var&) const; + DataSym* GetDataSym(const Var&); + Index GetDataSymIndex(const Var&) const; bool IsImport(ExternalKind kind, const Var&) const; bool IsImport(const Export& export_) const { @@ -1268,6 +1541,7 @@ struct Module { Index num_table_imports = 0; Index num_memory_imports = 0; Index num_global_imports = 0; + Index num_data_imports = 0; // Cached for convenience; the pointers are shared with values that are // stored in either ModuleField or Import. 
@@ -1283,6 +1557,7 @@ struct Module { std::vector data_segments; std::vector starts; std::vector customs; + std::vector data_symbols; BindingHash tag_bindings; BindingHash func_bindings; @@ -1293,6 +1568,7 @@ struct Module { BindingHash memory_bindings; BindingHash data_segment_bindings; BindingHash elem_segment_bindings; + BindingHash data_symbol_bindings; // For a subset of features, the BinaryReaderIR tracks whether they are // actually used by the module. wasm2c (CWriter) uses this information to diff --git a/include/wabt/leb128.h b/include/wabt/leb128.h index e7290475b3..72571cf5c1 100644 --- a/include/wabt/leb128.h +++ b/include/wabt/leb128.h @@ -34,6 +34,8 @@ void WriteU64Leb128(Stream* stream, uint64_t value, const char* desc); void WriteS64Leb128(Stream* stream, uint64_t value, const char* desc); void WriteFixedS32Leb128(Stream* stream, uint32_t value, const char* desc); void WriteFixedU32Leb128(Stream* stream, uint32_t value, const char* desc); +void WriteFixedS64Leb128(Stream* stream, uint64_t value, const char* desc); +void WriteFixedU64Leb128(Stream* stream, uint64_t value, const char* desc); Offset WriteU32Leb128At(Stream* stream, Offset offset, diff --git a/include/wabt/wast-parser.h b/include/wabt/wast-parser.h index 7a60af23b5..354b1b72ca 100644 --- a/include/wabt/wast-parser.h +++ b/include/wabt/wast-parser.h @@ -64,6 +64,11 @@ class WastParser { Var var; }; + struct DatasymAux { + Var name; + Address size; + }; + typedef std::vector ReferenceVars; struct ResolveTypes { @@ -83,6 +88,9 @@ class WastParser { ReferenceVars vars; }; + static std::optional TryTrimPfx(std::string_view string, + std::string_view prefix); + void ErrorUnlessOpcodeEnabled(const Token&); // Print an error message listing the expected tokens, as well as an example @@ -132,6 +140,15 @@ class WastParser { // token is equal to the parameter. If so, then the token is consumed. 
bool MatchLpar(TokenType); + // Returns true if the next token's type is equal to the parameter, and if + // token's text matches parameter. If so, then the token is consumed. + bool MatchText(TokenType, std::string_view); + + // Returns true if the next token's type is equal to the parameter, and if + // token's text starts with parameter. If so, then the token is consumed and + // the rest of token's text is returned. + std::optional MatchTextPrefix(TokenType, std::string_view); + // Like Match(), but prints an error message if the token doesn't match, and // returns Result::Error. Result Expect(TokenType); @@ -196,7 +213,13 @@ class WastParser { Result ParseCustomSectionAnnotation(Module*); bool PeekIsCustom(); + bool PeekIsDataImport(); + Result ParseSymAfterPar(SymbolCommon*, + bool in_import, + DatasymAux* dat_sym = 0); + Result ParseSymOpt(SymbolCommon *, bool in_import, DatasymAux *dat_sym = 0); + Result ParseDataImport(Module* module); Result ParseExportDesc(Export*); Result ParseInlineExports(ModuleFieldList*, ExternalKind); Result ParseInlineImport(Import*); @@ -216,6 +239,14 @@ class WastParser { Result ParseInstrList(ExprList*); Result ParseTerminatingInstrList(ExprList*); Result ParseInstr(ExprList*); + Result ParseRejectReloc(); + Result ParseUnwindReloc(int curr_indent); + Result ParseRelocAfterType(IrReloc*, RelocDataType type); + Result ParseRelocModifiers(RelocModifiers*); + Result ParseRelocKind(RelocKind*); + Result ParseRelocDataType(RelocDataType*); + Result ParseReloc(IrReloc*); + Result ParseReloc(IrReloc*, RelocDataType type); Result ParseCodeMetadataAnnotation(ExprList*); Result ParsePlainInstr(std::unique_ptr*); Result ParseF32(Const*, ConstType type); diff --git a/include/wabt/wat-writer.h b/include/wabt/wat-writer.h index 0f19ba2465..c10337adb0 100644 --- a/include/wabt/wat-writer.h +++ b/include/wabt/wat-writer.h @@ -32,6 +32,7 @@ struct WriteWatOptions { bool fold_exprs = false; // Write folded expressions. 
bool inline_export = false; bool inline_import = false; + bool relocatable = false; }; Result WriteWat(Stream*, const Module*, const WriteWatOptions&); diff --git a/src/binary-reader-ir.cc b/src/binary-reader-ir.cc index 11e88da549..b305e235c0 100644 --- a/src/binary-reader-ir.cc +++ b/src/binary-reader-ir.cc @@ -361,6 +361,23 @@ class BinaryReaderIR : public BinaryReaderNop { std::string_view name, Index table_index) override; + /* Relocation handling */ + Result OnReloc(RelocType type, + Offset offset, + Index index, + uint32_t addend) override; + Result BeginCodeSection(Offset size) override; + Result BeginDataSection(Offset size) override; + Result BeginGenericCustomSection(Offset size) override; + Result BeginElemSection(Offset size) override; + Result OnRelocCount(Index count, Index section_index) override; + Result EndRelocSection() override; + Result BeginSection(Index section_index, + BinarySection section_type, + Offset size) override; + Result OnInitFunction(uint32_t priority, Index sym) override; + Result EndModule() override; + private: Location GetLocation() const; void PrintError(const char* format, ...); @@ -398,6 +415,44 @@ class BinaryReaderIR : public BinaryReaderNop { CodeMetadataExprQueue code_metadata_queue_; std::string_view current_metadata_name_; + + // Queue instructions to patch + struct RelocQueue { + enum Type { + CODE, + DATA, + CUSTOM, + }; + + RelocQueue(Offset start, Type type) + : start(start), type(type), incoming_relocs(), entries(), data_segment_starts() {} + + template + using Entries = std::tuple...>; + + template + decltype(auto) get() { + return std::get>(entries); + } + template + void traverse(F f) { + std::apply([&f](auto&&... 
vs) { (f(vs), ...); }, entries); + } + + Offset start; + Type type; + std::vector incoming_relocs; + Entries entries; + std::map data_segment_starts; + }; + std::unordered_map reloc_queues; + decltype(reloc_queues)::iterator active_reloc_section = end(reloc_queues); + SymbolTable table; + std::multiset data_symbols; + + Index active_section = kInvalidIndex; + void MakeQueue(RelocQueue::Type); + RelocQueue* GetQueue(); }; BinaryReaderIR::BinaryReaderIR(Module* out_module, @@ -467,6 +522,13 @@ Result BinaryReaderIR::TopLabelExpr(LabelNode** label, Expr** expr) { } Result BinaryReaderIR::AppendExpr(std::unique_ptr expr) { + if (RelocQueue* queue = GetQueue()) + queue->traverse([&](auto&& map) { + using Value = std::remove_reference_t; + if (auto* ce = dynamic_cast(expr.get())) { + map.insert({state->offset - queue->start, ce}); + } + }); expr->loc = GetLocation(); LabelNode* label; CHECK_RESULT(TopLabel(&label)); @@ -1480,6 +1542,7 @@ Result BinaryReaderIR::OnDataSegmentData(Index index, Address size) { assert(index == module_->data_segments.size() - 1); DataSegment* segment = module_->data_segments[index]; + GetQueue()->data_segment_starts.emplace(state->offset - size, segment); segment->data.resize(size); if (size > 0) { memcpy(segment->data.data(), data, size); @@ -1750,6 +1813,20 @@ Result BinaryReaderIR::OnDataSymbol(Index index, Index segment, uint32_t offset, uint32_t size) { + bool undef = flags & WABT_SYMBOL_FLAG_UNDEFINED; + if (undef) + ++module_->num_data_imports; + std::string name2{name}; + SymbolCommon common = {flags, name2}; + DataSym sym = + undef ? 
DataSym{common, MakeDollarName(name), kInvalidIndex, + module_->num_data_imports, 0} + : DataSym{common, MakeDollarName(name), segment, offset, size}; + data_symbols.emplace(sym); + assert(index == table.symbols().size()); + table.AddSymbol( + {name2, flags, + Symbol::Data{sym.segment, static_cast(sym.offset), sym.size}}); if (name.empty()) { return Result::Ok; } @@ -1778,14 +1855,18 @@ Result BinaryReaderIR::OnFunctionSymbol(Index index, uint32_t flags, std::string_view name, Index func_index) { - if (name.empty()) { - return Result::Ok; - } + assert(index == table.symbols().size()); + Symbol sym = {std::string(name), flags, Symbol::Function{func_index}}; + table.AddSymbol(sym); if (func_index >= module_->funcs.size()) { PrintError("invalid function index: %" PRIindex, func_index); return Result::Error; } Func* func = module_->funcs[func_index]; + static_cast(*func) = sym; + if (name.empty()) { + return Result::Ok; + } if (!func->name.empty()) { // The name section has already named this function. 
return Result::Ok; @@ -1801,12 +1882,23 @@ Result BinaryReaderIR::OnGlobalSymbol(Index index, uint32_t flags, std::string_view name, Index global_index) { + assert(index == table.symbols().size()); + Symbol sym = {std::string(name), flags, Symbol::Global{global_index}}; + table.AddSymbol(sym); + if (global_index >= module_->globals.size()) { + PrintError("invalid global index: %" PRIindex, global_index); + return Result::Error; + } + Global* glob = module_->globals[global_index]; + static_cast(*glob) = sym; return SetGlobalName(global_index, name); } Result BinaryReaderIR::OnSectionSymbol(Index index, uint32_t flags, Index section_index) { + assert(index == table.symbols().size()); + table.AddSymbol({"", flags, Symbol::Section{section_index}}); return Result::Ok; } @@ -1814,14 +1906,18 @@ Result BinaryReaderIR::OnTagSymbol(Index index, uint32_t flags, std::string_view name, Index tag_index) { - if (name.empty()) { - return Result::Ok; - } + assert(index == table.symbols().size()); + Symbol sym = {std::string(name), flags, Symbol::Tag{tag_index}}; + table.AddSymbol(sym); if (tag_index >= module_->tags.size()) { PrintError("invalid tag index: %" PRIindex, tag_index); return Result::Error; } Tag* tag = module_->tags[tag_index]; + static_cast(*tag) = sym; + if (name.empty()) { + return Result::Ok; + } std::string dollar_name = GetUniqueName(&module_->tag_bindings, MakeDollarName(name)); tag->name = dollar_name; @@ -1833,9 +1929,99 @@ Result BinaryReaderIR::OnTableSymbol(Index index, uint32_t flags, std::string_view name, Index table_index) { + assert(index == table.symbols().size()); + Symbol sym = {std::string(name), flags, Symbol::Table{table_index}}; + table.AddSymbol(sym); + if (table_index >= module_->tables.size()) { + PrintError("invalid table index: %" PRIindex, table_index); + return Result::Error; + } + Table* table = module_->tables[table_index]; + static_cast(*table) = sym; return SetTableName(table_index, name); } +Result BinaryReaderIR::OnReloc(RelocType 
type, + Offset offset, + Index index, + uint32_t addend) { + GetQueue()->incoming_relocs.emplace_back(type, offset, index, addend); + return Result::Ok; +} +void BinaryReaderIR::MakeQueue(RelocQueue::Type t) { + assert(active_section != kInvalidIndex); + active_reloc_section = + reloc_queues.insert({active_section, RelocQueue{state->offset, t}}).first; +} +BinaryReaderIR::RelocQueue* BinaryReaderIR::GetQueue() { + if (active_reloc_section != end(reloc_queues)) + return &active_reloc_section->second; + return nullptr; +} + +Result BinaryReaderIR::BeginCodeSection(Offset size) { + MakeQueue(RelocQueue::CODE); + return Result::Ok; +} + +Result BinaryReaderIR::BeginDataSection(Offset size) { + MakeQueue(RelocQueue::DATA); + return Result::Ok; +} + +Result BinaryReaderIR::BeginGenericCustomSection(Offset size) { + MakeQueue(RelocQueue::CUSTOM); + return Result::Ok; +} + +Result BinaryReaderIR::BeginElemSection(Offset size) { + return Result::Ok; +} + +Result BinaryReaderIR::OnRelocCount(Index count, Index section_index) { + active_reloc_section = reloc_queues.find(section_index); + if (!GetQueue()) { + if (active_section < section_index) { + PrintError( + "Relocation section [%d] does not follow its target section [%d]", + active_section, section_index); + } else { + PrintError( + "The target section for the relocation section [%d] does not have a " + "valid index [%d]", + active_section, section_index); + } + return Result::Error; + } + return Result::Ok; +} + +Result BinaryReaderIR::EndRelocSection() { + active_reloc_section = end(reloc_queues); + return Result::Ok; +} + +Result BinaryReaderIR::BeginSection(Index section_index, + BinarySection section_type, + Offset size) { + active_section = section_index; + return Result::Ok; +} + +Result BinaryReaderIR::OnInitFunction(uint32_t prio, Index sym) { + if (sym >= table.symbols().size()) { + return Result::Ok; + // PrintError("invalid init function priority symbol index: %" PRIindex, + // sym); return Result::Error; + } 
+ const Symbol& init_sym = table.symbols()[sym]; /* sym was bounds-checked above, but its kind comes straight from the binary */ + if (!init_sym.IsFunction() || init_sym.AsFunction().index >= module_->funcs.size()) + // Not a function symbol, or a function index we already emitted an error for + return Result::Ok; + module_->funcs[init_sym.AsFunction().index]->priority = prio; + return Result::Ok; +} + Result BinaryReaderIR::OnGenericCustomSection(std::string_view name, const void* data, Offset size) { @@ -1848,6 +2034,138 @@ Result BinaryReaderIR::OnGenericCustomSection(std::string_view name, return Result::Ok; } +Result BinaryReaderIR::EndModule() { + size_t i = 0; + Index range_start = 0, data_segment = -1; + for (auto& datasym : data_symbols) { + if (datasym.segment >= module_->data_segments.size() && datasym.segment != kInvalidIndex) + // all further symbols are invalid + break; + if (datasym.segment != data_segment) { + if (data_segment != kInvalidIndex) { + module_->data_segments[data_segment]->symbol_range = {range_start, i}; + } + range_start = i; + data_segment = datasym.segment; + } + module_->data_symbols.push_back(datasym); + if (!datasym.name.empty()) { + module_->data_symbols[i].name = datasym.name; + module_->data_symbol_bindings.emplace(datasym.name, i); + } + ++i; + } + if (data_segment != kInvalidIndex) { + module_->data_segments[data_segment]->symbol_range = {range_start, i}; + } + + auto lookup_reloc = [this](Reloc r) { + auto maybe_name = [](auto& table, Index idx) { + if (idx >= table.size()) + return Var{kInvalidIndex, {}}; + auto sym = Overload{ + [](auto* x) { return x; }, + [](auto& x) { return &x; }, + }(table[idx]); + return sym->name.empty() ? 
Var{idx, {}} : Var{sym->name, {}}; + }; + + if (r.index >= size(table.symbols())) + return Var{kInvalidIndex, {}}; + + auto& sym = table.symbols()[r.index]; + switch (sym.type()) { + case SymbolType::Data: { + auto& data = sym.AsData(); + auto&& syms = module_->data_symbols; + auto res = + std::lower_bound(syms.begin(), syms.end(), + DataSym::MakeForSearch(data.index, data.offset)); + Index sym = res - syms.begin(); + return maybe_name(module_->data_symbols, sym); + } + // Sure would've been nice to have a feature that would allow one to write + // a piece of code and stamp it out multiple times, but with different + // types and stuff. Better yet, maybe use that to yield different data for + // different types. And call that feature templates, that'd be a great + // name for it! + case SymbolType::Section: { + auto idx = sym.AsSection().section; + return maybe_name(module_->customs, idx); + } + case SymbolType::Function: { + auto idx = sym.AsFunction().index; + return maybe_name(module_->funcs, idx); + } + case SymbolType::Global: { + auto idx = sym.AsGlobal().index; + return maybe_name(module_->globals, idx); + } + case SymbolType::Table: { + auto idx = sym.AsTable().index; + return maybe_name(module_->tables, idx); + } + case SymbolType::Tag: { + auto idx = sym.AsTag().index; + return maybe_name(module_->tags, idx); + } + default: + WABT_UNREACHABLE; + } + }; + + for (auto& [index, queue] : reloc_queues) { + for (auto reloc : queue.incoming_relocs) { + bool applied_relocation = false; + Var sym_id = lookup_reloc(reloc); + if (sym_id.is_index() && sym_id.index() == kInvalidIndex) + // this reloc points to an invalid symbol and is therefore unapplicable + continue; + auto reloc_size = + kRelocDataTypeSize[int(kRelocDataType[int(reloc.type)])]; + // We pray that the relocation is always the last operand, and that the + // operand is an overlong leb already + auto reloc_addr = reloc.offset + reloc_size; + if (queue.type == RelocQueue::CODE && 
kRelocDataType[int(reloc.type)] == RelocDataType::LEB) { + switch (kRelocSymbolType[int(reloc.type)]) { + case RelocKind::Global: + case RelocKind::Type: + case RelocKind::Table: + case RelocKind::Function: + // Assume all relocations of primary shape are valid, we have no way + // to check + continue; + default: + break; + } + } + queue.traverse([&](auto& insns) { + auto insn = insns.find(reloc_addr); + if (insn != end(insns)) { + insn->second->reloc = {reloc.type, sym_id, reloc.addend}; + assert(insn->second->reloc.type != RelocType::None); + applied_relocation = true; + } + }); + if (applied_relocation) + continue; + auto abs_offset = reloc.offset + queue.start; /* data_segment_starts is keyed by absolute offset, so search with the absolute one */ + auto it = queue.data_segment_starts.upper_bound(abs_offset); + if (it != begin(queue.data_segment_starts)) { + --it; /* last segment starting at or before the relocation, so abs_offset - it->first cannot underflow */ + if (it->first + it->second->data.size() >= abs_offset + reloc_size) { + it->second->relocs.push_back( + {abs_offset - it->first, {reloc.type, sym_id, reloc.addend}}); + applied_relocation = true; + } + } + assert(applied_relocation && "Unable to apply relocation"); + } + } + + return Result::Ok; +} + } // end anonymous namespace Result ReadBinaryIr(const char* filename, diff --git a/src/binary-reader.cc b/src/binary-reader.cc index cbdd3384f8..434e15bf9e 100644 --- a/src/binary-reader.cc +++ b/src/binary-reader.cc @@ -2341,6 +2341,9 @@ Result BinaryReader::ReadLinkingSection(Offset section_size) { CALLBACK(OnSectionSymbol, i, flags, index); break; } + default: + PrintError("Unknown symbol type: %d", static_cast(sym_type)); + return Result::Error; } } break; diff --git a/src/binary-writer.cc b/src/binary-writer.cc index 600154b941..9451587176 100644 --- a/src/binary-writer.cc +++ b/src/binary-writer.cc @@ -117,252 +117,6 @@ struct RelocSection { std::vector relocations; }; -class Symbol { - public: - struct Function { - static const SymbolType type = SymbolType::Function; - Index index; - }; - struct Data { - static const SymbolType type = SymbolType::Data; - Index index; - 
Offset offset; - Address size; - }; - struct Global { - static const SymbolType type = SymbolType::Global; - Index index; - }; - struct Section { - static const SymbolType type = SymbolType::Section; - Index section; - }; - struct Tag { - static const SymbolType type = SymbolType::Tag; - Index index; - }; - struct Table { - static const SymbolType type = SymbolType::Table; - Index index; - }; - - private: - SymbolType type_; - std::string_view name_; - uint8_t flags_; - union { - Function function_; - Data data_; - Global global_; - Section section_; - Tag tag_; - Table table_; - }; - - public: - Symbol(const std::string_view& name, uint8_t flags, const Function& f) - : type_(Function::type), name_(name), flags_(flags), function_(f) {} - Symbol(const std::string_view& name, uint8_t flags, const Data& d) - : type_(Data::type), name_(name), flags_(flags), data_(d) {} - Symbol(const std::string_view& name, uint8_t flags, const Global& g) - : type_(Global::type), name_(name), flags_(flags), global_(g) {} - Symbol(const std::string_view& name, uint8_t flags, const Section& s) - : type_(Section::type), name_(name), flags_(flags), section_(s) {} - Symbol(const std::string_view& name, uint8_t flags, const Tag& e) - : type_(Tag::type), name_(name), flags_(flags), tag_(e) {} - Symbol(const std::string_view& name, uint8_t flags, const Table& t) - : type_(Table::type), name_(name), flags_(flags), table_(t) {} - - SymbolType type() const { return type_; } - const std::string_view& name() const { return name_; } - uint8_t flags() const { return flags_; } - - SymbolVisibility visibility() const { - return static_cast(flags() & WABT_SYMBOL_MASK_VISIBILITY); - } - SymbolBinding binding() const { - return static_cast(flags() & WABT_SYMBOL_MASK_BINDING); - } - bool undefined() const { return flags() & WABT_SYMBOL_FLAG_UNDEFINED; } - bool defined() const { return !undefined(); } - bool exported() const { return flags() & WABT_SYMBOL_FLAG_EXPORTED; } - bool explicit_name() const { - 
return flags() & WABT_SYMBOL_FLAG_EXPLICIT_NAME; - } - bool no_strip() const { return flags() & WABT_SYMBOL_FLAG_NO_STRIP; } - - bool IsFunction() const { return type() == Function::type; } - bool IsData() const { return type() == Data::type; } - bool IsGlobal() const { return type() == Global::type; } - bool IsSection() const { return type() == Section::type; } - bool IsTag() const { return type() == Tag::type; } - bool IsTable() const { return type() == Table::type; } - - const Function& AsFunction() const { - assert(IsFunction()); - return function_; - } - const Data& AsData() const { - assert(IsData()); - return data_; - } - const Global& AsGlobal() const { - assert(IsGlobal()); - return global_; - } - const Section& AsSection() const { - assert(IsSection()); - return section_; - } - const Tag& AsTag() const { - assert(IsTag()); - return tag_; - } - const Table& AsTable() const { - assert(IsTable()); - return table_; - } -}; - -class SymbolTable { - WABT_DISALLOW_COPY_AND_ASSIGN(SymbolTable); - - std::vector symbols_; - - std::vector functions_; - std::vector tables_; - std::vector globals_; - - std::set seen_names_; - - Result EnsureUnique(const std::string_view& name) { - if (seen_names_.count(name)) { - fprintf(stderr, - "error: duplicate symbol when writing relocatable " - "binary: %s\n", - &name[0]); - return Result::Error; - } - seen_names_.insert(name); - return Result::Ok; - }; - - template - Result AddSymbol(std::vector* map, - std::string_view name, - bool imported, - bool exported, - T&& sym) { - uint8_t flags = 0; - if (imported) { - flags |= WABT_SYMBOL_FLAG_UNDEFINED; - // Wabt currently has no way for a user to explicitly specify the name of - // an import, so never set the EXPLICIT_NAME flag, and ignore any display - // name fabricated by wabt. - name = std::string_view(); - } else { - if (name.empty()) { - // Definitions without a name are local. 
- flags |= uint8_t(SymbolBinding::Local); - flags |= uint8_t(SymbolVisibility::Hidden); - } else { - // Otherwise, strip the dollar off the name; a definition $foo is - // available for linking as "foo". - assert(name[0] == '$'); - name.remove_prefix(1); - } - - if (exported) { - CHECK_RESULT(EnsureUnique(name)); - flags |= uint8_t(SymbolVisibility::Hidden); - flags |= WABT_SYMBOL_FLAG_NO_STRIP; - } - } - if (exported) { - flags |= WABT_SYMBOL_FLAG_EXPORTED; - } - - map->push_back(symbols_.size()); - symbols_.emplace_back(name, flags, sym); - return Result::Ok; - }; - - Index SymbolIndex(const std::vector& table, Index index) const { - // For well-formed modules, an index into (e.g.) functions_ will always be - // within bounds; the out-of-bounds case here is just to allow --relocatable - // to write known-invalid modules. - return index < table.size() ? table[index] : kInvalidIndex; - } - - public: - SymbolTable() {} - - Result Populate(const Module* module) { - std::set exported_funcs; - std::set exported_globals; - std::set exported_tags; - std::set exported_tables; - - for (const Export* export_ : module->exports) { - switch (export_->kind) { - case ExternalKind::Func: - exported_funcs.insert(module->GetFuncIndex(export_->var)); - break; - case ExternalKind::Table: - exported_tables.insert(module->GetTableIndex(export_->var)); - break; - case ExternalKind::Memory: - break; - case ExternalKind::Global: - exported_globals.insert(module->GetGlobalIndex(export_->var)); - break; - case ExternalKind::Tag: - exported_tags.insert(module->GetTagIndex(export_->var)); - break; - } - } - - // We currently only create symbol table entries for function, table, and - // global symbols. 
- for (size_t i = 0; i < module->funcs.size(); ++i) { - const Func* func = module->funcs[i]; - bool imported = i < module->num_func_imports; - bool exported = exported_funcs.count(i); - CHECK_RESULT(AddSymbol(&functions_, func->name, imported, exported, - Symbol::Function{Index(i)})); - } - - for (size_t i = 0; i < module->tables.size(); ++i) { - const Table* table = module->tables[i]; - bool imported = i < module->num_table_imports; - bool exported = exported_tables.count(i); - CHECK_RESULT(AddSymbol(&tables_, table->name, imported, exported, - Symbol::Table{Index(i)})); - } - - for (size_t i = 0; i < module->globals.size(); ++i) { - const Global* global = module->globals[i]; - bool imported = i < module->num_global_imports; - bool exported = exported_globals.count(i); - CHECK_RESULT(AddSymbol(&globals_, global->name, imported, exported, - Symbol::Global{Index(i)})); - } - - return Result::Ok; - } - - const std::vector& symbols() const { return symbols_; } - Index FunctionSymbolIndex(Index index) const { - return SymbolIndex(functions_, index); - } - Index TableSymbolIndex(Index index) const { - return SymbolIndex(tables_, index); - } - Index GlobalSymbolIndex(Index index) const { - return SymbolIndex(globals_, index); - } -}; - struct CodeMetadata { Offset offset; std::vector data; @@ -407,6 +161,8 @@ class BinaryWriter { Index GetLocalIndex(const Func* func, const Var& var); Index GetSymbolIndex(RelocType reloc_type, Index index); void AddReloc(RelocType reloc_type, Index index); + void AddRelocAt(IrReloc, Offset); + void AddReloc(IrReloc); void WriteBlockDecl(const BlockDeclaration& decl); void WriteU32Leb128WithReloc(Index index, const char* desc, @@ -601,14 +357,20 @@ Index BinaryWriter::GetTagVarDepth(const Var* var) { } Index BinaryWriter::GetSymbolIndex(RelocType reloc_type, Index index) { - switch (reloc_type) { - case RelocType::FuncIndexLEB: + switch (kRelocSymbolType[int(reloc_type)]) { + case RelocKind::FunctionTbl: + case RelocKind::Function: + case 
RelocKind::Text: return symtab_.FunctionSymbolIndex(index); - case RelocType::TableNumberLEB: + case RelocKind::Table: return symtab_.TableSymbolIndex(index); - case RelocType::GlobalIndexLEB: + case RelocKind::Global: return symtab_.GlobalSymbolIndex(index); - case RelocType::TypeIndexLEB: + case RelocKind::Data: + return symtab_.DataSymbolIndex(index); + case RelocKind::Tag: + return symtab_.TagSymbolIndex(index); + case RelocKind::Type: // Type indexes don't create entries in the symbol table; instead their // index is used directly. return index; @@ -619,7 +381,7 @@ Index BinaryWriter::GetSymbolIndex(RelocType reloc_type, Index index) { } } -void BinaryWriter::AddReloc(RelocType reloc_type, Index index) { +void BinaryWriter::AddRelocAt(IrReloc r, Offset offset) { // Add a new reloc section if needed if (!current_reloc_section_ || current_reloc_section_->section_index != section_count_) { @@ -629,16 +391,25 @@ void BinaryWriter::AddReloc(RelocType reloc_type, Index index) { } // Add a new relocation to the curent reloc section - size_t offset = stream_->offset() - last_section_payload_offset_; - Index symbol_index = GetSymbolIndex(reloc_type, index); + Index symbol_index = GetSymbolIndex(r.type, r.symbol.index()); if (symbol_index == kInvalidIndex) { // The file is invalid, for example a reference to function 42 where only 10 // functions are defined. The user must have already passed --no-check, so // no extra warning here is needed. 
return; } - current_reloc_section_->relocations.emplace_back(reloc_type, offset, - symbol_index); + current_reloc_section_->relocations.emplace_back(r.type, offset, symbol_index, + r.addend); +} + +void BinaryWriter::AddReloc(IrReloc r) { + // Add a new relocation to the curent reloc section + size_t offset = stream_->offset() - last_section_payload_offset_; + return AddRelocAt(r, offset); +} + +void BinaryWriter::AddReloc(RelocType reloc_type, Index index) { + return AddReloc({reloc_type, Var{index, {}}}); } void BinaryWriter::WriteU32Leb128WithReloc(Index index, @@ -701,7 +472,24 @@ void BinaryWriter::WriteLoadStoreExpr(const Func* func, } else { stream_->WriteU8(log2_u32(align), "alignment"); } - WriteU64Leb128(stream_, typed_expr->offset, desc); + if constexpr (std::is_same_v || std::is_same_v) { + if (options_.relocatable && typed_expr->reloc.type != RelocType::None) { + AddReloc(typed_expr->reloc); + switch (kRelocDataType[int(typed_expr->reloc.type)]) { + case RelocDataType::LEB64: + WriteFixedU64Leb128(stream_, typed_expr->offset, desc); + break; + case RelocDataType::LEB: + WriteFixedU32Leb128(stream_, typed_expr->offset, desc); + break; + default: + WABT_UNREACHABLE; + } + } else + WriteU64Leb128(stream_, typed_expr->offset, desc); + } else { + WriteU64Leb128(stream_, typed_expr->offset, desc); + } } template @@ -814,16 +602,31 @@ void BinaryWriter::WriteExpr(const Func* func, const Expr* expr) { WriteOpcode(stream_, cast(expr)->opcode); break; case ExprType::Const: { - const Const& const_ = cast(expr)->const_; + const ConstExpr* const_expr = cast(expr); + const Const& const_ = const_expr->const_; switch (const_.type()) { case Type::I32: { WriteOpcode(stream_, Opcode::I32Const); - WriteS32Leb128(stream_, const_.u32(), "i32 literal"); + if (options_.relocatable && + const_expr->reloc.type != RelocType::None) { + assert(kRelocDataType[int(const_expr->reloc.type)] == + RelocDataType::SLEB); + AddReloc(const_expr->reloc); + WriteFixedS32Leb128(stream_, 
const_.u32(), "i32 literal"); + } else + WriteS32Leb128(stream_, const_.u32(), "i32 literal"); break; } case Type::I64: WriteOpcode(stream_, Opcode::I64Const); - WriteS64Leb128(stream_, const_.u64(), "i64 literal"); + if (options_.relocatable && + const_expr->reloc.type != RelocType::None) { + assert(kRelocDataType[int(const_expr->reloc.type)] == + RelocDataType::SLEB64); + AddReloc(const_expr->reloc); + WriteFixedS64Leb128(stream_, const_.u64(), "i64 literal"); + } else + WriteS64Leb128(stream_, const_.u64(), "i64 literal"); break; case Type::F32: WriteOpcode(stream_, Opcode::F32Const); @@ -1707,7 +1510,11 @@ Result BinaryWriter::WriteModule() { } WriteU32Leb128(stream_, segment->data.size(), "data segment size"); WriteHeader("data segment data", i); + size_t start_offset = stream_->offset() - last_section_payload_offset_; stream_->WriteData(segment->data, "data segment data"); + for (auto& [offset, reloc] : segment->relocs) { + AddRelocAt(reloc, offset + start_offset); + } } EndSection(); } diff --git a/src/ir.cc b/src/ir.cc index 47b5cb3187..053237ccc7 100644 --- a/src/ir.cc +++ b/src/ir.cc @@ -110,6 +110,72 @@ bool FuncSignature::operator==(const FuncSignature& rhs) const { return param_types == rhs.param_types && result_types == rhs.result_types; } +template <> +std::vector& SymbolTable::GetTable() { + return functions_; +} +template <> +std::vector& SymbolTable::GetTable() { + return tables_; +} +template <> +std::vector& SymbolTable::GetTable() { + return globals_; +} +template <> +std::vector& SymbolTable::GetTable() { + return tags_; +} +template <> +std::vector& SymbolTable::GetTable() { + return datas_; +} + +void EnlargeFor(std::vector& v, Index i) { + if (size(v) <= i) + v.resize(i + 1, kInvalidIndex); +} + +Result SymbolTable::AddSymbol(Symbol sym) { + sym.visit([this](auto type) { + if constexpr (!std::is_same_v && + !std::is_same_v) { + auto& table = this->GetTable(); + EnlargeFor(table, type.index); + // This is lossy since multiple symbols are 
genuinely possible, but apart + // from data symbols their semantics is not very clear + if (table[type.index] == kInvalidIndex) + table[type.index] = symbols_.size(); + } + }); + symbols_.push_back(sym); + return Result::Ok; +} +Result SymbolTable::Populate(const Module* module) { + auto add = [&](auto& table, auto make_sym) { + for (size_t i = 0; i < table.size(); ++i) { + auto sym = table[i]; + CHECK_RESULT(AddSymbol({sym->name_, sym->flags_, make_sym(i, sym)})); + } + return Result::Ok; + }; + add(module->funcs, [](Index i, auto&) { return Symbol::Function{i}; }); + add(module->tables, [](Index i, auto&) { return Symbol::Table{i}; }); + add(module->globals, [](Index i, auto&) { return Symbol::Global{i}; }); + add(module->tags, [](Index i, auto&) { return Symbol::Tag{i}; }); + for (size_t i = 0; i < module->data_symbols.size(); ++i) { + auto& sym = module->data_symbols[i]; + CHECK_RESULT( + AddSymbol({sym.name_, sym.flags_, + Symbol::Data{sym.segment, static_cast(sym.offset), + sym.size}})); + EnlargeFor(datas_, i); + datas_[i] = symbols().size() - 1; + } + + return Result::Ok; +} + const Export* Module::GetExport(std::string_view name) const { Index index = export_bindings.FindIndex(name); if (index >= exports.size()) { @@ -150,6 +216,10 @@ Index Module::GetElemSegmentIndex(const Var& var) const { return elem_segment_bindings.FindIndex(var); } +Index Module::GetDataSymIndex(const Var& var) const { + return data_symbol_bindings.FindIndex(var); +} + bool Module::IsImport(ExternalKind kind, const Var& var) const { switch (kind) { case ExternalKind::Func: @@ -312,6 +382,14 @@ ElemSegment* Module::GetElemSegment(const Var& var) { return elem_segments[index]; } +DataSym* Module::GetDataSym(const Var& var) { + Index index = data_symbol_bindings.FindIndex(var); + if (index >= elem_segments.size()) { + return nullptr; + } + return &data_symbols[index]; +} + const FuncType* Module::GetFuncType(const Var& var) const { return const_cast(this)->GetFuncType(var); } diff 
--git a/src/leb128.cc b/src/leb128.cc index 6c5a650fa9..ed2f2a26aa 100644 --- a/src/leb128.cc +++ b/src/leb128.cc @@ -141,6 +141,22 @@ void WriteU64Leb128(Stream* stream, uint64_t value, const char* desc) { stream->WriteData(data, length, desc); } +void WriteFixedU64Leb128(Stream* stream, uint64_t value, const char* desc) { + uint8_t data[MAX_U64_LEB128_BYTES]; + Offset length = 0; + LEB128_LOOP_UNTIL(length == MAX_U64_LEB128_BYTES); + stream->WriteData(data, length, desc); +} +void WriteFixedS64Leb128(Stream* stream, int64_t value, const char* desc) { + uint8_t data[MAX_U64_LEB128_BYTES]; + Offset length = 0; + LEB128_LOOP_UNTIL(length == MAX_U64_LEB128_BYTES); + stream->WriteData(data, length, desc); +} +void WriteFixedS64Leb128(Stream* stream, uint64_t value, const char* desc) { + WriteS64Leb128(stream, Bitcast(value), desc); +} + void WriteS64Leb128(Stream* stream, uint64_t value, const char* desc) { WriteS64Leb128(stream, Bitcast(value), desc); } diff --git a/src/resolve-names.cc b/src/resolve-names.cc index 67fc44e923..5ac824b4ce 100644 --- a/src/resolve-names.cc +++ b/src/resolve-names.cc @@ -81,6 +81,7 @@ class NameResolver : public ExprVisitor::DelegateNop { Result OnRethrowExpr(RethrowExpr*) override; Result OnSimdLoadLaneExpr(SimdLoadLaneExpr*) override; Result OnSimdStoreLaneExpr(SimdStoreLaneExpr*) override; + Result OnConstExpr(ConstExpr*) override; private: void PrintError(const Location* loc, const char* fmt, ...); @@ -100,7 +101,9 @@ class NameResolver : public ExprVisitor::DelegateNop { void ResolveTagVar(Var* var); void ResolveDataSegmentVar(Var* var); void ResolveElemSegmentVar(Var* var); + void ResolveDataVar(Var* var); void ResolveLocalVar(Var* var); + void ResolveReloc(IrReloc* reloc); void ResolveBlockDeclarationVar(BlockDeclaration* decl); void VisitFunc(Func* func); void VisitExport(Export* export_); @@ -219,6 +222,9 @@ void NameResolver::ResolveDataSegmentVar(Var* var) { void NameResolver::ResolveElemSegmentVar(Var* var) { 
ResolveVar(¤t_module_->elem_segment_bindings, var, "elem segment"); } +void NameResolver::ResolveDataVar(Var* var) { + ResolveVar(¤t_module_->data_symbol_bindings, var, "data symbol"); +} void NameResolver::ResolveLocalVar(Var* var) { if (var->is_name()) { @@ -236,6 +242,35 @@ void NameResolver::ResolveLocalVar(Var* var) { var->set_index(index); } } +void NameResolver::ResolveReloc(IrReloc* reloc) { + if (reloc->type == RelocType::None) + return; + switch (kRelocSymbolType[int(reloc->type)]) { + case RelocKind::Text: + case RelocKind::Function: + case RelocKind::FunctionTbl: + ResolveFuncVar(&reloc->symbol); + break; + case RelocKind::Data: + ResolveDataVar(&reloc->symbol); + break; + case RelocKind::Type: + ResolveFuncTypeVar(&reloc->symbol); + break; + case RelocKind::Table: + ResolveTableVar(&reloc->symbol); + break; + case RelocKind::Global: + ResolveGlobalVar(&reloc->symbol); + break; + case RelocKind::Tag: + ResolveTagVar(&reloc->symbol); + break; + case RelocKind::Section: + // Do nothing for now + break; + } +} void NameResolver::ResolveBlockDeclarationVar(BlockDeclaration* decl) { if (decl->has_func_type) { @@ -331,6 +366,7 @@ Result NameResolver::EndIfExpr(IfExpr* expr) { Result NameResolver::OnLoadExpr(LoadExpr* expr) { ResolveMemoryVar(&expr->memidx); + ResolveReloc(&expr->reloc); return Result::Ok; } @@ -430,6 +466,7 @@ Result NameResolver::OnRefFuncExpr(RefFuncExpr* expr) { Result NameResolver::OnStoreExpr(StoreExpr* expr) { ResolveMemoryVar(&expr->memidx); + ResolveReloc(&expr->reloc); return Result::Ok; } @@ -500,6 +537,10 @@ Result NameResolver::OnSimdStoreLaneExpr(SimdStoreLaneExpr* expr) { ResolveMemoryVar(&expr->memidx); return Result::Ok; } +Result NameResolver::OnConstExpr(ConstExpr* expr) { + ResolveReloc(&expr->reloc); + return Result::Ok; +} void NameResolver::VisitFunc(Func* func) { current_func_ = func; @@ -566,6 +607,8 @@ void NameResolver::VisitElemSegment(ElemSegment* segment) { void NameResolver::VisitDataSegment(DataSegment* segment) 
{ ResolveMemoryVar(&segment->memory_var); visitor_.VisitExprList(segment->offset); + for (auto& [offset, reloc] : segment->relocs) + ResolveReloc(&reloc); } Result NameResolver::VisitModule(Module* module) { diff --git a/src/tools/wasm2wat.cc b/src/tools/wasm2wat.cc index 25e1c743c6..015865fff5 100644 --- a/src/tools/wasm2wat.cc +++ b/src/tools/wasm2wat.cc @@ -46,6 +46,7 @@ static bool s_read_debug_names = true; static bool s_fail_on_custom_section_error = true; static std::unique_ptr s_log_stream; static bool s_validate = true; +static bool s_relocatable = false; static const char s_description[] = R"( Read a file in the WebAssembly binary format, and convert it to @@ -96,6 +97,8 @@ static void ParseOptions(int argc, char** argv) { s_infile = argument; ConvertBackslashToSlash(&s_infile); }); + parser.AddOption('r', "relocatable", "Generate relocation annotations", + []() { s_relocatable = true; }); parser.Parse(argc, argv); } @@ -138,6 +141,7 @@ int ProgramMain(int argc, char** argv) { wat_options.fold_exprs = s_fold_exprs; wat_options.inline_import = s_inline_import; wat_options.inline_export = s_inline_export; + wat_options.relocatable = s_relocatable; FileStream stream(!s_outfile.empty() ? 
FileStream(s_outfile) : FileStream(stdout)); result = WriteWat(&stream, &module, wat_options); diff --git a/src/wast-parser.cc b/src/wast-parser.cc index b452697bfa..5e49fb2753 100644 --- a/src/wast-parser.cc +++ b/src/wast-parser.cc @@ -587,7 +587,8 @@ TokenType WastParser::Peek(size_t n) { } if ((options_->features.code_metadata_enabled() && cur.text().find("metadata.code.") == 0) || - cur.text() == "custom") { + cur.text() == "custom" || cur.text() == "reloc" || + cur.text() == "sym.import.data" || cur.text() == "sym") { tokens_.push_back(cur); continue; } @@ -657,6 +658,26 @@ bool WastParser::MatchLpar(TokenType type) { return false; } +bool WastParser::MatchText(TokenType type, std::string_view text) { + auto tok = GetToken(); + if (tok.token_type() == type && tok.text() == text) { + Consume(); + return true; + } + return false; +} +std::optional WastParser::MatchTextPrefix( + TokenType type, + std::string_view prefix) { + auto tok = GetToken(); + if (tok.token_type() == type) + if (auto rest = TryTrimPfx(tok.text(), prefix)) { + Consume(); + return rest; + } + return std::nullopt; +} + Result WastParser::Expect(TokenType type) { if (!Match(type)) { Token token = Consume(); @@ -693,6 +714,14 @@ Result WastParser::Synchronize(SynchronizeFunc func) { return Result::Error; } +std::optional WastParser::TryTrimPfx( + std::string_view string, + std::string_view prefix) { + if (string.substr(0, prefix.size()) == prefix) + return string.substr(prefix.size()); + return std::nullopt; +} + void WastParser::ErrorUnlessOpcodeEnabled(const Token& token) { Opcode opcode = token.opcode(); if (!opcode.IsEnabled(options_->features)) { @@ -1297,6 +1326,134 @@ bool WastParser::PeekIsCustom() { return options_->features.annotations_enabled() && IsLparAnn(PeekPair()) && tokens_.front().text() == "custom"; } +bool WastParser::PeekIsDataImport() { + // If IsLparAnn succeeds, tokens_.front() must have text, as it is an LparAnn + // token. 
+ return options_->features.annotations_enabled() && IsLparAnn(PeekPair()) && + tokens_.front().text() == "sym.import.data"; +} + +Result WastParser::ParseSymAfterPar(SymbolCommon* sym, + bool in_import, + DatasymAux* data) { + using OnceProperty = std::pair>; + Location last_tok_loc; + + OnceProperty visibility{"visibility", {}}; + OnceProperty binding{"linkage", {}}; + OnceProperty retain{"retain", {}}; + OnceProperty name{"name", {}}; + OnceProperty size{"size", {}}; + + auto check_once = [this, &last_tok_loc](OnceProperty& var) { + if (!var.second) + var.second = last_tok_loc; + else { + Error(last_tok_loc, "Symbol's " PRIstringview " already specified", + WABT_PRINTF_STRING_VIEW_ARG(var.first)); + Error(*var.second, "See previous definition"); + } + }; + auto check_seen = [this, &last_tok_loc](OnceProperty& var) { + if (!var.second) + Error(last_tok_loc, "Must specify " PRIstringview " for this symbol", + WABT_PRINTF_STRING_VIEW_ARG(var.first)); + }; + auto check_unseen = [this](OnceProperty& var) { + if (var.second) + Error(*var.second, "Cannot specify " PRIstringview " for this symbol", + WABT_PRINTF_STRING_VIEW_ARG(var.first)); + }; + + auto validate = [&] { + if (in_import && (sym->flags_ & uint32_t(SymbolBinding::Local))) { + Error(*visibility.second, "static symbol cannot be an import"); + } + if (data) { + if (!in_import) + check_seen(size); + check_seen(name); + } else { + check_unseen(size); + } + }; + + if (data) { + ParseVarOpt(&data->name, data->name); + } + for (;;) { + last_tok_loc = GetLocation(); + if (Match(TokenType::Rpar)) { + validate(); + return Result::Ok; + } else if (MatchText(TokenType::Reserved, "static")) { + check_once(binding); + sym->flags_ |= uint32_t(SymbolBinding::Local); + } else if (MatchText(TokenType::Reserved, "weak")) { + check_once(binding); + sym->flags_ |= uint32_t(SymbolBinding::Weak); + } else if (MatchText(TokenType::Reserved, "retain")) { + check_once(retain); + sym->flags_ |= WABT_SYMBOL_FLAG_NO_STRIP; + } else if 
(auto sym_name = MatchTextPrefix(TokenType::Reserved, "name=")) { + check_once(name); + RemoveEscapes(*sym_name, std::back_inserter(sym->name_)); + sym->flags_ |= WABT_SYMBOL_FLAG_EXPLICIT_NAME; + } else if (auto sym_size = MatchTextPrefix(TokenType::Reserved, "size=")) { + check_once(size); + CHECK_RESULT(ParseUint64(*sym_size, &data->size)); + } else if (MatchText(TokenType::Reserved, "hidden")) { + check_once(visibility); + sym->flags_ |= uint32_t(SymbolVisibility::Hidden); + } else { + ErrorExpected({"symbol attribute", "')'"}); + } + } +} + +Result WastParser::ParseSymOpt(SymbolCommon* sym, + bool in_import, + DatasymAux* dat_sym) { + sym->flags_ |= in_import ? WABT_SYMBOL_FLAG_UNDEFINED : 0; + if (!IsLparAnn(PeekPair())) + return Result::Ok; + Token tok = GetToken(); + if (tok.text() != "sym") + return Result::Ok; + Consume(); + return ParseSymAfterPar(sym, in_import, dat_sym); +} + +Result WastParser::ParseDataImport(Module* module) { + DataSym sym; + DatasymAux aux; + sym.flags_ |= WABT_SYMBOL_FLAG_UNDEFINED; + if (!IsLparAnn(PeekPair())) + return Result::Ok; + Token tok = GetToken(); + if (tok.text() != "sym.import.data") + return Result::Ok; + Consume(); + CHECK_RESULT(ParseSymAfterPar(&sym, true, &aux)); + + if (!module->data_symbols.empty()) { + if (module->data_symbols.back().segment != kInvalidIndex) { + Error(GetLocation(), "data imports must occur before definitions"); + return Result::Error; + } + } + ++module->num_data_imports; + sym.segment = kInvalidIndex; + sym.offset = module->num_data_imports; + Index sym_idx = module->data_symbols.size(); + if (aux.name.is_name()) { + module->data_symbol_bindings.insert( + {aux.name.name(), {aux.name.loc, sym_idx}}); + sym.name = aux.name.name(); + } + module->data_symbols.push_back(sym); + return Result::Ok; +} Result WastParser::ResolveRefTypes(const Module& module, TypeVector* types, @@ -1358,11 +1515,15 @@ Result WastParser::ParseModuleFieldList(Module* module) { resolve_types_.clear(); 
resolve_funcs_.clear(); - while (IsModuleField(PeekPair()) || PeekIsCustom()) { + while (IsModuleField(PeekPair()) || PeekIsCustom() || PeekIsDataImport()) { if (PeekIsCustom()) { CHECK_RESULT(ParseCustomSectionAnnotation(module)); continue; } + if (PeekIsDataImport()) { + CHECK_RESULT(ParseDataImport(module)); + continue; + } if (Failed(ParseModuleField(module))) { CHECK_RESULT(Synchronize(IsModuleField)); } @@ -1383,6 +1544,51 @@ Result WastParser::ParseModuleFieldList(Module* module) { CHECK_RESULT(result); CHECK_RESULT(ResolveFuncTypes(module, errors_)); CHECK_RESULT(ResolveNamesModule(module, errors_)); + for (auto exp : module->exports) { + auto patch = [&](auto& fields, const BindingHash& bindings) { + Index i = bindings.FindIndex(exp->var); + if (i >= fields.size()) + return; + SymbolCommon& sym = *fields[i]; + sym.flags_ |= WABT_SYMBOL_FLAG_EXPORTED | WABT_SYMBOL_FLAG_NO_STRIP; + if (sym.name_.empty() && sym.defined()) { + sym.name_ = exp->name; + sym.flags_ |= WABT_SYMBOL_FLAG_EXPLICIT_NAME; + } + }; + switch (exp->kind) { + case ExternalKind::Func: + patch(module->funcs, module->func_bindings); + break; + case ExternalKind::Table: + patch(module->tables, module->table_bindings); + break; + case ExternalKind::Global: + patch(module->globals, module->global_bindings); + break; + case ExternalKind::Tag: + patch(module->tags, module->tag_bindings); + break; + case ExternalKind::Memory: + // Memories are not relocatable + break; + } + } + auto validize_flags = [](SymbolCommon* sym) { + if (!sym->undefined() && !sym->exported() && sym->name().empty()) { + sym->flags_ |= uint32_t(SymbolVisibility::Hidden); + sym->flags_ &= ~WABT_SYMBOL_MASK_BINDING; + sym->flags_ |= uint32_t(SymbolBinding::Local); + } + }; + for (auto sym : module->funcs) + validize_flags(sym); + for (auto sym : module->globals) + validize_flags(sym); + for (auto sym : module->tables) + validize_flags(sym); + for (auto sym : module->tags) + validize_flags(sym); return Result::Ok; } @@ -1433,7 
+1639,50 @@ Result WastParser::ParseDataModuleField(Module* module) { field->data_segment.kind = SegmentKind::Passive; } - ParseTextListOpt(&field->data_segment.data); + field->data_segment.symbol_range.first = module->data_symbols.size(); + + for (;;) { + Token tok = GetToken(); + if (tok.token_type() == TokenType::Rpar) + break; + if (tok.token_type() == TokenType::LparAnn) { + size_t offset = field->data_segment.data.size(); + if (tok.text() == "reloc") { + IrReloc r; + ParseReloc(&r); + size_t reloc_size = + kRelocDataTypeSize[int(kRelocDataType[int(r.type)])]; + field->data_segment.relocs.push_back({offset - reloc_size, r}); + continue; + } + if (tok.text() == "sym") { + DataSym sym; + Index sym_idx = module->data_symbols.size(); + DatasymAux aux = {Var{sym_idx, GetLocation()}, 0}; + ParseSymOpt(&sym, false, &aux); + sym.segment = module->data_segments.size(); + sym.offset = offset; + sym.size = aux.size; + if (aux.name.is_name()) { + module->data_symbol_bindings.insert( + {aux.name.name(), {aux.name.loc, sym_idx}}); + sym.name = aux.name.name(); + } + module->data_symbols.push_back(sym); + continue; + } + } + if (PeekMatch(TokenType::Text)) { + RemoveEscapes(Consume().text(), + std::back_inserter(field->data_segment.data)); + continue; + } + Expect(TokenType::Rpar); + return Result::Error; + } + + field->data_segment.symbol_range.second = module->data_symbols.size(); + EXPECT(Rpar); module->AppendField(std::move(field)); return Result::Ok; @@ -1534,6 +1783,10 @@ Result WastParser::ParseTagModuleField(Module* module) { module->AppendField(std::move(field)); } else { auto field = std::make_unique(loc, name); + Tag& tag = field->tag; + CHECK_RESULT(ParseSymOpt(&tag, false)); + if (!name.empty() && !tag.explicit_name()) + tag.name_ = name.substr(1); CHECK_RESULT(ParseTypeUseOpt(&field->tag.decl)); CHECK_RESULT(ParseUnboundFuncSignature(&field->tag.decl.sig)); module->AppendField(std::move(field)); @@ -1572,6 +1825,7 @@ Result 
WastParser::ParseFuncModuleField(Module* module) { CheckImportOrdering(module); auto import = std::make_unique(name); Func& func = import->func; + CHECK_RESULT(ParseSymOpt(&func, true)); CHECK_RESULT(ParseInlineImport(import.get())); CHECK_RESULT(ParseTypeUseOpt(&func.decl)); CHECK_RESULT(ParseFuncSignature(&func.decl.sig, &func.bindings)); @@ -1583,6 +1837,9 @@ Result WastParser::ParseFuncModuleField(Module* module) { auto field = std::make_unique(loc, name); Func& func = field->func; func.loc = GetLocation(); + CHECK_RESULT(ParseSymOpt(&func, false)); + if (!name.empty() && !func.explicit_name()) + func.name_ = name.substr(1); CHECK_RESULT(ParseTypeUseOpt(&func.decl)); CHECK_RESULT(ParseFuncSignature(&func.decl.sig, &func.bindings)); @@ -1712,6 +1969,10 @@ Result WastParser::ParseGlobalModuleField(Module* module) { module->AppendField(std::move(field)); } else { auto field = std::make_unique(loc, name); + Global& global = field->global; + CHECK_RESULT(ParseSymOpt(&global, false)); + if (!name.empty() && !global.explicit_name()) + global.name_ = name.substr(1); CHECK_RESULT(ParseGlobalType(&field->global)); CHECK_RESULT(ParseTerminatingInstrList(&field->global.init_expr)); module->AppendField(std::move(field)); @@ -1735,6 +1996,11 @@ Result WastParser::ParseImportModuleField(Module* module) { CHECK_RESULT(ParseQuotedText(&field_name)); EXPECT(Lpar); + auto inject_name = [&](SymbolCommon& sym) { + if (!sym.explicit_name()) + sym.name_ = field_name; + }; + std::unique_ptr field; std::string name; @@ -1743,11 +2009,13 @@ Result WastParser::ParseImportModuleField(Module* module) { Consume(); ParseBindVarOpt(&name); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->func, true)); CHECK_RESULT(ParseTypeUseOpt(&import->func.decl)); CHECK_RESULT( ParseFuncSignature(&import->func.decl.sig, &import->func.bindings)); CHECK_RESULT(ErrorIfLpar({"param", "result"})); EXPECT(Rpar); + inject_name(import->func); field = std::make_unique(std::move(import), 
loc); break; } @@ -1756,10 +2024,12 @@ Result WastParser::ParseImportModuleField(Module* module) { Consume(); ParseBindVarOpt(&name); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->table, true)); CHECK_RESULT(ParseLimitsIndex(&import->table.elem_limits)); CHECK_RESULT(ParseLimits(&import->table.elem_limits)); CHECK_RESULT(ParseRefType(&import->table.elem_type)); EXPECT(Rpar); + inject_name(import->table); field = std::make_unique(std::move(import), loc); break; } @@ -1781,8 +2051,10 @@ Result WastParser::ParseImportModuleField(Module* module) { Consume(); ParseBindVarOpt(&name); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->global, true)); CHECK_RESULT(ParseGlobalType(&import->global)); EXPECT(Rpar); + inject_name(import->global); field = std::make_unique(std::move(import), loc); break; } @@ -1791,9 +2063,11 @@ Result WastParser::ParseImportModuleField(Module* module) { Consume(); ParseBindVarOpt(&name); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->tag, true)); CHECK_RESULT(ParseTypeUseOpt(&import->tag.decl)); CHECK_RESULT(ParseUnboundFuncSignature(&import->tag.decl.sig)); EXPECT(Rpar); + inject_name(import->tag); field = std::make_unique(std::move(import), loc); break; } @@ -1905,6 +2179,7 @@ Result WastParser::ParseTableModuleField(Module* module) { if (PeekMatchLpar(TokenType::Import)) { CheckImportOrdering(module); auto import = std::make_unique(name); + CHECK_RESULT(ParseSymOpt(&import->table, true)); CHECK_RESULT(ParseInlineImport(import.get())); CHECK_RESULT(ParseLimitsIndex(&import->table.elem_limits)); CHECK_RESULT(ParseLimits(&import->table.elem_limits)); @@ -1915,6 +2190,9 @@ Result WastParser::ParseTableModuleField(Module* module) { } else { auto field = std::make_unique(loc, name); auto& table = field->table; + CHECK_RESULT(ParseSymOpt(&table, false)); + if (!name.empty() && !table.explicit_name()) + table.name_ = name.substr(1); 
CHECK_RESULT(ParseLimitsIndex(&table.elem_limits)); if (PeekMatch(TokenType::ValueType)) { Type elem_type; @@ -2124,9 +2402,17 @@ Result WastParser::ParseInstrList(ExprList* exprs) { CHECK_RESULT(Synchronize(IsInstr)); } } else if (IsLparAnn(pair)) { - if (Succeeded(ParseCodeMetadataAnnotation(&new_exprs))) { - exprs->splice(exprs->end(), new_exprs); + Token tk = GetToken(); + constexpr std::string_view pfx = "metadata.code."; + std::string_view name = tk.text(); + if (name.substr(0, size(pfx)) == pfx) { + if (Succeeded(ParseCodeMetadataAnnotation(&new_exprs))) { + exprs->splice(exprs->end(), new_exprs); + } else { + CHECK_RESULT(Synchronize(IsLparAnn)); + } } else { + ErrorExpected({"an annotation", "an instruction"}); CHECK_RESULT(Synchronize(IsLparAnn)); } } else { @@ -2166,11 +2452,169 @@ Result WastParser::ParseInstr(ExprList* exprs) { } } +Result WastParser::ParseRejectReloc() { /* Reports an error for a (@reloc ...) annotation on an operand that cannot carry one, then skips the annotation. */ + Token tok = GetToken(); + if (tok.token_type() == TokenType::LparAnn && tok.text() == "reloc") { + Error(GetLocation(), "Operand is not relocatable"); + Consume(); + return ParseUnwindReloc(1); + } + return Result::Ok; +} +Result WastParser::ParseUnwindReloc(int curr_indent) { /* Consumes tokens until curr_indent currently-open parentheses have been closed. */ + while (curr_indent && !PeekMatch(TokenType::Eof)) { /* stop at end of input: an unterminated annotation never supplies the closing paren, so the depth would never reach zero */ + if (PeekMatch(TokenType::Lpar) || PeekMatch(TokenType::LparAnn)) + ++curr_indent; + if (PeekMatch(TokenType::Rpar)) + --curr_indent; + Consume(); + } + return Result::Ok; +} +Result WastParser::ParseRelocAfterType(IrReloc* reloc, RelocDataType type) { /* Parses "<kind> [modifier] <var> )" once the data type is known and stores the recognized relocation in *reloc. */ + RelocKind kind; + CHECK_RESULT(ParseRelocKind(&kind)); + RelocModifiers mod; + CHECK_RESULT(ParseRelocModifiers(&mod)); + RelocType reloc_type = RecognizeReloc(kind, type, mod); + if (reloc_type == RelocType::None) { + Error(GetLocation(), "Invalid relocation"); + return ParseUnwindReloc(1); /* *reloc is left untouched in this case */ + } + Var target; + CHECK_RESULT(ParseVar(&target)); /* propagate a failed target parse instead of storing an unparsed Var */ + *reloc = {reloc_type, target}; + CHECK_RESULT(Expect(TokenType::Rpar)); + return Result::Ok; +} +Result WastParser::ParseRelocModifiers(RelocModifiers* mod) { + *mod = RelocModifiers::None; + Token tok = 
GetToken(); + if (tok.token_type() == TokenType::Reserved) { + if (tok.text() == "tls") + *mod = RelocModifiers::TLS; + else if (tok.text() == "pic") + *mod = RelocModifiers::PIC; + } + if (*mod != RelocModifiers::None) + Consume(); + return Result::Ok; +} + +Result WastParser::ParseRelocKind(RelocKind* kind) { /* Maps the current token to a RelocKind and consumes it; returns Result::Error without consuming anything when the token names no known kind. */ + bool did_reloc = false; + Token tok = GetToken(); + TokenType tt = tok.token_type(); + if (tt == TokenType::Global) { + *kind = RelocKind::Global; + did_reloc = true; + } + if (tt == TokenType::Function) { + *kind = RelocKind::Function; + did_reloc = true; + } + if (tt == TokenType::Table) { + *kind = RelocKind::Table; + did_reloc = true; + } + if (tt == TokenType::Tag) { + *kind = RelocKind::Tag; + did_reloc = true; + } + if (tt == TokenType::Data) { + *kind = RelocKind::Data; + did_reloc = true; + } + if (tt == TokenType::Type) { + *kind = RelocKind::Type; + did_reloc = true; + } + if (tt == TokenType::Reserved) { + if (tok.text() == "text") { + *kind = RelocKind::Text; + did_reloc = true; + } + if (tok.text() == "functable") { + *kind = RelocKind::FunctionTbl; + did_reloc = true; + } + if (tok.text() == "custom" || tok.text() == "section") { /* WatWriter::WriteReloc spells this kind "section"; accept both spellings so written output parses back */ + *kind = RelocKind::Section; + did_reloc = true; + } + } + if (did_reloc) { + Consume(); + return Result::Ok; + } else + return Result::Error; +} +Result WastParser::ParseRelocDataType(RelocDataType* type) { + bool did_reloc = false; + Token tok = GetToken(); + TokenType tt = tok.token_type(); + if (tt == TokenType::ValueType) { + if (tok.type() == Type::I32) { + *type = RelocDataType::I32; + did_reloc = true; + } + if (tok.type() == Type::I64) { + *type = RelocDataType::I64; + did_reloc = true; + } + } + if (tt == TokenType::Reserved) { + if (tok.text() == "leb") { + *type = RelocDataType::LEB; + did_reloc = true; + } + if (tok.text() == "sleb") { + *type = RelocDataType::SLEB; + did_reloc = true; + } + if (tok.text() == "leb64") { + *type = RelocDataType::LEB64; + did_reloc = true; + } + if (tok.text() == "sleb64") { + *type = 
RelocDataType::SLEB64; + did_reloc = true; + } + } + if (did_reloc) { + Consume(); + return Result::Ok; + } else + return Result::Error; +} +Result WastParser::ParseReloc(IrReloc* reloc) { + Token tok = GetToken(); + if (tok.token_type() == TokenType::LparAnn && tok.text() == "reloc") { + Consume(); + RelocDataType t; + CHECK_RESULT(ParseRelocDataType(&t)); + return ParseRelocAfterType(reloc, t); + } + return Result::Ok; +} +Result WastParser::ParseReloc(IrReloc* reloc, RelocDataType type) { + Token tok = GetToken(); + if (tok.token_type() == TokenType::LparAnn && tok.text() == "reloc") { + Consume(); + return ParseRelocAfterType(reloc, type); + } + return Result::Ok; +} + Result WastParser::ParseCodeMetadataAnnotation(ExprList* exprs) { WABT_TRACE(ParseCodeMetadataAnnotation); Token tk = Consume(); + constexpr std::string_view pfx = "metadata.code."; std::string_view name = tk.text(); - name.remove_prefix(sizeof("metadata.code.") - 1); + assert(name.substr(0, size(pfx)) == pfx && + "ParseCodeMetadataAnnotation should only be called with appropriate " + "annotation"); + name.remove_prefix(size(pfx)); std::string data_text; CHECK_RESULT(ParseQuotedText(&data_text, false)); std::vector data(data_text.begin(), data_text.end()); @@ -2220,14 +2664,24 @@ template Result WastParser::ParseLoadStoreInstr(Location loc, Token token, std::unique_ptr* out_expr) { + constexpr bool relocatable = + std::is_same_v || std::is_same_v; Opcode opcode = token.opcode(); Var memidx; Address offset; Address align; + IrReloc reloc; CHECK_RESULT(ParseMemidx(loc, &memidx)); ParseOffsetOpt(&offset); + if constexpr (relocatable) { + CHECK_RESULT(ParseReloc(&reloc, RelocDataType::LEB)); + } ParseAlignOpt(&align); - out_expr->reset(new T(opcode, memidx, align, offset, loc)); + T* expr = new T(opcode, memidx, align, offset, loc); + if constexpr (relocatable) { + expr->reloc = reloc; + } + out_expr->reset(expr); return Result::Ok; } @@ -2447,7 +2901,14 @@ Result 
WastParser::ParsePlainInstr(std::unique_ptr* out_expr) { case TokenType::Const: { Const const_; CHECK_RESULT(ParseConst(&const_, ConstType::Normal)); - out_expr->reset(new ConstExpr(const_, loc)); + auto expr = new ConstExpr(const_, loc); + out_expr->reset(expr); + if (const_.type() == Type::I64) + CHECK_RESULT(ParseReloc(&expr->reloc, RelocDataType::SLEB64)); + else if (const_.type() == Type::I32) + CHECK_RESULT(ParseReloc(&expr->reloc, RelocDataType::SLEB)); + else + CHECK_RESULT(ParseRejectReloc()); break; } @@ -3813,7 +4274,7 @@ Result WastParser::ParseScriptModule( auto tsm = std::make_unique(); tsm->module.name = name; tsm->module.loc = loc; - if (IsModuleField(PeekPair()) || PeekIsCustom()) { + if (IsModuleField(PeekPair()) || PeekIsCustom() || PeekIsDataImport()) { CHECK_RESULT(ParseModuleFieldList(&tsm->module)); } else if (!PeekMatch(TokenType::Rpar)) { ConsumeIfLpar(); diff --git a/src/wat-writer.cc b/src/wat-writer.cc index f19e3c3c86..dab08cd6dd 100644 --- a/src/wat-writer.cc +++ b/src/wat-writer.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -136,7 +137,7 @@ class WatWriter : ModuleContext { const Block& block, const char* text); void WriteEndBlock(); - void WriteConst(const Const& const_); + void WriteConst(const ConstExpr& const_); void WriteExpr(const Expr* expr); template void WriteLoadStoreExpr(const Expr* expr); @@ -149,6 +150,9 @@ class WatWriter : ModuleContext { const T& types, const std::vector& index_to_name, Index binding_index_offset = 0); + void WriteRelocAttrs(const SymbolCommon& sym); + void WriteReloc(const IrReloc& reloc, bool require_type = false); + void WriteDataImports(); void WriteBeginFunc(const Func& func); void WriteFunc(const Func& func); void WriteBeginGlobal(const Global& global); @@ -469,17 +473,20 @@ void WatWriter::WriteEndBlock() { WritePutsNewline(Opcode::End_Opcode.GetName()); } -void WatWriter::WriteConst(const Const& const_) { +void WatWriter::WriteConst(const ConstExpr& 
expr) { + const Const& const_ = expr.const_; switch (const_.type()) { case Type::I32: WritePutsSpace(Opcode::I32Const_Opcode.GetName()); Writef("%d", static_cast(const_.u32())); + WriteReloc(expr.reloc); WriteNewline(NO_FORCE_NEWLINE); break; case Type::I64: WritePutsSpace(Opcode::I64Const_Opcode.GetName()); Writef("%" PRId64, static_cast(const_.u64())); + WriteReloc(expr.reloc); WriteNewline(NO_FORCE_NEWLINE); break; @@ -539,6 +546,7 @@ void WatWriter::WriteMemoryLoadStoreExpr(const Expr* expr) { if (typed_expr->offset) { Writef("offset=%" PRIaddress, typed_expr->offset); } + WriteReloc(typed_expr->reloc); if (!typed_expr->opcode.IsNaturallyAligned(typed_expr->align)) { Writef("align=%" PRIaddress, typed_expr->align); } @@ -705,7 +713,7 @@ Result WatWriter::ExprVisitorDelegate::OnCompareExpr(CompareExpr* expr) { } Result WatWriter::ExprVisitorDelegate::OnConstExpr(ConstExpr* expr) { - writer_->WriteConst(expr->const_); + writer_->WriteConst(*expr); return Result::Ok; } @@ -1435,9 +1443,115 @@ void WatWriter::WriteTypeBindings(const char* prefix, } } +void WatWriter::WriteRelocAttrs(const SymbolCommon& sym) { + if (sym.binding() == SymbolBinding::Weak) + WritePutsSpace("weak"); + if (sym.binding() == SymbolBinding::Local) + WritePutsSpace("static"); + else { + if (sym.visibility() == SymbolVisibility::Hidden) + WritePutsSpace("hidden"); + } + if (sym.no_strip()) + WritePutsSpace("retain"); + if (sym.exported()) + WritePutsSpace("export"); + if (!sym.name().empty()) { + WritePuts("name=", NextChar::None); + WriteQuotedString(sym.name(), NextChar::Space); + } +} + +void WatWriter::WriteReloc(const IrReloc& reloc, bool require_type) { + if (reloc.type == RelocType::None || !options_.relocatable) + return; + WriteOpenSpace("@reloc"); + if (require_type) + switch (kRelocDataType[int(reloc.type)]) { + case RelocDataType::I32: + WritePutsSpace("i32"); + break; + case RelocDataType::I64: + WritePutsSpace("i64"); + break; + case RelocDataType::LEB: + WritePutsSpace("leb"); 
+ break; + case RelocDataType::SLEB: + WritePutsSpace("sleb"); + break; + case RelocDataType::LEB64: + WritePutsSpace("leb64"); + break; + case RelocDataType::SLEB64: + WritePutsSpace("sleb64"); + break; + } + switch (kRelocSymbolType[int(reloc.type)]) { + case RelocKind::Function: + WritePutsSpace("func"); + break; + case RelocKind::Data: + WritePutsSpace("data"); + break; + case RelocKind::Global: + WritePutsSpace("global"); + break; + case RelocKind::FunctionTbl: + WritePutsSpace("functable"); + break; + case RelocKind::Table: + WritePutsSpace("table"); + break; + case RelocKind::Tag: + WritePutsSpace("tag"); + break; + case RelocKind::Type: + WritePutsSpace("type"); + break; + case RelocKind::Text: + WritePutsSpace("text"); + break; + case RelocKind::Section: + WritePutsSpace("section"); + break; + default: + WABT_UNREACHABLE; + } + + if (bool(kRelocModifiers[int(reloc.type)] & RelocModifiers::TLS)) + WritePutsSpace("tls"); + if (bool(kRelocModifiers[int(reloc.type)] & RelocModifiers::PIC)) + WritePutsSpace("pic"); + + WriteVar(reloc.symbol, NextChar::None); + if (reloc.addend) + Writef("+%u", reloc.addend); + WriteCloseSpace(); +} +void WatWriter::WriteDataImports() { + for (Index i = 0; i != module.num_data_imports; ++i) { + const DataSym& sym = module.data_symbols[i]; + WriteOpenSpace("@sym.import.data"); + if (!sym.name.empty()) + WriteName(sym.name, NextChar::Space); + WriteRelocAttrs(sym); + WriteCloseNewline(); + } +} + void WatWriter::WriteBeginFunc(const Func& func) { + bool import = module.IsImport(ExternalKind::Func, Var(func_index_, {})); WriteOpenSpace("func"); WriteNameOrIndex(func.name, func_index_, NextChar::Space); + + if ((func.non_default(import) || func.priority) && options_.relocatable) { + WriteOpenSpace("@sym"); + WriteRelocAttrs(func); + if (func.priority.has_value()) + Writef("init=%u", *func.priority); + WriteCloseSpace(); + } WriteInlineExports(ExternalKind::Func, func_index_); WriteInlineImport(ExternalKind::Func, func_index_); if 
(func.decl.has_func_type) { @@ -1446,7 +1560,7 @@ void WatWriter::WriteBeginFunc(const Func& func) { WriteCloseSpace(); } - if (module.IsImport(ExternalKind::Func, Var(func_index_, Location()))) { + if (import) { // Imported functions can be written a few ways: // // 1. (import "module" "field" (func (type 0))) @@ -1489,8 +1603,14 @@ void WatWriter::WriteFunc(const Func& func) { } void WatWriter::WriteBeginGlobal(const Global& global) { + bool import = module.IsImport(ExternalKind::Global, Var(global_index_, {})); /* look the global up by its own running index, not the function counter */ WriteOpenSpace("global"); WriteNameOrIndex(global.name, global_index_, NextChar::Space); + if (global.non_default(import) && options_.relocatable) { /* emit (@sym ...) only when relocatable output is requested and the attributes differ from the defaults */ + WriteOpenSpace("@sym"); + WriteRelocAttrs(global); + WriteCloseSpace(); + } WriteInlineExports(ExternalKind::Global, global_index_); WriteInlineImport(ExternalKind::Global, global_index_); if (global.mutable_) { @@ -1510,8 +1630,14 @@ void WatWriter::WriteGlobal(const Global& global) { } void WatWriter::WriteTag(const Tag& tag) { + bool import = module.IsImport(ExternalKind::Tag, Var(tag_index_, {})); /* look the tag up by its own running index, not the function counter */ WriteOpenSpace("tag"); WriteNameOrIndex(tag.name, tag_index_, NextChar::Space); + if (tag.non_default(import) && options_.relocatable) { + WriteOpenSpace("@sym"); + WriteRelocAttrs(tag); + WriteCloseSpace(); + } WriteInlineExports(ExternalKind::Tag, tag_index_); WriteInlineImport(ExternalKind::Tag, tag_index_); if (tag.decl.has_func_type) { @@ -1538,8 +1664,14 @@ void WatWriter::WriteLimits(const Limits& limits) { } void WatWriter::WriteTable(const Table& table) { + bool import = module.IsImport(ExternalKind::Table, Var(table_index_, {})); /* look the table up by its own running index, not the function counter */ WriteOpenSpace("table"); WriteNameOrIndex(table.name, table_index_, NextChar::Space); + if (table.non_default(import) && options_.relocatable) { + WriteOpenSpace("@sym"); + WriteRelocAttrs(table); + WriteCloseSpace(); + } WriteInlineExports(ExternalKind::Table, table_index_); WriteInlineImport(ExternalKind::Table, table_index_); WriteLimits(table.elem_limits); @@ -1622,7 
+1754,43 @@ void WatWriter::WriteDataSegment(const DataSegment& segment) { } WriteInitExpr(segment.offset); } - WriteQuotedData(segment.data.data(), segment.data.size()); + Offset offset = 0, next_sym = 0, next_reloc = 0; + constexpr auto end_offset = std::numeric_limits::max(); + Index curr_sym = segment.symbol_range.first; + auto curr_reloc = begin(segment.relocs); + bool written_some_data = false; + for (;;) { + next_reloc = curr_reloc != end(segment.relocs) && options_.relocatable + ? curr_reloc->first + + kRelocDataTypeSize[int( + kRelocDataType[int(curr_reloc->second.type)])] + : end_offset; + next_sym = curr_sym != segment.symbol_range.second && options_.relocatable + ? module.data_symbols[curr_sym].offset + : end_offset; + if (offset == next_reloc) { + WriteReloc(curr_reloc->second, true); + ++curr_reloc; + continue; + } + if (offset == next_sym) { + WriteOpenSpace("@sym"); + WriteName(module.data_symbols[curr_sym].name, NextChar::Space); + Writef("size=%" PRIaddress, module.data_symbols[curr_sym].size); + WriteRelocAttrs(module.data_symbols[curr_sym]); + WriteCloseSpace(); + ++curr_sym; + continue; + } + if (offset == segment.data.size() && written_some_data) + // if we have no relocs/syms left, and there's also no data, leave + break; + Offset write_to = + std::min(segment.data.size(), std::min(next_reloc, next_sym)); + WriteQuotedData(segment.data.data() + offset, write_to - offset); + offset = write_to; + written_some_data = true; + } WriteCloseNewline(); data_segment_index_++; } @@ -1745,6 +1913,7 @@ Result WatWriter::WriteModule() { } else { WriteName(module.name, NextChar::Newline); } + WriteDataImports(); for (const ModuleField& field : module.fields) { switch (field.type()) { case ModuleFieldType::Func: @@ -1784,6 +1953,10 @@ Result WatWriter::WriteModule() { } if (options_.features.annotations_enabled()) { for (const Custom& custom : module.customs) { + if (custom.name == "linking") + continue; + if (std::string_view{custom.name}.substr(0, 6) == 
"reloc." && options_.relocatable) + continue; WriteCustom(custom); } } diff --git a/test/dump/relocations-all-features.txt b/test/dump/relocations-all-features.txt index 0b22b64184..b6d6dc48fd 100644 --- a/test/dump/relocations-all-features.txt +++ b/test/dump/relocations-all-features.txt @@ -59,7 +59,7 @@ Custom: - symbol table [count=5] - 0: F <__extern.foo> func=0 [ undefined binding=global vis=default ] - 1: F <__extern.bar> func=1 [ undefined binding=global vis=default ] - - 2: F func=2 [ exported no_strip binding=global vis=hidden ] + - 2: F func=2 [ exported no_strip binding=global vis=default ] - 3: T <> table=0 [ binding=local vis=hidden ] - 4: G global=0 [ binding=global vis=default ] Custom: diff --git a/test/dump/relocations-block-types.txt b/test/dump/relocations-block-types.txt index 5192b77488..9089b5b462 100644 --- a/test/dump/relocations-block-types.txt +++ b/test/dump/relocations-block-types.txt @@ -30,7 +30,7 @@ Code[1]: Custom: - name: "linking" - symbol table [count=1] - - 0: F func=0 [ exported no_strip binding=global vis=hidden ] + - 0: F func=0 [ exported no_strip binding=global vis=default ] Custom: - name: "reloc.Code" - relocations for section: 3 (Code) [1] diff --git a/test/dump/relocations-section-target.txt b/test/dump/relocations-section-target.txt index 112b655315..075bb9d235 100644 --- a/test/dump/relocations-section-target.txt +++ b/test/dump/relocations-section-target.txt @@ -27,7 +27,7 @@ Custom: - name: "linking" - symbol table [count=2] - 0: F func=0 [ undefined binding=global vis=default ] - - 1: F func=1 [ exported no_strip binding=global vis=hidden ] + - 1: F func=1 [ exported no_strip binding=global vis=default ] Custom: - name: "reloc.Code" - relocations for section: 4 (Code) [1] diff --git a/test/dump/relocations.txt b/test/dump/relocations.txt index 9114e3562f..ead46a0a43 100644 --- a/test/dump/relocations.txt +++ b/test/dump/relocations.txt @@ -59,7 +59,7 @@ Custom: - symbol table [count=5] - 0: F <__extern.foo> func=0 
[ undefined binding=global vis=default ] - 1: F <__extern.bar> func=1 [ undefined binding=global vis=default ] - - 2: F func=2 [ exported no_strip binding=global vis=hidden ] + - 2: F func=2 [ exported no_strip binding=global vis=default ] - 3: T <> table=0 [ binding=local vis=hidden ] - 4: G global=0 [ binding=global vis=default ] Custom: diff --git a/test/dump/symbol-tables-all-features.txt b/test/dump/symbol-tables-all-features.txt index 5aaa07bfeb..a87c8cdda5 100644 --- a/test/dump/symbol-tables-all-features.txt +++ b/test/dump/symbol-tables-all-features.txt @@ -39,7 +39,7 @@ Custom: - name: "linking" - symbol table [count=5] - 0: F func=0 [ undefined binding=global vis=default ] - - 1: F func=1 [ exported no_strip binding=global vis=hidden ] + - 1: F func=1 [ exported no_strip binding=global vis=default ] - 2: F <> func=2 [ binding=local vis=hidden ] - 3: F func=3 [ binding=global vis=default ] - 4: T table=0 [ binding=global vis=default ] diff --git a/test/dump/symbol-tables.txt b/test/dump/symbol-tables.txt index 04a2085d97..8f716bafa0 100644 --- a/test/dump/symbol-tables.txt +++ b/test/dump/symbol-tables.txt @@ -41,7 +41,7 @@ Custom: - name: "linking" - symbol table [count=6] - 0: F func=0 [ undefined binding=global vis=default ] - - 1: F func=1 [ exported no_strip binding=global vis=hidden ] + - 1: F func=1 [ exported no_strip binding=global vis=default ] - 2: F <> func=2 [ binding=local vis=hidden ] - 3: F func=3 [ binding=global vis=default ] - 4: T table=0 [ undefined binding=global vis=default ] diff --git a/test/help/wasm2wat.txt b/test/help/wasm2wat.txt index f2b2f5c88f..93fa020485 100644 --- a/test/help/wasm2wat.txt +++ b/test/help/wasm2wat.txt @@ -45,4 +45,5 @@ options: --ignore-custom-section-errors Ignore errors in custom sections --generate-names Give auto-generated names to non-named functions, types, etc. --no-check Don't check for invalid modules + -r, --relocatable Generate relocation annotations ;;; STDOUT ;;)