diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index 38b96b72ccac6..e44497a408245 100644
--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -285,6 +285,9 @@ enum CGDataVersion {
   // Version 3 adds the total size of the Names in the stable function map so
   // we can skip reading them into the memory for non-assertion builds.
   Version3 = 3,
+  // Version 4 adjusts the structure of stable function merging map for
+  // efficient lazy loading support.
+  Version4 = 4,
   CurrentVersion = CG_DATA_INDEX_VERSION
 };
 const uint64_t Version = CGDataVersion::CurrentVersion;
diff --git a/llvm/include/llvm/CGData/CodeGenData.inc b/llvm/include/llvm/CGData/CodeGenData.inc
index 94de4c0b017a2..d5fbe2fb97718 100644
--- a/llvm/include/llvm/CGData/CodeGenData.inc
+++ b/llvm/include/llvm/CGData/CodeGenData.inc
@@ -49,4 +49,4 @@ CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
 #endif
 
 /* Indexed codegen data format version (start from 1). */
-#define CG_DATA_INDEX_VERSION 3
+#define CG_DATA_INDEX_VERSION 4
diff --git a/llvm/include/llvm/CGData/StableFunctionMap.h b/llvm/include/llvm/CGData/StableFunctionMap.h
index bcb72e8216973..b28e71fe8579c 100644
--- a/llvm/include/llvm/CGData/StableFunctionMap.h
+++ b/llvm/include/llvm/CGData/StableFunctionMap.h
@@ -20,6 +20,8 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/IR/StructuralHash.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <mutex>
 
 namespace llvm {
 
@@ -72,11 +74,29 @@ struct StableFunctionMap {
           IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
   };
 
-  using HashFuncsMapType =
-      DenseMap<stable_hash, SmallVector<std::unique_ptr<StableFunctionEntry>>>;
+  using StableFunctionEntries =
+      SmallVector<std::unique_ptr<StableFunctionEntry>>;
+
+  /// In addition to the deserialized StableFunctionEntry, the struct stores
+  /// the offsets of corresponding serialized stable function entries, and a
+  /// once flag for safe lazy loading in a multithreaded environment.
+  struct EntryStorage {
+    StableFunctionEntries Entries;
+
+  private:
+    SmallVector<uint64_t> Offsets;
+    std::once_flag LazyLoadFlag;
+    friend struct StableFunctionMap;
+    friend struct StableFunctionMapRecord;
+  };
+
+  // Note: DenseMap requires value type to be copyable even if only using
+  // in-place insertion. Use STL instead. This also affects the
+  // deletion-while-iteration in finalize().
+  using HashFuncsMapType = std::unordered_map<stable_hash, EntryStorage>;
 
   /// Get the HashToFuncs map for serialization.
-  const HashFuncsMapType &getFunctionMap() const { return HashToFuncs; }
+  const HashFuncsMapType &getFunctionMap() const;
 
   /// Get the NameToId vector for serialization.
   ArrayRef<std::string> getNames() const { return IdToName; }
@@ -99,6 +119,13 @@ struct StableFunctionMap {
   /// \returns true if there is no stable function entry.
   bool empty() const { return size() == 0; }
 
+  bool contains(HashFuncsMapType::key_type FunctionHash) const {
+    return HashToFuncs.count(FunctionHash) > 0;
+  }
+
+  const StableFunctionEntries &
+  at(HashFuncsMapType::key_type FunctionHash) const;
+
   enum SizeType {
     UniqueHashCount,        // The number of unique hashes in HashToFuncs.
     TotalFunctionCount,     // The number of total functions in HashToFuncs.
@@ -119,17 +146,31 @@ struct StableFunctionMap {
   /// `StableFunctionEntry` is ready for insertion.
   void insert(std::unique_ptr<StableFunctionEntry> FuncEntry) {
     assert(!Finalized && "Cannot insert after finalization");
-    HashToFuncs[FuncEntry->Hash].emplace_back(std::move(FuncEntry));
+    HashToFuncs[FuncEntry->Hash].Entries.emplace_back(std::move(FuncEntry));
   }
 
+  void deserializeLazyLoadingEntry(HashFuncsMapType::iterator It);
+
+  /// Eagerly deserialize all the unloaded entries in the lazy loading map.
+  void deserializeLazyLoadingEntries();
+
+  bool isLazilyLoaded() const { return (bool)Buffer; }
+
   /// A map from a stable_hash to a vector of functions with that hash.
-  HashFuncsMapType HashToFuncs;
+  mutable HashFuncsMapType HashToFuncs;
   /// A vector of strings to hold names.
   SmallVector<std::string> IdToName;
   /// A map from StringRef (name) to an ID.
   StringMap<unsigned> NameToId;
   /// True if the function map is finalized with minimal content.
   bool Finalized = false;
+  /// The memory buffer that contains the serialized stable function map for
+  /// lazy loading.
+  /// Non-empty only if this StableFunctionMap is created from a MemoryBuffer
+  /// (i.e. by IndexedCodeGenDataReader::read()) and lazily deserialized.
+  std::shared_ptr<MemoryBuffer> Buffer;
+  /// Whether to read stable function names from the buffer.
+  bool ReadStableFunctionMapNames = true;
 
   friend struct StableFunctionMapRecord;
 };
diff --git a/llvm/include/llvm/CGData/StableFunctionMapRecord.h b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
index a75cb12a70ba6..5a2176574c9e6 100644
--- a/llvm/include/llvm/CGData/StableFunctionMapRecord.h
+++ b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
@@ -40,6 +40,14 @@ struct StableFunctionMapRecord {
                                  const StableFunctionMap *FunctionMap,
                                  std::vector<CGDataPatchItem> &PatchItems);
 
+  /// A static helper function to deserialize the stable function map entry.
+  /// Ptr should be pointing to the start of the fixed-sized fields of the
+  /// entry when passed in.
+  LLVM_ABI static void deserializeEntry(const unsigned char *Ptr,
+                                        stable_hash Hash,
+                                        StableFunctionMap *FunctionMap,
+                                        bool ReadStableFunctionMapNames = true);
+
   /// Serialize the stable function map to a raw_ostream.
   LLVM_ABI void serialize(raw_ostream &OS,
                           std::vector<CGDataPatchItem> &PatchItems) const;
@@ -48,6 +56,13 @@ struct StableFunctionMapRecord {
   LLVM_ABI void deserialize(const unsigned char *&Ptr,
                             bool ReadStableFunctionMapNames = true);
 
+  /// Lazily deserialize the stable function map from `Buffer` starting at
+  /// `Offset`. The individial stable function entry would be read lazily from
+  /// `Buffer` when the function map is accessed.
+  LLVM_ABI void lazyDeserialize(std::shared_ptr<MemoryBuffer> Buffer,
+                                uint64_t Offset,
+                                bool ReadStableFunctionMapNames = true);
+
   /// Serialize the stable function map to a YAML stream.
   LLVM_ABI void serializeYAML(yaml::Output &YOS) const;
 
@@ -70,6 +85,10 @@ struct StableFunctionMapRecord {
     yaml::Output YOS(OS);
     serializeYAML(YOS);
   }
+
+private:
+  void deserialize(const unsigned char *&Ptr, bool ReadStableFunctionMapNames,
+                   bool Lazy);
 };
 
 } // namespace llvm
diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
index cd012342e1958..b4f08c3d13b0d 100644
--- a/llvm/lib/CGData/CodeGenData.cpp
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -186,7 +186,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
     return make_error<CGDataError>(cgdata_error::unsupported_version);
   H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
 
-  static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version3,
+  static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version4,
                 "Please update the offset computation below if a new field has "
                 "been added to the header.");
   H.OutlinedHashTreeOffset =
diff --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp
index 0ab35499c8986..c7c0383930d50 100644
--- a/llvm/lib/CGData/CodeGenDataReader.cpp
+++ b/llvm/lib/CGData/CodeGenDataReader.cpp
@@ -26,6 +26,12 @@ static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames(
              "disabled to save memory and time for final consumption of the "
              "indexed CodeGenData in production."));
 
+cl::opt<bool> IndexedCodeGenDataLazyLoading(
+    "indexed-codegen-data-lazy-loading", cl::init(false), cl::Hidden,
+    cl::desc(
+        "Lazily load indexed CodeGenData. Enable to save memory and time "
+        "for final consumption of the indexed CodeGenData in production."));
+
 namespace llvm {
 
 static Expected<std::unique_ptr<MemoryBuffer>>
@@ -109,11 +115,20 @@ Error IndexedCodeGenDataReader::read() {
       return error(cgdata_error::eof);
     HashTreeRecord.deserialize(Ptr);
   }
+
+  // TODO: lazy loading support for outlined hash tree.
+  std::shared_ptr<MemoryBuffer> SharedDataBuffer = std::move(DataBuffer);
   if (hasStableFunctionMap()) {
     const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
     if (Ptr >= End)
       return error(cgdata_error::eof);
-    FunctionMapRecord.deserialize(Ptr, IndexedCodeGenDataReadFunctionMapNames);
+    if (IndexedCodeGenDataLazyLoading)
+      FunctionMapRecord.lazyDeserialize(SharedDataBuffer,
+                                        Header.StableFunctionMapOffset,
+                                        IndexedCodeGenDataReadFunctionMapNames);
+    else
+      FunctionMapRecord.deserialize(Ptr,
+                                    IndexedCodeGenDataReadFunctionMapNames);
   }
 
   return success();
diff --git a/llvm/lib/CGData/StableFunctionMap.cpp b/llvm/lib/CGData/StableFunctionMap.cpp
index 87f1e76afb60b..801c2a3bcfb41 100644
--- a/llvm/lib/CGData/StableFunctionMap.cpp
+++ b/llvm/lib/CGData/StableFunctionMap.cpp
@@ -15,8 +15,10 @@
 
 #include "llvm/CGData/StableFunctionMap.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/CGData/StableFunctionMapRecord.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include <mutex>
 
 #define DEBUG_TYPE "stable-function-map"
 
@@ -93,9 +95,10 @@ void StableFunctionMap::insert(const StableFunction &Func) {
 
 void StableFunctionMap::merge(const StableFunctionMap &OtherMap) {
   assert(!Finalized && "Cannot merge after finalization");
+  deserializeLazyLoadingEntries();
   for (auto &[Hash, Funcs] : OtherMap.HashToFuncs) {
-    auto &ThisFuncs = HashToFuncs[Hash];
-    for (auto &Func : Funcs) {
+    auto &ThisFuncs = HashToFuncs[Hash].Entries;
+    for (auto &Func : Funcs.Entries) {
       auto FuncNameId =
           getIdOrCreateForName(*OtherMap.getNameForId(Func->FunctionNameId));
       auto ModuleNameId =
@@ -114,25 +117,61 @@ size_t StableFunctionMap::size(SizeType Type) const {
   case UniqueHashCount:
     return HashToFuncs.size();
   case TotalFunctionCount: {
+    const_cast<StableFunctionMap *>(this)->deserializeLazyLoadingEntries();
     size_t Count = 0;
     for (auto &Funcs : HashToFuncs)
-      Count += Funcs.second.size();
+      Count += Funcs.second.Entries.size();
     return Count;
   }
   case MergeableFunctionCount: {
+    const_cast<StableFunctionMap *>(this)->deserializeLazyLoadingEntries();
     size_t Count = 0;
     for (auto &[Hash, Funcs] : HashToFuncs)
-      if (Funcs.size() >= 2)
-        Count += Funcs.size();
+      if (Funcs.Entries.size() >= 2)
+        Count += Funcs.Entries.size();
     return Count;
   }
   }
   llvm_unreachable("Unhandled size type");
 }
 
+const StableFunctionMap::StableFunctionEntries &
+StableFunctionMap::at(HashFuncsMapType::key_type FunctionHash) const {
+  auto It = HashToFuncs.find(FunctionHash);
+  if (isLazilyLoaded())
+    const_cast<StableFunctionMap *>(this)->deserializeLazyLoadingEntry(It);
+  return It->second.Entries;
+}
+
+void StableFunctionMap::deserializeLazyLoadingEntry(
+    HashFuncsMapType::iterator It) {
+  assert(isLazilyLoaded() && "Cannot deserialize non-lazily-loaded map");
+  std::call_once(It->second.LazyLoadFlag, [this, It]() {
+    for (auto Offset : It->second.Offsets)
+      StableFunctionMapRecord::deserializeEntry(
+          reinterpret_cast<const unsigned char *>(Offset), It->first, this,
+          ReadStableFunctionMapNames);
+  });
+}
+
+void ::StableFunctionMap::deserializeLazyLoadingEntries() {
+  if (!isLazilyLoaded())
+    return;
+  for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It)
+    deserializeLazyLoadingEntry(It);
+}
+
+const StableFunctionMap::HashFuncsMapType &
+StableFunctionMap::getFunctionMap() const {
+  // Ensure all entries are deserialized before returning the raw map.
+  if (isLazilyLoaded())
+    const_cast<StableFunctionMap *>(this)->deserializeLazyLoadingEntries();
+  return HashToFuncs;
+}
+
 using ParamLocs = SmallVector<IndexPair>;
-static void removeIdenticalIndexPair(
-    SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>> &SFS) {
+static void
+removeIdenticalIndexPair(StableFunctionMap::StableFunctionEntries &SFS) {
   auto &RSF = SFS[0];
   unsigned StableFunctionCount = SFS.size();
 
@@ -159,9 +198,7 @@ static void removeIdenticalIndexPair(
       SF->IndexOperandHashMap->erase(Pair);
 }
 
-static bool isProfitable(
-    const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
-        &SFS) {
+static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS) {
   unsigned StableFunctionCount = SFS.size();
   if (StableFunctionCount < GlobalMergingMinMerges)
     return false;
@@ -202,8 +239,11 @@ static bool isProfitable(
 }
 
 void StableFunctionMap::finalize(bool SkipTrim) {
+  deserializeLazyLoadingEntries();
+  SmallVector<HashFuncsMapType::iterator> ToDelete;
   for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
-    auto &[StableHash, SFS] = *It;
+    auto &[StableHash, Storage] = *It;
+    auto &SFS = Storage.Entries;
 
     // Group stable functions by ModuleIdentifier.
     llvm::stable_sort(SFS, [&](const std::unique_ptr<StableFunctionEntry> &L,
@@ -236,7 +276,7 @@ void StableFunctionMap::finalize(bool SkipTrim) {
       }
     }
     if (Invalid) {
-      HashToFuncs.erase(It);
+      ToDelete.push_back(It);
       continue;
     }
 
@@ -248,8 +288,10 @@ void StableFunctionMap::finalize(bool SkipTrim) {
     removeIdenticalIndexPair(SFS);
 
     if (!isProfitable(SFS))
-      HashToFuncs.erase(It);
+      ToDelete.push_back(It);
   }
+  for (auto It : ToDelete)
+    HashToFuncs.erase(It);
 
   Finalized = true;
 }
diff --git a/llvm/lib/CGData/StableFunctionMapRecord.cpp b/llvm/lib/CGData/StableFunctionMapRecord.cpp
index 423e068023088..d60e4a30453d8 100644
--- a/llvm/lib/CGData/StableFunctionMapRecord.cpp
+++ b/llvm/lib/CGData/StableFunctionMapRecord.cpp
@@ -53,7 +53,7 @@ static SmallVector<const StableFunctionMap::StableFunctionEntry *>
 getStableFunctionEntries(const StableFunctionMap &SFM) {
   SmallVector<const StableFunctionMap::StableFunctionEntry *> FuncEntries;
   for (const auto &P : SFM.getFunctionMap())
-    for (auto &Func : P.second)
+    for (auto &Func : P.second.Entries)
       FuncEntries.emplace_back(Func.get());
 
   llvm::stable_sort(
@@ -104,17 +104,39 @@ void StableFunctionMapRecord::serialize(
       Writer.OS.tell() - NamesByteSizeOffset - sizeof(NamesByteSizeOffset);
   PatchItems.emplace_back(NamesByteSizeOffset, &NamesByteSize, 1);
 
-  // Write StableFunctionEntries whose pointers are sorted.
+  // Write StableFunctionEntries. The structure is:
+  // - Number of StableFunctionEntries
+  // - Hashes of StableFunctionEntries
+  // - Fixed-size fields for each StableFunctionEntry
+  //   - FunctionNameId
+  //   - ModuleNameId
+  //   - InstCount
+  //   - Relative offset to IndexOperandHashes
+  // - Total size of variable-sized IndexOperandHashes for lazy-loading support
+  // - Variable-sized IndexOperandHashes for each StableFunctionEntry
+  //   - Number of IndexOperandHashes
+  //   - Contents of each IndexOperandHashes
   auto FuncEntries = getStableFunctionEntries(*FunctionMap);
   Writer.write<uint32_t>(FuncEntries.size());
-
-  for (const auto *FuncRef : FuncEntries) {
+  for (const auto *FuncRef : FuncEntries)
     Writer.write<stable_hash>(FuncRef->Hash);
+  std::vector<uint64_t> IndexOperandHashesOffsets;
+  IndexOperandHashesOffsets.reserve(FuncEntries.size());
+  for (const auto *FuncRef : FuncEntries) {
     Writer.write<uint32_t>(FuncRef->FunctionNameId);
     Writer.write<uint32_t>(FuncRef->ModuleNameId);
     Writer.write<uint32_t>(FuncRef->InstCount);
-
+    const uint64_t Offset = Writer.OS.tell();
+    IndexOperandHashesOffsets.push_back(Offset);
+    Writer.write<uint64_t>(0);
+  }
+  const uint64_t IndexOperandHashesByteSizeOffset = Writer.OS.tell();
+  Writer.write<uint64_t>(0);
+  for (size_t I = 0; I < FuncEntries.size(); ++I) {
+    const uint64_t Offset = Writer.OS.tell() - IndexOperandHashesOffsets[I];
+    PatchItems.emplace_back(IndexOperandHashesOffsets[I], &Offset, 1);
     // Emit IndexOperandHashes sorted from IndexOperandHashMap.
+    const auto *FuncRef = FuncEntries[I];
     IndexOperandHashVecType IndexOperandHashes =
         getStableIndexOperandHashes(FuncRef);
     Writer.write<uint32_t>(IndexOperandHashes.size());
@@ -124,10 +146,64 @@ void StableFunctionMapRecord::serialize(
       Writer.write<stable_hash>(IndexOperandHash.second);
     }
   }
+  // Write the total size of IndexOperandHashes.
+  const uint64_t IndexOperandHashesByteSize =
+      Writer.OS.tell() - IndexOperandHashesByteSizeOffset - sizeof(uint64_t);
+  PatchItems.emplace_back(IndexOperandHashesByteSizeOffset,
+                          &IndexOperandHashesByteSize, 1);
+}
+
+void StableFunctionMapRecord::deserializeEntry(
+    const unsigned char *Ptr, stable_hash Hash, StableFunctionMap *FunctionMap,
+    bool ReadStableFunctionMapNames) {
+  assert(FunctionMap->ReadStableFunctionMapNames == ReadStableFunctionMapNames);
+  auto FunctionNameId =
+      endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+  if (ReadStableFunctionMapNames)
+    assert(FunctionMap->getNameForId(FunctionNameId) &&
+           "FunctionNameId out of range");
+  auto ModuleNameId =
+      endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+  if (ReadStableFunctionMapNames)
+    assert(FunctionMap->getNameForId(ModuleNameId) &&
+           "ModuleNameId out of range");
+  auto InstCount =
+      endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+
+  // Read IndexOperandHashes to build IndexOperandHashMap
+  auto CurrentPosition = reinterpret_cast<uintptr_t>(Ptr);
+  auto IndexOperandHashesOffset =
+      endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
+  auto *IndexOperandHashesPtr = reinterpret_cast<const unsigned char *>(
+      CurrentPosition + IndexOperandHashesOffset);
+  auto NumIndexOperandHashes =
+      endian::readNext<uint32_t, endianness::little, unaligned>(
+          IndexOperandHashesPtr);
+  auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
+  for (unsigned J = 0; J < NumIndexOperandHashes; ++J) {
+    auto InstIndex = endian::readNext<uint32_t, endianness::little, unaligned>(
+        IndexOperandHashesPtr);
+    auto OpndIndex = endian::readNext<uint32_t, endianness::little, unaligned>(
+        IndexOperandHashesPtr);
+    auto OpndHash =
+        endian::readNext<stable_hash, endianness::little, unaligned>(
+            IndexOperandHashesPtr);
+    assert(InstIndex < InstCount && "InstIndex out of range");
+
+    IndexOperandHashMap->try_emplace({InstIndex, OpndIndex}, OpndHash);
+  }
+
+  // Insert a new StableFunctionEntry into the map.
+  auto FuncEntry = std::make_unique<StableFunctionMap::StableFunctionEntry>(
+      Hash, FunctionNameId, ModuleNameId, InstCount,
+      std::move(IndexOperandHashMap));
+
+  FunctionMap->insert(std::move(FuncEntry));
 }
 
 void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
-                                          bool ReadStableFunctionMapNames) {
+                                          bool ReadStableFunctionMapNames,
+                                          bool Lazy) {
   // Assert that Ptr is 4-byte aligned
   assert(((uintptr_t)Ptr % 4) == 0);
   // Read Names.
@@ -139,6 +215,7 @@ void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
   const auto NamesByteSize =
       endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
   const auto NamesOffset = reinterpret_cast<uintptr_t>(Ptr);
+  FunctionMap->ReadStableFunctionMapNames = ReadStableFunctionMapNames;
   if (ReadStableFunctionMapNames) {
     for (unsigned I = 0; I < NumNames; ++I) {
       StringRef Name(reinterpret_cast<const char *>(Ptr));
@@ -157,47 +234,51 @@ void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
   // Read StableFunctionEntries.
   auto NumFuncs =
       endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+  auto FixedSizeFieldsOffset =
+      reinterpret_cast<uintptr_t>(Ptr) + NumFuncs * sizeof(stable_hash);
+  constexpr uint32_t FixedSizeFieldsSizePerEntry =
+      // FunctionNameId
+      sizeof(uint32_t) +
+      // ModuleNameId
+      sizeof(uint32_t) +
+      // InstCount
+      sizeof(uint32_t) +
+      // Relative offset to IndexOperandHashes
+      sizeof(uint64_t);
   for (unsigned I = 0; I < NumFuncs; ++I) {
     auto Hash =
         endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
-    [[maybe_unused]] auto FunctionNameId =
-        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-    [[maybe_unused]] auto ModuleNameId =
-        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-    // Only validate IDs if we've read the names
-    if (ReadStableFunctionMapNames) {
-      assert(FunctionMap->getNameForId(FunctionNameId) &&
-             "FunctionNameId out of range");
-      assert(FunctionMap->getNameForId(ModuleNameId) &&
-             "ModuleNameId out of range");
-    }
+    if (Lazy)
+      FunctionMap->HashToFuncs.try_emplace(Hash)
+          .first->second.Offsets.push_back(FixedSizeFieldsOffset);
+    else
+      deserializeEntry(
+          reinterpret_cast<const unsigned char *>(FixedSizeFieldsOffset), Hash,
+          FunctionMap.get(), ReadStableFunctionMapNames);
+    FixedSizeFieldsOffset += FixedSizeFieldsSizePerEntry;
+  }
 
-    auto InstCount =
-        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-
-    // Read IndexOperandHashes to build IndexOperandHashMap
-    auto NumIndexOperandHashes =
-        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-    auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
-    for (unsigned J = 0; J < NumIndexOperandHashes; ++J) {
-      auto InstIndex =
-          endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-      auto OpndIndex =
-          endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-      auto OpndHash =
-          endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
-      assert(InstIndex < InstCount && "InstIndex out of range");
-
-      IndexOperandHashMap->try_emplace({InstIndex, OpndIndex}, OpndHash);
-    }
+  // Update Ptr to the end of the serialized map to meet the expectation of
+  // CodeGenDataReader.
+  Ptr = reinterpret_cast<const unsigned char *>(FixedSizeFieldsOffset);
+  auto IndexOperandHashesByteSize =
+      endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
+  Ptr = reinterpret_cast<const unsigned char *>(
+      reinterpret_cast<uintptr_t>(Ptr) + IndexOperandHashesByteSize);
+}
 
-    // Insert a new StableFunctionEntry into the map.
-    auto FuncEntry = std::make_unique<StableFunctionMap::StableFunctionEntry>(
-        Hash, FunctionNameId, ModuleNameId, InstCount,
-        std::move(IndexOperandHashMap));
+void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
+                                          bool ReadStableFunctionMapNames) {
+  deserialize(Ptr, ReadStableFunctionMapNames, /*Lazy=*/false);
+}
 
-    FunctionMap->insert(std::move(FuncEntry));
-  }
+void StableFunctionMapRecord::lazyDeserialize(
+    std::shared_ptr<MemoryBuffer> Buffer, uint64_t Offset,
+    bool ReadStableFunctionMapNames) {
+  const auto *Ptr = reinterpret_cast<const unsigned char *>(
+      reinterpret_cast<uintptr_t>(Buffer->getBufferStart()) + Offset);
+  deserialize(Ptr, ReadStableFunctionMapNames, /*Lazy=*/true);
+  FunctionMap->Buffer = std::move(Buffer);
 }
 
 void StableFunctionMapRecord::serializeYAML(yaml::Output &YOS) const {
diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index 73f11c1345daf..47640c4aac6df 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -350,9 +350,8 @@ checkConstLocationCompatible(const StableFunctionMap::StableFunctionEntry &SF,
   return true;
 }
 
-static ParamLocsVecTy computeParamInfo(
-    const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
-        &SFS) {
+static ParamLocsVecTy
+computeParamInfo(const StableFunctionMap::StableFunctionEntries &SFS) {
   std::map<std::vector<stable_hash>, ParamLocs> HashSeqToLocs;
   auto &RSF = *SFS[0];
   unsigned StableFunctionCount = SFS.size();
@@ -396,19 +395,18 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
   // Collect stable functions related to the current module.
   DenseMap<stable_hash, SmallVector<std::pair<Function *, FunctionHashInfo>>>
       HashToFuncs;
-  auto &Maps = FunctionMap->getFunctionMap();
   for (auto &F : M) {
     if (!isEligibleFunction(&F))
       continue;
     auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
-    if (Maps.contains(FI.FunctionHash))
+    if (FunctionMap->contains(FI.FunctionHash))
       HashToFuncs[FI.FunctionHash].emplace_back(&F, std::move(FI));
   }
 
   for (auto &[Hash, Funcs] : HashToFuncs) {
     std::optional<ParamLocsVecTy> ParamLocsVec;
     SmallVector<FuncMergeInfo> FuncMergeInfos;
-    auto &SFS = Maps.at(Hash);
+    auto &SFS = FunctionMap->at(Hash);
     assert(!SFS.empty());
     auto &RFS = SFS[0];
 
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
index a4022eb885b43..cae74f16bcf71 100644
--- a/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
@@ -39,6 +39,8 @@
 ; Two functions have the same hash with only one different constnat at a same location.
 ; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-nowrite.1 %tout-nowrite.2
 ; RUN: llvm-cgdata --convert %tout.cgdata   -o - | FileCheck %s
+; RUN: llvm-cgdata --merge -o %tout-lazy.cgdata %tout-nowrite.1 %tout-nowrite.2 -indexed-codegen-data-lazy-loading
+; RUN: llvm-cgdata --convert %tout-lazy.cgdata -indexed-codegen-data-lazy-loading -o - | FileCheck %s
 
 ; CHECK:      - Hash: [[#%d,HASH:]]
 ; CHECK-NEXT:   FunctionName: f1
diff --git a/llvm/test/tools/llvm-cgdata/empty.test b/llvm/test/tools/llvm-cgdata/empty.test
index 0d2b0e848a2c9..2082eca58f073 100644
--- a/llvm/test/tools/llvm-cgdata/empty.test
+++ b/llvm/test/tools/llvm-cgdata/empty.test
@@ -16,7 +16,7 @@ RUN: llvm-cgdata --show %t_emptyheader.cgdata | count 0
 
 # The version number appears when asked, as it's in the header
 RUN: llvm-cgdata --show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
-VERSION: Version: 3
+VERSION: Version: 4
 
 # When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
 RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
@@ -30,7 +30,7 @@ RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
 #   uint64_t StableFunctionMapOffset;
 # }
 RUN: printf '\xffcgdata\x81' > %t_header.cgdata
-RUN: printf '\x03\x00\x00\x00' >> %t_header.cgdata
+RUN: printf '\x04\x00\x00\x00' >> %t_header.cgdata
 RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata
 RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
 RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
diff --git a/llvm/test/tools/llvm-cgdata/error.test b/llvm/test/tools/llvm-cgdata/error.test
index 92ff484e31caf..9484371848a72 100644
--- a/llvm/test/tools/llvm-cgdata/error.test
+++ b/llvm/test/tools/llvm-cgdata/error.test
@@ -22,9 +22,9 @@ RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
 RUN: not llvm-cgdata --show %t_corrupt.cgdata 2>&1 | FileCheck %s  --check-prefix=CORRUPT
 CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)
 
-# The current version 3 while the header says 4.
+# The current version 4 while the header says 5.
 RUN: printf '\xffcgdata\x81' > %t_version.cgdata
-RUN: printf '\x04\x00\x00\x00' >> %t_version.cgdata
+RUN: printf '\x05\x00\x00\x00' >> %t_version.cgdata
 RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
 RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
 RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
diff --git a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
index b060872113b1b..70b83af407e5a 100644
--- a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
+++ b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
@@ -23,6 +23,8 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-both-hashtree-funcma
 # Merge an object file having cgdata (__llvm_outline and __llvm_merge)
 RUN: llvm-cgdata -m --skip-trim %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap.cgdata
 RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap.cgdata | FileCheck %s
+RUN: llvm-cgdata -m --skip-trim %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap-lazy.cgdata -indexed-codegen-data-lazy-loading
+RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
 
 CHECK: Outlined hash tree:
 CHECK-NEXT:  Total Node Count: 3
@@ -63,4 +65,4 @@ CHECK-NEXT:  Mergeable function Count: 0
 
 ;--- merge-both-template.ll
 @.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
-@.data2 = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+@.data2 = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
index 2936086321028..c088ffbb4e83f 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
@@ -23,8 +23,8 @@ RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o
 # Merge the archive into the codegen data file.
 RUN: llvm-cgdata --merge --skip-trim %t/merge-archive.a -o %t/merge-archive.cgdata
 RUN: llvm-cgdata --show %t/merge-archive.cgdata | FileCheck %s
-
-RUN: llvm-cgdata --show %t/merge-archive.cgdata| FileCheck %s
+RUN: llvm-cgdata --merge --skip-trim %t/merge-archive.a -o %t/merge-archive-lazy.cgdata -indexed-codegen-data-lazy-loading
+RUN: llvm-cgdata --show %t/merge-archive-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
 CHECK: Stable function map:
 CHECK-NEXT:  Unique hash Count: 1
 CHECK-NEXT:  Total function Count: 2
@@ -65,7 +65,7 @@ MAP-NEXT: ...
 ...
 
 ;--- merge-1-template.ll
-@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
 
 ;--- raw-2.cgtext
 :stable_function_map
@@ -80,4 +80,4 @@ MAP-NEXT: ...
 ...
 
 ;--- merge-2-template.ll
-@.data = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
index d2965456a1999..90b5992973b49 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
@@ -17,6 +17,8 @@ RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-
 RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
 RUN: llvm-cgdata --merge --skip-trim %t/merge-concat.o -o %t/merge-concat.cgdata
 RUN: llvm-cgdata --show %t/merge-concat.cgdata | FileCheck %s
+RUN: llvm-cgdata --merge --skip-trim %t/merge-concat.o -o %t/merge-concat-lazy.cgdata -indexed-codegen-data-lazy-loading
+RUN: llvm-cgdata --show %t/merge-concat-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
 
 CHECK: Stable function map:
 CHECK-NEXT:  Unique hash Count: 1
@@ -74,5 +76,5 @@ MAP-NEXT: ...
 ; In an linked executable (as opposed to an object file), cgdata in __llvm_merge might be concatenated.
 ; Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated.
 ; In other words, the following two trees are encoded back-to-back in a binary format.
-@.data1 = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
-@.data2 = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+@.data1 = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data2 = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
index 8277e3272d77e..b986aef26f1d7 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
@@ -19,8 +19,9 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
 
 # Merge two object files into the codegen data file.
 RUN: llvm-cgdata --merge --skip-trim %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata
-
 RUN: llvm-cgdata --show %t/merge.cgdata | FileCheck %s
+RUN: llvm-cgdata --merge --skip-trim %t/merge-1.o %t/merge-2.o -o %t/merge-lazy.cgdata -indexed-codegen-data-lazy-loading
+RUN: llvm-cgdata --show %t/merge-lazy.cgdata -indexed-codegen-data-lazy-loading  | FileCheck %s
 CHECK: Stable function map:
 CHECK-NEXT:  Unique hash Count: 1
 CHECK-NEXT:  Total function Count: 2
@@ -61,7 +62,7 @@ MAP-NEXT: ...
 ...
 
 ;--- merge-1-template.ll
-@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
 
 ;--- raw-2.cgtext
 :stable_function_map
@@ -76,4 +77,4 @@ MAP-NEXT: ...
 ...
 
 ;--- merge-2-template.ll
-@.data = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
index 9469f1cbda331..eac852ff7e710 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
@@ -15,6 +15,8 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merg
 # Merge an object file having cgdata (__llvm_merge)
 RUN: llvm-cgdata -m --skip-trim %t/merge-single.o -o %t/merge-single.cgdata
 RUN: llvm-cgdata -s %t/merge-single.cgdata | FileCheck %s
+RUN: llvm-cgdata -m --skip-trim %t/merge-single.o -o %t/merge-single-lazy.cgdata -indexed-codegen-data-lazy-loading 
+RUN: llvm-cgdata -s %t/merge-single-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
 CHECK: Stable function map:
 CHECK-NEXT:  Unique hash Count: 1
 CHECK-NEXT:  Total function Count: 1
@@ -33,4 +35,4 @@ CHECK-NEXT:  Mergeable function Count: 0
 ...
 
 ;--- merge-single-template.ll
-@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/tools/llvm-cgdata/Opts.td b/llvm/tools/llvm-cgdata/Opts.td
index 8da933f744e87..2b515a0140e67 100644
--- a/llvm/tools/llvm-cgdata/Opts.td
+++ b/llvm/tools/llvm-cgdata/Opts.td
@@ -31,3 +31,4 @@ def : JoinedOrSeparate<["-"], "o">, Alias<output>, MetaVarName<"<file>">, HelpTe
 def format : Option<["--"], "format", KIND_SEPARATE>,
              HelpText<"Specify the output format (text or binary)">, MetaVarName<"<value>">;
 def : JoinedOrSeparate<["-"], "f">, Alias<format>, HelpText<"Alias for --format">;
+def indexed_codegen_data_lazy_loading : F<"indexed-codegen-data-lazy-loading", "Lazily load indexed CodeGenData for testing purpose.">, Flags<[HelpHidden]>;
diff --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
index 98fa5c5657353..047557e5a7fae 100644
--- a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
+++ b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
@@ -83,6 +83,8 @@ static CGDataAction Action;
 static std::optional<CGDataFormat> OutputFormat;
 static std::vector<std::string> InputFilenames;
 
+extern cl::opt<bool> IndexedCodeGenDataLazyLoading;
+
 static void exitWithError(Twine Message, StringRef Whence = "",
                           StringRef Hint = "") {
   WithColor::error();
@@ -361,6 +363,9 @@ static void parseArgs(int argc, char **argv) {
   default:
     llvm_unreachable("unrecognized action");
   }
+
+  IndexedCodeGenDataLazyLoading =
+      Args.hasArg(OPT_indexed_codegen_data_lazy_loading);
 }
 
 int llvm_cgdata_main(int argc, char **argvNonConst, const llvm::ToolContext &) {
diff --git a/llvm/unittests/CGData/StableFunctionMapTest.cpp b/llvm/unittests/CGData/StableFunctionMapTest.cpp
index d551ac8a814f4..5cf62ae0b3943 100644
--- a/llvm/unittests/CGData/StableFunctionMapTest.cpp
+++ b/llvm/unittests/CGData/StableFunctionMapTest.cpp
@@ -117,7 +117,7 @@ TEST(StableFunctionMap, Finalize3) {
   Map.finalize();
   auto &M = Map.getFunctionMap();
   EXPECT_THAT(M, SizeIs(1));
-  auto &FuncEntries = M.begin()->second;
+  auto &FuncEntries = M.begin()->second.Entries;
   for (auto &FuncEntry : FuncEntries) {
     EXPECT_THAT(*FuncEntry->IndexOperandHashMap, SizeIs(1));
     ASSERT_THAT(*FuncEntry->IndexOperandHashMap,