Skip to content

Commit 2ad513c

Browse files
authored
Merge pull request #1029 from openzim/detection_of_corruption_of_1st_blob_offset_in_cluster
Quick detection of corruption of 1st blob offset in cluster
2 parents d767195 + d2e8f88 commit 2ad513c

File tree

5 files changed: 64 additions (+64) and 10 deletions (-10).

5 files changed: 64 additions (+64) and 10 deletions (-10).

src/cluster.cpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,37 +73,49 @@ getClusterReader(const Reader& zimReader, offset_t offset, Cluster::Compression*
7373

7474
} // unnamed namespace
7575

76-
std::shared_ptr<Cluster> Cluster::read(const Reader& zimReader, offset_t clusterOffset)
76+
std::shared_ptr<Cluster> Cluster::read(const Reader& zimReader, offset_t clusterOffset, size_t maxBlobCount)
7777
{
7878
Compression comp;
7979
bool extended;
8080
auto reader = getClusterReader(zimReader, clusterOffset, &comp, &extended);
81-
return std::make_shared<Cluster>(std::move(reader), comp, extended);
81+
return std::make_shared<Cluster>(std::move(reader), comp, extended, maxBlobCount);
8282
}
8383

84-
Cluster::Cluster(std::unique_ptr<IStreamReader> reader_, Compression comp, bool isExtended)
84+
Cluster::Cluster(std::unique_ptr<IStreamReader> reader_, Compression comp, bool isExtended, size_t maxBlobCount)
8585
: compression(comp),
8686
isExtended(isExtended),
8787
m_reader(std::move(reader_))
8888
{
8989
if (isExtended) {
90-
read_header<uint64_t>();
90+
read_header<uint64_t>(maxBlobCount);
9191
} else {
92-
read_header<uint32_t>();
92+
read_header<uint32_t>(maxBlobCount);
9393
}
9494
}
9595

9696
Cluster::~Cluster() = default;
9797

9898
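/* This returns the number of chars read.
 * NOTE(review): the read_header() shown below is declared void, so this remark looks stale - confirm. */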
9999
template<typename OFFSET_TYPE>
100-
void Cluster::read_header()
100+
void Cluster::read_header(size_t maxBlobCount)
101101
{
102102
// read first offset, which specifies, how many offsets we need to read
103103
OFFSET_TYPE offset = m_reader->read<OFFSET_TYPE>();
104104

105+
if ( offset < 2 * sizeof(OFFSET_TYPE) ) {
106+
throw zim::ZimFileFormatError("Error parsing cluster. Offset of the first blob is too small.");
107+
}
108+
105109
size_t n_offset = offset / sizeof(OFFSET_TYPE);
106110

111+
if ( n_offset * sizeof(OFFSET_TYPE) != offset ) {
112+
throw zim::ZimFileFormatError("Error parsing cluster. Offset of the first blob is not properly aligned.");
113+
}
114+
115+
if ( n_offset > maxBlobCount + 1 ) {
116+
throw zim::ZimFileFormatError("Error parsing cluster. Offset of the first blob is too large.");
117+
}
118+
107119
// read offsets
108120
m_blobOffsets.clear();
109121
m_blobOffsets.reserve(n_offset);

src/cluster.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,11 @@ namespace zim
7373

7474

7575
template<typename OFFSET_TYPE>
76-
void read_header();
76+
void read_header(size_t maxBlobCount);
7777
const Reader& getReader(blob_index_t n) const;
7878

7979
public:
80-
Cluster(std::unique_ptr<IStreamReader> reader, Compression comp, bool isExtended);
80+
Cluster(std::unique_ptr<IStreamReader> reader, Compression comp, bool isExtended, size_t maxBlobCount);
8181
~Cluster();
8282
Compression getCompression() const { return compression; }
8383
bool isCompressed() const { return compression != Compression::None; }
@@ -92,7 +92,7 @@ namespace zim
9292

9393
size_t getMemorySize() const;
9494

95-
static std::shared_ptr<Cluster> read(const Reader& zimReader, offset_t clusterOffset);
95+
static std::shared_ptr<Cluster> read(const Reader& zimReader, offset_t clusterOffset, size_t maxBlobCount);
9696
};
9797

9898
struct ClusterMemorySize {

src/fileimpl.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,11 +500,17 @@ class Grouping
500500
return entry_index_t(m_articleListByCluster[idx.v]);
501501
}
502502

503+
size_t FileImpl::getMaxBlobCountInCluster(cluster_index_t idx) const
504+
{
505+
return getCountArticles().v;
506+
}
507+
503508
ClusterHandle FileImpl::readCluster(cluster_index_t idx) const
504509
{
505510
offset_t clusterOffset(getClusterOffset(idx));
506511
log_debug("read cluster " << idx << " from offset " << clusterOffset);
507-
return Cluster::read(*zimReader, clusterOffset);
512+
const auto maxBlobCountInCluster = getMaxBlobCountInCluster(idx);
513+
return Cluster::read(*zimReader, clusterOffset, maxBlobCountInCluster);
508514
}
509515

510516
ClusterHandle FileImpl::getCluster(cluster_index_t idx) const

src/fileimpl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ namespace zim
192192
offset_type getMimeListEndUpperLimit() const;
193193
void readMimeTypes();
194194
void quickCheckForCorruptFile();
195+
size_t getMaxBlobCountInCluster(cluster_index_t idx) const;
195196

196197
bool checkChecksum();
197198
bool checkDirentPtrs();

test/archive.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,41 @@ TEST_F(ZimArchive, validate)
791791
"Invalid cluster pointer\n"
792792
);
793793

794+
TEST_BROKEN_ZIM_NAME(
795+
"invalid.too_small_offset_of_first_blob_in_cluster_0.zim",
796+
"Error parsing cluster. Offset of the first blob is too small.\n"
797+
)
798+
799+
TEST_BROKEN_ZIM_NAME(
800+
"invalid.too_small_offset_of_first_blob_in_cluster_4.zim",
801+
"Error parsing cluster. Offset of the first blob is too small.\n"
802+
)
803+
804+
TEST_BROKEN_ZIM_NAME(
805+
"invalid.too_small_offset_of_first_blob_in_cluster_7.zim",
806+
"Error parsing cluster. Offset of the first blob is too small.\n"
807+
)
808+
809+
TEST_BROKEN_ZIM_NAME(
810+
"invalid.misaligned_offset_of_first_blob_in_cluster_9.zim",
811+
"Error parsing cluster. Offset of the first blob is not properly aligned.\n"
812+
)
813+
814+
TEST_BROKEN_ZIM_NAME(
815+
"invalid.misaligned_offset_of_first_blob_in_cluster_10.zim",
816+
"Error parsing cluster. Offset of the first blob is not properly aligned.\n"
817+
)
818+
819+
TEST_BROKEN_ZIM_NAME(
820+
"invalid.misaligned_offset_of_first_blob_in_cluster_11.zim",
821+
"Error parsing cluster. Offset of the first blob is not properly aligned.\n"
822+
)
823+
824+
TEST_BROKEN_ZIM_NAME(
825+
"invalid.too_large_offset_of_first_blob_in_cluster.zim",
826+
"Error parsing cluster. Offset of the first blob is too large.\n"
827+
)
828+
794829
TEST_BROKEN_ZIM_NAME(
795830
"invalid.offset_in_cluster.zim",
796831
"Error parsing cluster. Offsets are not ordered.\n"

0 commit comments

Comments
 (0)