Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,18 @@ ifeq ($(HAS_PARQUET),yes)
PGSTROM_FLAGS += -DHAS_PARQUET=1
PG_CXXFLAGS += $(shell pkgconf --cflags parquet)
SHLIB_LINK += $(shell pkgconf --libs parquet)
# Add runtime library path
PARQUET_LIBDIR = $(shell pkgconf --variable=libdir parquet)
ifneq ($(PARQUET_LIBDIR),)
SHLIB_LINK += -Wl,-rpath,$(PARQUET_LIBDIR)
endif
else
SHLIB_LINK += $(shell pkgconf --libs arrow)
# Add runtime library path for Arrow
ARROW_LIBDIR = $(shell pkgconf --variable=libdir arrow)
ifneq ($(ARROW_LIBDIR),)
SHLIB_LINK += -Wl,-rpath,$(ARROW_LIBDIR)
endif
endif
endif

Expand Down
52 changes: 47 additions & 5 deletions src/arrow_meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3223,7 +3223,26 @@ __readParquetMinMaxStats(ArrowFieldNode *field,
std::string min_datum;
std::string max_datum;

switch (stats->physical_type())
if (!stats)
return;

// Safely attempt to get physical_type
parquet::Type::type phys_type;
try {
phys_type = stats->physical_type();
} catch (const std::exception& e) {
#ifdef PGSTROM_DEBUG
elog(DEBUG1, "Failed to get physical_type from Parquet statistics: %s", e.what());
#endif
return;
} catch (...) {
#ifdef PGSTROM_DEBUG
elog(DEBUG1, "Unknown error getting physical_type from Parquet statistics");
#endif
return;
}
Comment on lines +3232 to +3243
Copy link

Copilot AI Sep 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code still calls stats->physical_type() with no protection beyond the null check. If the object is corrupted due to an ABI mismatch, this call itself could segfault; a segfault is delivered as a signal, not thrown as a C++ exception, so the try/catch handlers would never intercept it. Consider adding a more defensive approach or additional validation before making any method calls on the stats object.

Copilot uses AI. Check for mistakes.

switch (phys_type)
{
case parquet::Type::BOOLEAN: {
auto __stat = std::dynamic_pointer_cast<const parquet::BoolStatistics>(stats);
Expand Down Expand Up @@ -3359,10 +3378,33 @@ __readParquetRowGroupMetadata(ArrowMessage *rbatch_message,
field->length = col_meta->num_values();
if (stats)
{
if (stats->HasNullCount())
field->null_count = stats->null_count();
if (stats->HasMinMax())
__readParquetMinMaxStats(field, stats);
// Handle null count statistics
try {
if (stats->HasNullCount())
field->null_count = stats->null_count();
} catch (const std::exception& e) {
#ifdef PGSTROM_DEBUG
elog(DEBUG1, "Failed to access null count statistics: %s", e.what());
#endif
} catch (...) {
#ifdef PGSTROM_DEBUG
elog(DEBUG1, "Unknown error accessing null count statistics");
#endif
}

// Handle min/max statistics separately
try {
if (stats->HasMinMax())
__readParquetMinMaxStats(field, stats);
} catch (const std::exception& e) {
#ifdef PGSTROM_DEBUG
elog(DEBUG1, "Failed to access min/max statistics: %s", e.what());
#endif
} catch (...) {
#ifdef PGSTROM_DEBUG
elog(DEBUG1, "Unknown error accessing min/max statistics");
#endif
}
}
/*
* Some additional Parquet specific attributes for dump only
Expand Down