Skip to content

Commit 5ef9932

Browse files
authored
Merge pull request #155 from duckdb/bump121-2
update bundled duckdb to 1.2.1
2 parents 2dcfbd2 + 1a852c2 commit 5ef9932

File tree

116 files changed

+1371
-893
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

116 files changed

+1371
-893
lines changed

src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -44,41 +44,33 @@ struct StringAggFunction {
4444
if (!state.dataptr) {
4545
finalize_data.ReturnNull();
4646
} else {
47-
target = StringVector::AddString(finalize_data.result, state.dataptr, state.size);
48-
}
49-
}
50-
51-
template <class STATE>
52-
static void Destroy(STATE &state, AggregateInputData &aggr_input_data) {
53-
if (state.dataptr) {
54-
delete[] state.dataptr;
47+
target = string_t(state.dataptr, state.size);
5548
}
5649
}
5750

5851
static bool IgnoreNull() {
5952
return true;
6053
}
6154

62-
static inline void PerformOperation(StringAggState &state, const char *str, const char *sep, idx_t str_size,
63-
idx_t sep_size) {
55+
static inline void PerformOperation(StringAggState &state, ArenaAllocator &allocator, const char *str,
56+
const char *sep, idx_t str_size, idx_t sep_size) {
6457
if (!state.dataptr) {
6558
// first iteration: allocate space for the string and copy it into the state
6659
state.alloc_size = MaxValue<idx_t>(8, NextPowerOfTwo(str_size));
67-
state.dataptr = new char[state.alloc_size];
60+
state.dataptr = char_ptr_cast(allocator.Allocate(state.alloc_size));
6861
state.size = str_size;
6962
memcpy(state.dataptr, str, str_size);
7063
} else {
7164
// subsequent iteration: first check if we have space to place the string and separator
7265
idx_t required_size = state.size + str_size + sep_size;
7366
if (required_size > state.alloc_size) {
7467
// no space! allocate extra space
68+
const auto old_size = state.alloc_size;
7569
while (state.alloc_size < required_size) {
7670
state.alloc_size *= 2;
7771
}
78-
auto new_data = new char[state.alloc_size];
79-
memcpy(new_data, state.dataptr, state.size);
80-
delete[] state.dataptr;
81-
state.dataptr = new_data;
72+
state.dataptr =
73+
char_ptr_cast(allocator.Reallocate(data_ptr_cast(state.dataptr), old_size, state.alloc_size));
8274
}
8375
// copy the separator
8476
memcpy(state.dataptr + state.size, sep, sep_size);
@@ -89,14 +81,15 @@ struct StringAggFunction {
8981
}
9082
}
9183

92-
static inline void PerformOperation(StringAggState &state, string_t str, optional_ptr<FunctionData> data_p) {
84+
static inline void PerformOperation(StringAggState &state, ArenaAllocator &allocator, string_t str,
85+
optional_ptr<FunctionData> data_p) {
9386
auto &data = data_p->Cast<StringAggBindData>();
94-
PerformOperation(state, str.GetData(), data.sep.c_str(), str.GetSize(), data.sep.size());
87+
PerformOperation(state, allocator, str.GetData(), data.sep.c_str(), str.GetSize(), data.sep.size());
9588
}
9689

9790
template <class INPUT_TYPE, class STATE, class OP>
9891
static void Operation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &unary_input) {
99-
PerformOperation(state, input, unary_input.input.bind_data);
92+
PerformOperation(state, unary_input.input.allocator, input, unary_input.input.bind_data);
10093
}
10194

10295
template <class INPUT_TYPE, class STATE, class OP>
@@ -113,8 +106,8 @@ struct StringAggFunction {
113106
// source is not set: skip combining
114107
return;
115108
}
116-
PerformOperation(target, string_t(source.dataptr, UnsafeNumericCast<uint32_t>(source.size)),
117-
aggr_input_data.bind_data);
109+
PerformOperation(target, aggr_input_data.allocator,
110+
string_t(source.dataptr, UnsafeNumericCast<uint32_t>(source.size)), aggr_input_data.bind_data);
118111
}
119112
};
120113

@@ -162,8 +155,7 @@ AggregateFunctionSet StringAggFun::GetFunctions() {
162155
AggregateFunction::UnaryScatterUpdate<StringAggState, string_t, StringAggFunction>,
163156
AggregateFunction::StateCombine<StringAggState, StringAggFunction>,
164157
AggregateFunction::StateFinalize<StringAggState, string_t, StringAggFunction>,
165-
AggregateFunction::UnaryUpdate<StringAggState, string_t, StringAggFunction>, StringAggBind,
166-
AggregateFunction::StateDestroy<StringAggState, StringAggFunction>);
158+
AggregateFunction::UnaryUpdate<StringAggState, string_t, StringAggFunction>, StringAggBind);
167159
string_agg_param.serialize = StringAggSerialize;
168160
string_agg_param.deserialize = StringAggDeserialize;
169161
string_agg.AddFunction(string_agg_param);

src/duckdb/extension/core_functions/aggregate/nested/list.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ static void ListFinalize(Vector &states_vector, AggregateInputData &aggr_input_d
116116

117117
// first iterate over all entries and set up the list entries, and get the newly required total length
118118
for (idx_t i = 0; i < count; i++) {
119-
120119
auto &state = *states[states_data.sel->get_index(i)];
121120
const auto rid = i + offset;
122121
result_data[rid].offset = total_len;

src/duckdb/extension/core_functions/lambda_functions.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -223,17 +223,6 @@ void ExecuteExpression(const idx_t elem_cnt, const LambdaFunctions::ColumnInfo &
223223
// ListLambdaBindData
224224
//===--------------------------------------------------------------------===//
225225

226-
unique_ptr<FunctionData> ListLambdaBindData::Copy() const {
227-
auto lambda_expr_copy = lambda_expr ? lambda_expr->Copy() : nullptr;
228-
return make_uniq<ListLambdaBindData>(return_type, std::move(lambda_expr_copy), has_index);
229-
}
230-
231-
bool ListLambdaBindData::Equals(const FunctionData &other_p) const {
232-
auto &other = other_p.Cast<ListLambdaBindData>();
233-
return Expression::Equals(lambda_expr, other.lambda_expr) && return_type == other.return_type &&
234-
has_index == other.has_index;
235-
}
236-
237226
void ListLambdaBindData::Serialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
238227
const ScalarFunction &) {
239228
auto &bind_data = bind_data_p->Cast<ListLambdaBindData>();

src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,17 @@
1515

1616
namespace duckdb {
1717

18-
// FIXME: use a local state for each thread to increase performance?
18+
struct ListAggregatesLocalState : public FunctionLocalState {
19+
explicit ListAggregatesLocalState(Allocator &allocator) : arena_allocator(allocator) {
20+
}
21+
22+
ArenaAllocator arena_allocator;
23+
};
24+
25+
unique_ptr<FunctionLocalState> ListAggregatesInitLocalState(ExpressionState &state, const BoundFunctionExpression &expr,
26+
FunctionData *bind_data) {
27+
return make_uniq<ListAggregatesLocalState>(BufferAllocator::Get(state.GetContext()));
28+
}
1929
// FIXME: benchmark the use of simple_update against using update (if applicable)
2030

2131
static unique_ptr<FunctionData> ListAggregatesBindFailure(ScalarFunction &bound_function) {
@@ -207,7 +217,8 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
207217
auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
208218
auto &info = func_expr.bind_info->Cast<ListAggregatesBindData>();
209219
auto &aggr = info.aggr_expr->Cast<BoundAggregateExpression>();
210-
ArenaAllocator allocator(Allocator::DefaultAllocator());
220+
auto &allocator = ExecuteFunctionState::GetFunctionState(state)->Cast<ListAggregatesLocalState>().arena_allocator;
221+
allocator.Reset();
211222
AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
212223

213224
D_ASSERT(aggr.function.update);
@@ -511,8 +522,9 @@ static unique_ptr<FunctionData> ListUniqueBind(ClientContext &context, ScalarFun
511522
}
512523

513524
ScalarFunction ListAggregateFun::GetFunction() {
514-
auto result = ScalarFunction({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR}, LogicalType::ANY,
515-
ListAggregateFunction, ListAggregateBind);
525+
auto result =
526+
ScalarFunction({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR}, LogicalType::ANY,
527+
ListAggregateFunction, ListAggregateBind, nullptr, nullptr, ListAggregatesInitLocalState);
516528
BaseScalarFunction::SetReturnsError(result);
517529
result.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
518530
result.varargs = LogicalType::ANY;
@@ -523,12 +535,12 @@ ScalarFunction ListAggregateFun::GetFunction() {
523535

524536
ScalarFunction ListDistinctFun::GetFunction() {
525537
return ScalarFunction({LogicalType::LIST(LogicalType::ANY)}, LogicalType::LIST(LogicalType::ANY),
526-
ListDistinctFunction, ListDistinctBind);
538+
ListDistinctFunction, ListDistinctBind, nullptr, nullptr, ListAggregatesInitLocalState);
527539
}
528540

529541
ScalarFunction ListUniqueFun::GetFunction() {
530542
return ScalarFunction({LogicalType::LIST(LogicalType::ANY)}, LogicalType::UBIGINT, ListUniqueFunction,
531-
ListUniqueBind);
543+
ListUniqueBind, nullptr, nullptr, ListAggregatesInitLocalState);
532544
}
533545

534546
} // namespace duckdb

src/duckdb/extension/icu/icu-datefunc.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,20 @@ unique_ptr<FunctionData> ICUDateFunc::Bind(ClientContext &context, ScalarFunctio
7171
return make_uniq<BindData>(context);
7272
}
7373

74-
void ICUDateFunc::SetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
74+
bool ICUDateFunc::TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
7575
auto tz = icu_66::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(icu::StringPiece(tz_id.GetString())));
7676
if (*tz == icu::TimeZone::getUnknown()) {
7777
delete tz;
78-
throw NotImplementedException("Unknown TimeZone '%s'", tz_id.GetString());
78+
return false;
7979
}
8080
calendar->adoptTimeZone(tz);
81+
return true;
82+
}
83+
84+
void ICUDateFunc::SetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
85+
if (!TrySetTimeZone(calendar, tz_id)) {
86+
throw NotImplementedException("Unknown TimeZone '%s'", tz_id.GetString());
87+
}
8188
}
8289

8390
timestamp_t ICUDateFunc::GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros) {

src/duckdb/extension/icu/icu-strptime.cpp

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,7 @@
1111
#include "duckdb/execution/expression_executor.hpp"
1212
#include "duckdb/function/scalar/strftime_format.hpp"
1313
#include "duckdb/main/client_context.hpp"
14-
#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
1514
#include "duckdb/planner/expression/bound_function_expression.hpp"
16-
#include "duckdb/function/function_binder.hpp"
1715
#include "duckdb/function/cast/default_casts.hpp"
1816
#include "duckdb/main/extension_util.hpp"
1917

@@ -60,14 +58,7 @@ struct ICUStrptime : public ICUDateFunc {
6058
}
6159

6260
static uint64_t ToMicros(icu::Calendar *calendar, const ParseResult &parsed, const StrpTimeFormat &format) {
63-
// Set TZ first, if any.
64-
// Note that empty TZ names are not allowed,
65-
// but unknown names will map to GMT.
66-
if (!parsed.tz.empty()) {
67-
SetTimeZone(calendar, parsed.tz);
68-
}
69-
70-
// Now get the parts in the given time zone
61+
// Get the parts in the current time zone
7162
uint64_t micros = parsed.GetMicros();
7263
calendar->set(UCAL_EXTENDED_YEAR, parsed.data[0]); // strptime doesn't understand eras
7364
calendar->set(UCAL_MONTH, parsed.data[1] - 1);
@@ -110,6 +101,11 @@ struct ICUStrptime : public ICUDateFunc {
110101
if (parsed.is_special) {
111102
return parsed.ToTimestamp();
112103
} else {
104+
// Set TZ first, if any.
105+
if (!parsed.tz.empty()) {
106+
SetTimeZone(calendar, parsed.tz);
107+
}
108+
113109
return GetTime(calendar, ToMicros(calendar, parsed, format));
114110
}
115111
}
@@ -143,7 +139,7 @@ struct ICUStrptime : public ICUDateFunc {
143139
if (format.Parse(input, parsed)) {
144140
if (parsed.is_special) {
145141
return parsed.ToTimestamp();
146-
} else {
142+
} else if (parsed.tz.empty() || TrySetTimeZone(calendar, parsed.tz)) {
147143
timestamp_t result;
148144
if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) {
149145
return result;

src/duckdb/extension/icu/include/icu-datefunc.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ struct ICUDateFunc {
4949
static duckdb::unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
5050
vector<duckdb::unique_ptr<Expression>> &arguments);
5151

52-
//! Sets the time zone for the calendar.
52+
//! Tries to set the time zone for the calendar and returns false if it is not valid.
53+
static bool TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
54+
//! Sets the time zone for the calendar. Throws if it is not valid
5355
static void SetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
5456
//! Gets the timestamp from the calendar, throwing if it is not in range.
5557
static bool TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result);

src/duckdb/extension/json/buffered_json_reader.cpp

Lines changed: 18 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -90,22 +90,16 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
9090
optional_ptr<FileHandle> override_handle) {
9191
if (size != 0) {
9292
auto &handle = override_handle ? *override_handle.get() : *file_handle.get();
93-
if (can_seek) {
94-
handle.Read(pointer, size, position);
95-
} else if (sample_run) { // Cache the buffer
96-
handle.Read(pointer, size, position);
9793

94+
if (!cached_buffers.empty() || position < cached_size) {
95+
ReadFromCache(pointer, size, position);
96+
}
97+
98+
handle.Read(pointer, size, position);
99+
if (file_handle->IsPipe()) { // Cache the buffer
98100
cached_buffers.emplace_back(allocator.Allocate(size));
99101
memcpy(cached_buffers.back().get(), pointer, size);
100102
cached_size += size;
101-
} else {
102-
if (!cached_buffers.empty() || position < cached_size) {
103-
ReadFromCache(pointer, size, position);
104-
}
105-
106-
if (size != 0) {
107-
handle.Read(pointer, size, position);
108-
}
109103
}
110104
}
111105

@@ -121,30 +115,23 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
121115

122116
bool JSONFileHandle::Read(char *pointer, idx_t &read_size, idx_t requested_size, bool &file_done, bool sample_run) {
123117
D_ASSERT(requested_size != 0);
118+
read_size = 0;
124119
if (last_read_requested) {
125120
return false;
126121
}
127122

128-
if (can_seek) {
129-
read_size = ReadInternal(pointer, requested_size);
130-
read_position += read_size;
131-
} else if (sample_run) { // Cache the buffer
132-
read_size = ReadInternal(pointer, requested_size);
133-
if (read_size > 0) {
134-
cached_buffers.emplace_back(allocator.Allocate(read_size));
135-
memcpy(cached_buffers.back().get(), pointer, read_size);
136-
}
137-
cached_size += read_size;
138-
read_position += read_size;
139-
} else {
140-
read_size = 0;
141-
if (!cached_buffers.empty() || read_position < cached_size) {
142-
read_size += ReadFromCache(pointer, requested_size, read_position);
143-
}
144-
if (requested_size != 0) {
145-
read_size += ReadInternal(pointer, requested_size);
146-
}
123+
if (!cached_buffers.empty() || read_position < cached_size) {
124+
read_size += ReadFromCache(pointer, requested_size, read_position);
125+
}
126+
127+
auto temp_read_size = ReadInternal(pointer, requested_size);
128+
if (file_handle->IsPipe() && temp_read_size != 0) { // Cache the buffer
129+
cached_buffers.emplace_back(allocator.Allocate(temp_read_size));
130+
memcpy(cached_buffers.back().get(), pointer, temp_read_size);
147131
}
132+
cached_size += temp_read_size;
133+
read_position += temp_read_size;
134+
read_size += temp_read_size;
148135

149136
if (read_size == 0) {
150137
last_read_requested = true;

src/duckdb/extension/json/json_extension.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,17 @@
1717
namespace duckdb {
1818

1919
static DefaultMacro json_macros[] = {
20-
{DEFAULT_SCHEMA, "json_group_array", {"x", nullptr}, {{nullptr, nullptr}}, "to_json(list(x))"},
20+
{DEFAULT_SCHEMA,
21+
"json_group_array",
22+
{"x", nullptr},
23+
{{nullptr, nullptr}},
24+
"CAST('[' || string_agg(CASE WHEN x IS NULL THEN 'null'::JSON ELSE to_json(x) END, ',') || ']' AS JSON)"},
2125
{DEFAULT_SCHEMA,
2226
"json_group_object",
23-
{"name", "value", nullptr},
27+
{"n", "v", nullptr},
2428
{{nullptr, nullptr}},
25-
"to_json(map(list(name), list(value)))"},
29+
"CAST('{' || string_agg(to_json(n::VARCHAR) || ':' || CASE WHEN v IS NULL THEN 'null'::JSON ELSE to_json(v) END, "
30+
"',') || '}' AS JSON)"},
2631
{DEFAULT_SCHEMA,
2732
"json_group_structure",
2833
{"x", nullptr},

src/duckdb/extension/parquet/column_reader.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,8 @@ void ColumnReader::PreparePageV2(PageHeader &page_hdr) {
319319

320320
auto compressed_bytes = page_hdr.compressed_page_size - uncompressed_bytes;
321321

322-
AllocateCompressed(compressed_bytes);
322+
ResizeableBuffer compressed_buffer;
323+
compressed_buffer.resize(GetAllocator(), compressed_bytes);
323324
reader.ReadData(*protocol, compressed_buffer.ptr, compressed_bytes);
324325

325326
DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, compressed_bytes, block->ptr + uncompressed_bytes,
@@ -334,10 +335,6 @@ void ColumnReader::AllocateBlock(idx_t size) {
334335
}
335336
}
336337

337-
void ColumnReader::AllocateCompressed(idx_t size) {
338-
compressed_buffer.resize(GetAllocator(), size);
339-
}
340-
341338
void ColumnReader::PreparePage(PageHeader &page_hdr) {
342339
AllocateBlock(page_hdr.uncompressed_page_size + 1);
343340
if (chunk->meta_data.codec == CompressionCodec::UNCOMPRESSED) {
@@ -348,7 +345,8 @@ void ColumnReader::PreparePage(PageHeader &page_hdr) {
348345
return;
349346
}
350347

351-
AllocateCompressed(page_hdr.compressed_page_size + 1);
348+
ResizeableBuffer compressed_buffer;
349+
compressed_buffer.resize(GetAllocator(), page_hdr.compressed_page_size + 1);
352350
reader.ReadData(*protocol, compressed_buffer.ptr, page_hdr.compressed_page_size);
353351

354352
DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, page_hdr.compressed_page_size, block->ptr,

0 commit comments

Comments
 (0)