-
Notifications
You must be signed in to change notification settings - Fork 6
Open
Description
Concurrent size: about 10
Tokenizer: jieba(use custom model, disable trigger)
pg log:
6373::JSONB, $6374::JSONB, $6375, $6376::VARCHAR, $6377, $6378::INTEGER, $6379::INTEGER, nextval('embeddings_id_seq'), $6380::INTEGER, 637), ($6381::BIGINT, $6382::INTEGER[], $6383::JSONB, $6384::JSONB, $6385, $6386::VARCHAR, $6387, $6388::INTEGER, $6389::INTEGER, nextval('embeddings_id_seq'), $6390::INTEGER, 638), ($6391::BIGINT, $6392::INTEGER[], $6393::JSONB, $6394::JSONB, $6395, $6396::VARCHAR, $6397, $6398::INTEGER, $6399::INTEGER, nextval('embeddings_id_seq'), $6400::INTEGER, 639)) AS imp_sen(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, sen_counter) ORDER BY sen_counter RETURNING embeddings.id, embeddings.id AS id__1
2025-07-04 07:18:24.113 UTC [469] ERROR: canceling statement due to user request
2025-07-04 07:18:24.113 UTC [469] CONTEXT: SQL statement "
WITH
new_tokens AS (
SELECT unnest(tokenizer_catalog.apply_text_analyzer_for_custom_model($1.text, 'jieba_analyzer')) AS token
),
to_insert AS (
SELECT token FROM new_tokens
WHERE NOT EXISTS (
SELECT 1 FROM tokenizer_catalog."model_jieba_model" WHERE token = new_tokens.token
)
)
INSERT INTO tokenizer_catalog."model_jieba_model" (token) SELECT token FROM to_insert ON CONFLICT (token) DO NOTHING"
PL/pgSQL function tokenizer_catalog.custom_model_insert_trigger() line 7 at EXECUTE
2025-07-04 07:18:24.113 UTC [469] STATEMENT: INSERT INTO embeddings (upload_id, pages, rects, content, embedding, text, bm25_embedding, created_at, updated_at, id, deleted_at) SELECT p0::BIGINT, p1::INTEGER[], p2::JSONB, p3::JSONB, p4::VECTOR(1536), p5::VARCHAR, p6::bm25vector, p7::INTEGER, p8::INTEGER, p9::BIGINT, p10::INTEGER FROM (VALUES ($1::BIGINT, $2::INTEGER[],
file: src/model/custom.rs
pgrx::extension_sql!(
r#"
CREATE FUNCTION custom_model_insert_trigger()
RETURNS TRIGGER AS $$
DECLARE
tokenizer_name TEXT := TG_ARGV[0];
target_column TEXT := TG_ARGV[1];
text_analyzer TEXT := TG_ARGV[2];
BEGIN
EXECUTE format('
WITH
new_tokens AS (
SELECT unnest(tokenizer_catalog.apply_text_analyzer_for_custom_model($1.%I, %L)) AS token
),
to_insert AS (
SELECT token FROM new_tokens
WHERE NOT EXISTS (
SELECT 1 FROM tokenizer_catalog."model_%s" WHERE token = new_tokens.token
)
)
INSERT INTO tokenizer_catalog."model_%s" (token) SELECT token FROM to_insert ON CONFLICT (token) DO NOTHING', target_column, text_analyzer, tokenizer_name, tokenizer_name) USING NEW;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
"#,
name = "custom_model_insert_trigger"
);Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels