
Deadlock when inserting data concurrently (deadlock on bulk insert) #17

@Sube-py

Description

Concurrency: about 10 concurrent inserts
Tokenizer: jieba (using a custom model, with the trigger disabled)
PostgreSQL log:

6373::JSONB, $6374::JSONB, $6375, $6376::VARCHAR, $6377, $6378::INTEGER, $6379::INTEGER, nextval('embeddings_id_seq'), $6380::INTEGER, 637), ($6381::BIGINT, $6382::INTEGER[], $6383::JSONB, $6384::JSONB, $6385, $6386::VARCHAR, $6387, $6388::INTEGER, $6389::INTEGER, nextval('embeddings_id_seq'), $6390::INTEGER, 638), ($6391::BIGINT, $6392::INTEGER[], $6393::JSONB, $6394::JSONB, $6395, $6396::VARCHAR, $6397, $6398::INTEGER, $6399::INTEGER, nextval('embeddings_id_seq'), $6400::INTEGER, 639)) AS imp_sen(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, sen_counter) ORDER BY sen_counter RETURNING embeddings.id, embeddings.id AS id__1
2025-07-04 07:18:24.113 UTC [469] ERROR:  canceling statement due to user request
2025-07-04 07:18:24.113 UTC [469] CONTEXT:  SQL statement "
	    WITH 
	    new_tokens AS (
	        SELECT unnest(tokenizer_catalog.apply_text_analyzer_for_custom_model($1.text, 'jieba_analyzer')) AS token
	    ),
	    to_insert AS (
	        SELECT token FROM new_tokens
	        WHERE NOT EXISTS (
	            SELECT 1 FROM tokenizer_catalog."model_jieba_model" WHERE token = new_tokens.token
	        )
	    )
	    INSERT INTO tokenizer_catalog."model_jieba_model" (token) SELECT token FROM to_insert ON CONFLICT (token) DO NOTHING"
	PL/pgSQL function tokenizer_catalog.custom_model_insert_trigger() line 7 at EXECUTE
2025-07-04 07:18:24.113 UTC [469] STATEMENT:  INSERT INTO embeddings (upload_id, pages, rects, content, embedding, text, bm25_embedding, created_at, updated_at, id, deleted_at) SELECT p0::BIGINT, p1::INTEGER[], p2::JSONB, p3::JSONB, p4::VECTOR(1536), p5::VARCHAR, p6::bm25vector, p7::INTEGER, p8::INTEGER, p9::BIGINT, p10::INTEGER FROM (VALUES ($1::BIGINT, $2::INTEGER[], 
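
From the CONTEXT line, the statement that was waiting when it was canceled is the trigger's dynamic INSERT ... ON CONFLICT (token) DO NOTHING into the shared tokenizer_catalog."model_jieba_model" table. When around 10 transactions insert overlapping token sets in different orders, each one holds the unique-index entries it has already inserted until commit, which is the classic lock-ordering deadlock shape for ON CONFLICT inserts. A minimal two-session sketch of that pattern (the table name and token values below are hypothetical, only to illustrate it):

-- Hypothetical stand-in for tokenizer_catalog."model_jieba_model":
-- a single token column with a unique constraint.
CREATE TABLE model_demo (token TEXT PRIMARY KEY);

-- Session 1:
BEGIN;
INSERT INTO model_demo (token) VALUES ('alpha') ON CONFLICT (token) DO NOTHING;
-- holds the unique-index entry for 'alpha' until commit

-- Session 2 (concurrently):
BEGIN;
INSERT INTO model_demo (token) VALUES ('beta')  ON CONFLICT (token) DO NOTHING;
INSERT INTO model_demo (token) VALUES ('alpha') ON CONFLICT (token) DO NOTHING;
-- blocks, waiting for session 1's uncommitted 'alpha'

-- Session 1 again:
INSERT INTO model_demo (token) VALUES ('beta') ON CONFLICT (token) DO NOTHING;
-- blocks, waiting for session 2's uncommitted 'beta';
-- the deadlock detector then aborts one of the two transactions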

file: src/model/custom.rs

// Defines the PL/pgSQL trigger function that keeps a custom model's token
// table in sync: for each inserted row it tokenizes the target column and
// adds any previously unseen tokens to tokenizer_catalog."model_<name>".
pgrx::extension_sql!(
    r#"
CREATE FUNCTION custom_model_insert_trigger()
RETURNS TRIGGER AS $$
DECLARE
    tokenizer_name TEXT := TG_ARGV[0];
    target_column TEXT := TG_ARGV[1];
    text_analyzer TEXT := TG_ARGV[2];
BEGIN
    EXECUTE format('
    WITH 
    new_tokens AS (
        SELECT unnest(tokenizer_catalog.apply_text_analyzer_for_custom_model($1.%I, %L)) AS token
    ),
    to_insert AS (
        SELECT token FROM new_tokens
        WHERE NOT EXISTS (
            SELECT 1 FROM tokenizer_catalog."model_%s" WHERE token = new_tokens.token
        )
    )
    INSERT INTO tokenizer_catalog."model_%s" (token) SELECT token FROM to_insert ON CONFLICT (token) DO NOTHING', target_column, text_analyzer, tokenizer_name, tokenizer_name) USING NEW;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
    "#,
    name = "custom_model_insert_trigger"
);
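
One commonly used mitigation for this pattern (a sketch only, not necessarily the fix the extension will adopt) is to deduplicate the token list per statement and insert it in a deterministic order, so that concurrent transactions tend to acquire unique-index entries in the same sequence. Rewriting the dynamic statement along these lines, with the column, analyzer, and model names taken from the log above:

-- Sketch of the dynamic statement with deduplicated, sorted tokens.
-- $1 is still the NEW row passed via USING, as in the original trigger.
-- A fixed insert order reduces lock-ordering deadlocks, but cannot remove
-- all waiting: each transaction still holds its inserted entries until commit.
WITH new_tokens AS (
    SELECT unnest(
        tokenizer_catalog.apply_text_analyzer_for_custom_model($1.text, 'jieba_analyzer')
    ) AS token
)
INSERT INTO tokenizer_catalog."model_jieba_model" (token)
SELECT DISTINCT token
FROM new_tokens
ORDER BY token
ON CONFLICT (token) DO NOTHING;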
