From ad8722f3d10dc683f93a276e8f9ca6808702a41d Mon Sep 17 00:00:00 2001 From: doomedraven Date: Wed, 2 Jul 2025 08:56:05 +0200 Subject: [PATCH 1/3] handle strings size bigger than 16MB --- dev_utils/mongo_hooks.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dev_utils/mongo_hooks.py b/dev_utils/mongo_hooks.py index 6270a86763a..4791e63938e 100644 --- a/dev_utils/mongo_hooks.py +++ b/dev_utils/mongo_hooks.py @@ -1,3 +1,4 @@ +import sys import itertools import logging @@ -68,6 +69,10 @@ def normalize_file(file_dict, task_id): new_dict["_id"] = key file_dict[FILE_REF_KEY] = key + # 16MB limit handling + if new_dict.get("strings") and new_dict["strings"] and sys.getsizeof(new_dict["strings"]) > 16793600: + log.warning("strings are bigger than 16MB, truncating") + new_dict["strings"] = [] return UpdateOne({"_id": key}, {"$set": new_dict, "$addToSet": {TASK_IDS_KEY: task_id}}, upsert=True, hint=[("_id", 1)]) From d1531ab2fdb571fa6025f01813b453bb7f6de09f Mon Sep 17 00:00:00 2001 From: doomedraven Date: Wed, 2 Jul 2025 09:36:36 +0200 Subject: [PATCH 2/3] Update dev_utils/mongo_hooks.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- dev_utils/mongo_hooks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dev_utils/mongo_hooks.py b/dev_utils/mongo_hooks.py index 4791e63938e..7faaa82e219 100644 --- a/dev_utils/mongo_hooks.py +++ b/dev_utils/mongo_hooks.py @@ -70,8 +70,9 @@ def normalize_file(file_dict, task_id): new_dict["_id"] = key file_dict[FILE_REF_KEY] = key # 16MB limit handling - if new_dict.get("strings") and new_dict["strings"] and sys.getsizeof(new_dict["strings"]) > 16793600: - log.warning("strings are bigger than 16MB, truncating") + strings_val = new_dict.get("strings") + if strings_val and len(BSON.encode({"strings": strings_val})) > 15 * 1024 * 1024: + log.warning("strings field is too large (>15MB), truncating to avoid MongoDB document size error") new_dict["strings"] 
= [] return UpdateOne({"_id": key}, {"$set": new_dict, "$addToSet": {TASK_IDS_KEY: task_id}}, upsert=True, hint=[("_id", 1)]) From f561a44b42137b7381ced50d157d355d696ab5ce Mon Sep 17 00:00:00 2001 From: doomedraven Date: Wed, 2 Jul 2025 09:41:43 +0200 Subject: [PATCH 3/3] Update mongo_hooks.py --- dev_utils/mongo_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev_utils/mongo_hooks.py b/dev_utils/mongo_hooks.py index 7faaa82e219..ef45fa7852c 100644 --- a/dev_utils/mongo_hooks.py +++ b/dev_utils/mongo_hooks.py @@ -1,8 +1,8 @@ -import sys import itertools import logging from pymongo import UpdateOne, errors +from bson import BSON from dev_utils.mongodb import ( mongo_bulk_write,