diff --git a/04-Complex-Docs.ipynb b/04-Complex-Docs.ipynb index 4633c1e0..c125bbd1 100644 --- a/04-Complex-Docs.ipynb +++ b/04-Complex-Docs.ipynb @@ -106,7 +106,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Uploading Files: 100%|████████████████████████████████████████████████| 4/4 [00:02<00:00, 1.42it/s]" + "Uploading Files: 100%|████████████████████████████████████████████████| 4/4 [00:05<00:00, 1.43s/it]" ] }, { @@ -114,8 +114,8 @@ "output_type": "stream", "text": [ "Temp Folder: ./data/temp_extract removed\n", - "CPU times: user 315 ms, sys: 190 ms, total: 506 ms\n", - "Wall time: 4.33 s\n" + "CPU times: user 359 ms, sys: 244 ms, total: 603 ms\n", + "Wall time: 6.09 s\n" ] }, { @@ -173,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "0e0c21a6-bf09-48ca-b47c-27b8a2045d45", "metadata": { "tags": [] @@ -186,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "050418ea-9b0e-4c76-8a11-e59b3d4429a0", "metadata": { "tags": [] @@ -198,10 +198,11 @@ "text": [ "Extracting Text from books/Boundaries_When_to_Say_Yes_How_to_Say_No_to_Take_Control_of_Your_Life.pdf ...\n", "Extracting text using PyPDF\n", - "Parsing took: 1.727564 seconds\n", + "Parsing took: 2.158757 seconds\n", "books/Boundaries_When_to_Say_Yes_How_to_Say_No_to_Take_Control_of_Your_Life.pdf contained 357 pages\n", "\n", - "Extracting Text from books/Fundamentals_of_Physics_Textbook.pdf ...\n" + "Extracting Text from books/Fundamentals_of_Physics_Textbook.pdf ...\n", + "Extracting text using PyPDF\n" ] }, { @@ -297,18 +298,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "Extracting text using PyPDF\n", - "Parsing took: 98.872403 seconds\n", + "Parsing took: 109.123985 seconds\n", "books/Fundamentals_of_Physics_Textbook.pdf contained 1450 pages\n", "\n", "Extracting Text from books/Made_To_Stick.pdf ...\n", "Extracting text using PyPDF\n", - "Parsing took: 6.983265 seconds\n", + "Parsing took: 
8.691508 seconds\n", "books/Made_To_Stick.pdf contained 225 pages\n", "\n", "Extracting Text from books/Pere_Riche_Pere_Pauvre.pdf ...\n", "Extracting text using PyPDF\n", - "Parsing took: 0.775370 seconds\n", + "Parsing took: 0.953015 seconds\n", "books/Pere_Riche_Pere_Pauvre.pdf contained 225 pages\n", "\n" ] @@ -356,7 +356,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "f2a5d62f-b664-4662-a6c9-a1eb2a3c5e11", "metadata": {}, "outputs": [ @@ -365,24 +365,25 @@ "output_type": "stream", "text": [ "books/Boundaries_When_to_Say_Yes_How_to_Say_No_to_Take_Control_of_Your_Life.pdf \n", - " chunk text: 14\n", - "11:59 A.M.\n", - "The rest of Sherrie’s morning proceeded fairly well. A tal-\n", - "ented fashion designer, Sherrie had an unerrin ...\n", + " chunk text: 24\n", + "tremendous confusion about when it is biblically appropriate to\n", + "set limits. When confronted with their lack of bounda ...\n", "\n", "books/Fundamentals_of_Physics_Textbook.pdf \n", - " chunk text: 71-3 MASS\n", - "international agreement, a mass of 1 kilogram. Accurate copies have been sent\n", - "to standardizing laboratories in ...\n", + " chunk text: 12 CHAPTER 1 MEASUREMENT\n", + "51 The cubit is an ancient unit of length based on the distance\n", + "between the elbow and the tip o ...\n", "\n", "books/Made_To_Stick.pdf \n", - " chunk text: of both fruits rather than all of either. Should we trade? If so, how do \n", - "we go about doing it?\" \n", - "Students are initiall ...\n", + " chunk text: How do we find the essential core of our ideas? 
A s uccessful defense \n", + "lawyer says, \"If you argue ten points, even if ea ...\n", "\n", "books/Pere_Riche_Pere_Pauvre.pdf \n", - " chunk text: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ...\n", + " chunk text: ~~~~~~~~~~~\n", + "~~~~~~~~~~~~~~~~~~~~~~~\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "~\n", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ...\n", "\n" ] } @@ -403,7 +404,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "801c6bc2-467c-4418-aa7e-ef89a1e20e1c", "metadata": {}, "outputs": [ @@ -412,11 +413,11 @@ "output_type": "stream", "text": [ "Extracting text using Azure Document Intelligence\n", - "Parsing took: 45.923739 seconds\n", + "Parsing took: 51.204662 seconds\n", "books/Pere_Riche_Pere_Pauvre.pdf contained 225 pages\n", "\n", - "CPU times: user 11.7 s, sys: 184 ms, total: 11.9 s\n", - "Wall time: 46 s\n" + "CPU times: user 13.1 s, sys: 437 ms, total: 13.5 s\n", + "Wall time: 51.2 s\n" ] } ], @@ -444,7 +445,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "97f9c5bb-c44b-4a4d-9780-591f9f8d128a", "metadata": {}, "outputs": [ @@ -453,7 +454,7 @@ "output_type": "stream", "text": [ "books/Pere_Riche_Pere_Pauvre.pdf \n", - " chunk text: « Comment donc devrions-nous nous y prendre pour faire de l'argent? 
» demanda Mi ...\n", + " chunk text: La principale inquiétude de Robert était l'écart croissant entre les riches et l ...\n", "\n" ] } @@ -483,7 +484,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "594ff0d4-56e3-4bed-843d-28c7a092069b", "metadata": {}, "outputs": [], @@ -497,7 +498,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "7d46e7c5-49c4-40f3-bb2d-79a9afeab4b1", "metadata": {}, "outputs": [], @@ -507,7 +508,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "1b07e84b-d306-4bc9-9124-e64f252dd7b2", "metadata": {}, "outputs": [], @@ -554,7 +555,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "2df4db6b-969b-4b91-963f-9334e17a4e3c", "metadata": {}, "outputs": [ @@ -562,7 +563,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "201\n", + "204\n", "True\n" ] } @@ -706,13 +707,22 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "a94911cf-c95f-4306-8574-b56296f29b88", "metadata": {}, "outputs": [], "source": [ - "# Function to process a batch of pages\n", - "def process_batch(bookname, pages):\n", + "def process_batch(bookname, pages, batch_id=None, max_retries=3, backoff=5):\n", + " \"\"\"\n", + " Function to process a batch of pages\n", + " This function will take a book name, a list of pages, and an optional batch ID.\n", + " It will embed the pages, create a payload for Azure Search, and upload the data.\n", + " If the upload fails, it will retry up to max_retries times, sleeping backoff * attempt seconds after each failed attempt (a linearly increasing delay, not exponential).\n", + " It will also save the failed batch to a file for later inspection, ONLY if all the retries fail. Errors raised before the upload loop (e.g. while embedding) are only printed: they are neither retried nor saved.\"\"\"\n", + " \n", + " failed_batches_dir = \"failed_batches\"\n", + " os.makedirs(failed_batches_dir, exist_ok=True)\n", + "\n", " try:\n", " contents = [page[2] for page in pages]\n", " chunk_vectors = embedder.embed_documents(contents)\n", @@ -735,20 +745,40 @@ " }\n", 
" upload_payload[\"value\"].append(payload)\n", " \n", - " r = requests.post(os.environ['AZURE_SEARCH_ENDPOINT'] + \"/indexes/\" + book_index_name + \"/docs/index\",\n", - " data=json.dumps(upload_payload), headers=headers, params=params)\n", - " if r.status_code != 200:\n", - " print(f\"Failed to upload batch of pages from {bookname}: {r.status_code}\")\n", - " print(r.text)\n", + " for attempt in range(1, max_retries + 1):\n", + " try:\n", + " r = requests.post(\n", + " os.environ['AZURE_SEARCH_ENDPOINT'] + \"/indexes/\" + book_index_name + \"/docs/index\",\n", + " data=json.dumps(upload_payload),\n", + " headers=headers,\n", + " params=params,\n", + " timeout=30\n", + " )\n", + " if r.status_code == 200:\n", + " print(f\"[{bookname}][batch {batch_id}] ✅ Upload successful\")\n", + " return\n", + " else:\n", + " print(f\"[{bookname}][batch {batch_id}] ⚠️ Attempt {attempt} failed: {r.status_code} - {r.text}\")\n", + " except Exception as e:\n", + " print(f\"[{bookname}][batch {batch_id}] ❗ Attempt {attempt} raised exception: {e}\")\n", + " time.sleep(backoff * attempt)\n", + "\n", + " # Save failed batch\n", + " failed_path = os.path.join(\n", + " failed_batches_dir,\n", + " f\"failed_batch_{bookname.replace('/', '_')}_batch_{batch_id}.json\"\n", + " )\n", + " with open(failed_path, 'w') as f:\n", + " json.dump(upload_payload, f, indent=2)\n", + " print(f\"[{bookname}][batch {batch_id}] ❌ Upload failed after {max_retries} attempts. 
Saved to {failed_path}\")\n", + "\n", " except Exception as e:\n", - " print(f\"Exception processing batch of pages from {bookname}: {e}\")\n", - " time.sleep(10) # Wait before retrying\n", - " process_batch(bookname, pages) # Retry the same batch" + " print(f\"[{bookname}][batch {batch_id}] 🚨 Unexpected error: {e}\")" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "id": "793a3171-f8f0-4070-8a54-8a540828333c", "metadata": {}, "outputs": [ @@ -763,13 +793,70 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 5/5 [00:26<00:00, 5.30s/it]\n" + " 20%|██ | 1/5 [00:07<00:31, 7.77s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Boundaries_When_to_Say_Yes_How_to_Say_No_to_Take_Control_of_Your_Life.pdf][batch 0] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 40%|████ | 2/5 [00:14<00:20, 6.87s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Boundaries_When_to_Say_Yes_How_to_Say_No_to_Take_Control_of_Your_Life.pdf][batch 75] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 60%|██████ | 3/5 [00:20<00:13, 6.55s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Boundaries_When_to_Say_Yes_How_to_Say_No_to_Take_Control_of_Your_Life.pdf][batch 150] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 80%|████████ | 4/5 [01:10<00:23, 23.94s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Boundaries_When_to_Say_Yes_How_to_Say_No_to_Take_Control_of_Your_Life.pdf][batch 225] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 5/5 [01:15<00:00, 15.10s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + 
"[books/Boundaries_When_to_Say_Yes_How_to_Say_No_to_Take_Control_of_Your_Life.pdf][batch 300] ✅ Upload successful\n", "Uploading chunks from books/Fundamentals_of_Physics_Textbook.pdf\n" ] }, @@ -777,13 +864,280 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 20/20 [06:04<00:00, 18.22s/it]\n" + " 5%|▌ | 1/20 [01:11<22:38, 71.49s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 0] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 10%|█ | 2/20 [02:04<18:12, 60.69s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 75] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 15%|█▌ | 3/20 [03:03<16:56, 59.79s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 150] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 20%|██ | 4/20 [04:05<16:09, 60.62s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 225] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 25%|██▌ | 5/20 [05:07<15:16, 61.09s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 300] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 30%|███ | 6/20 [06:09<14:20, 61.47s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 375] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 35%|███▌ | 7/20 [07:11<13:20, 61.56s/it]" + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 450] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 40%|████ | 8/20 [08:12<12:19, 61.61s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 525] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 45%|████▌ | 9/20 [09:15<11:19, 61.79s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 600] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 50%|█████ | 10/20 [10:16<10:18, 61.82s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 675] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 55%|█████▌ | 11/20 [11:19<09:17, 61.91s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 750] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 60%|██████ | 12/20 [12:20<08:15, 61.89s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 825] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 65%|██████▌ | 13/20 [13:27<07:23, 63.35s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 900] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 70%|███████ | 14/20 [14:24<06:08, 61.49s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"[books/Fundamentals_of_Physics_Textbook.pdf][batch 975] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 75%|███████▌ | 15/20 [15:26<05:08, 61.60s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 1050] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 80%|████████ | 16/20 [16:28<04:06, 61.66s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 1125] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 85%|████████▌ | 17/20 [17:30<03:05, 61.83s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 1200] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 90%|█████████ | 18/20 [18:32<02:03, 61.77s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 1275] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 95%|█████████▌| 19/20 [20:36<01:20, 80.66s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 1350] 🚨 Unexpected error: Error code: 429 - {'error': {'code': '429', 'message': 'Requests to the Embeddings_Create Operation under Azure OpenAI API version 2024-10-01-preview have exceeded call rate limit of your current AIServices S0 pricing tier. Please retry after 60 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit. 
For Free Account customers, upgrade to Pay as you Go here: https://aka.ms/429TrialUpgrade.'}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 20/20 [20:41<00:00, 62.09s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Fundamentals_of_Physics_Textbook.pdf][batch 1425] ✅ Upload successful\n", "Uploading chunks from books/Made_To_Stick.pdf\n" ] }, @@ -791,13 +1145,42 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 3/3 [00:17<00:00, 5.91s/it]\n" + " 33%|███▎ | 1/3 [00:06<00:13, 6.70s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "[books/Made_To_Stick.pdf][batch 0] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 67%|██████▋ | 2/3 [01:04<00:36, 36.59s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Made_To_Stick.pdf][batch 75] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 3/3 [01:10<00:00, 23.54s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Made_To_Stick.pdf][batch 150] ✅ Upload successful\n", "Uploading chunks from books/Pere_Riche_Pere_Pauvre.pdf\n" ] }, @@ -805,15 +1188,44 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 3/3 [00:40<00:00, 13.57s/it]" + " 33%|███▎ | 1/3 [00:57<01:55, 57.95s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 13.9 s, sys: 196 ms, total: 14.1 s\n", - "Wall time: 7min 29s\n" + "[books/Pere_Riche_Pere_Pauvre.pdf][batch 0] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 67%|██████▋ | 2/3 [01:04<00:27, 27.72s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Pere_Riche_Pere_Pauvre.pdf][batch 75] ✅ Upload successful\n" + ] + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "100%|██████████| 3/3 [01:59<00:00, 39.94s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[books/Pere_Riche_Pere_Pauvre.pdf][batch 150] ✅ Upload successful\n", + "CPU times: user 12 s, sys: 2.32 s, total: 14.3 s\n", + "Wall time: 25min 7s\n" ] }, { @@ -827,11 +1239,54 @@ "source": [ "%%time\n", "for bookname, bookmap in book_pages_map.items():\n", - " print(\"Uploading chunks from\", bookname)\n", - " # Split bookmap into chunks of size chunk_size\n", - " for i in tqdm(range(0, len(bookmap), batch_size)):\n", - " batch = bookmap[i:i + batch_size]\n", - " process_batch(bookname, batch)" + " print(\"Uploading chunks from\", bookname)\n", + " for i in tqdm(range(0, len(bookmap), batch_size)):\n", + " batch = bookmap[i:i + batch_size]\n", + " process_batch(bookname, batch, batch_id=i)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9afb037b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "🔁 Retrying failed batches...\n" + ] + } + ], + "source": [ + "# This is a simple retry mechanism for failed batches, in case there are failed batches in the folder failed_batches after the previous step, uncomment the code below to retry them.\n", + "# It assumes that the failed batches are saved in a directory called \"failed_batches\"\n", + "\n", + "# import glob\n", + "\n", + "# print(\"\\n🔁 Retrying failed batches...\")\n", + "# for path in glob.glob(\"failed_batches/failed_batch_*.json\"):\n", + "# print(f\"Retrying: {path}\")\n", + "# with open(path) as f:\n", + "# payload = json.load(f)\n", + "\n", + "# try:\n", + "# r = requests.post(\n", + "# os.environ['AZURE_SEARCH_ENDPOINT'] + \"/indexes/\" + book_index_name + \"/docs/index\",\n", + "# data=json.dumps(payload),\n", + "# headers=headers,\n", + "# params=params,\n", + "# timeout=30\n", + "# )\n", + "# if r.status_code == 200:\n", + "# print(f\"✅ Retry succeeded: 
{path}\")\n", + "# os.remove(path) # Clean up if retry successful\n", + "# else:\n", + "# print(f\"❌ Retry failed ({r.status_code}): {r.text}\")\n", + "# except Exception as e:\n", + "# print(f\"🚨 Retry exception: {e}\")" ] }, { @@ -1053,9 +1508,9 @@ ], "metadata": { "kernelspec": { - "display_name": "GPTSearch3 (Python 3.12)", + "display_name": ".venv", "language": "python", - "name": "gptsearch3" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1067,7 +1522,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.12.10" } }, "nbformat": 4,