Skip to content

Commit 69b2928

Browse files
authored
No dupe citations (#286)
1 parent cf63843 commit 69b2928

File tree

4 files changed

+30
-23
lines changed

4 files changed

+30
-23
lines changed

ai_chatbots/chatbots_test.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -348,16 +348,15 @@ async def test_syllabus_bot_tool(
348348
}
349349
for resource in raw_results
350350
],
351-
"citation_sources": [
352-
{
353-
"id": resource.get("resource_point_id"),
351+
"citation_sources": {
352+
resource.get("resource_point_id"): {
354353
"citation_title": resource.get("title")
355354
or resource.get("content_title"),
356355
"citation_url": resource.get("url"),
357356
}
358357
for resource in raw_results
359358
if resource.get("url")
360-
],
359+
},
361360
"metadata": {},
362361
}
363362

ai_chatbots/tools.py

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -249,7 +249,7 @@ def _content_file_search(url, params, *, exclude_canvas=True):
249249
raw_results = response.json().get("results", [])
250250
# Simplify the response to only include the main properties
251251
simplified_results = []
252-
citations = []
252+
citations = {}
253253
for result in raw_results:
254254
platform = result.get("platform", {}).get("code")
255255
# Currently, canvas contentfiles have blank platform values,
@@ -262,15 +262,12 @@ def _content_file_search(url, params, *, exclude_canvas=True):
262262
"run_title": result.get("run_title"),
263263
}
264264
simplified_results.append(simplified_result)
265-
if result.get("url"):
266-
citations.append(
267-
{
268-
"id": result["resource_point_id"],
269-
"citation_url": result.get("url"),
270-
"citation_title": result.get("title")
271-
or result.get("content_title"),
272-
}
273-
)
265+
if result.get("url") and not citations.get(result["resource_point_id"]):
266+
citations[result["resource_point_id"]] = {
267+
"citation_url": result.get("url"),
268+
"citation_title": result.get("title")
269+
or result.get("content_title"),
270+
}
274271
full_output = {
275272
"results": simplified_results,
276273
"citation_sources": citations,

ai_chatbots/tools_test.py

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -155,15 +155,20 @@ def test_search_content_files( # noqa: PLR0913
155155
)
156156
assert len(results["results"]) == len(content_chunk_results["results"])
157157
assert len(results["citation_sources"]) == len(
158-
[result for result in content_chunk_results["results"] if result["url"]]
158+
{
159+
result["resource_point_id"]
160+
for result in content_chunk_results["results"]
161+
if result["url"]
162+
}
159163
)
160164
for idx, result in enumerate(content_chunk_results["results"]):
161165
if content_chunk_results["results"][idx]["url"]:
162-
assert {
163-
"id": result["resource_point_id"],
166+
assert results["citation_sources"][
167+
content_chunk_results["results"][idx]["resource_point_id"]
168+
] == {
164169
"citation_url": result.get("url"),
165170
"citation_title": (result.get("title") or result["content_title"]),
166-
} in results["citation_sources"]
171+
}
167172

168173

169174
@pytest.mark.parametrize("exclude_canvas", [True, False])
@@ -190,7 +195,13 @@ def test_search_canvas_content_files(
190195
len(content_chunk_results["results"]) if not exclude_canvas else 0
191196
)
192197
assert len(results["citation_sources"]) == (
193-
len([result for result in content_chunk_results["results"] if result["url"]])
198+
len(
199+
{
200+
result["resource_point_id"]
201+
for result in content_chunk_results["results"]
202+
if result["url"]
203+
}
204+
)
194205
if not exclude_canvas
195206
else 0
196207
)

test_json/content_file_chunks.json

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
"course_number": ["MITx+6.00.1x"],
3535
"description": null,
3636
"key": "2cab2fd265631bc527845acf2027686c",
37-
"resource_point_id": "3c77d1af-8a99-58ea-bc3a-67691b5c6ca6",
37+
"resource_point_id": "4c77d1af-8a99-58ea-bc3a-67691b5c6ca6",
3838
"url": "http://ocw.mit.edu/fake/url/2/",
3939
"file_extension": ".xml",
4040
"resource_readable_id": "MITx+6.00.1x",
@@ -61,7 +61,7 @@
6161
"course_number": ["MITx+6.00.1x"],
6262
"description": null,
6363
"key": "fb9fa09c795cd17544d2e5c917db2dca",
64-
"resource_point_id": "3c77d1af-8a99-58ea-bc3a-67691b5c6ca6",
64+
"resource_point_id": "5c77d1af-8a99-58ea-bc3a-67691b5c6ca6",
6565
"url": "http://ocw.mit.edu/fake/url/3/",
6666
"file_extension": ".xml",
6767
"resource_readable_id": "MITx+6.00.1x",
@@ -88,8 +88,8 @@
8888
"course_number": ["MITx+6.00.1x"],
8989
"description": null,
9090
"key": "d5dab5756147a6466f96284a68df85b8",
91-
"resource_point_id": "3c77d1af-8a99-58ea-bc3a-67691b5c6ca6",
92-
"url": null,
91+
"resource_point_id": "5c77d1af-8a99-58ea-bc3a-67691b5c6ca6",
92+
"url": "http://ocw.mit.edu/fake/url/3/",
9393
"file_extension": ".xml",
9494
"resource_readable_id": "MITx+6.00.1x",
9595
"run_title": "Introduction to Computer Science and Programming Using Python",

0 commit comments

Comments
 (0)