diff --git a/api/tests.py b/api/tests.py index b9c9683d10..6d27febbde 100644 --- a/api/tests.py +++ b/api/tests.py @@ -4,6 +4,7 @@ from reader.tests import SefariaTestCase import json from api.api_warnings import APIWarningCode +from sefaria.system.database import QueryCounter c = Client() @@ -224,3 +225,253 @@ def test_error_return_format(self): self.assertEqual(400, response.status_code) data = json.loads(response.content) self.assertEqual(data['error'], "return_format should be one of those formats: ['default', 'wrap_all_entities', 'text_only', 'strip_only_footnotes'].") + + +class APIRefTests(SefariaTestCase): + + def get_ref(self, tref, max_mongo_calls=1): + QueryCounter.reset(tracked_commands={'find', 'aggregate', 'count', 'distinct'}) + response = c.get(f'/api/ref/{tref}') + data = json.loads(response.content) + if max_mongo_calls is not None: + if QueryCounter.count > max_mongo_calls: + for i, q in enumerate(QueryCounter.queries): + print(f"\n--- Query {i+1}: {q['command']} on {q['collection']} ---") + print(q['traceback']) + self.assertLessEqual(QueryCounter.count, max_mongo_calls, + f"Expected at most {max_mongo_calls} mongo call(s) for '{tref}', got {QueryCounter.count}") + return data + + def test_not_ref(self): + data = self.get_ref('Not Ref', max_mongo_calls=0) + self.assertFalse(data['is_ref']) + + def test_book_level_jagged_array(self): + """Penei Moshe on Jerusalem Talmud Shabbat - book-level JaggedArrayNode depth 4""" + data = self.get_ref('Penei Moshe on Jerusalem Talmud Shabbat') + self.assertTrue(data['is_ref']) + self.assertEqual(data['node_type'], 'JaggedArrayNode') + self.assertEqual(data['index_title'], 'Penei Moshe on Jerusalem Talmud Shabbat') + self.assertEqual(data['depth'], 4) + self.assertEqual(data['address_types'], ['Perek', 'Halakhah', 'Integer', 'Integer']) + self.assertEqual(data['section_names'], ['Chapter', 'Halakhah', 'Segment', 'Comment']) + self.assertEqual(data['start_indexes'], []) + self.assertEqual(data['end_indexes'], 
[]) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'], []) + self.assertEqual(data['navigation_refs']['first_available_section_ref'], 'Penei Moshe on Jerusalem Talmud Shabbat 1:1:1') + self.assertEqual(data['navigation_refs']['first_subref'], 'Penei Moshe on Jerusalem Talmud Shabbat 1') + self.assertEqual(data['navigation_refs']['last_subref'], 'Penei Moshe on Jerusalem Talmud Shabbat 24') + self.assertNotIn('prev_section_ref', data['navigation_refs']) + self.assertNotIn('next_section_ref', data['navigation_refs']) + self.assertNotIn('prev_segment_ref', data['navigation_refs']) + self.assertNotIn('next_segment_ref', data['navigation_refs']) + + def test_one_level_below_book(self): + """Penei Moshe on Jerusalem Talmud Shabbat 2 - one level below book""" + data = self.get_ref('Penei Moshe on Jerusalem Talmud Shabbat 2') + self.assertTrue(data['is_ref']) + self.assertEqual(data['start_indexes'], [2]) + self.assertEqual(data['start_labels'], ['2']) + self.assertEqual(data['end_indexes'], [2]) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'Penei Moshe on Jerusalem Talmud Shabbat') + self.assertEqual(data['navigation_refs']['first_available_section_ref'], 'Penei Moshe on Jerusalem Talmud Shabbat 2:1:1') + self.assertEqual(data['navigation_refs']['first_subref'], 'Penei Moshe on Jerusalem Talmud Shabbat 2:1') + self.assertEqual(data['navigation_refs']['last_subref'], 'Penei Moshe on Jerusalem Talmud Shabbat 2:7') + self.assertNotIn('prev_section_ref', data['navigation_refs']) + self.assertNotIn('next_section_ref', data['navigation_refs']) + self.assertNotIn('prev_segment_ref', data['navigation_refs']) + self.assertNotIn('next_segment_ref', data['navigation_refs']) + + def test_two_levels_below_book(self): + """Penei Moshe on Jerusalem Talmud Shabbat 3:2 - two levels below book""" + data = self.get_ref('Penei Moshe on Jerusalem Talmud Shabbat 3:2') + self.assertTrue(data['is_ref']) + self.assertEqual(data['start_indexes'], [3, 
2]) + self.assertEqual(data['start_labels'], ['3', '2']) + self.assertEqual(data['end_indexes'], [3, 2]) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'Penei Moshe on Jerusalem Talmud Shabbat 3') + self.assertEqual(data['navigation_refs']['first_subref'], 'Penei Moshe on Jerusalem Talmud Shabbat 3:2:1') + self.assertEqual(data['navigation_refs']['last_subref'], 'Penei Moshe on Jerusalem Talmud Shabbat 3:2:3') + self.assertNotIn('prev_section_ref', data['navigation_refs']) + self.assertNotIn('next_section_ref', data['navigation_refs']) + self.assertNotIn('prev_segment_ref', data['navigation_refs']) + self.assertNotIn('next_segment_ref', data['navigation_refs']) + + def test_section_level(self): + """Penei Moshe on Jerusalem Talmud Shabbat 2:3:2 - section-level with prev/next""" + data = self.get_ref('Penei Moshe on Jerusalem Talmud Shabbat 2:3:2') + self.assertTrue(data['is_ref']) + self.assertEqual(data['start_indexes'], [2, 3, 2]) + self.assertEqual(data['end_indexes'], [2, 3, 2]) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'Penei Moshe on Jerusalem Talmud Shabbat 2:3') + self.assertEqual(data['navigation_refs']['prev_section_ref'], 'Penei Moshe on Jerusalem Talmud Shabbat 2:3:1') + self.assertEqual(data['navigation_refs']['next_section_ref'], 'Penei Moshe on Jerusalem Talmud Shabbat 2:3:3') + self.assertEqual(data['navigation_refs']['first_subref'], 'Penei Moshe on Jerusalem Talmud Shabbat 2:3:2:1') + self.assertEqual(data['navigation_refs']['last_subref'], 'Penei Moshe on Jerusalem Talmud Shabbat 2:3:2:7') + self.assertNotIn('prev_segment_ref', data['navigation_refs']) + self.assertNotIn('next_segment_ref', data['navigation_refs']) + + def test_section_level_with_cross_node_navigation(self): + """Penei Moshe on Jerusalem Talmud Shabbat 2:3:1 - section-level crossing into prev chapter""" + data = self.get_ref('Penei Moshe on Jerusalem Talmud Shabbat 2:3:1') + self.assertTrue(data['is_ref']) + 
self.assertEqual(data['navigation_refs']['prev_section_ref'], 'Penei Moshe on Jerusalem Talmud Shabbat 2:2:6') + self.assertEqual(data['navigation_refs']['next_section_ref'], 'Penei Moshe on Jerusalem Talmud Shabbat 2:3:2') + self.assertNotIn('prev_segment_ref', data['navigation_refs']) + self.assertNotIn('next_segment_ref', data['navigation_refs']) + + def test_segment_level_first(self): + """Penei Moshe on Jerusalem Talmud Shabbat 3:2:1:1 - first segment in section""" + data = self.get_ref('Penei Moshe on Jerusalem Talmud Shabbat 3:2:1:1') + self.assertTrue(data['is_ref']) + self.assertEqual(data['start_indexes'], [3, 2, 1, 1]) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'Penei Moshe on Jerusalem Talmud Shabbat 3:2:1') + self.assertEqual(data['navigation_refs']['prev_segment_ref'], 'Penei Moshe on Jerusalem Talmud Shabbat 3:1:14:2') + self.assertEqual(data['navigation_refs']['next_segment_ref'], 'Penei Moshe on Jerusalem Talmud Shabbat 3:2:1:2') + self.assertNotIn('first_subref', data['navigation_refs']) + self.assertNotIn('prev_section_ref', data['navigation_refs']) + self.assertNotIn('next_section_ref', data['navigation_refs']) + + def test_segment_level_last_in_section(self): + """Penei Moshe on Jerusalem Talmud Shabbat 3:2:1:2 - last segment in section""" + data = self.get_ref('Penei Moshe on Jerusalem Talmud Shabbat 3:2:1:2') + self.assertTrue(data['is_ref']) + self.assertEqual(data['navigation_refs']['prev_segment_ref'], 'Penei Moshe on Jerusalem Talmud Shabbat 3:2:1:1') + self.assertEqual(data['navigation_refs']['next_segment_ref'], 'Penei Moshe on Jerusalem Talmud Shabbat 3:2:2:1') + self.assertNotIn('prev_section_ref', data['navigation_refs']) + self.assertNotIn('next_section_ref', data['navigation_refs']) + + def test_range_halakhah_level(self): + """Penei Moshe on Jerusalem Talmud Shabbat 3:2-4:1 - range at halakhah level""" + data = self.get_ref('Penei Moshe on Jerusalem Talmud Shabbat 3:2-4:1') + 
self.assertTrue(data['is_ref']) + self.assertEqual(data['start_indexes'], [3, 2]) + self.assertEqual(data['start_labels'], ['3', '2']) + self.assertEqual(data['end_indexes'], [4, 1]) + self.assertEqual(data['end_labels'], ['4', '1']) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'Penei Moshe on Jerusalem Talmud Shabbat 3-4') + self.assertNotIn('first_subref', data['navigation_refs']) + self.assertNotIn('prev_section_ref', data['navigation_refs']) + self.assertNotIn('next_section_ref', data['navigation_refs']) + self.assertNotIn('prev_segment_ref', data['navigation_refs']) + self.assertNotIn('next_segment_ref', data['navigation_refs']) + + def test_range_section_level(self): + """Penei Moshe on Jerusalem Talmud Shabbat 3:2:1-4:1:1 - range at section level""" + data = self.get_ref('Penei Moshe on Jerusalem Talmud Shabbat 3:2:1-4:1:1') + self.assertTrue(data['is_ref']) + self.assertEqual(data['start_indexes'], [3, 2, 1]) + self.assertEqual(data['end_indexes'], [4, 1, 1]) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'Penei Moshe on Jerusalem Talmud Shabbat 3:2-4:1') + self.assertEqual(data['navigation_refs']['prev_section_ref'], 'Penei Moshe on Jerusalem Talmud Shabbat 3:1:14') + self.assertEqual(data['navigation_refs']['next_section_ref'], 'Penei Moshe on Jerusalem Talmud Shabbat 4:1:2') + self.assertNotIn('prev_segment_ref', data['navigation_refs']) + self.assertNotIn('next_segment_ref', data['navigation_refs']) + + def test_talmud_section(self): + """Berakhot 22a - Talmud section-level ref""" + data = self.get_ref('Berakhot 22a') + self.assertTrue(data['is_ref']) + self.assertEqual(data['index_title'], 'Berakhot') + self.assertEqual(data['node_type'], 'JaggedArrayNode') + self.assertEqual(data['depth'], 2) + self.assertEqual(data['address_types'], ['Talmud', 'Integer']) + self.assertEqual(data['section_names'], ['Daf', 'Line']) + self.assertEqual(data['start_indexes'], [43]) + 
self.assertEqual(data['start_labels'], ['22a']) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'Berakhot') + self.assertEqual(data['navigation_refs']['prev_section_ref'], 'Berakhot 21b') + self.assertEqual(data['navigation_refs']['next_section_ref'], 'Berakhot 22b') + self.assertEqual(data['navigation_refs']['first_subref'], 'Berakhot 22a:1') + self.assertEqual(data['navigation_refs']['last_subref'], 'Berakhot 22a:25') + + def test_schema_node(self): + """Siddur Ashkenaz, Weekday, Shacharit - SchemaNode""" + data = self.get_ref('Siddur Ashkenaz, Weekday, Shacharit') + self.assertTrue(data['is_ref']) + self.assertEqual(data['node_type'], 'SchemaNode') + self.assertEqual(data['index_title'], 'Siddur Ashkenaz') + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'Siddur Ashkenaz, Weekday') + self.assertIn('Preparatory Prayers', data['children']) + self.assertIn('Amidah', data['children']) + + def test_deep_complex_segment(self): + """Siddur Ashkenaz, Weekday, Shacharit, Preparatory Prayers, Modeh Ani 2 - deep segment""" + data = self.get_ref('Siddur Ashkenaz, Weekday, Shacharit, Preparatory Prayers, Modeh Ani 2', max_mongo_calls=1) + self.assertTrue(data['is_ref']) + self.assertEqual(data['node_type'], 'JaggedArrayNode') + self.assertEqual(data['depth'], 1) + self.assertEqual(data['start_indexes'], [2]) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'Siddur Ashkenaz, Weekday, Shacharit, Preparatory Prayers, Modeh Ani') + self.assertEqual(data['navigation_refs']['prev_segment_ref'], 'Siddur Ashkenaz, Weekday, Shacharit, Preparatory Prayers, Modeh Ani 1') + self.assertEqual(data['navigation_refs']['next_segment_ref'], 'Siddur Ashkenaz, Weekday, Shacharit, Preparatory Prayers, Netilat Yadayim 1') + + def test_schema_node_with_default_child(self): + """Ramban on Genesis - SchemaNode with default child JaggedArrayNode""" + data = self.get_ref('Ramban on Genesis') + self.assertTrue(data['is_ref']) + 
self.assertEqual(data['node_type'], 'SchemaNode') + self.assertIn('Introduction', data['children']) + self.assertIn('default_child_node', data) + self.assertEqual(data['default_child_node']['node_type'], 'JaggedArrayNode') + self.assertEqual(data['default_child_node']['depth'], 3) + self.assertEqual(data['default_child_node']['node_index'], 2) + + def test_jagged_array_under_default_child(self): + """Ramban on Genesis 1 - JaggedArrayNode under default child""" + data = self.get_ref('Ramban on Genesis 1') + self.assertTrue(data['is_ref']) + self.assertEqual(data['node_type'], 'JaggedArrayNode') + self.assertEqual(data['depth'], 3) + self.assertEqual(data['start_indexes'], [1]) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'Ramban on Genesis') + + def test_dictionary_node(self): + """BDB - SchemaNode with default DictionaryNode child""" + data = self.get_ref('BDB', max_mongo_calls=None) + self.assertTrue(data['is_ref']) + self.assertEqual(data['node_type'], 'SchemaNode') + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'], []) + self.assertIn('default_child_node', data) + self.assertEqual(data['default_child_node']['node_type'], 'DictionaryNode') + + def test_dictionary_entry_node(self): + """BDB, א - DictionaryEntryNode""" + data = self.get_ref('BDB, א', max_mongo_calls=None) + self.assertTrue(data['is_ref']) + self.assertEqual(data['node_type'], 'DictionaryEntryNode') + self.assertEqual(data['index_title'], 'BDB') + self.assertEqual(data['lexicon_name'], 'BDB Dictionary') + self.assertEqual(data['headword'], 'א') + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'BDB') + self.assertIsNone(data['navigation_refs']['prev_section_ref']) + self.assertIsNotNone(data['navigation_refs']['next_section_ref']) + + def test_dictionary_entry_segment(self): + """BDB, אָב 1 - DictionaryEntryNode segment-level""" + data = self.get_ref('BDB, אָב 1', max_mongo_calls=None) + self.assertTrue(data['is_ref']) + 
self.assertEqual(data['node_type'], 'DictionaryEntryNode') + self.assertEqual(data['start_indexes'], [1]) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'BDB, אָב') + self.assertIsNotNone(data['navigation_refs']['prev_segment_ref']) + self.assertIsNotNone(data['navigation_refs']['next_segment_ref']) + self.assertNotIn('prev_section_ref', data['navigation_refs']) + self.assertNotIn('next_section_ref', data['navigation_refs']) + + def test_sheet_node(self): + """Sheet 1 - SheetNode""" + data = self.get_ref('Sheet 1', max_mongo_calls=2) + self.assertTrue(data['is_ref']) + self.assertEqual(data['node_type'], 'SheetNode') + self.assertEqual(data['sheet_id'], 1) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'Sheet') + self.assertEqual(data['navigation_refs']['first_subref'], 'Sheet 1:1') + + def test_sheet_segment(self): + """Sheet 1:1 - SheetNode segment-level""" + data = self.get_ref('Sheet 1:1', max_mongo_calls=1) + self.assertTrue(data['is_ref']) + self.assertEqual(data['node_type'], 'SheetNode') + self.assertEqual(data['sheet_id'], 1) + self.assertEqual(data['navigation_refs']['lineage_refs_top_down'][-1], 'Sheet 1') diff --git a/api/views.py b/api/views.py index 0ae5921267..ac875d5400 100644 --- a/api/views.py +++ b/api/views.py @@ -1,7 +1,7 @@ from sefaria.model import * from sefaria.model.text_request_adapter import TextRequestAdapter from sefaria.client.util import jsonResponse -from sefaria.system.exceptions import InputError, ComplexBookLevelRefError +from sefaria.system.exceptions import InputError, ComplexBookLevelRefError, DictionaryEntryNotFoundError from django.views import View from .api_warnings import * @@ -67,3 +67,91 @@ def get(self, request, *args, **kwargs): return jsonResponse({'error': str(e)}, status=400) return jsonResponse(data) + + +class RefView(View): + + def dispatch(self, request, *args, **kwargs): + try: + self.oref = Ref.instantiate_ref_with_legacy_parse_fallback(kwargs['tref']) + 
except (InputError, DictionaryEntryNotFoundError): + return jsonResponse({'is_ref': False}) + except Exception as e: + return jsonResponse({'error': getattr(e, 'message', str(e))}, status=404) + return super().dispatch(request, *args, **kwargs) + + def get(self, request, *args, **kwargs): + oref = self.oref + index = oref.index + index_node = oref.index_node + # Load vstate for non-virtual nodes (JaggedArrayNode, SchemaNode, DictionaryNode). + vstate = VersionState(index.title) if not index_node.is_virtual else None + return_object = { + 'is_ref': True, + 'normalized': oref.normal(), + 'hebrew': oref.he_normal(), + 'url_ref': oref.url(), + 'index_title': index.title, + 'node_type': type(index_node).__name__, + 'navigation_refs': { + 'lineage_refs_top_down': [r.normal() for r in oref.all_context_refs(False, True)][::-1] + } + } + + if return_object['node_type'] != 'SheetNode': + norm = lambda r: r.normal() if r else None + return_object['navigation_refs']['first_available_section_ref'] = norm(oref.first_available_section_ref(vstate=vstate)) + if oref.is_segment_level(): + return_object['navigation_refs']['prev_segment_ref'] = norm(oref.prev_segment_ref(vstate=vstate)) + return_object['navigation_refs']['next_segment_ref'] = norm(oref.next_segment_ref(vstate=vstate)) + elif oref.is_section_level(): + return_object['navigation_refs']['prev_section_ref'] = norm(oref.prev_section_ref(vstate=vstate)) + return_object['navigation_refs']['next_section_ref'] = norm(oref.next_section_ref(vstate=vstate)) + + if return_object['node_type'] == 'SchemaNode': + return_object['children'] = [child.get_primary_title() for child in index_node.children] + + elif return_object['node_type'] in ['JaggedArrayNode', 'DictionaryEntryNode']: + return_object.update({ + 'depth': index_node.depth, + 'address_types': index_node.addressTypes, + 'section_names': index_node.sectionNames, + 'start_indexes': oref.sections, + 'start_labels': oref.normal_sections(), + 'end_indexes': oref.toSections, + 
'end_labels': oref.normal_toSections(), + }) + + if return_object['node_type'] == 'DictionaryEntryNode': + lexicon_entry = index_node.lexicon_entry + return_object['lexicon_name'] = lexicon_entry.parent_lexicon + return_object['headword'] = lexicon_entry.headword + + elif return_object['node_type'] == 'SheetNode': + return_object['sheet_id'] = index_node.sheetId + + elif return_object['node_type'] == 'DictionaryNode': + return_object['lexicon_name'] = index_node.lexiconName + + if index_node.has_default_child(): + default_ref = oref.default_child_ref() + default_node = default_ref.index_node + return_object['default_child_node'] = { + 'node_type': type(default_node).__name__, + 'node_index': index_node.children.index(default_node) + } + if return_object['default_child_node']['node_type'] == 'JaggedArrayNode': + return_object['default_child_node']['depth'] = default_node.depth + return_object['default_child_node']['address_types'] = default_node.addressTypes + return_object['default_child_node']['sectionNames'] = default_node.sectionNames + + if return_object['node_type'] == 'DictionaryNode': + return_object['default_child_node']['lexicon_name'] = index_node.lexiconName + + if getattr(index_node, "depth", None) and not oref.is_range() and not oref.is_segment_level(): + state_ja = oref.get_state_ja(vstate=vstate) if vstate else None + subrefs = oref.all_subrefs(state_ja=state_ja) + return_object['navigation_refs']['first_subref'] = subrefs[0].normal() + return_object['navigation_refs']['last_subref'] = subrefs[-1].normal() + + return jsonResponse(return_object) diff --git a/docs/openAPI.json b/docs/openAPI.json index 68e3e296f0..32dc91a971 100644 --- a/docs/openAPI.json +++ b/docs/openAPI.json @@ -51,6 +51,10 @@ { "name": "Misc", "description": "Miscellaneous API endpoints" + }, + { + "name": "Ref", + "description": "Operations related to validating and analyzing text references" } ], "paths": { @@ -5707,6 +5711,140 @@ "required": true } ] + }, + "/api/ref/{tref}": { 
+ "summary": "Ref", + "description": "Validates a text reference string and returns metadata about it, including its type, structure, and navigation information (prev/next refs, parent ref, etc.).", + "get": { + "operationId": "get-ref", + "tags": [ + "Ref" + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RefJSON" + }, + "examples": { + "JaggedArrayNode (Segment Level)": { + "value": { + "is_ref": true, + "normalized": "Genesis 1:1", + "hebrew": "בראשית א׳:א׳", + "url_ref": "Genesis.1.1", + "index_title": "Genesis", + "node_type": "JaggedArrayNode", + "navigation_refs": { + "lineage_refs_top_down": ["Genesis", "Genesis 1"], + "first_available_section_ref": "Genesis 1", + "prev_segment_ref": null, + "next_segment_ref": "Genesis 1:2" + }, + "depth": 2, + "address_types": ["Perek", "Pasuk"], + "section_names": ["Chapter", "Verse"], + "start_indexes": [1, 1], + "start_labels": ["1", "1"], + "end_indexes": [1, 1], + "end_labels": ["1", "1"] + } + }, + "JaggedArrayNode (Section Level)": { + "value": { + "is_ref": true, + "normalized": "Genesis 1", + "hebrew": "בראשית א׳", + "url_ref": "Genesis.1", + "index_title": "Genesis", + "node_type": "JaggedArrayNode", + "navigation_refs": { + "lineage_refs_top_down": ["Genesis"], + "first_available_section_ref": "Genesis 1", + "prev_section_ref": null, + "next_section_ref": "Genesis 2", + "first_subref": "Genesis 1:1", + "last_subref": "Genesis 1:31" + }, + "depth": 2, + "address_types": ["Perek", "Pasuk"], + "section_names": ["Chapter", "Verse"], + "start_indexes": [1], + "start_labels": ["1"], + "end_indexes": [1], + "end_labels": ["1"] + } + }, + "SchemaNode": { + "value": { + "is_ref": true, + "normalized": "Ramban on Genesis", + "hebrew": "רמב\"ן על בראשית", + "url_ref": "Ramban_on_Genesis", + "index_title": "Ramban on Genesis", + "node_type": "SchemaNode", + "navigation_refs": { + "lineage_refs_top_down": [], + "first_available_section_ref": "Ramban on 
Genesis, Introduction 1" + }, + "children": ["Introduction", ""] + } + }, + "Invalid Ref": { + "value": { + "is_ref": false + } + } + } + } + }, + "description": "Returns reference metadata. If the reference string is invalid, returns `{\"is_ref\": false}` with HTTP 200." + }, + "404": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string" + } + } + } + } + }, + "description": "Returned when the reference string causes a parsing error." + } + }, + "summary": "Ref", + "description": "Validates a text reference (`tref`) and returns structured metadata about it.\n\nThe response varies based on the `node_type` of the referenced text:\n\n- **JaggedArrayNode**: Standard text nodes (e.g. biblical chapters/verses, Talmud pages). Returns `depth`, `address_types`, `section_names`, and positional indexes/labels.\n- **SchemaNode**: Nodes with named children (e.g. a commentary with an introduction and main text). Returns `children` list.\n- **DictionaryEntryNode**: Individual dictionary entries. Returns `lexicon_name` and `headword`, plus the JaggedArrayNode fields.\n- **DictionaryNode**: Dictionary root nodes. Returns `lexicon_name`.\n- **SheetNode**: User-created source sheets. Returns `sheet_id`.\n\nIf the node has a default child (a child node that represents the primary content), the response includes a `default_child_node` object with the child's `node_type`, `node_index`, and optionally `depth`, `address_types` and the section names (for JaggedArrayNode children) or `lexicon_name` (for DictionaryNode parents).\n\nAll types include `navigation_refs` with `lineage_refs_top_down` (array of ancestor refs ordered from the book level down to the immediate parent; empty at book level). Every type except SheetNode also includes `first_available_section_ref`. Segment-level refs also get `prev_segment_ref`/`next_segment_ref`, and section-level refs get `prev_section_ref`/`next_section_ref`." 
+ }, + "parameters": [ + { + "name": "tref", + "description": "A text reference string to validate and analyze. Can be any Sefaria reference, including book-level, section-level, segment-level, or ranged references.", + "schema": { + "type": "string" + }, + "in": "path", + "required": true, + "examples": { + "Segment Level": { + "value": "Genesis 1:1" + }, + "Section Level": { + "value": "Berakhot 22a" + }, + "Book Level": { + "value": "Ramban on Genesis" + }, + "Range": { + "value": "Genesis 1:1-3" + } + } + } + ] } }, "components": { @@ -13904,6 +14042,206 @@ ], "lastPath": "Torah" } + }, + "RefJSON": { + "title": "Root Type for RefJSON", + "description": "The JSON returned from the `ref` endpoint. Contains metadata about a validated text reference, including its type, structure, and navigation information. The fields returned vary based on the `node_type`.", + "type": "object", + "properties": { + "is_ref": { + "description": "Whether the provided string is a valid Sefaria reference.", + "type": "boolean" + }, + "normalized": { + "description": "The normalized (canonical) form of the reference string.", + "type": "string" + }, + "hebrew": { + "description": "The Hebrew form of the reference string.", + "type": "string" + }, + "url_ref": { + "description": "A URL-friendly form of the reference (spaces replaced with underscores, etc.).", + "type": "string" + }, + "index_title": { + "description": "The title of the book/index this reference belongs to.", + "type": "string" + }, + "node_type": { + "description": "The type of the index node. 
Determines which additional fields are present in the response.", + "type": "string", + "enum": ["JaggedArrayNode", "SchemaNode", "DictionaryNode", "DictionaryEntryNode", "SheetNode"] + }, + "navigation_refs": { + "description": "An object containing related references for navigating within the text.", + "type": "object", + "properties": { + "lineage_refs_top_down": { + "description": "An array of references representing the hierarchical path from the book level down to the immediate parent. Empty array if at book level.", + "type": "array", + "items": { + "type": "string" + } + }, + "first_available_section_ref": { + "description": "The first section-level reference that has available text content.", + "type": "string", + "nullable": true + }, + "prev_segment_ref": { + "description": "The previous segment-level reference. Only present for segment-level refs.", + "type": "string", + "nullable": true + }, + "next_segment_ref": { + "description": "The next segment-level reference. Only present for segment-level refs.", + "type": "string", + "nullable": true + }, + "prev_section_ref": { + "description": "The previous section-level reference. Only present for section-level refs.", + "type": "string", + "nullable": true + }, + "next_section_ref": { + "description": "The next section-level reference. Only present for section-level refs.", + "type": "string", + "nullable": true + }, + "first_subref": { + "description": "The first sub-reference within this ref. Present for non-segment-level refs with depth.", + "type": "string" + }, + "last_subref": { + "description": "The last sub-reference within this ref. Present for non-segment-level refs with depth.", + "type": "string" + } + } + }, + "depth": { + "description": "The depth of the text structure (e.g. 2 for Chapter/Verse). Present for JaggedArrayNode and DictionaryEntryNode.", + "type": "integer" + }, + "address_types": { + "description": "The address type for each level of depth (e.g. [\"Perek\", \"Pasuk\"]). 
Present for JaggedArrayNode and DictionaryEntryNode.", + "type": "array", + "items": { + "type": "string" + } + }, + "section_names": { + "description": "Human-readable names for each level of depth (e.g. [\"Chapter\", \"Verse\"]). Present for JaggedArrayNode and DictionaryEntryNode.", + "type": "array", + "items": { + "type": "string" + } + }, + "start_indexes": { + "description": "The numeric index values for the start of this reference at each depth level. Present for JaggedArrayNode and DictionaryEntryNode.", + "type": "array", + "items": { + "type": "integer" + } + }, + "start_labels": { + "description": "The display labels for the start of this reference at each depth level. Present for JaggedArrayNode and DictionaryEntryNode.", + "type": "array", + "items": { + "type": "string" + } + }, + "end_indexes": { + "description": "The numeric index values for the end of this reference at each depth level. For non-range refs, same as start_indexes. Present for JaggedArrayNode and DictionaryEntryNode.", + "type": "array", + "items": { + "type": "integer" + } + }, + "end_labels": { + "description": "The display labels for the end of this reference at each depth level. For non-range refs, same as start_labels. Present for JaggedArrayNode and DictionaryEntryNode.", + "type": "array", + "items": { + "type": "string" + } + }, + "children": { + "description": "The primary titles of child nodes. Present only for SchemaNode.", + "type": "array", + "items": { + "type": "string" + } + }, + "lexicon_name": { + "description": "The name of the lexicon (dictionary). Present for DictionaryNode and DictionaryEntryNode.", + "type": "string" + }, + "headword": { + "description": "The headword of the dictionary entry. Present only for DictionaryEntryNode.", + "type": "string" + }, + "sheet_id": { + "description": "The ID of the source sheet. 
Present only for SheetNode.", + "type": "integer" + }, + "default_child_node": { + "description": "Information about the default child node, if the current node has one. A default child is a child node that represents the primary content of a schema node.", + "type": "object", + "properties": { + "node_type": { + "description": "The type of the default child node.", + "type": "string" + }, + "node_index": { + "description": "The index of the default child within the parent's children array.", + "type": "integer" + }, + "depth": { + "description": "The depth of the default child. Present only when the default child is a JaggedArrayNode.", + "type": "integer" + }, + "address_types": { + "description": "The address type for each level of depth. Present only when the default child is a JaggedArrayNode.", + "type": "array", + "items": { + "type": "string" + } + }, + "section_names": { + "description": "Human-readable names for each level of depth. Present only when the default child is a JaggedArrayNode.", + "type": "array", + "items": { + "type": "string" + } + }, + "lexicon_name": { + "description": "The lexicon name. 
Present only when the parent is a DictionaryNode.", + "type": "string" + } + } + } + }, + "example": { + "is_ref": true, + "normalized": "Genesis 1:1", + "hebrew": "בראשית א׳:א׳", + "url_ref": "Genesis.1.1", + "index_title": "Genesis", + "node_type": "JaggedArrayNode", + "navigation_refs": { + "lineage_refs_top_down": ["Genesis", "Genesis 1"], + "first_available_section_ref": "Genesis 1", + "prev_segment_ref": null, + "next_segment_ref": "Genesis 1:2" + }, + "depth": 2, + "address_types": ["Perek", "Pasuk"], + "section_names": ["Chapter", "Verse"], + "start_indexes": [1, 1], + "start_labels": ["1", "1"], + "end_indexes": [1, 1], + "end_labels": ["1", "1"] + } } } } diff --git a/reader/tests.py b/reader/tests.py index f56f895508..ebae732ebc 100644 --- a/reader/tests.py +++ b/reader/tests.py @@ -20,7 +20,7 @@ import sefaria.utils.testing_utils as tutils from sefaria.model import library, Index, IndexSet, VersionSet, LinkSet, NoteSet, HistorySet, Ref, VersionState, \ - VersionStateSet, TextChunk, Category, UserHistory, UserHistorySet, WebPage, WebSite + VersionStateSet, TextChunk, Category, UserHistory, UserHistorySet, WebPage from sefaria.system.database import db import sefaria.system.cache as scache import random as rand diff --git a/sefaria/model/text.py b/sefaria/model/text.py index 210c5d0fb3..493c48121b 100644 --- a/sefaria/model/text.py +++ b/sefaria/model/text.py @@ -3632,12 +3632,13 @@ def recalibrate_next_prev_refs(self, add_self=True): if prev_ref: prev_ref._next = self if add_self else next_ref - def prev_segment_ref(self): + def prev_segment_ref(self, vstate=None): """ Returns a :class:`Ref` to the next previous populated segment. 
If this ref is not segment level, will return ``self``` + :param vstate: optional pre-fetched VersionState to avoid DB calls :return: :class:`Ref` """ r = self.starting_ref() @@ -3648,34 +3649,48 @@ def prev_segment_ref(self): d["sections"] = d["toSections"] = r.sections[:-1] + [r.sections[-1] - 1] return Ref(_obj=d) else: - r = r.prev_section_ref() + r = r.prev_section_ref(vstate=vstate) if not r: return None + if self.index_node.is_virtual: + return r.all_subrefs()[-1] d = r._core_dict() - newSections = r.sections + [self.get_state_ja().sub_array_length([i - 1 for i in r.sections])] + ja = self.get_state_ja(vstate=vstate) + newSections = r.sections + [ja.sub_array_length([i - 1 for i in r.sections])] d["sections"] = d["toSections"] = newSections return Ref(_obj=d) - def next_segment_ref(self): + def next_segment_ref(self, vstate=None): """ Returns a :class:`Ref` to the next populated segment. If this ref is not segment level, will return ``self``` + :param vstate: optional pre-fetched VersionState to avoid DB calls :return: :class:`Ref` """ r = self.ending_ref() if not r.is_segment_level(): return r + if self.index_node.is_virtual: + section_ref = self.context_ref() + siblings = section_ref.all_subrefs() + curr_index = siblings.index(self) + if len(siblings) == curr_index + 1: + next_section = section_ref.next_section_ref(vstate=vstate) + return next_section.all_subrefs()[0] if next_section else None + else: + return siblings[curr_index+1] sectionRef = r.section_ref() - sectionLength = self.get_state_ja().sub_array_length([i - 1 for i in sectionRef.sections]) + ja = self.get_state_ja(vstate=vstate) + sectionLength = ja.sub_array_length([i - 1 for i in sectionRef.sections]) if r.sections[-1] < sectionLength: d = r._core_dict() d["sections"] = d["toSections"] = r.sections[:-1] + [r.sections[-1] + 1] return Ref(_obj=d) else: try: - return r.next_section_ref().subref(1) + return r.next_section_ref(vstate=vstate).subref(1) except AttributeError: # No next section 
return None @@ -3692,16 +3707,16 @@ def last_segment_ref(self): o["sections"] = o["toSections"] = [i + 1 for i in self.get_state_ja().last_index(self.index_node.depth)] return Ref(_obj=o) - def first_available_section_ref(self): + def first_available_section_ref(self, vstate=None): """ Returns a :class:`Ref` to the first section inside of or following this :class:`Ref` that has some content. Return first available segment ref is `self` is depth 1 Returns ``None`` if self is empty and no following :class:`Ref` has content. + :param vstate: optional pre-fetched VersionState to avoid DB calls :return: :class:`Ref` """ - # todo: This is now stored on the VersionState. Look for performance gains. if isinstance(self.index_node, JaggedArrayNode): r = self.padded_ref() elif isinstance(self.index_node, TitledTreeNode): @@ -3722,9 +3737,9 @@ def first_available_section_ref(self): if r.is_book_level(): # r is depth 1. return first segment r = r.subref([1]) - return r.next_segment_ref() if r.is_empty() else r + return r.next_segment_ref(vstate=vstate) if r.is_empty(vstate=vstate) else r else: - return r.next_section_ref() if r.is_empty() else r + return r.next_section_ref(vstate=vstate) if r.is_empty(vstate=vstate) else r #Don't store results on Ref cache - state objects change, and don't yet propogate to this Cache def get_state_node(self, meta=None, hint=None): @@ -3734,11 +3749,14 @@ def get_state_node(self, meta=None, hint=None): from . import version_state return version_state.StateNode(snode=self.index_node, meta=meta, hint=hint) - def get_state_ja(self, lang="all"): + def get_state_ja(self, lang="all", vstate=None): """ :param lang: "all", "he", or "en" + :param vstate: optional pre-fetched VersionState to avoid DB calls :return: :class:`sefaria.datatype.jagged_array` """ + if vstate: + return vstate.state_node(self.index_node).ja(lang) #TODO: also does not work with complex texts... 
return self.get_state_node(hint=[(lang, "availableTexts")]).ja(lang) @@ -3766,17 +3784,16 @@ def is_text_translated(self): """ return self.is_text_fully_available("en") - def is_empty(self, lang=None): + def is_empty(self, lang=None, vstate=None): """ Checks if :class:`Ref` has any corresponding data in :class:`Version` records. + :param vstate: optional pre-fetched VersionState to avoid DB calls :return: Bool True is there is not text at this ref in any language """ - - # The commented code is easier to understand, but the code we're using puts a lot less on the wire. - # return not len(self.versionset()) - # depricated - # return db.texts.find(self.condition_query(), {"_id": 1}).count() == 0 + if vstate and not self.index_node.is_virtual: + state_ja = self.get_state_ja(vstate=vstate) + return state_ja.subarray_with_ref(self).is_empty() return db.texts.count_documents(self.condition_query(lang)) == 0 diff --git a/sefaria/system/database.py b/sefaria/system/database.py index b03430695e..f42fc17ec8 100644 --- a/sefaria/system/database.py +++ b/sefaria/system/database.py @@ -4,11 +4,38 @@ """ import sys import pymongo +from pymongo import monitoring import urllib.parse from pymongo.errors import OperationFailure from sefaria.settings import * + +class QueryCounter(monitoring.CommandListener): + count = 0 + queries = [] + tracked_commands = None + + def started(self, event): + if self.tracked_commands is not None and event.command_name not in self.tracked_commands: + return + import traceback + QueryCounter.count += 1 + QueryCounter.queries.append({ + 'command': event.command_name, + 'collection': event.command.get(event.command_name), + 'traceback': ''.join(traceback.format_stack()[-6:-1]) + }) + + def succeeded(self, event): pass + def failed(self, event): pass + + @classmethod + def reset(cls, tracked_commands=None): + cls.count = 0 + cls.queries = [] + cls.tracked_commands = tracked_commands + def check_db_exists(db_name): dbnames = client.list_database_names() 
return db_name in dbnames @@ -30,11 +57,13 @@ def get_test_db(): TEST_DB = SEFARIA_DB #If we have jsut a single instance mongo (such as for development) the MONGO_HOST param should contain jsut the host string e.g "localhost") + _event_listeners = [QueryCounter()] if hasattr(sys, '_called_from_test') else [] + if MONGO_REPLICASET_NAME is None: if SEFARIA_DB_USER and SEFARIA_DB_PASSWORD: - client = pymongo.MongoClient(MONGO_HOST, MONGO_PORT, username=SEFARIA_DB_USER, password=SEFARIA_DB_PASSWORD) + client = pymongo.MongoClient(MONGO_HOST, MONGO_PORT, username=SEFARIA_DB_USER, password=SEFARIA_DB_PASSWORD, event_listeners=_event_listeners) else: - client = pymongo.MongoClient(MONGO_HOST, MONGO_PORT) + client = pymongo.MongoClient(MONGO_HOST, MONGO_PORT, event_listeners=_event_listeners) #Else if we are using a replica set mongo, we need to connect with a URI that containts a comma separated list of 'host:port' strings else: if SEFARIA_DB_USER and SEFARIA_DB_PASSWORD: @@ -45,7 +74,7 @@ def get_test_db(): else: connection_uri = 'mongodb://{}/?ssl=false&readPreference=primaryPreferred&replicaSet={}'.format(MONGO_HOST, MONGO_REPLICASET_NAME) # Now connect to the mongo server - client = pymongo.MongoClient(connection_uri) + client = pymongo.MongoClient(connection_uri, event_listeners=_event_listeners) diff --git a/sefaria/urls_shared.py b/sefaria/urls_shared.py index dcf190ea87..f5a1b46a98 100644 --- a/sefaria/urls_shared.py +++ b/sefaria/urls_shared.py @@ -100,6 +100,7 @@ url(r'^api/calendars/topics/parasha/?$', reader_views.parasha_data_api), url(r'^api/calendars/topics/holiday/?$', reader_views.seasonal_topic_api), url(r'^api/name/(?P.+)$', reader_views.name_api), + url(r'^api/ref/(?P.+)$', api_views.RefView.as_view()), url(r'^api/category/?(?P.+)?$', reader_views.category_api), url(r'^api/tag-category/?(?P.+)?$', reader_views.tag_category_api), url(r'^api/words/completion/(?P.+)/(?P.+)$', reader_views.dictionary_completion_api),