diff --git a/docs/notebooks/NERD_TermSet_How_to_Guide.ipynb b/docs/notebooks/NERD_TermSet_How_to_Guide.ipynb
new file mode 100644
index 000000000..eb775e16e
--- /dev/null
+++ b/docs/notebooks/NERD_TermSet_How_to_Guide.ipynb
@@ -0,0 +1,3657 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "0e15e182",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from hdmf.common import DynamicTable, VectorData\n",
+ "from hdmf.term_set import TermSet\n",
+ "\n",
+ "from pynwb.resources import ExternalResources\n",
+ "from pynwb import NWBFile, NWBHDF5IO\n",
+ "from pynwb import get_type_map as tm\n",
+ "from pynwb.file import Subject\n",
+ "\n",
+ "from datetime import datetime\n",
+ "from dateutil import tz\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cb88a996",
+ "metadata": {},
+ "source": [
+ "## Dev Days Note:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b2ba3bdd",
+ "metadata": {},
+ "source": [
+ "To run this notebook please download the nwb files under the \"DynamicTermset and ExternalResources\" project in the \"Materials\" subsection marked \"NWB Files for Tutorials\"."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1941a492",
+ "metadata": {},
+ "source": [
+ "# An Overview of NERD"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3f20146d",
+ "metadata": {},
+ "source": [
+ "### Goals and Use Cases"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3b5a9e23",
+ "metadata": {},
+ "source": [
+ "To have a FAIR data ecosystem that supports data reuse, the `ExternalResources` class is a toolkit of standardized methods to create and manage linkages between data terms and external resources, such as online ontologies or digital identifiers. Common use cases:\n",
+ "\n",
+ "* Linking terms from user data to ontologies facilitates standardized diction and semantics of terms to precisely defined neuroscience metadata using existing curated resources, e.g., brain atlases; species taxonomies; and anatomical, cell, and gene function ontologies. \n",
+ "* Linking data to persistent digital identifiers (e.g., ORCID, RRID, or DOI) enables unique identification of experimenters, publications, subjects, software, and other resources and assets identified in the experimental metadata.\n",
+ "* Linking data to related data assets is essential for integration and interoperability of data across different data archives for experiments involving multiple data modalities."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6b3748f8",
+ "metadata": {},
+ "source": [
+ "# Using NERD with a single NWB File from the DANDI Archive"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "19921e85",
+ "metadata": {},
+ "source": [
+ "Loading in the file, we can see multiple cases where contextual metadata will be important with regard to creating and sharing FAIR data. We can map the experimenter to a digital identifier, i.e., ORCID. The electrode group has a location that will be mapped to a brain atlas. Lastly, we can map the `Subject` species attribute to an ontology resource, in this case the NCBI Taxonomy."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "e114746e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "
session_description: Data from monkey Haydn performing ready-set-go time interval reproduction task. This file contains continuous segments of the full session on 2016-12-11 that can be used for training models for the Neural Latents Benchmark.
identifier: 8969f328-3929-11ec-8077-43176b153428
session_start_time: 2016-12-11 00:00:00-05:00
timestamps_reference_time: 2016-12-11 00:00:00-05:00
file_create_date
2021-10-29 22:31:50.351047-04:00
experimenter: ('Hansem Sohn',)
related_publications: ('http://dx.doi.org/10.1016/j.neuron.2019.06.012',)
keywords
epoch_tags: set()
electrodes
description: metadata about extracellular electrodes
id
colnames: ('x', 'y', 'z', 'imp', 'location', 'filtering', 'group', 'group_name')
columns: (, , , , , , , )
electrode_groups (3)
electrode_group_1
description: Electrodes on a neural probe
location: Dorsomedial frontal cortex
device
description: Linear probe with 24 recording channels
manufacturer: Plexon Inc.
electrode_group_2
description: Electrodes on a neural probe
location: Dorsomedial frontal cortex
device
description: Linear probe with 24 recording channels
manufacturer: Plexon Inc.
electrode_group_3
description: Electrodes on a neural probe
location: Dorsomedial frontal cortex
device
description: Linear probe with 24 recording channels
manufacturer: Plexon Inc.
devices (3)
electrode_probe_1
description: Linear probe with 24 recording channels
manufacturer: Plexon Inc.
electrode_probe_2
description: Linear probe with 24 recording channels
manufacturer: Plexon Inc.
electrode_probe_3
description: Linear probe with 24 recording channels
manufacturer: Plexon Inc.
intervals (1)
trials
description: experimental trials
id
colnames: ('start_time', 'stop_time', 'fix_on_time', 'fix_time', 'target_on_time', 'ready_time', 'set_time', 'go_time', 'target_acq_time', 'reward_time', 'bad_time', 'is_short', 'is_eye', 'theta', 'ts', 'tp', 'fix_time_dur', 'target_time_dur', 'iti', 'reward_dur', 'is_outlier', 'split')
columns: (, , , , , , , , , , , , , , , , , , , , , )
subject
age: P4Y
sex: M
species: Macaca mulatta
subject_id: Haydn
trials
description: experimental trials
id
colnames: ('start_time', 'stop_time', 'fix_on_time', 'fix_time', 'target_on_time', 'ready_time', 'set_time', 'go_time', 'target_acq_time', 'reward_time', 'bad_time', 'is_short', 'is_eye', 'theta', 'ts', 'tp', 'fix_time_dur', 'target_time_dur', 'iti', 'reward_dur', 'is_outlier', 'split')
columns: (, , , , , , , , , , , , , , , , , , , , , )
units
description: data on spiking units
id
colnames: ('heldout', 'spike_times', 'obs_intervals')
columns: (, , , , )
waveform_unit: volts
experiment_description: Cognitive timing task in which subject attempts to reproduce interval between two cues
session_id: 20161211
lab: Jazayeri
institution: Massachusetts Institute of Technology
"
+ ],
+ "text/plain": [
+ "root pynwb.file.NWBFile at 0x4956682848\n",
+ "Fields:\n",
+ " devices: {\n",
+ " electrode_probe_1 ,\n",
+ " electrode_probe_2 ,\n",
+ " electrode_probe_3 \n",
+ " }\n",
+ " electrode_groups: {\n",
+ " electrode_group_1 ,\n",
+ " electrode_group_2 ,\n",
+ " electrode_group_3 \n",
+ " }\n",
+ " electrodes: electrodes \n",
+ " experiment_description: Cognitive timing task in which subject attempts to reproduce interval between two cues\n",
+ " experimenter: ['Hansem Sohn']\n",
+ " file_create_date: [datetime.datetime(2021, 10, 29, 22, 31, 50, 351047, tzinfo=tzoffset(None, -14400))]\n",
+ " identifier: 8969f328-3929-11ec-8077-43176b153428\n",
+ " institution: Massachusetts Institute of Technology\n",
+ " intervals: {\n",
+ " trials \n",
+ " }\n",
+ " lab: Jazayeri\n",
+ " related_publications: ['http://dx.doi.org/10.1016/j.neuron.2019.06.012']\n",
+ " session_description: Data from monkey Haydn performing ready-set-go time interval reproduction task. This file contains continuous segments of the full session on 2016-12-11 that can be used for training models for the Neural Latents Benchmark.\n",
+ " session_id: 20161211\n",
+ " session_start_time: 2016-12-11 00:00:00-05:00\n",
+ " subject: subject pynwb.file.Subject at 0x4956678672\n",
+ "Fields:\n",
+ " age: P4Y\n",
+ " sex: M\n",
+ " species: Macaca mulatta\n",
+ " subject_id: Haydn\n",
+ "\n",
+ " timestamps_reference_time: 2016-12-11 00:00:00-05:00\n",
+ " trials: trials \n",
+ " units: units "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "with NWBHDF5IO(\"sub-Haydn_desc-train_ecephys.nwb\", \"r\") as io:\n",
+ " read_nwbfile = io.read()\n",
+ "read_nwbfile"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "09b88281",
+ "metadata": {},
+ "source": [
+ "When using NERD directly with a single source, which in the most common case is an `NWBFile`, it is recommended to link the instance of the `ExternalResources` class to the file. This link allows for easier use of NERD, as shown later in the tutorial."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d7390ae2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/mavaylon/Research/NWB/hdmf2/hdmf/src/hdmf/container.py:210: UserWarning: ExternalResources is experimental -- it may be removed in the future and is not guaranteed to maintain backward compatibility\n",
+ " warn(_exp_warn_msg(cls))\n"
+ ]
+ }
+ ],
+ "source": [
+ "er = ExternalResources() \n",
+ "read_nwbfile.link_resources(er)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b4639053",
+ "metadata": {},
+ "source": [
+ "We can see the linkage as follows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "5784a089",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " keys
files
entities
objects
object_keys
entity_keys
"
+ ],
+ "text/plain": [
+ "external_resources pynwb.resources.ExternalResources at 0x4956684000\n",
+ "Fields:"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "read_nwbfile.get_linked_resources()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3732afaa",
+ "metadata": {},
+ "source": [
+ "#### Important Note"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "65829ef3",
+ "metadata": {},
+ "source": [
+ "By calling `link_resources` on the `NWBFile`, the user establishes a link to the `ExternalResources` instance. However, since `ExternalResources` is written separately from the `NWBFile`, this link is not saved on write. This allows users to annotate existing files without having to modify files containing large datasets."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cfadb3a3",
+ "metadata": {},
+ "source": [
+ "### ORCiD"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "85b8de4d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(,\n",
+ " )"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.add_ref(\n",
+ " container=read_nwbfile,\n",
+ " attribute=\"experimenter\",\n",
+ " key=\"Hansem Sohn\",\n",
+ " entity_id='ORCID:0000-0001-8593-7473', \n",
+ " entity_uri='https://orcid.org/0000-0001-8593-7473')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6c182055",
+ "metadata": {},
+ "source": [
+ "### Electrode Group Location"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "2c45f210",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(,\n",
+ " )"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.add_ref(\n",
+ " container=read_nwbfile.electrode_groups['electrode_group_1'],\n",
+ " attribute=\"location\",\n",
+ " key=\"Dorsomedial frontal cortex\",\n",
+ " entity_id=\"Frontal Cortex\", \n",
+ " entity_uri=\"https://www.ebrains.eu/tools/rat-brain\", \n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7082b589",
+ "metadata": {},
+ "source": [
+ "### Subject Species"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "5b0d328d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(,\n",
+ " )"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.add_ref(\n",
+ " container=read_nwbfile.subject,\n",
+ " attribute='species',\n",
+ " key='Macaca mulatta',\n",
+ " entity_id='NCBI_TAXON:9544',\n",
+ " entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=info&id=9544'\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "07ac85ab",
+ "metadata": {},
+ "source": [
+ "### What about the connection to the NWBFile?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6ccd764c",
+ "metadata": {},
+ "source": [
+ "Even though we have been calling `add_ref` directly on the `ExternalResources` instance, i.e., `er.add_ref(...)`, we are still updating the `ExternalResources` linked to the file. Alternatively, a user could use `read_nwbfile.get_linked_resources().add_ref(...)`. We now see that our instance of `ExternalResources` shows a populated, normalized set of tables for efficient data storage and query options. Even though the data structure consists of multiple tables, the user can visualize a flattened view of the NERD system."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "da6540e1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " file_object_id | \n",
+ " objects_idx | \n",
+ " object_id | \n",
+ " files_idx | \n",
+ " object_type | \n",
+ " relative_path | \n",
+ " field | \n",
+ " keys_idx | \n",
+ " key | \n",
+ " entities_idx | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 0 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 0 | \n",
+ " NWBFile | \n",
+ " general/experimenter | \n",
+ " | \n",
+ " 0 | \n",
+ " Hansem Sohn | \n",
+ " 0 | \n",
+ " ORCID:0000-0001-8593-7473 | \n",
+ " https://orcid.org/0000-0001-8593-7473 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 1 | \n",
+ " f8641805-f93c-446f-8194-5fce08d22dbb | \n",
+ " 0 | \n",
+ " ElectrodeGroup | \n",
+ " location | \n",
+ " | \n",
+ " 1 | \n",
+ " Dorsomedial frontal cortex | \n",
+ " 1 | \n",
+ " Frontal Cortex | \n",
+ " https://www.ebrains.eu/tools/rat-brain | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 2 | \n",
+ " 5ee39486-8625-4ac3-9691-ce9d724812a4 | \n",
+ " 0 | \n",
+ " Subject | \n",
+ " species | \n",
+ " | \n",
+ " 2 | \n",
+ " Macaca mulatta | \n",
+ " 2 | \n",
+ " NCBI_TAXON:9544 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " file_object_id objects_idx \n",
+ "0 9c3a5c45-316c-493d-a712-03a01b662ee9 0 \\\n",
+ "1 9c3a5c45-316c-493d-a712-03a01b662ee9 1 \n",
+ "2 9c3a5c45-316c-493d-a712-03a01b662ee9 2 \n",
+ "\n",
+ " object_id files_idx object_type \n",
+ "0 9c3a5c45-316c-493d-a712-03a01b662ee9 0 NWBFile \\\n",
+ "1 f8641805-f93c-446f-8194-5fce08d22dbb 0 ElectrodeGroup \n",
+ "2 5ee39486-8625-4ac3-9691-ce9d724812a4 0 Subject \n",
+ "\n",
+ " relative_path field keys_idx key \n",
+ "0 general/experimenter 0 Hansem Sohn \\\n",
+ "1 location 1 Dorsomedial frontal cortex \n",
+ "2 species 2 Macaca mulatta \n",
+ "\n",
+ " entities_idx entity_id \n",
+ "0 0 ORCID:0000-0001-8593-7473 \\\n",
+ "1 1 Frontal Cortex \n",
+ "2 2 NCBI_TAXON:9544 \n",
+ "\n",
+ " entity_uri \n",
+ "0 https://orcid.org/0000-0001-8593-7473 \n",
+ "1 https://www.ebrains.eu/tools/rat-brain \n",
+ "2 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df=er.to_dataframe()\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "87fc5ff5",
+ "metadata": {},
+ "source": [
+ "### Useful query methods"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6f3df698",
+ "metadata": {},
+ "source": [
+ "NERD hosts multiple methods to retrieve the stored data. More methods are in active development and are open for community requests and feedback."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dd26e036",
+ "metadata": {},
+ "source": [
+ "#### Get Object Type"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2797cd69",
+ "metadata": {},
+ "source": [
+ "This method retrieves all instances of a specified `object_type`. In this case, a user can retrieve all instances involving `Subject`.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "538d2fc4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " file_object_id | \n",
+ " objects_idx | \n",
+ " object_id | \n",
+ " files_idx | \n",
+ " object_type | \n",
+ " relative_path | \n",
+ " field | \n",
+ " keys_idx | \n",
+ " key | \n",
+ " entities_idx | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 2 | \n",
+ " 5ee39486-8625-4ac3-9691-ce9d724812a4 | \n",
+ " 0 | \n",
+ " Subject | \n",
+ " species | \n",
+ " | \n",
+ " 2 | \n",
+ " Macaca mulatta | \n",
+ " 2 | \n",
+ " NCBI_TAXON:9544 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " file_object_id objects_idx \n",
+ "2 9c3a5c45-316c-493d-a712-03a01b662ee9 2 \\\n",
+ "\n",
+ " object_id files_idx object_type relative_path \n",
+ "2 5ee39486-8625-4ac3-9691-ce9d724812a4 0 Subject species \\\n",
+ "\n",
+ " field keys_idx key entities_idx entity_id \n",
+ "2 2 Macaca mulatta 2 NCBI_TAXON:9544 \\\n",
+ "\n",
+ " entity_uri \n",
+ "2 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.get_object_type(object_type='Subject', all_instances=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3b03bd2f",
+ "metadata": {},
+ "source": [
+ "#### Get Key"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "913341a3",
+ "metadata": {},
+ "source": [
+ "`get_key` returns a `Key` object based on the name given. If the name is used more than once, the user provides the container, relative_path, and field to retrieve the specific `Key` they want. Users will need to use this method if they want to reuse the key for a new reference, since `ExternalResources` requires unique keys associated with an `Object`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "84128fb4",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.get_key('Hansem Sohn')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "5d897495",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.get_key(key_name='Macaca mulatta', container=read_nwbfile.subject, relative_path='species')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b728ce47",
+ "metadata": {},
+ "source": [
+ "#### Get all entities for an Object"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "19a21f33",
+ "metadata": {},
+ "source": [
+ "`get_object_entities` allows the user to retrieve all entities and key information associated with an `Object`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "74e9fc1f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " NCBI_TAXON:9544 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " entity_id entity_uri\n",
+ "0 NCBI_TAXON:9544 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/..."
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.get_object_entities(container=read_nwbfile.subject,\n",
+ " relative_path='species')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "96387508",
+ "metadata": {},
+ "source": [
+ "### Write NERD as a zipped collection of tsv files"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2fefb46c",
+ "metadata": {},
+ "source": [
+ "As mentioned earlier, NERD is written separately from the NWB file. `to_norm_tsv` writes each table as a TSV file and stores them in a zip file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "9d09b4af",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "er.to_norm_tsv(path='./')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cd09a129",
+ "metadata": {},
+ "source": [
+ "### Read ER from tsv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "56f295b2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/mavaylon/Research/NWB/hdmf2/hdmf/src/hdmf/container.py:210: UserWarning: ExternalResources is experimental -- it may be removed in the future and is not guaranteed to maintain backward compatibility\n",
+ " warn(_exp_warn_msg(cls))\n"
+ ]
+ }
+ ],
+ "source": [
+ "er_read=ExternalResources.from_norm_tsv(path='./')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "1914c56c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " file_object_id | \n",
+ " objects_idx | \n",
+ " object_id | \n",
+ " files_idx | \n",
+ " object_type | \n",
+ " relative_path | \n",
+ " field | \n",
+ " keys_idx | \n",
+ " key | \n",
+ " entities_idx | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 0 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 0 | \n",
+ " NWBFile | \n",
+ " general/experimenter | \n",
+ " | \n",
+ " 0 | \n",
+ " Hansem Sohn | \n",
+ " 0 | \n",
+ " ORCID:0000-0001-8593-7473 | \n",
+ " https://orcid.org/0000-0001-8593-7473 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 1 | \n",
+ " f8641805-f93c-446f-8194-5fce08d22dbb | \n",
+ " 0 | \n",
+ " ElectrodeGroup | \n",
+ " location | \n",
+ " | \n",
+ " 1 | \n",
+ " Dorsomedial frontal cortex | \n",
+ " 1 | \n",
+ " Frontal Cortex | \n",
+ " https://www.ebrains.eu/tools/rat-brain | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 2 | \n",
+ " 5ee39486-8625-4ac3-9691-ce9d724812a4 | \n",
+ " 0 | \n",
+ " Subject | \n",
+ " species | \n",
+ " | \n",
+ " 2 | \n",
+ " Macaca mulatta | \n",
+ " 2 | \n",
+ " NCBI_TAXON:9544 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " file_object_id objects_idx \n",
+ "0 9c3a5c45-316c-493d-a712-03a01b662ee9 0 \\\n",
+ "1 9c3a5c45-316c-493d-a712-03a01b662ee9 1 \n",
+ "2 9c3a5c45-316c-493d-a712-03a01b662ee9 2 \n",
+ "\n",
+ " object_id files_idx object_type \n",
+ "0 9c3a5c45-316c-493d-a712-03a01b662ee9 0 NWBFile \\\n",
+ "1 f8641805-f93c-446f-8194-5fce08d22dbb 0 ElectrodeGroup \n",
+ "2 5ee39486-8625-4ac3-9691-ce9d724812a4 0 Subject \n",
+ "\n",
+ " relative_path field keys_idx key \n",
+ "0 general/experimenter 0 Hansem Sohn \\\n",
+ "1 location 1 Dorsomedial frontal cortex \n",
+ "2 species 2 Macaca mulatta \n",
+ "\n",
+ " entities_idx entity_id \n",
+ "0 0 ORCID:0000-0001-8593-7473 \\\n",
+ "1 1 Frontal Cortex \n",
+ "2 2 NCBI_TAXON:9544 \n",
+ "\n",
+ " entity_uri \n",
+ "0 https://orcid.org/0000-0001-8593-7473 \n",
+ "1 https://www.ebrains.eu/tools/rat-brain \n",
+ "2 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er_read.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f729e70c",
+ "metadata": {},
+ "source": [
+ "# Using NERD with multiple NWBFiles"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "07d21ab0",
+ "metadata": {},
+ "source": [
+ "A single instance of the `ExternalResources` class supports storing contextual metadata for multiple files. If the user decides to address multiple files at once, setting up a link between the instance of `ExternalResources` and the file is not possible (due to multiple files being present at once). However, there are ways around this. Users can link the instance to a file, populate the NERD data structure, and then relink that instance to the next file.\n",
+ "\n",
+ "Another method (as seen below) would be to explicitly define the `file` parameter when populating with `add_ref`.\n",
+ "\n",
+ "In this example, we have three files currently existing on the DANDI Archive. These files all contain experiments regarding a \"rat\". The species field is free-form text, allowing a wide range of names to represent the same animal. Having contextual metadata for `Subject` species will allow users to connect and query across files with datasets and attributes that share the same external reference."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "0ce9341c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/mavaylon/Research/NWB/hdmf2/hdmf/src/hdmf/container.py:210: UserWarning: ExternalResources is experimental -- it may be removed in the future and is not guaranteed to maintain backward compatibility\n",
+ " warn(_exp_warn_msg(cls))\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " file_object_id | \n",
+ " objects_idx | \n",
+ " object_id | \n",
+ " files_idx | \n",
+ " object_type | \n",
+ " relative_path | \n",
+ " field | \n",
+ " keys_idx | \n",
+ " key | \n",
+ " entities_idx | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 8e4f1f81-85b8-469e-9d1b-b7b188edfd6f | \n",
+ " 0 | \n",
+ " ed65b7ec-a46e-48fc-b685-e37634e6a4fc | \n",
+ " 0 | \n",
+ " Subject | \n",
+ " species | \n",
+ " | \n",
+ " 0 | \n",
+ " rat | \n",
+ " 0 | \n",
+ " NCBI_TAXON:10116 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 8e4f1f81-85b8-469e-9d1b-b7b188edfd6f | \n",
+ " 1 | \n",
+ " 088479f0-5966-45a1-9394-21bedf7b9cf2 | \n",
+ " 0 | \n",
+ " Subject | \n",
+ " species | \n",
+ " | \n",
+ " 1 | \n",
+ " Rattus norvegicus domestica | \n",
+ " 0 | \n",
+ " NCBI_TAXON:10116 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 8e4f1f81-85b8-469e-9d1b-b7b188edfd6f | \n",
+ " 2 | \n",
+ " d0299e3c-f007-4465-98a9-92f2590699a4 | \n",
+ " 0 | \n",
+ " Subject | \n",
+ " species | \n",
+ " | \n",
+ " 2 | \n",
+ " rattus norvegicus | \n",
+ " 0 | \n",
+ " NCBI_TAXON:10116 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " file_object_id objects_idx \n",
+ "0 8e4f1f81-85b8-469e-9d1b-b7b188edfd6f 0 \\\n",
+ "1 8e4f1f81-85b8-469e-9d1b-b7b188edfd6f 1 \n",
+ "2 8e4f1f81-85b8-469e-9d1b-b7b188edfd6f 2 \n",
+ "\n",
+ " object_id files_idx object_type relative_path \n",
+ "0 ed65b7ec-a46e-48fc-b685-e37634e6a4fc 0 Subject species \\\n",
+ "1 088479f0-5966-45a1-9394-21bedf7b9cf2 0 Subject species \n",
+ "2 d0299e3c-f007-4465-98a9-92f2590699a4 0 Subject species \n",
+ "\n",
+ " field keys_idx key entities_idx \n",
+ "0 0 rat 0 \\\n",
+ "1 1 Rattus norvegicus domestica 0 \n",
+ "2 2 rattus norvegicus 0 \n",
+ "\n",
+ " entity_id entity_uri \n",
+ "0 NCBI_TAXON:10116 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... \n",
+ "1 NCBI_TAXON:10116 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... \n",
+ "2 NCBI_TAXON:10116 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... "
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Each of the three files stores the same species under a different free-form name.\n",
+ "# Every file is opened from its own path and read into its own variable so that\n",
+ "# each reference below is attached to the Subject of the matching file.\n",
+ "\n",
+ "# File with Subject species as rat\n",
+ "file_1 = 'sub-Rat203_ecephys.nwb'\n",
+ "with NWBHDF5IO(file_1, \"r\") as io:\n",
+ "    read_nwbfile_1 = io.read()\n",
+ "\n",
+ "# File with Subject species as Rattus norvegicus domestica\n",
+ "file_2 = 'sub-EE_ses-EE-042_ecephys.nwb'\n",
+ "with NWBHDF5IO(file_2, \"r\") as io:\n",
+ "    read_nwbfile_2 = io.read()\n",
+ "\n",
+ "# File with Subject species as rattus norvegicus\n",
+ "file_3 = 'sub-BH243.nwb'\n",
+ "with NWBHDF5IO(file_3, \"r\") as io:\n",
+ "    read_nwbfile_3 = io.read()\n",
+ "\n",
+ "# No file is linked to this instance, so `file` is passed explicitly on every call.\n",
+ "er = ExternalResources()\n",
+ "\n",
+ "er.add_ref(\n",
+ "    file=read_nwbfile_1,\n",
+ "    container=read_nwbfile_1.subject,\n",
+ "    attribute='species',\n",
+ "    key='rat',\n",
+ "    entity_id='NCBI_TAXON:10116',\n",
+ "    entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=info&id=10116'\n",
+ ")\n",
+ "\n",
+ "# entity_uri is omitted below: NCBI_TAXON:10116 was registered by the first call,\n",
+ "# so the same entity is reused for the remaining keys.\n",
+ "er.add_ref(\n",
+ "    file=read_nwbfile_2,\n",
+ "    container=read_nwbfile_2.subject,\n",
+ "    attribute='species',\n",
+ "    key='Rattus norvegicus domestica',\n",
+ "    entity_id='NCBI_TAXON:10116',\n",
+ ")\n",
+ "\n",
+ "er.add_ref(\n",
+ "    file=read_nwbfile_3,\n",
+ "    container=read_nwbfile_3.subject,\n",
+ "    attribute='species',\n",
+ "    key='rattus norvegicus',\n",
+ "    entity_id='NCBI_TAXON:10116',\n",
+ ")\n",
+ "\n",
+ "er.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "97cb3bdd",
+ "metadata": {},
+ "source": [
+ "# NERD Structure"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d324f91b",
+ "metadata": {},
+ "source": [
+ "From a user's perspective, one can think of the `ExternalResources` as a simple table. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "73bd46d6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " file_object_id | \n",
+ " objects_idx | \n",
+ " object_id | \n",
+ " files_idx | \n",
+ " object_type | \n",
+ " relative_path | \n",
+ " field | \n",
+ " keys_idx | \n",
+ " key | \n",
+ " entities_idx | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 0 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 0 | \n",
+ " NWBFile | \n",
+ " general/experimenter | \n",
+ " | \n",
+ " 0 | \n",
+ " Hansem Sohn | \n",
+ " 0 | \n",
+ " ORCID:0000-0001-8593-7473 | \n",
+ " https://orcid.org/0000-0001-8593-7473 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 1 | \n",
+ " f8641805-f93c-446f-8194-5fce08d22dbb | \n",
+ " 0 | \n",
+ " ElectrodeGroup | \n",
+ " location | \n",
+ " | \n",
+ " 1 | \n",
+ " Dorsomedial frontal cortex | \n",
+ " 1 | \n",
+ " Frontal Cortex | \n",
+ " https://www.ebrains.eu/tools/rat-brain | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 2 | \n",
+ " 5ee39486-8625-4ac3-9691-ce9d724812a4 | \n",
+ " 0 | \n",
+ " Subject | \n",
+ " species | \n",
+ " | \n",
+ " 2 | \n",
+ " Macaca mulatta | \n",
+ " 2 | \n",
+ " NCBI_TAXON:9544 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " file_object_id objects_idx \n",
+ "0 9c3a5c45-316c-493d-a712-03a01b662ee9 0 \\\n",
+ "1 9c3a5c45-316c-493d-a712-03a01b662ee9 1 \n",
+ "2 9c3a5c45-316c-493d-a712-03a01b662ee9 2 \n",
+ "\n",
+ " object_id files_idx object_type \n",
+ "0 9c3a5c45-316c-493d-a712-03a01b662ee9 0 NWBFile \\\n",
+ "1 f8641805-f93c-446f-8194-5fce08d22dbb 0 ElectrodeGroup \n",
+ "2 5ee39486-8625-4ac3-9691-ce9d724812a4 0 Subject \n",
+ "\n",
+ " relative_path field keys_idx key \n",
+ "0 general/experimenter 0 Hansem Sohn \\\n",
+ "1 location 1 Dorsomedial frontal cortex \n",
+ "2 species 2 Macaca mulatta \n",
+ "\n",
+ " entities_idx entity_id \n",
+ "0 0 ORCID:0000-0001-8593-7473 \\\n",
+ "1 1 Frontal Cortex \n",
+ "2 2 NCBI_TAXON:9544 \n",
+ "\n",
+ " entity_uri \n",
+ "0 https://orcid.org/0000-0001-8593-7473 \n",
+ "1 https://www.ebrains.eu/tools/rat-brain \n",
+ "2 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... "
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er_read.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8136831b",
+ "metadata": {},
+ "source": [
+ "However, to reduce data redundancy and improve data integrity, `ExternalResources` stores this data internally in a collection of interlinked tables.\n",
+ "* `KeyTable` where each row describes a `Key`. A `Key` is a term defined by the user's data. \n",
+ "* `FileTable` where each row describes a `File`. A `File` is an `NWBFile` in our use case.\n",
+ "* `EntityTable` where each row describes an `Entity`. An `Entity` is a term from an ontology or resource.\n",
+ "* `ObjectTable` where each row describes an `Object`. An `Object` is an NWB data-type, meaning it has an object_id, e.g., `AbstractContainer`.\n",
+ "* `ObjectKeyTable` where each row describes an `ObjectKey` pair identifying which `Key` is used with which `Object`.\n",
+ "* `EntityKeyTable` where each row describes an `EntityKey` pair identifying which `Key` represents which `Entity`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4c82d23a",
+ "metadata": {},
+ "source": [
+ "### KeyTable"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0736a493",
+ "metadata": {},
+ "source": [
+ "Multiple `Keys` can have the same name. They are disambiguated by the `Object` associated with each, meaning we may have keys with the same name in different objects, but for a particular object all keys must be unique within `ExternalResources`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "38cef0de",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " key | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Hansem Sohn | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Dorsomedial frontal cortex | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Macaca mulatta | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " key\n",
+ "0 Hansem Sohn\n",
+ "1 Dorsomedial frontal cortex\n",
+ "2 Macaca mulatta"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er_read.keys.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "414246ad",
+ "metadata": {},
+ "source": [
+ "### EntityTable"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bb29618c",
+ "metadata": {},
+ "source": [
+ "This stores the ID and URI information for the external references."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "4ba1d0ba",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ORCID:0000-0001-8593-7473 | \n",
+ " https://orcid.org/0000-0001-8593-7473 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Frontal Cortex | \n",
+ " https://www.ebrains.eu/tools/rat-brain | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " NCBI_TAXON:9544 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " entity_id \n",
+ "0 ORCID:0000-0001-8593-7473 \\\n",
+ "1 Frontal Cortex \n",
+ "2 NCBI_TAXON:9544 \n",
+ "\n",
+ " entity_uri \n",
+ "0 https://orcid.org/0000-0001-8593-7473 \n",
+ "1 https://www.ebrains.eu/tools/rat-brain \n",
+ "2 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... "
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er_read.entities.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "72da3aa0",
+ "metadata": {},
+ "source": [
+ "### EntityKeyTable"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "22853359",
+ "metadata": {},
+ "source": [
+ "The `EntityKeyTable` stores the relationships identifying which user/data-defined `Key` represents which `Entity` from an external resource."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "11a3a458",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " entities_idx | \n",
+ " keys_idx | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " entities_idx keys_idx\n",
+ "0 0 0\n",
+ "1 1 1\n",
+ "2 2 2"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er_read.entity_keys.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9a4b8cb2",
+ "metadata": {},
+ "source": [
+ "### FileTable"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d2e6825a",
+ "metadata": {},
+ "source": [
+ "The `FileTable` stores the `id` for the `NWBFile`, allowing users to keep track of which files have the objects that have external references. With this update, the `ObjectTable` has a new column `files_idx`, i.e., the row index of the `FileTable`, to link the object and the file that stores it."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "15ba522d",
+ "metadata": {},
+ "source": [
+ "As we saw prior, `add_ref` is one of the main methods to populate `ExternalResources`.\n",
+ "\n",
+ "er.add_ref(\n",
+ " container=read_nwbfile.subject,\n",
+ " attribute='species',\n",
+ " key='Macaca mulatta',\n",
+ " entity_id='NCBI:9544',\n",
+ " entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=info&id=9544'\n",
+ ")
\n",
+ "\n",
+ "The `FileTable` is not optional, meaning every new reference needs an associated file. `add_ref` will search for a file if none is provided, as in this example. Users can also manually provide the file if the container hasn't been added to the file (an example is given in the next section)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "8c48bb3b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " file_object_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " file_object_id\n",
+ "0 9c3a5c45-316c-493d-a712-03a01b662ee9"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er_read.files.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bc26f814",
+ "metadata": {},
+ "source": [
+ "### ObjectTable"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "82691f13",
+ "metadata": {},
+ "source": [
+ "`files_idx` is the row index for the corresponding `NWBFile` that houses the `Object`. If there is no file, the user does *not* have to have one to use `ExternalResources`; it will be an empty string. The `object_type` column stores the explicit type of the object to allow for easy lookups.\n",
+ "\n",
+ "`relative_path` and `field` come in when dealing with different scenarios of adding references to `ExternalResources`.\n",
+ "* `relative_path` is the path from the closest parent that is an NWB data-type. This is used when the attribute is not an NWB data-type and so has no object id.\n",
+ "* `field` is used to differentiate the different fields of the dataset for compound data. For example, if a dataset has a compound data-type with fields `x`, `y`, and `z`, and each field is associated with different ontologies, then use `field='x'` to denote that `x` is using the external reference."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "f535adda",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " files_idx | \n",
+ " object_id | \n",
+ " object_type | \n",
+ " relative_path | \n",
+ " field | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " NWBFile | \n",
+ " general/experimenter | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " f8641805-f93c-446f-8194-5fce08d22dbb | \n",
+ " ElectrodeGroup | \n",
+ " location | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 5ee39486-8625-4ac3-9691-ce9d724812a4 | \n",
+ " Subject | \n",
+ " species | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " files_idx object_id object_type \n",
+ "0 0 9c3a5c45-316c-493d-a712-03a01b662ee9 NWBFile \\\n",
+ "1 0 f8641805-f93c-446f-8194-5fce08d22dbb ElectrodeGroup \n",
+ "2 0 5ee39486-8625-4ac3-9691-ce9d724812a4 Subject \n",
+ "\n",
+ " relative_path field \n",
+ "0 general/experimenter \n",
+ "1 location \n",
+ "2 species "
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er_read.objects.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cc8d9af6",
+ "metadata": {},
+ "source": [
+ "### ObjectKeyTable"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "25d245c0",
+ "metadata": {},
+ "source": [
+ "Stores the relationship between which keys are used with each `Object`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "12a8753a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " objects_idx | \n",
+ " keys_idx | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " objects_idx keys_idx\n",
+ "0 0 0\n",
+ "1 1 1\n",
+ "2 2 2"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er_read.object_keys.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "41727578",
+ "metadata": {},
+ "source": [
+ "# ExternalResources Rules"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fbbcb47d",
+ "metadata": {},
+ "source": [
+ "1. Multiple `Key` objects can have the same name.\n",
+ " They are disambiguated by the `Object` associated\n",
+ " with each, meaning we may have keys with the same name in different objects, but for a particular object\n",
+ " all keys must be unique.\n",
+ "2. In order to query specific records, the `ExternalResources` class\n",
+ " uses '(file, object_id, relative_path, field, key)' as the unique identifier.\n",
+ "3. An `Object` can have multiple `Key`\n",
+ " objects.\n",
+ "4. Multiple `Object` objects can use the same `Key`.\n",
+ "5. Do not use the private methods to add into the `KeyTable`,\n",
+ " `EntityKey`, `EntityTable`,\n",
+ " `ObjectTable`, `ObjectKeyTable`,\n",
+ " `FileTable`,\n",
+ " individually.\n",
+ "6. URIs are optional, but highly recommended. If not known, an empty string may be used.\n",
+ "7. An entity ID should be the unique string identifying the entity in the given resource.\n",
+ " This may or may not include a string representing the resource and a colon.\n",
+ " Use the format provided by the resource. For example, Identifiers.org uses the ID ``ncbigene:22353``\n",
+ " but the NCBI Gene uses the ID ``22353`` for the same term.\n",
+ "8. In a majority of cases, `Object` objects will have an empty string\n",
+ " for 'field'. The `ExternalResources` class supports compound data_types.\n",
+ " In that case, 'field' would be the field of the compound data_type that has an external reference.\n",
+ "9. In some cases, the attribute that needs an external reference is not an object with a 'data_type'.\n",
+ " The user must then use the nearest object that has a data type to be used as the parent object. When\n",
+ " adding an external resource for an object with a data type, users should not provide an attribute.\n",
+ " When adding an external resource for an attribute of an object, users need to provide\n",
+ " the name of the attribute.\n",
+ "10. The user must provide a `File` or an `Object` that\n",
+ " has `File` along the parent hierarchy.\n",
+ "11. When reusing `Entity` objects, the user provides only the ID for the entity when using\n",
+ " `add_ref`. This is done to prevent duplicates and will return an\n",
+ " error on how to reuse `Entity` objects if the\n",
+ " `add_ref` method finds an entity ID and URI that already exist."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "93e5a7e4",
+ "metadata": {},
+ "source": [
+ "# An example with a new NWBFile"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "b3d0c66a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "session_start_time = datetime(2018, 4, 25, 2, 30, 3, tzinfo=tz.gettz(\"US/Pacific\"))\n",
+ "\n",
+ "nwbfile = NWBFile(\n",
+ " session_description=\"Mouse exploring an open field\",\n",
+ " identifier=\"Mouse5_Day3\", \n",
+ " session_start_time=session_start_time, \n",
+ " session_id=\"session_1234\",\n",
+ " experimenter=[\"Dichter, Benjamin K.\", \"Smith, Alex\"], \n",
+ " lab=\"My Lab Name\", \n",
+ " institution=\"University of My Institution\", \n",
+ " related_publications=\"DOI:10.1016/j.neuron.2016.12.011\", \n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "08bbf0f9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nwbfile.subject = Subject(\n",
+ " subject_id=\"001\",\n",
+ " age=\"P90D\",\n",
+ " description=\"mouse 5\",\n",
+ " species=\"Mus musculus\",\n",
+ " sex=\"M\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "ea2a32e9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/mavaylon/Research/NWB/hdmf2/hdmf/src/hdmf/container.py:210: UserWarning: ExternalResources is experimental -- it may be removed in the future and is not guaranteed to maintain backward compatibility\n",
+ " warn(_exp_warn_msg(cls))\n"
+ ]
+ }
+ ],
+ "source": [
+ "er = ExternalResources() \n",
+ "nwbfile.link_resources(er) "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d102dc0c",
+ "metadata": {},
+ "source": [
+ "## Using add_ref"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "15b68c18",
+ "metadata": {},
+ "source": [
+ "### add_ref without a file"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0346be1d",
+ "metadata": {},
+ "source": [
+ "As mentioned prior, the file must be explicitly set within `add_ref` or the object must already exist within the file, in which case the link between the file and the instance of `ExternalResources` will automatically resolve the `file` parameter. The example below will return an error as expected."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "9371fdf6",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "Could not find file. Add container to the file.",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[35], line 7\u001b[0m\n\u001b[1;32m 1\u001b[0m col1 \u001b[38;5;241m=\u001b[39m VectorData(\n\u001b[1;32m 2\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSpecies_Data\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 3\u001b[0m description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mspecies from NCBI and Ensemble\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 4\u001b[0m data\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHomo sapiens\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMus musculus\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 6\u001b[0m species \u001b[38;5;241m=\u001b[39m DynamicTable(name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mspecies\u001b[39m\u001b[38;5;124m'\u001b[39m, description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMy species\u001b[39m\u001b[38;5;124m'\u001b[39m, columns\u001b[38;5;241m=\u001b[39m[col1],)\n\u001b[0;32m----> 7\u001b[0m \u001b[43mer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_ref\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontainer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mspecies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mattribute\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mSpecies_Data\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mHomo sapiens\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mentity_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mNCBI_TAXON:9606\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[43mentity_uri\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mhttps://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\n\u001b[1;32m 13\u001b[0m \u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/utils.py:644\u001b[0m, in \u001b[0;36mdocval..dec..func_call\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfunc_call\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 643\u001b[0m pargs \u001b[38;5;241m=\u001b[39m _check_args(args, kwargs)\n\u001b[0;32m--> 644\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/common/resources.py:572\u001b[0m, in \u001b[0;36mExternalResources.add_ref\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 569\u001b[0m file \u001b[38;5;241m=\u001b[39m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfile\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 571\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 572\u001b[0m file \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_file_from_container\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcontainer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcontainer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 574\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m attribute \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m: \u001b[38;5;66;03m# Trivial Case\u001b[39;00m\n\u001b[1;32m 575\u001b[0m relative_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/utils.py:644\u001b[0m, in \u001b[0;36mdocval..dec..func_call\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfunc_call\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 643\u001b[0m pargs \u001b[38;5;241m=\u001b[39m _check_args(args, kwargs)\n\u001b[0;32m--> 644\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/common/resources.py:408\u001b[0m, in \u001b[0;36mExternalResources._get_file_from_container\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 406\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 407\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCould not find file. Add container to the file.\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 408\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n",
+ "\u001b[0;31mValueError\u001b[0m: Could not find file. Add container to the file."
+ ]
+ }
+ ],
+ "source": [
+ "col1 = VectorData(\n",
+ " name='Species_Data',\n",
+ " description='species from NCBI and Ensemble',\n",
+ " data=['Homo sapiens', 'Mus musculus']\n",
+ ")\n",
+ "species = DynamicTable(name='species', description='My species', columns=[col1],)\n",
+ "er.add_ref(\n",
+ " container=species,\n",
+ " attribute='Species_Data',\n",
+ " key='Homo sapiens',\n",
+ " entity_id='NCBI_TAXON:9606',\n",
+ " entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606'\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ef7715db",
+ "metadata": {},
+ "source": [
+ "### add_ref with attributes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ca468b28",
+ "metadata": {},
+ "source": [
+ "Let's look at a very simple example. The `attribute` is the structure/feature that holds a term the user wants to add a reference for. `attribute` can either be an NWB data-type or not, e.g., a variable that contains a string-value.\n",
+ "\n",
+ "*Note: we manually provide `file=nwbfile`.*\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "bcba7a7c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(,\n",
+ " )"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "col1 = VectorData(\n",
+ " name='Species_Data',\n",
+ " description='species from NCBI and Ensemble',\n",
+ " data=['Homo sapiens', 'Mus musculus']\n",
+ ")\n",
+ "species = DynamicTable(name='species', description='My species', columns=[col1],)\n",
+ "er.add_ref(\n",
+ " file=nwbfile,\n",
+ " container=species,\n",
+ " attribute='Species_Data',\n",
+ " key='Homo sapiens',\n",
+ " entity_id='NCBI_TAXON:9606',\n",
+ " entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606'\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5153a04b",
+ "metadata": {},
+ "source": [
+ "Recall that `relative_path` is the path from the closest parent that is an NWB data-type and is used when the attribute is not an NWB data-type and so has no `object_id`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "d4818292",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(,\n",
+ " )"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Subject species attribute\n",
+ "er.add_ref(\n",
+ " container=nwbfile.subject,\n",
+ " attribute='species',\n",
+ " key='Mus musculus',\n",
+ " entity_id='NCBI_TAXON:10090',\n",
+ " entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090'\n",
+ ")\n",
+ "\n",
+ "# NWBFile experimenter\n",
+ "er.add_ref(\n",
+ " container=nwbfile,\n",
+ " attribute=\"experimenter\",\n",
+ " key=\"Dichter, Benjamin K.\",\n",
+ " entity_id=\"ORCID:0000-0001-5725-6910\",\n",
+ " entity_uri=\"https://orcid.org/0000-0001-5725-6910\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "934a249d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " file_object_id | \n",
+ " objects_idx | \n",
+ " object_id | \n",
+ " files_idx | \n",
+ " object_type | \n",
+ " relative_path | \n",
+ " field | \n",
+ " keys_idx | \n",
+ " key | \n",
+ " entities_idx | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 20563fc8-2e7a-496f-a34b-c1500eb8a10c | \n",
+ " 0 | \n",
+ " f99412f9-04f8-4c62-8056-a2e45f3a1afe | \n",
+ " 0 | \n",
+ " VectorData | \n",
+ " | \n",
+ " | \n",
+ " 0 | \n",
+ " Homo sapiens | \n",
+ " 0 | \n",
+ " NCBI_TAXON:9606 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 20563fc8-2e7a-496f-a34b-c1500eb8a10c | \n",
+ " 1 | \n",
+ " 69c78f04-79d0-40a3-be42-a3cf6d438d90 | \n",
+ " 0 | \n",
+ " Subject | \n",
+ " species | \n",
+ " | \n",
+ " 1 | \n",
+ " Mus musculus | \n",
+ " 1 | \n",
+ " NCBI_TAXON:10090 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20563fc8-2e7a-496f-a34b-c1500eb8a10c | \n",
+ " 2 | \n",
+ " 20563fc8-2e7a-496f-a34b-c1500eb8a10c | \n",
+ " 0 | \n",
+ " NWBFile | \n",
+ " general/experimenter | \n",
+ " | \n",
+ " 2 | \n",
+ " Dichter, Benjamin K. | \n",
+ " 2 | \n",
+ " ORCID:0000-0001-5725-6910 | \n",
+ " https://orcid.org/0000-0001-5725-6910 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " file_object_id objects_idx \n",
+ "0 20563fc8-2e7a-496f-a34b-c1500eb8a10c 0 \\\n",
+ "1 20563fc8-2e7a-496f-a34b-c1500eb8a10c 1 \n",
+ "2 20563fc8-2e7a-496f-a34b-c1500eb8a10c 2 \n",
+ "\n",
+ " object_id files_idx object_type \n",
+ "0 f99412f9-04f8-4c62-8056-a2e45f3a1afe 0 VectorData \\\n",
+ "1 69c78f04-79d0-40a3-be42-a3cf6d438d90 0 Subject \n",
+ "2 20563fc8-2e7a-496f-a34b-c1500eb8a10c 0 NWBFile \n",
+ "\n",
+ " relative_path field keys_idx key entities_idx \n",
+ "0 0 Homo sapiens 0 \\\n",
+ "1 species 1 Mus musculus 1 \n",
+ "2 general/experimenter 2 Dichter, Benjamin K. 2 \n",
+ "\n",
+ " entity_id \n",
+ "0 NCBI_TAXON:9606 \\\n",
+ "1 NCBI_TAXON:10090 \n",
+ "2 ORCID:0000-0001-5725-6910 \n",
+ "\n",
+ " entity_uri \n",
+ "0 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... \n",
+ "1 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... \n",
+ "2 https://orcid.org/0000-0001-5725-6910 "
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ae04931b",
+ "metadata": {},
+ "source": [
+ "### add_ref with compound data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "8a73b63d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "col1 = VectorData(\n",
+ " name='Species_column',\n",
+ " description='description',\n",
+ " data=np.array(\n",
+ " [('Mus musculus', 9, 81.0), ('Homo sapiens', 3, 27.0)],\n",
+ " dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')]\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "species = DynamicTable(name='SpeciesTable', description='My species', columns=[col1],)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "32a77697",
+ "metadata": {},
+ "source": [
+ "### add_ref reusing an entity"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "60beb83c",
+ "metadata": {},
+ "source": [
+ "As mentioned in the rules above, if a user wants to reuse an entity, then `add_ref` requires only the `entity_id` and not the `entity_uri`. As seen below, an error alerting the user to this rule will be raised if a URI is provided when the entity already exists within the instance of `ExternalResources`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "1b261846",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "If you plan on reusing an entity, then entity_uri parameter must be None.",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[40], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_ref\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mfile\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnwbfile\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontainer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mspecies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mattribute\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mSpecies_column\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mfield\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mspecies\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mMus musculus\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mentity_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mNCBI_TAXON:10090\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mentity_uri\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mhttps://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\n\u001b[1;32m 9\u001b[0m \u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/utils.py:644\u001b[0m, in \u001b[0;36mdocval..dec..func_call\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfunc_call\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 643\u001b[0m pargs \u001b[38;5;241m=\u001b[39m _check_args(args, kwargs)\n\u001b[0;32m--> 644\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/common/resources.py:660\u001b[0m, in \u001b[0;36mExternalResources.add_ref\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 658\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m entity_uri \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 659\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mIf you plan on reusing an entity, then entity_uri parameter must be None.\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 660\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 661\u001b[0m \u001b[38;5;66;03m# check for entity-key relationship in EntityKeyTable\u001b[39;00m\n\u001b[1;32m 662\u001b[0m key_idx \u001b[38;5;241m=\u001b[39m key\u001b[38;5;241m.\u001b[39midx\n",
+ "\u001b[0;31mValueError\u001b[0m: If you plan on reusing an entity, then entity_uri parameter must be None."
+ ]
+ }
+ ],
+ "source": [
+ "er.add_ref(\n",
+ " file=nwbfile,\n",
+ " container=species,\n",
+ " attribute='Species_column',\n",
+ " field='species',\n",
+ " key='Mus musculus',\n",
+ " entity_id='NCBI_TAXON:10090',\n",
+ " entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090'\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "34ba228c",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " file_object_id | \n",
+ " objects_idx | \n",
+ " object_id | \n",
+ " files_idx | \n",
+ " object_type | \n",
+ " relative_path | \n",
+ " field | \n",
+ " keys_idx | \n",
+ " key | \n",
+ " entities_idx | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 20563fc8-2e7a-496f-a34b-c1500eb8a10c | \n",
+ " 0 | \n",
+ " f99412f9-04f8-4c62-8056-a2e45f3a1afe | \n",
+ " 0 | \n",
+ " VectorData | \n",
+ " | \n",
+ " | \n",
+ " 0 | \n",
+ " Homo sapiens | \n",
+ " 0 | \n",
+ " NCBI_TAXON:9606 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 20563fc8-2e7a-496f-a34b-c1500eb8a10c | \n",
+ " 1 | \n",
+ " 69c78f04-79d0-40a3-be42-a3cf6d438d90 | \n",
+ " 0 | \n",
+ " Subject | \n",
+ " species | \n",
+ " | \n",
+ " 1 | \n",
+ " Mus musculus | \n",
+ " 1 | \n",
+ " NCBI_TAXON:10090 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20563fc8-2e7a-496f-a34b-c1500eb8a10c | \n",
+ " 2 | \n",
+ " 20563fc8-2e7a-496f-a34b-c1500eb8a10c | \n",
+ " 0 | \n",
+ " NWBFile | \n",
+ " general/experimenter | \n",
+ " | \n",
+ " 2 | \n",
+ " Dichter, Benjamin K. | \n",
+ " 2 | \n",
+ " ORCID:0000-0001-5725-6910 | \n",
+ " https://orcid.org/0000-0001-5725-6910 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " file_object_id objects_idx \n",
+ "0 20563fc8-2e7a-496f-a34b-c1500eb8a10c 0 \\\n",
+ "1 20563fc8-2e7a-496f-a34b-c1500eb8a10c 1 \n",
+ "2 20563fc8-2e7a-496f-a34b-c1500eb8a10c 2 \n",
+ "\n",
+ " object_id files_idx object_type \n",
+ "0 f99412f9-04f8-4c62-8056-a2e45f3a1afe 0 VectorData \\\n",
+ "1 69c78f04-79d0-40a3-be42-a3cf6d438d90 0 Subject \n",
+ "2 20563fc8-2e7a-496f-a34b-c1500eb8a10c 0 NWBFile \n",
+ "\n",
+ " relative_path field keys_idx key entities_idx \n",
+ "0 0 Homo sapiens 0 \\\n",
+ "1 species 1 Mus musculus 1 \n",
+ "2 general/experimenter 2 Dichter, Benjamin K. 2 \n",
+ "\n",
+ " entity_id \n",
+ "0 NCBI_TAXON:9606 \\\n",
+ "1 NCBI_TAXON:10090 \n",
+ "2 ORCID:0000-0001-5725-6910 \n",
+ "\n",
+ " entity_uri \n",
+ "0 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... \n",
+ "1 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... \n",
+ "2 https://orcid.org/0000-0001-5725-6910 "
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9a401498",
+ "metadata": {},
+ "source": [
+ "## Write NWBFile and NERD separately"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "30ba3d98",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with NWBHDF5IO(\"NWBfile_ER_Example_child.nwb\", \"w\") as io:\n",
+ " io.write(nwbfile)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "4dcbf212",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "er.to_norm_tsv(path='./')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "886aeea4",
+ "metadata": {},
+ "source": [
+ "## Read the NWBFile with NERD with NWBHDF5IO"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c627264e",
+ "metadata": {},
+ "source": [
+ "As we saw at the beginning of this guide, users can set a link to an instance of the `ExternalResources` class. Users also have the option to link an existing instance of `ExternalResources` that was saved as a populated zip file. This is done by passing the location of that file to `NWBHDF5IO` through the `external_resources_path` parameter."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "ef785180",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/mavaylon/Research/NWB/hdmf2/hdmf/src/hdmf/container.py:210: UserWarning: ExternalResources is experimental -- it may be removed in the future and is not guaranteed to maintain backward compatibility\n",
+ " warn(_exp_warn_msg(cls))\n"
+ ]
+ }
+ ],
+ "source": [
+ "with NWBHDF5IO(\"sub-Haydn_desc-train_ecephys.nwb\", \"r\", external_resources_path='./') as io:\n",
+ " read_nwbfile = io.read()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "bd12b993",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " file_object_id | \n",
+ " objects_idx | \n",
+ " object_id | \n",
+ " files_idx | \n",
+ " object_type | \n",
+ " relative_path | \n",
+ " field | \n",
+ " keys_idx | \n",
+ " key | \n",
+ " entities_idx | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 20563fc8-2e7a-496f-a34b-c1500eb8a10c | \n",
+ " 0 | \n",
+ " f99412f9-04f8-4c62-8056-a2e45f3a1afe | \n",
+ " 0 | \n",
+ " VectorData | \n",
+ " | \n",
+ " | \n",
+ " 0 | \n",
+ " Homo sapiens | \n",
+ " 0 | \n",
+ " NCBI_TAXON:9606 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 20563fc8-2e7a-496f-a34b-c1500eb8a10c | \n",
+ " 1 | \n",
+ " 69c78f04-79d0-40a3-be42-a3cf6d438d90 | \n",
+ " 0 | \n",
+ " Subject | \n",
+ " species | \n",
+ " | \n",
+ " 1 | \n",
+ " Mus musculus | \n",
+ " 1 | \n",
+ " NCBI_TAXON:10090 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20563fc8-2e7a-496f-a34b-c1500eb8a10c | \n",
+ " 2 | \n",
+ " 20563fc8-2e7a-496f-a34b-c1500eb8a10c | \n",
+ " 0 | \n",
+ " NWBFile | \n",
+ " general/experimenter | \n",
+ " | \n",
+ " 2 | \n",
+ " Dichter, Benjamin K. | \n",
+ " 2 | \n",
+ " ORCID:0000-0001-5725-6910 | \n",
+ " https://orcid.org/0000-0001-5725-6910 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " file_object_id objects_idx \n",
+ "0 20563fc8-2e7a-496f-a34b-c1500eb8a10c 0 \\\n",
+ "1 20563fc8-2e7a-496f-a34b-c1500eb8a10c 1 \n",
+ "2 20563fc8-2e7a-496f-a34b-c1500eb8a10c 2 \n",
+ "\n",
+ " object_id files_idx object_type \n",
+ "0 f99412f9-04f8-4c62-8056-a2e45f3a1afe 0 VectorData \\\n",
+ "1 69c78f04-79d0-40a3-be42-a3cf6d438d90 0 Subject \n",
+ "2 20563fc8-2e7a-496f-a34b-c1500eb8a10c 0 NWBFile \n",
+ "\n",
+ " relative_path field keys_idx key entities_idx \n",
+ "0 0 Homo sapiens 0 \\\n",
+ "1 species 1 Mus musculus 1 \n",
+ "2 general/experimenter 2 Dichter, Benjamin K. 2 \n",
+ "\n",
+ " entity_id \n",
+ "0 NCBI_TAXON:9606 \\\n",
+ "1 NCBI_TAXON:10090 \n",
+ "2 ORCID:0000-0001-5725-6910 \n",
+ "\n",
+ " entity_uri \n",
+ "0 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... \n",
+ "1 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... \n",
+ "2 https://orcid.org/0000-0001-5725-6910 "
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "read_nwbfile.get_linked_resources().to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "774c5396",
+ "metadata": {},
+ "source": [
+ "# TermSet"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4e962354",
+ "metadata": {},
+ "source": [
+ "`TermSet` allows users to create their own subset of ontological references and is built upon the resources from LinkML.\n",
+ "\n",
+ "Use Cases:\n",
+ "1. Validation of data. Currently, validation with a `TermSet` is only supported for `Data`, but we are in talks to expand it to other fields, e.g., experimenters. \n",
+ "2. `TermSet` streamlines the user experience for adding new references to `ExternalResources` using `add_ref_term_set`.\n",
+ "\n",
+ "The first step is to create a `.yaml` file."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "335ad90e",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "92fe7e07",
+ "metadata": {},
+ "source": [
+ "LinkML Enumerations are collections of controlled string values. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "c1f11342",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "terms = TermSet(term_schema_path='./species_term_set.yaml')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9161f884",
+ "metadata": {},
+ "source": [
+ "The `TermSet` class has methods to help you view and retrieve terms."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "e0686900",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'Homo sapiens': Term_Info(id='NCBI_TAXON:9606', description='Humans', meaning='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606'),\n",
+ " 'Mus musculus': Term_Info(id='Ensemble:10090', description='Mouse', meaning='https://rest.ensembl.org/taxonomy/id/10090'),\n",
+ " 'Ursus arctos horribilis': Term_Info(id='NCBI_TAXON:116960', description='Brown Bear', meaning='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=116960'),\n",
+ " 'Myrmecophaga tridactyla': Term_Info(id='NCBI_TAXON:71006', description='Ant Eater', meaning='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=71006')}"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "terms.view_set"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "e5e54841",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Term_Info(id='NCBI_TAXON:9606', description='Humans', meaning='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "terms['Homo sapiens']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9ec81c76",
+ "metadata": {},
+ "source": [
+ "## Validate Data with a TermSet"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "15a6127e",
+ "metadata": {},
+ "source": [
+ "Data is validated when a `TermSet` is provided to `Data` or `VectorData`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a8701ef8",
+ "metadata": {},
+ "source": [
+ "#### Validate Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "d82004d6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "col1 = VectorData(\n",
+ " name='species',\n",
+ " description='...',\n",
+ " data=['Homo sapiens', 'Mus musculus'],\n",
+ " term_set=terms)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d0a4f222",
+ "metadata": {},
+ "source": [
+ "#### Validate Bad Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "c3b7d286",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "\"Mus muscuklus, Rattus norvegicus, Mus muscuklus, Rattus norvegicus\" is not in the term set.",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[52], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m col1 \u001b[38;5;241m=\u001b[39m \u001b[43mVectorData\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mspecies\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mdescription\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m...\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mHomo sapiens\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mMus muscuklus\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mRattus norvegicus\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mterm_set\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mterms\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/utils.py:644\u001b[0m, in \u001b[0;36mdocval..dec..func_call\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfunc_call\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 643\u001b[0m pargs \u001b[38;5;241m=\u001b[39m _check_args(args, kwargs)\n\u001b[0;32m--> 644\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/common/table.py:47\u001b[0m, in \u001b[0;36mVectorData.__init__\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[38;5;129m@docval\u001b[39m({\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28mstr\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdoc\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mthe name of this VectorData\u001b[39m\u001b[38;5;124m'\u001b[39m},\n\u001b[1;32m 39\u001b[0m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdescription\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28mstr\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdoc\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124ma description for this column\u001b[39m\u001b[38;5;124m'\u001b[39m},\n\u001b[1;32m 40\u001b[0m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m'\u001b[39m: (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124marray_data\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m'\u001b[39m),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 44\u001b[0m allow_positional\u001b[38;5;241m=\u001b[39mAllowPositional\u001b[38;5;241m.\u001b[39mWARNING)\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 46\u001b[0m description \u001b[38;5;241m=\u001b[39m popargs(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdescription\u001b[39m\u001b[38;5;124m'\u001b[39m, kwargs)\n\u001b[0;32m---> 47\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdescription \u001b[38;5;241m=\u001b[39m description\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/utils.py:644\u001b[0m, in \u001b[0;36mdocval..dec..func_call\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfunc_call\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 643\u001b[0m pargs \u001b[38;5;241m=\u001b[39m _check_args(args, kwargs)\n\u001b[0;32m--> 644\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/container.py:718\u001b[0m, in \u001b[0;36mData.__init__\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 716\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(bad_data)\u001b[38;5;241m!=\u001b[39m\u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 717\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m is not in the term set.\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin([\u001b[38;5;28mstr\u001b[39m(item) \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m bad_data]))\n\u001b[0;32m--> 718\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 719\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__data \u001b[38;5;241m=\u001b[39m data\n\u001b[1;32m 720\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
+ "\u001b[0;31mValueError\u001b[0m: \"Mus muscuklus, Rattus norvegicus, Mus muscuklus, Rattus norvegicus\" is not in the term set."
+ ]
+ }
+ ],
+ "source": [
+ "col1 = VectorData(\n",
+ " name='species',\n",
+ " description='...',\n",
+ " data=['Homo sapiens', 'Mus muscuklus', 'Rattus norvegicus'],\n",
+ " term_set=terms,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d8297b1a",
+ "metadata": {},
+ "source": [
+ "#### Validate Data on append"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "8aa2cd52",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# append \n",
+ "col1 = VectorData(\n",
+ " name='species',\n",
+ " description='...',\n",
+ " data=['Homo sapiens', 'Ursus arctos horribilis'],\n",
+ " term_set=terms,\n",
+ ")\n",
+ "col1.append('Mus musculus')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fca75eac",
+ "metadata": {},
+ "source": [
+ "#### Validate Bad Data on append"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "882d0632",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "\"Macaca mulatta\" is not in the term set.",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[54], line 8\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# append bad data\u001b[39;00m\n\u001b[1;32m 2\u001b[0m col1 \u001b[38;5;241m=\u001b[39m VectorData(\n\u001b[1;32m 3\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mspecies\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 4\u001b[0m description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m...\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 5\u001b[0m data\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHomo sapiens\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mUrsus arctos horribilis\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 6\u001b[0m term_set\u001b[38;5;241m=\u001b[39mterms,\n\u001b[1;32m 7\u001b[0m )\n\u001b[0;32m----> 8\u001b[0m \u001b[43mcol1\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mappend\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mMacaca mulatta\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/container.py:787\u001b[0m, in \u001b[0;36mData.append\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m 785\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 786\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m is not in the term set.\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m arg)\n\u001b[0;32m--> 787\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n",
+ "\u001b[0;31mValueError\u001b[0m: \"Macaca mulatta\" is not in the term set."
+ ]
+ }
+ ],
+ "source": [
+ "# append bad data\n",
+ "col1 = VectorData(\n",
+ " name='species',\n",
+ " description='...',\n",
+ " data=['Homo sapiens', 'Ursus arctos horribilis'],\n",
+ " term_set=terms,\n",
+ ")\n",
+ "col1.append('Macaca mulatta')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "40c8efd9",
+ "metadata": {},
+ "source": [
+ "#### Validate Data on extend"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "2d003b08",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# extend\n",
+ "col1 = VectorData(\n",
+ " name='species',\n",
+ " description='...',\n",
+ " data=['Homo sapiens'],\n",
+ " term_set=terms,\n",
+ ")\n",
+ "col1.extend(['Mus musculus', 'Ursus arctos horribilis'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e7755bb2",
+ "metadata": {},
+ "source": [
+ "#### Validate Bad Data on extend"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "id": "a234d85c",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "\"Macaca mulatta, Oryctolagus cuniculus\" is not in the term set.",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[56], line 8\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# extend bad data\u001b[39;00m\n\u001b[1;32m 2\u001b[0m col1 \u001b[38;5;241m=\u001b[39m VectorData(\n\u001b[1;32m 3\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mspecies\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 4\u001b[0m description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m...\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 5\u001b[0m data\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHomo sapiens\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 6\u001b[0m term_set\u001b[38;5;241m=\u001b[39mterms,\n\u001b[1;32m 7\u001b[0m )\n\u001b[0;32m----> 8\u001b[0m \u001b[43mcol1\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mextend\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mMacaca mulatta\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mOryctolagus cuniculus\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/common/table.py:87\u001b[0m, in \u001b[0;36mVectorData.extend\u001b[0;34m(self, ar, **kwargs)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;66;03m#################################################################################\u001b[39;00m\n\u001b[1;32m 81\u001b[0m \u001b[38;5;66;03m# Each subclass of VectorData should have its own extend method to ensure\u001b[39;00m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;66;03m# functionality AND efficiency of the extend operation. However, because currently\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;66;03m# they do not all have one of these methods, the only way to ensure functionality\u001b[39;00m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;66;03m# is with calls to add_row. Because that is inefficient for basic VectorData,\u001b[39;00m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;66;03m# this check is added to ensure we always call extend on a basic VectorData.\u001b[39;00m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__mro__\u001b[39m[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m==\u001b[39m VectorData:\n\u001b[0;32m---> 87\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mextend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mar\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m ar:\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/container.py:807\u001b[0m, in \u001b[0;36mData.extend\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m 805\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(bad_data)\u001b[38;5;241m!=\u001b[39m\u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 806\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m is not in the term set.\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin([\u001b[38;5;28mstr\u001b[39m(item) \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m bad_data]))\n\u001b[0;32m--> 807\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n",
+ "\u001b[0;31mValueError\u001b[0m: \"Macaca mulatta, Oryctolagus cuniculus\" is not in the term set."
+ ]
+ }
+ ],
+ "source": [
+ "# extend bad data\n",
+ "col1 = VectorData(\n",
+ " name='species',\n",
+ " description='...',\n",
+ " data=['Homo sapiens'],\n",
+ " term_set=terms,\n",
+ ")\n",
+ "col1.extend(['Macaca mulatta', 'Oryctolagus cuniculus'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dc32c252",
+ "metadata": {},
+ "source": [
+ "#### Validate with add_row example 1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c1936d0e",
+ "metadata": {},
+ "source": [
+ "Whether new data is validated is determined by whether the `VectorData` column was initialized with a `term_set`. `DynamicTable` will automatically check for columns that have a `term_set`. If any of the new data is actually *bad* data, then `add_row` will not add any of the new data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "fe37a3c3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "col1 = VectorData(\n",
+ " name='Species_1',\n",
+ " description='...',\n",
+ " data=['Homo sapiens'],\n",
+ " term_set=terms,\n",
+ ")\n",
+ "col2 = VectorData(\n",
+ " name='Species_2',\n",
+ " description='...',\n",
+ " data=['Mus musculus'],\n",
+ " term_set=terms,\n",
+ ")\n",
+ "species = DynamicTable(name='species', description='My species', columns=[col1,col2])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "0cea528f",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "\"bad\" is not in the term set.",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[58], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# add bad data\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mspecies\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_row\u001b[49m\u001b[43m(\u001b[49m\u001b[43mSpecies_1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mMus musculus\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mSpecies_2\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbad\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/utils.py:644\u001b[0m, in \u001b[0;36mdocval..dec..func_call\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfunc_call\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 643\u001b[0m pargs \u001b[38;5;241m=\u001b[39m _check_args(args, kwargs)\n\u001b[0;32m--> 644\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/common/table.py:605\u001b[0m, in \u001b[0;36mDynamicTable.add_row\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 603\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(bad_data)\u001b[38;5;241m!=\u001b[39m\u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 604\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m is not in the term set.\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin([\u001b[38;5;28mstr\u001b[39m(item) \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m bad_data]))\n\u001b[0;32m--> 605\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 607\u001b[0m \u001b[38;5;66;03m# check to see if any of the extra columns just need to be added\u001b[39;00m\n\u001b[1;32m 608\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m extra_columns:\n",
+ "\u001b[0;31mValueError\u001b[0m: \"bad\" is not in the term set."
+ ]
+ }
+ ],
+ "source": [
+ "# add bad data\n",
+ "species.add_row(Species_1='Mus musculus', Species_2='bad')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "b6ccf564",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Species_1 | \n",
+ " Species_2 | \n",
+ "
\n",
+ " \n",
+ " id | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Homo sapiens | \n",
+ " Mus musculus | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Species_1 Species_2\n",
+ "id \n",
+ "0 Homo sapiens Mus musculus"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "species.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "24141509",
+ "metadata": {},
+ "source": [
+ "#### Validate with add_row example 2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4497c9fa",
+ "metadata": {},
+ "source": [
+ "`add_row` does not validate every column. It only validates the data for `VectorData` columns that were initialized with a `term_set`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "id": "a10cc3f3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "col1 = VectorData(\n",
+ " name='Species_1',\n",
+ " description='...',\n",
+ " data=['Homo sapiens'],\n",
+ " term_set=terms,\n",
+ ")\n",
+ "col2 = VectorData(\n",
+ " name='Species_2',\n",
+ " description='...',\n",
+ " data=['Mus musculus'],\n",
+ ")\n",
+ "species = DynamicTable(name='species', description='My species', columns=[col1,col2])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "id": "cccd4a06",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "\"Mus mrusculus\" is not in the term set.",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[61], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mspecies\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_row\u001b[49m\u001b[43m(\u001b[49m\u001b[43mSpecies_1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mMus mrusculus\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mSpecies_2\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mrat\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/utils.py:644\u001b[0m, in \u001b[0;36mdocval..dec..func_call\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfunc_call\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 643\u001b[0m pargs \u001b[38;5;241m=\u001b[39m _check_args(args, kwargs)\n\u001b[0;32m--> 644\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/common/table.py:605\u001b[0m, in \u001b[0;36mDynamicTable.add_row\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 603\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(bad_data)\u001b[38;5;241m!=\u001b[39m\u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 604\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m is not in the term set.\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin([\u001b[38;5;28mstr\u001b[39m(item) \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m bad_data]))\n\u001b[0;32m--> 605\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 607\u001b[0m \u001b[38;5;66;03m# check to see if any of the extra columns just need to be added\u001b[39;00m\n\u001b[1;32m 608\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m extra_columns:\n",
+ "\u001b[0;31mValueError\u001b[0m: \"Mus mrusculus\" is not in the term set."
+ ]
+ }
+ ],
+ "source": [
+ "species.add_row(Species_1='Mus mrusculus', Species_2='rat')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "id": "d5dfbcc5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Species_1 | \n",
+ " Species_2 | \n",
+ "
\n",
+ " \n",
+ " id | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Homo sapiens | \n",
+ " Mus musculus | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Species_1 Species_2\n",
+ "id \n",
+ "0 Homo sapiens Mus musculus"
+ ]
+ },
+ "execution_count": 62,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "species.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4b71b2d1",
+ "metadata": {},
+ "source": [
+ "#### Validate with add_row example 3"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0ff5fd62",
+ "metadata": {},
+ "source": [
+ "`add_row` is able to distinguish which columns have valid data: only the values that fail validation are reported in the error."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "8821f3d4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "col1 = VectorData(\n",
+ " name='Species_1',\n",
+ " description='...',\n",
+ " data=['Homo sapiens'],\n",
+ " term_set=terms,\n",
+ ")\n",
+ "col2 = VectorData(\n",
+ " name='Species_2',\n",
+ " description='...',\n",
+ " data=['Mus musculus'],\n",
+ " term_set=terms,\n",
+ ")\n",
+ "species = DynamicTable(name='species', description='My species', columns=[col1,col2])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "id": "9325dcdb",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "\"rat\" is not in the term set.",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[64], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mspecies\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_row\u001b[49m\u001b[43m(\u001b[49m\u001b[43mSpecies_1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mUrsus arctos horribilis\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mSpecies_2\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mrat\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/utils.py:644\u001b[0m, in \u001b[0;36mdocval..dec..func_call\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfunc_call\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 643\u001b[0m pargs \u001b[38;5;241m=\u001b[39m _check_args(args, kwargs)\n\u001b[0;32m--> 644\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/common/table.py:605\u001b[0m, in \u001b[0;36mDynamicTable.add_row\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 603\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(bad_data)\u001b[38;5;241m!=\u001b[39m\u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 604\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m is not in the term set.\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin([\u001b[38;5;28mstr\u001b[39m(item) \u001b[38;5;28;01mfor\u001b[39;00m item \u001b[38;5;129;01min\u001b[39;00m bad_data]))\n\u001b[0;32m--> 605\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 607\u001b[0m \u001b[38;5;66;03m# check to see if any of the extra columns just need to be added\u001b[39;00m\n\u001b[1;32m 608\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m extra_columns:\n",
+ "\u001b[0;31mValueError\u001b[0m: \"rat\" is not in the term set."
+ ]
+ }
+ ],
+ "source": [
+ "species.add_row(Species_1='Ursus arctos horribilis', Species_2='rat')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d589e0ac",
+ "metadata": {},
+ "source": [
+ "#### Validate with add_column"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ae9823ee",
+ "metadata": {},
+ "source": [
+ "`add_column` also supports validation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "id": "92e433ac",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "col1 = VectorData(\n",
+ " name='col1',\n",
+ " description='column #1',\n",
+ " data=[1, 2],\n",
+ ")\n",
+ "species = DynamicTable(name='species', description='My species', columns=[col1],)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "id": "06d28bf1",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "'Mus muscuflus' is not in the term set.",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[66], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mspecies\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_column\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mspecies\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mdescription\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mSpecies data\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mHomo sapiens\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mMus muscuflus\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mterm_set\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mterms\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/utils.py:644\u001b[0m, in \u001b[0;36mdocval..dec..func_call\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfunc_call\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 643\u001b[0m pargs \u001b[38;5;241m=\u001b[39m _check_args(args, kwargs)\n\u001b[0;32m--> 644\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Research/NWB/hdmf2/hdmf/src/hdmf/common/table.py:713\u001b[0m, in \u001b[0;36mDynamicTable.add_column\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 711\u001b[0m bad_data_string \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(bad_data)[\u001b[38;5;241m1\u001b[39m:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m 712\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m is not in the term set.\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m bad_data_string)\n\u001b[0;32m--> 713\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 715\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(index, VectorIndex):\n\u001b[1;32m 716\u001b[0m warn(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPassing a VectorIndex in for index may lead to unexpected behavior. This functionality will be \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 717\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdeprecated in a future version of HDMF.\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;167;01mFutureWarning\u001b[39;00m)\n",
+ "\u001b[0;31mValueError\u001b[0m: 'Mus muscuflus' is not in the term set."
+ ]
+ }
+ ],
+ "source": [
+ "species.add_column(name='species',\n",
+ " description='Species data',\n",
+ " data=['Homo sapiens', 'Mus muscuflus'],\n",
+ " term_set=terms)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b7246fdb",
+ "metadata": {},
+ "source": [
+ "## Add ExternalResources using a TermSet"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cdee6fe1",
+ "metadata": {},
+ "source": [
+ "`TermSet` allows for an easier way to add references to `ExternalResources`. The user will create a `.yaml` file that will contain enumerations. These enumerations serve as the `entities`. Using the `TermSet` does bring greater structure to the naming convention for `Key` values in `ExternalResources`. `Key` values will have to match the name of the term in the `TermSet`. For example, if I have species data, the species values need to be the proper ontological terms in order to be validated and pulled from the `TermSet`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7da99163",
+ "metadata": {},
+ "source": [
+ "Rules:\n",
+ "The `TermSet` must exist on the object that will use it. It cannot be used on a non-NWB data type."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "9aff8ccb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "session_start_time = datetime(2018, 4, 25, 2, 30, 3, tzinfo=tz.gettz(\"US/Pacific\"))\n",
+ "\n",
+ "nwbfile = NWBFile(\n",
+ " session_description=\"Mouse exploring an open field\", # required\n",
+ " identifier=\"Mouse5_Day3\", # required\n",
+ " session_start_time=session_start_time, # required\n",
+ " session_id=\"session_1234\", # optional\n",
+ " experimenter=[\"Dichter, Benjamin K.\", \"Smith, Alex\"], # optional\n",
+ " lab=\"My Lab Name\", # optional\n",
+ " institution=\"University of My Institution\", # optional\n",
+ " related_publications=\"DOI:10.1016/j.neuron.2016.12.011\", # optional\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "id": "99f96cc5",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/mavaylon/Research/NWB/hdmf2/hdmf/src/hdmf/container.py:210: UserWarning: ExternalResources is experimental -- it may be removed in the future and is not guaranteed to maintain backward compatibility\n",
+ " warn(_exp_warn_msg(cls))\n"
+ ]
+ }
+ ],
+ "source": [
+ "er = ExternalResources() \n",
+ "nwbfile.external_resources=er"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "id": "d6b5a4a4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "col1 = VectorData(\n",
+ " name='Species_Data',\n",
+ " description='species from NCBI and Ensemble',\n",
+ " data=['Homo sapiens', 'Ursus arctos horribilis'],\n",
+ " term_set=terms,\n",
+ ")\n",
+ "\n",
+ "species = DynamicTable(name='species', description='My species', columns=[col1],)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "id": "0f1faa83",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.add_ref_term_set(file=nwbfile,\n",
+ " container=species,\n",
+ " attribute='Species_Data',\n",
+ " ) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "id": "d84cb179",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " file_object_id | \n",
+ " objects_idx | \n",
+ " object_id | \n",
+ " files_idx | \n",
+ " object_type | \n",
+ " relative_path | \n",
+ " field | \n",
+ " keys_idx | \n",
+ " key | \n",
+ " entities_idx | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 04090278-1ff0-40b5-be97-480d03eef0e8 | \n",
+ " 0 | \n",
+ " ae1e3e49-61e5-474b-9ee0-6a5386a0d52f | \n",
+ " 0 | \n",
+ " VectorData | \n",
+ " | \n",
+ " | \n",
+ " 0 | \n",
+ " Homo sapiens | \n",
+ " 0 | \n",
+ " NCBI_TAXON:9606 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 04090278-1ff0-40b5-be97-480d03eef0e8 | \n",
+ " 0 | \n",
+ " ae1e3e49-61e5-474b-9ee0-6a5386a0d52f | \n",
+ " 0 | \n",
+ " VectorData | \n",
+ " | \n",
+ " | \n",
+ " 1 | \n",
+ " Ursus arctos horribilis | \n",
+ " 1 | \n",
+ " NCBI_TAXON:116960 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " file_object_id objects_idx \n",
+ "0 04090278-1ff0-40b5-be97-480d03eef0e8 0 \\\n",
+ "1 04090278-1ff0-40b5-be97-480d03eef0e8 0 \n",
+ "\n",
+ " object_id files_idx object_type relative_path \n",
+ "0 ae1e3e49-61e5-474b-9ee0-6a5386a0d52f 0 VectorData \\\n",
+ "1 ae1e3e49-61e5-474b-9ee0-6a5386a0d52f 0 VectorData \n",
+ "\n",
+ " field keys_idx key entities_idx entity_id \n",
+ "0 0 Homo sapiens 0 NCBI_TAXON:9606 \\\n",
+ "1 1 Ursus arctos horribilis 1 NCBI_TAXON:116960 \n",
+ "\n",
+ " entity_uri \n",
+ "0 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... \n",
+ "1 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... "
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f1431cfd",
+ "metadata": {},
+ "source": [
+ "## Auto-add to ER with Termset (In Development)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b2fe1a05",
+ "metadata": {},
+ "source": [
+ "In order to take advantage of auto-adding references to `ExternalResources`, the data needs to first be validated and then be added to the `NWBFile`. This functionality has limited use cases, but will be expanded on in the future to support auto-add to `ExternalResources` for other NWB data types. Currently, only `DynamicTable` is supported.\n",
+ "\n",
+ "This requires validation and the use of a `TermSet`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "19f9cc43",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "terms = TermSet(name='Species_TermSet', term_schema_path='/Users/mavaylon/Research/NWB/species_term_set.yaml')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "9a37d754",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "session_start_time = datetime(2018, 4, 25, 2, 30, 3, tzinfo=tz.gettz(\"US/Pacific\"))\n",
+ "\n",
+ "nwbfile = NWBFile(\n",
+ " session_description=\"Mouse exploring an open field\", # required\n",
+ " identifier=\"Mouse5_Day3\", # required\n",
+ " session_start_time=session_start_time, # required\n",
+ " session_id=\"session_1234\", # optional\n",
+ " experimenter=[\"Dichter, Benjamin K.\", \"Smith, Alex\"], # optional\n",
+ " lab=\"My Lab Name\", # optional\n",
+ " institution=\"University of My Institution\", # optional\n",
+ " related_publications=\"DOI:10.1016/j.neuron.2016.12.011\", # optional\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "f13989a5",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/mavaylon/Research/NWB/hdmf/src/hdmf/container.py:205: UserWarning: ExternalResources is experimental -- it may be removed in the future and is not guaranteed to maintain backward compatibility\n",
+ " warn(_exp_warn_msg(cls))\n"
+ ]
+ }
+ ],
+ "source": [
+ "er = ExternalResources() \n",
+ "nwbfile.external_resources=er"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "a5a293c8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "col1 = VectorData(\n",
+ " name='Species_1',\n",
+ " description='...',\n",
+ " data=['Homo sapiens'],\n",
+ " term_set=terms,\n",
+ ")\n",
+ "col2 = VectorData(\n",
+ " name='Species_2',\n",
+ " description='...',\n",
+ " data=['Mus musculus'],\n",
+ " term_set=terms,\n",
+ ")\n",
+ "\n",
+ "species = DynamicTable(name='species', description='My species', columns=[col1,col2],)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "549f2698",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nwbfile.add_acquisition(species)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bdb9e1da",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/notebooks/NERD_TermSet_Quickstart.ipynb b/docs/notebooks/NERD_TermSet_Quickstart.ipynb
new file mode 100644
index 000000000..567f3aa21
--- /dev/null
+++ b/docs/notebooks/NERD_TermSet_Quickstart.ipynb
@@ -0,0 +1,855 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "9258314f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from hdmf.common import DynamicTable, VectorData\n",
+ "from hdmf.term_set import TermSet\n",
+ "\n",
+ "from pynwb.resources import ExternalResources\n",
+ "from pynwb import NWBFile, NWBHDF5IO\n",
+ "from pynwb import get_type_map as tm\n",
+ "from pynwb.file import Subject\n",
+ "\n",
+ "from datetime import datetime\n",
+ "from dateutil import tz\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8ce25df5",
+ "metadata": {},
+ "source": [
+ "## Dev Days Note:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a83d43e7",
+ "metadata": {},
+ "source": [
+ "To run this notebook please download the nwb files under the \"DynamicTermset and ExternalResources\" project in the \"Materials\" subsection marked \"NWB Files for Tutorials\"."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "81a2a82f",
+ "metadata": {},
+ "source": [
+ "# NERD and TermSet QuickStart"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0b3bc6da",
+ "metadata": {},
+ "source": [
+ "The NWB External Resources Data (NERD) data structure supports annotation of NWB data files by linking terms used in the data to external resources, such as ontologies, brain atlases, and persistent digital identifiers. NERD files are external to NWB files, enabling annotation of both new and existing data without requiring modification of existing data. \n",
+ "\n",
+ "This tutorial focuses on getting users up to speed quickly on how to use the `NERD` data structure in conjunction with the `TermSet` class. For a detailed guide that covers even more examples, please refer to the [NERD guide](NERD_TermSet_How_to_Guide.ipynb).\n",
+ "\n"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "5fdd2101",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bb99dc70",
+ "metadata": {},
+ "source": [
+ "## NERD Example"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4e7ab685",
+ "metadata": {},
+ "source": [
+ "In the following example, we will highlight the fact that `NERD` is written separately from the `NWBFile`. This is to allow users to add metadata references to existing files. Loading in the file, we can see multiple cases where contextual metadata will be important with regard to creating and sharing FAIR data. We can map the experimenter to a digital identifier (e.g., ORCID), the electrode group location can be mapped to a brain atlas, and the `Subject` species attribute can be mapped to the NCBI Taxonomy.\n",
+ "\n",
+ "Check out the following links to explore [ExternalResources](https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/common/resources.py) and [NWBFile](https://github.com/NeurodataWithoutBorders/pynwb/blob/dev/src/pynwb/file.py)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "72938867",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " session_description: Data from monkey Haydn performing ready-set-go time interval reproduction task. This file contains continuous segments of the full session on 2016-12-11 that can be used for training models for the Neural Latents Benchmark.
identifier: 8969f328-3929-11ec-8077-43176b153428
session_start_time: 2016-12-11 00:00:00-05:00
timestamps_reference_time: 2016-12-11 00:00:00-05:00
file_create_date
2021-10-29 22:31:50.351047-04:00
experimenter: ('Hansem Sohn',)
related_publications: ('http://dx.doi.org/10.1016/j.neuron.2019.06.012',)
keywords
epoch_tags: set()
electrodes
description: metadata about extracellular electrodes
id
colnames: ('x', 'y', 'z', 'imp', 'location', 'filtering', 'group', 'group_name')
columns: (, , , , , , , )
electrode_groups (3)
electrode_group_1
description: Electrodes on a neural probe
location: Dorsomedial frontal cortex
device
description: Linear probe with 24 recording channels
manufacturer: Plexon Inc.
electrode_group_2
description: Electrodes on a neural probe
location: Dorsomedial frontal cortex
device
description: Linear probe with 24 recording channels
manufacturer: Plexon Inc.
electrode_group_3
description: Electrodes on a neural probe
location: Dorsomedial frontal cortex
device
description: Linear probe with 24 recording channels
manufacturer: Plexon Inc.
devices (3)
electrode_probe_1
description: Linear probe with 24 recording channels
manufacturer: Plexon Inc.
electrode_probe_2
description: Linear probe with 24 recording channels
manufacturer: Plexon Inc.
electrode_probe_3
description: Linear probe with 24 recording channels
manufacturer: Plexon Inc.
intervals (1)
trials
description: experimental trials
id
colnames: ('start_time', 'stop_time', 'fix_on_time', 'fix_time', 'target_on_time', 'ready_time', 'set_time', 'go_time', 'target_acq_time', 'reward_time', 'bad_time', 'is_short', 'is_eye', 'theta', 'ts', 'tp', 'fix_time_dur', 'target_time_dur', 'iti', 'reward_dur', 'is_outlier', 'split')
columns: (, , , , , , , , , , , , , , , , , , , , , )
subject
age: P4Y
sex: M
species: Macaca mulatta
subject_id: Haydn
trials
description: experimental trials
id
colnames: ('start_time', 'stop_time', 'fix_on_time', 'fix_time', 'target_on_time', 'ready_time', 'set_time', 'go_time', 'target_acq_time', 'reward_time', 'bad_time', 'is_short', 'is_eye', 'theta', 'ts', 'tp', 'fix_time_dur', 'target_time_dur', 'iti', 'reward_dur', 'is_outlier', 'split')
columns: (, , , , , , , , , , , , , , , , , , , , , )
units
description: data on spiking units
id
colnames: ('heldout', 'spike_times', 'obs_intervals')
columns: (, , , , )
waveform_unit: volts
experiment_description: Cognitive timing task in which subject attempts to reproduce interval between two cues
session_id: 20161211
lab: Jazayeri
institution: Massachusetts Institute of Technology
"
+ ],
+ "text/plain": [
+ "root pynwb.file.NWBFile at 0x4892684976\n",
+ "Fields:\n",
+ " devices: {\n",
+ " electrode_probe_1 ,\n",
+ " electrode_probe_2 ,\n",
+ " electrode_probe_3 \n",
+ " }\n",
+ " electrode_groups: {\n",
+ " electrode_group_1 ,\n",
+ " electrode_group_2 ,\n",
+ " electrode_group_3 \n",
+ " }\n",
+ " electrodes: electrodes \n",
+ " experiment_description: Cognitive timing task in which subject attempts to reproduce interval between two cues\n",
+ " experimenter: ['Hansem Sohn']\n",
+ " file_create_date: [datetime.datetime(2021, 10, 29, 22, 31, 50, 351047, tzinfo=tzoffset(None, -14400))]\n",
+ " identifier: 8969f328-3929-11ec-8077-43176b153428\n",
+ " institution: Massachusetts Institute of Technology\n",
+ " intervals: {\n",
+ " trials \n",
+ " }\n",
+ " lab: Jazayeri\n",
+ " related_publications: ['http://dx.doi.org/10.1016/j.neuron.2019.06.012']\n",
+ " session_description: Data from monkey Haydn performing ready-set-go time interval reproduction task. This file contains continuous segments of the full session on 2016-12-11 that can be used for training models for the Neural Latents Benchmark.\n",
+ " session_id: 20161211\n",
+ " session_start_time: 2016-12-11 00:00:00-05:00\n",
+ " subject: subject pynwb.file.Subject at 0x4892800432\n",
+ "Fields:\n",
+ " age: P4Y\n",
+ " sex: M\n",
+ " species: Macaca mulatta\n",
+ " subject_id: Haydn\n",
+ "\n",
+ " timestamps_reference_time: 2016-12-11 00:00:00-05:00\n",
+ " trials: trials \n",
+ " units: units "
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "with NWBHDF5IO(\"sub-Haydn_desc-train_ecephys.nwb\", \"r\") as io:\n",
+ " read_nwbfile = io.read()\n",
+ "read_nwbfile"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2943062e",
+ "metadata": {},
+ "source": [
+ "First, we are going to link the `ExternalResources` class to the file we want to annotate to ensure we can correctly track the location of all the data objects that contain terms we want to describe via external references. This can also be accomplished by setting the `file` field as we will see later on."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "90ca25df",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/mavaylon/Research/NWB/hdmf2/hdmf/src/hdmf/container.py:210: UserWarning: ExternalResources is experimental -- it may be removed in the future and is not guaranteed to maintain backward compatibility\n",
+ " warn(_exp_warn_msg(cls))\n"
+ ]
+ }
+ ],
+ "source": [
+ "er = ExternalResources() \n",
+ "read_nwbfile.link_resources(er)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2bb65cee",
+ "metadata": {},
+ "source": [
+ "To create the metadata linkage from the experimenter, electrode group location, and subject species to their respective external references, the user can use the `add_ref` method from `ExternalResources`. The user provides:\n",
+ "1. `file`: This is an optional parameter if the `ExternalResources` instance has been linked to a file. When linked, the file will be used automatically.\n",
+ "2. `container`: This is the NWB Object that is either being linked or the object that stores the attribute being linked. In the case of \"experimenter\", the container is the object that stores the experimenter attribute.\n",
+ "3. `attribute`: This is an optional field. It is set when the reference is being added for the \"attribute\" of a NWB object. \n",
+ "4. `key`: This is the data-defined term to represent the reference. For example, the `NWBFile` we loaded in has the value of experimenter set to \"Hansem Sohn\", which would be the value for key.\n",
+ "5. `entity_id`: This is the ID for the resource the user wants to use. \n",
+ "6. `entity_uri`: This is the URI for the resource the user wants to use. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "820c117d",
+ "metadata": {},
+ "source": [
+ "### NWBFile Experimenter"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "4eca7dad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(,\n",
+ " )"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.add_ref(\n",
+ " container=read_nwbfile,\n",
+ " attribute=\"experimenter\",\n",
+ " key=\"Hansem Sohn\",\n",
+ " entity_id='ORCID:0000-0001-8593-7473', \n",
+ " entity_uri='https://orcid.org/0000-0001-8593-7473')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8d0d6bc9",
+ "metadata": {},
+ "source": [
+ "### ElectrodeGroup Location"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "df773b9a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(,\n",
+ " )"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.add_ref(\n",
+ " container=read_nwbfile.electrode_groups['electrode_group_1'],\n",
+ " attribute=\"location\",\n",
+ " key=\"Dorsomedial frontal cortex\",\n",
+ " entity_id=\"DB09\", \n",
+ " entity_uri=\"https://scalablebrainatlas.incf.org/macaque/DB09\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0d2ad54d",
+ "metadata": {},
+ "source": [
+ "### Subject Species"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "46f3b92b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(,\n",
+ " )"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.add_ref(\n",
+ " container=read_nwbfile.subject,\n",
+ " attribute='species',\n",
+ " key='Macaca mulatta',\n",
+ " entity_id='NCBI_TAXON:9544',\n",
+ " entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/id=9544')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0cc2760e",
+ "metadata": {},
+ "source": [
+ "We can see that the linked `ExternalResources` instance has been populated."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "5d4f4626",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " keys
files
entities
objects
object_keys
entity_keys
"
+ ],
+ "text/plain": [
+ "external_resources pynwb.resources.ExternalResources at 0x4892961776\n",
+ "Fields:\n",
+ " entities: entities \n",
+ " entity_keys: entity_keys \n",
+ " files: files \n",
+ " keys: keys \n",
+ " object_keys: object_keys \n",
+ " objects: objects "
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "read_nwbfile.get_linked_resources()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0389bdab",
+ "metadata": {},
+ "source": [
+ "We can visualize `ExternalResources` as a single table:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "c5a697a9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " file_object_id | \n",
+ " objects_idx | \n",
+ " object_id | \n",
+ " files_idx | \n",
+ " object_type | \n",
+ " relative_path | \n",
+ " field | \n",
+ " keys_idx | \n",
+ " key | \n",
+ " entities_idx | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 0 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 0 | \n",
+ " NWBFile | \n",
+ " general/experimenter | \n",
+ " | \n",
+ " 0 | \n",
+ " Hansem Sohn | \n",
+ " 0 | \n",
+ " ORCID:0000-0001-8593-7473 | \n",
+ " https://orcid.org/0000-0001-8593-7473 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 1 | \n",
+ " f8641805-f93c-446f-8194-5fce08d22dbb | \n",
+ " 0 | \n",
+ " ElectrodeGroup | \n",
+ " location | \n",
+ " | \n",
+ " 1 | \n",
+ " Dorsomedial frontal cortex | \n",
+ " 1 | \n",
+ " DB09 | \n",
+ " https://scalablebrainatlas.incf.org/macaque/DB09 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 9c3a5c45-316c-493d-a712-03a01b662ee9 | \n",
+ " 2 | \n",
+ " 5ee39486-8625-4ac3-9691-ce9d724812a4 | \n",
+ " 0 | \n",
+ " Subject | \n",
+ " species | \n",
+ " | \n",
+ " 2 | \n",
+ " Macaca mulatta | \n",
+ " 2 | \n",
+ " NCBI_TAXON:9544 | \n",
+ " https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " file_object_id objects_idx \n",
+ "0 9c3a5c45-316c-493d-a712-03a01b662ee9 0 \\\n",
+ "1 9c3a5c45-316c-493d-a712-03a01b662ee9 1 \n",
+ "2 9c3a5c45-316c-493d-a712-03a01b662ee9 2 \n",
+ "\n",
+ " object_id files_idx object_type \n",
+ "0 9c3a5c45-316c-493d-a712-03a01b662ee9 0 NWBFile \\\n",
+ "1 f8641805-f93c-446f-8194-5fce08d22dbb 0 ElectrodeGroup \n",
+ "2 5ee39486-8625-4ac3-9691-ce9d724812a4 0 Subject \n",
+ "\n",
+ " relative_path field keys_idx key \n",
+ "0 general/experimenter 0 Hansem Sohn \\\n",
+ "1 location 1 Dorsomedial frontal cortex \n",
+ "2 species 2 Macaca mulatta \n",
+ "\n",
+ " entities_idx entity_id \n",
+ "0 0 ORCID:0000-0001-8593-7473 \\\n",
+ "1 1 DB09 \n",
+ "2 2 NCBI_TAXON:9544 \n",
+ "\n",
+ " entity_uri \n",
+ "0 https://orcid.org/0000-0001-8593-7473 \n",
+ "1 https://scalablebrainatlas.incf.org/macaque/DB09 \n",
+ "2 https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/... "
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df=read_nwbfile.get_linked_resources().to_dataframe()\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7ea19c25",
+ "metadata": {},
+ "source": [
+ "As mentioned prior, `NERD` and the `NWBFile` are written separately."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d84e67a7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with NWBHDF5IO(\"NWBfile_ER_Example.nwb\", \"w\") as io:\n",
+ " io.write(nwbfile)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fb53f9b2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "er.to_norm_tsv(path='./')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "00dcc877",
+ "metadata": {},
+ "source": [
+ "To see the various query methods and the explicit set of rules within `NERD`, please refer to the [NERD guide](NERD_TermSet_How_to_Guide.ipynb)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2502229f",
+ "metadata": {},
+ "source": [
+ "## NERD Example with TermSet"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ad6c8992",
+ "metadata": {},
+ "source": [
+ "`TermSet` allows users to create their own subset of terms with ontological references and is built upon the resources from LinkML.\n",
+ "\n",
+ "Use Cases:\n",
+ "1. Validation of data. Currently, validation with a `TermSet` is only supported for `Data`, but we are discussing ways to expand this to any attribute, e.g., experimenters. \n",
+ "2. `TermSet` streamlines the user experience for adding new references to `ExternalResources` using `add_ref_term_set`.\n",
+ "\n",
+ "To see how to create a [TermSet](https://github.com/hdmf-dev/hdmf/blob/dev/src/hdmf/term_set.py), refer to the [NERD guide](NERD_TermSet_How_to_Guide.ipynb) and these relevant [LinkML resources](https://linkml.io/linkml/intro/tutorial06.html)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5b432ac0",
+ "metadata": {},
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "57c2c342",
+ "metadata": {},
+ "source": [
+ "In this example, we will create a brand-new `NWBFile` that stores a `DynamicTable` of species data. We create a new column, i.e., a new instance of `VectorData`, that uses the optional `term_set` field. When provided a `TermSet`, the data will be validated according to that set of terms.\n",
+ "\n",
+ "For more details on how we handle validation with a `TermSet` please refer to the [NERD guide](NERD_TermSet_How_to_Guide.ipynb)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "159090bc",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/mavaylon/Research/NWB/hdmf2/hdmf/src/hdmf/container.py:210: UserWarning: ExternalResources is experimental -- it may be removed in the future and is not guaranteed to maintain backward compatibility\n",
+ " warn(_exp_warn_msg(cls))\n"
+ ]
+ }
+ ],
+ "source": [
+ "terms = TermSet(term_schema_path='./experimenter_term_set.yaml')\n",
+ "er = ExternalResources() "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "e1c5eb2a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "session_start_time = datetime(2018, 4, 25, 2, 30, 3, tzinfo=tz.gettz(\"US/Pacific\"))\n",
+ "\n",
+ "nwbfile = NWBFile(\n",
+ " session_description=\"Mouse exploring an open field\", \n",
+ " identifier=\"Mouse5_Day3\", \n",
+ " session_start_time=session_start_time, \n",
+ " session_id=\"session_1234\", \n",
+ " experimenter=[\"Dichter, Benjamin K.\", \"Rubel, Oliver\"], \n",
+ " lab=\"My Lab Name\", \n",
+ " institution=\"University of My Institution\", \n",
+ " related_publications=\"DOI:10.1016/j.neuron.2016.12.011\", \n",
+ ")\n",
+ "nwbfile.subject = Subject(\n",
+ " subject_id=\"001\",\n",
+ " age=\"P90D\",\n",
+ " description=\"mouse 5\",\n",
+ " species=\"Mus musculus\",\n",
+ " sex=\"M\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "f3f3a50f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nwbfile.get_linked_resources()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "078157dc",
+ "metadata": {},
+ "source": [
+ "As mentioned previously, the `add_ref_term_set` method streamlines the original `add_ref` method. The `key` field is removed because the data values themselves are used as keys, and the `entity_id` and `entity_uri` fields are populated from the values within the `TermSet`. If the user linked the `NWBFile` to the `ExternalResources` instance as in the prior example, the call is streamlined further, requiring only the `container` and possibly an `attribute`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "098db4c3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.add_ref_term_set(container=nwbfile,\n",
+ " attribute='experimenter',\n",
+ " term_set=terms\n",
+ " ) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "e757eae8",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " file_object_id | \n",
+ " objects_idx | \n",
+ " object_id | \n",
+ " files_idx | \n",
+ " object_type | \n",
+ " relative_path | \n",
+ " field | \n",
+ " keys_idx | \n",
+ " key | \n",
+ " entities_idx | \n",
+ " entity_id | \n",
+ " entity_uri | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 7cc452e3-925d-489d-9d64-01c6b227a906 | \n",
+ " 0 | \n",
+ " 7cc452e3-925d-489d-9d64-01c6b227a906 | \n",
+ " 0 | \n",
+ " NWBFile | \n",
+ " general/experimenter | \n",
+ " | \n",
+ " 0 | \n",
+ " Dichter, Benjamin K. | \n",
+ " 0 | \n",
+ " ORCID:0000-0001-5725-6910 | \n",
+ " https://orcid.org/0000-0001-5725-6910 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 7cc452e3-925d-489d-9d64-01c6b227a906 | \n",
+ " 0 | \n",
+ " 7cc452e3-925d-489d-9d64-01c6b227a906 | \n",
+ " 0 | \n",
+ " NWBFile | \n",
+ " general/experimenter | \n",
+ " | \n",
+ " 1 | \n",
+ " Rubel, Oliver | \n",
+ " 1 | \n",
+ " ORCID:0000-0001-9902-1984 | \n",
+ " https://orcid.org/0000-0001-9902-1984 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " file_object_id objects_idx \n",
+ "0 7cc452e3-925d-489d-9d64-01c6b227a906 0 \\\n",
+ "1 7cc452e3-925d-489d-9d64-01c6b227a906 0 \n",
+ "\n",
+ " object_id files_idx object_type \n",
+ "0 7cc452e3-925d-489d-9d64-01c6b227a906 0 NWBFile \\\n",
+ "1 7cc452e3-925d-489d-9d64-01c6b227a906 0 NWBFile \n",
+ "\n",
+ " relative_path field keys_idx key entities_idx \n",
+ "0 general/experimenter 0 Dichter, Benjamin K. 0 \\\n",
+ "1 general/experimenter 1 Rubel, Oliver 1 \n",
+ "\n",
+ " entity_id entity_uri \n",
+ "0 ORCID:0000-0001-5725-6910 https://orcid.org/0000-0001-5725-6910 \n",
+ "1 ORCID:0000-0001-9902-1984 https://orcid.org/0000-0001-9902-1984 "
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "er.to_dataframe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b475951b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "schema_path = 'tests/unit/example_dynamic_term_set.yaml'\n",
+ "termset = TermSet(term_schema_path=schema_path, dynamic=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "215c94ae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "termset = TermSet(schemasheets_folder=folder)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/notebooks/er_img.png b/docs/notebooks/er_img.png
new file mode 100644
index 000000000..a35725fae
Binary files /dev/null and b/docs/notebooks/er_img.png differ
diff --git a/docs/notebooks/experimenter_term_set.yaml b/docs/notebooks/experimenter_term_set.yaml
new file mode 100644
index 000000000..24285f926
--- /dev/null
+++ b/docs/notebooks/experimenter_term_set.yaml
@@ -0,0 +1,17 @@
+id: notebooks/experimenter_example
+name: Experimenter
+prefixes:
+ ORCID: https://orcid.org/
+imports:
+ - linkml:types
+default_range: string
+
+enums:
+ Species:
+ permissible_values:
+ Dichter, Benjamin K.:
+ description: The ORCiD
+ meaning: ORCID:0000-0001-5725-6910
+ Rubel, Oliver:
+ description: The ORCiD
+ meaning: ORCID:0000-0001-9902-1984
\ No newline at end of file
diff --git a/docs/notebooks/experimenters_schema.png b/docs/notebooks/experimenters_schema.png
new file mode 100644
index 000000000..7fbc233cd
Binary files /dev/null and b/docs/notebooks/experimenters_schema.png differ
diff --git a/docs/notebooks/species_schema.png b/docs/notebooks/species_schema.png
new file mode 100644
index 000000000..ee30dfea9
Binary files /dev/null and b/docs/notebooks/species_schema.png differ
diff --git a/docs/notebooks/species_term_set.yaml b/docs/notebooks/species_term_set.yaml
new file mode 100644
index 000000000..fdf53f169
--- /dev/null
+++ b/docs/notebooks/species_term_set.yaml
@@ -0,0 +1,24 @@
+id: notebooks/species_example
+name: Species
+prefixes:
+ NCBI_TAXON: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=
+ Ensemble: https://rest.ensembl.org/taxonomy/id/
+imports:
+ - linkml:types
+default_range: string
+
+enums:
+ Species:
+ permissible_values:
+ Homo sapiens:
+ description: Humans
+ meaning: NCBI_TAXON:9606
+ Mus musculus:
+ description: Mouse
+ meaning: Ensemble:10090
+ Ursus arctos horribilis:
+ description: Brown Bear
+ meaning: NCBI_TAXON:116960
+ Myrmecophaga tridactyla:
+ description: Ant Eater
+ meaning: NCBI_TAXON:71006
\ No newline at end of file
diff --git a/src/pynwb/file.py b/src/pynwb/file.py
index b03141b7f..b473e571a 100644
--- a/src/pynwb/file.py
+++ b/src/pynwb/file.py
@@ -8,7 +8,7 @@
import pandas as pd
from hdmf.common import DynamicTableRegion, DynamicTable
-from hdmf.container import ExternalResourcesManager
+from hdmf.container import HERDManager
from hdmf.utils import docval, getargs, get_docval, popargs, popargs_to_dict, AllowPositional
from . import register_class, CORE_NAMESPACE
@@ -150,7 +150,7 @@ def __init__(self, **kwargs):
@register_class('NWBFile', CORE_NAMESPACE)
-class NWBFile(MultiContainerInterface, ExternalResourcesManager):
+class NWBFile(MultiContainerInterface, HERDManager):
"""
A representation of an NWB file.
"""
diff --git a/src/pynwb/resources.py b/src/pynwb/resources.py
index bfb8a6ba2..c89eea23d 100644
--- a/src/pynwb/resources.py
+++ b/src/pynwb/resources.py
@@ -1,9 +1,9 @@
-from hdmf.common import ExternalResources as hdmf_ExternalResources
+from hdmf.common import HERD as hdmf_ExternalResources
from . import get_type_map as tm
from hdmf.utils import docval, get_docval
-class ExternalResources(hdmf_ExternalResources):
+class HERD(hdmf_ExternalResources):
@docval(*get_docval(hdmf_ExternalResources.__init__))
def __init__(self, **kwargs):
kwargs['type_map'] = tm()
diff --git a/tests/unit/test_resources.py b/tests/unit/test_resources.py
index cfb598b7b..c604f9efa 100644
--- a/tests/unit/test_resources.py
+++ b/tests/unit/test_resources.py
@@ -1,4 +1,4 @@
-from pynwb.resources import ExternalResources
+from pynwb.resources import HERD
from pynwb.testing import TestCase
@@ -7,5 +7,5 @@ def test_constructor(self):
"""
Test constructor
"""
- er = ExternalResources()
+ er = HERD()
-        self.assertIsInstance(er, ExternalResources)
+        self.assertIsInstance(er, HERD)