Commit 0348320

Merge pull request #23 from unity-sds/develop

Merging notebook and API changes

2 parents 1be09c5 + f685edc

File tree

4 files changed (+340 -64 lines)

jupyter-notebooks/tutorials/2_working_with_data.ipynb

Lines changed: 177 additions & 18 deletions
@@ -191,22 +191,24 @@
     "url = env['url'] + \"am-uds-dapa/collections/\"+data_set+\"/items\"\n",
     "\n",
     "params = []\n",
-    "params.append((\"limit\", 20))\n",
-    "params.append((\"datetime\", \"1900-01-01T00:00:00Z/2022-11-01T02:31:12Z\")) # working through a bug that requires a time to be included.\n",
+    "#params.append((\"limit\", 20))\n",
     "\n",
     "response = requests.get(url, headers={\"Authorization\": \"Bearer \" + token}, params=params)\n",
     "\n",
+    "print(f\"Endpoint: \"+url)\n",
+    "print(f\"Total number of files: {response.json()['numberMatched']}\")\n",
     "print(\"File IDs, titles, and hrefs in Collection \" + data_set + \"\\n\")\n",
     "\n",
     "features = response.json()['features']\n",
     "\n",
     "for data_file in features: {\n",
-    "    print(data_file['id']),\n",
-    "    print(data_file['assets']['metadata__data']['title']),\n",
-    "    print(data_file['assets']['metadata__data']['href']),\n",
+    "    print(\"For \"+ data_file['id']),\n",
+    "    print(\"File:\\t\\t\"+data_file['assets']['data']['href']),\n",
+    "    print(\"Metadata:\\t\"+data_file['assets']['metadata__data']['href']),\n",
     "    print(\"\")\n",
     "}\n",
     "\n",
+    "\n",
     "print(\"Full JSON response object:\")\n",
     "JSON(response.json())\n"
    ]
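The listing loop in this cell wraps its body in braces, which Python parses as a set literal built once per iteration, so it works only through `print`'s side effects (and the `f` prefix on the Endpoint string is inert, since that string has no placeholders). A more idiomatic sketch of the same listing, assuming the `env`, `token`, and `data_set` variables defined earlier in the notebook:

```python
import requests

# Assumed to be defined earlier in the notebook: env, token, data_set.
url = env['url'] + "am-uds-dapa/collections/" + data_set + "/items"

response = requests.get(url, headers={"Authorization": "Bearer " + token})
body = response.json()

print("Endpoint: " + url)
print(f"Total number of files: {body['numberMatched']}")
print("File IDs, titles, and hrefs in Collection " + data_set + "\n")

for data_file in body['features']:
    print("For " + data_file['id'])
    print("File:\t\t" + data_file['assets']['data']['href'])
    print("Metadata:\t" + data_file['assets']['metadata__data']['href'])
    print()
```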
@@ -240,24 +242,132 @@
     "params.append((\"datetime\", \"2000-11-01T00:00:00Z/2022-11-01T02:31:12Z\"))\n",
     "\n",
     "# limit - how many results to return in a single request\n",
-    "params.append((\"limit\", 100))\n",
-    "\n",
-    "#offset - the number of results to offset, for paging\n",
-    "#params.append((\"offset\", 100))\n",
+    "#params.append((\"limit\", 10))\n",
     "\n",
     "response = requests.get(url, headers={\"Authorization\": \"Bearer \" + token}, params=params)\n",
     "\n",
+    "print(f\"Total number of files: {response.json()['numberMatched']}\")\n",
     "print(\"File IDs, datetimes, and hrefs in Collection \" + data_set + \"\\n\")\n",
     "\n",
     "features = response.json()['features']\n",
-    "\n",
-    "for data_file in features: {\n",
-    "    print(data_file['id']),\n",
-    "    print(data_file['properties']['created']),\n",
-    "    print(data_file['assets']['metadata__data']['href']),\n",
-    "    print(\"\")\n",
+    "while len(features) > 0:\n",
+    "    for data_file in features: {\n",
+    "        print(data_file['id']),\n",
+    "        print(data_file['properties']['created']),\n",
+    "        print(data_file['assets']['metadata__data']['href']),\n",
+    "        print(data_file['assets']['data']['href']),\n",
+    "        print(\"\")\n",
+    "    }\n",
+    "    # Get the next page of results\n",
+    "    response = requests.get(next(item for item in response.json()['links'] if item['rel'] == 'next')['href'], headers={\"Authorization\": \"Bearer \" + token}, params=params)\n",
+    "    features = response.json()['features']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d2aba320-4079-4fa0-943e-d97391c2ce8c",
+   "metadata": {},
+   "source": [
+    "## Create a new Collection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "acd23209-573e-4b72-9f5a-5f7b7f6be26d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "collection_id = \"NEW_COLLECTION_EXAMPLE_L1B___5\"\n",
+    "collection = {\n",
+    "    \"type\": \"Collection\",\n",
+    "    \"id\": collection_id,\n",
+    "    \"stac_version\": \"1.0.0\",\n",
+    "    \"description\": \"TODO\",\n",
+    "    \"links\": [\n",
+    "        {\n",
+    "            \"rel\": \"root\",\n",
+    "            \"href\": \"./collection.json?bucket=unknown_bucket&regex=%7BcmrMetadata.Granule.Collection.ShortName%7D___%7BcmrMetadata.Granule.Collection.VersionId%7D\",\n",
+    "            \"type\": \"application/json\",\n",
+    "            \"title\": \"test_file01.nc\"\n",
+    "        },\n",
+    "        {\n",
+    "            \"rel\": \"item\",\n",
+    "            \"href\": \"./collection.json?bucket=protected&regex=%5Etest_file.%2A%5C.nc%24\",\n",
+    "            \"type\": \"data\",\n",
+    "            \"title\": \"test_file01.nc\"\n",
+    "        },\n",
+    "        {\n",
+    "            \"rel\": \"item\",\n",
+    "            \"href\": \"./collection.json?bucket=protected&regex=%5Etest_file.%2A%5C.nc%5C.cas%24\",\n",
+    "            \"type\": \"metadata\",\n",
+    "            \"title\": \"test_file01.nc.cas\"\n",
+    "        },\n",
+    "        {\n",
+    "            \"rel\": \"item\",\n",
+    "            \"href\": \"./collection.json?bucket=private&regex=%5Etest_file.%2A%5C.cmr%5C.xml%24\",\n",
+    "            \"type\": \"metadata\",\n",
+    "            \"title\": \"test_file01.cmr.xml\"\n",
+    "        }\n",
+    "    ],\n",
+    "    \"stac_extensions\": [],\n",
+    "    \"extent\": {\n",
+    "        \"spatial\": {\n",
+    "            \"bbox\": [\n",
+    "                [\n",
+    "                    -180,\n",
+    "                    -90,\n",
+    "                    180,\n",
+    "                    90\n",
+    "                ]\n",
+    "            ]\n",
+    "        },\n",
+    "        \"temporal\": {\n",
+    "            \"interval\": [\n",
+    "                [\n",
+    "                    \"2022-10-04T00:00:00.000000Z\",\n",
+    "                    \"2022-10-04T23:59:59.999999Z\"\n",
+    "                ]\n",
+    "            ]\n",
+    "        }\n",
+    "    },\n",
+    "    \"license\": \"proprietary\",\n",
+    "    \"summaries\": {\n",
+    "        \"granuleId\": [\n",
+    "            \"^test_file.*$\"\n",
+    "        ],\n",
+    "        \"granuleIdExtraction\": [\n",
+    "            \"(^test_file.*)(\\\\.nc|\\\\.nc\\\\.cas|\\\\.cmr\\\\.xml)\"\n",
+    "        ],\n",
+    "        \"process\": [\n",
+    "            \"snpp.level1\"\n",
+    "        ]\n",
+    "    }\n",
     "}\n",
     "\n",
+    "url = env['url'] + \"am-uds-dapa/collections\"\n",
+    "response = requests.post(url, headers={\"Authorization\": \"Bearer \" + token}, json=collection)\n",
+    "print(response)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c48cc2e9-4d2a-4056-bf1a-59a4365960a4",
+   "metadata": {},
+   "source": [
+    "## Get newly created Collection\n",
+    "\n",
+    "The collection creation may take a minute, so if the new collection isn't returned immediately, please retry."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url = env['url'] + \"am-uds-dapa/collections/\" + collection_id\n",
+    "response = requests.get(url, headers={\"Authorization\": \"Bearer \" + token})\n",
     "print(\"Full JSON response object:\")\n",
     "JSON(response.json())"
    ]
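One caveat with the paging loop added above: `next(...)` raises `StopIteration` once the final page no longer carries a link with `rel == 'next'`, so the `while` loop cannot exit cleanly. A sketch of the same pagination that guards for a missing next link, assuming the response shape shown above (`features` plus a `links` array) and the same bearer-token auth:

```python
import requests

# Assumed to be defined as in the cells above: url, params, token.
headers = {"Authorization": "Bearer " + token}
response = requests.get(url, headers=headers, params=params)

while True:
    page = response.json()
    for data_file in page['features']:
        print(data_file['id'])
        print(data_file['properties']['created'])
        print(data_file['assets']['metadata__data']['href'])
        print(data_file['assets']['data']['href'])
        print()
    # Follow the 'next' link if the server provided one; stop otherwise.
    next_link = next((item for item in page['links'] if item['rel'] == 'next'), None)
    if next_link is None:
        break
    response = requests.get(next_link['href'], headers=headers, params=params)
```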
@@ -267,16 +377,34 @@
    "id": "7719daba",
    "metadata": {},
    "source": [
-    "## Explore on your own (COMING SOON)\n",
+    "## Explore on your own\n",
+    "\n",
+    "Given the endpoints above for finding collections and then finding data within those collections, try to craft a query by copying cells to find data from one of the other collections in the list above.\n",
     "\n",
-    "Given the endpoints above for finding collections and then finding data within those collections, try to craft a query by copying cells to find data from one of the other collections in the list above. Some things to try\n",
+    "Some things to try:\n",
     "\n",
     "* Find data in the Unity system for the L0_SNPP_EphAtt product type\n",
     "* Find data in the Unity system for the L1 SounderSIPS \n",
     "* Filter the collections above on a numer\n",
     "\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "d79e544a",
+   "metadata": {},
+   "source": [
+    "## Credential-less data download\n",
+    "\n",
+    "When accessing data stores within the same venue, you'll be able to download data from S3 without credentials. \n",
+    "\n",
+    "**Note**, the following libraries are needed for this, and the below command can be run in a jupyter-terminal to install them:\n",
+    "\n",
+    "```\n",
+    "conda install xarray netcdf4 hdf5 boto3 matplotlib\n",
+    "```\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -295,8 +423,39 @@
    "outputs": [],
    "source": [
     "s3 = boto3.client('s3')\n",
-    "s3.download_file('uds-test-cumulus-protected', 'ATMS_SCIENCE_Group___1/P1570515ATMSSCIENCEAAT16001044919900.PDS', 'P1570515ATMSSCIENCEAAT16001044919900.PDS')"
+    "s3.download_file('uds-test-cumulus-protected', 'SNDR_SNPP_ATMS_L1A___1/SNDR.SNPP.ATMS.L1A.nominal2.04.nc', 'test_file11.nc')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c09f8626",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import xarray as xr\n",
+    "ds = xr.open_dataset('test_file11.nc')\n",
+    "ds"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fba4dff8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds.band_surf_alt.plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9386a4f1",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

0 commit comments