Skip to content

Commit e4e5396

Browse files
committed
Remove HugeCTR examples
1 parent 92dc8cb commit e4e5396

File tree

5 files changed

+44
-1181
lines changed

5 files changed

+44
-1181
lines changed

examples/scaling-criteo/01-Download-Convert.ipynb

Lines changed: 10 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 1,
5+
"execution_count": null,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -33,7 +33,7 @@
3333
"\n",
3434
"# Scaling Criteo: Download and Convert\n",
3535
"\n",
36-
"This notebook is created using the latest stable [merlin-hugectr](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr/tags), [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags), or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container. \n",
36+
"This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container. \n",
3737
"\n",
3838
"## Criteo 1TB Click Logs dataset\n",
3939
"\n",
@@ -51,7 +51,7 @@
5151
},
5252
{
5353
"cell_type": "code",
54-
"execution_count": 2,
54+
"execution_count": null,
5555
"metadata": {},
5656
"outputs": [],
5757
"source": [
@@ -74,7 +74,7 @@
7474
},
7575
{
7676
"cell_type": "code",
77-
"execution_count": 3,
77+
"execution_count": null,
7878
"metadata": {},
7979
"outputs": [
8080
{
@@ -151,7 +151,7 @@
151151
},
152152
{
153153
"cell_type": "code",
154-
"execution_count": 4,
154+
"execution_count": null,
155155
"metadata": {},
156156
"outputs": [],
157157
"source": [
@@ -177,7 +177,7 @@
177177
},
178178
{
179179
"cell_type": "code",
180-
"execution_count": 5,
180+
"execution_count": null,
181181
"metadata": {},
182182
"outputs": [],
183183
"source": [
@@ -196,7 +196,7 @@
196196
},
197197
{
198198
"cell_type": "code",
199-
"execution_count": 6,
199+
"execution_count": null,
200200
"metadata": {},
201201
"outputs": [
202202
{
@@ -227,7 +227,7 @@
227227
},
228228
{
229229
"cell_type": "code",
230-
"execution_count": 7,
230+
"execution_count": null,
231231
"metadata": {},
232232
"outputs": [
233233
{
@@ -277,7 +277,7 @@
277277
},
278278
{
279279
"cell_type": "code",
280-
"execution_count": 8,
280+
"execution_count": null,
281281
"metadata": {},
282282
"outputs": [],
283283
"source": [
@@ -297,28 +297,9 @@
297297
],
298298
"metadata": {
299299
"kernelspec": {
300-
"display_name": "Python 3 (ipykernel)",
300+
"display_name": "python3",
301301
"language": "python",
302302
"name": "python3"
303-
},
304-
"language_info": {
305-
"codemirror_mode": {
306-
"name": "ipython",
307-
"version": 3
308-
},
309-
"file_extension": ".py",
310-
"mimetype": "text/x-python",
311-
"name": "python",
312-
"nbconvert_exporter": "python",
313-
"pygments_lexer": "ipython3",
314-
"version": "3.8.10"
315-
},
316-
"merlin": {
317-
"containers": [
318-
"nvcr.io/nvidia/merlin/merlin-hugectr:latest",
319-
"nvcr.io/nvidia/merlin/merlin-tensorflow:latest",
320-
"nvcr.io/nvidia/merlin/merlin-pytorch:latest"
321-
]
322303
}
323304
},
324305
"nbformat": 4,

examples/scaling-criteo/02-ETL-with-NVTabular.ipynb

Lines changed: 33 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,8 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 1,
6-
"metadata": {
7-
"jupyter": {
8-
"outputs_hidden": false
9-
}
10-
},
5+
"execution_count": null,
6+
"metadata": {},
117
"outputs": [],
128
"source": [
139
"# Copyright 2021 NVIDIA Corporation. All Rights Reserved.\n",
@@ -37,7 +33,7 @@
3733
"\n",
3834
"# Scaling Criteo: ETL with NVTabular\n",
3935
"\n",
40-
"This notebook is created using the latest stable [merlin-hugectr](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr/tags), [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags), or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container.\n",
36+
"This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container.\n",
4137
"\n",
4238
"## Overview\n",
4339
"\n",
@@ -80,12 +76,8 @@
8076
},
8177
{
8278
"cell_type": "code",
83-
"execution_count": 2,
84-
"metadata": {
85-
"jupyter": {
86-
"outputs_hidden": false
87-
}
88-
},
79+
"execution_count": null,
80+
"metadata": {},
8981
"outputs": [],
9082
"source": [
9183
"# Standard Libraries\n",
@@ -122,19 +114,14 @@
122114
},
123115
{
124116
"cell_type": "code",
125-
"execution_count": 5,
126-
"metadata": {
127-
"jupyter": {
128-
"outputs_hidden": false
129-
}
130-
},
117+
"execution_count": null,
118+
"metadata": {},
131119
"outputs": [],
132120
"source": [
133121
"# define some information about where to get our data\n",
134122
"BASE_DIR = os.environ.get(\"BASE_DIR\", \"/raid/data/criteo\")\n",
135123
"INPUT_DATA_DIR = os.environ.get(\"INPUT_DATA_DIR\", BASE_DIR + \"/converted/criteo\")\n",
136124
"OUTPUT_DATA_DIR = os.environ.get(\"OUTPUT_DATA_DIR\", BASE_DIR + \"/test_dask/output\")\n",
137-
"USE_HUGECTR = bool(os.environ.get(\"USE_HUGECTR\", \"\"))\n",
138125
"stats_path = os.path.join(OUTPUT_DATA_DIR, \"test_dask/stats\")\n",
139126
"dask_workdir = os.path.join(OUTPUT_DATA_DIR, \"test_dask/workdir\")\n",
140127
"\n",
@@ -163,7 +150,7 @@
163150
},
164151
{
165152
"cell_type": "code",
166-
"execution_count": 7,
153+
"execution_count": null,
167154
"metadata": {},
168155
"outputs": [],
169156
"source": [
@@ -179,12 +166,8 @@
179166
},
180167
{
181168
"cell_type": "code",
182-
"execution_count": 8,
183-
"metadata": {
184-
"jupyter": {
185-
"outputs_hidden": false
186-
}
187-
},
169+
"execution_count": null,
170+
"metadata": {},
188171
"outputs": [
189172
{
190173
"name": "stdout",
@@ -216,12 +199,8 @@
216199
},
217200
{
218201
"cell_type": "code",
219-
"execution_count": 9,
220-
"metadata": {
221-
"jupyter": {
222-
"outputs_hidden": false
223-
}
224-
},
202+
"execution_count": null,
203+
"metadata": {},
225204
"outputs": [
226205
{
227206
"name": "stderr",
@@ -465,7 +444,7 @@
465444
"<Client: 'tcp://127.0.0.1:44059' processes=2 threads=2, memory=100.00 GiB>"
466445
]
467446
},
468-
"execution_count": 9,
447+
"execution_count": null,
469448
"metadata": {},
470449
"output_type": "execute_result"
471450
}
@@ -537,12 +516,8 @@
537516
},
538517
{
539518
"cell_type": "code",
540-
"execution_count": 10,
541-
"metadata": {
542-
"jupyter": {
543-
"outputs_hidden": false
544-
}
545-
},
519+
"execution_count": null,
520+
"metadata": {},
546521
"outputs": [],
547522
"source": [
548523
"# define our dataset schema\n",
@@ -568,24 +543,19 @@
568543
"cell_type": "markdown",
569544
"metadata": {},
570545
"source": [
571-
"We need to enforce the required HugeCTR data types, so we set them in a dictionary and give as an argument when creating our dataset. The dictionary defines the output datatypes of our datasets."
546+
"Optionally, we can define the output datatypes of our datasets."
572547
]
573548
},
574549
{
575550
"cell_type": "code",
576-
"execution_count": 11,
577-
"metadata": {
578-
"jupyter": {
579-
"outputs_hidden": false
580-
}
581-
},
551+
"execution_count": null,
552+
"metadata": {},
582553
"outputs": [],
583554
"source": [
584555
"dict_dtypes = {}\n",
585556
"\n",
586-
"# The environment variable USE_HUGECTR defines, if we want to use the output for HugeCTR or another framework\n",
587557
"for col in CATEGORICAL_COLUMNS:\n",
588-
" dict_dtypes[col] = np.int64 if USE_HUGECTR else np.int32\n",
558+
" dict_dtypes[col] = np.int32\n",
589559
"\n",
590560
"for col in CONTINUOUS_COLUMNS:\n",
591561
" dict_dtypes[col] = np.float32\n",
@@ -603,12 +573,8 @@
603573
},
604574
{
605575
"cell_type": "code",
606-
"execution_count": 13,
607-
"metadata": {
608-
"jupyter": {
609-
"outputs_hidden": false
610-
}
611-
},
576+
"execution_count": null,
577+
"metadata": {},
612578
"outputs": [],
613579
"source": [
614580
"train_dataset = nvt.Dataset(train_paths, engine=\"parquet\", part_size=part_size)\n",
@@ -624,12 +590,8 @@
624590
},
625591
{
626592
"cell_type": "code",
627-
"execution_count": 14,
628-
"metadata": {
629-
"jupyter": {
630-
"outputs_hidden": false
631-
}
632-
},
593+
"execution_count": null,
594+
"metadata": {},
633595
"outputs": [],
634596
"source": [
635597
"output_train_dir = os.path.join(OUTPUT_DATA_DIR, \"train/\")\n",
@@ -647,7 +609,7 @@
647609
},
648610
{
649611
"cell_type": "code",
650-
"execution_count": 15,
612+
"execution_count": null,
651613
"metadata": {},
652614
"outputs": [
653615
{
@@ -661,10 +623,10 @@
661623
{
662624
"data": {
663625
"text/plain": [
664-
"<nvtabular.workflow.workflow.Workflow at 0x7fdacec4fdc0>"
626+
"<nvtabular.workflow.workflow.Workflow>"
665627
]
666628
},
667-
"execution_count": 15,
629+
"execution_count": null,
668630
"metadata": {},
669631
"output_type": "execute_result"
670632
}
@@ -676,12 +638,8 @@
676638
},
677639
{
678640
"cell_type": "code",
679-
"execution_count": 16,
680-
"metadata": {
681-
"jupyter": {
682-
"outputs_hidden": false
683-
}
684-
},
641+
"execution_count": null,
642+
"metadata": {},
685643
"outputs": [
686644
{
687645
"name": "stdout",
@@ -708,12 +666,8 @@
708666
},
709667
{
710668
"cell_type": "code",
711-
"execution_count": 17,
712-
"metadata": {
713-
"jupyter": {
714-
"outputs_hidden": false
715-
}
716-
},
669+
"execution_count": null,
670+
"metadata": {},
717671
"outputs": [
718672
{
719673
"name": "stdout",
@@ -745,7 +699,7 @@
745699
},
746700
{
747701
"cell_type": "code",
748-
"execution_count": 18,
702+
"execution_count": null,
749703
"metadata": {},
750704
"outputs": [],
751705
"source": [
@@ -754,35 +708,11 @@
754708
}
755709
],
756710
"metadata": {
757-
"file_extension": ".py",
758711
"kernelspec": {
759-
"display_name": "Python 3 (ipykernel)",
712+
"display_name": "python3",
760713
"language": "python",
761714
"name": "python3"
762-
},
763-
"language_info": {
764-
"codemirror_mode": {
765-
"name": "ipython",
766-
"version": 3
767-
},
768-
"file_extension": ".py",
769-
"mimetype": "text/x-python",
770-
"name": "python",
771-
"nbconvert_exporter": "python",
772-
"pygments_lexer": "ipython3",
773-
"version": "3.8.10"
774-
},
775-
"merlin": {
776-
"containers": [
777-
"nvcr.io/nvidia/merlin/merlin-hugectr:latest",
778-
"nvcr.io/nvidia/merlin/merlin-tensorflow:latest",
779-
"nvcr.io/nvidia/merlin/merlin-pytorch:latest"
780-
]
781-
},
782-
"mimetype": "text/x-python",
783-
"npconvert_exporter": "python",
784-
"pygments_lexer": "ipython3",
785-
"version": 3
715+
}
786716
},
787717
"nbformat": 4,
788718
"nbformat_minor": 4

0 commit comments

Comments
 (0)