From 6facada6f80d3529c078ca518c21eea59f8675c4 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 14 Jul 2025 14:05:55 -0700 Subject: [PATCH 01/10] [DO NOT MERGE] 2.8 RC Test --- .jenkins/build.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 9162152ff7..2e0d8ebd12 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -22,10 +22,12 @@ sudo apt-get install -y pandoc #Install PyTorch Nightly for test. # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). -# sudo pip uninstall -y fbgemm-gpu torchrec -# sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict -# sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124 -# pip3 install torch==2.7.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 +sudo pip uninstall -y fbgemm-gpu torchrec +sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict +sudo pip3 install fbgemm-gpu==1.2.0 torchrec==1.2.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124 +pip3 install torch==2.8.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 +pip3 install torchrl==0.9.1 tensordict==0.9.1 + # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm python -m spacy download de_core_news_sm From 251bfe5e830da80ba58f2692882575b6be68f141 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 14 Jul 2025 14:38:03 -0700 Subject: [PATCH 02/10] Update .jenkins/build.sh --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh 
b/.jenkins/build.sh index 2e0d8ebd12..8576019859 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -24,7 +24,7 @@ sudo apt-get install -y pandoc # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). sudo pip uninstall -y fbgemm-gpu torchrec sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict -sudo pip3 install fbgemm-gpu==1.2.0 torchrec==1.2.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124 +sudo pip3 install torchrec==1.2.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124 pip3 install torch==2.8.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 pip3 install torchrl==0.9.1 tensordict==0.9.1 From 20ee808e2c54968034779641c2e354d2678e15d6 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 14 Jul 2025 14:54:37 -0700 Subject: [PATCH 03/10] Update .jenkins/build.sh --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 8576019859..407feaeca7 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -24,7 +24,7 @@ sudo apt-get install -y pandoc # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). 
sudo pip uninstall -y fbgemm-gpu torchrec sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict -sudo pip3 install torchrec==1.2.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124 +sudo pip3 install fbgemm-gpu==1.2.0 torchrec==1.2.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 pip3 install torch==2.8.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 pip3 install torchrl==0.9.1 tensordict==0.9.1 From bfb17d50947e3ee96be430ee6d4e56136a45a343 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 14 Jul 2025 14:59:27 -0700 Subject: [PATCH 04/10] Update build.sh --- .jenkins/build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 407feaeca7..3900fc15ce 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -24,9 +24,10 @@ sudo apt-get install -y pandoc # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). 
sudo pip uninstall -y fbgemm-gpu torchrec sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict -sudo pip3 install fbgemm-gpu==1.2.0 torchrec==1.2.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 pip3 install torch==2.8.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 pip3 install torchrl==0.9.1 tensordict==0.9.1 +sudo pip3 install fbgemm-gpu==1.2.0 torchrec==1.2.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 + # Install two language tokenizers for Translation with TorchText tutorial python -m spacy download en_core_web_sm From eed36cb9c528fcadf0432c947cb83d9e1ac575bc Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 14 Jul 2025 15:20:57 -0700 Subject: [PATCH 05/10] Update .jenkins/build.sh --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 3900fc15ce..ae1b464b4f 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -26,7 +26,7 @@ sudo pip uninstall -y fbgemm-gpu torchrec sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict pip3 install torch==2.8.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 pip3 install torchrl==0.9.1 tensordict==0.9.1 -sudo pip3 install fbgemm-gpu==1.2.0 torchrec==1.2.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 +# sudo pip3 install fbgemm-gpu==1.2.0 torchrec==1.2.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 # Install two language tokenizers for Translation with TorchText tutorial From d18ba1197b530cc42503ecfd9ab629ae3b42f410 Mon Sep 17 00:00:00 2001 From: sekyondaMeta <127536312+sekyondaMeta@users.noreply.github.com> Date: Mon, 14 Jul 2025 20:10:20 -0400 Subject: [PATCH 06/10] Update dqn_with_rnn_tutorial.py 
https://github.com/pytorch/tutorials/actions/runs/16279151183/job/45965049712?pr=3463#step:9:8173 --- intermediate_source/dqn_with_rnn_tutorial.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/intermediate_source/dqn_with_rnn_tutorial.py b/intermediate_source/dqn_with_rnn_tutorial.py index bcc484f0a0..4ed08cb29d 100644 --- a/intermediate_source/dqn_with_rnn_tutorial.py +++ b/intermediate_source/dqn_with_rnn_tutorial.py @@ -342,7 +342,8 @@ # will return a new instance of the LSTM (with shared weights) that will # assume that the input data is sequential in nature. # -policy = Seq(feature, lstm.set_recurrent_mode(True), mlp, qval) +lstm = torch.nn.LSTM(input_size, hidden_size, num_layers, default_recurrent_mode=True) +policy = Seq(feature, lstm, mlp, qval) ###################################################################### # Because we still have a couple of uninitialized parameters we should From c0d83873e1a7bc6e2977b600580272115b499ff2 Mon Sep 17 00:00:00 2001 From: sekyondaMeta <127536312+sekyondaMeta@users.noreply.github.com> Date: Tue, 15 Jul 2025 07:31:40 -0400 Subject: [PATCH 07/10] Update dqn_with_rnn_tutorial.py --- intermediate_source/dqn_with_rnn_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/dqn_with_rnn_tutorial.py b/intermediate_source/dqn_with_rnn_tutorial.py index 4ed08cb29d..c053267d19 100644 --- a/intermediate_source/dqn_with_rnn_tutorial.py +++ b/intermediate_source/dqn_with_rnn_tutorial.py @@ -238,6 +238,7 @@ device=device, in_key="embed", out_key="embed", + default_recurrent_mode=True, ) ###################################################################### @@ -342,7 +343,6 @@ # will return a new instance of the LSTM (with shared weights) that will # assume that the input data is sequential in nature. 
# -lstm = torch.nn.LSTM(input_size, hidden_size, num_layers, default_recurrent_mode=True) policy = Seq(feature, lstm, mlp, qval) ###################################################################### From 13572ee898494b604dc05d38b4fb30a16e269f58 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 15 Jul 2025 13:11:55 -0700 Subject: [PATCH 08/10] Update --- .jenkins/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index ae1b464b4f..49c1856fc3 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -22,7 +22,7 @@ sudo apt-get install -y pandoc #Install PyTorch Nightly for test. # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed). -sudo pip uninstall -y fbgemm-gpu torchrec +#sudo pip uninstall -y fbgemm-gpu torchrec sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata torchrl tensordict pip3 install torch==2.8.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu126 pip3 install torchrl==0.9.1 tensordict==0.9.1 From caa0094db7e40deff8bca4a1a29d37cbf451b481 Mon Sep 17 00:00:00 2001 From: sekyondaMeta <127536312+sekyondaMeta@users.noreply.github.com> Date: Wed, 16 Jul 2025 12:37:06 -0400 Subject: [PATCH 09/10] Update dqn_with_rnn_tutorial.py Resetting changes in favor of: https://github.com/pytorch/tutorials/pull/3462 --- intermediate_source/dqn_with_rnn_tutorial.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/intermediate_source/dqn_with_rnn_tutorial.py b/intermediate_source/dqn_with_rnn_tutorial.py index c053267d19..bcc484f0a0 100644 --- a/intermediate_source/dqn_with_rnn_tutorial.py +++ b/intermediate_source/dqn_with_rnn_tutorial.py @@ -238,7 +238,6 @@ device=device, in_key="embed", out_key="embed", - default_recurrent_mode=True, ) 
###################################################################### @@ -343,7 +342,7 @@ # will return a new instance of the LSTM (with shared weights) that will # assume that the input data is sequential in nature. # -policy = Seq(feature, lstm, mlp, qval) +policy = Seq(feature, lstm.set_recurrent_mode(True), mlp, qval) ###################################################################### # Because we still have a couple of uninitialized parameters we should From 995c09f0ad408095e3f9c49d87e84ea2885cba2d Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Wed, 23 Jul 2025 11:46:58 -0700 Subject: [PATCH 10/10] Fix DQN w RNN tutorial (#3462) * Fix DQN w RNN tutorial * bump torchrl and tensordict req (#3474) --------- Co-authored-by: Vincent Moens --- .ci/docker/requirements.txt | 4 ++-- intermediate_source/dqn_with_rnn_tutorial.py | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt index f1f105122e..3d8507160a 100644 --- a/.ci/docker/requirements.txt +++ b/.ci/docker/requirements.txt @@ -38,8 +38,8 @@ tensorboard jinja2==3.1.3 pytorch-lightning torchx -torchrl==0.7.2 -tensordict==0.7.2 +torchrl==0.9.2 +tensordict==0.9.1 # For ax_multiobjective_nas_tutorial.py ax-platform>=0.4.0,<0.5.0 nbformat>=5.9.2 diff --git a/intermediate_source/dqn_with_rnn_tutorial.py b/intermediate_source/dqn_with_rnn_tutorial.py index bcc484f0a0..462415dcc7 100644 --- a/intermediate_source/dqn_with_rnn_tutorial.py +++ b/intermediate_source/dqn_with_rnn_tutorial.py @@ -342,7 +342,9 @@ # will return a new instance of the LSTM (with shared weights) that will # assume that the input data is sequential in nature. 
# -policy = Seq(feature, lstm.set_recurrent_mode(True), mlp, qval) +from torchrl.modules import set_recurrent_mode + +policy = Seq(feature, lstm, mlp, qval) ###################################################################### # Because we still have a couple of uninitialized parameters we should @@ -389,7 +391,10 @@ # For the sake of efficiency, we're only running a few thousands iterations # here. In a real setting, the total number of frames should be set to 1M. # -collector = SyncDataCollector(env, stoch_policy, frames_per_batch=50, total_frames=200, device=device) + +collector = SyncDataCollector( + env, stoch_policy, frames_per_batch=50, total_frames=200, device=device +) rb = TensorDictReplayBuffer( storage=LazyMemmapStorage(20_000), batch_size=4, prefetch=10 ) @@ -422,7 +427,8 @@ rb.extend(data.unsqueeze(0).to_tensordict().cpu()) for _ in range(utd): s = rb.sample().to(device, non_blocking=True) - loss_vals = loss_fn(s) + with set_recurrent_mode(True): + loss_vals = loss_fn(s) loss_vals["loss"].backward() optim.step() optim.zero_grad() @@ -464,5 +470,5 @@ # # Further Reading # --------------- -# +# # - The TorchRL documentation can be found `here <https://pytorch.org/rl/>`_.