Skip to content

Commit 8843abf

Browse files
author
Himani Anil Deshpande
committed
Revert "[Fabric] Install NVIDIA Fabric manager for ARM instances (aws#3014)"
This reverts commit f516bba.
1 parent f516bba commit 8843abf

File tree

5 files changed

+37
-51
lines changed

5 files changed

+37
-51
lines changed

CHANGELOG.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ This file is used to list changes made in each version of the AWS ParallelCluster
3131
- Remove `berkshelf`. All cookbooks are local and do not need `berkshelf` dependency management.
3232
- Add support for GB200 instance types.
3333
- Install nvidia-imex for all OSs except AL2.
34-
- Install nvidia-fabricmanager for ARM instances for all OSs except AL2.
3534

3635
**BUG FIXES**
3736
- Fix a race condition in CloudWatch Agent startup that could cause nodes bootstrap failures.

cookbooks/aws-parallelcluster-platform/resources/fabric_manager/fabric_manager_amazon2.rb

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,3 @@ def fabric_manager_version
2828
def platform
2929
'rhel7'
3030
end
31-
32-
def _fabric_manager_enabled
33-
!arm_instance? && _nvidia_enabled
34-
end

cookbooks/aws-parallelcluster-platform/resources/fabric_manager/partial/_fabric_manager_common.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@
4040
end
4141

4242
def _fabric_manager_enabled
43-
_nvidia_enabled
43+
# NVIDIA Fabric Manager not present on ARM
44+
!arm_instance? && _nvidia_enabled
4445
end
4546

4647
def _nvidia_enabled

cookbooks/aws-parallelcluster-platform/spec/unit/resources/fabric_manager_spec.rb

Lines changed: 34 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -118,55 +118,45 @@ def self.configure(chef_run)
118118
end
119119

120120
describe 'fabric_manager:_fabric_manager_enabled' do
121-
for_all_oses do |platform, version|
122-
context "on #{platform}#{version}" do
123-
context 'when on arm' do
124-
cached(:chef_run) do
125-
allow_any_instance_of(Object).to receive(:arm_instance?).and_return(true)
126-
ChefSpec::SoloRunner.new(step_into: ['fabric_manager'], platform: platform, version: version)
127-
end
128-
cached(:resource) do
129-
ConvergeFabricManager.setup(chef_run, nvidia_enabled: true)
130-
chef_run.find_resource('fabric_manager', 'setup')
131-
end
132-
if platform == 'amazon' && version == '2'
133-
it "is not enabled" do
134-
expect(resource._fabric_manager_enabled).to eq(false)
135-
end
136-
else
137-
it "is enabled" do
138-
expect(resource._fabric_manager_enabled).to eq(true)
139-
end
140-
end
141-
end
121+
context 'when on arm' do
122+
cached(:chef_run) do
123+
allow_any_instance_of(Object).to receive(:arm_instance?).and_return(true)
124+
ChefSpec::SoloRunner.new(step_into: ['fabric_manager'])
125+
end
126+
cached(:resource) do
127+
ConvergeFabricManager.setup(chef_run, nvidia_enabled: true)
128+
chef_run.find_resource('fabric_manager', 'setup')
129+
end
130+
it "is not enabled" do
131+
expect(resource._fabric_manager_enabled).to eq(false)
132+
end
133+
end
142134

143-
context 'when not on arm' do
144-
cached(:chef_run) do
145-
allow_any_instance_of(Object).to receive(:arm_instance?).and_return(false)
146-
ChefSpec::SoloRunner.new(step_into: ['fabric_manager'])
147-
end
135+
context 'when not on arm' do
136+
cached(:chef_run) do
137+
allow_any_instance_of(Object).to receive(:arm_instance?).and_return(false)
138+
ChefSpec::SoloRunner.new(step_into: ['fabric_manager'])
139+
end
148140

149-
context 'when nvidia enabled' do
150-
cached(:resource) do
151-
ConvergeFabricManager.setup(chef_run, nvidia_enabled: true)
152-
chef_run.find_resource('fabric_manager', 'setup')
153-
end
141+
context 'when nvidia enabled' do
142+
cached(:resource) do
143+
ConvergeFabricManager.setup(chef_run, nvidia_enabled: true)
144+
chef_run.find_resource('fabric_manager', 'setup')
145+
end
154146

155-
it "is enabled" do
156-
expect(resource._fabric_manager_enabled).to eq(true)
157-
end
158-
end
147+
it "is enabled" do
148+
expect(resource._fabric_manager_enabled).to eq(true)
149+
end
150+
end
159151

160-
context 'when nvidia not enabled' do
161-
cached(:resource) do
162-
ConvergeFabricManager.setup(chef_run, nvidia_enabled: false)
163-
chef_run.find_resource('fabric_manager', 'setup')
164-
end
152+
context 'when nvidia not enabled' do
153+
cached(:resource) do
154+
ConvergeFabricManager.setup(chef_run, nvidia_enabled: false)
155+
chef_run.find_resource('fabric_manager', 'setup')
156+
end
165157

166-
it "is not enabled" do
167-
expect(resource._fabric_manager_enabled).to eq(false)
168-
end
169-
end
158+
it "is not enabled" do
159+
expect(resource._fabric_manager_enabled).to eq(false)
170160
end
171161
end
172162
end

cookbooks/aws-parallelcluster-platform/test/controls/nvidia_fabric_manager_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# See the License for the specific language governing permissions and limitations under the License.
1111

1212
control 'tag:install_expected_versions_of_nvidia_fabric_manager_installed' do
13-
only_if { !os_properties.arm? && ['yes', true, 'true'].include?(node['cluster']['nvidia']['enabled']) && !os_properties.alinux2? }
13+
only_if { !os_properties.arm? && ['yes', true, 'true'].include?(node['cluster']['nvidia']['enabled']) }
1414

1515
describe package(node['cluster']['nvidia']['fabricmanager']['package']) do
1616
it { should be_installed }

0 commit comments

Comments
 (0)