|
3 | 3 | nvidia_version = "1.2.3"
|
4 | 4 | SOURCE_DIR = 'SOURCE_DIR'.freeze
|
5 | 5 | nvidia_imex_shared_dir = "SHARED_DIR/nvidia-imex"
|
| 6 | +imex_service_file = "/etc/systemd/system/nvidia-imex.service" |
6 | 7 | imex_binary = '/usr/bin/nvidia-imex'
|
7 | 8 | imex_ctl_binary = '/usr/bin/nvidia-imex-ctl'
|
8 | 9 | queue_name = 'queue-name'
|
@@ -296,118 +297,152 @@ def self.configure(chef_run)
|
296 | 297 |
|
297 | 298 | describe 'nvidia_imex:configure' do
|
298 | 299 | [%w(false), [false], %w(no), %w(true), [true], %w(yes)].each do |force_indicator|
|
299 |
| - for_all_oses do |platform, version| |
300 |
| - context "on #{platform}#{version} with force_configuration #{force_indicator}" do |
301 |
| - context "when nvidia-imex binary is not installed" do |
302 |
| - cached(:chef_run) do |
303 |
| - stubs_for_resource('nvidia_imex') do |res| |
304 |
| - allow(res).to receive(:imex_installed?).and_return(false) |
305 |
| - end |
306 |
| - runner = runner(platform: platform, version: version, step_into: ['nvidia_imex']) |
307 |
| - ConvergeNvidiaImex.configure(runner) |
308 |
| - end |
309 |
| - cached(:node) { chef_run.node } |
310 |
| - |
311 |
| - it 'does not configure nvidia-imex' do |
312 |
| - is_expected.not_to configure_nvidia_imex('nvidia-imex') |
313 |
| - end |
314 |
| - end |
315 |
| - |
316 |
| - %w(HeadNode LoginNode ComputeFleet).each do |node_type| |
317 |
| - context "when get_nvswitch_count > 1 on #{node_type} node" do |
318 |
| - cached(:chef_run) do |
319 |
| - stubs_for_provider('nvidia_imex[configure]') do |pro| |
320 |
| - allow(pro).to receive(:imex_installed?).and_return(true) |
321 |
| - allow(pro).to receive(:get_device_ids).and_return({ 'gb200' => 'test' }) |
322 |
| - allow(pro).to receive(:get_nvswitch_count).with('test').and_return(4) |
323 |
| - allow(pro).to receive(:enable_force_configuration?).and_return(force_indicator) |
| 300 | + [true, false].each do |shared_dir_exists| |
| 301 | + [true, false].each do |imex_service_file_exists| |
| 302 | + for_all_oses do |platform, version| |
| 303 | + context "on #{platform}#{version} with force_configuration #{force_indicator} with shared_dir existence #{shared_dir_exists}" do |
| 304 | + context "when nvidia-imex binary is not installed" do |
| 305 | + cached(:chef_run) do |
| 306 | + stubs_for_resource('nvidia_imex') do |res| |
| 307 | + allow(res).to receive(:imex_installed?).and_return(false) |
| 308 | + allow(Dir).to receive(:exist?).with(nvidia_imex_shared_dir).and_return(shared_dir_exists) |
| 309 | + allow(File).to receive(:exist?).with(imex_service_file).and_return(imex_service_file_exists) |
| 310 | + end |
| 311 | + runner = runner(platform: platform, version: version, step_into: ['nvidia_imex']) |
| 312 | + ConvergeNvidiaImex.configure(runner) |
324 | 313 | end
|
325 |
| - runner(platform: platform, version: version, step_into: ['nvidia_imex']) |
326 |
| - end |
327 |
| - cached(:node) { chef_run.node } |
| 314 | + cached(:node) { chef_run.node } |
328 | 315 |
|
329 |
| - before do |
330 |
| - chef_run.node.override['cluster']['region'] = 'aws_region' |
331 |
| - chef_run.node.override['cluster']['nvidia']['imex']['force_configuration'] = force_indicator |
332 |
| - chef_run.node.override['cluster']['nvidia']['imex']['shared_dir'] = nvidia_imex_shared_dir |
333 |
| - chef_run.node.override['cluster']['node_type'] = node_type |
334 |
| - chef_run.node.override['cluster']['scheduler_queue_name'] = queue_name |
335 |
| - chef_run.node.override['cluster']['scheduler_compute_resource_name'] = compute_resource_name |
336 |
| - |
337 |
| - ConvergeNvidiaImex.configure(chef_run) |
338 |
| - end |
339 |
| - |
340 |
| - if (platform == 'amazon' && version == '2') || %w(HeadNode LoginNode).include?(node_type) |
341 | 316 | it 'does not configure nvidia-imex' do
|
342 |
| - is_expected.not_to create_if_missing_template("#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg") |
343 |
| - .with(source: 'nvidia-imex/nvidia-imex-nodes.erb') |
344 |
| - .with(user: 'root') |
345 |
| - .with(group: 'root') |
346 |
| - .with(mode: '0755') |
347 |
| - is_expected.not_to create_if_missing_template("#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg") |
348 |
| - .with(source: 'nvidia-imex/nvidia-imex-config.erb') |
349 |
| - .with(user: 'root') |
350 |
| - .with(group: 'root') |
351 |
| - .with(mode: '0755') |
352 |
| - .with(variables: { imex_nodes_config_file_path: "#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg" }) |
353 |
| - is_expected.not_to create_template("/etc/systemd/system/nvidia-imex.service") |
354 |
| - .with(source: 'nvidia-imex/nvidia-imex.service.erb') |
355 |
| - .with(user: 'root') |
356 |
| - .with(group: 'root') |
357 |
| - .with(mode: '0644') |
358 |
| - .with(variables: { imex_main_config_file_path: "#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg" }) |
359 |
| - is_expected.not_to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
360 |
| - end |
361 |
| - else |
362 |
| - it 'it starts nvidia-imex service' do |
363 |
| - is_expected.to create_if_missing_template("#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg") |
364 |
| - .with(source: 'nvidia-imex/nvidia-imex-nodes.erb') |
365 |
| - .with(user: 'root') |
366 |
| - .with(group: 'root') |
367 |
| - .with(mode: '0755') |
368 |
| - is_expected.to create_if_missing_template("#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg") |
369 |
| - .with(source: 'nvidia-imex/nvidia-imex-config.erb') |
370 |
| - .with(user: 'root') |
371 |
| - .with(group: 'root') |
372 |
| - .with(mode: '0755') |
373 |
| - .with(variables: { imex_nodes_config_file_path: "#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg" }) |
374 |
| - is_expected.to create_template("/etc/systemd/system/nvidia-imex.service") |
375 |
| - .with(source: 'nvidia-imex/nvidia-imex.service.erb') |
376 |
| - .with(user: 'root') |
377 |
| - .with(group: 'root') |
378 |
| - .with(mode: '0644') |
379 |
| - .with(variables: { imex_main_config_file_path: "#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg" }) |
380 |
| - is_expected.to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 317 | + is_expected.not_to configure_nvidia_imex('nvidia-imex') |
381 | 318 | end
|
382 | 319 | end
|
383 |
| - end |
384 |
| - end |
385 | 320 |
|
386 |
| - context "when get_nvswitch_count <= 1" do |
387 |
| - cached(:chef_run) do |
388 |
| - stubs_for_provider('nvidia_imex[configure]') do |pro| |
389 |
| - allow(pro).to receive(:imex_installed?).and_return(true) |
390 |
| - allow(pro).to receive(:get_device_ids).and_return({ 'gb200' => 'test' }) |
391 |
| - allow(pro).to receive(:get_nvswitch_count).with('test').and_return(1) |
392 |
| - allow(pro).to receive(:enable_force_configuration?).and_return(force_indicator) |
| 321 | + %w(HeadNode LoginNode ComputeFleet).each do |node_type| |
| 322 | + context "when get_nvswitch_count > 1 on #{node_type} node" do |
| 323 | + cached(:chef_run) do |
| 324 | + stubs_for_provider('nvidia_imex[configure]') do |pro| |
| 325 | + allow(pro).to receive(:imex_installed?).and_return(true) |
| 326 | + allow(pro).to receive(:get_device_ids).and_return({ 'gb200' => 'test' }) |
| 327 | + allow(pro).to receive(:get_nvswitch_count).with('test').and_return(4) |
| 328 | + allow(pro).to receive(:enable_force_configuration?).and_return(force_indicator) |
| 329 | + allow(Dir).to receive(:exist?).with(nvidia_imex_shared_dir).and_return(shared_dir_exists) |
| 330 | + allow(File).to receive(:exist?).with(imex_service_file).and_return(imex_service_file_exists) |
| 331 | + end |
| 332 | + runner(platform: platform, version: version, step_into: ['nvidia_imex']) |
| 333 | + end |
| 334 | + cached(:node) { chef_run.node } |
| 335 | + |
| 336 | + before do |
| 337 | + chef_run.node.override['cluster']['region'] = 'aws_region' |
| 338 | + chef_run.node.override['cluster']['nvidia']['imex']['force_configuration'] = force_indicator |
| 339 | + chef_run.node.override['cluster']['nvidia']['imex']['shared_dir'] = nvidia_imex_shared_dir |
| 340 | + chef_run.node.override['cluster']['node_type'] = node_type |
| 341 | + chef_run.node.override['cluster']['scheduler_queue_name'] = queue_name |
| 342 | + chef_run.node.override['cluster']['scheduler_compute_resource_name'] = compute_resource_name |
| 343 | + |
| 344 | + ConvergeNvidiaImex.configure(chef_run) |
| 345 | + end |
| 346 | + |
| 347 | + if (platform == 'amazon' && version == '2') || %w(HeadNode LoginNode).include?(node_type) |
| 348 | + it 'does not configure nvidia-imex' do |
| 349 | + is_expected.not_to create_if_missing_template("#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg") |
| 350 | + .with(source: 'nvidia-imex/nvidia-imex-nodes.erb') |
| 351 | + .with(user: 'root') |
| 352 | + .with(group: 'root') |
| 353 | + .with(mode: '0755') |
| 354 | + is_expected.not_to create_if_missing_template("#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg") |
| 355 | + .with(source: 'nvidia-imex/nvidia-imex-config.erb') |
| 356 | + .with(user: 'root') |
| 357 | + .with(group: 'root') |
| 358 | + .with(mode: '0755') |
| 359 | + .with(variables: { imex_nodes_config_file_path: "#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg" }) |
| 360 | + is_expected.not_to create_template(imex_service_file) |
| 361 | + .with(source: 'nvidia-imex/nvidia-imex.service.erb') |
| 362 | + .with(user: 'root') |
| 363 | + .with(group: 'root') |
| 364 | + .with(mode: '0644') |
| 365 | + .with(variables: { imex_main_config_file_path: "#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg" }) |
| 366 | + is_expected.not_to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 367 | + end |
| 368 | + else |
| 369 | + it 'it starts nvidia-imex service' do |
| 370 | + if shared_dir_exists |
| 371 | + is_expected.to create_if_missing_template("#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg") |
| 372 | + .with(source: 'nvidia-imex/nvidia-imex-nodes.erb') |
| 373 | + .with(user: 'root') |
| 374 | + .with(group: 'root') |
| 375 | + .with(mode: '0755') |
| 376 | + is_expected.to create_if_missing_template("#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg") |
| 377 | + .with(source: 'nvidia-imex/nvidia-imex-config.erb') |
| 378 | + .with(user: 'root') |
| 379 | + .with(group: 'root') |
| 380 | + .with(mode: '0755') |
| 381 | + .with(variables: { imex_nodes_config_file_path: "#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg" }) |
| 382 | + is_expected.to create_template(imex_service_file) |
| 383 | + .with(source: 'nvidia-imex/nvidia-imex.service.erb') |
| 384 | + .with(user: 'root') |
| 385 | + .with(group: 'root') |
| 386 | + .with(mode: '0644') |
| 387 | + .with(variables: { imex_main_config_file_path: "#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg" }) |
| 388 | + else |
| 389 | + is_expected.not_to create_if_missing_template("#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg") |
| 390 | + .with(source: 'nvidia-imex/nvidia-imex-nodes.erb') |
| 391 | + .with(user: 'root') |
| 392 | + .with(group: 'root') |
| 393 | + .with(mode: '0755') |
| 394 | + is_expected.not_to create_if_missing_template("#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg") |
| 395 | + .with(source: 'nvidia-imex/nvidia-imex-config.erb') |
| 396 | + .with(user: 'root') |
| 397 | + .with(group: 'root') |
| 398 | + .with(mode: '0755') |
| 399 | + .with(variables: { imex_nodes_config_file_path: "#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg" }) |
| 400 | + is_expected.not_to create_template(imex_service_file) |
| 401 | + .with(source: 'nvidia-imex/nvidia-imex.service.erb') |
| 402 | + .with(user: 'root') |
| 403 | + .with(group: 'root') |
| 404 | + .with(mode: '0644') |
| 405 | + .with(variables: { imex_main_config_file_path: "#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg" }) |
| 406 | + end |
| 407 | + if imex_service_file_exists |
| 408 | + is_expected.to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 409 | + else |
| 410 | + is_expected.not_to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 411 | + end |
| 412 | + end |
| 413 | + end |
| 414 | + end |
393 | 415 | end
|
394 |
| - runner = runner(platform: platform, version: version, step_into: ['nvidia_imex']) |
395 |
| - ConvergeNvidiaImex.configure(runner) |
396 |
| - end |
397 |
| - cached(:node) { chef_run.node } |
398 | 416 |
|
399 |
| - before do |
400 |
| - chef_run.node.override['cluster']['region'] = 'aws_region' |
401 |
| - chef_run.node.override['cluster']['nvidia']['imex']['force_configuration'] = force_indicator |
402 |
| - end |
| 417 | + context "when get_nvswitch_count <= 1" do |
| 418 | + cached(:chef_run) do |
| 419 | + stubs_for_provider('nvidia_imex[configure]') do |pro| |
| 420 | + allow(pro).to receive(:imex_installed?).and_return(true) |
| 421 | + allow(pro).to receive(:get_device_ids).and_return({ 'gb200' => 'test' }) |
| 422 | + allow(pro).to receive(:get_nvswitch_count).with('test').and_return(1) |
| 423 | + allow(pro).to receive(:enable_force_configuration?).and_return(force_indicator) |
| 424 | + allow(Dir).to receive(:exist?).with(nvidia_imex_shared_dir).and_return(shared_dir_exists) |
| 425 | + allow(File).to receive(:exist?).with(imex_service_file).and_return(imex_service_file_exists) |
| 426 | + end |
| 427 | + runner = runner(platform: platform, version: version, step_into: ['nvidia_imex']) |
| 428 | + ConvergeNvidiaImex.configure(runner) |
| 429 | + end |
| 430 | + cached(:node) { chef_run.node } |
403 | 431 |
|
404 |
| - if ['true', 'yes', true].include?(force_indicator) |
405 |
| - it 'does configure nvidia-imex' do |
406 |
| - is_expected.to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
407 |
| - end |
408 |
| - else |
409 |
| - it 'does not configure nvidia-imex' do |
410 |
| - is_expected.not_to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 432 | + before do |
| 433 | + chef_run.node.override['cluster']['region'] = 'aws_region' |
| 434 | + chef_run.node.override['cluster']['nvidia']['imex']['force_configuration'] = force_indicator |
| 435 | + end |
| 436 | + |
| 437 | + if ['true', 'yes', true].include?(force_indicator) && imex_service_file_exists |
| 438 | + it 'does configure nvidia-imex' do |
| 439 | + is_expected.to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 440 | + end |
| 441 | + else |
| 442 | + it 'does not configure nvidia-imex' do |
| 443 | + is_expected.not_to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 444 | + end |
| 445 | + end |
411 | 446 | end
|
412 | 447 | end
|
413 | 448 | end
|
|
0 commit comments