77use std:: collections:: HashMap ;
88use std:: sync:: OnceLock ;
99
10+ use crate :: DiskPaths ;
11+ use crate :: Partition ;
12+ use crate :: PooledDiskError ;
1013use crate :: illumos:: gpt;
11- use crate :: { DiskPaths , Partition , PooledDiskError } ;
14+ use crate :: is_oxide_sled ;
1215use camino:: Utf8Path ;
1316use illumos_utils:: zpool:: Zpool ;
1417use illumos_utils:: zpool:: ZpoolName ;
@@ -26,11 +29,16 @@ static DEFAULT_NVME_LBA_DATA_SIZE: u64 = 4096;
/// NVMe device settings for a particular NVMe model.
///
/// Each entry describes how a known drive model should be configured: the
/// vendor-specific resize behavior (if any) and the LBA data size it should
/// be formatted with.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct NvmeDeviceSettings {
    /// How the device should be resized to deal with overprovisioning, if
    /// the vendor offers a mechanism for doing so.
    resize: NvmeVendorResize,
    /// An override for the default 4k LBA formatting.
    lba_data_size_override: Option<u64>,
}

/// The vendor-specific mechanism used to resize a device in order to deal
/// with overprovisioning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NvmeVendorResize {
    /// The device is resized with the WDC vendor-specific resize command.
    ///
    /// The payload is the desired size: it is compared against the value
    /// reported by the controller and, when they differ, handed to the WDC
    /// resize-set command as is.
    // NOTE(review): the unit of this value is not visible here (presumably
    // GB, given the 3200/6400 entries) -- confirm against libnvme.
    Wdc(u32),
    /// The device has no vendor-specific resize command, so its size is left
    /// untouched.
    Unsupported,
}
41+
3442/// A mapping from model to desired settings.
3543/// A device not found in this lookup table will not be modified by sled-agent.
3644static PREFERRED_NVME_DEVICE_SETTINGS : OnceLock <
@@ -43,23 +51,45 @@ fn preferred_nvme_device_settings()
4351 HashMap :: from ( [
4452 (
4553 "WUS4C6432DSP3X3" ,
46- NvmeDeviceSettings { size : 3200 , lba_data_size_override : None } ,
54+ NvmeDeviceSettings {
55+ resize : NvmeVendorResize :: Wdc ( 3200 ) ,
56+ lba_data_size_override : None ,
57+ } ,
4758 ) ,
4859 (
4960 "WUS5EA138ESP7E1" ,
50- NvmeDeviceSettings { size : 3200 , lba_data_size_override : None } ,
61+ NvmeDeviceSettings {
62+ resize : NvmeVendorResize :: Wdc ( 3200 ) ,
63+ lba_data_size_override : None ,
64+ } ,
5165 ) ,
5266 (
5367 "WUS5EA138ESP7E3" ,
54- NvmeDeviceSettings { size : 3200 , lba_data_size_override : None } ,
68+ NvmeDeviceSettings {
69+ resize : NvmeVendorResize :: Wdc ( 3200 ) ,
70+ lba_data_size_override : None ,
71+ } ,
5572 ) ,
5673 (
5774 "WUS5EA176ESP7E1" ,
58- NvmeDeviceSettings { size : 6400 , lba_data_size_override : None } ,
75+ NvmeDeviceSettings {
76+ resize : NvmeVendorResize :: Wdc ( 6400 ) ,
77+ lba_data_size_override : None ,
78+ } ,
5979 ) ,
6080 (
6181 "WUS5EA176ESP7E3" ,
62- NvmeDeviceSettings { size : 6400 , lba_data_size_override : None } ,
82+ NvmeDeviceSettings {
83+ resize : NvmeVendorResize :: Wdc ( 6400 ) ,
84+ lba_data_size_override : None ,
85+ } ,
86+ ) ,
87+ (
88+ "SDS6BA138PSP9X3" ,
89+ NvmeDeviceSettings {
90+ resize : NvmeVendorResize :: Unsupported ,
91+ lba_data_size_override : None ,
92+ } ,
6393 ) ,
6494 ] )
6595 } )
@@ -81,6 +111,8 @@ pub enum NvmeFormattingError {
81111 InfoError ( #[ from] libnvme:: controller_info:: NvmeInfoError ) ,
82112 #[ error( "Could not find NVMe controller for disk with serial {0}" ) ]
83113 NoController ( String ) ,
114+ #[ error( "Could not determine if host is an Oxide sled: {0}" ) ]
115+ SystemDetection ( #[ source] anyhow:: Error ) ,
84116}
85117
86118// The expected layout of an M.2 device within the Oxide rack.
@@ -267,101 +299,122 @@ fn ensure_size_and_formatting(
267299 use libnvme:: Nvme ;
268300 use libnvme:: namespace:: NamespaceDiscoveryLevel ;
269301
270- let mut controller_found = false ;
302+ // Check that we are on real Oxide hardware so that we avoid:
303+ // - Messing with NVMe devices in other environments
304+ // - Failing tests which use zvols rather than real NVMe devices
305+ // - Breaking virtual environments like a4x2 which likely don't expose or
306+ // implement changing the LBA on emulated devices.
307+ if !is_oxide_sled ( ) . map_err ( NvmeFormattingError :: SystemDetection ) ? {
308+ return Ok ( ( ) ) ;
309+ }
271310
272- if let Some ( nvme_settings) =
273- preferred_nvme_device_settings ( ) . get ( identity. model . as_str ( ) )
274- {
275- let nvme = Nvme :: new ( ) ?;
276- for controller in nvme. controller_discovery ( ) ? {
277- let controller = controller?. write_lock ( ) . map_err ( |( _, e) | e) ?;
278- let controller_info = controller. get_info ( ) ?;
279- // Make sure we are operating on the correct NVMe device.
280- if controller_info. serial ( ) != identity. serial {
281- continue ;
282- } ;
283- controller_found = true ;
284- let nsdisc = controller
285- . namespace_discovery ( NamespaceDiscoveryLevel :: Active ) ?;
286- let namespaces =
287- nsdisc. into_iter ( ) . collect :: < Result < Vec < _ > , _ > > ( ) ?;
288- if namespaces. len ( ) != 1 {
289- return Err ( NvmeFormattingError :: UnexpectedNamespaces (
290- namespaces. len ( ) ,
291- ) ) ;
292- }
293- // Safe because verified there is exactly one namespace.
294- let namespace = namespaces. into_iter ( ) . next ( ) . unwrap ( ) ;
295-
296- // NB: Only some vendors such as WDC support adjusting the size
297- // of the disk to deal with overprovisioning. This will need to be
298- // abstracted away if/when we ever start using another vendor with
299- // this capability.
300- let size = controller. wdc_resize_get ( ) ?;
301-
302- // First we need to detach blkdev from the namespace.
303- namespace. blkdev_detach ( ) ?;
304-
305- // Resize the device if needed to ensure we get the expected
306- // durability level in terms of drive writes per day.
307- if size != nvme_settings. size {
308- controller. wdc_resize_set ( nvme_settings. size ) ?;
309- info ! (
310- log,
311- "Resized {} from {size} to {}" ,
312- identity. serial,
313- nvme_settings. size
314- )
311+ let mut controller_found = false ;
312+ let nvme = Nvme :: new ( ) ?;
313+
314+ for controller in nvme. controller_discovery ( ) ? {
315+ let controller = controller?. write_lock ( ) . map_err ( |( _, e) | e) ?;
316+ let controller_info = controller. get_info ( ) ?;
317+
318+ // Make sure we are operating on the correct NVMe device.
319+ if controller_info. serial ( ) != identity. serial {
320+ continue ;
321+ } ;
322+ controller_found = true ;
323+ let nsdisc =
324+ controller. namespace_discovery ( NamespaceDiscoveryLevel :: Active ) ?;
325+ let namespaces = nsdisc. into_iter ( ) . collect :: < Result < Vec < _ > , _ > > ( ) ?;
326+
327+ // We only want to continue if there is a single namespace associated
328+ // with the device, so we accomplish this by pattern matching for it.
329+ let [ namespace] = namespaces. as_slice ( ) else {
330+ return Err ( NvmeFormattingError :: UnexpectedNamespaces (
331+ namespaces. len ( ) ,
332+ ) ) ;
333+ } ;
334+
335+ // First we need to detach blkdev from the namespace.
336+ namespace. blkdev_detach ( ) ?;
337+
338+ // Check for a known nvme drive and apply our desired configuration.
339+ let mut wanted_data_size = DEFAULT_NVME_LBA_DATA_SIZE ;
340+ if let Some ( nvme_settings) =
341+ preferred_nvme_device_settings ( ) . get ( identity. model . as_str ( ) )
342+ {
343+ match nvme_settings. resize {
344+ NvmeVendorResize :: Wdc ( provisioning_size) => {
345+ let size = controller. wdc_resize_get ( ) ?;
346+
347+ // Resize the device if needed to ensure we get the expected
348+ // durability level in terms of drive writes per day.
349+ if size != provisioning_size {
350+ controller. wdc_resize_set ( provisioning_size) ?;
351+ info ! (
352+ log,
353+ "Resized {} from {size} to {provisioning_size}" ,
354+ identity. serial,
355+ )
356+ }
357+ }
358+ // This device doesn't have a vendor specific resize command to
359+ // deal with overprovisioning so there's nothing to do.
360+ NvmeVendorResize :: Unsupported => ( ) ,
315361 }
316362
317- // Find the LBA format we want to use for the device.
318- let wanted_data_size = nvme_settings
319- . lba_data_size_override
320- . unwrap_or ( DEFAULT_NVME_LBA_DATA_SIZE ) ;
321- let desired_lba = controller_info
322- . lba_formats ( )
323- . collect :: < Result < Vec < _ > , _ > > ( ) ?
324- . into_iter ( )
325- . find ( |lba| {
326- lba. meta_size ( ) == NVME_LBA_META_SIZE
327- && lba. data_size ( ) == wanted_data_size
328- } )
329- . ok_or_else ( || NvmeFormattingError :: LbaFormatMissing ) ?;
330-
331- // If the controller isn't formatted to our desired LBA we need to
332- // issue a format request.
333- let ns_info = namespace. get_info ( ) ?;
334- let current_lba = ns_info. current_format ( ) ?;
335- if current_lba. id ( ) != desired_lba. id ( ) {
336- controller
337- . format_request ( ) ?
338- . set_lbaf ( desired_lba. id ( ) ) ?
339- // TODO map this to libnvme::BROADCAST_NAMESPACE once added
340- . set_nsid ( u32:: MAX ) ?
341- // No secure erase
342- . set_ses ( 0 ) ?
343- . execute ( ) ?;
344-
345- info ! (
346- log,
347- "Formatted disk with serial {} to an LBA with data size \
348- {wanted_data_size}",
349- identity. serial,
350- ) ;
363+ if let Some ( lba_data_size_override) =
364+ nvme_settings. lba_data_size_override
365+ {
366+ wanted_data_size = lba_data_size_override;
351367 }
368+ } else {
369+ info ! (
370+ log,
371+ "There are no preferred NVMe settings for disk model {}; will \
372+ attempt to format to the default LBA data size for disk with \
373+ serial {}",
374+ identity. model,
375+ identity. serial
376+ ) ;
377+ }
352378
353- // Attach blkdev to the namespace again
354- namespace. blkdev_attach ( ) ?;
379+ // Find the LBA format we want to use for the device.
380+ let desired_lba = controller_info
381+ . lba_formats ( )
382+ . collect :: < Result < Vec < _ > , _ > > ( ) ?
383+ . into_iter ( )
384+ . find ( |lba| {
385+ lba. meta_size ( ) == NVME_LBA_META_SIZE
386+ && lba. data_size ( ) == wanted_data_size
387+ } )
388+ . ok_or_else ( || NvmeFormattingError :: LbaFormatMissing ) ?;
389+
390+ // If the controller isn't formatted to our desired LBA we need to
391+ // issue a format request.
392+ let ns_info = namespace. get_info ( ) ?;
393+ let current_lba = ns_info. current_format ( ) ?;
394+ if current_lba. id ( ) != desired_lba. id ( ) {
395+ controller
396+ . format_request ( ) ?
397+ . set_lbaf ( desired_lba. id ( ) ) ?
398+ // TODO map this to libnvme::BROADCAST_NAMESPACE once added
399+ . set_nsid ( u32:: MAX ) ?
400+ // No secure erase
401+ . set_ses ( 0 ) ?
402+ . execute ( ) ?;
403+
404+ info ! (
405+ log,
406+ "Formatted disk with serial {} to an LBA with data size \
407+ {wanted_data_size}",
408+ identity. serial,
409+ ) ;
355410 }
356- } else {
357- info ! (
358- log,
359- "There are no preferred NVMe settings for disk model {}; nothing to\
360- do for disk with serial {}",
361- identity. model,
362- identity. serial
363- ) ;
364- return Ok ( ( ) ) ;
411+
412+ // Attach blkdev to the namespace again
413+ namespace. blkdev_attach ( ) ?;
414+
415+ // We found the disk and applied the settings so there's no use scanning
416+ // the rest of the devices.
417+ break ;
365418 }
366419
367420 if !controller_found {
0 commit comments