@@ -253,21 +253,6 @@ static int metaslab_perf_bias = 1;
253253 */
254254static const boolean_t zfs_remap_blkptr_enable = B_TRUE ;
255255
256- /*
257- * Enable/disable segment-based metaslab selection.
258- */
259- static int zfs_metaslab_segment_weight_enabled = B_TRUE ;
260-
261- /*
262- * Enable/disable the new space-based metaslab selection algorithm.
263- *
264- * The new space-based algorithm attempts to take into account not only the
265- * largest free segment, as the segment-based weight does, but other segments
266- * that are almost as large. This can improve metaslab selection and reduce the
267- * number of metaslab loads needed to satisfy a given set of allocations.
268- */
269- static int zfs_metaslab_space_weight_v2_enabled = B_TRUE ;
270-
271256/*
272257 * When using segment-based metaslab selection, we will continue
273258 * allocating from the active metaslab until we have exhausted
@@ -433,7 +418,7 @@ metaslab_stat_fini(void)
433418 */
434419metaslab_class_t *
435420metaslab_class_create (spa_t * spa , const char * name ,
436- const metaslab_ops_t * ops , boolean_t is_log )
421+ const metaslab_ops_t * ops , const metaslab_wfs_t * wfs , boolean_t is_log )
437422{
438423 metaslab_class_t * mc ;
439424
@@ -443,6 +428,7 @@ metaslab_class_create(spa_t *spa, const char *name,
443428 mc -> mc_spa = spa ;
444429 mc -> mc_name = name ;
445430 mc -> mc_ops = ops ;
431+ mc -> mc_wfs = wfs ;
446432 mc -> mc_is_log = is_log ;
447433 mc -> mc_alloc_io_size = SPA_OLD_MAXBLOCKSIZE ;
448434 mc -> mc_alloc_max = UINT64_MAX ;
@@ -3082,6 +3068,79 @@ metaslab_fini(metaslab_t *msp)
30823068 kmem_free (msp , sizeof (metaslab_t ));
30833069}
30843070
3071+ static uint64_t metaslab_space_weight (metaslab_t * msp );
3072+ static uint64_t metaslab_segment_weight (metaslab_t * msp );
3073+ static uint64_t metaslab_space_weight_v2 (metaslab_t * msp );
3074+ metaslab_wfs_t * metaslab_weightfunc (spa_t * spa );
3075+ /* Name -> weight function table; spa_active_weightfunc holds an index into it. */
3076+ static metaslab_wfs_t metaslab_weightfuncs [] = {
3077+ { "auto" , metaslab_space_weight_v2 }, /* entry 0: what spa_set_weightfunc() falls back to for unknown names */
3078+ { "space" , metaslab_space_weight }, /* the only entry spa_set_weightfunc() never warns about */
3079+ { "space_v2" , metaslab_space_weight_v2 }, /* same function as "auto" */
3080+ { "segment" , metaslab_segment_weight },
3081+ };
3082+ /* Return the metaslab_weightfuncs[] index whose name equals val, or -1 (NULL or unknown). */
3083+ static int
3084+ spa_find_weightfunc_byname(const char *val)
3085+ {
3086+ 	int i = (val == NULL) ? -1 : (int)ARRAY_SIZE(metaslab_weightfuncs) - 1;
3087+ 	for (; i >= 0; i--) {
3088+ 		if (strcmp(val, metaslab_weightfuncs[i].mswf_name) == 0)
3089+ 			return (i);
3090+ 	}
3091+ 	return (-1);
3092+ }
3093+ /* Select spa's weight function by name; unknown names fall back to entry 0 ("auto"). */
3094+ void
3095+ spa_set_weightfunc(spa_t *spa, const char *weightfunc)
3096+ {
3097+ 	int a = spa_find_weightfunc_byname(weightfunc);
3098+ 	a = (a < 0) ? 0 : a;
3099+ 	const char *name = metaslab_weightfuncs[a].mswf_name; /* resolved name */
3100+ 	if (metaslab_weightfuncs[a].mswf_func != metaslab_space_weight &&
3101+ 	    !spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
3102+ 		zfs_dbgmsg("warning: weight function %s will not be used for "
3103+ 		    "pool %s since space map histograms are not enabled",
3104+ 		    name, spa_name(spa));
3105+ 	}
3106+ 	spa->spa_active_weightfunc = a;
3107+ 	zfs_dbgmsg("spa weight function: %s", name);
3108+ }
3109+ /* Return spa->spa_active_weightfunc, the metaslab_weightfuncs[] index stored by spa_set_weightfunc(). */
3110+ int
3111+ spa_get_weightfunc (spa_t * spa )
3112+ {
3113+ return (spa -> spa_active_weightfunc );
3114+ }
3115+ #if defined(_KERNEL)
3116+ /* Validate a weight function name; on success point zfs_active_weightfunc at the table's copy. */
3117+ int
3118+ param_set_active_weightfunc_common(const char *val)
3119+ {
3120+ 	if (val == NULL)
3121+ 		return (SET_ERROR(EINVAL));
3122+
3123+ 	/* Cut val at its first newline; this writes into the caller's buffer. */
3124+ 	char *newline = strchr(val, '\n');
3125+ 	if (newline != NULL)
3126+ 		*newline = '\0';
3127+
3128+ 	int idx = spa_find_weightfunc_byname(val);
3129+ 	if (idx >= 0) {
3130+ 		zfs_active_weightfunc = metaslab_weightfuncs[idx].mswf_name;
3131+ 		return (0);
3132+ 	}
3133+ 	return (SET_ERROR(EINVAL));	/* name not present in the table */
3134+ }
3135+ #endif
3136+ /* Return the metaslab_weightfuncs[] entry currently selected for spa. */
3137+ metaslab_wfs_t *
3138+ metaslab_weightfunc(spa_t *spa)
3139+ {
3140+ 	/* The index comes from spa_get_weightfunc() and is used unchecked. */
3141+ 	return (&metaslab_weightfuncs[spa_get_weightfunc(spa)]);
3142+ }
3143+
30853144/*
30863145 * Return the weight of the specified metaslab, according to the new space-based
30873146 * weighting algorithm. The metaslab must be loaded. This function can
@@ -3156,7 +3215,7 @@ metaslab_space_weight_from_spacemap(metaslab_t *msp)
31563215 if (segments == 0 )
31573216 continue ;
31583217 if (weight == 0 )
3159- weight = i + sm -> sm_shift ;
3218+ weight = i + sm -> sm_shift ;
31603219 // Prevent overflow using log_2 math
31613220 if (seg_shift + highbit64 (segments ) > METASLAB_WEIGHT_MAX_IDX )
31623221 return (METASLAB_WEIGHT_MAX );
@@ -3186,8 +3245,16 @@ static uint64_t
31863245metaslab_space_weight_v2 (metaslab_t * msp )
31873246{
31883247 metaslab_group_t * mg = msp -> ms_group ;
3248+ spa_t * spa = mg -> mg_vd -> vdev_spa ;
31893249 uint64_t weight = 0 ;
31903250 uint8_t shift = mg -> mg_vd -> vdev_ashift ;
3251+
3252+ if (!spa_feature_is_enabled (spa , SPA_FEATURE_SPACEMAP_HISTOGRAM ) ||
3253+ (msp -> ms_sm != NULL && msp -> ms_sm -> sm_dbuf -> db_size !=
3254+ sizeof (space_map_phys_t ))) {
3255+ return (metaslab_space_weight (msp ));
3256+ }
3257+
31913258 if (metaslab_allocated_space (msp ) == 0 ) {
31923259 int idx = highbit64 (msp -> ms_size ) - shift - 1 + 3 ;
31933260 weight = 1ULL << MIN (METASLAB_WEIGHT_MAX_IDX , 2 * idx );
@@ -3365,18 +3432,10 @@ metaslab_space_weight(metaslab_t *msp)
33653432{
33663433 metaslab_group_t * mg = msp -> ms_group ;
33673434 vdev_t * vd = mg -> mg_vd ;
3368- spa_t * spa = vd -> vdev_spa ;
33693435 uint64_t weight , space ;
33703436
33713437 ASSERT (MUTEX_HELD (& msp -> ms_lock ));
33723438
3373- if (zfs_metaslab_space_weight_v2_enabled &&
3374- spa_feature_is_enabled (spa , SPA_FEATURE_SPACEMAP_HISTOGRAM ) &&
3375- (msp -> ms_sm == NULL || msp -> ms_sm -> sm_dbuf -> db_size ==
3376- sizeof (space_map_phys_t ))) {
3377- return (metaslab_space_weight_v2 (msp ));
3378- }
3379-
33803439 /*
33813440 * The baseline weight is the metaslab's free space.
33823441 */
@@ -3532,11 +3591,18 @@ static uint64_t
35323591metaslab_segment_weight (metaslab_t * msp )
35333592{
35343593 metaslab_group_t * mg = msp -> ms_group ;
3594+ spa_t * spa = mg -> mg_vd -> vdev_spa ;
35353595 uint64_t weight = 0 ;
35363596 uint8_t shift = mg -> mg_vd -> vdev_ashift ;
35373597
35383598 ASSERT (MUTEX_HELD (& msp -> ms_lock ));
35393599
3600+ if (!spa_feature_is_enabled (spa , SPA_FEATURE_SPACEMAP_HISTOGRAM ) ||
3601+ (msp -> ms_sm != NULL && msp -> ms_sm -> sm_dbuf -> db_size !=
3602+ sizeof (space_map_phys_t ))) {
3603+ return (metaslab_space_weight (msp ));
3604+ }
3605+
35403606 /*
35413607 * The metaslab is completely free.
35423608 */
@@ -3641,8 +3707,6 @@ metaslab_should_allocate(metaslab_t *msp, uint64_t asize, boolean_t try_hard)
36413707static uint64_t
36423708metaslab_weight (metaslab_t * msp , boolean_t nodirty )
36433709{
3644- vdev_t * vd = msp -> ms_group -> mg_vd ;
3645- spa_t * spa = vd -> vdev_spa ;
36463710 uint64_t weight ;
36473711
36483712 ASSERT (MUTEX_HELD (& msp -> ms_lock ));
@@ -3666,17 +3730,7 @@ metaslab_weight(metaslab_t *msp, boolean_t nodirty)
36663730 metaslab_largest_unflushed_free (msp ));
36673731 }
36683732
3669- /*
3670- * Segment-based weighting requires space map histogram support.
3671- */
3672- if (zfs_metaslab_segment_weight_enabled &&
3673- spa_feature_is_enabled (spa , SPA_FEATURE_SPACEMAP_HISTOGRAM ) &&
3674- (msp -> ms_sm == NULL || msp -> ms_sm -> sm_dbuf -> db_size ==
3675- sizeof (space_map_phys_t ))) {
3676- weight = metaslab_segment_weight (msp );
3677- } else {
3678- weight = metaslab_space_weight (msp );
3679- }
3733+ weight = msp -> ms_group -> mg_class -> mc_wfs -> mswf_func (msp );
36803734 return (weight );
36813735}
36823736
@@ -6566,12 +6620,6 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, bias_enabled, INT, ZMOD_RW,
65666620ZFS_MODULE_PARAM (zfs_metaslab , metaslab_ , perf_bias , INT , ZMOD_RW ,
65676621 "Enable performance-based metaslab group biasing" );
65686622
6569- ZFS_MODULE_PARAM (zfs_metaslab , zfs_metaslab_ , segment_weight_enabled , INT ,
6570- ZMOD_RW , "Enable segment-based metaslab selection" );
6571-
6572- ZFS_MODULE_PARAM (zfs_metaslab , zfs_metaslab_ , space_weight_v2_enabled , INT ,
6573- ZMOD_RW , "Enable new space-based metaslab selection" );
6574-
65756623ZFS_MODULE_PARAM (zfs_metaslab , zfs_metaslab_ , switch_threshold , INT , ZMOD_RW ,
65766624 "Segment-based metaslab selection maximum buckets before switching" );
65776625
@@ -6602,3 +6650,7 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, find_max_tries, UINT, ZMOD_RW,
66026650ZFS_MODULE_PARAM_CALL (zfs , zfs_ , active_allocator ,
66036651 param_set_active_allocator , param_get_charp , ZMOD_RW ,
66046652 "SPA active allocator" );
6653+
6654+ ZFS_MODULE_PARAM_CALL (zfs , zfs_ , active_weightfunc ,
6655+ param_set_active_weightfunc , param_get_charp , ZMOD_RW ,
6656+ "SPA active weight function" );
0 commit comments