Add new argument for sharding type (#2969)

jd7-tr · facebook-github-bot · commit 8cda1a4a8d50 · 2025-05-14T20:34:37.000-07:00
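Summary: Pull Request resolved: #2969 Add a new `sharding_type` argument to `shard_quant_model` so that callers can specify the sharding type placed in the default constraints (built when `constraints` is None), which was previously hardcoded to `ShardingType.TABLE_WISE`. The default value remains `ShardingType.TABLE_WISE`, so existing callers are unaffected. Note that an assertion currently rejects any value other than `ShardingType.TABLE_WISE` until new sharding types are validated (TODO T220572301). Reviewed By: aporialiao Differential Revision: D73540534 fbshipit-source-id: bebefd06f38b98df23120e3a1217b5d5e92a357b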
diff --git a/torchrec/inference/modules.py b/torchrec/inference/modules.py
@@ -499,6 +499,7 @@ def shard_quant_model(
     device_memory_size: Optional[int] = None,
     constraints: Optional[Dict[str, ParameterConstraints]] = None,
     ddr_cap: Optional[int] = None,
+    sharding_type: ShardingType = ShardingType.TABLE_WISE,
 ) -> Tuple[torch.nn.Module, ShardingPlan]:
     """
     Shard a quantized TorchRec model, used for generating the most optimal model for inference and
@@ -534,6 +535,10 @@ def shard_quant_model(
         quant_model = quantize_inference_model(module)
         sharded_model, _ = shard_quant_model(quant_model)
     """
+    # TODO(T220572301): remove after new sharding types are validated.
+    assert (
+        sharding_type == ShardingType.TABLE_WISE
+    ), "Only table-wise sharding is supported now."
 
     if constraints is None:
         table_fqns = []
@@ -552,7 +557,7 @@ def shard_quant_model(
         constraints = {}
         for name in table_fqns:
             constraints[name] = ParameterConstraints(
-                sharding_types=[ShardingType.TABLE_WISE.value],
+                sharding_types=[sharding_type.value],
                 compute_kernels=[EmbeddingComputeKernel.QUANT.value],
             )