diff --git a/mpu/layers.py b/mpu/layers.py
index 5ff0b33..08df8a8 100644
--- a/mpu/layers.py
+++ b/mpu/layers.py
@@ -186,7 +186,7 @@ class ColumnParallelLinear(torch.nn.Module):
         input_size: first dimension of matrix A.
         output_size: second dimension of matrix A.
         bias: If true, add bias
-        gather_output: If true, call all-gether on output and make Y avaiable
+        gather_output: If true, call all-gather on output and make Y available
                        to all GPUs, otherwise, every GPU will have its output
                        which is Y_i = XA_i
         init_method: method to initialize weights. Note that bias is always set