diff --git a/CHANGELOG.md b/CHANGELOG.md
index f5692e60..92300b29 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## Unreleased
 ### Changed
+## v0.15.0
+### Changed
+- PyTorch v2.2 support
+
 ## v0.14.0
 ### Changed
 - PyTorch v2.1 support
diff --git a/Cargo.toml b/Cargo.toml
index d0d46c79..49286e9c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tch"
-version = "0.14.0"
+version = "0.15.0"
 authors = ["Laurent Mazare <lmazare@gmail.com>"]
 edition = "2021"
 build = "build.rs"
@@ -22,7 +22,7 @@ libc = "0.2.0"
 ndarray = "0.15"
 rand = "0.8"
 thiserror = "1"
-torch-sys = { version = "0.14.0", path = "torch-sys" }
+torch-sys = { version = "0.15.0", path = "torch-sys" }
 zip = "0.6"
 half = "2"
 safetensors = "0.3.0"
diff --git a/README.md b/README.md
index 0582b258..6761797a 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ The code generation part for the C api on top of libtorch comes from
 
 ## Getting Started
 
-This crate requires the C++ PyTorch library (libtorch) in version *v2.1.0* to be available on
+This crate requires the C++ PyTorch library (libtorch) in version *v2.2.0* to be available on
 your system. You can either:
 
 - Use the system-wide libtorch installation (default).
@@ -85,7 +85,7 @@ seem to include `libtorch.a` by default so this would have to be compiled
 manually, e.g. via the following:
 
 ```bash
-git clone -b v2.1.0 --recurse-submodule https://github.com/pytorch/pytorch.git pytorch-static --depth 1
+git clone -b v2.2.0 --recurse-submodule https://github.com/pytorch/pytorch.git pytorch-static --depth 1
 cd pytorch-static
 USE_CUDA=OFF BUILD_SHARED_LIBS=OFF python setup.py build
 # export LIBTORCH to point at the build directory in pytorch-static.
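Once `LIBTORCH` (or a system-wide install) points at libtorch v2.2.0, a minimal smoke test confirms the upgraded toolchain links and runs. This sketch is not part of the patch; it only assumes the long-standing `tch` basics (`Tensor::from_slice`, `Tensor::print`, `Cuda::is_available`):

```rust
use tch::{Cuda, Tensor};

fn main() {
    // Links against whatever libtorch the build found; with this release that
    // should be v2.2.0.
    let t = Tensor::from_slice(&[3i64, 1, 4, 1, 5]) * 2;
    t.print();
    println!("cuda available: {}", Cuda::is_available());
}
```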
diff --git a/examples/python-extension/Cargo.toml b/examples/python-extension/Cargo.toml
index ca978580..8bb57a24 100644
--- a/examples/python-extension/Cargo.toml
+++ b/examples/python-extension/Cargo.toml
@@ -18,6 +18,6 @@ crate-type = ["cdylib"]
 
 [dependencies]
 pyo3 = { version = "0.18.3", features = ["extension-module"] }
-pyo3-tch = { path = "../../pyo3-tch", version = "0.14.0" }
-tch = { path = "../..", features = ["python-extension"], version = "0.14.0" }
-torch-sys = { path = "../../torch-sys", features = ["python-extension"], version = "0.14.0" }
+pyo3-tch = { path = "../../pyo3-tch", version = "0.15.0" }
+tch = { path = "../..", features = ["python-extension"], version = "0.15.0" }
+torch-sys = { path = "../../torch-sys", features = ["python-extension"], version = "0.15.0" }
diff --git a/gen/gen.ml b/gen/gen.ml
index 4b119806..009d82b5 100644
--- a/gen/gen.ml
+++ b/gen/gen.ml
@@ -878,7 +878,7 @@ let run
 
 let () =
   run
-    ~yaml_filename:"third_party/pytorch/Declarations-v2.1.0.yaml"
+    ~yaml_filename:"third_party/pytorch/Declarations-v2.2.0.yaml"
     ~cpp_filename:"torch-sys/libtch/torch_api_generated"
     ~ffi_filename:"torch-sys/src/c_generated.rs"
     ~wrapper_filename:"src/wrappers/tensor_generated.rs"
diff --git a/pyo3-tch/Cargo.toml b/pyo3-tch/Cargo.toml
index 86ca8aa9..61b06a3e 100644
--- a/pyo3-tch/Cargo.toml
+++ b/pyo3-tch/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "pyo3-tch"
-version = "0.14.0"
+version = "0.15.0"
 authors = ["Laurent Mazare <lmazare@gmail.com>"]
 edition = "2021"
 build = "build.rs"
@@ -12,6 +12,6 @@ categories = ["science"]
 license = "MIT/Apache-2.0"
 
 [dependencies]
-tch = { path = "..", features = ["python-extension"], version = "0.14.0" }
-torch-sys = { path = "../torch-sys", features = ["python-extension"], version = "0.14.0" }
+tch = { path = "..", features = ["python-extension"], version = "0.15.0" }
+torch-sys = { path = "../torch-sys", features = ["python-extension"], version = "0.15.0" }
 pyo3 = { version = "0.18.3", features = ["extension-module"] }
diff --git a/src/wrappers/tensor_fallible_generated.rs b/src/wrappers/tensor_fallible_generated.rs
index 10260711..4939ea59 100644
--- a/src/wrappers/tensor_fallible_generated.rs
+++ b/src/wrappers/tensor_fallible_generated.rs
@@ -1054,6 +1054,19 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_internal_convert_weight_to_int4pack(
+        &self,
+        innerktiles: i64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg__convert_weight_to_int4pack(
+            c_tensors.as_mut_ptr(),
+            self.c_tensor,
+            innerktiles
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_internal_convolution<T: Borrow<Tensor>>(
         &self,
         weight: &Tensor,
@@ -1264,6 +1277,8 @@ impl Tensor {
         compressed_a: &Tensor,
         dense_b: &Tensor,
         bias: Option<T>,
+        alpha: Option<T>,
+        out_dtype: impl Into<Option<Kind>>,
         transpose_result: bool,
     ) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg__cslt_sparse_mm(
@@ -1272,6 +1287,8 @@ impl Tensor {
             compressed_a.c_tensor,
             dense_b.c_tensor,
             bias.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
+            alpha.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
+            out_dtype.into().map_or(-1, |s| s.c_int()),
             if transpose_result { 1 } else { 0 }
         ));
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
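`_cslt_sparse_mm` gains two arguments in this release: an optional `alpha` tensor and an optional `out_dtype`. A hedged sketch of the updated call through the infallible wrapper; it assumes a CUDA build of libtorch with cuSPARSELt support, and passing `None` for both new parameters should reproduce the pre-0.15 behaviour:

```rust
use tch::{Kind, Tensor};

// Sketch only: `compressed_a` must come from `_cslt_compress` on a supported GPU.
fn cslt_sparse_mm_sketch(compressed_a: &Tensor, dense_b: &Tensor) -> Tensor {
    Tensor::internal_cslt_sparse_mm(
        compressed_a,
        dense_b,
        None::<Tensor>, // bias
        None::<Tensor>, // alpha (new in v0.15.0)
        None::<Kind>,   // out_dtype (new in v0.15.0)
        false,          // transpose_result
    )
}
```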
@@ -1803,8 +1820,8 @@ impl Tensor {
         out: &Tensor,
         cu_seqlens_q: Option<T>,
         cu_seqlens_k: Option<T>,
-        max_seqlen_k: i64,
         max_seqlen_q: i64,
+        max_seqlen_k: i64,
         logsumexp: &Tensor,
         dropout_p: f64,
         philox_seed: &Tensor,
@@ -1827,8 +1844,8 @@ impl Tensor {
             out.c_tensor,
             cu_seqlens_q.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
             cu_seqlens_k.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
-            max_seqlen_k,
             max_seqlen_q,
+            max_seqlen_k,
             logsumexp.c_tensor,
             dropout_p,
             philox_seed.c_tensor,
@@ -3939,6 +3956,26 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_internal_mixed_dtypes_linear<T: Borrow<Tensor>>(
+        &self,
+        weight: &Tensor,
+        scale: &Tensor,
+        bias: Option<T>,
+        activation: &str,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg__mixed_dtypes_linear(
+            c_tensors.as_mut_ptr(),
+            self.c_tensor,
+            weight.c_tensor,
+            scale.c_tensor,
+            bias.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
+            activation.as_ptr(),
+            activation.len() as i32
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_internal_mkldnn_reshape(&self, shape: impl IntList) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg__mkldnn_reshape(
@@ -5140,6 +5177,7 @@ impl Tensor {
         scale_a: Option<T>,
         scale_b: Option<T>,
         scale_result: Option<T>,
+        use_fast_accum: bool,
     ) -> Result<(Tensor, Tensor), TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 2];
         unsafe_torch_err!(atg__scaled_mm(
@@ -5150,7 +5188,8 @@ impl Tensor {
             out_dtype.into().map_or(-1, |s| s.c_int()),
             scale_a.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
             scale_b.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
-            scale_result.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor)
+            scale_result.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
+            if use_fast_accum { 1 } else { 0 }
         ));
         Ok((Tensor { c_tensor: c_tensors[0] }, Tensor { c_tensor: c_tensors[1] }))
     }
@@ -5165,6 +5204,7 @@ impl Tensor {
         scale_a: Option<T>,
         scale_b: Option<T>,
         scale_result: Option<T>,
+        use_fast_accum: bool,
     ) -> Result<(Tensor, Tensor), TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 2];
         unsafe_torch_err!(atg__scaled_mm_out(
@@ -5177,7 +5217,8 @@ impl Tensor {
             out_dtype.into().map_or(-1, |s| s.c_int()),
             scale_a.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
             scale_b.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
-            scale_result.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor)
+            scale_result.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
+            if use_fast_accum { 1 } else { 0 }
         ));
         Ok((Tensor { c_tensor: c_tensors[0] }, Tensor { c_tensor: c_tensors[1] }))
     }
@@ -8102,6 +8143,23 @@ impl Tensor {
         Ok(return_)
     }
 
+    pub fn f_internal_weight_int4pack_mm(
+        &self,
+        mat2: &Tensor,
+        qgroupsize: i64,
+        qscaleandzeros: &Tensor,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg__weight_int4pack_mm(
+            c_tensors.as_mut_ptr(),
+            self.c_tensor,
+            mat2.c_tensor,
+            qgroupsize,
+            qscaleandzeros.c_tensor
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_internal_weight_norm(v: &Tensor, g: &Tensor, dim: i64) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg__weight_norm(c_tensors.as_mut_ptr(), v.c_tensor, g.c_tensor, dim));
@@ -8909,6 +8967,36 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_all_dims(&self, dim: impl IntListOption, keepdim: bool) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_all_dims(
+            c_tensors.as_mut_ptr(),
+            self.c_tensor,
+            dim.as_ptr(),
+            dim.len_i32(),
+            if keepdim { 1 } else { 0 }
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
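The new `_dims` reductions take an optional list of dimensions instead of a single `i64`. A usage sketch via the panicking wrappers added later in this patch, assuming the usual `IntListOption` impls for `Option<&[i64]>`:

```rust
use tch::Tensor;

fn main() {
    let t = Tensor::from_slice(&[true, false, true, false]).view((2, 2));
    // Reduce over an explicit dim list -> shape [2].
    let per_row = t.any_dims(Some(&[1i64][..]), false);
    // A `None` dim list reduces over every dimension -> 0-dim result.
    let overall = t.all_dims(None::<&[i64]>, false);
    per_row.print();
    overall.print();
}
```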
+    pub fn f_all_dims_out(
+        &self,
+        out: &Tensor,
+        dim: impl IntListOption,
+        keepdim: bool,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_all_dims_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            self.c_tensor,
+            dim.as_ptr(),
+            dim.len_i32(),
+            if keepdim { 1 } else { 0 }
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_all_out(&self, out: &Tensor, dim: i64, keepdim: bool) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg_all_out(
@@ -9096,6 +9184,36 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_any_dims(&self, dim: impl IntListOption, keepdim: bool) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_any_dims(
+            c_tensors.as_mut_ptr(),
+            self.c_tensor,
+            dim.as_ptr(),
+            dim.len_i32(),
+            if keepdim { 1 } else { 0 }
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_any_dims_out(
+        &self,
+        out: &Tensor,
+        dim: impl IntListOption,
+        keepdim: bool,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_any_dims_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            self.c_tensor,
+            dim.as_ptr(),
+            dim.len_i32(),
+            if keepdim { 1 } else { 0 }
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_any_out(&self, out: &Tensor, dim: i64, keepdim: bool) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg_any_out(
@@ -16670,6 +16788,21 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_floor_divide_scalar_out<S: Into<Scalar>>(
+        &self,
+        out: &Tensor,
+        other: S,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_floor_divide_scalar_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            self.c_tensor,
+            other.into().c_scalar
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_floor_out(&self, out: &Tensor) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg_floor_out(c_tensors.as_mut_ptr(), out.c_tensor, self.c_tensor));
@@ -21547,6 +21680,111 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_linspace_scalar_tensor<S: Into<Scalar>>(
+        start: S,
+        end: &Tensor,
+        steps: i64,
+        options: (Kind, Device),
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_linspace_scalar_tensor(
+            c_tensors.as_mut_ptr(),
+            start.into().c_scalar,
+            end.c_tensor,
+            steps,
+            options.0.c_int(),
+            options.1.c_int()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_linspace_scalar_tensor_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: S,
+        end: &Tensor,
+        steps: i64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_linspace_scalar_tensor_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            start.into().c_scalar,
+            end.c_tensor,
+            steps
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_linspace_tensor_scalar<S: Into<Scalar>>(
+        start: &Tensor,
+        end: S,
+        steps: i64,
+        options: (Kind, Device),
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_linspace_tensor_scalar(
+            c_tensors.as_mut_ptr(),
+            start.c_tensor,
+            end.into().c_scalar,
+            steps,
+            options.0.c_int(),
+            options.1.c_int()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_linspace_tensor_scalar_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: &Tensor,
+        end: S,
+        steps: i64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_linspace_tensor_scalar_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            start.c_tensor,
+            end.into().c_scalar,
+            steps
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_linspace_tensor_tensor(
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+        options: (Kind, Device),
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_linspace_tensor_tensor(
+            c_tensors.as_mut_ptr(),
+            start.c_tensor,
+            end.c_tensor,
+            steps,
+            options.0.c_int(),
+            options.1.c_int()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_linspace_tensor_tensor_out(
+        out: &Tensor,
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_linspace_tensor_tensor_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            start.c_tensor,
+            end.c_tensor,
+            steps
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_log(&self) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg_log(c_tensors.as_mut_ptr(), self.c_tensor));
@@ -21979,6 +22217,123 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_logspace_scalar_tensor<S: Into<Scalar>>(
+        start: S,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+        options: (Kind, Device),
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_logspace_scalar_tensor(
+            c_tensors.as_mut_ptr(),
+            start.into().c_scalar,
+            end.c_tensor,
+            steps,
+            base,
+            options.0.c_int(),
+            options.1.c_int()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_logspace_scalar_tensor_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: S,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_logspace_scalar_tensor_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            start.into().c_scalar,
+            end.c_tensor,
+            steps,
+            base
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_logspace_tensor_scalar<S: Into<Scalar>>(
+        start: &Tensor,
+        end: S,
+        steps: i64,
+        base: f64,
+        options: (Kind, Device),
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_logspace_tensor_scalar(
+            c_tensors.as_mut_ptr(),
+            start.c_tensor,
+            end.into().c_scalar,
+            steps,
+            base,
+            options.0.c_int(),
+            options.1.c_int()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_logspace_tensor_scalar_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: &Tensor,
+        end: S,
+        steps: i64,
+        base: f64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_logspace_tensor_scalar_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            start.c_tensor,
+            end.into().c_scalar,
+            steps,
+            base
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_logspace_tensor_tensor(
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+        options: (Kind, Device),
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_logspace_tensor_tensor(
+            c_tensors.as_mut_ptr(),
+            start.c_tensor,
+            end.c_tensor,
+            steps,
+            base,
+            options.0.c_int(),
+            options.1.c_int()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_logspace_tensor_tensor_out(
+        out: &Tensor,
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_logspace_tensor_tensor_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            start.c_tensor,
+            end.c_tensor,
+            steps,
+            base
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_logsumexp(&self, dim: impl IntList, keepdim: bool) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg_logsumexp(
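PyTorch 2.2 adds `linspace`/`logspace` overloads whose endpoints are tensors, surfaced here with `_scalar_tensor`, `_tensor_scalar`, and `_tensor_tensor` suffixes. A small sketch of the tensor-tensor form; it assumes 0-dim endpoint tensors, as the upstream ops require:

```rust
use tch::{Device, Kind, Tensor};

fn main() {
    let start = Tensor::from(0f64); // 0-dim endpoints
    let end = Tensor::from(1f64);
    let xs = Tensor::linspace_tensor_tensor(&start, &end, 5, (Kind::Float, Device::Cpu));
    xs.print(); // expected: 0.00, 0.25, 0.50, 0.75, 1.00
}
```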
@@ -22405,6 +22760,22 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_masked_scatter_backward(
+        grad_output: &Tensor,
+        mask: &Tensor,
+        sizes: impl IntList,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_masked_scatter_backward(
+            c_tensors.as_mut_ptr(),
+            grad_output.c_tensor,
+            mask.c_tensor,
+            sizes.as_ptr(),
+            sizes.len_i32()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_masked_scatter_out(
         &self,
         out: &Tensor,
diff --git a/src/wrappers/tensor_generated.rs b/src/wrappers/tensor_generated.rs
index eb392928..a4471129 100644
--- a/src/wrappers/tensor_generated.rs
+++ b/src/wrappers/tensor_generated.rs
@@ -506,6 +506,10 @@ impl Tensor {
         .unwrap()
     }
 
+    pub fn internal_convert_weight_to_int4pack(&self, innerktiles: i64) -> Tensor {
+        self.f_internal_convert_weight_to_int4pack(innerktiles).unwrap()
+    }
+
     pub fn internal_convolution<T: Borrow<Tensor>>(
         &self,
         weight: &Tensor,
@@ -638,9 +642,19 @@ impl Tensor {
         compressed_a: &Tensor,
         dense_b: &Tensor,
         bias: Option<T>,
+        alpha: Option<T>,
+        out_dtype: impl Into<Option<Kind>>,
         transpose_result: bool,
     ) -> Tensor {
-        Tensor::f_internal_cslt_sparse_mm(compressed_a, dense_b, bias, transpose_result).unwrap()
+        Tensor::f_internal_cslt_sparse_mm(
+            compressed_a,
+            dense_b,
+            bias,
+            alpha,
+            out_dtype,
+            transpose_result,
+        )
+        .unwrap()
     }
 
     pub fn internal_ctc_loss(
@@ -1065,8 +1079,8 @@ impl Tensor {
         out: &Tensor,
         cu_seqlens_q: Option<T>,
         cu_seqlens_k: Option<T>,
-        max_seqlen_k: i64,
         max_seqlen_q: i64,
+        max_seqlen_k: i64,
         logsumexp: &Tensor,
         dropout_p: f64,
         philox_seed: &Tensor,
@@ -1085,8 +1099,8 @@ impl Tensor {
             out,
             cu_seqlens_q,
             cu_seqlens_k,
-            max_seqlen_k,
             max_seqlen_q,
+            max_seqlen_k,
             logsumexp,
             dropout_p,
             philox_seed,
@@ -2367,6 +2381,16 @@ impl Tensor {
         self.f_internal_masked_softmax_out(out, mask, dim, mask_type).unwrap()
     }
 
+    pub fn internal_mixed_dtypes_linear<T: Borrow<Tensor>>(
+        &self,
+        weight: &Tensor,
+        scale: &Tensor,
+        bias: Option<T>,
+        activation: &str,
+    ) -> Tensor {
+        self.f_internal_mixed_dtypes_linear(weight, scale, bias, activation).unwrap()
+    }
+
     pub fn internal_mkldnn_reshape(&self, shape: impl IntList) -> Tensor {
         self.f_internal_mkldnn_reshape(shape).unwrap()
     }
@@ -3078,8 +3102,18 @@ impl Tensor {
         scale_a: Option<T>,
         scale_b: Option<T>,
         scale_result: Option<T>,
+        use_fast_accum: bool,
     ) -> (Tensor, Tensor) {
-        self.f_internal_scaled_mm(mat2, bias, out_dtype, scale_a, scale_b, scale_result).unwrap()
+        self.f_internal_scaled_mm(
+            mat2,
+            bias,
+            out_dtype,
+            scale_a,
+            scale_b,
+            scale_result,
+            use_fast_accum,
+        )
+        .unwrap()
     }
 
     pub fn internal_scaled_mm_out<T: Borrow<Tensor>>(
@@ -3092,6 +3126,7 @@ impl Tensor {
         scale_a: Option<T>,
         scale_b: Option<T>,
         scale_result: Option<T>,
+        use_fast_accum: bool,
     ) -> (Tensor, Tensor) {
         self.f_internal_scaled_mm_out(
             out,
@@ -3102,6 +3137,7 @@ impl Tensor {
             scale_a,
             scale_b,
             scale_result,
+            use_fast_accum,
         )
         .unwrap()
     }
@@ -4663,6 +4699,15 @@ impl Tensor {
         self.f_internal_version().unwrap()
     }
 
+    pub fn internal_weight_int4pack_mm(
+        &self,
+        mat2: &Tensor,
+        qgroupsize: i64,
+        qscaleandzeros: &Tensor,
+    ) -> Tensor {
+        self.f_internal_weight_int4pack_mm(mat2, qgroupsize, qscaleandzeros).unwrap()
+    }
+
     pub fn internal_weight_norm(v: &Tensor, g: &Tensor, dim: i64) -> Tensor {
         Tensor::f_internal_weight_norm(v, g, dim).unwrap()
     }
@@ -5025,6 +5070,14 @@ impl Tensor {
         self.f_all_dim(dim, keepdim).unwrap()
     }
 
+    pub fn all_dims(&self, dim: impl IntListOption, keepdim: bool) -> Tensor {
+        self.f_all_dims(dim, keepdim).unwrap()
+    }
+
+    pub fn all_dims_out(&self, out: &Tensor, dim: impl IntListOption, keepdim: bool) -> Tensor {
+        self.f_all_dims_out(out, dim, keepdim).unwrap()
+    }
+
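`internal_convert_weight_to_int4pack` and `internal_weight_int4pack_mm` (added further down in this file) arrive as a pair for int4 weight-quantized matmuls. A deliberately loose sketch; the packing layout and the valid values of `innerKTiles`, `qGroupSize`, and the scale-and-zeros tensor follow upstream PyTorch 2.2 kernel constraints that are not captured here:

```rust
use tch::Tensor;

// Sketch only: the literal arguments are assumptions, not values validated here.
fn int4_mm_sketch(x: &Tensor, weight: &Tensor, qscale_and_zeros: &Tensor) -> Tensor {
    let packed = weight.internal_convert_weight_to_int4pack(8); // innerKTiles = 8
    x.internal_weight_int4pack_mm(&packed, 32, qscale_and_zeros) // qGroupSize = 32
}
```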
     pub fn all_out(&self, out: &Tensor, dim: i64, keepdim: bool) -> Tensor {
         self.f_all_out(out, dim, keepdim).unwrap()
     }
 
@@ -5091,6 +5144,14 @@ impl Tensor {
         self.f_any_dim(dim, keepdim).unwrap()
     }
 
+    pub fn any_dims(&self, dim: impl IntListOption, keepdim: bool) -> Tensor {
+        self.f_any_dims(dim, keepdim).unwrap()
+    }
+
+    pub fn any_dims_out(&self, out: &Tensor, dim: impl IntListOption, keepdim: bool) -> Tensor {
+        self.f_any_dims_out(out, dim, keepdim).unwrap()
+    }
+
     pub fn any_out(&self, out: &Tensor, dim: i64, keepdim: bool) -> Tensor {
         self.f_any_out(out, dim, keepdim).unwrap()
     }
@@ -8716,6 +8777,10 @@ impl Tensor {
         self.f_floor_divide_scalar_(other).unwrap()
     }
 
+    pub fn floor_divide_scalar_out<S: Into<Scalar>>(&self, out: &Tensor, other: S) -> Tensor {
+        self.f_floor_divide_scalar_out(out, other).unwrap()
+    }
+
     pub fn floor_out(&self, out: &Tensor) -> Tensor {
         self.f_floor_out(out).unwrap()
     }
@@ -10980,6 +11045,60 @@ impl Tensor {
         Tensor::f_linspace_out(out, start, end, steps).unwrap()
     }
 
+    pub fn linspace_scalar_tensor<S: Into<Scalar>>(
+        start: S,
+        end: &Tensor,
+        steps: i64,
+        options: (Kind, Device),
+    ) -> Tensor {
+        Tensor::f_linspace_scalar_tensor(start, end, steps, options).unwrap()
+    }
+
+    pub fn linspace_scalar_tensor_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: S,
+        end: &Tensor,
+        steps: i64,
+    ) -> Tensor {
+        Tensor::f_linspace_scalar_tensor_out(out, start, end, steps).unwrap()
+    }
+
+    pub fn linspace_tensor_scalar<S: Into<Scalar>>(
+        start: &Tensor,
+        end: S,
+        steps: i64,
+        options: (Kind, Device),
+    ) -> Tensor {
+        Tensor::f_linspace_tensor_scalar(start, end, steps, options).unwrap()
+    }
+
+    pub fn linspace_tensor_scalar_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: &Tensor,
+        end: S,
+        steps: i64,
+    ) -> Tensor {
+        Tensor::f_linspace_tensor_scalar_out(out, start, end, steps).unwrap()
+    }
+
+    pub fn linspace_tensor_tensor(
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+        options: (Kind, Device),
+    ) -> Tensor {
+        Tensor::f_linspace_tensor_tensor(start, end, steps, options).unwrap()
+    }
+
+    pub fn linspace_tensor_tensor_out(
+        out: &Tensor,
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+    ) -> Tensor {
+        Tensor::f_linspace_tensor_tensor_out(out, start, end, steps).unwrap()
+    }
+
     pub fn log(&self) -> Tensor {
         self.f_log().unwrap()
     }
@@ -11195,6 +11314,66 @@ impl Tensor {
         Tensor::f_logspace_out(out, start, end, steps, base).unwrap()
     }
 
+    pub fn logspace_scalar_tensor<S: Into<Scalar>>(
+        start: S,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+        options: (Kind, Device),
+    ) -> Tensor {
+        Tensor::f_logspace_scalar_tensor(start, end, steps, base, options).unwrap()
+    }
+
+    pub fn logspace_scalar_tensor_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: S,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+    ) -> Tensor {
+        Tensor::f_logspace_scalar_tensor_out(out, start, end, steps, base).unwrap()
+    }
+
+    pub fn logspace_tensor_scalar<S: Into<Scalar>>(
+        start: &Tensor,
+        end: S,
+        steps: i64,
+        base: f64,
+        options: (Kind, Device),
+    ) -> Tensor {
+        Tensor::f_logspace_tensor_scalar(start, end, steps, base, options).unwrap()
+    }
+
+    pub fn logspace_tensor_scalar_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: &Tensor,
+        end: S,
+        steps: i64,
+        base: f64,
+    ) -> Tensor {
+        Tensor::f_logspace_tensor_scalar_out(out, start, end, steps, base).unwrap()
+    }
+
+    pub fn logspace_tensor_tensor(
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+        options: (Kind, Device),
+    ) -> Tensor {
+        Tensor::f_logspace_tensor_tensor(start, end, steps, base, options).unwrap()
+    }
+
+    pub fn logspace_tensor_tensor_out(
+        out: &Tensor,
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+    ) -> Tensor {
+        Tensor::f_logspace_tensor_tensor_out(out, start, end, steps, base).unwrap()
+    }
+
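As everywhere in these two generated files, each operator comes in a fallible `f_`-prefixed form returning `Result<Tensor, TchError>` and a convenience form that unwraps it, so the new entry points follow the crate-wide error-handling pattern. For instance, with the wrappers above:

```rust
use tch::{Device, Kind, Tensor};

fn spaces() -> Result<Tensor, tch::TchError> {
    let start = Tensor::from(0f64);
    let end = Tensor::from(3f64);
    // Fallible form: surfaces libtorch errors as TchError values...
    let ys = Tensor::f_logspace_tensor_tensor(&start, &end, 4, 10.0, (Kind::Float, Device::Cpu))?;
    // ...while the plain name panics on failure instead.
    let _same = Tensor::logspace_tensor_tensor(&start, &end, 4, 10.0, (Kind::Float, Device::Cpu));
    Ok(ys)
}
```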
     pub fn logsumexp(&self, dim: impl IntList, keepdim: bool) -> Tensor {
         self.f_logsumexp(dim, keepdim).unwrap()
     }
@@ -11396,6 +11575,14 @@ impl Tensor {
         self.f_masked_scatter_(mask, source).unwrap()
     }
 
+    pub fn masked_scatter_backward(
+        grad_output: &Tensor,
+        mask: &Tensor,
+        sizes: impl IntList,
+    ) -> Tensor {
+        Tensor::f_masked_scatter_backward(grad_output, mask, sizes).unwrap()
+    }
+
     pub fn masked_scatter_out(&self, out: &Tensor, mask: &Tensor, source: &Tensor) -> Tensor {
         self.f_masked_scatter_out(out, mask, source).unwrap()
     }
diff --git a/torch-sys/Cargo.toml b/torch-sys/Cargo.toml
index cf19f445..5990458d 100644
--- a/torch-sys/Cargo.toml
+++ b/torch-sys/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "torch-sys"
-version = "0.14.0"
+version = "0.15.0"
 authors = ["Laurent Mazare <lmazare@gmail.com>"]
 edition = "2021"
 build = "build.rs"
diff --git a/torch-sys/build.rs b/torch-sys/build.rs
index 9ba49a32..c66087ca 100644
--- a/torch-sys/build.rs
+++ b/torch-sys/build.rs
@@ -10,7 +10,7 @@ use anyhow::{Context, Result};
 use std::path::{Path, PathBuf};
 use std::{env, fs, io};
 
-const TORCH_VERSION: &str = "2.1.0";
+const TORCH_VERSION: &str = "2.2.0";
 const PYTHON_PRINT_PYTORCH_DETAILS: &str = r"
 import torch
 from torch.utils import cpp_extension
@@ -158,7 +158,7 @@ fn version_check(version: &str) -> Result<()> {
         return Ok(());
     }
     let version = version.trim();
-    // Typical version number is 2.1.0+cpu or 2.1.0+cu117
+    // Typical version number is 2.2.0+cpu or 2.2.0+cu121
     let version = match version.split_once('+') {
         None => version,
         Some((version, _)) => version,
@@ -312,11 +312,8 @@ impl SystemInfo {
                     "https://download.pytorch.org/libtorch/{}/libtorch-cxx11-abi-shared-with-deps-{}{}.zip",
                     device, TORCH_VERSION, match device.as_ref() {
                         "cpu" => "%2Bcpu",
-                        "cu102" => "%2Bcu102",
-                        "cu113" => "%2Bcu113",
-                        "cu116" => "%2Bcu116",
-                        "cu117" => "%2Bcu117",
                         "cu118" => "%2Bcu118",
+                        "cu121" => "%2Bcu121",
                         _ => anyhow::bail!("unsupported device {device}, TORCH_CUDA_VERSION may be set incorrectly?"),
                     }
                 ),
@@ -331,18 +328,15 @@ impl SystemInfo {
                         export DYLD_LIBRARY_PATH=${{LIBTORCH}}/lib
                     ")
                 } else {
-                    format!("https://download.pytorch.org/libtorch/cpu/libtorch-macos-{TORCH_VERSION}.zip")
+                    format!("https://download.pytorch.org/libtorch/cpu/libtorch-macos-x86_64-{TORCH_VERSION}.zip")
                 }
             },
             Os::Windows => format!(
                 "https://download.pytorch.org/libtorch/{}/libtorch-win-shared-with-deps-{}{}.zip",
                 device, TORCH_VERSION, match device.as_ref() {
                     "cpu" => "%2Bcpu",
-                    "cu102" => "%2Bcu102",
-                    "cu113" => "%2Bcu113",
-                    "cu116" => "%2Bcu116",
-                    "cu117" => "%2Bcu117",
                     "cu118" => "%2Bcu118",
+                    "cu121" => "%2Bcu121",
                     _ => ""
                 }),
         };
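On the download path, only `cpu`, `cu118`, and `cu121` remain valid `TORCH_CUDA_VERSION` values for the prebuilt libtorch v2.2.0 archives; the cu102/cu113/cu116/cu117 suffixes are gone. A condensed sketch of the selection above (note the real build script bails out on Linux but falls back to an empty suffix on Windows):

```rust
// Mirrors the match arms in torch-sys/build.rs as of v0.15.0.
fn url_suffix(device: &str) -> Result<&'static str, String> {
    match device {
        "cpu" => Ok("%2Bcpu"),
        "cu118" => Ok("%2Bcu118"),
        "cu121" => Ok("%2Bcu121"),
        other => {
            Err(format!("unsupported device {other}, TORCH_CUDA_VERSION may be set incorrectly?"))
        }
    }
}
```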
diff --git a/torch-sys/libtch/torch_api_generated.cpp b/torch-sys/libtch/torch_api_generated.cpp
index b293e9fe..bbf5d251 100644
--- a/torch-sys/libtch/torch_api_generated.cpp
+++ b/torch-sys/libtch/torch_api_generated.cpp
@@ -572,6 +572,13 @@ void atg__convert_indices_from_csr_to_coo_out(tensor *out__, tensor out, tensor
   )
 }
 
+void atg__convert_weight_to_int4pack(tensor *out__, tensor self, int64_t innerKTiles) {
+  PROTECT(
+    auto outputs__ = torch::_convert_weight_to_int4pack(*self, innerKTiles);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg__convolution(tensor *out__, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled, int allow_tf32) {
   PROTECT(
     auto outputs__ = torch::_convolution(*input, *weight, (bias ? *bias : torch::Tensor()), torch::IntArrayRef(stride_data, stride_len), torch::IntArrayRef(padding_data, padding_len), torch::IntArrayRef(dilation_data, dilation_len), (bool)transposed, torch::IntArrayRef(output_padding_data, output_padding_len), groups, (bool)benchmark, (bool)deterministic, (bool)cudnn_enabled, (bool)allow_tf32);
@@ -635,9 +642,9 @@ void atg__cslt_compress(tensor *out__, tensor input) {
   )
 }
 
-void atg__cslt_sparse_mm(tensor *out__, tensor compressed_A, tensor dense_B, tensor bias, int transpose_result) {
+void atg__cslt_sparse_mm(tensor *out__, tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result) {
   PROTECT(
-    auto outputs__ = torch::_cslt_sparse_mm(*compressed_A, *dense_B, (bias ? *bias : torch::Tensor()), (bool)transpose_result);
+    auto outputs__ = torch::_cslt_sparse_mm(*compressed_A, *dense_B, (bias ? *bias : torch::Tensor()), (alpha ? *alpha : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional<at::ScalarType>(at::ScalarType(out_dtype)), (bool)transpose_result);
     out__[0] = new torch::Tensor(outputs__);
   )
 }
@@ -811,9 +818,9 @@ void atg__dirichlet_grad_out(tensor *out__, tensor out, tensor x, tensor alpha,
   )
 }
 
-void atg__efficient_attention_backward(tensor *out__, tensor grad_out_, tensor query, tensor key, tensor value, tensor bias, tensor out, tensor cu_seqlens_q, tensor cu_seqlens_k, int64_t max_seqlen_k, int64_t max_seqlen_q, tensor logsumexp, double dropout_p, tensor philox_seed, tensor philox_offset, int64_t custom_mask_type, int bias_requires_grad, double scale_v, uint8_t scale_null, int64_t num_splits_key_v, uint8_t num_splits_key_null) {
+void atg__efficient_attention_backward(tensor *out__, tensor grad_out_, tensor query, tensor key, tensor value, tensor bias, tensor out, tensor cu_seqlens_q, tensor cu_seqlens_k, int64_t max_seqlen_q, int64_t max_seqlen_k, tensor logsumexp, double dropout_p, tensor philox_seed, tensor philox_offset, int64_t custom_mask_type, int bias_requires_grad, double scale_v, uint8_t scale_null, int64_t num_splits_key_v, uint8_t num_splits_key_null) {
   PROTECT(
-    auto outputs__ = torch::_efficient_attention_backward(*grad_out_, *query, *key, *value, (bias ? *bias : torch::Tensor()), *out, (cu_seqlens_q ? *cu_seqlens_q : torch::Tensor()), (cu_seqlens_k ? *cu_seqlens_k : torch::Tensor()), max_seqlen_k, max_seqlen_q, *logsumexp, dropout_p, *philox_seed, *philox_offset, custom_mask_type, (bool)bias_requires_grad, scale_null ? c10::nullopt : c10::optional<double>(scale_v), num_splits_key_null ? c10::nullopt : c10::optional<int64_t>(num_splits_key_v));
+    auto outputs__ = torch::_efficient_attention_backward(*grad_out_, *query, *key, *value, (bias ? *bias : torch::Tensor()), *out, (cu_seqlens_q ? *cu_seqlens_q : torch::Tensor()), (cu_seqlens_k ? *cu_seqlens_k : torch::Tensor()), max_seqlen_q, max_seqlen_k, *logsumexp, dropout_p, *philox_seed, *philox_offset, custom_mask_type, (bool)bias_requires_grad, scale_null ? c10::nullopt : c10::optional<double>(scale_v), num_splits_key_null ? c10::nullopt : c10::optional<int64_t>(num_splits_key_v));
     out__[0] = new torch::Tensor(std::get<0>(outputs__));
     out__[1] = new torch::Tensor(std::get<1>(outputs__));
     out__[2] = new torch::Tensor(std::get<2>(outputs__));
@@ -1624,6 +1631,13 @@ void atg__masked_softmax_out(tensor *out__, tensor out, tensor self, tensor mask
   )
 }
 
+void atg__mixed_dtypes_linear(tensor *out__, tensor input, tensor weight, tensor scale, tensor bias, char* activation_ptr, int activation_len) {
+  PROTECT(
+    auto outputs__ = torch::_mixed_dtypes_linear(*input, *weight, *scale, (bias ? *bias : torch::Tensor()), std::string(activation_ptr, activation_len));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg__mkldnn_reshape(tensor *out__, tensor self, int64_t *shape_data, int shape_len) {
   PROTECT(
     auto outputs__ = torch::_mkldnn_reshape(*self, torch::IntArrayRef(shape_data, shape_len));
@@ -2107,17 +2121,17 @@ void atg__scaled_dot_product_flash_attention_backward(tensor *out__, tensor grad
   )
 }
 
-void atg__scaled_mm(tensor *out__, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result) {
+void atg__scaled_mm(tensor *out__, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result, int use_fast_accum) {
   PROTECT(
-    auto outputs__ = torch::_scaled_mm(*self, *mat2, (bias ? *bias : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional<at::ScalarType>(at::ScalarType(out_dtype)), (scale_a ? *scale_a : torch::Tensor()), (scale_b ? *scale_b : torch::Tensor()), (scale_result ? *scale_result : torch::Tensor()));
+    auto outputs__ = torch::_scaled_mm(*self, *mat2, (bias ? *bias : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional<at::ScalarType>(at::ScalarType(out_dtype)), (scale_a ? *scale_a : torch::Tensor()), (scale_b ? *scale_b : torch::Tensor()), (scale_result ? *scale_result : torch::Tensor()), (bool)use_fast_accum);
     out__[0] = new torch::Tensor(std::get<0>(outputs__));
     out__[1] = new torch::Tensor(std::get<1>(outputs__));
   )
 }
 
-void atg__scaled_mm_out(tensor *out__, tensor out, tensor out_amax, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result) {
+void atg__scaled_mm_out(tensor *out__, tensor out, tensor out_amax, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result, int use_fast_accum) {
   PROTECT(
-    auto outputs__ = torch::_scaled_mm_out(*out, *out_amax, *self, *mat2, (bias ? *bias : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional<at::ScalarType>(at::ScalarType(out_dtype)), (scale_a ? *scale_a : torch::Tensor()), (scale_b ? *scale_b : torch::Tensor()), (scale_result ? *scale_result : torch::Tensor()));
+    auto outputs__ = torch::_scaled_mm_out(*out, *out_amax, *self, *mat2, (bias ? *bias : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional<at::ScalarType>(at::ScalarType(out_dtype)), (scale_a ? *scale_a : torch::Tensor()), (scale_b ? *scale_b : torch::Tensor()), (scale_result ? *scale_result : torch::Tensor()), (bool)use_fast_accum);
     out__[0] = new torch::Tensor(std::get<0>(outputs__));
     out__[1] = new torch::Tensor(std::get<1>(outputs__));
   )
 }
@@ -3280,6 +3294,13 @@ int64_t atg__version(tensor self) {
   return 0;
 }
 
+void atg__weight_int4pack_mm(tensor *out__, tensor self, tensor mat2, int64_t qGroupSize, tensor qScaleAndZeros) {
+  PROTECT(
+    auto outputs__ = torch::_weight_int4pack_mm(*self, *mat2, qGroupSize, *qScaleAndZeros);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg__weight_norm(tensor *out__, tensor v, tensor g, int64_t dim) {
   PROTECT(
     auto outputs__ = torch::_weight_norm(*v, *g, dim);
@@ -3779,6 +3800,20 @@ void atg_all_dim(tensor *out__, tensor self, int64_t dim, int keepdim) {
   )
 }
 
+void atg_all_dims(tensor *out__, tensor self, int64_t *dim_data, int dim_len, int keepdim) {
+  PROTECT(
+    auto outputs__ = torch::all(*self, dim_data == nullptr ? c10::nullopt : c10::optional<torch::IntArrayRef>(torch::IntArrayRef(dim_data, dim_len)), (bool)keepdim);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_all_dims_out(tensor *out__, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim) {
+  PROTECT(
+    auto outputs__ = torch::all_out(*out, *self, dim_data == nullptr ? c10::nullopt : c10::optional<torch::IntArrayRef>(torch::IntArrayRef(dim_data, dim_len)), (bool)keepdim);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg_all_out(tensor *out__, tensor out, tensor self, int64_t dim, int keepdim) {
   PROTECT(
     auto outputs__ = torch::all_out(*out, *self, dim, (bool)keepdim);
@@ -3886,6 +3921,20 @@ void atg_any_dim(tensor *out__, tensor self, int64_t dim, int keepdim) {
   )
 }
 
+void atg_any_dims(tensor *out__, tensor self, int64_t *dim_data, int dim_len, int keepdim) {
+  PROTECT(
+    auto outputs__ = torch::any(*self, dim_data == nullptr ? c10::nullopt : c10::optional<torch::IntArrayRef>(torch::IntArrayRef(dim_data, dim_len)), (bool)keepdim);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_any_dims_out(tensor *out__, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim) {
+  PROTECT(
+    auto outputs__ = torch::any_out(*out, *self, dim_data == nullptr ? c10::nullopt : c10::optional<torch::IntArrayRef>(torch::IntArrayRef(dim_data, dim_len)), (bool)keepdim);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
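These shims also show how an optional dim list crosses the FFI boundary: the Rust wrapper hands over a `(ptr, len)` pair (see `dim.as_ptr()` / `dim.len_i32()` in `f_all_dims` earlier in the patch), `None` becomes a null pointer, and the C++ side maps that null back to `c10::nullopt`. From the caller's perspective:

```rust
use tch::Tensor;

fn reduce_some_or_all(t: &Tensor) -> (Tensor, Tensor) {
    // Some(dims) -> non-null dim_data + length: reduce over the listed dims.
    let rows = t.all_dims(Some(&[0i64][..]), false);
    // None -> dim_data == nullptr -> c10::nullopt: reduce over every dim.
    let all = t.all_dims(None::<&[i64]>, false);
    (rows, all)
}
```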
 void atg_any_out(tensor *out__, tensor out, tensor self, int64_t dim, int keepdim) {
   PROTECT(
     auto outputs__ = torch::any_out(*out, *self, dim, (bool)keepdim);
@@ -7784,6 +7833,13 @@ void atg_floor_divide_scalar_(tensor *out__, tensor self, scalar other) {
   )
 }
 
+void atg_floor_divide_scalar_out(tensor *out__, tensor out, tensor self, scalar other) {
+  PROTECT(
+    auto outputs__ = torch::floor_divide_out(*out, *self, *other);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg_floor_out(tensor *out__, tensor out, tensor self) {
   PROTECT(
     auto outputs__ = torch::floor_out(*out, *self);
@@ -10491,6 +10547,48 @@ void atg_linspace_out(tensor *out__, tensor out, scalar start, scalar end, int64
   )
 }
 
+void atg_linspace_scalar_tensor(tensor *out__, scalar start, tensor end, int64_t steps, int options_kind, int options_device) {
+  PROTECT(
+    auto outputs__ = torch::linspace(*start, *end, steps, at::device(device_of_int(options_device)).dtype(at::ScalarType(options_kind)));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_linspace_scalar_tensor_out(tensor *out__, tensor out, scalar start, tensor end, int64_t steps) {
+  PROTECT(
+    auto outputs__ = torch::linspace_out(*out, *start, *end, steps);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_linspace_tensor_scalar(tensor *out__, tensor start, scalar end, int64_t steps, int options_kind, int options_device) {
+  PROTECT(
+    auto outputs__ = torch::linspace(*start, *end, steps, at::device(device_of_int(options_device)).dtype(at::ScalarType(options_kind)));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_linspace_tensor_scalar_out(tensor *out__, tensor out, tensor start, scalar end, int64_t steps) {
+  PROTECT(
+    auto outputs__ = torch::linspace_out(*out, *start, *end, steps);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_linspace_tensor_tensor(tensor *out__, tensor start, tensor end, int64_t steps, int options_kind, int options_device) {
+  PROTECT(
+    auto outputs__ = torch::linspace(*start, *end, steps, at::device(device_of_int(options_device)).dtype(at::ScalarType(options_kind)));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_linspace_tensor_tensor_out(tensor *out__, tensor out, tensor start, tensor end, int64_t steps) {
+  PROTECT(
+    auto outputs__ = torch::linspace_out(*out, *start, *end, steps);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg_log(tensor *out__, tensor self) {
   PROTECT(
     auto outputs__ = torch::log(*self);
@@ -10820,6 +10918,48 @@ void atg_logspace_out(tensor *out__, tensor out, scalar start, scalar end, int64
   )
 }
 
+void atg_logspace_scalar_tensor(tensor *out__, scalar start, tensor end, int64_t steps, double base, int options_kind, int options_device) {
+  PROTECT(
+    auto outputs__ = torch::logspace(*start, *end, steps, base, at::device(device_of_int(options_device)).dtype(at::ScalarType(options_kind)));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_logspace_scalar_tensor_out(tensor *out__, tensor out, scalar start, tensor end, int64_t steps, double base) {
+  PROTECT(
+    auto outputs__ = torch::logspace_out(*out, *start, *end, steps, base);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_logspace_tensor_scalar(tensor *out__, tensor start, scalar end, int64_t steps, double base, int options_kind, int options_device) {
+  PROTECT(
+    auto outputs__ = torch::logspace(*start, *end, steps, base, at::device(device_of_int(options_device)).dtype(at::ScalarType(options_kind)));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_logspace_tensor_scalar_out(tensor *out__, tensor out, tensor start, scalar end, int64_t steps, double base) {
+  PROTECT(
+    auto outputs__ = torch::logspace_out(*out, *start, *end, steps, base);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_logspace_tensor_tensor(tensor *out__, tensor start, tensor end, int64_t steps, double base, int options_kind, int options_device) {
+  PROTECT(
+    auto outputs__ = torch::logspace(*start, *end, steps, base, at::device(device_of_int(options_device)).dtype(at::ScalarType(options_kind)));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_logspace_tensor_tensor_out(tensor *out__, tensor out, tensor start, tensor end, int64_t steps, double base) {
+  PROTECT(
+    auto outputs__ = torch::logspace_out(*out, *start, *end, steps, base);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg_logsumexp(tensor *out__, tensor self, int64_t *dim_data, int dim_len, int keepdim) {
   PROTECT(
     auto outputs__ = torch::logsumexp(*self, torch::IntArrayRef(dim_data, dim_len), (bool)keepdim);
@@ -11003,6 +11143,13 @@ void atg_masked_scatter_(tensor *out__, tensor self, tensor mask, tensor source)
   )
 }
 
+void atg_masked_scatter_backward(tensor *out__, tensor grad_output, tensor mask, int64_t *sizes_data, int sizes_len) {
+  PROTECT(
+    auto outputs__ = torch::masked_scatter_backward(*grad_output, *mask, torch::IntArrayRef(sizes_data, sizes_len));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg_masked_scatter_out(tensor *out__, tensor out, tensor self, tensor mask, tensor source) {
   PROTECT(
     auto outputs__ = torch::masked_scatter_out(*out, *self, *mask, *source);
diff --git a/torch-sys/libtch/torch_api_generated.h b/torch-sys/libtch/torch_api_generated.h
index e8a1caaa..97beb08c 100644
--- a/torch-sys/libtch/torch_api_generated.h
+++ b/torch-sys/libtch/torch_api_generated.h
@@ -83,6 +83,7 @@ void atg__convert_indices_from_coo_to_csr(tensor *, tensor self, int64_t size, i
 void atg__convert_indices_from_coo_to_csr_out(tensor *, tensor out, tensor self, int64_t size, int out_int32);
 void atg__convert_indices_from_csr_to_coo(tensor *, tensor crow_indices, tensor col_indices, int out_int32, int transpose);
 void atg__convert_indices_from_csr_to_coo_out(tensor *, tensor out, tensor crow_indices, tensor col_indices, int out_int32, int transpose);
+void atg__convert_weight_to_int4pack(tensor *, tensor self, int64_t innerKTiles);
 void atg__convolution(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled, int allow_tf32);
 void atg__convolution_deprecated(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled);
 void atg__convolution_mode(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, char* padding_ptr, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
@@ -92,7 +93,7 @@ void atg__copy_from_and_resize(tensor *, tensor self, tensor dst);
 void atg__copy_from_and_resize_out(tensor *, tensor out, tensor self, tensor dst);
 void atg__copy_from_out(tensor *, tensor out, tensor self, tensor dst, int non_blocking);
 void atg__cslt_compress(tensor *, tensor input);
-void atg__cslt_sparse_mm(tensor *, tensor compressed_A, tensor dense_B, tensor bias, int transpose_result);
+void atg__cslt_sparse_mm(tensor *, tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result);
 void atg__ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int zero_infinity);
 void atg__ctc_loss_backward(tensor *, tensor grad, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, tensor neg_log_likelihood, tensor log_alpha, int64_t blank, int zero_infinity);
 void atg__ctc_loss_backward_out(tensor *, tensor out, tensor grad, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, tensor neg_log_likelihood, tensor log_alpha, int64_t blank, int zero_infinity);
@@ -115,7 +116,7 @@ int64_t atg__dimi(tensor self);
 int64_t atg__dimv(tensor self);
 void atg__dirichlet_grad(tensor *, tensor x, tensor alpha, tensor total);
 void atg__dirichlet_grad_out(tensor *, tensor out, tensor x, tensor alpha, tensor total);
-void atg__efficient_attention_backward(tensor *, tensor grad_out_, tensor query, tensor key, tensor value, tensor bias, tensor out, tensor cu_seqlens_q, tensor cu_seqlens_k, int64_t max_seqlen_k, int64_t max_seqlen_q, tensor logsumexp, double dropout_p, tensor philox_seed, tensor philox_offset, int64_t custom_mask_type, int bias_requires_grad, double scale_v, uint8_t scale_null, int64_t num_splits_key_v, uint8_t num_splits_key_null);
+void atg__efficient_attention_backward(tensor *, tensor grad_out_, tensor query, tensor key, tensor value, tensor bias, tensor out, tensor cu_seqlens_q, tensor cu_seqlens_k, int64_t max_seqlen_q, int64_t max_seqlen_k, tensor logsumexp, double dropout_p, tensor philox_seed, tensor philox_offset, int64_t custom_mask_type, int bias_requires_grad, double scale_v, uint8_t scale_null, int64_t num_splits_key_v, uint8_t num_splits_key_null);
 void atg__efficientzerotensor(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
 void atg__efficientzerotensor_out(tensor *, tensor out, int64_t *size_data, int size_len);
 void atg__embedding_bag(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx);
@@ -221,6 +222,7 @@ void atg__masked_softmax(tensor *, tensor self, tensor mask, int64_t dim_v, uint
 void atg__masked_softmax_backward(tensor *, tensor grad_output, tensor output, tensor mask, int64_t dim_v, uint8_t dim_null);
 void atg__masked_softmax_backward_out(tensor *, tensor out, tensor grad_output, tensor output, tensor mask, int64_t dim_v, uint8_t dim_null);
 void atg__masked_softmax_out(tensor *, tensor out, tensor self, tensor mask, int64_t dim_v, uint8_t dim_null, int64_t mask_type_v, uint8_t mask_type_null);
+void atg__mixed_dtypes_linear(tensor *, tensor input, tensor weight, tensor scale, tensor bias, char* activation_ptr, int activation_len);
 void atg__mkldnn_reshape(tensor *, tensor self, int64_t *shape_data, int shape_len);
 void atg__mkldnn_reshape_out(tensor *, tensor out, tensor self, int64_t *shape_data, int shape_len);
 void atg__mkldnn_transpose(tensor *, tensor self, int64_t dim0, int64_t dim1);
@@ -286,8 +288,8 @@ void atg__saturate_weight_to_fp16(tensor *, tensor weight);
 void atg__scaled_dot_product_attention_math(tensor *, tensor query, tensor key, tensor value, tensor attn_mask, double dropout_p, int is_causal, tensor dropout_mask, double scale_v, uint8_t scale_null);
 void atg__scaled_dot_product_efficient_attention(tensor *, tensor query, tensor key, tensor value, tensor attn_bias, int compute_log_sumexp, double dropout_p, int is_causal, double scale_v, uint8_t scale_null);
 void atg__scaled_dot_product_flash_attention_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, tensor cum_seq_q, tensor cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, int is_causal, tensor philox_seed, tensor philox_offset, double scale_v, uint8_t scale_null);
-void atg__scaled_mm(tensor *, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result);
-void atg__scaled_mm_out(tensor *, tensor out, tensor out_amax, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result);
+void atg__scaled_mm(tensor *, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result, int use_fast_accum);
+void atg__scaled_mm_out(tensor *, tensor out, tensor out_amax, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result, int use_fast_accum);
 void atg__scatter_reduce(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self);
 void atg__scatter_reduce_(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self);
 void atg__scatter_reduce_two_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self);
@@ -451,6 +453,7 @@ void atg__values(tensor *, tensor self);
 void atg__values_copy(tensor *, tensor self);
 void atg__values_copy_out(tensor *, tensor out, tensor self);
 int64_t atg__version(tensor self);
+void atg__weight_int4pack_mm(tensor *, tensor self, tensor mat2, int64_t qGroupSize, tensor qScaleAndZeros);
 void atg__weight_norm(tensor *, tensor v, tensor g, int64_t dim);
 void atg__weight_norm_differentiable_backward(tensor *, tensor grad_w, tensor saved_v, tensor saved_g, tensor saved_norms, int64_t dim);
 void atg__weight_norm_interface(tensor *, tensor v, tensor g, int64_t dim);
@@ -520,6 +523,8 @@ tensor *atg_align_tensors(tensor *tensors_data, int tensors_len);
 void atg_all(tensor *, tensor self);
 void atg_all_all_out(tensor *, tensor out, tensor self);
 void atg_all_dim(tensor *, tensor self, int64_t dim, int keepdim);
+void atg_all_dims(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
+void atg_all_dims_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
 void atg_all_out(tensor *, tensor out, tensor self, int64_t dim, int keepdim);
 int atg_allclose(tensor self, tensor other, double rtol, double atol, int equal_nan);
 void atg_alpha_dropout(tensor *, tensor input, double p, int train);
@@ -535,6 +540,8 @@ void atg_angle_out(tensor *, tensor out, tensor self);
 void atg_any(tensor *, tensor self);
 void atg_any_all_out(tensor *, tensor out, tensor self);
 void atg_any_dim(tensor *, tensor self, int64_t dim, int keepdim);
+void atg_any_dims(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
+void atg_any_dims_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
 void atg_any_out(tensor *, tensor out, tensor self, int64_t dim, int keepdim);
 void atg_arange(tensor *, scalar end, int options_kind, int options_device);
 void atg_arange_start(tensor *, scalar start, scalar end, int options_kind, int options_device);
@@ -1079,6 +1086,7 @@ void atg_floor_divide_(tensor *, tensor self, tensor other);
 void atg_floor_divide_out(tensor *, tensor out, tensor self, tensor other);
 void atg_floor_divide_scalar(tensor *, tensor self, scalar other);
 void atg_floor_divide_scalar_(tensor *, tensor self, scalar other);
+void atg_floor_divide_scalar_out(tensor *, tensor out, tensor self, scalar other);
 void atg_floor_out(tensor *, tensor out, tensor self);
 void atg_fmax(tensor *, tensor self, tensor other);
 void atg_fmax_out(tensor *, tensor out, tensor self, tensor other);
@@ -1456,6 +1464,12 @@ void atg_linear(tensor *, tensor input, tensor weight, tensor bias);
 void atg_linear_out(tensor *, tensor out, tensor input, tensor weight, tensor bias);
 void atg_linspace(tensor *, scalar start, scalar end, int64_t steps, int options_kind, int options_device);
 void atg_linspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps);
+void atg_linspace_scalar_tensor(tensor *, scalar start, tensor end, int64_t steps, int options_kind, int options_device);
+void atg_linspace_scalar_tensor_out(tensor *, tensor out, scalar start, tensor end, int64_t steps);
+void atg_linspace_tensor_scalar(tensor *, tensor start, scalar end, int64_t steps, int options_kind, int options_device);
+void atg_linspace_tensor_scalar_out(tensor *, tensor out, tensor start, scalar end, int64_t steps);
+void atg_linspace_tensor_tensor(tensor *, tensor start, tensor end, int64_t steps, int options_kind, int options_device);
+void atg_linspace_tensor_tensor_out(tensor *, tensor out, tensor start, tensor end, int64_t steps);
 void atg_log(tensor *, tensor self);
 void atg_log10(tensor *, tensor self);
 void atg_log10_(tensor *, tensor self);
@@ -1503,6 +1517,12 @@ void atg_logit_backward_grad_input(tensor *, tensor grad_input, tensor grad_outp
 void atg_logit_out(tensor *, tensor out, tensor self, double eps_v, uint8_t eps_null);
 void atg_logspace(tensor *, scalar start, scalar end, int64_t steps, double base, int options_kind, int options_device);
 void atg_logspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps, double base);
+void atg_logspace_scalar_tensor(tensor *, scalar start, tensor end, int64_t steps, double base, int options_kind, int options_device);
+void atg_logspace_scalar_tensor_out(tensor *, tensor out, scalar start, tensor end, int64_t steps, double base);
+void atg_logspace_tensor_scalar(tensor *, tensor start, scalar end, int64_t steps, double base, int options_kind, int options_device);
+void atg_logspace_tensor_scalar_out(tensor *, tensor out, tensor start, scalar end, int64_t steps, double base);
+void atg_logspace_tensor_tensor(tensor *, tensor start, tensor end, int64_t steps, double base, int options_kind, int options_device);
+void atg_logspace_tensor_tensor_out(tensor *, tensor out, tensor start, tensor end, int64_t steps, double base);
 void atg_logsumexp(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
 void atg_logsumexp_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
 void atg_lstm(tensor *, tensor input, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
@@ -1528,6 +1548,7 @@ void atg_masked_fill_tensor_(tensor *, tensor self, tensor mask, tensor value);
 void atg_masked_fill_tensor_out(tensor *, tensor out, tensor self, tensor mask, tensor value);
 void atg_masked_scatter(tensor *, tensor self, tensor mask, tensor source);
 void atg_masked_scatter_(tensor *, tensor self, tensor mask, tensor source);
+void atg_masked_scatter_backward(tensor *, tensor grad_output, tensor mask, int64_t *sizes_data, int sizes_len);
 void atg_masked_scatter_out(tensor *, tensor out, tensor self, tensor mask, tensor source);
 void atg_masked_select(tensor *, tensor self, tensor mask);
 void atg_masked_select_backward(tensor *, tensor grad, tensor input, tensor mask);
diff --git a/torch-sys/src/c_generated.rs b/torch-sys/src/c_generated.rs
index 5b8e921e..883f61ad 100644
--- a/torch-sys/src/c_generated.rs
+++ b/torch-sys/src/c_generated.rs
@@ -377,6 +377,11 @@ extern "C" {
         out_int32_: c_int,
         transpose_: c_int,
     );
+    pub fn atg__convert_weight_to_int4pack(
+        out__: *mut *mut C_tensor,
+        self_: *mut C_tensor,
+        innerKTiles_: i64,
+    );
     pub fn atg__convolution(
         out__: *mut *mut C_tensor,
         input_: *mut C_tensor,
@@ -480,6 +485,8 @@ extern "C" {
         compressed_A_: *mut C_tensor,
         dense_B_: *mut C_tensor,
         bias_: *mut C_tensor,
+        alpha_: *mut C_tensor,
+        out_dtype_: c_int,
         transpose_result_: c_int,
     );
     pub fn atg__ctc_loss(
@@ -719,8 +726,8 @@ extern "C" {
         out_: *mut C_tensor,
         cu_seqlens_q_: *mut C_tensor,
         cu_seqlens_k_: *mut C_tensor,
-        max_seqlen_k_: i64,
         max_seqlen_q_: i64,
+        max_seqlen_k_: i64,
         logsumexp_: *mut C_tensor,
         dropout_p_: f64,
         philox_seed_: *mut C_tensor,
@@ -1587,6 +1594,15 @@ extern "C" {
         mask_type_v: i64,
         mask_type_null: i8,
     );
+    pub fn atg__mixed_dtypes_linear(
+        out__: *mut *mut C_tensor,
+        input_: *mut C_tensor,
+        weight_: *mut C_tensor,
+        scale_: *mut C_tensor,
+        bias_: *mut C_tensor,
+        activation_ptr: *const u8,
+        activation_len: c_int,
+    );
     pub fn atg__mkldnn_reshape(
         out__: *mut *mut C_tensor,
         self_: *mut C_tensor,
@@ -2104,6 +2120,7 @@ extern "C" {
         scale_a_: *mut C_tensor,
         scale_b_: *mut C_tensor,
         scale_result_: *mut C_tensor,
+        use_fast_accum_: c_int,
     );
     pub fn atg__scaled_mm_out(
         out__: *mut *mut C_tensor,
@@ -2116,6 +2133,7 @@ extern "C" {
         scale_a_: *mut C_tensor,
         scale_b_: *mut C_tensor,
         scale_result_: *mut C_tensor,
+        use_fast_accum_: c_int,
     );
     pub fn atg__scatter_reduce(
         out__: *mut *mut C_tensor,
@@ -3384,6 +3402,13 @@ extern "C" {
         self_: *mut C_tensor,
     );
     pub fn atg__version(self_: *mut C_tensor) -> i64;
+    pub fn atg__weight_int4pack_mm(
+        out__: *mut *mut C_tensor,
+        self_: *mut C_tensor,
+        mat2_: *mut C_tensor,
+        qGroupSize_: i64,
+        qScaleAndZeros_: *mut C_tensor,
+    );
     pub fn atg__weight_norm(
         out__: *mut *mut C_tensor,
         v_: *mut C_tensor,
@@ -3704,6 +3729,21 @@ extern "C" {
     pub fn atg_all(out__: *mut *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_all_all_out(out__: *mut *mut C_tensor, out_: *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_all_dim(out__: *mut *mut C_tensor, self_: *mut C_tensor, dim_: i64, keepdim_: c_int);
+    pub fn atg_all_dims(
+        out__: *mut *mut C_tensor,
+        self_: *mut C_tensor,
+        dim_data: *const i64,
+        dim_len: c_int,
+        keepdim_: c_int,
+    );
+    pub fn atg_all_dims_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        self_: *mut C_tensor,
+        dim_data: *const i64,
+        dim_len: c_int,
+        keepdim_: c_int,
+    );
     pub fn atg_all_out(
         out__: *mut *mut C_tensor,
         out_: *mut C_tensor,
@@ -3781,6 +3821,21 @@ extern "C" {
     pub fn atg_any(out__: *mut *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_any_all_out(out__: *mut *mut C_tensor, out_: *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_any_dim(out__: *mut *mut C_tensor, self_: *mut C_tensor, dim_: i64, keepdim_: c_int);
+    pub fn atg_any_dims(
+        out__: *mut *mut C_tensor,
+        self_: *mut C_tensor,
+        dim_data: *const i64,
+        dim_len: c_int,
+        keepdim_: c_int,
+    );
+    pub fn atg_any_dims_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        self_: *mut C_tensor,
+        dim_data: *const i64,
+        dim_len: c_int,
+        keepdim_: c_int,
+    );
     pub fn atg_any_out(
         out__: *mut *mut C_tensor,
         out_: *mut C_tensor,
         self_: *mut C_tensor,
@@ -6893,6 +6948,12 @@ extern "C" {
         self_: *mut C_tensor,
         other_: *mut C_scalar,
     );
+    pub fn atg_floor_divide_scalar_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        self_: *mut C_tensor,
+        other_: *mut C_scalar,
+    );
     pub fn atg_floor_out(out__: *mut *mut C_tensor, out_: *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_fmax(out__: *mut *mut C_tensor, self_: *mut C_tensor, other_: *mut C_tensor);
     pub fn atg_fmax_out(
@@ -8840,6 +8901,51 @@ extern "C" {
         end_: *mut C_scalar,
         steps_: i64,
     );
+    pub fn atg_linspace_scalar_tensor(
+        out__: *mut *mut C_tensor,
+        start_: *mut C_scalar,
+        end_: *mut C_tensor,
+        steps_: i64,
+        options_kind: c_int,
+        options_device: c_int,
+    );
+    pub fn atg_linspace_scalar_tensor_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        start_: *mut C_scalar,
+        end_: *mut C_tensor,
+        steps_: i64,
+    );
+    pub fn atg_linspace_tensor_scalar(
+        out__: *mut *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_scalar,
+        steps_: i64,
+        options_kind: c_int,
+        options_device: c_int,
+    );
+    pub fn atg_linspace_tensor_scalar_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_scalar,
+        steps_: i64,
+    );
+    pub fn atg_linspace_tensor_tensor(
+        out__: *mut *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_tensor,
+        steps_: i64,
+        options_kind: c_int,
+        options_device: c_int,
+    );
+    pub fn atg_linspace_tensor_tensor_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_tensor,
+        steps_: i64,
+    );
     pub fn atg_log(out__: *mut *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_log10(out__: *mut *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_log10_(out__: *mut *mut C_tensor, self_: *mut C_tensor);
@@ -8987,6 +9093,57 @@ extern "C" {
         steps_: i64,
         base_: f64,
     );
+    pub fn atg_logspace_scalar_tensor(
+        out__: *mut *mut C_tensor,
+        start_: *mut C_scalar,
+        end_: *mut C_tensor,
+        steps_: i64,
+        base_: f64,
+        options_kind: c_int,
+        options_device: c_int,
+    );
+    pub fn atg_logspace_scalar_tensor_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        start_: *mut C_scalar,
+        end_: *mut C_tensor,
+        steps_: i64,
+        base_: f64,
+    );
+    pub fn atg_logspace_tensor_scalar(
+        out__: *mut *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_scalar,
+        steps_: i64,
+        base_: f64,
+        options_kind: c_int,
+        options_device: c_int,
+    );
+    pub fn atg_logspace_tensor_scalar_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_scalar,
+        steps_: i64,
+        base_: f64,
+    );
+    pub fn atg_logspace_tensor_tensor(
+        out__: *mut *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_tensor,
+        steps_: i64,
+        base_: f64,
+        options_kind: c_int,
+        options_device: c_int,
+    );
+    pub fn atg_logspace_tensor_tensor_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_tensor,
+        steps_: i64,
+        base_: f64,
+    );
     pub fn atg_logsumexp(
         out__: *mut *mut C_tensor,
         self_: *mut C_tensor,
@@ -9168,6 +9325,13 @@ extern "C" {
         mask_: *mut C_tensor,
         source_: *mut C_tensor,
     );
+    pub fn atg_masked_scatter_backward(
+        out__: *mut *mut C_tensor,
+        grad_output_: *mut C_tensor,
+        mask_: *mut C_tensor,
+        sizes_data: *const i64,
+        sizes_len: c_int,
+    );
     pub fn atg_masked_scatter_out(
         out__: *mut *mut C_tensor,
         out_: *mut C_tensor,