diff --git a/CHANGELOG.md b/CHANGELOG.md
index f5692e60..92300b29 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## Unreleased
 ### Changed
+## v0.15.0
+### Changed
+- PyTorch v2.2 support
+
 ## v0.14.0
 ### Changed
 - PyTorch v2.1 support
diff --git a/Cargo.toml b/Cargo.toml
index d0d46c79..49286e9c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tch"
-version = "0.14.0"
+version = "0.15.0"
 authors = ["Laurent Mazare <lmazare@gmail.com>"]
 edition = "2021"
 build = "build.rs"
@@ -22,7 +22,7 @@ libc = "0.2.0"
 ndarray = "0.15"
 rand = "0.8"
 thiserror = "1"
-torch-sys = { version = "0.14.0", path = "torch-sys" }
+torch-sys = { version = "0.15.0", path = "torch-sys" }
 zip = "0.6"
 half = "2"
 safetensors = "0.3.0"
diff --git a/README.md b/README.md
index 0582b258..6761797a 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ The code generation part for the C api on top of libtorch comes from
 
 ## Getting Started
 
-This crate requires the C++ PyTorch library (libtorch) in version *v2.1.0* to be available on
+This crate requires the C++ PyTorch library (libtorch) in version *v2.2.0* to be available on
 your system. You can either:
 
 - Use the system-wide libtorch installation (default).
@@ -85,7 +85,7 @@ seem to include `libtorch.a` by default so this would have to be compiled
 manually, e.g. via the following:
 
 ```bash
-git clone -b v2.1.0 --recurse-submodule https://github.com/pytorch/pytorch.git pytorch-static --depth 1
+git clone -b v2.2.0 --recurse-submodule https://github.com/pytorch/pytorch.git pytorch-static --depth 1
 cd pytorch-static
 USE_CUDA=OFF BUILD_SHARED_LIBS=OFF python setup.py build
 # export LIBTORCH to point at the build directory in pytorch-static.
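Once `LIBTORCH` (or a system-wide install) points at libtorch v2.2.0, a minimal smoke test confirms the upgraded toolchain links and runs. This sketch is not part of the patch; it only assumes the long-standing `tch` basics (`Tensor::from_slice`, `Tensor::print`, `Cuda::is_available`):

```rust
use tch::{Cuda, Tensor};

fn main() {
    // Links against whatever libtorch the build found; with this release that
    // should be v2.2.0.
    let t = Tensor::from_slice(&[3i64, 1, 4, 1, 5]) * 2;
    t.print();
    println!("cuda available: {}", Cuda::is_available());
}
```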
diff --git a/examples/python-extension/Cargo.toml b/examples/python-extension/Cargo.toml
index ca978580..8bb57a24 100644
--- a/examples/python-extension/Cargo.toml
+++ b/examples/python-extension/Cargo.toml
@@ -18,6 +18,6 @@ crate-type = ["cdylib"]
 
 [dependencies]
 pyo3 = { version = "0.18.3", features = ["extension-module"] }
-pyo3-tch = { path = "../../pyo3-tch", version = "0.14.0" }
-tch = { path = "../..", features = ["python-extension"], version = "0.14.0" }
-torch-sys = { path = "../../torch-sys", features = ["python-extension"], version = "0.14.0" }
+pyo3-tch = { path = "../../pyo3-tch", version = "0.15.0" }
+tch = { path = "../..", features = ["python-extension"], version = "0.15.0" }
+torch-sys = { path = "../../torch-sys", features = ["python-extension"], version = "0.15.0" }
diff --git a/gen/gen.ml b/gen/gen.ml
index 4b119806..009d82b5 100644
--- a/gen/gen.ml
+++ b/gen/gen.ml
@@ -878,7 +878,7 @@ let run
 
 let () =
   run
-    ~yaml_filename:"third_party/pytorch/Declarations-v2.1.0.yaml"
+    ~yaml_filename:"third_party/pytorch/Declarations-v2.2.0.yaml"
     ~cpp_filename:"torch-sys/libtch/torch_api_generated"
     ~ffi_filename:"torch-sys/src/c_generated.rs"
     ~wrapper_filename:"src/wrappers/tensor_generated.rs"
diff --git a/pyo3-tch/Cargo.toml b/pyo3-tch/Cargo.toml
index 86ca8aa9..61b06a3e 100644
--- a/pyo3-tch/Cargo.toml
+++ b/pyo3-tch/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "pyo3-tch"
-version = "0.14.0"
+version = "0.15.0"
 authors = ["Laurent Mazare <lmazare@gmail.com>"]
 edition = "2021"
 build = "build.rs"
@@ -12,6 +12,6 @@ categories = ["science"]
 license = "MIT/Apache-2.0"
 
 [dependencies]
-tch = { path = "..", features = ["python-extension"], version = "0.14.0" }
-torch-sys = { path = "../torch-sys", features = ["python-extension"], version = "0.14.0" }
+tch = { path = "..", features = ["python-extension"], version = "0.15.0" }
+torch-sys = { path = "../torch-sys", features = ["python-extension"], version = "0.15.0" }
 pyo3 = { version = "0.18.3", features = ["extension-module"] }
diff --git a/src/wrappers/tensor_fallible_generated.rs b/src/wrappers/tensor_fallible_generated.rs
index 10260711..4939ea59 100644
--- a/src/wrappers/tensor_fallible_generated.rs
+++ b/src/wrappers/tensor_fallible_generated.rs
@@ -1054,6 +1054,19 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_internal_convert_weight_to_int4pack(
+        &self,
+        innerktiles: i64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg__convert_weight_to_int4pack(
+            c_tensors.as_mut_ptr(),
+            self.c_tensor,
+            innerktiles
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_internal_convolution<T: Borrow<Tensor>>(
         &self,
         weight: &Tensor,
@@ -1264,6 +1277,8 @@ impl Tensor {
         compressed_a: &Tensor,
         dense_b: &Tensor,
         bias: Option<T>,
+        alpha: Option<T>,
+        out_dtype: impl Into<Option<Kind>>,
         transpose_result: bool,
     ) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg__cslt_sparse_mm(
@@ -1272,6 +1287,8 @@ impl Tensor {
             compressed_a.c_tensor,
             dense_b.c_tensor,
             bias.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
+            alpha.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
+            out_dtype.into().map_or(-1, |s| s.c_int()),
             if transpose_result { 1 } else { 0 }
         ));
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
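`_cslt_sparse_mm` gains two arguments in this release: an optional `alpha` tensor and an optional `out_dtype`. A hedged sketch of the updated call through the infallible wrapper; it assumes a CUDA build of libtorch with cuSPARSELt support, and passing `None` for both new parameters should reproduce the pre-0.15 behaviour:

```rust
use tch::{Kind, Tensor};

// Sketch only: `compressed_a` must come from `_cslt_compress` on a supported GPU.
fn cslt_sparse_mm_sketch(compressed_a: &Tensor, dense_b: &Tensor) -> Tensor {
    Tensor::internal_cslt_sparse_mm(
        compressed_a,
        dense_b,
        None::<Tensor>, // bias
        None::<Tensor>, // alpha (new in v0.15.0)
        None::<Kind>,   // out_dtype (new in v0.15.0)
        false,          // transpose_result
    )
}
```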
@@ -1803,8 +1820,8 @@ impl Tensor {
         out: &Tensor,
         cu_seqlens_q: Option<T>,
         cu_seqlens_k: Option<T>,
-        max_seqlen_k: i64,
         max_seqlen_q: i64,
+        max_seqlen_k: i64,
         logsumexp: &Tensor,
         dropout_p: f64,
         philox_seed: &Tensor,
@@ -1827,8 +1844,8 @@ impl Tensor {
             out.c_tensor,
             cu_seqlens_q.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
             cu_seqlens_k.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
-            max_seqlen_k,
             max_seqlen_q,
+            max_seqlen_k,
             logsumexp.c_tensor,
             dropout_p,
             philox_seed.c_tensor,
@@ -3939,6 +3956,26 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_internal_mixed_dtypes_linear<T: Borrow<Tensor>>(
+        &self,
+        weight: &Tensor,
+        scale: &Tensor,
+        bias: Option<T>,
+        activation: &str,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg__mixed_dtypes_linear(
+            c_tensors.as_mut_ptr(),
+            self.c_tensor,
+            weight.c_tensor,
+            scale.c_tensor,
+            bias.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
+            activation.as_ptr(),
+            activation.len() as i32
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_internal_mkldnn_reshape(&self, shape: impl IntList) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg__mkldnn_reshape(
@@ -5140,6 +5177,7 @@ impl Tensor {
         scale_a: Option<T>,
         scale_b: Option<T>,
         scale_result: Option<T>,
+        use_fast_accum: bool,
     ) -> Result<(Tensor, Tensor), TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 2];
         unsafe_torch_err!(atg__scaled_mm(
@@ -5150,7 +5188,8 @@ impl Tensor {
             out_dtype.into().map_or(-1, |s| s.c_int()),
             scale_a.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
             scale_b.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
-            scale_result.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor)
+            scale_result.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
+            if use_fast_accum { 1 } else { 0 }
         ));
         Ok((Tensor { c_tensor: c_tensors[0] }, Tensor { c_tensor: c_tensors[1] }))
     }
@@ -5165,6 +5204,7 @@ impl Tensor {
         scale_a: Option<T>,
         scale_b: Option<T>,
         scale_result: Option<T>,
+        use_fast_accum: bool,
     ) -> Result<(Tensor, Tensor), TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 2];
         unsafe_torch_err!(atg__scaled_mm_out(
@@ -5177,7 +5217,8 @@ impl Tensor {
             out_dtype.into().map_or(-1, |s| s.c_int()),
             scale_a.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
             scale_b.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
-            scale_result.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor)
+            scale_result.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor),
+            if use_fast_accum { 1 } else { 0 }
         ));
         Ok((Tensor { c_tensor: c_tensors[0] }, Tensor { c_tensor: c_tensors[1] }))
     }
@@ -8102,6 +8143,23 @@ impl Tensor {
         Ok(return_)
     }
 
+    pub fn f_internal_weight_int4pack_mm(
+        &self,
+        mat2: &Tensor,
+        qgroupsize: i64,
+        qscaleandzeros: &Tensor,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg__weight_int4pack_mm(
+            c_tensors.as_mut_ptr(),
+            self.c_tensor,
+            mat2.c_tensor,
+            qgroupsize,
+            qscaleandzeros.c_tensor
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_internal_weight_norm(v: &Tensor, g: &Tensor, dim: i64) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg__weight_norm(c_tensors.as_mut_ptr(), v.c_tensor, g.c_tensor, dim));
@@ -8909,6 +8967,36 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_all_dims(&self, dim: impl IntListOption, keepdim: bool) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_all_dims(
+            c_tensors.as_mut_ptr(),
+            self.c_tensor,
+            dim.as_ptr(),
+            dim.len_i32(),
+            if keepdim { 1 } else { 0 }
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
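The new `_dims` reductions take an optional list of dimensions instead of a single `i64`. A usage sketch via the panicking wrappers added later in this patch, assuming the usual `IntListOption` impls for `Option<&[i64]>`:

```rust
use tch::Tensor;

fn main() {
    let t = Tensor::from_slice(&[true, false, true, false]).view((2, 2));
    // Reduce over an explicit dim list -> shape [2].
    let per_row = t.any_dims(Some(&[1i64][..]), false);
    // A `None` dim list reduces over every dimension -> 0-dim result.
    let overall = t.all_dims(None::<&[i64]>, false);
    per_row.print();
    overall.print();
}
```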
+    pub fn f_all_dims_out(
+        &self,
+        out: &Tensor,
+        dim: impl IntListOption,
+        keepdim: bool,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_all_dims_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            self.c_tensor,
+            dim.as_ptr(),
+            dim.len_i32(),
+            if keepdim { 1 } else { 0 }
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_all_out(&self, out: &Tensor, dim: i64, keepdim: bool) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg_all_out(
@@ -9096,6 +9184,36 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_any_dims(&self, dim: impl IntListOption, keepdim: bool) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_any_dims(
+            c_tensors.as_mut_ptr(),
+            self.c_tensor,
+            dim.as_ptr(),
+            dim.len_i32(),
+            if keepdim { 1 } else { 0 }
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_any_dims_out(
+        &self,
+        out: &Tensor,
+        dim: impl IntListOption,
+        keepdim: bool,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_any_dims_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            self.c_tensor,
+            dim.as_ptr(),
+            dim.len_i32(),
+            if keepdim { 1 } else { 0 }
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_any_out(&self, out: &Tensor, dim: i64, keepdim: bool) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg_any_out(
@@ -16670,6 +16788,21 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_floor_divide_scalar_out<S: Into<Scalar>>(
+        &self,
+        out: &Tensor,
+        other: S,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_floor_divide_scalar_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            self.c_tensor,
+            other.into().c_scalar
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_floor_out(&self, out: &Tensor) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg_floor_out(c_tensors.as_mut_ptr(), out.c_tensor, self.c_tensor));
@@ -21547,6 +21680,111 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_linspace_scalar_tensor<S: Into<Scalar>>(
+        start: S,
+        end: &Tensor,
+        steps: i64,
+        options: (Kind, Device),
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_linspace_scalar_tensor(
+            c_tensors.as_mut_ptr(),
+            start.into().c_scalar,
+            end.c_tensor,
+            steps,
+            options.0.c_int(),
+            options.1.c_int()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_linspace_scalar_tensor_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: S,
+        end: &Tensor,
+        steps: i64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_linspace_scalar_tensor_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            start.into().c_scalar,
+            end.c_tensor,
+            steps
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_linspace_tensor_scalar<S: Into<Scalar>>(
+        start: &Tensor,
+        end: S,
+        steps: i64,
+        options: (Kind, Device),
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_linspace_tensor_scalar(
+            c_tensors.as_mut_ptr(),
+            start.c_tensor,
+            end.into().c_scalar,
+            steps,
+            options.0.c_int(),
+            options.1.c_int()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_linspace_tensor_scalar_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: &Tensor,
+        end: S,
+        steps: i64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_linspace_tensor_scalar_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            start.c_tensor,
+            end.into().c_scalar,
+            steps
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_linspace_tensor_tensor(
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+        options: (Kind, Device),
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_linspace_tensor_tensor(
+            c_tensors.as_mut_ptr(),
+            start.c_tensor,
+            end.c_tensor,
+            steps,
+            options.0.c_int(),
+            options.1.c_int()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_linspace_tensor_tensor_out(
+        out: &Tensor,
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_linspace_tensor_tensor_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            start.c_tensor,
+            end.c_tensor,
+            steps
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_log(&self) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg_log(c_tensors.as_mut_ptr(), self.c_tensor));
@@ -21979,6 +22217,123 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_logspace_scalar_tensor<S: Into<Scalar>>(
+        start: S,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+        options: (Kind, Device),
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_logspace_scalar_tensor(
+            c_tensors.as_mut_ptr(),
+            start.into().c_scalar,
+            end.c_tensor,
+            steps,
+            base,
+            options.0.c_int(),
+            options.1.c_int()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_logspace_scalar_tensor_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: S,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_logspace_scalar_tensor_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            start.into().c_scalar,
+            end.c_tensor,
+            steps,
+            base
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_logspace_tensor_scalar<S: Into<Scalar>>(
+        start: &Tensor,
+        end: S,
+        steps: i64,
+        base: f64,
+        options: (Kind, Device),
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_logspace_tensor_scalar(
+            c_tensors.as_mut_ptr(),
+            start.c_tensor,
+            end.into().c_scalar,
+            steps,
+            base,
+            options.0.c_int(),
+            options.1.c_int()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_logspace_tensor_scalar_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: &Tensor,
+        end: S,
+        steps: i64,
+        base: f64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_logspace_tensor_scalar_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            start.c_tensor,
+            end.into().c_scalar,
+            steps,
+            base
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_logspace_tensor_tensor(
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+        options: (Kind, Device),
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_logspace_tensor_tensor(
+            c_tensors.as_mut_ptr(),
+            start.c_tensor,
+            end.c_tensor,
+            steps,
+            base,
+            options.0.c_int(),
+            options.1.c_int()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
+    pub fn f_logspace_tensor_tensor_out(
+        out: &Tensor,
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_logspace_tensor_tensor_out(
+            c_tensors.as_mut_ptr(),
+            out.c_tensor,
+            start.c_tensor,
+            end.c_tensor,
+            steps,
+            base
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_logsumexp(&self, dim: impl IntList, keepdim: bool) -> Result<Tensor, TchError> {
         let mut c_tensors = [std::ptr::null_mut(); 1];
         unsafe_torch_err!(atg_logsumexp(
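PyTorch 2.2 adds `linspace`/`logspace` overloads whose endpoints are tensors, surfaced here with `_scalar_tensor`, `_tensor_scalar`, and `_tensor_tensor` suffixes. A small sketch of the tensor-tensor form; it assumes 0-dim endpoint tensors, as the upstream ops require:

```rust
use tch::{Device, Kind, Tensor};

fn main() {
    let start = Tensor::from(0f64); // 0-dim endpoints
    let end = Tensor::from(1f64);
    let xs = Tensor::linspace_tensor_tensor(&start, &end, 5, (Kind::Float, Device::Cpu));
    xs.print(); // expected: 0.00, 0.25, 0.50, 0.75, 1.00
}
```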
@@ -22405,6 +22760,22 @@ impl Tensor {
         Ok(Tensor { c_tensor: c_tensors[0] })
     }
 
+    pub fn f_masked_scatter_backward(
+        grad_output: &Tensor,
+        mask: &Tensor,
+        sizes: impl IntList,
+    ) -> Result<Tensor, TchError> {
+        let mut c_tensors = [std::ptr::null_mut(); 1];
+        unsafe_torch_err!(atg_masked_scatter_backward(
+            c_tensors.as_mut_ptr(),
+            grad_output.c_tensor,
+            mask.c_tensor,
+            sizes.as_ptr(),
+            sizes.len_i32()
+        ));
+        Ok(Tensor { c_tensor: c_tensors[0] })
+    }
+
     pub fn f_masked_scatter_out(
         &self,
         out: &Tensor,
diff --git a/src/wrappers/tensor_generated.rs b/src/wrappers/tensor_generated.rs
index eb392928..a4471129 100644
--- a/src/wrappers/tensor_generated.rs
+++ b/src/wrappers/tensor_generated.rs
@@ -506,6 +506,10 @@ impl Tensor {
         .unwrap()
     }
 
+    pub fn internal_convert_weight_to_int4pack(&self, innerktiles: i64) -> Tensor {
+        self.f_internal_convert_weight_to_int4pack(innerktiles).unwrap()
+    }
+
     pub fn internal_convolution<T: Borrow<Tensor>>(
         &self,
         weight: &Tensor,
@@ -638,9 +642,19 @@ impl Tensor {
         compressed_a: &Tensor,
         dense_b: &Tensor,
         bias: Option<T>,
+        alpha: Option<T>,
+        out_dtype: impl Into<Option<Kind>>,
         transpose_result: bool,
     ) -> Tensor {
-        Tensor::f_internal_cslt_sparse_mm(compressed_a, dense_b, bias, transpose_result).unwrap()
+        Tensor::f_internal_cslt_sparse_mm(
+            compressed_a,
+            dense_b,
+            bias,
+            alpha,
+            out_dtype,
+            transpose_result,
+        )
+        .unwrap()
     }
 
     pub fn internal_ctc_loss(
@@ -1065,8 +1079,8 @@ impl Tensor {
         out: &Tensor,
         cu_seqlens_q: Option<T>,
         cu_seqlens_k: Option<T>,
-        max_seqlen_k: i64,
         max_seqlen_q: i64,
+        max_seqlen_k: i64,
         logsumexp: &Tensor,
         dropout_p: f64,
         philox_seed: &Tensor,
@@ -1085,8 +1099,8 @@ impl Tensor {
             out,
             cu_seqlens_q,
             cu_seqlens_k,
-            max_seqlen_k,
             max_seqlen_q,
+            max_seqlen_k,
             logsumexp,
             dropout_p,
             philox_seed,
@@ -2367,6 +2381,16 @@ impl Tensor {
         self.f_internal_masked_softmax_out(out, mask, dim, mask_type).unwrap()
     }
 
+    pub fn internal_mixed_dtypes_linear<T: Borrow<Tensor>>(
+        &self,
+        weight: &Tensor,
+        scale: &Tensor,
+        bias: Option<T>,
+        activation: &str,
+    ) -> Tensor {
+        self.f_internal_mixed_dtypes_linear(weight, scale, bias, activation).unwrap()
+    }
+
     pub fn internal_mkldnn_reshape(&self, shape: impl IntList) -> Tensor {
         self.f_internal_mkldnn_reshape(shape).unwrap()
     }
@@ -3078,8 +3102,18 @@ impl Tensor {
         scale_a: Option<T>,
         scale_b: Option<T>,
         scale_result: Option<T>,
+        use_fast_accum: bool,
     ) -> (Tensor, Tensor) {
-        self.f_internal_scaled_mm(mat2, bias, out_dtype, scale_a, scale_b, scale_result).unwrap()
+        self.f_internal_scaled_mm(
+            mat2,
+            bias,
+            out_dtype,
+            scale_a,
+            scale_b,
+            scale_result,
+            use_fast_accum,
+        )
+        .unwrap()
     }
 
     pub fn internal_scaled_mm_out<T: Borrow<Tensor>>(
@@ -3092,6 +3126,7 @@ impl Tensor {
         scale_a: Option<T>,
         scale_b: Option<T>,
         scale_result: Option<T>,
+        use_fast_accum: bool,
     ) -> (Tensor, Tensor) {
         self.f_internal_scaled_mm_out(
             out,
@@ -3102,6 +3137,7 @@ impl Tensor {
             scale_a,
             scale_b,
             scale_result,
+            use_fast_accum,
         )
         .unwrap()
     }
@@ -4663,6 +4699,15 @@ impl Tensor {
         self.f_internal_version().unwrap()
     }
 
+    pub fn internal_weight_int4pack_mm(
+        &self,
+        mat2: &Tensor,
+        qgroupsize: i64,
+        qscaleandzeros: &Tensor,
+    ) -> Tensor {
+        self.f_internal_weight_int4pack_mm(mat2, qgroupsize, qscaleandzeros).unwrap()
+    }
+
     pub fn internal_weight_norm(v: &Tensor, g: &Tensor, dim: i64) -> Tensor {
         Tensor::f_internal_weight_norm(v, g, dim).unwrap()
     }
@@ -5025,6 +5070,14 @@ impl Tensor {
         self.f_all_dim(dim, keepdim).unwrap()
     }
 
+    pub fn all_dims(&self, dim: impl IntListOption, keepdim: bool) -> Tensor {
+        self.f_all_dims(dim, keepdim).unwrap()
+    }
+
+    pub fn all_dims_out(&self, out: &Tensor, dim: impl IntListOption, keepdim: bool) -> Tensor {
+        self.f_all_dims_out(out, dim, keepdim).unwrap()
+    }
+
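`internal_convert_weight_to_int4pack` and `internal_weight_int4pack_mm` (added further down in this file) arrive as a pair for int4 weight-quantized matmuls. A deliberately loose sketch; the packing layout and the valid values of `innerKTiles`, `qGroupSize`, and the scale-and-zeros tensor follow upstream PyTorch 2.2 kernel constraints that are not captured here:

```rust
use tch::Tensor;

// Sketch only: the literal arguments are assumptions, not values validated here.
fn int4_mm_sketch(x: &Tensor, weight: &Tensor, qscale_and_zeros: &Tensor) -> Tensor {
    let packed = weight.internal_convert_weight_to_int4pack(8); // innerKTiles = 8
    x.internal_weight_int4pack_mm(&packed, 32, qscale_and_zeros) // qGroupSize = 32
}
```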
     pub fn all_out(&self, out: &Tensor, dim: i64, keepdim: bool) -> Tensor {
         self.f_all_out(out, dim, keepdim).unwrap()
     }
 
@@ -5091,6 +5144,14 @@ impl Tensor {
         self.f_any_dim(dim, keepdim).unwrap()
     }
 
+    pub fn any_dims(&self, dim: impl IntListOption, keepdim: bool) -> Tensor {
+        self.f_any_dims(dim, keepdim).unwrap()
+    }
+
+    pub fn any_dims_out(&self, out: &Tensor, dim: impl IntListOption, keepdim: bool) -> Tensor {
+        self.f_any_dims_out(out, dim, keepdim).unwrap()
+    }
+
     pub fn any_out(&self, out: &Tensor, dim: i64, keepdim: bool) -> Tensor {
         self.f_any_out(out, dim, keepdim).unwrap()
     }
@@ -8716,6 +8777,10 @@ impl Tensor {
         self.f_floor_divide_scalar_(other).unwrap()
     }
 
+    pub fn floor_divide_scalar_out<S: Into<Scalar>>(&self, out: &Tensor, other: S) -> Tensor {
+        self.f_floor_divide_scalar_out(out, other).unwrap()
+    }
+
     pub fn floor_out(&self, out: &Tensor) -> Tensor {
         self.f_floor_out(out).unwrap()
     }
@@ -10980,6 +11045,60 @@ impl Tensor {
         Tensor::f_linspace_out(out, start, end, steps).unwrap()
     }
 
+    pub fn linspace_scalar_tensor<S: Into<Scalar>>(
+        start: S,
+        end: &Tensor,
+        steps: i64,
+        options: (Kind, Device),
+    ) -> Tensor {
+        Tensor::f_linspace_scalar_tensor(start, end, steps, options).unwrap()
+    }
+
+    pub fn linspace_scalar_tensor_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: S,
+        end: &Tensor,
+        steps: i64,
+    ) -> Tensor {
+        Tensor::f_linspace_scalar_tensor_out(out, start, end, steps).unwrap()
+    }
+
+    pub fn linspace_tensor_scalar<S: Into<Scalar>>(
+        start: &Tensor,
+        end: S,
+        steps: i64,
+        options: (Kind, Device),
+    ) -> Tensor {
+        Tensor::f_linspace_tensor_scalar(start, end, steps, options).unwrap()
+    }
+
+    pub fn linspace_tensor_scalar_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: &Tensor,
+        end: S,
+        steps: i64,
+    ) -> Tensor {
+        Tensor::f_linspace_tensor_scalar_out(out, start, end, steps).unwrap()
+    }
+
+    pub fn linspace_tensor_tensor(
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+        options: (Kind, Device),
+    ) -> Tensor {
+        Tensor::f_linspace_tensor_tensor(start, end, steps, options).unwrap()
+    }
+
+    pub fn linspace_tensor_tensor_out(
+        out: &Tensor,
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+    ) -> Tensor {
+        Tensor::f_linspace_tensor_tensor_out(out, start, end, steps).unwrap()
+    }
+
     pub fn log(&self) -> Tensor {
         self.f_log().unwrap()
     }
@@ -11195,6 +11314,66 @@ impl Tensor {
         Tensor::f_logspace_out(out, start, end, steps, base).unwrap()
     }
 
+    pub fn logspace_scalar_tensor<S: Into<Scalar>>(
+        start: S,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+        options: (Kind, Device),
+    ) -> Tensor {
+        Tensor::f_logspace_scalar_tensor(start, end, steps, base, options).unwrap()
+    }
+
+    pub fn logspace_scalar_tensor_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: S,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+    ) -> Tensor {
+        Tensor::f_logspace_scalar_tensor_out(out, start, end, steps, base).unwrap()
+    }
+
+    pub fn logspace_tensor_scalar<S: Into<Scalar>>(
+        start: &Tensor,
+        end: S,
+        steps: i64,
+        base: f64,
+        options: (Kind, Device),
+    ) -> Tensor {
+        Tensor::f_logspace_tensor_scalar(start, end, steps, base, options).unwrap()
+    }
+
+    pub fn logspace_tensor_scalar_out<S: Into<Scalar>>(
+        out: &Tensor,
+        start: &Tensor,
+        end: S,
+        steps: i64,
+        base: f64,
+    ) -> Tensor {
+        Tensor::f_logspace_tensor_scalar_out(out, start, end, steps, base).unwrap()
+    }
+
+    pub fn logspace_tensor_tensor(
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+        options: (Kind, Device),
+    ) -> Tensor {
+        Tensor::f_logspace_tensor_tensor(start, end, steps, base, options).unwrap()
+    }
+
+    pub fn logspace_tensor_tensor_out(
+        out: &Tensor,
+        start: &Tensor,
+        end: &Tensor,
+        steps: i64,
+        base: f64,
+    ) -> Tensor {
+        Tensor::f_logspace_tensor_tensor_out(out, start, end, steps, base).unwrap()
+    }
+
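As everywhere in these two generated files, each operator comes in a fallible `f_`-prefixed form returning `Result<Tensor, TchError>` and a convenience form that unwraps it, so the new entry points follow the crate-wide error-handling pattern. For instance, with the wrappers above:

```rust
use tch::{Device, Kind, Tensor};

fn spaces() -> Result<Tensor, tch::TchError> {
    let start = Tensor::from(0f64);
    let end = Tensor::from(3f64);
    // Fallible form: surfaces libtorch errors as TchError values...
    let ys = Tensor::f_logspace_tensor_tensor(&start, &end, 4, 10.0, (Kind::Float, Device::Cpu))?;
    // ...while the plain name panics on failure instead.
    let _same = Tensor::logspace_tensor_tensor(&start, &end, 4, 10.0, (Kind::Float, Device::Cpu));
    Ok(ys)
}
```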
     pub fn logsumexp(&self, dim: impl IntList, keepdim: bool) -> Tensor {
         self.f_logsumexp(dim, keepdim).unwrap()
     }
@@ -11396,6 +11575,14 @@ impl Tensor {
         self.f_masked_scatter_(mask, source).unwrap()
     }
 
+    pub fn masked_scatter_backward(
+        grad_output: &Tensor,
+        mask: &Tensor,
+        sizes: impl IntList,
+    ) -> Tensor {
+        Tensor::f_masked_scatter_backward(grad_output, mask, sizes).unwrap()
+    }
+
     pub fn masked_scatter_out(&self, out: &Tensor, mask: &Tensor, source: &Tensor) -> Tensor {
         self.f_masked_scatter_out(out, mask, source).unwrap()
     }
diff --git a/torch-sys/Cargo.toml b/torch-sys/Cargo.toml
index cf19f445..5990458d 100644
--- a/torch-sys/Cargo.toml
+++ b/torch-sys/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "torch-sys"
-version = "0.14.0"
+version = "0.15.0"
 authors = ["Laurent Mazare <lmazare@gmail.com>"]
 edition = "2021"
 build = "build.rs"
diff --git a/torch-sys/build.rs b/torch-sys/build.rs
index 9ba49a32..c66087ca 100644
--- a/torch-sys/build.rs
+++ b/torch-sys/build.rs
@@ -10,7 +10,7 @@ use anyhow::{Context, Result};
 use std::path::{Path, PathBuf};
 use std::{env, fs, io};
 
-const TORCH_VERSION: &str = "2.1.0";
+const TORCH_VERSION: &str = "2.2.0";
 const PYTHON_PRINT_PYTORCH_DETAILS: &str = r"
 import torch
 from torch.utils import cpp_extension
@@ -158,7 +158,7 @@ fn version_check(version: &str) -> Result<()> {
         return Ok(());
     }
     let version = version.trim();
-    // Typical version number is 2.1.0+cpu or 2.1.0+cu117
+    // Typical version number is 2.2.0+cpu or 2.2.0+cu121
     let version = match version.split_once('+') {
         None => version,
         Some((version, _)) => version,
@@ -312,11 +312,8 @@ impl SystemInfo {
                     "https://download.pytorch.org/libtorch/{}/libtorch-cxx11-abi-shared-with-deps-{}{}.zip",
                     device, TORCH_VERSION, match device.as_ref() {
                         "cpu" => "%2Bcpu",
-                        "cu102" => "%2Bcu102",
-                        "cu113" => "%2Bcu113",
-                        "cu116" => "%2Bcu116",
-                        "cu117" => "%2Bcu117",
                         "cu118" => "%2Bcu118",
+                        "cu121" => "%2Bcu121",
                         _ => anyhow::bail!("unsupported device {device}, TORCH_CUDA_VERSION may be set incorrectly?"),
                     }
                 ),
@@ -331,18 +328,15 @@ impl SystemInfo {
                         export DYLD_LIBRARY_PATH=${{LIBTORCH}}/lib
                     ")
                 } else {
-                    format!("https://download.pytorch.org/libtorch/cpu/libtorch-macos-{TORCH_VERSION}.zip")
+                    format!("https://download.pytorch.org/libtorch/cpu/libtorch-macos-x86_64-{TORCH_VERSION}.zip")
                 }
             },
             Os::Windows => format!(
                 "https://download.pytorch.org/libtorch/{}/libtorch-win-shared-with-deps-{}{}.zip",
                 device, TORCH_VERSION, match device.as_ref() {
                     "cpu" => "%2Bcpu",
-                    "cu102" => "%2Bcu102",
-                    "cu113" => "%2Bcu113",
-                    "cu116" => "%2Bcu116",
-                    "cu117" => "%2Bcu117",
                     "cu118" => "%2Bcu118",
+                    "cu121" => "%2Bcu121",
                     _ => ""
                 }),
         };
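On the download path, only `cpu`, `cu118`, and `cu121` remain valid `TORCH_CUDA_VERSION` values for the prebuilt libtorch v2.2.0 archives; the cu102/cu113/cu116/cu117 suffixes are gone. A condensed sketch of the selection above (note the real build script bails out on Linux but falls back to an empty suffix on Windows):

```rust
// Mirrors the match arms in torch-sys/build.rs as of v0.15.0.
fn url_suffix(device: &str) -> Result<&'static str, String> {
    match device {
        "cpu" => Ok("%2Bcpu"),
        "cu118" => Ok("%2Bcu118"),
        "cu121" => Ok("%2Bcu121"),
        other => {
            Err(format!("unsupported device {other}, TORCH_CUDA_VERSION may be set incorrectly?"))
        }
    }
}
```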
diff --git a/torch-sys/libtch/torch_api_generated.cpp b/torch-sys/libtch/torch_api_generated.cpp
index b293e9fe..bbf5d251 100644
--- a/torch-sys/libtch/torch_api_generated.cpp
+++ b/torch-sys/libtch/torch_api_generated.cpp
@@ -572,6 +572,13 @@ void atg__convert_indices_from_csr_to_coo_out(tensor *out__, tensor out, tensor
   )
 }
 
+void atg__convert_weight_to_int4pack(tensor *out__, tensor self, int64_t innerKTiles) {
+  PROTECT(
+    auto outputs__ = torch::_convert_weight_to_int4pack(*self, innerKTiles);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg__convolution(tensor *out__, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled, int allow_tf32) {
   PROTECT(
     auto outputs__ = torch::_convolution(*input, *weight, (bias ? *bias : torch::Tensor()), torch::IntArrayRef(stride_data, stride_len), torch::IntArrayRef(padding_data, padding_len), torch::IntArrayRef(dilation_data, dilation_len), (bool)transposed, torch::IntArrayRef(output_padding_data, output_padding_len), groups, (bool)benchmark, (bool)deterministic, (bool)cudnn_enabled, (bool)allow_tf32);
@@ -635,9 +642,9 @@ void atg__cslt_compress(tensor *out__, tensor input) {
   )
 }
 
-void atg__cslt_sparse_mm(tensor *out__, tensor compressed_A, tensor dense_B, tensor bias, int transpose_result) {
+void atg__cslt_sparse_mm(tensor *out__, tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result) {
   PROTECT(
-    auto outputs__ = torch::_cslt_sparse_mm(*compressed_A, *dense_B, (bias ? *bias : torch::Tensor()), (bool)transpose_result);
+    auto outputs__ = torch::_cslt_sparse_mm(*compressed_A, *dense_B, (bias ? *bias : torch::Tensor()), (alpha ? *alpha : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional<at::ScalarType>(at::ScalarType(out_dtype)), (bool)transpose_result);
     out__[0] = new torch::Tensor(outputs__);
   )
 }
@@ -811,9 +818,9 @@ void atg__dirichlet_grad_out(tensor *out__, tensor out, tensor x, tensor alpha,
   )
 }
 
-void atg__efficient_attention_backward(tensor *out__, tensor grad_out_, tensor query, tensor key, tensor value, tensor bias, tensor out, tensor cu_seqlens_q, tensor cu_seqlens_k, int64_t max_seqlen_k, int64_t max_seqlen_q, tensor logsumexp, double dropout_p, tensor philox_seed, tensor philox_offset, int64_t custom_mask_type, int bias_requires_grad, double scale_v, uint8_t scale_null, int64_t num_splits_key_v, uint8_t num_splits_key_null) {
+void atg__efficient_attention_backward(tensor *out__, tensor grad_out_, tensor query, tensor key, tensor value, tensor bias, tensor out, tensor cu_seqlens_q, tensor cu_seqlens_k, int64_t max_seqlen_q, int64_t max_seqlen_k, tensor logsumexp, double dropout_p, tensor philox_seed, tensor philox_offset, int64_t custom_mask_type, int bias_requires_grad, double scale_v, uint8_t scale_null, int64_t num_splits_key_v, uint8_t num_splits_key_null) {
   PROTECT(
-    auto outputs__ = torch::_efficient_attention_backward(*grad_out_, *query, *key, *value, (bias ? *bias : torch::Tensor()), *out, (cu_seqlens_q ? *cu_seqlens_q : torch::Tensor()), (cu_seqlens_k ? *cu_seqlens_k : torch::Tensor()), max_seqlen_k, max_seqlen_q, *logsumexp, dropout_p, *philox_seed, *philox_offset, custom_mask_type, (bool)bias_requires_grad, scale_null ? c10::nullopt : c10::optional<double>(scale_v), num_splits_key_null ? c10::nullopt : c10::optional<int64_t>(num_splits_key_v));
+    auto outputs__ = torch::_efficient_attention_backward(*grad_out_, *query, *key, *value, (bias ? *bias : torch::Tensor()), *out, (cu_seqlens_q ? *cu_seqlens_q : torch::Tensor()), (cu_seqlens_k ? *cu_seqlens_k : torch::Tensor()), max_seqlen_q, max_seqlen_k, *logsumexp, dropout_p, *philox_seed, *philox_offset, custom_mask_type, (bool)bias_requires_grad, scale_null ? c10::nullopt : c10::optional<double>(scale_v), num_splits_key_null ? c10::nullopt : c10::optional<int64_t>(num_splits_key_v));
     out__[0] = new torch::Tensor(std::get<0>(outputs__));
     out__[1] = new torch::Tensor(std::get<1>(outputs__));
     out__[2] = new torch::Tensor(std::get<2>(outputs__));
@@ -1624,6 +1631,13 @@ void atg__masked_softmax_out(tensor *out__, tensor out, tensor self, tensor mask
   )
 }
 
+void atg__mixed_dtypes_linear(tensor *out__, tensor input, tensor weight, tensor scale, tensor bias, char* activation_ptr, int activation_len) {
+  PROTECT(
+    auto outputs__ = torch::_mixed_dtypes_linear(*input, *weight, *scale, (bias ? *bias : torch::Tensor()), std::string(activation_ptr, activation_len));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg__mkldnn_reshape(tensor *out__, tensor self, int64_t *shape_data, int shape_len) {
   PROTECT(
     auto outputs__ = torch::_mkldnn_reshape(*self, torch::IntArrayRef(shape_data, shape_len));
@@ -2107,17 +2121,17 @@ void atg__scaled_dot_product_flash_attention_backward(tensor *out__, tensor grad
   )
 }
 
-void atg__scaled_mm(tensor *out__, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result) {
+void atg__scaled_mm(tensor *out__, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result, int use_fast_accum) {
   PROTECT(
-    auto outputs__ = torch::_scaled_mm(*self, *mat2, (bias ? *bias : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional<at::ScalarType>(at::ScalarType(out_dtype)), (scale_a ? *scale_a : torch::Tensor()), (scale_b ? *scale_b : torch::Tensor()), (scale_result ? *scale_result : torch::Tensor()));
+    auto outputs__ = torch::_scaled_mm(*self, *mat2, (bias ? *bias : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional<at::ScalarType>(at::ScalarType(out_dtype)), (scale_a ? *scale_a : torch::Tensor()), (scale_b ? *scale_b : torch::Tensor()), (scale_result ? *scale_result : torch::Tensor()), (bool)use_fast_accum);
     out__[0] = new torch::Tensor(std::get<0>(outputs__));
     out__[1] = new torch::Tensor(std::get<1>(outputs__));
   )
 }
 
-void atg__scaled_mm_out(tensor *out__, tensor out, tensor out_amax, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result) {
+void atg__scaled_mm_out(tensor *out__, tensor out, tensor out_amax, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result, int use_fast_accum) {
   PROTECT(
-    auto outputs__ = torch::_scaled_mm_out(*out, *out_amax, *self, *mat2, (bias ? *bias : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional<at::ScalarType>(at::ScalarType(out_dtype)), (scale_a ? *scale_a : torch::Tensor()), (scale_b ? *scale_b : torch::Tensor()), (scale_result ? *scale_result : torch::Tensor()));
+    auto outputs__ = torch::_scaled_mm_out(*out, *out_amax, *self, *mat2, (bias ? *bias : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional<at::ScalarType>(at::ScalarType(out_dtype)), (scale_a ? *scale_a : torch::Tensor()), (scale_b ? *scale_b : torch::Tensor()), (scale_result ? *scale_result : torch::Tensor()), (bool)use_fast_accum);
     out__[0] = new torch::Tensor(std::get<0>(outputs__));
     out__[1] = new torch::Tensor(std::get<1>(outputs__));
   )
 }
@@ -3280,6 +3294,13 @@ int64_t atg__version(tensor self) {
   return 0;
 }
 
+void atg__weight_int4pack_mm(tensor *out__, tensor self, tensor mat2, int64_t qGroupSize, tensor qScaleAndZeros) {
+  PROTECT(
+    auto outputs__ = torch::_weight_int4pack_mm(*self, *mat2, qGroupSize, *qScaleAndZeros);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg__weight_norm(tensor *out__, tensor v, tensor g, int64_t dim) {
   PROTECT(
     auto outputs__ = torch::_weight_norm(*v, *g, dim);
@@ -3779,6 +3800,20 @@ void atg_all_dim(tensor *out__, tensor self, int64_t dim, int keepdim) {
   )
 }
 
+void atg_all_dims(tensor *out__, tensor self, int64_t *dim_data, int dim_len, int keepdim) {
+  PROTECT(
+    auto outputs__ = torch::all(*self, dim_data == nullptr ? c10::nullopt : c10::optional<torch::IntArrayRef>(torch::IntArrayRef(dim_data, dim_len)), (bool)keepdim);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_all_dims_out(tensor *out__, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim) {
+  PROTECT(
+    auto outputs__ = torch::all_out(*out, *self, dim_data == nullptr ? c10::nullopt : c10::optional<torch::IntArrayRef>(torch::IntArrayRef(dim_data, dim_len)), (bool)keepdim);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg_all_out(tensor *out__, tensor out, tensor self, int64_t dim, int keepdim) {
   PROTECT(
     auto outputs__ = torch::all_out(*out, *self, dim, (bool)keepdim);
@@ -3886,6 +3921,20 @@ void atg_any_dim(tensor *out__, tensor self, int64_t dim, int keepdim) {
   )
 }
 
+void atg_any_dims(tensor *out__, tensor self, int64_t *dim_data, int dim_len, int keepdim) {
+  PROTECT(
+    auto outputs__ = torch::any(*self, dim_data == nullptr ? c10::nullopt : c10::optional<torch::IntArrayRef>(torch::IntArrayRef(dim_data, dim_len)), (bool)keepdim);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_any_dims_out(tensor *out__, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim) {
+  PROTECT(
+    auto outputs__ = torch::any_out(*out, *self, dim_data == nullptr ? c10::nullopt : c10::optional<torch::IntArrayRef>(torch::IntArrayRef(dim_data, dim_len)), (bool)keepdim);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
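These shims also show how an optional dim list crosses the FFI boundary: the Rust wrapper hands over a `(ptr, len)` pair (see `dim.as_ptr()` / `dim.len_i32()` in `f_all_dims` earlier in the patch), `None` becomes a null pointer, and the C++ side maps that null back to `c10::nullopt`. From the caller's perspective:

```rust
use tch::Tensor;

fn reduce_some_or_all(t: &Tensor) -> (Tensor, Tensor) {
    // Some(dims) -> non-null dim_data + length: reduce over the listed dims.
    let rows = t.all_dims(Some(&[0i64][..]), false);
    // None -> dim_data == nullptr -> c10::nullopt: reduce over every dim.
    let all = t.all_dims(None::<&[i64]>, false);
    (rows, all)
}
```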
 void atg_any_out(tensor *out__, tensor out, tensor self, int64_t dim, int keepdim) {
   PROTECT(
     auto outputs__ = torch::any_out(*out, *self, dim, (bool)keepdim);
@@ -7784,6 +7833,13 @@ void atg_floor_divide_scalar_(tensor *out__, tensor self, scalar other) {
   )
 }
 
+void atg_floor_divide_scalar_out(tensor *out__, tensor out, tensor self, scalar other) {
+  PROTECT(
+    auto outputs__ = torch::floor_divide_out(*out, *self, *other);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg_floor_out(tensor *out__, tensor out, tensor self) {
   PROTECT(
     auto outputs__ = torch::floor_out(*out, *self);
@@ -10491,6 +10547,48 @@ void atg_linspace_out(tensor *out__, tensor out, scalar start, scalar end, int64
   )
 }
 
+void atg_linspace_scalar_tensor(tensor *out__, scalar start, tensor end, int64_t steps, int options_kind, int options_device) {
+  PROTECT(
+    auto outputs__ = torch::linspace(*start, *end, steps, at::device(device_of_int(options_device)).dtype(at::ScalarType(options_kind)));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_linspace_scalar_tensor_out(tensor *out__, tensor out, scalar start, tensor end, int64_t steps) {
+  PROTECT(
+    auto outputs__ = torch::linspace_out(*out, *start, *end, steps);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_linspace_tensor_scalar(tensor *out__, tensor start, scalar end, int64_t steps, int options_kind, int options_device) {
+  PROTECT(
+    auto outputs__ = torch::linspace(*start, *end, steps, at::device(device_of_int(options_device)).dtype(at::ScalarType(options_kind)));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_linspace_tensor_scalar_out(tensor *out__, tensor out, tensor start, scalar end, int64_t steps) {
+  PROTECT(
+    auto outputs__ = torch::linspace_out(*out, *start, *end, steps);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_linspace_tensor_tensor(tensor *out__, tensor start, tensor end, int64_t steps, int options_kind, int options_device) {
+  PROTECT(
+    auto outputs__ = torch::linspace(*start, *end, steps, at::device(device_of_int(options_device)).dtype(at::ScalarType(options_kind)));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_linspace_tensor_tensor_out(tensor *out__, tensor out, tensor start, tensor end, int64_t steps) {
+  PROTECT(
+    auto outputs__ = torch::linspace_out(*out, *start, *end, steps);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg_log(tensor *out__, tensor self) {
   PROTECT(
     auto outputs__ = torch::log(*self);
@@ -10820,6 +10918,48 @@ void atg_logspace_out(tensor *out__, tensor out, scalar start, scalar end, int64
   )
 }
 
+void atg_logspace_scalar_tensor(tensor *out__, scalar start, tensor end, int64_t steps, double base, int options_kind, int options_device) {
+  PROTECT(
+    auto outputs__ = torch::logspace(*start, *end, steps, base, at::device(device_of_int(options_device)).dtype(at::ScalarType(options_kind)));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_logspace_scalar_tensor_out(tensor *out__, tensor out, scalar start, tensor end, int64_t steps, double base) {
+  PROTECT(
+    auto outputs__ = torch::logspace_out(*out, *start, *end, steps, base);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_logspace_tensor_scalar(tensor *out__, tensor start, scalar end, int64_t steps, double base, int options_kind, int options_device) {
+  PROTECT(
+    auto outputs__ = torch::logspace(*start, *end, steps, base, at::device(device_of_int(options_device)).dtype(at::ScalarType(options_kind)));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_logspace_tensor_scalar_out(tensor *out__, tensor out, tensor start, scalar end, int64_t steps, double base) {
+  PROTECT(
+    auto outputs__ = torch::logspace_out(*out, *start, *end, steps, base);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_logspace_tensor_tensor(tensor *out__, tensor start, tensor end, int64_t steps, double base, int options_kind, int options_device) {
+  PROTECT(
+    auto outputs__ = torch::logspace(*start, *end, steps, base, at::device(device_of_int(options_device)).dtype(at::ScalarType(options_kind)));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
+void atg_logspace_tensor_tensor_out(tensor *out__, tensor out, tensor start, tensor end, int64_t steps, double base) {
+  PROTECT(
+    auto outputs__ = torch::logspace_out(*out, *start, *end, steps, base);
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg_logsumexp(tensor *out__, tensor self, int64_t *dim_data, int dim_len, int keepdim) {
   PROTECT(
     auto outputs__ = torch::logsumexp(*self, torch::IntArrayRef(dim_data, dim_len), (bool)keepdim);
@@ -11003,6 +11143,13 @@ void atg_masked_scatter_(tensor *out__, tensor self, tensor mask, tensor source)
   )
 }
 
+void atg_masked_scatter_backward(tensor *out__, tensor grad_output, tensor mask, int64_t *sizes_data, int sizes_len) {
+  PROTECT(
+    auto outputs__ = torch::masked_scatter_backward(*grad_output, *mask, torch::IntArrayRef(sizes_data, sizes_len));
+    out__[0] = new torch::Tensor(outputs__);
+  )
+}
+
 void atg_masked_scatter_out(tensor *out__, tensor out, tensor self, tensor mask, tensor source) {
   PROTECT(
     auto outputs__ = torch::masked_scatter_out(*out, *self, *mask, *source);
diff --git a/torch-sys/libtch/torch_api_generated.h b/torch-sys/libtch/torch_api_generated.h
index e8a1caaa..97beb08c 100644
--- a/torch-sys/libtch/torch_api_generated.h
+++ b/torch-sys/libtch/torch_api_generated.h
@@ -83,6 +83,7 @@ void atg__convert_indices_from_coo_to_csr(tensor *, tensor self, int64_t size, i
 void atg__convert_indices_from_coo_to_csr_out(tensor *, tensor out, tensor self, int64_t size, int out_int32);
 void atg__convert_indices_from_csr_to_coo(tensor *, tensor crow_indices, tensor col_indices, int out_int32, int transpose);
 void atg__convert_indices_from_csr_to_coo_out(tensor *, tensor out, tensor crow_indices, tensor col_indices, int out_int32, int transpose);
+void atg__convert_weight_to_int4pack(tensor *, tensor self, int64_t innerKTiles);
 void atg__convolution(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled, int allow_tf32);
 void atg__convolution_deprecated(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled);
 void atg__convolution_mode(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, char* padding_ptr, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
@@ -92,7 +93,7 @@ void atg__copy_from_and_resize(tensor *, tensor self, tensor dst);
 void atg__copy_from_and_resize_out(tensor *, tensor out, tensor self, tensor dst);
 void atg__copy_from_out(tensor *, tensor out, tensor self, tensor dst, int non_blocking);
 void atg__cslt_compress(tensor *, tensor input);
-void atg__cslt_sparse_mm(tensor *, tensor compressed_A, tensor dense_B, tensor bias, int transpose_result);
+void atg__cslt_sparse_mm(tensor *, tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result);
 void atg__ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int zero_infinity);
 void atg__ctc_loss_backward(tensor *, tensor grad, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, tensor neg_log_likelihood, tensor log_alpha, int64_t blank, int zero_infinity);
 void atg__ctc_loss_backward_out(tensor *, tensor out, tensor grad, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, tensor neg_log_likelihood, tensor log_alpha, int64_t blank, int zero_infinity);
@@ -115,7 +116,7 @@ int64_t atg__dimi(tensor self);
 int64_t atg__dimv(tensor self);
 void atg__dirichlet_grad(tensor *, tensor x, tensor alpha, tensor total);
 void atg__dirichlet_grad_out(tensor *, tensor out, tensor x, tensor alpha, tensor total);
-void atg__efficient_attention_backward(tensor *, tensor grad_out_, tensor query, tensor key, tensor value, tensor bias, tensor out, tensor cu_seqlens_q, tensor cu_seqlens_k, int64_t max_seqlen_k, int64_t max_seqlen_q, tensor logsumexp, double dropout_p, tensor philox_seed, tensor philox_offset, int64_t custom_mask_type, int bias_requires_grad, double scale_v, uint8_t scale_null, int64_t num_splits_key_v, uint8_t num_splits_key_null);
+void atg__efficient_attention_backward(tensor *, tensor grad_out_, tensor query, tensor key, tensor value, tensor bias, tensor out, tensor cu_seqlens_q, tensor cu_seqlens_k, int64_t max_seqlen_q, int64_t max_seqlen_k, tensor logsumexp, double dropout_p, tensor philox_seed, tensor philox_offset, int64_t custom_mask_type, int bias_requires_grad, double scale_v, uint8_t scale_null, int64_t num_splits_key_v, uint8_t num_splits_key_null);
 void atg__efficientzerotensor(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
 void atg__efficientzerotensor_out(tensor *, tensor out, int64_t *size_data, int size_len);
 void atg__embedding_bag(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx);
@@ -221,6 +222,7 @@ void atg__masked_softmax(tensor *, tensor self, tensor mask, int64_t dim_v, uint
 void atg__masked_softmax_backward(tensor *, tensor grad_output, tensor output, tensor mask, int64_t dim_v, uint8_t dim_null);
 void atg__masked_softmax_backward_out(tensor *, tensor out, tensor grad_output, tensor output, tensor mask, int64_t dim_v, uint8_t dim_null);
 void atg__masked_softmax_out(tensor *, tensor out, tensor self, tensor mask, int64_t dim_v, uint8_t dim_null, int64_t mask_type_v, uint8_t mask_type_null);
+void atg__mixed_dtypes_linear(tensor *, tensor input, tensor weight, tensor scale, tensor bias, char* activation_ptr, int activation_len);
 void atg__mkldnn_reshape(tensor *, tensor self, int64_t *shape_data, int shape_len);
 void atg__mkldnn_reshape_out(tensor *, tensor out, tensor self, int64_t *shape_data, int shape_len);
 void atg__mkldnn_transpose(tensor *, tensor self, int64_t dim0, int64_t dim1);
@@ -286,8 +288,8 @@ void atg__saturate_weight_to_fp16(tensor *, tensor weight);
 void atg__scaled_dot_product_attention_math(tensor *, tensor query, tensor key, tensor value, tensor attn_mask, double dropout_p, int is_causal, tensor dropout_mask, double scale_v, uint8_t scale_null);
 void atg__scaled_dot_product_efficient_attention(tensor *, tensor query, tensor key, tensor value, tensor attn_bias, int compute_log_sumexp, double dropout_p, int is_causal, double scale_v, uint8_t scale_null);
 void atg__scaled_dot_product_flash_attention_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, tensor cum_seq_q, tensor cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, int is_causal, tensor philox_seed, tensor philox_offset, double scale_v, uint8_t scale_null);
-void atg__scaled_mm(tensor *, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result);
-void atg__scaled_mm_out(tensor *, tensor out, tensor out_amax, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result);
+void atg__scaled_mm(tensor *, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result, int use_fast_accum);
+void atg__scaled_mm_out(tensor *, tensor out, tensor out_amax, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result, int use_fast_accum);
 void atg__scatter_reduce(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self);
 void atg__scatter_reduce_(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self);
 void atg__scatter_reduce_two_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self);
@@ -451,6 +453,7 @@ void atg__values(tensor *, tensor self);
 void atg__values_copy(tensor *, tensor self);
 void atg__values_copy_out(tensor *, tensor out, tensor self);
 int64_t atg__version(tensor self);
+void atg__weight_int4pack_mm(tensor *, tensor self, tensor mat2, int64_t qGroupSize, tensor qScaleAndZeros);
 void atg__weight_norm(tensor *, tensor v, tensor g, int64_t dim);
 void atg__weight_norm_differentiable_backward(tensor *, tensor grad_w, tensor saved_v, tensor saved_g, tensor saved_norms, int64_t dim);
 void atg__weight_norm_interface(tensor *, tensor v, tensor g, int64_t dim);
@@ -520,6 +523,8 @@ tensor *atg_align_tensors(tensor *tensors_data, int tensors_len);
 void atg_all(tensor *, tensor self);
 void atg_all_all_out(tensor *, tensor out, tensor self);
 void atg_all_dim(tensor *, tensor self, int64_t dim, int keepdim);
+void atg_all_dims(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
+void atg_all_dims_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
 void atg_all_out(tensor *, tensor out, tensor self, int64_t dim, int keepdim);
 int atg_allclose(tensor self, tensor other, double rtol, double atol, int equal_nan);
 void atg_alpha_dropout(tensor *, tensor input, double p, int train);
@@ -535,6 +540,8 @@ void atg_angle_out(tensor *, tensor out, tensor self);
 void atg_any(tensor *, tensor self);
 void atg_any_all_out(tensor *, tensor out, tensor self);
 void atg_any_dim(tensor *, tensor self, int64_t dim, int keepdim);
+void atg_any_dims(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
+void atg_any_dims_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
 void atg_any_out(tensor *, tensor out, tensor self, int64_t dim, int keepdim);
 void atg_arange(tensor *, scalar end, int options_kind, int options_device);
 void atg_arange_start(tensor *, scalar start, scalar end, int options_kind, int options_device);
@@ -1079,6 +1086,7 @@ void atg_floor_divide_(tensor *, tensor self, tensor other);
 void atg_floor_divide_out(tensor *, tensor out, tensor self, tensor other);
 void atg_floor_divide_scalar(tensor *, tensor self, scalar other);
 void atg_floor_divide_scalar_(tensor *, tensor self, scalar other);
+void atg_floor_divide_scalar_out(tensor *, tensor out, tensor self, scalar other);
 void atg_floor_out(tensor *, tensor out, tensor self);
 void atg_fmax(tensor *, tensor self, tensor other);
 void atg_fmax_out(tensor *, tensor out, tensor self, tensor other);
@@ -1456,6 +1464,12 @@ void atg_linear(tensor *, tensor input, tensor weight, tensor bias);
 void atg_linear_out(tensor *, tensor out, tensor input, tensor weight, tensor bias);
 void atg_linspace(tensor *, scalar start, scalar end, int64_t steps, int options_kind, int options_device);
 void atg_linspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps);
+void atg_linspace_scalar_tensor(tensor *, scalar start, tensor end, int64_t steps, int options_kind, int options_device);
+void atg_linspace_scalar_tensor_out(tensor *, tensor out, scalar start, tensor end, int64_t steps);
+void atg_linspace_tensor_scalar(tensor *, tensor start, scalar end, int64_t steps, int options_kind, int options_device);
+void atg_linspace_tensor_scalar_out(tensor *, tensor out, tensor start, scalar end, int64_t steps);
+void atg_linspace_tensor_tensor(tensor *, tensor start, tensor end, int64_t steps, int options_kind, int options_device);
+void atg_linspace_tensor_tensor_out(tensor *, tensor out, tensor start, tensor end, int64_t steps);
 void atg_log(tensor *, tensor self);
 void atg_log10(tensor *, tensor self);
 void atg_log10_(tensor *, tensor self);
@@ -1503,6 +1517,12 @@ void atg_logit_backward_grad_input(tensor *, tensor grad_input, tensor grad_outp
 void atg_logit_out(tensor *, tensor out, tensor self, double eps_v, uint8_t eps_null);
 void atg_logspace(tensor *, scalar start, scalar end, int64_t steps, double base, int options_kind, int options_device);
 void atg_logspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps, double base);
+void atg_logspace_scalar_tensor(tensor *, scalar start, tensor end, int64_t steps, double base, int options_kind, int options_device);
+void atg_logspace_scalar_tensor_out(tensor *, tensor out, scalar start, tensor end, int64_t steps, double base);
+void atg_logspace_tensor_scalar(tensor *, tensor start, scalar end, int64_t steps, double base, int options_kind, int options_device);
+void atg_logspace_tensor_scalar_out(tensor *, tensor out, tensor start, scalar end, int64_t steps, double base);
+void atg_logspace_tensor_tensor(tensor *, tensor start, tensor end, int64_t steps, double base, int options_kind, int options_device);
+void atg_logspace_tensor_tensor_out(tensor *, tensor out, tensor start, tensor end, int64_t steps, double base);
 void atg_logsumexp(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
 void atg_logsumexp_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
 void atg_lstm(tensor *, tensor input, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
@@ -1528,6 +1548,7 @@ void atg_masked_fill_tensor_(tensor *, tensor self, tensor mask, tensor value);
 void atg_masked_fill_tensor_out(tensor *, tensor out, tensor self, tensor mask, tensor value);
 void atg_masked_scatter(tensor *, tensor self, tensor mask, tensor source);
 void atg_masked_scatter_(tensor *, tensor self, tensor mask, tensor source);
+void atg_masked_scatter_backward(tensor *, tensor grad_output, tensor mask, int64_t *sizes_data, int sizes_len);
 void atg_masked_scatter_out(tensor *, tensor out, tensor self, tensor mask, tensor source);
 void atg_masked_select(tensor *, tensor self, tensor mask);
 void atg_masked_select_backward(tensor *, tensor grad, tensor input, tensor mask);
diff --git a/torch-sys/src/c_generated.rs b/torch-sys/src/c_generated.rs
index 5b8e921e..883f61ad 100644
--- a/torch-sys/src/c_generated.rs
+++ b/torch-sys/src/c_generated.rs
@@ -377,6 +377,11 @@ extern "C" {
         out_int32_: c_int,
         transpose_: c_int,
     );
+    pub fn atg__convert_weight_to_int4pack(
+        out__: *mut *mut C_tensor,
+        self_: *mut C_tensor,
+        innerKTiles_: i64,
+    );
     pub fn atg__convolution(
         out__: *mut *mut C_tensor,
         input_: *mut C_tensor,
@@ -480,6 +485,8 @@ extern "C" {
         compressed_A_: *mut C_tensor,
         dense_B_: *mut C_tensor,
         bias_: *mut C_tensor,
+        alpha_: *mut C_tensor,
+        out_dtype_: c_int,
         transpose_result_: c_int,
     );
     pub fn atg__ctc_loss(
@@ -719,8 +726,8 @@ extern "C" {
         out_: *mut C_tensor,
         cu_seqlens_q_: *mut C_tensor,
         cu_seqlens_k_: *mut C_tensor,
-        max_seqlen_k_: i64,
         max_seqlen_q_: i64,
+        max_seqlen_k_: i64,
         logsumexp_: *mut C_tensor,
         dropout_p_: f64,
         philox_seed_: *mut C_tensor,
@@ -1587,6 +1594,15 @@ extern "C" {
         mask_type_v: i64,
         mask_type_null: i8,
     );
+    pub fn atg__mixed_dtypes_linear(
+        out__: *mut *mut C_tensor,
+        input_: *mut C_tensor,
+        weight_: *mut C_tensor,
+        scale_: *mut C_tensor,
+        bias_: *mut C_tensor,
+        activation_ptr: *const u8,
+        activation_len: c_int,
+    );
     pub fn atg__mkldnn_reshape(
         out__: *mut *mut C_tensor,
         self_: *mut C_tensor,
@@ -2104,6 +2120,7 @@ extern "C" {
         scale_a_: *mut C_tensor,
         scale_b_: *mut C_tensor,
         scale_result_: *mut C_tensor,
+        use_fast_accum_: c_int,
     );
     pub fn atg__scaled_mm_out(
         out__: *mut *mut C_tensor,
@@ -2116,6 +2133,7 @@ extern "C" {
         scale_a_: *mut C_tensor,
         scale_b_: *mut C_tensor,
         scale_result_: *mut C_tensor,
+        use_fast_accum_: c_int,
     );
     pub fn atg__scatter_reduce(
         out__: *mut *mut C_tensor,
@@ -3384,6 +3402,13 @@ extern "C" {
         self_: *mut C_tensor,
     );
     pub fn atg__version(self_: *mut C_tensor) -> i64;
+    pub fn atg__weight_int4pack_mm(
+        out__: *mut *mut C_tensor,
+        self_: *mut C_tensor,
+        mat2_: *mut C_tensor,
+        qGroupSize_: i64,
+        qScaleAndZeros_: *mut C_tensor,
+    );
     pub fn atg__weight_norm(
         out__: *mut *mut C_tensor,
         v_: *mut C_tensor,
@@ -3704,6 +3729,21 @@ extern "C" {
     pub fn atg_all(out__: *mut *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_all_all_out(out__: *mut *mut C_tensor, out_: *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_all_dim(out__: *mut *mut C_tensor, self_: *mut C_tensor, dim_: i64, keepdim_: c_int);
+    pub fn atg_all_dims(
+        out__: *mut *mut C_tensor,
+        self_: *mut C_tensor,
+        dim_data: *const i64,
+        dim_len: c_int,
+        keepdim_: c_int,
+    );
+    pub fn atg_all_dims_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        self_: *mut C_tensor,
+        dim_data: *const i64,
+        dim_len: c_int,
+        keepdim_: c_int,
+    );
     pub fn atg_all_out(
         out__: *mut *mut C_tensor,
         out_: *mut C_tensor,
@@ -3781,6 +3821,21 @@ extern "C" {
     pub fn atg_any(out__: *mut *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_any_all_out(out__: *mut *mut C_tensor, out_: *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_any_dim(out__: *mut *mut C_tensor, self_: *mut C_tensor, dim_: i64, keepdim_: c_int);
+    pub fn atg_any_dims(
+        out__: *mut *mut C_tensor,
+        self_: *mut C_tensor,
+        dim_data: *const i64,
+        dim_len: c_int,
+        keepdim_: c_int,
+    );
+    pub fn atg_any_dims_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        self_: *mut C_tensor,
+        dim_data: *const i64,
+        dim_len: c_int,
+        keepdim_: c_int,
+    );
     pub fn atg_any_out(
         out__: *mut *mut C_tensor,
         out_: *mut C_tensor,
         self_: *mut C_tensor,
@@ -6893,6 +6948,12 @@ extern "C" {
         self_: *mut C_tensor,
         other_: *mut C_scalar,
     );
+    pub fn atg_floor_divide_scalar_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        self_: *mut C_tensor,
+        other_: *mut C_scalar,
+    );
     pub fn atg_floor_out(out__: *mut *mut C_tensor, out_: *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_fmax(out__: *mut *mut C_tensor, self_: *mut C_tensor, other_: *mut C_tensor);
     pub fn atg_fmax_out(
@@ -8840,6 +8901,51 @@ extern "C" {
         end_: *mut C_scalar,
         steps_: i64,
     );
+    pub fn atg_linspace_scalar_tensor(
+        out__: *mut *mut C_tensor,
+        start_: *mut C_scalar,
+        end_: *mut C_tensor,
+        steps_: i64,
+        options_kind: c_int,
+        options_device: c_int,
+    );
+    pub fn atg_linspace_scalar_tensor_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        start_: *mut C_scalar,
+        end_: *mut C_tensor,
+        steps_: i64,
+    );
+    pub fn atg_linspace_tensor_scalar(
+        out__: *mut *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_scalar,
+        steps_: i64,
+        options_kind: c_int,
+        options_device: c_int,
+    );
+    pub fn atg_linspace_tensor_scalar_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_scalar,
+        steps_: i64,
+    );
+    pub fn atg_linspace_tensor_tensor(
+        out__: *mut *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_tensor,
+        steps_: i64,
+        options_kind: c_int,
+        options_device: c_int,
+    );
+    pub fn atg_linspace_tensor_tensor_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_tensor,
+        steps_: i64,
+    );
     pub fn atg_log(out__: *mut *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_log10(out__: *mut *mut C_tensor, self_: *mut C_tensor);
     pub fn atg_log10_(out__: *mut *mut C_tensor, self_: *mut C_tensor);
@@ -8987,6 +9093,57 @@ extern "C" {
         steps_: i64,
         base_: f64,
     );
+    pub fn atg_logspace_scalar_tensor(
+        out__: *mut *mut C_tensor,
+        start_: *mut C_scalar,
+        end_: *mut C_tensor,
+        steps_: i64,
+        base_: f64,
+        options_kind: c_int,
+        options_device: c_int,
+    );
+    pub fn atg_logspace_scalar_tensor_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        start_: *mut C_scalar,
+        end_: *mut C_tensor,
+        steps_: i64,
+        base_: f64,
+    );
+    pub fn atg_logspace_tensor_scalar(
+        out__: *mut *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_scalar,
+        steps_: i64,
+        base_: f64,
+        options_kind: c_int,
+        options_device: c_int,
+    );
+    pub fn atg_logspace_tensor_scalar_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_scalar,
+        steps_: i64,
+        base_: f64,
+    );
+    pub fn atg_logspace_tensor_tensor(
+        out__: *mut *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_tensor,
+        steps_: i64,
+        base_: f64,
+        options_kind: c_int,
+        options_device: c_int,
+    );
+    pub fn atg_logspace_tensor_tensor_out(
+        out__: *mut *mut C_tensor,
+        out_: *mut C_tensor,
+        start_: *mut C_tensor,
+        end_: *mut C_tensor,
+        steps_: i64,
+        base_: f64,
+    );
     pub fn atg_logsumexp(
         out__: *mut *mut C_tensor,
         self_: *mut C_tensor,
@@ -9168,6 +9325,13 @@ extern "C" {
         mask_: *mut C_tensor,
         source_: *mut C_tensor,
     );
+    pub fn atg_masked_scatter_backward(
+        out__: *mut *mut C_tensor,
+        grad_output_: *mut C_tensor,
+        mask_: *mut C_tensor,
+        sizes_data: *const i64,
+        sizes_len: c_int,
+    );
     pub fn atg_masked_scatter_out(
         out__: *mut *mut C_tensor,
         out_: *mut C_tensor,