diff --git a/CHANGELOG.md b/CHANGELOG.md index 92300b29..bfd61687 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased ### Changed +## v0.16.0 +### Changed +- PyTorch v2.3 support + ## v0.15.0 ### Changed - PyTorch v2.2 support diff --git a/Cargo.toml b/Cargo.toml index 49286e9c..c1513dba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tch" -version = "0.15.0" +version = "0.16.0" authors = ["Laurent Mazare "] edition = "2021" build = "build.rs" @@ -22,7 +22,7 @@ libc = "0.2.0" ndarray = "0.15" rand = "0.8" thiserror = "1" -torch-sys = { version = "0.15.0", path = "torch-sys" } +torch-sys = { version = "0.16.0", path = "torch-sys" } zip = "0.6" half = "2" safetensors = "0.3.0" diff --git a/README.md b/README.md index 6761797a..7986e8d2 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ The code generation part for the C api on top of libtorch comes from ## Getting Started -This crate requires the C++ PyTorch library (libtorch) in version *v2.2.0* to be available on +This crate requires the C++ PyTorch library (libtorch) in version *v2.3.0* to be available on your system. You can either: - Use the system-wide libtorch installation (default). @@ -85,7 +85,7 @@ seem to include `libtorch.a` by default so this would have to be compiled manually, e.g. via the following: ```bash -git clone -b v2.2.0 --recurse-submodule https://github.com/pytorch/pytorch.git pytorch-static --depth 1 +git clone -b v2.3.0 --recurse-submodule https://github.com/pytorch/pytorch.git pytorch-static --depth 1 cd pytorch-static USE_CUDA=OFF BUILD_SHARED_LIBS=OFF python setup.py build # export LIBTORCH to point at the build directory in pytorch-static. 
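As a quick local check of the upgrade (not part of this diff), a downstream crate can bump its dependency to `tch = "0.16.0"`, make a libtorch v2.3.0 install visible (system-wide, downloaded, or via `LIBTORCH` as described above), and run a minimal smoke test using the crate's basic tensor API; a sketch along these lines, assuming that setup:

```rust
// Minimal sketch, assuming tch 0.16 and libtorch v2.3.0 are configured as
// described in the README section above.
use tch::Tensor;

fn main() {
    // Build a 1-D tensor from a slice, double it, and print the result;
    // if this links and runs, the libtorch setup is working.
    let t = Tensor::from_slice(&[3, 1, 4, 1, 5]);
    let t = t * 2;
    t.print();
}
```

This only exercises basic tensor creation and arithmetic as a sanity check; a version mismatch against the expected libtorch 2.3.0 is reported at build time by `version_check` in `torch-sys/build.rs`, which this change updates.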
diff --git a/examples/python-extension/Cargo.toml b/examples/python-extension/Cargo.toml index f56fe003..a312e10c 100644 --- a/examples/python-extension/Cargo.toml +++ b/examples/python-extension/Cargo.toml @@ -18,6 +18,6 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.21", features = ["extension-module"] } -pyo3-tch = { path = "../../pyo3-tch", version = "0.15.0" } -tch = { path = "../..", features = ["python-extension"], version = "0.15.0" } -torch-sys = { path = "../../torch-sys", features = ["python-extension"], version = "0.15.0" } +pyo3-tch = { path = "../../pyo3-tch", version = "0.16.0" } +tch = { path = "../..", features = ["python-extension"], version = "0.16.0" } +torch-sys = { path = "../../torch-sys", features = ["python-extension"], version = "0.16.0" } \ No newline at end of file diff --git a/gen/gen.ml b/gen/gen.ml index 009d82b5..b0357481 100644 --- a/gen/gen.ml +++ b/gen/gen.ml @@ -93,6 +93,7 @@ let excluded_prefixes = ; "_nested_tensor" ; "_fused_adam" ; "sym_" + ; "_fused_sgd" ] let excluded_suffixes = [ "_forward"; "_forward_out" ] @@ -878,7 +879,7 @@ let run let () = run - ~yaml_filename:"third_party/pytorch/Declarations-v2.2.0.yaml" + ~yaml_filename:"third_party/pytorch/Declarations-v2.3.0.yaml" ~cpp_filename:"torch-sys/libtch/torch_api_generated" ~ffi_filename:"torch-sys/src/c_generated.rs" ~wrapper_filename:"src/wrappers/tensor_generated.rs" diff --git a/pyo3-tch/Cargo.toml b/pyo3-tch/Cargo.toml index c1fde9f2..d833aff6 100644 --- a/pyo3-tch/Cargo.toml +++ b/pyo3-tch/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pyo3-tch" -version = "0.15.0" +version = "0.16.0" authors = ["Laurent Mazare "] edition = "2021" build = "build.rs" @@ -12,6 +12,6 @@ categories = ["science"] license = "MIT/Apache-2.0" [dependencies] -tch = { path = "..", features = ["python-extension"], version = "0.15.0" } -torch-sys = { path = "../torch-sys", features = ["python-extension"], version = "0.15.0" } -pyo3 = { version = "0.21", features = ["extension-module"] } +tch = { path = "..", features = ["python-extension"], version = "0.16.0" } +torch-sys = { path = "../torch-sys", features = ["python-extension"], version = "0.16.0" } +pyo3 = { version = "0.21", features = ["extension-module"] } \ No newline at end of file diff --git a/src/wrappers/tensor_fallible_generated.rs b/src/wrappers/tensor_fallible_generated.rs index 4939ea59..c28a5f40 100644 --- a/src/wrappers/tensor_fallible_generated.rs +++ b/src/wrappers/tensor_fallible_generated.rs @@ -618,6 +618,18 @@ impl Tensor { Ok(Tensor { c_tensor: c_tensors[0] }) } + pub fn f_internal_assert_scalar>( + self_scalar: S, + assert_msg: &str, + ) -> Result<(), TchError> { + unsafe_torch_err!(atg__assert_scalar( + self_scalar.into().c_scalar, + assert_msg.as_ptr(), + assert_msg.len() as i32 + )); + Ok(()) + } + pub fn f_internal_assert_tensor_metadata( a: &Tensor, size: impl IntListOption, @@ -821,6 +833,40 @@ impl Tensor { Ok(Tensor { c_tensor: c_tensors[0] }) } + pub fn f_internal_chunk_cat>( + tensors: &[T], + dim: i64, + num_chunks: i64, + ) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__chunk_cat( + c_tensors.as_mut_ptr(), + ptr_list(tensors).as_ptr(), + tensors.len() as i32, + dim, + num_chunks + )); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + + pub fn f_internal_chunk_cat_out>( + out: &Tensor, + tensors: &[T], + dim: i64, + num_chunks: i64, + ) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__chunk_cat_out( + c_tensors.as_mut_ptr(), + out.c_tensor, + 
ptr_list(tensors).as_ptr(), + tensors.len() as i32, + dim, + num_chunks + )); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + pub fn f_internal_coalesce(&self) -> Result { let mut c_tensors = [std::ptr::null_mut(); 1]; unsafe_torch_err!(atg__coalesce(c_tensors.as_mut_ptr(), self.c_tensor)); @@ -1280,6 +1326,7 @@ impl Tensor { alpha: Option, out_dtype: impl Into>, transpose_result: bool, + alg_id: i64, ) -> Result { let mut c_tensors = [std::ptr::null_mut(); 1]; unsafe_torch_err!(atg__cslt_sparse_mm( @@ -1289,11 +1336,34 @@ impl Tensor { bias.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor), alpha.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor), out_dtype.into().map_or(-1, |s| s.c_int()), - if transpose_result { 1 } else { 0 } + if transpose_result { 1 } else { 0 }, + alg_id )); Ok(Tensor { c_tensor: c_tensors[0] }) } + pub fn f_internal_cslt_sparse_mm_search>( + compressed_a: &Tensor, + dense_b: &Tensor, + bias: Option, + alpha: Option, + out_dtype: impl Into>, + transpose_result: bool, + ) -> Result { + let return_; + unsafe_torch_err!( + return_ = atg__cslt_sparse_mm_search( + compressed_a.c_tensor, + dense_b.c_tensor, + bias.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor), + alpha.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor), + out_dtype.into().map_or(-1, |s| s.c_int()), + if transpose_result { 1 } else { 0 } + ) + ); + Ok(return_) + } + pub fn f_internal_ctc_loss( log_probs: &Tensor, targets: &Tensor, @@ -2729,6 +2799,22 @@ impl Tensor { Ok(Tensor { c_tensor: c_tensors[0] }) } + pub fn f_internal_functional_assert_scalar>( + self_scalar: S, + assert_msg: &str, + dep_token: &Tensor, + ) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__functional_assert_scalar( + c_tensors.as_mut_ptr(), + self_scalar.into().c_scalar, + assert_msg.as_ptr(), + assert_msg.len() as i32, + dep_token.c_tensor + )); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + pub fn f_internal_functional_sym_constrain_range>( size: S, min: impl Into>, @@ -3311,6 +3397,12 @@ impl Tensor { Ok(return_ != 0) } + pub fn f_internal_lazy_clone(&self) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__lazy_clone(c_tensors.as_mut_ptr(), self.c_tensor)); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + pub fn f_internal_linalg_check_errors( info: &Tensor, api_name: &str, @@ -3392,6 +3484,12 @@ impl Tensor { Ok((Tensor { c_tensor: c_tensors[0] }, Tensor { c_tensor: c_tensors[1] })) } + pub fn f_internal_linalg_eigvals(&self) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__linalg_eigvals(c_tensors.as_mut_ptr(), self.c_tensor)); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + pub fn f_internal_linalg_slogdet( a: &Tensor, ) -> Result<(Tensor, Tensor, Tensor, Tensor), TchError> { @@ -4522,6 +4620,52 @@ impl Tensor { Ok(Tensor { c_tensor: c_tensors[0] }) } + pub fn f_internal_nested_get_jagged_dummy(any: &Tensor) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__nested_get_jagged_dummy(c_tensors.as_mut_ptr(), any.c_tensor)); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + + pub fn f_internal_nested_get_lengths(&self) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__nested_get_lengths(c_tensors.as_mut_ptr(), self.c_tensor)); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + + pub fn f_internal_nested_get_offsets(&self) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + 
unsafe_torch_err!(atg__nested_get_offsets(c_tensors.as_mut_ptr(), self.c_tensor)); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + + pub fn f_internal_nested_get_ragged_idx(&self) -> Result { + let return_; + unsafe_torch_err!(return_ = atg__nested_get_ragged_idx(self.c_tensor)); + Ok(return_) + } + + pub fn f_internal_nested_get_values(&self) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__nested_get_values(c_tensors.as_mut_ptr(), self.c_tensor)); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + + pub fn f_internal_nested_get_values_copy(&self) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__nested_get_values_copy(c_tensors.as_mut_ptr(), self.c_tensor)); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + + pub fn f_internal_nested_get_values_copy_out(&self, out: &Tensor) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__nested_get_values_copy_out( + c_tensors.as_mut_ptr(), + out.c_tensor, + self.c_tensor + )); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + pub fn f_internal_nested_select_backward( &self, grad_output: &Tensor, @@ -4610,6 +4754,65 @@ impl Tensor { Ok(Tensor { c_tensor: c_tensors[0] }) } + pub fn f_internal_nested_view_from_jagged>( + &self, + offsets: &Tensor, + dummy: &Tensor, + lengths: Option, + ragged_idx: i64, + ) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__nested_view_from_jagged( + c_tensors.as_mut_ptr(), + self.c_tensor, + offsets.c_tensor, + dummy.c_tensor, + lengths.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor), + ragged_idx + )); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + + pub fn f_internal_nested_view_from_jagged_copy>( + &self, + offsets: &Tensor, + dummy: &Tensor, + lengths: Option, + ragged_idx: i64, + ) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__nested_view_from_jagged_copy( + c_tensors.as_mut_ptr(), + self.c_tensor, + offsets.c_tensor, + dummy.c_tensor, + lengths.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor), + ragged_idx + )); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + + pub fn f_internal_nested_view_from_jagged_copy_out>( + &self, + out: &Tensor, + offsets: &Tensor, + dummy: &Tensor, + lengths: Option, + ragged_idx: i64, + ) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__nested_view_from_jagged_copy_out( + c_tensors.as_mut_ptr(), + out.c_tensor, + self.c_tensor, + offsets.c_tensor, + dummy.c_tensor, + lengths.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor), + ragged_idx + )); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + pub fn f_internal_new_zeros_with_same_feature_meta( &self, other: &Tensor, @@ -4882,6 +5085,11 @@ impl Tensor { Ok((Tensor { c_tensor: c_tensors[0] }, Tensor { c_tensor: c_tensors[1] })) } + pub fn f_internal_print(s: &str) -> Result<(), TchError> { + unsafe_torch_err!(atg__print(s.as_ptr(), s.len() as i32)); + Ok(()) + } + pub fn f_internal_propagate_xla_data(&self, output: &Tensor) -> Result<(), TchError> { unsafe_torch_err!(atg__propagate_xla_data(self.c_tensor, output.c_tensor)); Ok(()) @@ -5092,6 +5300,36 @@ impl Tensor { Ok((Tensor { c_tensor: c_tensors[0] }, Tensor { c_tensor: c_tensors[1] })) } + pub fn f_internal_scaled_dot_product_cudnn_attention( + query: &Tensor, + key: &Tensor, + value: &Tensor, + dropout_p: f64, + is_causal: bool, + return_debug_mask: bool, + scale: impl Into>, + ) -> Result<(Tensor, Tensor, Tensor, Tensor), TchError> { + let scale 
= scale.into(); + let mut c_tensors = [std::ptr::null_mut(); 4]; + unsafe_torch_err!(atg__scaled_dot_product_cudnn_attention( + c_tensors.as_mut_ptr(), + query.c_tensor, + key.c_tensor, + value.c_tensor, + dropout_p, + if is_causal { 1 } else { 0 }, + if return_debug_mask { 1 } else { 0 }, + scale.unwrap_or(std::f64::NAN), + scale.is_none() as i8 + )); + Ok(( + Tensor { c_tensor: c_tensors[0] }, + Tensor { c_tensor: c_tensors[1] }, + Tensor { c_tensor: c_tensors[2] }, + Tensor { c_tensor: c_tensors[3] }, + )) + } + pub fn f_internal_scaled_dot_product_efficient_attention>( query: &Tensor, key: &Tensor, @@ -5169,6 +5407,66 @@ impl Tensor { )) } + pub fn f_internal_scaled_dot_product_flash_attention_for_cpu>( + query: &Tensor, + key: &Tensor, + value: &Tensor, + dropout_p: f64, + is_causal: bool, + attn_mask: Option, + scale: impl Into>, + ) -> Result<(Tensor, Tensor), TchError> { + let scale = scale.into(); + let mut c_tensors = [std::ptr::null_mut(); 2]; + unsafe_torch_err!(atg__scaled_dot_product_flash_attention_for_cpu( + c_tensors.as_mut_ptr(), + query.c_tensor, + key.c_tensor, + value.c_tensor, + dropout_p, + if is_causal { 1 } else { 0 }, + attn_mask.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor), + scale.unwrap_or(std::f64::NAN), + scale.is_none() as i8 + )); + Ok((Tensor { c_tensor: c_tensors[0] }, Tensor { c_tensor: c_tensors[1] })) + } + + pub fn f_internal_scaled_dot_product_flash_attention_for_cpu_backward>( + grad_out: &Tensor, + query: &Tensor, + key: &Tensor, + value: &Tensor, + out: &Tensor, + logsumexp: &Tensor, + dropout_p: f64, + is_causal: bool, + attn_mask: Option, + scale: impl Into>, + ) -> Result<(Tensor, Tensor, Tensor), TchError> { + let scale = scale.into(); + let mut c_tensors = [std::ptr::null_mut(); 3]; + unsafe_torch_err!(atg__scaled_dot_product_flash_attention_for_cpu_backward( + c_tensors.as_mut_ptr(), + grad_out.c_tensor, + query.c_tensor, + key.c_tensor, + value.c_tensor, + out.c_tensor, + logsumexp.c_tensor, + dropout_p, + if is_causal { 1 } else { 0 }, + attn_mask.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor), + scale.unwrap_or(std::f64::NAN), + scale.is_none() as i8 + )); + Ok(( + Tensor { c_tensor: c_tensors[0] }, + Tensor { c_tensor: c_tensors[1] }, + Tensor { c_tensor: c_tensors[2] }, + )) + } + pub fn f_internal_scaled_mm>( &self, mat2: &Tensor, @@ -6037,6 +6335,7 @@ impl Tensor { meta: &Tensor, bias: Option, activation: &str, + out_dtype: impl Into>, ) -> Result { let mut c_tensors = [std::ptr::null_mut(); 1]; unsafe_torch_err!(atg__sparse_semi_structured_linear( @@ -6046,7 +6345,8 @@ impl Tensor { meta.c_tensor, bias.as_ref().map_or(std::ptr::null_mut(), |t| t.borrow().c_tensor), activation.as_ptr(), - activation.len() as i32 + activation.len() as i32, + out_dtype.into().map_or(-1, |s| s.c_int()) )); Ok(Tensor { c_tensor: c_tensors[0] }) } @@ -6579,6 +6879,21 @@ impl Tensor { Ok(Tensor { c_tensor: c_tensors[0] }) } + pub fn f_internal_test_parallel_materialize( + &self, + num_parallel: i64, + skip_first: bool, + ) -> Result { + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__test_parallel_materialize( + c_tensors.as_mut_ptr(), + self.c_tensor, + num_parallel, + if skip_first { 1 } else { 0 } + )); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + pub fn f_internal_test_serialization_subcmul( &self, other: &Tensor, @@ -8160,6 +8475,21 @@ impl Tensor { Ok(Tensor { c_tensor: c_tensors[0] }) } + pub fn f_internal_weight_int8pack_mm( + &self, + mat2: &Tensor, + scales: &Tensor, + ) -> Result { + let 
mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg__weight_int8pack_mm( + c_tensors.as_mut_ptr(), + self.c_tensor, + mat2.c_tensor, + scales.c_tensor + )); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + pub fn f_internal_weight_norm(v: &Tensor, g: &Tensor, dim: i64) -> Result { let mut c_tensors = [std::ptr::null_mut(); 1]; unsafe_torch_err!(atg__weight_norm(c_tensors.as_mut_ptr(), v.c_tensor, g.c_tensor, dim)); @@ -30749,6 +31079,31 @@ impl Tensor { Ok(Tensor { c_tensor: c_tensors[0] }) } + pub fn f_slice_inverse( + &self, + src: &Tensor, + dim: i64, + start: impl Into>, + end: impl Into>, + step: i64, + ) -> Result { + let start = start.into(); + let end = end.into(); + let mut c_tensors = [std::ptr::null_mut(); 1]; + unsafe_torch_err!(atg_slice_inverse( + c_tensors.as_mut_ptr(), + self.c_tensor, + src.c_tensor, + dim, + start.unwrap_or(0i64), + start.is_none() as i8, + end.unwrap_or(0i64), + end.is_none() as i8, + step + )); + Ok(Tensor { c_tensor: c_tensors[0] }) + } + pub fn f_slice_scatter( &self, src: &Tensor, diff --git a/src/wrappers/tensor_generated.rs b/src/wrappers/tensor_generated.rs index a4471129..56a2217f 100644 --- a/src/wrappers/tensor_generated.rs +++ b/src/wrappers/tensor_generated.rs @@ -275,6 +275,10 @@ impl Tensor { .unwrap() } + pub fn internal_assert_scalar>(self_scalar: S, assert_msg: &str) { + Tensor::f_internal_assert_scalar(self_scalar, assert_msg).unwrap() + } + pub fn internal_assert_tensor_metadata( a: &Tensor, size: impl IntListOption, @@ -374,6 +378,23 @@ impl Tensor { self.f_internal_cholesky_solve_helper_out(out, a, upper).unwrap() } + pub fn internal_chunk_cat>( + tensors: &[T], + dim: i64, + num_chunks: i64, + ) -> Tensor { + Tensor::f_internal_chunk_cat(tensors, dim, num_chunks).unwrap() + } + + pub fn internal_chunk_cat_out>( + out: &Tensor, + tensors: &[T], + dim: i64, + num_chunks: i64, + ) -> Tensor { + Tensor::f_internal_chunk_cat_out(out, tensors, dim, num_chunks).unwrap() + } + pub fn internal_coalesce(&self) -> Tensor { self.f_internal_coalesce().unwrap() } @@ -645,6 +666,7 @@ impl Tensor { alpha: Option, out_dtype: impl Into>, transpose_result: bool, + alg_id: i64, ) -> Tensor { Tensor::f_internal_cslt_sparse_mm( compressed_a, @@ -653,6 +675,26 @@ impl Tensor { alpha, out_dtype, transpose_result, + alg_id, + ) + .unwrap() + } + + pub fn internal_cslt_sparse_mm_search>( + compressed_a: &Tensor, + dense_b: &Tensor, + bias: Option, + alpha: Option, + out_dtype: impl Into>, + transpose_result: bool, + ) -> i64 { + Tensor::f_internal_cslt_sparse_mm_search( + compressed_a, + dense_b, + bias, + alpha, + out_dtype, + transpose_result, ) .unwrap() } @@ -1727,6 +1769,14 @@ impl Tensor { self.f_internal_functional_assert_async(assert_msg, dep_token).unwrap() } + pub fn internal_functional_assert_scalar>( + self_scalar: S, + assert_msg: &str, + dep_token: &Tensor, + ) -> Tensor { + Tensor::f_internal_functional_assert_scalar(self_scalar, assert_msg, dep_token).unwrap() + } + pub fn internal_functional_sym_constrain_range>( size: S, min: impl Into>, @@ -2079,6 +2129,10 @@ impl Tensor { self.f_internal_is_zerotensor().unwrap() } + pub fn internal_lazy_clone(&self) -> Tensor { + self.f_internal_lazy_clone().unwrap() + } + pub fn internal_linalg_check_errors(info: &Tensor, api_name: &str, is_matrix: bool) { Tensor::f_internal_linalg_check_errors(info, api_name, is_matrix).unwrap() } @@ -2111,6 +2165,10 @@ impl Tensor { .unwrap() } + pub fn internal_linalg_eigvals(&self) -> Tensor { + self.f_internal_linalg_eigvals().unwrap() + } + pub fn 
internal_linalg_slogdet(a: &Tensor) -> (Tensor, Tensor, Tensor, Tensor) { Tensor::f_internal_linalg_slogdet(a).unwrap() } @@ -2755,6 +2813,34 @@ impl Tensor { .unwrap() } + pub fn internal_nested_get_jagged_dummy(any: &Tensor) -> Tensor { + Tensor::f_internal_nested_get_jagged_dummy(any).unwrap() + } + + pub fn internal_nested_get_lengths(&self) -> Tensor { + self.f_internal_nested_get_lengths().unwrap() + } + + pub fn internal_nested_get_offsets(&self) -> Tensor { + self.f_internal_nested_get_offsets().unwrap() + } + + pub fn internal_nested_get_ragged_idx(&self) -> i64 { + self.f_internal_nested_get_ragged_idx().unwrap() + } + + pub fn internal_nested_get_values(&self) -> Tensor { + self.f_internal_nested_get_values().unwrap() + } + + pub fn internal_nested_get_values_copy(&self) -> Tensor { + self.f_internal_nested_get_values_copy().unwrap() + } + + pub fn internal_nested_get_values_copy_out(&self, out: &Tensor) -> Tensor { + self.f_internal_nested_get_values_copy_out(out).unwrap() + } + pub fn internal_nested_select_backward( &self, grad_output: &Tensor, @@ -2802,6 +2888,38 @@ impl Tensor { .unwrap() } + pub fn internal_nested_view_from_jagged>( + &self, + offsets: &Tensor, + dummy: &Tensor, + lengths: Option, + ragged_idx: i64, + ) -> Tensor { + self.f_internal_nested_view_from_jagged(offsets, dummy, lengths, ragged_idx).unwrap() + } + + pub fn internal_nested_view_from_jagged_copy>( + &self, + offsets: &Tensor, + dummy: &Tensor, + lengths: Option, + ragged_idx: i64, + ) -> Tensor { + self.f_internal_nested_view_from_jagged_copy(offsets, dummy, lengths, ragged_idx).unwrap() + } + + pub fn internal_nested_view_from_jagged_copy_out>( + &self, + out: &Tensor, + offsets: &Tensor, + dummy: &Tensor, + lengths: Option, + ragged_idx: i64, + ) -> Tensor { + self.f_internal_nested_view_from_jagged_copy_out(out, offsets, dummy, lengths, ragged_idx) + .unwrap() + } + pub fn internal_new_zeros_with_same_feature_meta( &self, other: &Tensor, @@ -2941,6 +3059,10 @@ impl Tensor { self.f_internal_prelu_kernel_backward(grad_output, weight).unwrap() } + pub fn internal_print(s: &str) { + Tensor::f_internal_print(s).unwrap() + } + pub fn internal_propagate_xla_data(&self, output: &Tensor) { self.f_internal_propagate_xla_data(output).unwrap() } @@ -3034,6 +3156,27 @@ impl Tensor { .unwrap() } + pub fn internal_scaled_dot_product_cudnn_attention( + query: &Tensor, + key: &Tensor, + value: &Tensor, + dropout_p: f64, + is_causal: bool, + return_debug_mask: bool, + scale: impl Into>, + ) -> (Tensor, Tensor, Tensor, Tensor) { + Tensor::f_internal_scaled_dot_product_cudnn_attention( + query, + key, + value, + dropout_p, + is_causal, + return_debug_mask, + scale, + ) + .unwrap() + } + pub fn internal_scaled_dot_product_efficient_attention>( query: &Tensor, key: &Tensor, @@ -3094,6 +3237,39 @@ impl Tensor { .unwrap() } + pub fn internal_scaled_dot_product_flash_attention_for_cpu>( + query: &Tensor, + key: &Tensor, + value: &Tensor, + dropout_p: f64, + is_causal: bool, + attn_mask: Option, + scale: impl Into>, + ) -> (Tensor, Tensor) { + Tensor::f_internal_scaled_dot_product_flash_attention_for_cpu( + query, key, value, dropout_p, is_causal, attn_mask, scale, + ) + .unwrap() + } + + pub fn internal_scaled_dot_product_flash_attention_for_cpu_backward>( + grad_out: &Tensor, + query: &Tensor, + key: &Tensor, + value: &Tensor, + out: &Tensor, + logsumexp: &Tensor, + dropout_p: f64, + is_causal: bool, + attn_mask: Option, + scale: impl Into>, + ) -> (Tensor, Tensor, Tensor) { + 
Tensor::f_internal_scaled_dot_product_flash_attention_for_cpu_backward( + grad_out, query, key, value, out, logsumexp, dropout_p, is_causal, attn_mask, scale, + ) + .unwrap() + } + pub fn internal_scaled_mm>( &self, mat2: &Tensor, @@ -3596,8 +3772,10 @@ impl Tensor { meta: &Tensor, bias: Option, activation: &str, + out_dtype: impl Into>, ) -> Tensor { - self.f_internal_sparse_semi_structured_linear(weight, meta, bias, activation).unwrap() + self.f_internal_sparse_semi_structured_linear(weight, meta, bias, activation, out_dtype) + .unwrap() } pub fn internal_sparse_softmax(&self, dim: i64, half_to_float: bool) -> Tensor { @@ -3806,6 +3984,14 @@ impl Tensor { Tensor::f_internal_test_optional_intlist_out(out, values, addends).unwrap() } + pub fn internal_test_parallel_materialize( + &self, + num_parallel: i64, + skip_first: bool, + ) -> Tensor { + self.f_internal_test_parallel_materialize(num_parallel, skip_first).unwrap() + } + pub fn internal_test_serialization_subcmul(&self, other: &Tensor) -> Tensor { self.f_internal_test_serialization_subcmul(other).unwrap() } @@ -4708,6 +4894,10 @@ impl Tensor { self.f_internal_weight_int4pack_mm(mat2, qgroupsize, qscaleandzeros).unwrap() } + pub fn internal_weight_int8pack_mm(&self, mat2: &Tensor, scales: &Tensor) -> Tensor { + self.f_internal_weight_int8pack_mm(mat2, scales).unwrap() + } + pub fn internal_weight_norm(v: &Tensor, g: &Tensor, dim: i64) -> Tensor { Tensor::f_internal_weight_norm(v, g, dim).unwrap() } @@ -15651,6 +15841,17 @@ impl Tensor { self.f_slice_copy_tensor_out(out, dim, start, end, step).unwrap() } + pub fn slice_inverse( + &self, + src: &Tensor, + dim: i64, + start: impl Into>, + end: impl Into>, + step: i64, + ) -> Tensor { + self.f_slice_inverse(src, dim, start, end, step).unwrap() + } + pub fn slice_scatter( &self, src: &Tensor, diff --git a/torch-sys/Cargo.toml b/torch-sys/Cargo.toml index 5990458d..b23bf0e9 100644 --- a/torch-sys/Cargo.toml +++ b/torch-sys/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "torch-sys" -version = "0.15.0" +version = "0.16.0" authors = ["Laurent Mazare "] edition = "2021" build = "build.rs" diff --git a/torch-sys/build.rs b/torch-sys/build.rs index c66087ca..637bb50e 100644 --- a/torch-sys/build.rs +++ b/torch-sys/build.rs @@ -10,7 +10,7 @@ use anyhow::{Context, Result}; use std::path::{Path, PathBuf}; use std::{env, fs, io}; -const TORCH_VERSION: &str = "2.2.0"; +const TORCH_VERSION: &str = "2.3.0"; const PYTHON_PRINT_PYTORCH_DETAILS: &str = r" import torch from torch.utils import cpp_extension @@ -158,7 +158,7 @@ fn version_check(version: &str) -> Result<()> { return Ok(()); } let version = version.trim(); - // Typical version number is 2.2.0+cpu or 2.2.0+cu121 + // Typical version number is 2.3.0+cpu or 2.3.0+cu121 let version = match version.split_once('+') { None => version, Some((version, _)) => version, diff --git a/torch-sys/libtch/torch_api_generated.cpp b/torch-sys/libtch/torch_api_generated.cpp index bbf5d251..b54d6b0c 100644 --- a/torch-sys/libtch/torch_api_generated.cpp +++ b/torch-sys/libtch/torch_api_generated.cpp @@ -342,6 +342,12 @@ void atg__amp_update_scale_out(tensor *out__, tensor out, tensor self, tensor gr ) } +void atg__assert_scalar(scalar self_scalar, char* assert_msg_ptr, int assert_msg_len) { + PROTECT( + torch::_assert_scalar(*self_scalar, std::string(assert_msg_ptr, assert_msg_len)); + ) +} + void atg__assert_tensor_metadata(tensor a, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int dtype) { PROTECT( torch::_assert_tensor_metadata(*a, size_data 
== nullptr ? c10::nullopt : c10::optional(torch::IntArrayRef(size_data, size_len)), stride_data == nullptr ? c10::nullopt : c10::optional(torch::IntArrayRef(stride_data, stride_len)), dtype < 0 ? c10::nullopt : c10::optional(at::ScalarType(dtype))); @@ -446,6 +452,20 @@ void atg__cholesky_solve_helper_out(tensor *out__, tensor out, tensor self, tens ) } +void atg__chunk_cat(tensor *out__, tensor *tensors_data, int tensors_len, int64_t dim, int64_t num_chunks) { + PROTECT( + auto outputs__ = torch::_chunk_cat(of_carray_tensor(tensors_data, tensors_len), dim, num_chunks); + out__[0] = new torch::Tensor(outputs__); + ) +} + +void atg__chunk_cat_out(tensor *out__, tensor out, tensor *tensors_data, int tensors_len, int64_t dim, int64_t num_chunks) { + PROTECT( + auto outputs__ = torch::_chunk_cat_out(*out, of_carray_tensor(tensors_data, tensors_len), dim, num_chunks); + out__[0] = new torch::Tensor(outputs__); + ) +} + void atg__coalesce(tensor *out__, tensor self) { PROTECT( auto outputs__ = torch::_coalesce(*self); @@ -642,13 +662,20 @@ void atg__cslt_compress(tensor *out__, tensor input) { ) } -void atg__cslt_sparse_mm(tensor *out__, tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result) { +void atg__cslt_sparse_mm(tensor *out__, tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result, int64_t alg_id) { PROTECT( - auto outputs__ = torch::_cslt_sparse_mm(*compressed_A, *dense_B, (bias ? *bias : torch::Tensor()), (alpha ? *alpha : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional(at::ScalarType(out_dtype)), (bool)transpose_result); + auto outputs__ = torch::_cslt_sparse_mm(*compressed_A, *dense_B, (bias ? *bias : torch::Tensor()), (alpha ? *alpha : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional(at::ScalarType(out_dtype)), (bool)transpose_result, alg_id); out__[0] = new torch::Tensor(outputs__); ) } +int64_t atg__cslt_sparse_mm_search(tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result) { + PROTECT( + return torch::_cslt_sparse_mm_search(*compressed_A, *dense_B, (bias ? *bias : torch::Tensor()), (alpha ? *alpha : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional(at::ScalarType(out_dtype)), (bool)transpose_result); + ) + return 0; +} + void atg__ctc_loss(tensor *out__, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int zero_infinity) { PROTECT( auto outputs__ = torch::_ctc_loss(*log_probs, *targets, torch::IntArrayRef(input_lengths_data, input_lengths_len), torch::IntArrayRef(target_lengths_data, target_lengths_len), blank, (bool)zero_infinity); @@ -1107,6 +1134,13 @@ void atg__functional_assert_async(tensor *out__, tensor self, char* assert_msg_p ) } +void atg__functional_assert_scalar(tensor *out__, scalar self_scalar, char* assert_msg_ptr, int assert_msg_len, tensor dep_token) { + PROTECT( + auto outputs__ = torch::_functional_assert_scalar(*self_scalar, std::string(assert_msg_ptr, assert_msg_len), *dep_token); + out__[0] = new torch::Tensor(outputs__); + ) +} + void atg__functional_sym_constrain_range(tensor *out__, scalar size, int64_t min_v, uint8_t min_null, int64_t max_v, uint8_t max_null, tensor dep_token) { PROTECT( auto outputs__ = torch::_functional_sym_constrain_range(*size, min_null ? c10::nullopt : c10::optional(min_v), max_null ? 
c10::nullopt : c10::optional(max_v), *dep_token); @@ -1360,6 +1394,13 @@ int atg__is_zerotensor(tensor self) { return 0; } +void atg__lazy_clone(tensor *out__, tensor self) { + PROTECT( + auto outputs__ = torch::_lazy_clone(*self); + out__[0] = new torch::Tensor(outputs__); + ) +} + void atg__linalg_check_errors(tensor info, char* api_name_ptr, int api_name_len, int is_matrix) { PROTECT( torch::_linalg_check_errors(*info, std::string(api_name_ptr, api_name_len), (bool)is_matrix); @@ -1400,6 +1441,13 @@ void atg__linalg_eigh_eigenvalues(tensor *out__, tensor eigenvalues, tensor eige ) } +void atg__linalg_eigvals(tensor *out__, tensor self) { + PROTECT( + auto outputs__ = torch::_linalg_eigvals(*self); + out__[0] = new torch::Tensor(outputs__); + ) +} + void atg__linalg_slogdet(tensor *out__, tensor A) { PROTECT( auto outputs__ = torch::_linalg_slogdet(*A); @@ -1831,6 +1879,55 @@ void atg__nested_from_padded_out(tensor *out__, tensor out, tensor padded, tenso ) } +void atg__nested_get_jagged_dummy(tensor *out__, tensor any) { + PROTECT( + auto outputs__ = torch::_nested_get_jagged_dummy(*any); + out__[0] = new torch::Tensor(outputs__); + ) +} + +void atg__nested_get_lengths(tensor *out__, tensor self) { + PROTECT( + auto outputs__ = torch::_nested_get_lengths(*self); + out__[0] = new torch::Tensor(outputs__); + ) +} + +void atg__nested_get_offsets(tensor *out__, tensor self) { + PROTECT( + auto outputs__ = torch::_nested_get_offsets(*self); + out__[0] = new torch::Tensor(outputs__); + ) +} + +int64_t atg__nested_get_ragged_idx(tensor self) { + PROTECT( + return torch::_nested_get_ragged_idx(*self); + ) + return 0; +} + +void atg__nested_get_values(tensor *out__, tensor self) { + PROTECT( + auto outputs__ = torch::_nested_get_values(*self); + out__[0] = new torch::Tensor(outputs__); + ) +} + +void atg__nested_get_values_copy(tensor *out__, tensor self) { + PROTECT( + auto outputs__ = torch::_nested_get_values_copy(*self); + out__[0] = new torch::Tensor(outputs__); + ) +} + +void atg__nested_get_values_copy_out(tensor *out__, tensor out, tensor self) { + PROTECT( + auto outputs__ = torch::_nested_get_values_copy_out(*out, *self); + out__[0] = new torch::Tensor(outputs__); + ) +} + void atg__nested_select_backward(tensor *out__, tensor grad_output, tensor self, int64_t dim, int64_t index) { PROTECT( auto outputs__ = torch::_nested_select_backward(*grad_output, *self, dim, index); @@ -1866,6 +1963,27 @@ void atg__nested_view_from_buffer_copy_out(tensor *out__, tensor out, tensor sel ) } +void atg__nested_view_from_jagged(tensor *out__, tensor self, tensor offsets, tensor dummy, tensor lengths, int64_t ragged_idx) { + PROTECT( + auto outputs__ = torch::_nested_view_from_jagged(*self, *offsets, *dummy, (lengths ? *lengths : torch::Tensor()), ragged_idx); + out__[0] = new torch::Tensor(outputs__); + ) +} + +void atg__nested_view_from_jagged_copy(tensor *out__, tensor self, tensor offsets, tensor dummy, tensor lengths, int64_t ragged_idx) { + PROTECT( + auto outputs__ = torch::_nested_view_from_jagged_copy(*self, *offsets, *dummy, (lengths ? *lengths : torch::Tensor()), ragged_idx); + out__[0] = new torch::Tensor(outputs__); + ) +} + +void atg__nested_view_from_jagged_copy_out(tensor *out__, tensor out, tensor self, tensor offsets, tensor dummy, tensor lengths, int64_t ragged_idx) { + PROTECT( + auto outputs__ = torch::_nested_view_from_jagged_copy_out(*out, *self, *offsets, *dummy, (lengths ? 
*lengths : torch::Tensor()), ragged_idx); + out__[0] = new torch::Tensor(outputs__); + ) +} + void atg__new_zeros_with_same_feature_meta(tensor *out__, tensor self, tensor other, int64_t self_num_batch_dims) { PROTECT( auto outputs__ = torch::_new_zeros_with_same_feature_meta(*self, *other, self_num_batch_dims); @@ -1996,6 +2114,12 @@ void atg__prelu_kernel_backward(tensor *out__, tensor grad_output, tensor self, ) } +void atg__print(char* s_ptr, int s_len) { + PROTECT( + torch::_print(std::string(s_ptr, s_len)); + ) +} + void atg__propagate_xla_data(tensor input, tensor output) { PROTECT( torch::_propagate_xla_data(*input, *output); @@ -2102,6 +2226,16 @@ void atg__scaled_dot_product_attention_math(tensor *out__, tensor query, tensor ) } +void atg__scaled_dot_product_cudnn_attention(tensor *out__, tensor query, tensor key, tensor value, double dropout_p, int is_causal, int return_debug_mask, double scale_v, uint8_t scale_null) { + PROTECT( + auto outputs__ = torch::_scaled_dot_product_cudnn_attention(*query, *key, *value, dropout_p, (bool)is_causal, (bool)return_debug_mask, scale_null ? c10::nullopt : c10::optional(scale_v)); + out__[0] = new torch::Tensor(std::get<0>(outputs__)); + out__[1] = new torch::Tensor(std::get<1>(outputs__)); + out__[2] = new torch::Tensor(std::get<2>(outputs__)); + out__[3] = new torch::Tensor(std::get<3>(outputs__)); + ) +} + void atg__scaled_dot_product_efficient_attention(tensor *out__, tensor query, tensor key, tensor value, tensor attn_bias, int compute_log_sumexp, double dropout_p, int is_causal, double scale_v, uint8_t scale_null) { PROTECT( auto outputs__ = torch::_scaled_dot_product_efficient_attention(*query, *key, *value, (attn_bias ? *attn_bias : torch::Tensor()), (bool)compute_log_sumexp, dropout_p, (bool)is_causal, scale_null ? c10::nullopt : c10::optional(scale_v)); @@ -2121,6 +2255,23 @@ void atg__scaled_dot_product_flash_attention_backward(tensor *out__, tensor grad ) } +void atg__scaled_dot_product_flash_attention_for_cpu(tensor *out__, tensor query, tensor key, tensor value, double dropout_p, int is_causal, tensor attn_mask, double scale_v, uint8_t scale_null) { + PROTECT( + auto outputs__ = torch::_scaled_dot_product_flash_attention_for_cpu(*query, *key, *value, dropout_p, (bool)is_causal, (attn_mask ? *attn_mask : torch::Tensor()), scale_null ? c10::nullopt : c10::optional(scale_v)); + out__[0] = new torch::Tensor(std::get<0>(outputs__)); + out__[1] = new torch::Tensor(std::get<1>(outputs__)); + ) +} + +void atg__scaled_dot_product_flash_attention_for_cpu_backward(tensor *out__, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, double dropout_p, int is_causal, tensor attn_mask, double scale_v, uint8_t scale_null) { + PROTECT( + auto outputs__ = torch::_scaled_dot_product_flash_attention_for_cpu_backward(*grad_out, *query, *key, *value, *out, *logsumexp, dropout_p, (bool)is_causal, (attn_mask ? *attn_mask : torch::Tensor()), scale_null ? c10::nullopt : c10::optional(scale_v)); + out__[0] = new torch::Tensor(std::get<0>(outputs__)); + out__[1] = new torch::Tensor(std::get<1>(outputs__)); + out__[2] = new torch::Tensor(std::get<2>(outputs__)); + ) +} + void atg__scaled_mm(tensor *out__, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result, int use_fast_accum) { PROTECT( auto outputs__ = torch::_scaled_mm(*self, *mat2, (bias ? *bias : torch::Tensor()), out_dtype < 0 ? c10::nullopt : c10::optional(at::ScalarType(out_dtype)), (scale_a ? 
*scale_a : torch::Tensor()), (scale_b ? *scale_b : torch::Tensor()), (scale_result ? *scale_result : torch::Tensor()), (bool)use_fast_accum); @@ -2449,9 +2600,9 @@ void atg__sparse_mm_reduce_impl(tensor *out__, tensor self, tensor other, char* ) } -void atg__sparse_semi_structured_linear(tensor *out__, tensor input, tensor weight, tensor meta, tensor bias, char* activation_ptr, int activation_len) { +void atg__sparse_semi_structured_linear(tensor *out__, tensor input, tensor weight, tensor meta, tensor bias, char* activation_ptr, int activation_len, int out_dtype) { PROTECT( - auto outputs__ = torch::_sparse_semi_structured_linear(*input, *weight, *meta, (bias ? *bias : torch::Tensor()), std::string(activation_ptr, activation_len)); + auto outputs__ = torch::_sparse_semi_structured_linear(*input, *weight, *meta, (bias ? *bias : torch::Tensor()), std::string(activation_ptr, activation_len), out_dtype < 0 ? c10::nullopt : c10::optional(at::ScalarType(out_dtype))); out__[0] = new torch::Tensor(outputs__); ) } @@ -2729,6 +2880,13 @@ void atg__test_optional_intlist_out(tensor *out__, tensor out, tensor values, in ) } +void atg__test_parallel_materialize(tensor *out__, tensor self, int64_t num_parallel, int skip_first) { + PROTECT( + auto outputs__ = torch::_test_parallel_materialize(*self, num_parallel, (bool)skip_first); + out__[0] = new torch::Tensor(outputs__); + ) +} + void atg__test_serialization_subcmul(tensor *out__, tensor self, tensor other) { PROTECT( auto outputs__ = torch::_test_serialization_subcmul(*self, *other); @@ -3301,6 +3459,13 @@ void atg__weight_int4pack_mm(tensor *out__, tensor self, tensor mat2, int64_t qG ) } +void atg__weight_int8pack_mm(tensor *out__, tensor self, tensor mat2, tensor scales) { + PROTECT( + auto outputs__ = torch::_weight_int8pack_mm(*self, *mat2, *scales); + out__[0] = new torch::Tensor(outputs__); + ) +} + void atg__weight_norm(tensor *out__, tensor v, tensor g, int64_t dim) { PROTECT( auto outputs__ = torch::_weight_norm(*v, *g, dim); @@ -14954,6 +15119,13 @@ void atg_slice_copy_tensor_out(tensor *out__, tensor out, tensor self, int64_t d ) } +void atg_slice_inverse(tensor *out__, tensor self, tensor src, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step) { + PROTECT( + auto outputs__ = torch::slice_inverse(*self, *src, dim, start_null ? c10::nullopt : c10::optional(start_v), end_null ? c10::nullopt : c10::optional(end_v), step); + out__[0] = new torch::Tensor(outputs__); + ) +} + void atg_slice_scatter(tensor *out__, tensor self, tensor src, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step) { PROTECT( auto outputs__ = torch::slice_scatter(*self, *src, dim, start_null ? c10::nullopt : c10::optional(start_v), end_null ? 
c10::nullopt : c10::optional(end_v), step); diff --git a/torch-sys/libtch/torch_api_generated.h b/torch-sys/libtch/torch_api_generated.h index 97beb08c..da3bc167 100644 --- a/torch-sys/libtch/torch_api_generated.h +++ b/torch-sys/libtch/torch_api_generated.h @@ -50,6 +50,7 @@ void atg__aminmax_out(tensor *, tensor out0, tensor out1, tensor self); void atg__amp_update_scale(tensor *, tensor self, tensor growth_tracker, tensor found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); void atg__amp_update_scale_(tensor *, tensor self, tensor growth_tracker, tensor found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); void atg__amp_update_scale_out(tensor *, tensor out, tensor self, tensor growth_tracker, tensor found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); +void atg__assert_scalar(scalar self_scalar, char* assert_msg_ptr, int assert_msg_len); void atg__assert_tensor_metadata(tensor a, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int dtype); void atg__autocast_to_full_precision(tensor *, tensor self, int cuda_enabled, int cpu_enabled); void atg__autocast_to_reduced_precision(tensor *, tensor self, int cuda_enabled, int cpu_enabled, int cuda_dtype, int cpu_dtype); @@ -65,6 +66,8 @@ void atg__cdist_backward(tensor *, tensor grad, tensor x1, tensor x2, double p, void atg__cdist_backward_out(tensor *, tensor out, tensor grad, tensor x1, tensor x2, double p, tensor cdist); void atg__cholesky_solve_helper(tensor *, tensor self, tensor A, int upper); void atg__cholesky_solve_helper_out(tensor *, tensor out, tensor self, tensor A, int upper); +void atg__chunk_cat(tensor *, tensor *tensors_data, int tensors_len, int64_t dim, int64_t num_chunks); +void atg__chunk_cat_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim, int64_t num_chunks); void atg__coalesce(tensor *, tensor self); void atg__coalesce_out(tensor *, tensor out, tensor self); void atg__coalesced(tensor *, tensor self, int coalesced); @@ -93,7 +96,8 @@ void atg__copy_from_and_resize(tensor *, tensor self, tensor dst); void atg__copy_from_and_resize_out(tensor *, tensor out, tensor self, tensor dst); void atg__copy_from_out(tensor *, tensor out, tensor self, tensor dst, int non_blocking); void atg__cslt_compress(tensor *, tensor input); -void atg__cslt_sparse_mm(tensor *, tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result); +void atg__cslt_sparse_mm(tensor *, tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result, int64_t alg_id); +int64_t atg__cslt_sparse_mm_search(tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result); void atg__ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int zero_infinity); void atg__ctc_loss_backward(tensor *, tensor grad, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, tensor neg_log_likelihood, tensor log_alpha, int64_t blank, int zero_infinity); void atg__ctc_loss_backward_out(tensor *, tensor out, tensor grad, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, tensor neg_log_likelihood, tensor 
log_alpha, int64_t blank, int zero_infinity); @@ -154,6 +158,7 @@ void atg__flash_attention_backward(tensor *, tensor grad_out, tensor query, tens void atg__foobar(tensor *, tensor self, int arg1, int arg2, int arg3); void atg__foobar_out(tensor *, tensor out, tensor self, int arg1, int arg2, int arg3); void atg__functional_assert_async(tensor *, tensor self, char* assert_msg_ptr, int assert_msg_len, tensor dep_token); +void atg__functional_assert_scalar(tensor *, scalar self_scalar, char* assert_msg_ptr, int assert_msg_len, tensor dep_token); void atg__functional_sym_constrain_range(tensor *, scalar size, int64_t min_v, uint8_t min_null, int64_t max_v, uint8_t max_null, tensor dep_token); void atg__functional_sym_constrain_range_for_size(tensor *, scalar size, int64_t min_v, uint8_t min_null, int64_t max_v, uint8_t max_null, tensor dep_token); void atg__fused_dropout(tensor *, tensor self, double p); @@ -188,11 +193,13 @@ void atg__int_mm_out(tensor *, tensor out, tensor self, tensor mat2); void atg__is_all_true(tensor *, tensor self); void atg__is_any_true(tensor *, tensor self); int atg__is_zerotensor(tensor self); +void atg__lazy_clone(tensor *, tensor self); void atg__linalg_check_errors(tensor info, char* api_name_ptr, int api_name_len, int is_matrix); void atg__linalg_det(tensor *, tensor A); void atg__linalg_det_result(tensor *, tensor result, tensor LU, tensor pivots, tensor A); void atg__linalg_eigh(tensor *, tensor A, char* UPLO_ptr, int UPLO_len, int compute_v); void atg__linalg_eigh_eigenvalues(tensor *, tensor eigenvalues, tensor eigenvectors, tensor A, char* UPLO_ptr, int UPLO_len, int compute_v); +void atg__linalg_eigvals(tensor *, tensor self); void atg__linalg_slogdet(tensor *, tensor A); void atg__linalg_slogdet_sign(tensor *, tensor sign, tensor logabsdet, tensor LU, tensor pivots, tensor A); void atg__linalg_solve_ex(tensor *, tensor A, tensor B, int left, int check_errors); @@ -248,11 +255,21 @@ void atg__nested_from_padded(tensor *, tensor padded, tensor cpu_nested_shape_ex void atg__nested_from_padded_and_nested_example(tensor *, tensor padded, tensor nt_example); void atg__nested_from_padded_and_nested_example_out(tensor *, tensor out, tensor padded, tensor nt_example); void atg__nested_from_padded_out(tensor *, tensor out, tensor padded, tensor cpu_nested_shape_example, int fuse_transform_0213); +void atg__nested_get_jagged_dummy(tensor *, tensor any); +void atg__nested_get_lengths(tensor *, tensor self); +void atg__nested_get_offsets(tensor *, tensor self); +int64_t atg__nested_get_ragged_idx(tensor self); +void atg__nested_get_values(tensor *, tensor self); +void atg__nested_get_values_copy(tensor *, tensor self); +void atg__nested_get_values_copy_out(tensor *, tensor out, tensor self); void atg__nested_select_backward(tensor *, tensor grad_output, tensor self, int64_t dim, int64_t index); void atg__nested_sum_backward(tensor *, tensor grad, tensor self, int64_t *dim_data, int dim_len, int keepdim); void atg__nested_view_from_buffer(tensor *, tensor self, tensor nested_size, tensor nested_strides, tensor offsets); void atg__nested_view_from_buffer_copy(tensor *, tensor self, tensor nested_size, tensor nested_strides, tensor offsets); void atg__nested_view_from_buffer_copy_out(tensor *, tensor out, tensor self, tensor nested_size, tensor nested_strides, tensor offsets); +void atg__nested_view_from_jagged(tensor *, tensor self, tensor offsets, tensor dummy, tensor lengths, int64_t ragged_idx); +void atg__nested_view_from_jagged_copy(tensor *, tensor self, tensor 
offsets, tensor dummy, tensor lengths, int64_t ragged_idx); +void atg__nested_view_from_jagged_copy_out(tensor *, tensor out, tensor self, tensor offsets, tensor dummy, tensor lengths, int64_t ragged_idx); void atg__new_zeros_with_same_feature_meta(tensor *, tensor self, tensor other, int64_t self_num_batch_dims); void atg__new_zeros_with_same_feature_meta_out(tensor *, tensor out, tensor self, tensor other, int64_t self_num_batch_dims); int atg__nnpack_available(); @@ -271,6 +288,7 @@ void atg__pin_memory(tensor *, tensor self, int device); void atg__pin_memory_out(tensor *, tensor out, tensor self, int device); void atg__prelu_kernel(tensor *, tensor self, tensor weight); void atg__prelu_kernel_backward(tensor *, tensor grad_output, tensor self, tensor weight); +void atg__print(char* s_ptr, int s_len); void atg__propagate_xla_data(tensor input, tensor output); void atg__remove_batch_dim(tensor *, tensor self, int64_t level, int64_t batch_size, int64_t out_dim); void atg__reshape_alias(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len); @@ -286,8 +304,11 @@ void atg__sample_dirichlet(tensor *, tensor self); void atg__sample_dirichlet_out(tensor *, tensor out, tensor self); void atg__saturate_weight_to_fp16(tensor *, tensor weight); void atg__scaled_dot_product_attention_math(tensor *, tensor query, tensor key, tensor value, tensor attn_mask, double dropout_p, int is_causal, tensor dropout_mask, double scale_v, uint8_t scale_null); +void atg__scaled_dot_product_cudnn_attention(tensor *, tensor query, tensor key, tensor value, double dropout_p, int is_causal, int return_debug_mask, double scale_v, uint8_t scale_null); void atg__scaled_dot_product_efficient_attention(tensor *, tensor query, tensor key, tensor value, tensor attn_bias, int compute_log_sumexp, double dropout_p, int is_causal, double scale_v, uint8_t scale_null); void atg__scaled_dot_product_flash_attention_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, tensor cum_seq_q, tensor cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, int is_causal, tensor philox_seed, tensor philox_offset, double scale_v, uint8_t scale_null); +void atg__scaled_dot_product_flash_attention_for_cpu(tensor *, tensor query, tensor key, tensor value, double dropout_p, int is_causal, tensor attn_mask, double scale_v, uint8_t scale_null); +void atg__scaled_dot_product_flash_attention_for_cpu_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, double dropout_p, int is_causal, tensor attn_mask, double scale_v, uint8_t scale_null); void atg__scaled_mm(tensor *, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result, int use_fast_accum); void atg__scaled_mm_out(tensor *, tensor out, tensor out_amax, tensor self, tensor mat2, tensor bias, int out_dtype, tensor scale_a, tensor scale_b, tensor scale_result, int use_fast_accum); void atg__scatter_reduce(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self); @@ -334,7 +355,7 @@ void atg__sparse_mask_projection_out(tensor *, tensor out, tensor self, tensor m void atg__sparse_mm(tensor *, tensor sparse, tensor dense); void atg__sparse_mm_reduce(tensor *, tensor sparse, tensor dense, char* reduce_ptr, int reduce_len); void atg__sparse_mm_reduce_impl(tensor *, tensor self, tensor other, char* reduce_ptr, int reduce_len); -void 
atg__sparse_semi_structured_linear(tensor *, tensor input, tensor weight, tensor meta, tensor bias, char* activation_ptr, int activation_len); +void atg__sparse_semi_structured_linear(tensor *, tensor input, tensor weight, tensor meta, tensor bias, char* activation_ptr, int activation_len, int out_dtype); void atg__sparse_softmax(tensor *, tensor self, int64_t dim, int half_to_float); void atg__sparse_softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, tensor self); void atg__sparse_softmax_backward_data_out(tensor *, tensor out, tensor grad_output, tensor output, int64_t dim, tensor self); @@ -374,6 +395,7 @@ void atg__test_optional_floatlist(tensor *, tensor values, double *addends_data, void atg__test_optional_floatlist_out(tensor *, tensor out, tensor values, double *addends_data, int addends_len); void atg__test_optional_intlist(tensor *, tensor values, int64_t *addends_data, int addends_len); void atg__test_optional_intlist_out(tensor *, tensor out, tensor values, int64_t *addends_data, int addends_len); +void atg__test_parallel_materialize(tensor *, tensor self, int64_t num_parallel, int skip_first); void atg__test_serialization_subcmul(tensor *, tensor self, tensor other); void atg__test_string_default(tensor *, tensor dummy, char* a_ptr, int a_len, char* b_ptr, int b_len); void atg__test_warn_in_autograd(tensor *, tensor self); @@ -454,6 +476,7 @@ void atg__values_copy(tensor *, tensor self); void atg__values_copy_out(tensor *, tensor out, tensor self); int64_t atg__version(tensor self); void atg__weight_int4pack_mm(tensor *, tensor self, tensor mat2, int64_t qGroupSize, tensor qScaleAndZeros); +void atg__weight_int8pack_mm(tensor *, tensor self, tensor mat2, tensor scales); void atg__weight_norm(tensor *, tensor v, tensor g, int64_t dim); void atg__weight_norm_differentiable_backward(tensor *, tensor grad_w, tensor saved_v, tensor saved_g, tensor saved_norms, int64_t dim); void atg__weight_norm_interface(tensor *, tensor v, tensor g, int64_t dim); @@ -2079,6 +2102,7 @@ void atg_slice_backward(tensor *, tensor grad_output, int64_t *input_sizes_data, void atg_slice_backward_out(tensor *, tensor out, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t start, int64_t end, int64_t step); void atg_slice_copy(tensor *, tensor self, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step); void atg_slice_copy_tensor_out(tensor *, tensor out, tensor self, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step); +void atg_slice_inverse(tensor *, tensor self, tensor src, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step); void atg_slice_scatter(tensor *, tensor self, tensor src, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step); void atg_slice_scatter_out(tensor *, tensor out, tensor self, tensor src, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step); void atg_slogdet(tensor *, tensor self); diff --git a/torch-sys/src/c_generated.rs b/torch-sys/src/c_generated.rs index 883f61ad..a84e9808 100644 --- a/torch-sys/src/c_generated.rs +++ b/torch-sys/src/c_generated.rs @@ -230,6 +230,11 @@ extern "C" { scale_backoff_factor_: f64, growth_interval_: i64, ); + pub fn atg__assert_scalar( + self_scalar_: *mut C_scalar, + assert_msg_ptr: *const u8, + assert_msg_len: c_int, + ); pub fn atg__assert_tensor_metadata( a_: *mut 
C_tensor, size_data: *const i64, @@ -290,6 +295,21 @@ extern "C" { A_: *mut C_tensor, upper_: c_int, ); + pub fn atg__chunk_cat( + out__: *mut *mut C_tensor, + tensors_data: *const *mut C_tensor, + tensors_len: c_int, + dim_: i64, + num_chunks_: i64, + ); + pub fn atg__chunk_cat_out( + out__: *mut *mut C_tensor, + out_: *mut C_tensor, + tensors_data: *const *mut C_tensor, + tensors_len: c_int, + dim_: i64, + num_chunks_: i64, + ); pub fn atg__coalesce(out__: *mut *mut C_tensor, self_: *mut C_tensor); pub fn atg__coalesce_out(out__: *mut *mut C_tensor, out_: *mut C_tensor, self_: *mut C_tensor); pub fn atg__coalesced(out__: *mut *mut C_tensor, self_: *mut C_tensor, coalesced_: c_int); @@ -488,7 +508,16 @@ extern "C" { alpha_: *mut C_tensor, out_dtype_: c_int, transpose_result_: c_int, + alg_id_: i64, ); + pub fn atg__cslt_sparse_mm_search( + compressed_A_: *mut C_tensor, + dense_B_: *mut C_tensor, + bias_: *mut C_tensor, + alpha_: *mut C_tensor, + out_dtype_: c_int, + transpose_result_: c_int, + ) -> i64; pub fn atg__ctc_loss( out__: *mut *mut C_tensor, log_probs_: *mut C_tensor, @@ -1107,6 +1136,13 @@ extern "C" { assert_msg_len: c_int, dep_token_: *mut C_tensor, ); + pub fn atg__functional_assert_scalar( + out__: *mut *mut C_tensor, + self_scalar_: *mut C_scalar, + assert_msg_ptr: *const u8, + assert_msg_len: c_int, + dep_token_: *mut C_tensor, + ); pub fn atg__functional_sym_constrain_range( out__: *mut *mut C_tensor, size_: *mut C_scalar, @@ -1342,6 +1378,7 @@ extern "C" { pub fn atg__is_all_true(out__: *mut *mut C_tensor, self_: *mut C_tensor); pub fn atg__is_any_true(out__: *mut *mut C_tensor, self_: *mut C_tensor); pub fn atg__is_zerotensor(self_: *mut C_tensor) -> c_int; + pub fn atg__lazy_clone(out__: *mut *mut C_tensor, self_: *mut C_tensor); pub fn atg__linalg_check_errors( info_: *mut C_tensor, api_name_ptr: *const u8, @@ -1372,6 +1409,7 @@ extern "C" { UPLO_len: c_int, compute_v_: c_int, ); + pub fn atg__linalg_eigvals(out__: *mut *mut C_tensor, self_: *mut C_tensor); pub fn atg__linalg_slogdet(out__: *mut *mut C_tensor, A_: *mut C_tensor); pub fn atg__linalg_slogdet_sign( out__: *mut *mut C_tensor, @@ -1838,6 +1876,17 @@ extern "C" { cpu_nested_shape_example_: *mut C_tensor, fuse_transform_0213_: c_int, ); + pub fn atg__nested_get_jagged_dummy(out__: *mut *mut C_tensor, any_: *mut C_tensor); + pub fn atg__nested_get_lengths(out__: *mut *mut C_tensor, self_: *mut C_tensor); + pub fn atg__nested_get_offsets(out__: *mut *mut C_tensor, self_: *mut C_tensor); + pub fn atg__nested_get_ragged_idx(self_: *mut C_tensor) -> i64; + pub fn atg__nested_get_values(out__: *mut *mut C_tensor, self_: *mut C_tensor); + pub fn atg__nested_get_values_copy(out__: *mut *mut C_tensor, self_: *mut C_tensor); + pub fn atg__nested_get_values_copy_out( + out__: *mut *mut C_tensor, + out_: *mut C_tensor, + self_: *mut C_tensor, + ); pub fn atg__nested_select_backward( out__: *mut *mut C_tensor, grad_output_: *mut C_tensor, @@ -1875,6 +1924,31 @@ extern "C" { nested_strides_: *mut C_tensor, offsets_: *mut C_tensor, ); + pub fn atg__nested_view_from_jagged( + out__: *mut *mut C_tensor, + self_: *mut C_tensor, + offsets_: *mut C_tensor, + dummy_: *mut C_tensor, + lengths_: *mut C_tensor, + ragged_idx_: i64, + ); + pub fn atg__nested_view_from_jagged_copy( + out__: *mut *mut C_tensor, + self_: *mut C_tensor, + offsets_: *mut C_tensor, + dummy_: *mut C_tensor, + lengths_: *mut C_tensor, + ragged_idx_: i64, + ); + pub fn atg__nested_view_from_jagged_copy_out( + out__: *mut *mut C_tensor, + out_: *mut C_tensor, 
+ self_: *mut C_tensor, + offsets_: *mut C_tensor, + dummy_: *mut C_tensor, + lengths_: *mut C_tensor, + ragged_idx_: i64, + ); pub fn atg__new_zeros_with_same_feature_meta( out__: *mut *mut C_tensor, self_: *mut C_tensor, @@ -1989,6 +2063,7 @@ extern "C" { self_: *mut C_tensor, weight_: *mut C_tensor, ); + pub fn atg__print(s_ptr: *const u8, s_len: c_int); pub fn atg__propagate_xla_data(input_: *mut C_tensor, output_: *mut C_tensor); pub fn atg__remove_batch_dim( out__: *mut *mut C_tensor, @@ -2080,6 +2155,17 @@ extern "C" { scale_v: f64, scale_null: i8, ); + pub fn atg__scaled_dot_product_cudnn_attention( + out__: *mut *mut C_tensor, + query_: *mut C_tensor, + key_: *mut C_tensor, + value_: *mut C_tensor, + dropout_p_: f64, + is_causal_: c_int, + return_debug_mask_: c_int, + scale_v: f64, + scale_null: i8, + ); pub fn atg__scaled_dot_product_efficient_attention( out__: *mut *mut C_tensor, query_: *mut C_tensor, @@ -2111,6 +2197,31 @@ extern "C" { scale_v: f64, scale_null: i8, ); + pub fn atg__scaled_dot_product_flash_attention_for_cpu( + out__: *mut *mut C_tensor, + query_: *mut C_tensor, + key_: *mut C_tensor, + value_: *mut C_tensor, + dropout_p_: f64, + is_causal_: c_int, + attn_mask_: *mut C_tensor, + scale_v: f64, + scale_null: i8, + ); + pub fn atg__scaled_dot_product_flash_attention_for_cpu_backward( + out__: *mut *mut C_tensor, + grad_out_: *mut C_tensor, + query_: *mut C_tensor, + key_: *mut C_tensor, + value_: *mut C_tensor, + out_: *mut C_tensor, + logsumexp_: *mut C_tensor, + dropout_p_: f64, + is_causal_: c_int, + attn_mask_: *mut C_tensor, + scale_v: f64, + scale_null: i8, + ); pub fn atg__scaled_mm( out__: *mut *mut C_tensor, self_: *mut C_tensor, @@ -2499,6 +2610,7 @@ extern "C" { bias_: *mut C_tensor, activation_ptr: *const u8, activation_len: c_int, + out_dtype_: c_int, ); pub fn atg__sparse_softmax( out__: *mut *mut C_tensor, @@ -2717,6 +2829,12 @@ extern "C" { addends_data: *const i64, addends_len: c_int, ); + pub fn atg__test_parallel_materialize( + out__: *mut *mut C_tensor, + self_: *mut C_tensor, + num_parallel_: i64, + skip_first_: c_int, + ); pub fn atg__test_serialization_subcmul( out__: *mut *mut C_tensor, self_: *mut C_tensor, @@ -3409,6 +3527,12 @@ extern "C" { qGroupSize_: i64, qScaleAndZeros_: *mut C_tensor, ); + pub fn atg__weight_int8pack_mm( + out__: *mut *mut C_tensor, + self_: *mut C_tensor, + mat2_: *mut C_tensor, + scales_: *mut C_tensor, + ); pub fn atg__weight_norm( out__: *mut *mut C_tensor, v_: *mut C_tensor, @@ -12691,6 +12815,17 @@ extern "C" { end_null: i8, step_: i64, ); + pub fn atg_slice_inverse( + out__: *mut *mut C_tensor, + self_: *mut C_tensor, + src_: *mut C_tensor, + dim_: i64, + start_v: i64, + start_null: i8, + end_v: i64, + end_null: i8, + step_: i64, + ); pub fn atg_slice_scatter( out__: *mut *mut C_tensor, self_: *mut C_tensor,