upgraded to libtorch-1.11, which requires CUDA 11.3

sugarme 2022-03-13 12:56:11 +11:00
parent 664928551b
commit c24abefcf3
11 changed files with 182979 additions and 47645 deletions


@ -3,10 +3,10 @@
## Overview
`gotch` creates a thin wrapper around the Pytorch C++ API (Libtorch) to make use of its already optimized C++ tensor APIs (~ 2169) and dynamic graph computation with CUDA support, and provides idiomatic Go APIs for developing and implementing Deep Learning in Go.
`gotch` creates a thin wrapper around the Pytorch C++ API (Libtorch) to make use of its already optimized C++ tensor APIs (~ 2209) and dynamic graph computation with CUDA support, and provides idiomatic Go APIs for developing and implementing Deep Learning in Go.
**Some features are**
- [x] Comprehensive Pytorch tensor APIs (~ 2169)
- [x] Comprehensive Pytorch tensor APIs (~ 1893)
- [x] Fully featured Pytorch dynamic graph computation
- [x] JIT interface to run model trained/saved using PyTorch Python API
- [x] Load pretrained Pytorch models and run inference
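
For orientation, a minimal usage sketch (assuming the `gotch` and `ts` packages from this repository; the exact factory and method names, such as `MustOnes` and `Print`, are assumptions and may differ slightly between releases):

```go
package main

import (
	"github.com/sugarme/gotch"
	"github.com/sugarme/gotch/ts"
)

func main() {
	// Create a 2x3 float32 tensor of ones on the CPU and print it.
	// MustOnes/Print come from the generated `ts` API (names assumed).
	x := ts.MustOnes([]int64{2, 3}, gotch.Float, gotch.CPU)
	x.Print()
}
```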


@ -41,7 +41,11 @@ let excluded_functions =
; "linalg_vector_norm"
; "linalg_vector_norm_out"
; "linalg_matrix_norm"
; "linalg_matrix_norm_out"]
; "linalg_matrix_norm_out"
; "_histogramdd_bin_edges"
; "_histogramdd_bin_edges"
; "_histogramdd_from_bin_cts"
; "_linalg_check_errors"]
let no_tensor_options =
Set.of_list
@ -1344,7 +1348,7 @@ let run ~yaml_filename ~cpp_filename ~ffi_filename ~must_wrapper_filename
write_wrapper funcs wrapper_filename
let () =
run ~yaml_filename:"gen/pytorch/Declarations-v1.10.0.yaml"
run ~yaml_filename:"gen/pytorch/Declarations-v1.11.0.yaml"
~cpp_filename:"libtch/torch_api_generated"
~ffi_filename:"libtch/c-generated.go"
~must_wrapper_filename:"ts/must-tensor-generated.go"
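
For readers skimming the generated output: each `atg_*` entry point declared in `libtch/torch_api_generated` gets corresponding exported Go wrappers in `libtch/c-generated.go` and `ts/must-tensor-generated.go`; roughly, the `ts` wrappers camel-case the tail of each `atg_*` symbol. A self-contained sketch of that naming convention — illustrative only, and an assumption about the convention rather than the generator's actual OCaml code:

```go
package main

import (
	"fmt"
	"strings"
)

// goName sketches how a generated C symbol such as "atg_arctan2" maps to
// the CamelCase name used by the Go wrappers (e.g. "Arctan2").
func goName(cSymbol string) string {
	name := strings.TrimPrefix(cSymbol, "atg_")
	parts := strings.Split(name, "_")
	for i, p := range parts {
		if p != "" {
			parts[i] = strings.ToUpper(p[:1]) + p[1:]
		}
	}
	return strings.Join(parts, "")
}

func main() {
	fmt.Println(goName("atg_arctan2"))          // Arctan2
	fmt.Println(goName("atg_linalg_lu_factor")) // LinalgLuFactor
}
```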

gen/gen.ml.1.10 (new file, 1351 lines)

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -33,7 +33,8 @@ void atg__add_relu_scalar_(tensor *, tensor self, scalar other);
void atg__aminmax(tensor *, tensor self);
void atg__aminmax_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg__amp_update_scale_(tensor *, tensor self, tensor growth_tracker, tensor found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval);
void atg__baddbmm_mkl_(tensor *, tensor self, tensor batch1, tensor batch2);
void atg__autocast_to_full_precision(tensor *, tensor self, int cuda_enabled, int cpu_enabled);
void atg__autocast_to_reduced_precision(tensor *, tensor self, int cuda_enabled, int cpu_enabled, int cuda_dtype, int cpu_dtype);
void atg__cast_byte(tensor *, tensor self, int non_blocking);
void atg__cast_char(tensor *, tensor self, int non_blocking);
void atg__cast_double(tensor *, tensor self, int non_blocking);
@ -53,14 +54,14 @@ void atg__compute_linear_combination_out(tensor *, tensor out, tensor input, ten
void atg__conj(tensor *, tensor self);
void atg__conj_physical(tensor *, tensor self);
void atg__conv_depthwise2d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg__conv_depthwise2d_backward(tensor *, tensor grad_input, tensor grad_weight, tensor grad_output, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg__conv_depthwise2d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg__convert_indices_from_coo_to_csr(tensor *, tensor self, int64_t size, int out_int32);
void atg__convert_indices_from_coo_to_csr_out(tensor *, tensor out, tensor self, int64_t size, int out_int32);
void atg__convert_indices_from_csr_to_coo(tensor *, tensor crow_indices, tensor col_indices, int out_int32, int transpose);
void atg__convert_indices_from_csr_to_coo_out(tensor *, tensor out, tensor crow_indices, tensor col_indices, int out_int32, int transpose);
void atg__convolution(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled, int allow_tf32);
void atg__convolution_deprecated(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled);
void atg__convolution_mode(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, char* padding_ptr, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg__convolution_nogroup(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len);
void atg__copy_from(tensor *, tensor self, tensor dst, int non_blocking);
void atg__copy_from_and_resize(tensor *, tensor self, tensor dst);
void atg__ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int zero_infinity);
@ -78,6 +79,7 @@ void atg__dim_arange(tensor *, tensor like, int64_t dim);
int64_t atg__dimi(tensor self);
int64_t atg__dimv(tensor self);
void atg__dirichlet_grad(tensor *, tensor x, tensor alpha, tensor total);
void atg__efficientzerotensor(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__embedding_bag(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx);
void atg__embedding_bag_backward(tensor *, tensor grad, tensor indices, tensor offsets, tensor offset2bag, tensor bag_size, tensor maximum_indices, int64_t num_weights, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int64_t padding_idx);
void atg__embedding_bag_dense_backward(tensor *, tensor grad, tensor indices, tensor offset2bag, tensor bag_size, tensor maximum_indices, int64_t num_weights, int scale_grad_by_freq, int64_t mode, tensor per_sample_weights, int64_t padding_idx);
@ -105,15 +107,19 @@ void atg__gather_sparse_backward(tensor *, tensor self, int64_t dim, tensor inde
void atg__grid_sampler_2d_cpu_fallback(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg__grid_sampler_2d_cpu_fallback_backward(tensor *, tensor grad_output, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
int atg__has_compatible_shallow_copy_type(tensor self, tensor from);
int atg__has_same_storage_numel(tensor self, tensor other);
void atg__histogramdd_from_bin_tensors(tensor *, tensor self, tensor *bins_data, int bins_len, tensor weight, int density);
void atg__index_copy_(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg__index_put_impl_(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate, int unsafe);
void atg__indices(tensor *, tensor self);
void atg__inverse_helper(tensor *, tensor self);
int atg__is_zerotensor(tensor self);
void atg__linalg_inv_out_helper_(tensor *, tensor self, tensor infos_lu, tensor infos_getri);
void atg__linalg_qr_helper(tensor *, tensor self, char* mode_ptr, int mode_len);
void atg__linalg_svd(tensor *, tensor A, int full_matrices, int compute_uv);
void atg__linalg_svd_u(tensor *, tensor U, tensor S, tensor Vh, tensor A, int full_matrices, int compute_uv);
void atg__log_softmax(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__log_softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__log_softmax_backward_data_out(tensor *, tensor out, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__log_softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, int input_dtype);
void atg__log_softmax_backward_data_out(tensor *, tensor out, tensor grad_output, tensor output, int64_t dim, int input_dtype);
void atg__log_softmax_out(tensor *, tensor out, tensor self, int64_t dim, int half_to_float);
void atg__logcumsumexp(tensor *, tensor self, int64_t dim);
void atg__logcumsumexp_out(tensor *, tensor out, tensor self, int64_t dim);
@ -122,14 +128,15 @@ void atg__make_dual(tensor *, tensor primal, tensor tangent, int64_t level);
void atg__make_per_channel_quantized_tensor(tensor *, tensor self, tensor scale, tensor zero_point, int64_t axis);
void atg__make_per_tensor_quantized_tensor(tensor *, tensor self, double scale, int64_t zero_point);
void atg__masked_scale(tensor *, tensor self, tensor mask, double scale);
void atg__masked_softmax(tensor *, tensor self, tensor mask);
void atg__mkldnn_reshape(tensor *, tensor self, int64_t *shape_data, int shape_len);
void atg__mkldnn_transpose(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg__mkldnn_transpose_(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg__native_multi_head_self_attention(tensor *, tensor query, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, tensor mask);
void atg__neg_view(tensor *, tensor self);
void atg__new_zeros_with_same_feature_meta(tensor *, tensor self, tensor other, int64_t self_num_batch_dims);
int atg__nnpack_available();
void atg__nnpack_spatial_convolution(tensor *, tensor input, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg__nnpack_spatial_convolution_backward_input(tensor *, tensor input, tensor grad_output, tensor weight, int64_t *padding_data, int padding_len);
void atg__nnpack_spatial_convolution_backward_weight(tensor *, tensor input, int64_t *weightsize_data, int weightsize_len, tensor grad_output, int64_t *padding_data, int padding_len);
int64_t atg__nnz(tensor self);
void atg__pack_padded_sequence(tensor *, tensor input, tensor lengths, int batch_first);
void atg__pack_padded_sequence_backward(tensor *, tensor grad, int64_t *input_size_data, int input_size_len, tensor batch_sizes, int batch_first);
@ -145,16 +152,18 @@ void atg__sample_dirichlet(tensor *, tensor self);
void atg__saturate_weight_to_fp16(tensor *, tensor weight);
void atg__segment_reduce_backward(tensor *, tensor grad, tensor output, tensor data, char* reduce_ptr, int reduce_len, tensor lengths, int64_t axis);
void atg__shape_as_tensor(tensor *, tensor self);
void atg__slow_conv2d_backward(tensor *, tensor grad_input, tensor grad_weight, tensor grad_bias, tensor grad_output, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg__sobol_engine_draw(tensor *, tensor quasi, int64_t n, tensor sobolstate, int64_t dimension, int64_t num_generated, int dtype);
void atg__sobol_engine_ff_(tensor *, tensor self, int64_t n, tensor sobolstate, int64_t dimension, int64_t num_generated);
void atg__sobol_engine_initialize_state_(tensor *, tensor self, int64_t dimension);
void atg__sobol_engine_scramble_(tensor *, tensor self, tensor ltm, int64_t dimension);
void atg__softmax(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__softmax_backward_data_out(tensor *, tensor grad_input, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, int input_dtype);
void atg__softmax_backward_data_out(tensor *, tensor grad_input, tensor grad_output, tensor output, int64_t dim, int input_dtype);
void atg__softmax_out(tensor *, tensor out, tensor self, int64_t dim, int half_to_float);
void atg__solve_helper(tensor *, tensor self, tensor A);
void atg__sparse_addmm(tensor *, tensor self, tensor sparse, tensor dense);
void atg__sparse_broadcast_to(tensor *, tensor self, int64_t *size_data, int size_len);
void atg__sparse_coo_tensor_unsafe(tensor *, tensor indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_coo_tensor_with_dims(tensor *, int64_t sparse_dim, int64_t dense_dim, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_coo_tensor_with_dims_and_tensors(tensor *, int64_t sparse_dim, int64_t dense_dim, int64_t *size_data, int size_len, tensor indices, tensor values, int options_kind, int options_device);
@ -177,7 +186,6 @@ void atg__stack(tensor *, tensor *tensors_data, int tensors_len, int64_t dim);
void atg__stack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim);
void atg__standard_gamma(tensor *, tensor self);
void atg__standard_gamma_grad(tensor *, tensor self, tensor output);
void atg__svd_helper(tensor *, tensor self, int some, int compute_uv);
void atg__symeig_helper(tensor *, tensor self, int eigenvectors, int upper);
void atg__test_ambiguous_defaults(tensor *, tensor dummy, int64_t a, int64_t b);
void atg__test_ambiguous_defaults_b(tensor *, tensor dummy, int64_t a, char* b_ptr, int b_len);
@ -185,13 +193,35 @@ void atg__test_optional_filled_intlist(tensor *, tensor values, int64_t *addends
void atg__test_optional_intlist(tensor *, tensor values, int64_t *addends_data, int addends_len);
void atg__test_serialization_subcmul(tensor *, tensor self, tensor other);
void atg__test_string_default(tensor *, tensor dummy, char* a_ptr, int a_len, char* b_ptr, int b_len);
void atg__test_warn_in_autograd(tensor *, tensor self);
void atg__to_copy(tensor *, tensor self, int options_kind, int options_device, int non_blocking);
tensor *atg__to_cpu(tensor *tensors_data, int tensors_len);
void atg__torch_cuda_cu_linker_symbol_op(tensor *, tensor self);
void atg__trilinear(tensor *, tensor i1, tensor i2, tensor i3, int64_t *expand1_data, int expand1_len, int64_t *expand2_data, int expand2_len, int64_t *expand3_data, int expand3_len, int64_t *sumdim_data, int sumdim_len, int64_t unroll_dim);
void atg__unique(tensor *, tensor self, int sorted, int return_inverse);
void atg__unique2(tensor *, tensor self, int sorted, int return_inverse, int return_counts);
void atg__unpack_dual(tensor *, tensor dual, int64_t level);
void atg__unsafe_view(tensor *, tensor self, int64_t *size_data, int size_len);
void atg__upsample_bicubic2d_aa(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bicubic2d_aa_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bicubic2d_aa_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bicubic2d_aa_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bilinear2d_aa(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bilinear2d_aa_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bilinear2d_aa_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bilinear2d_aa_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_v, uint8_t scales_null);
void atg__upsample_nearest_exact1d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_v, uint8_t scales_null);
void atg__upsample_nearest_exact1d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_v, uint8_t scales_null);
void atg__upsample_nearest_exact1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_v, uint8_t scales_null);
void atg__upsample_nearest_exact2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact3d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
int atg__use_cudnn_ctc_loss(tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank);
int atg__use_cudnn_rnn_flatten_weight();
void atg__values(tensor *, tensor self);
@ -250,6 +280,7 @@ void atg_addmv_out(tensor *, tensor out, tensor self, tensor mat, tensor vec);
void atg_addr(tensor *, tensor self, tensor vec1, tensor vec2);
void atg_addr_(tensor *, tensor self, tensor vec1, tensor vec2);
void atg_addr_out(tensor *, tensor out, tensor self, tensor vec1, tensor vec2);
void atg_adjoint(tensor *, tensor self);
void atg_affine_grid_generator(tensor *, tensor theta, int64_t *size_data, int size_len, int align_corners);
void atg_affine_grid_generator_backward(tensor *, tensor grad, int64_t *size_data, int size_len, int align_corners);
void atg_alias(tensor *, tensor self);
@ -292,6 +323,9 @@ void atg_arcsinh(tensor *, tensor self);
void atg_arcsinh_(tensor *, tensor self);
void atg_arcsinh_out(tensor *, tensor out, tensor self);
void atg_arctan(tensor *, tensor self);
void atg_arctan2(tensor *, tensor self, tensor other);
void atg_arctan2_(tensor *, tensor self, tensor other);
void atg_arctan2_out(tensor *, tensor out, tensor self, tensor other);
void atg_arctan_(tensor *, tensor self);
void atg_arctan_out(tensor *, tensor out, tensor self);
void atg_arctanh(tensor *, tensor self);
@ -302,6 +336,7 @@ void atg_argmax_out(tensor *, tensor out, tensor self, int64_t dim_v, uint8_t di
void atg_argmin(tensor *, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_argmin_out(tensor *, tensor out, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_argsort(tensor *, tensor self, int64_t dim, int descending);
void atg_argwhere(tensor *, tensor self);
void atg_as_strided(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_as_strided_(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_asin(tensor *, tensor self);
@ -479,7 +514,6 @@ void atg_conv2d_padding(tensor *, tensor input, tensor weight, tensor bias, int6
void atg_conv3d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv3d_padding(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, char* padding_ptr, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv_depthwise3d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_conv_depthwise3d_backward(tensor *, tensor grad_input, tensor grad_weight, tensor grad_bias, tensor grad_output, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_conv_tbc(tensor *, tensor self, tensor weight, tensor bias, int64_t pad);
void atg_conv_tbc_backward(tensor *, tensor self, tensor input, tensor weight, tensor bias, int64_t pad);
void atg_conv_transpose1d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t groups, int64_t *dilation_data, int dilation_len);
@ -518,16 +552,8 @@ void atg_cudnn_batch_norm(tensor *, tensor input, tensor weight, tensor bias, te
void atg_cudnn_batch_norm_backward(tensor *, tensor input, tensor grad_output, tensor weight, tensor running_mean, tensor running_var, tensor save_mean, tensor save_var, double epsilon, tensor reserveSpace);
void atg_cudnn_convolution(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_add_relu(tensor *, tensor self, tensor weight, tensor z, scalar alpha, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_cudnn_convolution_backward_input(tensor *, int64_t *self_size_data, int self_size_len, tensor grad_output, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_backward_weight(tensor *, int64_t *weight_size_data, int weight_size_len, tensor grad_output, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_deprecated(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_cudnn_convolution_deprecated2(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_cudnn_convolution_relu(tensor *, tensor self, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_cudnn_convolution_transpose(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_transpose_backward_input(tensor *, tensor grad_output, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_transpose_backward_weight(tensor *, int64_t *weight_size_data, int weight_size_len, tensor grad_output, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_transpose_deprecated(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_cudnn_convolution_transpose_deprecated2(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_cudnn_grid_sampler(tensor *, tensor self, tensor grid);
void atg_cudnn_grid_sampler_backward(tensor *, tensor self, tensor grid, tensor grad_output);
int atg_cudnn_is_acceptable(tensor self);
@ -562,6 +588,7 @@ void atg_diag_out(tensor *, tensor out, tensor self, int64_t diagonal);
void atg_diagflat(tensor *, tensor self, int64_t offset);
void atg_diagonal(tensor *, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_backward(tensor *, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_scatter(tensor *, tensor self, tensor src, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diff(tensor *, tensor self, int64_t n, int64_t dim, tensor prepend, tensor append);
void atg_diff_out(tensor *, tensor out, tensor self, int64_t n, int64_t dim, tensor prepend, tensor append);
void atg_digamma(tensor *, tensor self);
@ -676,7 +703,11 @@ void atg_fft_fftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *di
void atg_fft_fftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_fftshift(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg_fft_hfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_hfft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_hfft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_hfft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_hfftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_hfftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ifft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ifft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ifft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
@ -685,7 +716,11 @@ void atg_fft_ifftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *d
void atg_fft_ifftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ifftshift(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg_fft_ihfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ihfft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ihfft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ihfft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ihfftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ihfftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_irfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_irfft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_irfft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
@ -798,7 +833,6 @@ void atg_greater_tensor_(tensor *, tensor self, tensor other);
void atg_greater_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_grid_sampler(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_2d(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_2d_backward(tensor *, tensor grad_output, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_3d(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_3d_backward(tensor *, tensor grad_output, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_group_norm(tensor *, tensor input, int64_t num_groups, tensor weight, tensor bias, double eps, int cudnn_enabled);
@ -871,8 +905,7 @@ void atg_imag(tensor *, tensor self);
void atg_index(tensor *, tensor self, tensor *indices_data, int indices_len);
void atg_index_add(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_add_(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_add_alpha(tensor *, tensor self, int64_t dim, tensor index, tensor source, scalar alpha);
void atg_index_add_alpha_(tensor *, tensor self, int64_t dim, tensor index, tensor source, scalar alpha);
void atg_index_add_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_copy(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_copy_(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_fill(tensor *, tensor self, int64_t dim, tensor index, scalar value);
@ -982,8 +1015,11 @@ void atg_linalg_cond(tensor *, tensor self, scalar p);
void atg_linalg_cond_out(tensor *, tensor out, tensor self, scalar p);
void atg_linalg_cond_p_str(tensor *, tensor self, char* p_ptr, int p_len);
void atg_linalg_cond_p_str_out(tensor *, tensor out, tensor self, char* p_ptr, int p_len);
void atg_linalg_cross(tensor *, tensor self, tensor other, int64_t dim);
void atg_linalg_cross_out(tensor *, tensor out, tensor self, tensor other, int64_t dim);
void atg_linalg_det(tensor *, tensor self);
void atg_linalg_det_out(tensor *, tensor out, tensor self);
void atg_linalg_diagonal(tensor *, tensor A, int64_t offset, int64_t dim1, int64_t dim2);
void atg_linalg_eig(tensor *, tensor self);
void atg_linalg_eig_out(tensor *, tensor eigenvalues, tensor eigenvectors, tensor self);
void atg_linalg_eigh(tensor *, tensor self, char* UPLO_ptr, int UPLO_len);
@ -1000,12 +1036,21 @@ void atg_linalg_inv_ex_inverse(tensor *, tensor inverse, tensor info, tensor sel
void atg_linalg_inv_out(tensor *, tensor out, tensor self);
void atg_linalg_lstsq(tensor *, tensor self, tensor b, double rcond_v, uint8_t rcond_null, char* driver_ptr, int driver_len);
void atg_linalg_lstsq_out(tensor *, tensor solution, tensor residuals, tensor rank, tensor singular_values, tensor self, tensor b, double rcond_v, uint8_t rcond_null, char* driver_ptr, int driver_len);
void atg_linalg_lu_factor(tensor *, tensor A, int pivot);
void atg_linalg_lu_factor_ex(tensor *, tensor A, int pivot, int check_errors);
void atg_linalg_lu_factor_ex_out(tensor *, tensor LU, tensor pivots, tensor info, tensor A, int pivot, int check_errors);
void atg_linalg_lu_factor_out(tensor *, tensor LU, tensor pivots, tensor A, int pivot);
void atg_linalg_matmul(tensor *, tensor self, tensor other);
void atg_linalg_matmul_out(tensor *, tensor out, tensor self, tensor other);
void atg_linalg_matrix_exp(tensor *, tensor self);
void atg_linalg_matrix_power(tensor *, tensor self, int64_t n);
void atg_linalg_matrix_power_out(tensor *, tensor out, tensor self, int64_t n);
void atg_linalg_matrix_rank(tensor *, tensor self, double tol_v, uint8_t tol_null, int hermitian);
void atg_linalg_matrix_rank_out(tensor *, tensor out, tensor self, double tol_v, uint8_t tol_null, int hermitian);
void atg_linalg_matrix_rank(tensor *, tensor self, double tol, int hermitian);
void atg_linalg_matrix_rank_atol_rtol_float(tensor *, tensor self, double atol_v, uint8_t atol_null, double rtol_v, uint8_t rtol_null, int hermitian);
void atg_linalg_matrix_rank_atol_rtol_float_out(tensor *, tensor out, tensor self, double atol_v, uint8_t atol_null, double rtol_v, uint8_t rtol_null, int hermitian);
void atg_linalg_matrix_rank_atol_rtol_tensor(tensor *, tensor input, tensor atol, tensor rtol, int hermitian);
void atg_linalg_matrix_rank_atol_rtol_tensor_out(tensor *, tensor out, tensor input, tensor atol, tensor rtol, int hermitian);
void atg_linalg_matrix_rank_out(tensor *, tensor out, tensor self, double tol, int hermitian);
void atg_linalg_matrix_rank_out_tol_tensor(tensor *, tensor out, tensor input, tensor tol, int hermitian);
void atg_linalg_matrix_rank_tol_tensor(tensor *, tensor input, tensor tol, int hermitian);
void atg_linalg_multi_dot(tensor *, tensor *tensors_data, int tensors_len);
@ -1015,6 +1060,10 @@ void atg_linalg_norm_ord_str(tensor *, tensor self, char* ord_ptr, int ord_len,
void atg_linalg_norm_ord_str_out(tensor *, tensor out, tensor self, char* ord_ptr, int ord_len, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linalg_norm_out(tensor *, tensor out, tensor self, scalar ord, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linalg_pinv(tensor *, tensor self, double rcond, int hermitian);
void atg_linalg_pinv_atol_rtol_float(tensor *, tensor self, double atol_v, uint8_t atol_null, double rtol_v, uint8_t rtol_null, int hermitian);
void atg_linalg_pinv_atol_rtol_float_out(tensor *, tensor out, tensor self, double atol_v, uint8_t atol_null, double rtol_v, uint8_t rtol_null, int hermitian);
void atg_linalg_pinv_atol_rtol_tensor(tensor *, tensor self, tensor atol, tensor rtol, int hermitian);
void atg_linalg_pinv_atol_rtol_tensor_out(tensor *, tensor out, tensor self, tensor atol, tensor rtol, int hermitian);
void atg_linalg_pinv_out(tensor *, tensor out, tensor self, double rcond, int hermitian);
void atg_linalg_pinv_out_rcond_tensor(tensor *, tensor out, tensor self, tensor rcond, int hermitian);
void atg_linalg_pinv_rcond_tensor(tensor *, tensor self, tensor rcond, int hermitian);
@ -1024,18 +1073,20 @@ void atg_linalg_slogdet(tensor *, tensor self);
void atg_linalg_slogdet_out(tensor *, tensor sign, tensor logabsdet, tensor self);
void atg_linalg_solve(tensor *, tensor input, tensor other);
void atg_linalg_solve_out(tensor *, tensor out, tensor input, tensor other);
void atg_linalg_svd(tensor *, tensor self, int full_matrices);
void atg_linalg_svd_u(tensor *, tensor U, tensor S, tensor Vh, tensor self, int full_matrices);
void atg_linalg_svdvals(tensor *, tensor input);
void atg_linalg_svdvals_out(tensor *, tensor out, tensor input);
void atg_linalg_solve_triangular(tensor *, tensor self, tensor B, int upper, int left, int unitriangular);
void atg_linalg_solve_triangular_out(tensor *, tensor out, tensor self, tensor B, int upper, int left, int unitriangular);
void atg_linalg_svd(tensor *, tensor A, int full_matrices);
void atg_linalg_svd_u(tensor *, tensor U, tensor S, tensor Vh, tensor A, int full_matrices);
void atg_linalg_svdvals(tensor *, tensor A);
void atg_linalg_svdvals_out(tensor *, tensor out, tensor A);
void atg_linalg_tensorinv(tensor *, tensor self, int64_t ind);
void atg_linalg_tensorinv_out(tensor *, tensor out, tensor self, int64_t ind);
void atg_linalg_tensorsolve(tensor *, tensor self, tensor other, int64_t *dims_data, int dims_len);
void atg_linalg_tensorsolve_out(tensor *, tensor out, tensor self, tensor other, int64_t *dims_data, int dims_len);
void atg_linear(tensor *, tensor input, tensor weight, tensor bias);
void atg_linear_out(tensor *, tensor out, tensor input, tensor weight, tensor bias);
void atg_linspace(tensor *, scalar start, scalar end, int64_t steps_v, uint8_t steps_null, int options_kind, int options_device);
void atg_linspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps_v, uint8_t steps_null);
void atg_linspace(tensor *, scalar start, scalar end, int64_t steps, int options_kind, int options_device);
void atg_linspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps);
void atg_log(tensor *, tensor self);
void atg_log10(tensor *, tensor self);
void atg_log10_(tensor *, tensor self);
@ -1078,8 +1129,8 @@ void atg_logit_(tensor *, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_backward(tensor *, tensor grad_output, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_out(tensor *, tensor out, tensor self, double eps_v, uint8_t eps_null);
void atg_logspace(tensor *, scalar start, scalar end, int64_t steps_v, uint8_t steps_null, double base, int options_kind, int options_device);
void atg_logspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps_v, uint8_t steps_null, double base);
void atg_logspace(tensor *, scalar start, scalar end, int64_t steps, double base, int options_kind, int options_device);
void atg_logspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps, double base);
void atg_logsumexp(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_logsumexp_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_lstm(tensor *, tensor input, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
@ -1111,6 +1162,7 @@ void atg_matmul(tensor *, tensor self, tensor other);
void atg_matmul_out(tensor *, tensor out, tensor self, tensor other);
void atg_matrix_exp(tensor *, tensor self);
void atg_matrix_exp_backward(tensor *, tensor self, tensor grad);
void atg_matrix_h(tensor *, tensor self);
void atg_matrix_power(tensor *, tensor self, int64_t n);
void atg_matrix_power_out(tensor *, tensor out, tensor self, int64_t n);
void atg_matrix_rank(tensor *, tensor self, int symmetric);
@ -1150,6 +1202,7 @@ void atg_median_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_median_dim_values(tensor *, tensor values, tensor indices, tensor self, int64_t dim, int keepdim);
tensor *atg_meshgrid(tensor *tensors_data, int tensors_len);
tensor *atg_meshgrid_indexing(tensor *tensors_data, int tensors_len, char* indexing_ptr, int indexing_len);
void atg_mh(tensor *, tensor self);
void atg_min(tensor *, tensor self);
void atg_min_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_min_dim_min(tensor *, tensor min, tensor min_indices, tensor self, int64_t dim, int keepdim);
@ -1160,15 +1213,8 @@ void atg_minimum_out(tensor *, tensor out, tensor self, tensor other);
void atg_miopen_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double exponential_average_factor, double epsilon);
void atg_miopen_batch_norm_backward(tensor *, tensor input, tensor grad_output, tensor weight, tensor running_mean, tensor running_var, tensor save_mean, tensor save_var, double epsilon);
void atg_miopen_convolution(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_convolution_backward_bias(tensor *, tensor grad_output);
void atg_miopen_convolution_backward_input(tensor *, int64_t *self_size_data, int self_size_len, tensor grad_output, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_convolution_backward_weight(tensor *, int64_t *weight_size_data, int weight_size_len, tensor grad_output, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_convolution_transpose(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_convolution_transpose_backward_input(tensor *, tensor grad_output, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_convolution_transpose_backward_weight(tensor *, int64_t *weight_size_data, int weight_size_len, tensor grad_output, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_depthwise_convolution(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_depthwise_convolution_backward_input(tensor *, int64_t *self_size_data, int self_size_len, tensor grad_output, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_depthwise_convolution_backward_weight(tensor *, int64_t *weight_size_data, int weight_size_len, tensor grad_output, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_rnn(tensor *, tensor input, tensor *weight_data, int weight_len, int64_t weight_stride0, tensor hx, tensor cx, int64_t mode, int64_t hidden_size, int64_t num_layers, int batch_first, double dropout, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, tensor dropout_state);
void atg_mish(tensor *, tensor self);
void atg_mish_(tensor *, tensor self);
@ -1177,8 +1223,6 @@ void atg_mish_out(tensor *, tensor out, tensor self);
void atg_mkldnn_adaptive_avg_pool2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_mkldnn_adaptive_avg_pool2d_backward(tensor *, tensor grad_output, tensor self);
void atg_mkldnn_convolution(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_mkldnn_convolution_backward_input(tensor *, int64_t *self_size_data, int self_size_len, tensor grad_output, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int bias_defined);
void atg_mkldnn_convolution_backward_weights(tensor *, int64_t *weight_size_data, int weight_size_len, tensor grad_output, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int bias_defined);
void atg_mkldnn_linear(tensor *, tensor self, tensor weight, tensor bias);
void atg_mkldnn_linear_backward_input(tensor *, int64_t *input_size_data, int input_size_len, tensor grad_output, tensor weight);
void atg_mkldnn_linear_backward_weights(tensor *, tensor grad_output, tensor input, tensor weight, int bias_defined);
@ -1202,6 +1246,7 @@ void atg_mse_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_o
void atg_mse_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction);
void atg_msort(tensor *, tensor self);
void atg_msort_out(tensor *, tensor out, tensor self);
void atg_mt(tensor *, tensor self);
void atg_mul(tensor *, tensor self, tensor other);
void atg_mul_(tensor *, tensor self, tensor other);
void atg_mul_out(tensor *, tensor out, tensor self, tensor other);
@ -1233,14 +1278,10 @@ void atg_nanmean_out(tensor *, tensor out, tensor self, int64_t *dim_data, int d
void atg_nanmedian(tensor *, tensor self);
void atg_nanmedian_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_nanmedian_dim_values(tensor *, tensor values, tensor indices, tensor self, int64_t dim, int keepdim);
void atg_nanquantile(tensor *, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_nanquantile_new(tensor *, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_new_out(tensor *, tensor out, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_new_scalar(tensor *, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_new_scalar_out(tensor *, tensor out, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_out(tensor *, tensor out, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_nanquantile_scalar(tensor *, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_nanquantile_scalar_out(tensor *, tensor out, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_nanquantile(tensor *, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_out(tensor *, tensor out, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_scalar(tensor *, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_scalar_out(tensor *, tensor out, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nansum(tensor *, tensor self, int dtype);
void atg_nansum_dim_intlist(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_nansum_intlist_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
@ -1250,6 +1291,9 @@ void atg_narrow_copy_out(tensor *, tensor out, tensor self, int64_t dim, int64_t
void atg_narrow_tensor(tensor *, tensor self, int64_t dim, tensor start, int64_t length);
void atg_native_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg_native_batch_norm_out(tensor *, tensor out, tensor save_mean, tensor save_invstd, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg_native_channel_shuffle(tensor *, tensor self, int64_t groups);
void atg_native_dropout(tensor *, tensor input, double p, int train);
void atg_native_dropout_backward(tensor *, tensor grad_output, tensor mask, double scale);
void atg_native_group_norm(tensor *, tensor input, tensor weight, tensor bias, int64_t n, int64_t C, int64_t HxW, int64_t group, double eps);
void atg_native_layer_norm(tensor *, tensor input, int64_t *normalized_shape_data, int normalized_shape_len, tensor weight, tensor bias, double eps);
void atg_native_norm(tensor *, tensor self);
@ -1358,16 +1402,13 @@ double atg_q_scale(tensor self);
int64_t atg_q_zero_point(tensor self);
void atg_qr(tensor *, tensor self, int some);
void atg_qr_q(tensor *, tensor Q, tensor R, tensor self, int some);
void atg_quantile(tensor *, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_quantile_new(tensor *, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_new_out(tensor *, tensor out, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_new_scalar(tensor *, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_new_scalar_out(tensor *, tensor out, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_out(tensor *, tensor out, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_quantile_scalar(tensor *, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_quantile_scalar_out(tensor *, tensor out, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_quantile(tensor *, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_out(tensor *, tensor out, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_scalar(tensor *, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_scalar_out(tensor *, tensor out, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantize_per_channel(tensor *, tensor self, tensor scales, tensor zero_points, int64_t axis, int dtype);
void atg_quantize_per_tensor(tensor *, tensor self, double scale, int64_t zero_point, int dtype);
void atg_quantize_per_tensor_dynamic(tensor *, tensor self, int dtype, int reduce_range);
void atg_quantize_per_tensor_tensor_qparams(tensor *, tensor self, tensor scale, tensor zero_point, int dtype);
tensor *atg_quantize_per_tensor_tensors(tensor *tensors_data, int tensors_len, tensor scales, tensor zero_points, int dtype);
void atg_quantized_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor mean, tensor var, double eps, double output_scale, int64_t output_zero_point);
@ -1466,6 +1507,9 @@ void atg_roll(tensor *, tensor self, int64_t *shifts_data, int shifts_len, int64
void atg_rot90(tensor *, tensor self, int64_t k, int64_t *dims_data, int dims_len);
void atg_round(tensor *, tensor self);
void atg_round_(tensor *, tensor self);
void atg_round_decimals(tensor *, tensor self, int64_t decimals);
void atg_round_decimals_(tensor *, tensor self, int64_t decimals);
void atg_round_decimals_out(tensor *, tensor out, tensor self, int64_t decimals);
void atg_round_out(tensor *, tensor out, tensor self);
void atg_row_stack(tensor *, tensor *tensors_data, int tensors_len);
void atg_row_stack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
@ -1496,12 +1540,13 @@ void atg_scatter_value_out(tensor *, tensor out, tensor self, int64_t dim, tenso
void atg_scatter_value_reduce(tensor *, tensor self, int64_t dim, tensor index, scalar value, char* reduce_ptr, int reduce_len);
void atg_scatter_value_reduce_(tensor *, tensor self, int64_t dim, tensor index, scalar value, char* reduce_ptr, int reduce_len);
void atg_scatter_value_reduce_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, scalar value, char* reduce_ptr, int reduce_len);
void atg_searchsorted(tensor *, tensor sorted_sequence, tensor self, int out_int32, int right);
void atg_searchsorted_scalar(tensor *, tensor sorted_sequence, scalar self_scalar, int out_int32, int right);
void atg_searchsorted_tensor_out(tensor *, tensor out, tensor sorted_sequence, tensor self, int out_int32, int right);
void atg_searchsorted(tensor *, tensor sorted_sequence, tensor self, int out_int32, int right, char* side_ptr, int side_len, tensor sorter);
void atg_searchsorted_scalar(tensor *, tensor sorted_sequence, scalar self_scalar, int out_int32, int right, char* side_ptr, int side_len, tensor sorter);
void atg_searchsorted_tensor_out(tensor *, tensor out, tensor sorted_sequence, tensor self, int out_int32, int right, char* side_ptr, int side_len, tensor sorter);
void atg_segment_reduce(tensor *, tensor data, char* reduce_ptr, int reduce_len, tensor lengths, tensor indices, int64_t axis, int unsafe, scalar initial);
void atg_select(tensor *, tensor self, int64_t dim, int64_t index);
void atg_select_backward(tensor *, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t index);
void atg_select_scatter(tensor *, tensor self, tensor src, int64_t dim, int64_t index);
void atg_selu(tensor *, tensor self);
void atg_selu_(tensor *, tensor self);
void atg_set_(tensor *, tensor self);
@ -1536,6 +1581,7 @@ void atg_sinh_(tensor *, tensor self);
void atg_sinh_out(tensor *, tensor out, tensor self);
void atg_slice(tensor *, tensor self, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slice_backward(tensor *, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t start, int64_t end, int64_t step);
void atg_slice_scatter(tensor *, tensor self, tensor src, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slogdet(tensor *, tensor self);
void atg_slow_conv3d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg_slow_conv3d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
@ -1556,8 +1602,8 @@ void atg_soft_margin_loss_backward_grad_input(tensor *, tensor grad_input, tenso
void atg_soft_margin_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction);
void atg_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg_softplus(tensor *, tensor self);
void atg_softplus_backward(tensor *, tensor grad_output, tensor self, scalar beta, scalar threshold, tensor output);
void atg_softplus_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar beta, scalar threshold, tensor output);
void atg_softplus_backward(tensor *, tensor grad_output, tensor self, scalar beta, scalar threshold);
void atg_softplus_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar beta, scalar threshold);
void atg_softplus_out(tensor *, tensor out, tensor self);
void atg_softshrink(tensor *, tensor self);
void atg_softshrink_backward(tensor *, tensor grad_output, tensor self, scalar lambd);
@ -1578,6 +1624,8 @@ int64_t atg_sparse_dim(tensor self);
void atg_sparse_mask(tensor *, tensor self, tensor mask);
void atg_sparse_resize_(tensor *, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_resize_and_clear_(tensor *, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_sampled_addmm(tensor *, tensor self, tensor mat1, tensor mat2);
void atg_sparse_sampled_addmm_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2);
void atg_special_digamma(tensor *, tensor self);
void atg_special_digamma_out(tensor *, tensor out, tensor self);
void atg_special_entr(tensor *, tensor self);
@ -1627,10 +1675,11 @@ void atg_special_polygamma(tensor *, int64_t n, tensor self);
void atg_special_polygamma_out(tensor *, tensor out, int64_t n, tensor self);
void atg_special_psi(tensor *, tensor self);
void atg_special_psi_out(tensor *, tensor out, tensor self);
void atg_special_round(tensor *, tensor self);
void atg_special_round_out(tensor *, tensor out, tensor self);
void atg_special_round(tensor *, tensor self, int64_t decimals);
void atg_special_round_out(tensor *, tensor out, tensor self, int64_t decimals);
void atg_special_sinc(tensor *, tensor self);
void atg_special_sinc_out(tensor *, tensor out, tensor self);
void atg_special_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg_special_xlog1py(tensor *, tensor self, tensor other);
void atg_special_xlog1py_other_scalar(tensor *, tensor self, scalar other);
void atg_special_xlog1py_other_scalar_out(tensor *, tensor out, tensor self, scalar other);
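
A note on the calling convention visible throughout these declarations: optional numeric arguments are passed as a value plus a null flag (for example `int64_t dim_v, uint8_t dim_null`, or the `scales_h_v`/`scales_h_null` pairs in the upsample functions), with the flag set when the caller omits the value. A minimal sketch of how a Go caller might flatten an optional `int64` into such a pair (illustrative; the generated wrappers handle this internally):

```go
package main

import "fmt"

// flattenOptInt64 turns an optional int64 into the (value, null-flag) pair
// used by the atg_* declarations above: a nil option yields flag = 1 and the
// value is ignored on the C side.
func flattenOptInt64(v *int64) (int64, uint8) {
	if v == nil {
		return 0, 1
	}
	return *v, 0
}

func main() {
	dim := int64(2)
	fmt.Println(flattenOptInt64(&dim)) // 2 0 -> dim provided
	fmt.Println(flattenOptInt64(nil))  // 0 1 -> dim omitted
}
```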


@ -1,7 +1,7 @@
#!/bin/bash
GOTCH_VERSION="${GOTCH_VER:-v0.6.2}"
CUDA_VERSION="${CUDA_VER:-11.1}"
CUDA_VERSION="${CUDA_VER:-11.3}"
if [ -z "$GOPATH" ]; then
  GOPATH="$HOME/go"


@ -1,7 +1,7 @@
#!/bin/bash
LIBTORCH_VERSION="${LIBTORCH_VER:-1.10.0}"
CUDA_VERSION="${CUDA_VER:-11.1}"
LIBTORCH_VERSION="${LIBTORCH_VER:-1.11.0}"
CUDA_VERSION="${CUDA_VER:-11.3}"
if [ "${CUDA_VERSION}"=="cpu" ]; then
CU_VERSION="cpu"
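
The two variables above determine which libtorch archive the script fetches: `CUDA_VERSION` is collapsed into a `cuXXX` tag (or left as `cpu`) and combined with `LIBTORCH_VERSION` into a download URL on download.pytorch.org. A rough sketch of that resolution (in Go for consistency with the other examples here; the script itself does this in bash, and the exact URL layout is an assumption based on PyTorch's public download scheme):

```go
package main

import (
	"fmt"
	"strings"
)

func libtorchURL(libtorchVer, cudaVer string) string {
	cu := "cpu"
	if cudaVer != "cpu" {
		cu = "cu" + strings.ReplaceAll(cudaVer, ".", "") // "11.3" -> "cu113"
	}
	// Typical Linux cxx11-ABI archive name; "+" is URL-encoded as %2B.
	return fmt.Sprintf(
		"https://download.pytorch.org/libtorch/%s/libtorch-cxx11-abi-shared-with-deps-%s%%2B%s.zip",
		cu, libtorchVer, cu)
}

func main() {
	fmt.Println(libtorchURL("1.11.0", "11.3"))
	fmt.Println(libtorchURL("1.11.0", "cpu"))
}
```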

File diff suppressed because it is too large

File diff suppressed because it is too large