Merge pull request #13 from sugarme/pytorch-1.7

Update to PyTorch 1.7
Sugarme 2020-11-03 00:29:26 +11:00 committed by GitHub
commit bea0e28542
28 changed files with 136212 additions and 15918 deletions


@ -41,3 +41,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Added drawing image label at `example/yolo` example
- Added some example images and README files for `example/yolo` and `example/neural-style-transfer`
## [0.3.0]
### Changed
- Updated to PyTorch C++ APIs v1.7.0
- Switched back to `lib.AtoAddParametersOld` because `ato_add_parameters` has not been implemented correctly; using the updated API causes the optimizer to stop working.
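For context, here is a minimal sketch of the backward-compatible call path this entry refers to, based on the `libtch` wrapper signatures changed later in this PR; the optimizer and parameter slice are placeholders and the surrounding setup is assumed:

```go
package train

import (
	lib "github.com/sugarme/gotch/libtch"
)

// addParamsOld registers all trainable tensors with the optimizer's first
// parameter group via the pre-1.7-style API this release switches back to.
// opt and params are assumed to come from the usual variable-store setup.
func addParamsOld(opt lib.Coptimizer, params []lib.Ctensor) {
	lib.AtoAddParametersOld(opt, params, len(params))
}
```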


@ -5,32 +5,32 @@
- **GoTch** is a Go binding for the C++ Libtorch library, for developing and implementing deep learning projects in Go.
- This package provides a thin wrapper over Libtorch to make use of its tensor APIs and CUDA support while staying as close to idiomatic Go as possible.
- There are about **1129** auto-generated tensor APIs.
- There are about **1404** auto-generated tensor APIs.
## Dependencies
- **Libtorch** C++ v1.5.0 library of [Pytorch](https://pytorch.org/)
- **Libtorch** C++ v1.7.0 library of [Pytorch](https://pytorch.org/)
## Installation
- **CPU**
Default values: `LIBTORCH_VER=1.5.1` and `GOTCH_VER=v0.1.7`
Default values: `LIBTORCH_VER=1.7.0` and `GOTCH_VER=v0.3.0`
```bash
go get -u github.com/sugarme/gotch@v0.1.7
bash ${GOPATH}/pkg/mod/github.com/sugarme/gotch@v0.1.7/setup-cpu.sh
go get -u github.com/sugarme/gotch@v0.3.0
bash ${GOPATH}/pkg/mod/github.com/sugarme/gotch@v0.3.0/setup-cpu.sh
```
- **GPU**
Default values: `LIBTORCH_VER=1.5.1`, `CUDA_VER=10.1` and `GOTCH_VER=v0.1.7`
Default values: `LIBTORCH_VER=1.7.0`, `CUDA_VER=10.1` and `GOTCH_VER=v0.3.0`
```bash
go get -u github.com/sugarme/gotch@v0.1.7
bash ${GOPATH}/pkg/mod/github.com/sugarme/gotch@v0.1.7/setup-gpu.sh
go get -u github.com/sugarme/gotch@v0.3.0
bash ${GOPATH}/pkg/mod/github.com/sugarme/gotch@v0.3.0/setup-gpu.sh
```
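After either setup script completes, a quick smoke test confirms the binding builds and links against Libtorch. This is only a sketch: it assumes the `tensor` package path and a `MustOfSlice` constructor, so adjust the names to the actual v0.3.0 API if they differ (`Float64Values` and `MustDrop` appear in the examples changed in this PR).

```go
package main

import (
	"fmt"

	ts "github.com/sugarme/gotch/tensor"
)

func main() {
	// Build a small CPU tensor and read its values back.
	// MustOfSlice is assumed here; check the tensor package for the exact constructor.
	x := ts.MustOfSlice([]float64{1, 2, 3})
	fmt.Println(x.Float64Values()) // [1 2 3]

	// Tensors wrap C memory, so drop them explicitly when done.
	x.MustDrop()
}
```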


@ -116,9 +116,9 @@ func main() {
sumLoss += loss.Float64Values()[0]
cntLoss += 1.0
batchTs.MustDrop()
batchNarrow.MustDrop()
xsOnehotTmp.MustDrop()
// batchTs.MustDrop()
// batchNarrow.MustDrop()
// xsOnehotTmp.MustDrop()
xsOnehot.MustDrop()
ys.MustDrop()
lstmOut.MustDrop()


@ -117,21 +117,21 @@ func runCNN1() {
logits := net.ForwardT(bImages, true)
loss := logits.CrossEntropyForLogits(bLabels)
// loss = loss.MustSetRequiresGrad(true)
// loss = loss.MustSetRequiresGrad(true, false)
opt.BackwardStep(loss)
epocLoss = loss.MustShallowClone()
epocLoss.Detach_()
// fmt.Printf("completed \t %v batches\t %.2f\n", i, loss.Values()[0])
// fmt.Printf("completed \t %v batches\t %.2f\n", i, loss.Float64Values()[0])
bImages.MustDrop()
bLabels.MustDrop()
}
vs.Freeze()
// vs.Freeze()
testAccuracy := nn.BatchAccuracyForLogits(vs, net, testImages, testLabels, vs.Device(), 1024)
vs.Unfreeze()
// vs.Unfreeze()
fmt.Printf("Epoch: %v\t Loss: %.2f \t Test accuracy: %.2f%%\n", epoch, epocLoss.Float64Values()[0], testAccuracy*100.0)
if testAccuracy > bestAccuracy {
bestAccuracy = testAccuracy


@ -45,7 +45,7 @@ func runLinear() {
})
testLogits := ds.TestImages.MustMm(ws, false).MustAdd(bs, true)
testAccuracy := testLogits.MustArgmax(-1, false, true).MustEq1(ds.TestLabels, true).MustTotype(gotch.Float, true).MustMean(gotch.Float, true).MustView([]int64{-1}, true).MustFloat64Value([]int64{0})
testAccuracy := testLogits.MustArgmax([]int64{-1}, false, true).MustEq1(ds.TestLabels, true).MustTotype(gotch.Float, true).MustMean(gotch.Float, true).MustView([]int64{-1}, true).MustFloat64Value([]int64{0})
fmt.Printf("Epoch: %v - Loss: %.3f - Test accuracy: %.2f%%\n", epoch, loss.Float64Values()[0], testAccuracy*100)

Binary file not shown.

Binary file not shown.


@ -271,7 +271,7 @@ func upsample(prevChannels int64) (retVal1 int64, retVal2 interface{}) {
h := res[2]
w := res[3]
return xs.MustUpsampleNearest2d([]int64{h * 2, w * 2}, 2.0, 2.0, false)
return xs.MustUpsampleNearest2d([]int64{h * 2, w * 2}, []float64{2.0}, []float64{2.0}, false)
})
return prevChannels, Layer{Val: layer}


@ -28,7 +28,8 @@ let excluded_functions =
; "_amp_non_finite_check_and_unscale_"
; "_cummin_helper"
; "_cummax_helper"
; "retain_grad" ]
; "retain_grad"
; "_validate_sparse_coo_tensor_args" ]
let no_tensor_options =
Set.of_list
@ -47,7 +48,7 @@ let no_tensor_options =
* (module String)
* ["add"; "add_"; "div"; "div_"; "mul"; "mul_"; "sub"; "sub_"; "nll_loss"]
* *)
let excluded_prefixes = ["_thnn_"; "_th_"; "thnn_"; "th_"]
let excluded_prefixes = ["_thnn_"; "_th_"; "thnn_"; "th_"; "_foreach"]
let excluded_suffixes = ["_forward"; "_forward_out"]
@ -79,7 +80,9 @@ module Func = struct
type arg_type =
| Bool
| Int64
| Int64Option
| Double
| DoubleOption
| Tensor
| TensorOption
| IntList
@ -104,8 +107,8 @@ module Func = struct
let arg_type_of_string str ~is_nullable =
match String.lowercase str with
| "bool" -> Some Bool
| "int64_t" -> Some Int64
| "double" -> Some Double
| "int64_t" -> Some (if is_nullable then Int64Option else Int64)
| "double" -> Some (if is_nullable then DoubleOption else Double)
| "booltensor" | "indextensor" | "tensor" ->
Some (if is_nullable then TensorOption else Tensor)
| "tensoroptions" -> Some TensorOptions
@ -127,6 +130,10 @@ module Func = struct
| TensorOptions ->
Printf.sprintf "int %s_kind, int %s_device" arg_name arg_name
| String -> Printf.sprintf "char* %s_ptr, int %s_len" arg_name arg_name
| Int64Option ->
Printf.sprintf "int64_t %s_v, uint8_t %s_null" arg_name arg_name
| DoubleOption ->
Printf.sprintf "double %s_v, uint8_t %s_null" arg_name arg_name
| otherwise ->
let simple_type_cstring =
match otherwise with
@ -138,7 +145,9 @@ module Func = struct
| ScalarType -> "int"
| Device -> "int"
| Scalar -> "scalar"
| String | IntList | TensorList | TensorOptions -> assert false
| Int64Option | DoubleOption | String | IntList | TensorList
|TensorOptions ->
assert false
in
Printf.sprintf "%s %s" simple_type_cstring arg_name )
|> String.concat ~sep:", "
@ -162,6 +171,14 @@ module Func = struct
Printf.sprintf
"at::device(device_of_int(%s_device)).dtype(at::ScalarType(%s_kind))"
arg_name arg_name
| Int64Option ->
Printf.sprintf
"%s_null ? c10::nullopt : c10::optional<int64_t>(%s_v)" arg_name
arg_name
| DoubleOption ->
Printf.sprintf
"%s_null ? c10::nullopt : c10::optional<double>(%s_v)" arg_name
arg_name
| ScalarType -> Printf.sprintf "at::ScalarType(%s)" arg_name
| Device -> Printf.sprintf "device_of_int(%s)" arg_name
| _ -> arg_name )
@ -229,6 +246,8 @@ module Func = struct
| String -> single_param "string"
| IntList -> Printf.sprintf "%sData []int64, %sLen int" an an
| TensorList -> Printf.sprintf "%sData []Ctensor, %sLen int" an an
| Int64Option -> Printf.sprintf "%sVal int64, %sNull int" an an
| DoubleOption -> Printf.sprintf "%sVal float64, %sNull int" an an
| TensorOptions -> Printf.sprintf "%sKind int32, %sDevice int32" an an
)
|> String.concat ~sep:", "
@ -250,6 +269,8 @@ module Func = struct
| String -> Printf.sprintf "c%s, c%sLen" an an
| IntList -> Printf.sprintf "c%sDataPtr, c%sLen" an an
| TensorList -> Printf.sprintf "c%sDataPtr, c%sLen" an an
| Int64Option -> Printf.sprintf "c%sVal, c%sNull" an an
| DoubleOption -> Printf.sprintf "c%sVal, c%sNull" an an
| TensorOptions -> Printf.sprintf "c%sKind, c%sDevice" an an )
|> String.concat ~sep:", "
@ -291,6 +312,18 @@ module Func = struct
c%sDataPtr := (*Ctensor)(unsafe.Pointer(&%sData[0]))\n\
c%sLen := *(*C.int)(unsafe.Pointer(&%sLen))"
an an an an
| Int64Option ->
Printf.sprintf
"\n\
c%sVal := *(*C.int64_t)(unsafe.Pointer(&%sVal))\n\
c%sNull := *(*C.uint8_t)(unsafe.Pointer(&%sNull))"
an an an an
| DoubleOption ->
Printf.sprintf
"\n\
c%sVal := *(*C.double)(unsafe.Pointer(&%sVal))\n\
c%sNull := *(*C.uint8_t)(unsafe.Pointer(&%sNull))"
an an an an
| TensorOptions ->
Printf.sprintf
"\n\
@ -356,6 +389,8 @@ module Func = struct
| TensorOptions -> "gotch.KindDevice"
| Scalar -> "*Scalar"
| ScalarType -> "gotch.DType"
| Int64Option -> "[]int64"
| DoubleOption -> "[]float64"
| Device -> "gotch.Device"
in
match arg.arg_type with
@ -436,6 +471,8 @@ module Func = struct
| String -> Printf.sprintf "%s" name
| IntList -> Printf.sprintf "%s, len(%s)" name name
| TensorList -> Printf.sprintf "c%s, len(c%s)" name name
| Int64Option -> Printf.sprintf "c%sVal, c%sNull" name name
| DoubleOption -> Printf.sprintf "c%sVal, c%sNull" name name
| TensorOption -> Printf.sprintf "%s.ctensor" name
| _ -> name )
|> String.concat ~sep:", "
@ -456,6 +493,24 @@ module Func = struct
| Device -> ""
| String -> ""
| IntList -> ""
| Int64Option ->
Printf.sprintf
"var c%sVal int64 = 0\n\
\ var c%sNull int = 1\n\
\ if len(%s) > 0 {\n\
\ c%sVal = %s[0]\n\
\ c%sNull = 0\n\
\ }\n"
an an an an an an
| DoubleOption ->
Printf.sprintf
"var c%sVal float64 = 0.0\n\
\ var c%sNull int = 1\n\
\ if len(%s) > 0 {\n\
\ c%sVal = %s[0]\n\
\ c%sNull = 0\n\
\ }\n"
an an an an an an
| TensorList ->
Printf.sprintf
" var c%s []lib.Ctensor\n\
@ -687,7 +742,16 @@ let write_wrapper funcs filename =
; "Split"
; "SplitWithSizes"
; "Unbind"
; "Where" ]
; "Where"
; "Atleast1d1"
; "Atleast2d1"
; "Atleast3d1"
; "Dequantize1"
; "QuantizePerTensor1"
; "UnsafeChunk"
; "UnsafeSplit"
; "UnsafeSplitWithSizes"
; "AlignTensors" ]
in
if
List.exists excluded_funcs ~f:(fun name ->
@ -793,7 +857,16 @@ let write_must_wrapper funcs filename =
; "Split"
; "SplitWithSizes"
; "Unbind"
; "Where" ]
; "Where"
; "Atleast1d1"
; "Atleast2d1"
; "Atleast3d1"
; "Dequantize1"
; "QuantizePerTensor1"
; "UnsafeChunk"
; "UnsafeSplit"
; "UnsafeSplitWithSizes"
; "AlignTensors" ]
in
if
List.exists excluded_funcs ~f:(fun name ->
@ -943,7 +1016,7 @@ let run ~yaml_filename ~cpp_filename ~ffi_filename ~must_wrapper_filename
write_wrapper funcs wrapper_filename
let () =
run ~yaml_filename:"gen/pytorch/Declarations-v1.5.0.yaml"
run ~yaml_filename:"gen/pytorch/Declarations-v1.7.0.yaml"
~cpp_filename:"libtch/torch_api_generated"
~ffi_filename:"libtch/c-generated.go"
~must_wrapper_filename:"tensor/must-tensor-generated.go"
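In short, the `Int64Option`/`DoubleOption` plumbing above works in three layers: the Go-facing type is a slice (`[]int64` / `[]float64`), the FFI layer splits it into a value plus a null flag, and the C++ shim converts that pair into a `c10::optional`. A small, runnable sketch of just the Go-side convention (the names are illustrative, not the generated code):

```go
package main

import "fmt"

// toOptionalInt64 mirrors the template the generator emits for a nullable
// int64 argument: an empty (or nil) slice means "not set" (null flag = 1),
// otherwise only the first element is used as the value.
func toOptionalInt64(v []int64) (val int64, null int) {
	val, null = 0, 1
	if len(v) > 0 {
		val = v[0]
		null = 0
	}
	return
}

func main() {
	fmt.Println(toOptionalInt64(nil))         // 0 1  -> c10::nullopt on the C++ side
	fmt.Println(toOptionalInt64([]int64{-1})) // -1 0 -> c10::optional<int64_t>(-1)
}
```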

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -401,8 +401,9 @@ func AtoSgd(learningRate, momentum, dampening, weightDecay float64, nesterov int
return C.ato_sgd(clearningRate, cmomentum, cdampening, cweightDecay, cnesterov)
}
// NOTE. Backward compat for param group not updated (#261)
// void ato_add_parameters(optimizer, tensor *, int ntensors);
func AtoAddParameters(coptimizer Coptimizer, tensors []Ctensor, ntensors int) {
func AtoAddParametersOld(coptimizer Coptimizer, tensors []Ctensor, ntensors int) {
var ctensors []C.tensor
for i := 0; i < len(tensors); i++ {
@ -412,7 +413,23 @@ func AtoAddParameters(coptimizer Coptimizer, tensors []Ctensor, ntensors int) {
cntensors := *(*C.int)(unsafe.Pointer(&ntensors))
// Just give pointer to the first element of ctensors slice
C.ato_add_parameters(coptimizer, &ctensors[0], cntensors)
C.ato_add_parameters_old(coptimizer, &ctensors[0], cntensors)
}
// NOTE. This function is not working correctly and needs to be updated.
// DO NOT USE!
// TODO: update
func AtoAddParameters(coptimizer Coptimizer, tensors []Ctensor, ntensors int) {
var ctensors []C.tensor
for i := 0; i < len(tensors); i++ {
ctensors = append(ctensors, (C.tensor)(tensors[i]))
}
cntensors := *(*C.size_t)(unsafe.Pointer(&ntensors))
// Just give pointer to the first element of ctensors slice
C.ato_add_parameters(coptimizer, ctensors[0], cntensors)
}
// void ato_set_learning_rate(optimizer, double learning_rate);
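As the comments above warn, the new `AtoAddParameters` wrapper does not yet match the updated C signature `void ato_add_parameters(optimizer, tensor, size_t group)` in `torch_api.h`: it passes only the first tensor and reuses the tensor count as the group index. A hypothetical corrected wrapper, assuming it lives in the same cgo package as the code above, might look like this (the name is illustrative, not part of the repository):

```go
// AtoAddParameter adds a single tensor to parameter group `group`,
// following the new one-tensor-per-call C API. Sketch only.
func AtoAddParameter(coptimizer Coptimizer, tensor Ctensor, group uint) {
	ctensor := (C.tensor)(tensor)
	cgroup := *(*C.size_t)(unsafe.Pointer(&group))
	C.ato_add_parameters(coptimizer, ctensor, cgroup)
}
```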


@ -1,5 +1,7 @@
#include<torch/csrc/autograd/engine.h>
#include<torch/csrc/jit/runtime/graph_executor.h>
#include<torch/torch.h>
#include<ATen/autocast_mode.h>
#include<torch/script.h>
#include<stdexcept>
#include<vector>
@ -43,6 +45,15 @@ tensor at_new_tensor() {
return nullptr;
}
tensor at_tensor_of_blob(void *data, int64_t *dims, size_t ndims, int64_t *strides, size_t nstrides, int type, int device) {
PROTECT(
at::TensorOptions blobOptions = at::TensorOptions().device(device_of_int(device)).dtype(torch::ScalarType(type));
return new torch::Tensor(torch::from_blob(data, torch::IntArrayRef(dims, ndims), torch::IntArrayRef(strides, nstrides), blobOptions));
)
return nullptr;
}
tensor at_tensor_of_data(void *vs, int64_t *dims, size_t ndims, size_t element_size_in_bytes, int type) {
PROTECT(
torch::Tensor tensor = torch::zeros(torch::IntArrayRef(dims, ndims), torch::ScalarType(type));
@ -90,6 +101,11 @@ int at_defined(tensor t) {
return -1;
}
int at_is_mkldnn(tensor t) {
PROTECT(return t->is_mkldnn();)
return -1;
}
int at_is_sparse(tensor t) {
PROTECT(return t->is_sparse();)
return -1;
@ -107,6 +123,13 @@ void at_shape(tensor t, int64_t *dims) {
)
}
void at_stride(tensor t, int64_t *dims) {
PROTECT(
int i = 0;
for (int64_t dim: t->strides()) dims[i++] = dim;
)
}
int at_scalar_type(tensor t) {
PROTECT(
return static_cast<int>(t->scalar_type());
@ -114,6 +137,46 @@ int at_scalar_type(tensor t) {
return -1;
}
void at__amp_non_finite_check_and_unscale(tensor t, tensor found_inf, tensor inf_scale) {
PROTECT(
at::_amp_non_finite_check_and_unscale_(*t, *found_inf, *inf_scale);
)
}
void at_autocast_clear_cache() {
at::autocast::clear_cache();
}
int at_autocast_decrement_nesting() {
PROTECT(
return at::autocast::decrement_nesting();
)
return -1;
}
int at_autocast_increment_nesting() {
PROTECT(
return at::autocast::increment_nesting();
)
return -1;
}
bool at_autocast_is_enabled() {
PROTECT(
return at::autocast::is_enabled();
)
return -1;
}
bool at_autocast_set_enabled(bool b) {
PROTECT(
bool is_enabled = at::autocast::is_enabled();
at::autocast::set_enabled(b);
return is_enabled;
)
return -1;
}
int at_device(tensor t) {
PROTECT(
auto device = t->device();
@ -417,6 +480,20 @@ optimizer ato_adam(double learning_rate,
return nullptr;
}
optimizer ato_adamw(double learning_rate,
double beta1,
double beta2,
double weight_decay) {
PROTECT(
auto options =
torch::optim::AdamWOptions(learning_rate)
.betas(std::tuple<double, double>(beta1, beta2))
.weight_decay(weight_decay);
return new torch::optim::AdamW(vector<torch::Tensor>(), options);
)
return nullptr;
}
optimizer ato_rms_prop(double learning_rate,
double alpha,
double eps,
@ -453,24 +530,63 @@ optimizer ato_sgd(double learning_rate,
return nullptr;
}
void ato_add_parameters(optimizer t, tensor *tensors, int ntensors) {
// NOTE. backward compat as param group (#261) not updated yet.
void ato_add_parameters_old(optimizer t, tensor *tensors, int ntensors) {
PROTECT(
for (int i = 0; i < ntensors; ++i)
t->param_groups()[0].params().push_back(*(tensors[i]));
)
}
void ato_add_parameters(optimizer t, tensor tensor, size_t group) {
PROTECT(
auto &groups = t->param_groups();
while (groups.size() <= group) {
groups.push_back(torch::optim::OptimizerParamGroup({}, t->defaults().clone()));
}
groups[group].params().push_back(*tensor);
)
}
template <class T>
void set_lr(optimizer t, double learning_rate) {
torch::optim::OptimizerOptions* d = &(t->defaults());
if (auto p = dynamic_cast<T*>(d)) {
p->lr(learning_rate);
for (auto &param_group: t->param_groups()) {
torch::optim::OptimizerOptions* d = &(param_group.options());
if (auto p2 = dynamic_cast<T*>(d)) {
p2->lr(learning_rate);
}
else throw std::invalid_argument("unexpected param group type");
}
}
}
void ato_set_learning_rate(optimizer t, double learning_rate) {
PROTECT(
torch::optim::OptimizerOptions* d = &(t->defaults());
if (auto adam = dynamic_cast<torch::optim::AdamOptions*>(d))
adam->lr(learning_rate);
else if (auto rms = dynamic_cast<torch::optim::RMSpropOptions*>(d))
rms->lr(learning_rate);
else if (auto sgd = dynamic_cast<torch::optim::SGDOptions*>(d))
sgd->lr(learning_rate);
else
throw std::invalid_argument("unexpected optimizer");
set_lr<torch::optim::AdamOptions>(t, learning_rate);
set_lr<torch::optim::AdamWOptions>(t, learning_rate);
set_lr<torch::optim::RMSpropOptions>(t, learning_rate);
set_lr<torch::optim::SGDOptions>(t, learning_rate);
)
}
template <class T>
void set_lr_group(optimizer t, size_t group, double learning_rate) {
auto &param_group = t->param_groups().at(group);
torch::optim::OptimizerOptions* d = &(param_group.options());
if (auto p = dynamic_cast<T*>(d)) {
p->lr(learning_rate);
}
}
void ato_set_learning_rate_group(optimizer t, size_t group, double learning_rate) {
PROTECT(
set_lr_group<torch::optim::AdamOptions>(t, group, learning_rate);
set_lr_group<torch::optim::AdamWOptions>(t, group, learning_rate);
set_lr_group<torch::optim::RMSpropOptions>(t, group, learning_rate);
set_lr_group<torch::optim::SGDOptions>(t, group, learning_rate);
)
}
@ -480,16 +596,115 @@ void ato_set_momentum(optimizer t, double momentum) {
if (auto adam = dynamic_cast<torch::optim::AdamOptions*>(d)) {
auto betas = adam->betas();
adam->betas(std::tuple<double, double>(momentum, get<1>(betas)));
for (auto &param_group: t->param_groups()) {
torch::optim::OptimizerOptions* d = &(param_group.options());
if (auto adam2 = dynamic_cast<torch::optim::AdamOptions*>(d)) {
adam2->betas(std::tuple<double, double>(momentum, get<1>(betas)));
}
else throw std::invalid_argument("unexpected param group type");
}
}
else if (auto rms = dynamic_cast<torch::optim::RMSpropOptions*>(d))
rms->momentum(momentum);
else if (auto sgd = dynamic_cast<torch::optim::SGDOptions*>(d))
else if (auto adamw = dynamic_cast<torch::optim::AdamWOptions*>(d)) {
auto betas = adamw->betas();
adamw->betas(std::tuple<double, double>(momentum, get<1>(betas)));
for (auto &param_group: t->param_groups()) {
torch::optim::OptimizerOptions* d = &(param_group.options());
if (auto adamw2 = dynamic_cast<torch::optim::AdamWOptions*>(d)) {
adamw2->betas(std::tuple<double, double>(momentum, get<1>(betas)));
}
else throw std::invalid_argument("unexpected param group type");
}
}
else if (auto rms = dynamic_cast<torch::optim::RMSpropOptions*>(d)) {
for (auto &param_group: t->param_groups()) {
torch::optim::OptimizerOptions* d = &(param_group.options());
if (auto rms2 = dynamic_cast<torch::optim::RMSpropOptions*>(d)) {
rms2->momentum(momentum);
}
else throw std::invalid_argument("unexpected param group type");
}
}
else if (auto sgd = dynamic_cast<torch::optim::SGDOptions*>(d)) {
sgd->momentum(momentum);
for (auto &param_group: t->param_groups()) {
torch::optim::OptimizerOptions* d = &(param_group.options());
if (auto sgd2 = dynamic_cast<torch::optim::SGDOptions*>(d)) {
sgd2->momentum(momentum);
}
else throw std::invalid_argument("unexpected param group type");
}
}
else
throw std::invalid_argument("unexpected optimizer");
)
}
void ato_set_momentum_group(optimizer t, size_t group, double momentum) {
PROTECT(
auto &param_group = t->param_groups().at(group);
torch::optim::OptimizerOptions* d = &(param_group.options());
if (auto adam = dynamic_cast<torch::optim::AdamOptions*>(d)) {
auto betas = adam->betas();
adam->betas(std::tuple<double, double>(momentum, get<1>(betas)));
}
else if (auto adamw = dynamic_cast<torch::optim::AdamWOptions*>(d)) {
auto betas = adamw->betas();
adamw->betas(std::tuple<double, double>(momentum, get<1>(betas)));
}
else if (auto rms = dynamic_cast<torch::optim::RMSpropOptions*>(d)) {
rms->momentum(momentum);
}
if (auto sgd = dynamic_cast<torch::optim::SGDOptions*>(d)) {
sgd->momentum(momentum);
}
else
throw std::invalid_argument("unexpected optimizer");
)
}
template <class T>
void set_weight_decay(optimizer t, double weight_decay) {
torch::optim::OptimizerOptions* d = &(t->defaults());
if (auto p = dynamic_cast<T*>(d)) {
p->weight_decay(weight_decay);
for (auto &param_group: t->param_groups()) {
torch::optim::OptimizerOptions* d = &(param_group.options());
if (auto p2 = dynamic_cast<T*>(d)) {
p2->weight_decay(weight_decay);
}
else throw std::invalid_argument("unexpected param group type");
}
}
}
void ato_set_weight_decay(optimizer t, double weight_decay) {
PROTECT(
set_weight_decay<torch::optim::AdamOptions>(t, weight_decay);
set_weight_decay<torch::optim::AdamWOptions>(t, weight_decay);
set_weight_decay<torch::optim::RMSpropOptions>(t, weight_decay);
set_weight_decay<torch::optim::SGDOptions>(t, weight_decay);
)
}
template <class T>
void set_weight_decay_group(optimizer t, size_t group, double weight_decay) {
auto &param_group = t->param_groups().at(group);
torch::optim::OptimizerOptions* d = &(param_group.options());
if (auto p = dynamic_cast<T*>(d)) {
p->weight_decay(weight_decay);
}
}
void ato_set_weight_decay_group(optimizer t, size_t group, double weight_decay) {
PROTECT(
set_weight_decay_group<torch::optim::AdamOptions>(t, group, weight_decay);
set_weight_decay_group<torch::optim::AdamWOptions>(t, group, weight_decay);
set_weight_decay_group<torch::optim::RMSpropOptions>(t, group, weight_decay);
set_weight_decay_group<torch::optim::SGDOptions>(t, group, weight_decay);
)
}
void ato_zero_grad(optimizer t) {
PROTECT(t->zero_grad();)
}
@ -590,7 +805,7 @@ tensor atm_forward(module m, tensor *tensors, int ntensors) {
std::vector<torch::jit::IValue> inputs;
for (int i = 0; i < ntensors; ++i)
inputs.push_back(*(tensors[i]));
torch::jit::IValue output = m->forward(inputs);
torch::jit::IValue output = m->forward(std::move(inputs));
if (!output.isTensor())
throw std::invalid_argument("forward did not return a tensor");
return new torch::Tensor(output.toTensor());
@ -605,7 +820,31 @@ ivalue atm_forward_(module m,
std::vector<torch::jit::IValue> inputs;
for (int i = 0; i < nivalues; ++i)
inputs.push_back(*(ivalues[i]));
torch::jit::IValue output = m->forward(inputs);
torch::jit::IValue output = m->forward(std::move(inputs));
return new torch::jit::IValue(output);
)
return nullptr;
}
tensor atm_method(module m, char *method_name, tensor *tensors, int ntensors) {
PROTECT(
std::vector<torch::jit::IValue> inputs;
for (int i = 0; i < ntensors; ++i)
inputs.push_back(*(tensors[i]));
torch::jit::IValue output = m->get_method(method_name)(std::move(inputs));
if (!output.isTensor())
throw std::invalid_argument("method did not return a tensor");
return new torch::Tensor(output.toTensor());
)
return nullptr;
}
ivalue atm_method_(module m, char *method_name, ivalue *ivalues, int nivalues) {
PROTECT(
std::vector<torch::jit::IValue> inputs;
for (int i = 0; i < nivalues; ++i)
inputs.push_back(*(ivalues[i]));
torch::jit::IValue output = m->get_method(method_name)(std::move(inputs));
return new torch::jit::IValue(output);
)
return nullptr;
@ -615,12 +854,40 @@ void atm_free(module m) {
delete(m);
}
void atm_save(module m, char *filename) {
PROTECT(
m->save(filename);
)
}
void atm_to(module m, int device, int dtype, bool non_blocking) {
PROTECT(
m->to(device_of_int(device), at::ScalarType(dtype), non_blocking);
)
}
int atm_get_profiling_mode() {
PROTECT(
return torch::jit::getProfilingMode();
)
return 0;
}
void atm_set_profiling_mode(int b) {
PROTECT(
torch::jit::getProfilingMode() = (bool)b;
)
}
void atm_named_parameters(module m, void *data, void (*f)(void *, char *, tensor)) {
PROTECT(
for (const auto &p : m->named_parameters()) {
auto v = p.value;
f(data, (char*)p.name.c_str(), new torch::Tensor(v));
}
)
}
ivalue ati_tensor(tensor t) {
PROTECT(
return new torch::jit::IValue(*t);
@ -718,6 +985,15 @@ ivalue ati_bool_list(char *is, int nvalues) {
return nullptr;
}
ivalue ati_string_list(char **is, int nvalues) {
PROTECT(
c10::List<string> vec;
for (int i = 0; i < nvalues; ++i) vec.push_back(string(is[i]));
return new torch::jit::IValue(vec);
)
return nullptr;
}
ivalue ati_tensor_list(tensor *is, int nvalues) {
PROTECT(
c10::List<at::Tensor> vec;
@ -855,7 +1131,7 @@ void ati_to_int_list(ivalue i,
PROTECT(
auto vec = i->toIntList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected list size");
throw std::invalid_argument("unexpected list<int> size");
}
for (int i = 0; i < noutputs; ++i)
outputs[i] = vec[i];
@ -868,7 +1144,7 @@ void ati_to_double_list(ivalue i,
PROTECT(
auto vec = i->toDoubleList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected list size");
throw std::invalid_argument("unexpected list<double> size");
}
for (int i = 0; i < noutputs; ++i)
outputs[i] = vec[i];
@ -881,7 +1157,7 @@ void ati_to_bool_list(ivalue i,
PROTECT(
auto vec = i->toBoolList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected list size");
throw std::invalid_argument("unexpected list<bool> size");
}
for (int i = 0; i < noutputs; ++i)
outputs[i] = vec[i];
@ -894,7 +1170,7 @@ void ati_to_tensor_list(ivalue i,
PROTECT(
auto vec = i->toTensorList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected tuple size");
throw std::invalid_argument("unexpected list<tensor> size");
}
for (int i = 0; i < noutputs; ++i)
outputs[i] = new torch::Tensor(vec[i]);


@ -1,6 +1,6 @@
#ifndef __TORCH_API_H__
#define __TORCH_API_H__
#include<stdint.h>
#include <stdint.h>
#ifdef __cplusplus
thread_local char *torch_last_err = nullptr;
@ -11,11 +11,11 @@ typedef torch::Scalar *scalar;
typedef torch::optim::Optimizer *optimizer;
typedef torch::jit::script::Module *module;
typedef torch::jit::IValue *ivalue;
#define PROTECT(x) \
try { \
x \
} catch (const exception& e) { \
torch_last_err = strdup(e.what()); \
#define PROTECT(x) \
try { \
x \
} catch (const exception &e) { \
torch_last_err = strdup(e.what()); \
}
#else
typedef void *tensor;
@ -28,18 +28,33 @@ typedef void *ivalue;
char *get_and_reset_last_err(); // thread-local
void at_manual_seed(int64_t);
tensor at_new_tensor();
tensor at_tensor_of_data(void *vs, int64_t *dims, size_t ndims, size_t element_size_in_bytes, int type);
void at_copy_data(tensor tensor, void *vs, size_t numel, size_t element_size_in_bytes);
tensor at_tensor_of_blob(void *data, int64_t *dims, size_t ndims,
int64_t *strides, size_t nstrides, int type,
int device);
tensor at_tensor_of_data(void *vs, int64_t *dims, size_t ndims,
size_t element_size_in_bytes, int type);
void at_copy_data(tensor tensor, void *vs, size_t numel,
size_t element_size_in_bytes);
tensor at_shallow_clone(tensor);
void *at_data_ptr(tensor);
int at_defined(tensor);
int at_is_mkldnn(tensor);
int at_is_sparse(tensor);
int at_device(tensor);
size_t at_dim(tensor);
void at_shape(tensor, int64_t *);
void at_stride(tensor, int64_t *);
int at_scalar_type(tensor);
void at__amp_non_finite_check_and_unscale(tensor, tensor, tensor);
void at_autocast_clear_cache();
int at_autocast_decrement_nesting();
int at_autocast_increment_nesting();
bool at_autocast_is_enabled();
bool at_autocast_set_enabled(bool b);
void at_backward(tensor, int, int);
int at_requires_grad(tensor);
int at_grad_set_enabled(int);
@ -50,8 +65,10 @@ void at_fill_int64(tensor, int64_t);
double at_double_value_at_indexes(tensor, int64_t *indexes, int indexes_len);
int64_t at_int64_value_at_indexes(tensor, int64_t *indexes, int indexes_len);
void at_set_double_value_at_indexes(tensor, int *indexes, int indexes_len, double v);
void at_set_int64_value_at_indexes(tensor, int *indexes, int indexes_len, int64_t v);
void at_set_double_value_at_indexes(tensor, int *indexes, int indexes_len,
double v);
void at_set_int64_value_at_indexes(tensor, int *indexes, int indexes_len,
int64_t v);
void at_copy_(tensor dst, tensor src);
@ -63,14 +80,20 @@ tensor at_load_image(char *filename);
int at_save_image(tensor, char *filename);
tensor at_resize_image(tensor, int w, int h);
void at_save_multi(tensor *tensors, char **tensor_names, int ntensors, char *filename);
void at_save_multi(tensor *tensors, char **tensor_names, int ntensors,
char *filename);
/* [at_load_multi] takes as input an array of nullptr for [tensors]. */
void at_load_multi(tensor *tensors, char **tensor_names, int ntensors, char *filename);
void at_load_multi(tensor *tensors, char **tensor_names, int ntensors,
char *filename);
/* [at_load_multi_] takes as input an array of allocation [tensors]. */
void at_load_multi_(tensor *tensors, char **tensor_names, int ntensors, char *filename);
void at_load_multi_(tensor *tensors, char **tensor_names, int ntensors,
char *filename);
void at_load_callback(char *filename, void *data, void (*f)(void *, char *, tensor));
void at_load_callback_with_device(char *filename, void *data, void (*f)(void *, char *, tensor), int device_id);
void at_load_callback(char *filename, void *data,
void (*f)(void *, char *, tensor));
void at_load_callback_with_device(char *filename, void *data,
void (*f)(void *, char *, tensor),
int device_id);
int at_get_num_interop_threads();
@ -82,32 +105,27 @@ void at_set_num_threads(int n_threads);
void at_free(tensor);
void at_run_backward(tensor *tensors,
int ntensors,
tensor *inputs,
int ninputs,
tensor *outputs,
int keep_graph,
int create_graph);
void at_run_backward(tensor *tensors, int ntensors, tensor *inputs, int ninputs,
tensor *outputs, int keep_graph, int create_graph);
optimizer ato_adam(double learning_rate,
double beta1,
double beta2,
optimizer ato_adam(double learning_rate, double beta1, double beta2,
double weight_decay);
optimizer ato_rms_prop(double learning_rate,
double alpha,
double eps,
double weight_decay,
double momentum,
int centered);
optimizer ato_sgd(double learning_rate,
double momentum,
double dampening,
double weight_decay,
int nesterov);
void ato_add_parameters(optimizer, tensor *, int ntensors);
optimizer ato_adamw(double learning_rate, double beta1, double beta2,
double weight_decay);
optimizer ato_rms_prop(double learning_rate, double alpha, double eps,
double weight_decay, double momentum, int centered);
optimizer ato_sgd(double learning_rate, double momentum, double dampening,
double weight_decay, int nesterov);
// NOTE. switch back as param group #261 not updated yet.
// Backward compat
void ato_add_parameters_old(optimizer, tensor *, int ntensors);
void ato_add_parameters(optimizer, tensor, size_t group);
void ato_set_learning_rate(optimizer, double learning_rate);
void ato_set_momentum(optimizer, double momentum);
void ato_set_learning_rate_group(optimizer, size_t group, double learning_rate);
void ato_set_momentum_group(optimizer, size_t group, double momentum);
void ato_set_weight_decay(optimizer t, double weight_decay);
void ato_set_weight_decay_group(optimizer t, size_t group, double weight_decay);
void ato_zero_grad(optimizer);
void ato_step(optimizer);
void ato_free(optimizer);
@ -129,11 +147,16 @@ module atm_load_on_device(char *, int device);
module atm_load_str(char *, size_t sz);
module atm_load_str_on_device(char *, size_t sz, int device);
tensor atm_forward(module, tensor *tensors, int ntensors);
ivalue atm_forward_(module,
ivalue *ivalues,
int nivalues);
ivalue atm_forward_(module, ivalue *ivalues, int nivalues);
tensor atm_method(module, char *method_name, tensor *tensors, int ntensors);
ivalue atm_method_(module, char *method_name, ivalue *ivalues, int nivalues);
void atm_free(module);
void atm_to(module m, int device, int dtype, bool non_blocking);
void atm_save(module m, char *);
int atm_get_profiling_mode();
void atm_set_profiling_mode(int);
void atm_named_parameters(module, void *data,
void (*f)(void *, char *, tensor));
ivalue ati_none();
ivalue ati_tensor(tensor);
@ -147,6 +170,7 @@ ivalue ati_generic_dict(ivalue *, int);
ivalue ati_int_list(int64_t *, int);
ivalue ati_double_list(double *, int);
ivalue ati_bool_list(char *, int);
ivalue ati_string_list(char **, int);
ivalue ati_tensor_list(tensor *, int);
tensor ati_to_tensor(ivalue);

File diff suppressed because it is too large


@ -22,11 +22,17 @@ void atg___xor__(tensor *, tensor self, scalar other);
void atg___xor__1(tensor *, tensor self, tensor other);
void atg__adaptive_avg_pool2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg__adaptive_avg_pool2d_backward(tensor *, tensor grad_output, tensor self);
void atg__addr(tensor *, tensor self, tensor vec1, tensor vec2);
void atg__addr_(tensor *, tensor self, tensor vec1, tensor vec2);
void atg__addr_out(tensor *, tensor out, tensor self, tensor vec1, tensor vec2);
void atg__add_batch_dim(tensor *, tensor self, int64_t batch_dim, int64_t level);
void atg__add_relu(tensor *, tensor self, tensor other);
void atg__add_relu_(tensor *, tensor self, tensor other);
void atg__add_relu_out(tensor *, tensor out, tensor self, tensor other);
void atg__addmv_impl_(tensor *, tensor self, tensor self2, tensor mat, tensor vec);
void atg__aminmax(tensor *, tensor self);
void atg__aminmax1(tensor *, tensor self, int64_t dim, int keepdim);
void atg__amp_update_scale(tensor *, tensor growth_tracker, tensor current_scale, tensor found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval);
void atg__baddbmm_mkl_(tensor *, tensor self, tensor batch1, tensor batch2);
void atg__bmm(tensor *, tensor self, tensor mat2, int deterministic);
void atg__bmm_out(tensor *, tensor out, tensor self, tensor mat2, int deterministic);
void atg__cast_byte(tensor *, tensor self, int non_blocking);
void atg__cast_char(tensor *, tensor self, int non_blocking);
void atg__cast_double(tensor *, tensor self, int non_blocking);
@ -41,7 +47,11 @@ void atg__cdist_backward(tensor *, tensor grad, tensor x1, tensor x2, double p,
void atg__cholesky_helper(tensor *, tensor self, int upper);
void atg__cholesky_solve_helper(tensor *, tensor self, tensor A, int upper);
void atg__coalesced_(tensor *, tensor self, int coalesced);
void atg__compute_linear_combination(tensor *, tensor input, tensor coefficients);
void atg__compute_linear_combination_out(tensor *, tensor out, tensor input, tensor coefficients);
void atg__conj(tensor *, tensor self);
void atg__convolution(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled);
void atg__convolution1(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled, int allow_tf32);
void atg__convolution_nogroup(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len);
void atg__copy_from(tensor *, tensor self, tensor dst, int non_blocking);
void atg__ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int zero_infinity);
@ -59,28 +69,35 @@ void atg__dirichlet_grad(tensor *, tensor x, tensor alpha, tensor total);
void atg__embedding_bag(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset);
void atg__embedding_bag_backward(tensor *, tensor grad, tensor indices, tensor offsets, tensor offset2bag, tensor bag_size, tensor maximum_indices, int64_t num_weights, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights);
void atg__embedding_bag_dense_backward(tensor *, tensor grad, tensor indices, tensor offsets, tensor offset2bag, tensor bag_size, tensor maximum_indices, int64_t num_weights, int scale_grad_by_freq, int64_t mode, tensor per_sample_weights);
void atg__embedding_bag_forward_only(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset);
void atg__embedding_bag_per_sample_weights_backward(tensor *, tensor grad, tensor weight, tensor indices, tensor offsets, tensor offset2bag, int64_t mode);
void atg__embedding_bag_sparse_backward(tensor *, tensor grad, tensor indices, tensor offsets, tensor offset2bag, tensor bag_size, int64_t num_weights, int scale_grad_by_freq, int64_t mode, tensor per_sample_weights);
void atg__empty_affine_quantized(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device, double scale, int64_t zero_point);
void atg__empty_per_channel_affine_quantized(tensor *, int64_t *size_data, int size_len, tensor scales, tensor zero_points, int64_t axis, int options_kind, int options_device);
void atg__euclidean_dist(tensor *, tensor x1, tensor x2);
void atg__fake_quantize_learnable_per_channel_affine(tensor *, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max);
void atg__fake_quantize_learnable_per_channel_affine_backward(tensor *, tensor grad, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max);
void atg__fake_quantize_learnable_per_tensor_affine(tensor *, tensor self, tensor scale, tensor zero_point, int64_t quant_min, int64_t quant_max);
void atg__fake_quantize_learnable_per_tensor_affine_backward(tensor *, tensor grad, tensor self, tensor scale, tensor zero_point, int64_t quant_min, int64_t quant_max);
void atg__fft_with_size(tensor *, tensor self, int64_t signal_ndim, int complex_input, int complex_output, int inverse, int64_t *checked_signal_sizes_data, int checked_signal_sizes_len, int normalized, int onesided, int64_t *output_sizes_data, int output_sizes_len);
void atg__fft_with_size1(tensor *, tensor self, int64_t signal_ndim, int complex_input, int complex_output, int inverse, int64_t *checked_signal_sizes_data, int checked_signal_sizes_len, int64_t normalization, int onesided, int64_t *output_sizes_data, int output_sizes_len);
void atg__fused_dropout(tensor *, tensor self, double p);
void atg__gather_sparse_backward(tensor *, tensor self, int64_t dim, tensor index, tensor grad);
void atg__grid_sampler_2d_cpu_fallback(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg__grid_sampler_2d_cpu_fallback_backward(tensor *, tensor grad_output, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg__index_copy_(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg__index_put_impl_(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate, int unsafe);
void atg__indices(tensor *, tensor self);
void atg__inverse_helper(tensor *, tensor self);
void atg__log_softmax(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__log_softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__logcumsumexp(tensor *, tensor self, int64_t dim);
void atg__logcumsumexp_out(tensor *, tensor out, tensor self, int64_t dim);
void atg__lu_solve_helper(tensor *, tensor self, tensor LU_data, tensor LU_pivots);
void atg__lu_with_info(tensor *, tensor self, int pivot, int check_errors);
void atg__make_per_channel_quantized_tensor(tensor *, tensor self, tensor scale, tensor zero_point, int64_t axis);
void atg__make_per_tensor_quantized_tensor(tensor *, tensor self, double scale, int64_t zero_point);
void atg__masked_scale(tensor *, tensor self, tensor mask, double scale);
void atg__max(tensor *, tensor self, int64_t dim, int keepdim);
void atg__max_out(tensor *, tensor max, tensor max_indices, tensor self, int64_t dim, int keepdim);
void atg__min(tensor *, tensor self, int64_t dim, int keepdim);
void atg__min_out(tensor *, tensor min, tensor min_indices, tensor self, int64_t dim, int keepdim);
void atg__mkldnn_reshape(tensor *, tensor self, int64_t *shape_data, int shape_len);
void atg__mkldnn_transpose(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg__mkldnn_transpose_(tensor *, tensor self, int64_t dim0, int64_t dim1);
@ -96,9 +113,11 @@ void atg__pack_padded_sequence_backward(tensor *, tensor grad, int64_t *input_si
void atg__pad_packed_sequence(tensor *, tensor data, tensor batch_sizes, int batch_first, scalar padding_value, int64_t total_length);
void atg__pdist_backward(tensor *, tensor grad, tensor self, double p, tensor pdist);
void atg__qr_helper(tensor *, tensor self, int some);
void atg__remove_batch_dim(tensor *, tensor self, int64_t level, int64_t batch_size, int64_t out_dim);
void atg__reshape_from_tensor(tensor *, tensor self, tensor shape);
void atg__s_where(tensor *, tensor condition, tensor self, tensor other);
void atg__sample_dirichlet(tensor *, tensor self);
void atg__saturate_weight_to_fp16(tensor *, tensor weight);
void atg__shape_as_tensor(tensor *, tensor self);
void atg__sobol_engine_draw(tensor *, tensor quasi, int64_t n, tensor sobolstate, int64_t dimension, int64_t num_generated, int dtype);
void atg__sobol_engine_ff_(tensor *, tensor self, int64_t n, tensor sobolstate, int64_t dimension, int64_t num_generated);
@ -111,7 +130,13 @@ void atg__sparse_addmm(tensor *, tensor self, tensor sparse, tensor dense);
void atg__sparse_coo_tensor_unsafe(tensor *, tensor indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_coo_tensor_with_dims(tensor *, int64_t sparse_dim, int64_t dense_dim, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_coo_tensor_with_dims_and_tensors(tensor *, int64_t sparse_dim, int64_t dense_dim, int64_t *size_data, int size_len, tensor indices, tensor values, int options_kind, int options_device);
void atg__sparse_log_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg__sparse_log_softmax1(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__sparse_log_softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__sparse_mm(tensor *, tensor sparse, tensor dense);
void atg__sparse_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg__sparse_softmax1(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__sparse_softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__sparse_sum(tensor *, tensor self);
void atg__sparse_sum1(tensor *, tensor self, int dtype);
void atg__sparse_sum2(tensor *, tensor self, int64_t *dim_data, int dim_len);
@ -122,6 +147,9 @@ void atg__standard_gamma_grad(tensor *, tensor self, tensor output);
void atg__std(tensor *, tensor self, int unbiased);
void atg__svd_helper(tensor *, tensor self, int some, int compute_uv);
void atg__symeig_helper(tensor *, tensor self, int eigenvectors, int upper);
void atg__test_optional_filled_intlist(tensor *, tensor values, int64_t *addends_data, int addends_len);
void atg__test_optional_intlist(tensor *, tensor values, int64_t *addends_data, int addends_len);
void atg__test_serialization_subcmul(tensor *, tensor self, tensor other);
void atg__triangular_solve_helper(tensor *, tensor self, tensor A, int upper, int transpose, int unitriangular);
void atg__trilinear(tensor *, tensor i1, tensor i2, tensor i3, int64_t *expand1_data, int expand1_len, int64_t *expand2_data, int expand2_len, int64_t *expand3_data, int expand3_len, int64_t *sumdim_data, int sumdim_len, int64_t unroll_dim);
void atg__unique(tensor *, tensor self, int sorted, int return_inverse);
@ -136,9 +164,15 @@ void atg__weight_norm_differentiable_backward(tensor *, tensor grad_w, tensor sa
void atg_abs(tensor *, tensor self);
void atg_abs_(tensor *, tensor self);
void atg_abs_out(tensor *, tensor out, tensor self);
void atg_absolute(tensor *, tensor self);
void atg_absolute_(tensor *, tensor self);
void atg_absolute_out(tensor *, tensor out, tensor self);
void atg_acos(tensor *, tensor self);
void atg_acos_(tensor *, tensor self);
void atg_acos_out(tensor *, tensor out, tensor self);
void atg_acosh(tensor *, tensor self);
void atg_acosh_(tensor *, tensor self);
void atg_acosh_out(tensor *, tensor out, tensor self);
void atg_adaptive_avg_pool1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_avg_pool2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_avg_pool2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
@ -188,6 +222,10 @@ void atg_all1(tensor *, tensor self, int64_t dim, int keepdim);
void atg_all_out(tensor *, tensor out, tensor self, int64_t dim, int keepdim);
void atg_alpha_dropout(tensor *, tensor input, double p, int train);
void atg_alpha_dropout_(tensor *, tensor self, double p, int train);
void atg_amax(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_amax_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_amin(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_amin_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_angle(tensor *, tensor self);
void atg_angle_out(tensor *, tensor out, tensor self);
void atg_any(tensor *, tensor self);
@ -198,29 +236,59 @@ void atg_arange1(tensor *, scalar start, scalar end, int options_kind, int optio
void atg_arange2(tensor *, scalar start, scalar end, scalar step, int options_kind, int options_device);
void atg_arange_out(tensor *, tensor out, scalar end);
void atg_arange_out1(tensor *, tensor out, scalar start, scalar end);
void atg_argmax(tensor *, tensor self, int64_t dim, int keepdim);
void atg_argmin(tensor *, tensor self, int64_t dim, int keepdim);
void atg_arccos(tensor *, tensor self);
void atg_arccos_(tensor *, tensor self);
void atg_arccos_out(tensor *, tensor out, tensor self);
void atg_arccosh(tensor *, tensor self);
void atg_arccosh_(tensor *, tensor self);
void atg_arccosh_out(tensor *, tensor out, tensor self);
void atg_arcsin(tensor *, tensor self);
void atg_arcsin_(tensor *, tensor self);
void atg_arcsin_out(tensor *, tensor out, tensor self);
void atg_arcsinh(tensor *, tensor self);
void atg_arcsinh_(tensor *, tensor self);
void atg_arcsinh_out(tensor *, tensor out, tensor self);
void atg_arctan(tensor *, tensor self);
void atg_arctan_(tensor *, tensor self);
void atg_arctan_out(tensor *, tensor out, tensor self);
void atg_arctanh(tensor *, tensor self);
void atg_arctanh_(tensor *, tensor self);
void atg_arctanh_out(tensor *, tensor out, tensor self);
void atg_argmax(tensor *, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_argmin(tensor *, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_argsort(tensor *, tensor self, int64_t dim, int descending);
void atg_as_strided(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset);
void atg_as_strided_(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset);
void atg_as_strided(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_as_strided_(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_asin(tensor *, tensor self);
void atg_asin_(tensor *, tensor self);
void atg_asin_out(tensor *, tensor out, tensor self);
void atg_asinh(tensor *, tensor self);
void atg_asinh_(tensor *, tensor self);
void atg_asinh_out(tensor *, tensor out, tensor self);
void atg_atan(tensor *, tensor self);
void atg_atan2(tensor *, tensor self, tensor other);
void atg_atan2_(tensor *, tensor self, tensor other);
void atg_atan2_out(tensor *, tensor out, tensor self, tensor other);
void atg_atan_(tensor *, tensor self);
void atg_atan_out(tensor *, tensor out, tensor self);
void atg_atanh(tensor *, tensor self);
void atg_atanh_(tensor *, tensor self);
void atg_atanh_out(tensor *, tensor out, tensor self);
void atg_atleast_1d(tensor *, tensor self);
tensor *atg_atleast_1d1(tensor *tensors_data, int tensors_len);
void atg_atleast_2d(tensor *, tensor self);
tensor *atg_atleast_2d1(tensor *tensors_data, int tensors_len);
void atg_atleast_3d(tensor *, tensor self);
tensor *atg_atleast_3d1(tensor *tensors_data, int tensors_len);
void atg_avg_pool1d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad);
void atg_avg_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override);
void atg_avg_pool2d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override);
void atg_avg_pool2d_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override);
void atg_avg_pool2d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override);
void atg_avg_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override);
void atg_avg_pool3d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override);
void atg_avg_pool3d_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override);
void atg_avg_pool3d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override);
void atg_avg_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool2d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool2d_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool2d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool3d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool3d_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool3d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_baddbmm(tensor *, tensor self, tensor batch1, tensor batch2);
void atg_baddbmm_(tensor *, tensor self, tensor batch1, tensor batch2);
void atg_baddbmm_out(tensor *, tensor out, tensor self, tensor batch1, tensor batch2);
@ -232,7 +300,7 @@ void atg_batch_norm_backward_reduce(tensor *, tensor grad_out, tensor input, ten
void atg_batch_norm_elemt(tensor *, tensor input, tensor weight, tensor bias, tensor mean, tensor invstd, double eps);
void atg_batch_norm_elemt_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, tensor mean, tensor invstd, double eps);
void atg_batch_norm_gather_stats(tensor *, tensor input, tensor mean, tensor invstd, tensor running_mean, tensor running_var, double momentum, double eps, int64_t count);
void atg_batch_norm_gather_stats_with_counts(tensor *, tensor input, tensor mean, tensor invstd, tensor running_mean, tensor running_var, double momentum, double eps, int64_t *counts_data, int counts_len);
void atg_batch_norm_gather_stats_with_counts(tensor *, tensor input, tensor mean, tensor invstd, tensor running_mean, tensor running_var, double momentum, double eps, tensor counts);
void atg_batch_norm_stats(tensor *, tensor input, double eps);
void atg_batch_norm_update_stats(tensor *, tensor input, tensor running_mean, tensor running_var, double momentum);
void atg_bernoulli(tensor *, tensor self);
@ -248,6 +316,7 @@ void atg_binary_cross_entropy_out(tensor *, tensor out, tensor self, tensor targ
void atg_binary_cross_entropy_with_logits(tensor *, tensor self, tensor target, tensor weight, tensor pos_weight, int64_t reduction);
void atg_binary_cross_entropy_with_logits_backward(tensor *, tensor grad_output, tensor self, tensor target, tensor weight, tensor pos_weight, int64_t reduction);
void atg_bincount(tensor *, tensor self, tensor weights, int64_t minlength);
void atg_binomial(tensor *, tensor count, tensor prob);
void atg_bitwise_and(tensor *, tensor self, scalar other);
void atg_bitwise_and1(tensor *, tensor self, tensor other);
void atg_bitwise_and_(tensor *, tensor self, scalar other);
@ -271,20 +340,25 @@ void atg_bitwise_xor_out(tensor *, tensor out, tensor self, tensor other);
void atg_bitwise_xor_out1(tensor *, tensor out, tensor self, scalar other);
void atg_blackman_window(tensor *, int64_t window_length, int options_kind, int options_device);
void atg_blackman_window1(tensor *, int64_t window_length, int periodic, int options_kind, int options_device);
void atg_block_diag(tensor *, tensor *tensors_data, int tensors_len);
void atg_bmm(tensor *, tensor self, tensor mat2);
void atg_bmm_out(tensor *, tensor out, tensor self, tensor mat2);
tensor *atg_broadcast_tensors(tensor *tensors_data, int tensors_len);
void atg_bucketize(tensor *, tensor self, tensor boundaries, int out_int32, int right);
void atg_bucketize1(tensor *, scalar self_scalar, tensor boundaries, int out_int32, int right);
void atg_bucketize_out(tensor *, tensor out, tensor self, tensor boundaries, int out_int32, int right);
void atg_cartesian_prod(tensor *, tensor *tensors_data, int tensors_len);
void atg_cat(tensor *, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_cat_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_cauchy_(tensor *, tensor self, double median, double sigma);
void atg_cdist(tensor *, tensor x1, tensor x2, double p, int64_t compute_mode);
void atg_cdist(tensor *, tensor x1, tensor x2, double p, int64_t compute_mode_v, uint8_t compute_mode_null);
void atg_ceil(tensor *, tensor self);
void atg_ceil_(tensor *, tensor self);
void atg_ceil_out(tensor *, tensor out, tensor self);
void atg_celu(tensor *, tensor self);
void atg_celu_(tensor *, tensor self);
void atg_chain_matmul(tensor *, tensor *matrices_data, int matrices_len);
void atg_channel_shuffle(tensor *, tensor self, int64_t groups);
void atg_cholesky(tensor *, tensor self, int upper);
void atg_cholesky_inverse(tensor *, tensor self, int upper);
void atg_cholesky_inverse_out(tensor *, tensor out, tensor self, int upper);
@ -301,12 +375,17 @@ void atg_clamp_min(tensor *, tensor self, scalar min);
void atg_clamp_min_(tensor *, tensor self, scalar min);
void atg_clamp_min_out(tensor *, tensor out, tensor self, scalar min);
void atg_clamp_out(tensor *, tensor out, tensor self, scalar min, scalar max);
void atg_clip(tensor *, tensor self, scalar min, scalar max);
void atg_clip_(tensor *, tensor self, scalar min, scalar max);
void atg_clip_out(tensor *, tensor out, tensor self, scalar min, scalar max);
void atg_coalesce(tensor *, tensor self);
void atg_col2im(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg_col2im_backward(tensor *, tensor grad_output, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg_col2im_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg_col2im_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg_combinations(tensor *, tensor self, int64_t r, int with_replacement);
void atg_complex(tensor *, tensor real, tensor imag);
void atg_complex_out(tensor *, tensor out, tensor real, tensor imag);
void atg_conj(tensor *, tensor self);
void atg_conj_out(tensor *, tensor out, tensor self);
void atg_constant_pad_nd(tensor *, tensor self, int64_t *pad_data, int pad_len);
@ -330,8 +409,10 @@ void atg_cosh_(tensor *, tensor self);
void atg_cosh_out(tensor *, tensor out, tensor self);
void atg_cosine_embedding_loss(tensor *, tensor input1, tensor input2, tensor target, double margin, int64_t reduction);
void atg_cosine_similarity(tensor *, tensor x1, tensor x2, int64_t dim, double eps);
void atg_cross(tensor *, tensor self, tensor other, int64_t dim);
void atg_cross_out(tensor *, tensor out, tensor self, tensor other, int64_t dim);
void atg_count_nonzero(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg_count_nonzero1(tensor *, tensor self, int64_t dim_v, uint8_t dim_null);
void atg_cross(tensor *, tensor self, tensor other, int64_t dim_v, uint8_t dim_null);
void atg_cross_out(tensor *, tensor out, tensor self, tensor other, int64_t dim_v, uint8_t dim_null);
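/* Editor's note (sketch): several v1.7.0 signatures replace a plain int64_t with a
   (value, null-flag) pair such as dim_v / dim_null; a non-zero flag is assumed to
   mean "optional argument omitted", in which case the value is ignored. */
static inline void example_atg_cross_nullable_dim(tensor self, tensor other) {
  tensor out__[1];
  atg_cross(out__, self, other, 0, 1);  /* dim omitted: dim_v ignored, dim_null = 1 */
}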
void atg_ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int64_t reduction, int zero_infinity);
void atg_ctc_loss1(tensor *, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank, int64_t reduction, int zero_infinity);
void atg_cudnn_affine_grid_generator(tensor *, tensor theta, int64_t n, int64_t C, int64_t H, int64_t W);
@ -340,32 +421,42 @@ void atg_cudnn_batch_norm(tensor *, tensor input, tensor weight, tensor bias, te
void atg_cudnn_batch_norm_backward(tensor *, tensor input, tensor grad_output, tensor weight, tensor running_mean, tensor running_var, tensor save_mean, tensor save_var, double epsilon, tensor reserveSpace);
void atg_cudnn_convolution(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_cudnn_convolution1(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_cudnn_convolution_backward_input(tensor *, int64_t *self_size_data, int self_size_len, tensor grad_output, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_cudnn_convolution_backward_weight(tensor *, int64_t *weight_size_data, int weight_size_len, tensor grad_output, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_cudnn_convolution2(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_backward_input(tensor *, int64_t *self_size_data, int self_size_len, tensor grad_output, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_backward_weight(tensor *, int64_t *weight_size_data, int weight_size_len, tensor grad_output, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_transpose(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_cudnn_convolution_transpose1(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_cudnn_convolution_transpose_backward_input(tensor *, tensor grad_output, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_cudnn_convolution_transpose_backward_weight(tensor *, int64_t *weight_size_data, int weight_size_len, tensor grad_output, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_cudnn_convolution_transpose2(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_transpose_backward_input(tensor *, tensor grad_output, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_transpose_backward_weight(tensor *, int64_t *weight_size_data, int weight_size_len, tensor grad_output, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_grid_sampler(tensor *, tensor self, tensor grid);
void atg_cudnn_grid_sampler_backward(tensor *, tensor self, tensor grid, tensor grad_output);
void atg_cummax(tensor *, tensor self, int64_t dim);
void atg_cummax_out(tensor *, tensor values, tensor indices, tensor self, int64_t dim);
void atg_cummaxmin_backward(tensor *, tensor grad, tensor input, tensor indices, int64_t dim);
void atg_cummin(tensor *, tensor self, int64_t dim);
void atg_cummin_out(tensor *, tensor values, tensor indices, tensor self, int64_t dim);
void atg_cumprod(tensor *, tensor self, int64_t dim, int dtype);
void atg_cumprod_backward(tensor *, tensor grad, tensor input, int64_t dim);
void atg_cumprod_out(tensor *, tensor out, tensor self, int64_t dim, int dtype);
void atg_cumsum(tensor *, tensor self, int64_t dim, int dtype);
void atg_cumsum_out(tensor *, tensor out, tensor self, int64_t dim, int dtype);
void atg_data(tensor *, tensor self);
void atg_deg2rad(tensor *, tensor self);
void atg_deg2rad_(tensor *, tensor self);
void atg_deg2rad_out(tensor *, tensor out, tensor self);
void atg_dequantize(tensor *, tensor self);
tensor *atg_dequantize1(tensor *tensors_data, int tensors_len);
void atg_det(tensor *, tensor self);
void atg_detach(tensor *, tensor self);
void atg_detach_(tensor *, tensor self);
void atg_diag(tensor *, tensor self, int64_t diagonal);
void atg_diag_backward(tensor *, tensor grad, int64_t *input_sizes_data, int input_sizes_len, int64_t diagonal);
void atg_diag_embed(tensor *, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diag_out(tensor *, tensor out, tensor self, int64_t diagonal);
void atg_diagflat(tensor *, tensor self, int64_t offset);
void atg_diagonal(tensor *, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_backward(tensor *, tensor grad, int64_t *input_sizes_data, int input_sizes_len, int64_t offset, int64_t dim1, int64_t dim2);
void atg_digamma(tensor *, tensor self);
void atg_digamma_(tensor *, tensor self);
void atg_digamma_out(tensor *, tensor out, tensor self);
@ -375,10 +466,17 @@ void atg_div1(tensor *, tensor self, scalar other);
void atg_div_(tensor *, tensor self, tensor other);
void atg_div_1(tensor *, tensor self, scalar other);
void atg_div_out(tensor *, tensor out, tensor self, tensor other);
void atg_divide(tensor *, tensor self, tensor other);
void atg_divide1(tensor *, tensor self, scalar other);
void atg_divide_(tensor *, tensor self, tensor other);
void atg_divide_1(tensor *, tensor self, scalar other);
void atg_divide_out(tensor *, tensor out, tensor self, tensor other);
void atg_dot(tensor *, tensor self, tensor tensor);
void atg_dot_out(tensor *, tensor out, tensor self, tensor tensor);
void atg_dropout(tensor *, tensor input, double p, int train);
void atg_dropout_(tensor *, tensor self, double p, int train);
void atg_dstack(tensor *, tensor *tensors_data, int tensors_len);
void atg_dstack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
void atg_eig(tensor *, tensor self, int eigenvectors);
void atg_eig_out(tensor *, tensor e, tensor v, tensor self, int eigenvectors);
void atg_einsum(tensor *, char* equation_ptr, int equation_len, tensor *tensors_data, int tensors_len);
@ -395,7 +493,9 @@ void atg_embedding_renorm_(tensor *, tensor self, tensor indices, double max_nor
void atg_embedding_sparse_backward(tensor *, tensor grad, tensor indices, int64_t num_weights, int64_t padding_idx, int scale_grad_by_freq);
void atg_empty(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_empty_like(tensor *, tensor self);
void atg_empty_meta(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_empty_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg_empty_quantized(tensor *, int64_t *size_data, int size_len, tensor qtensor);
void atg_empty_strided(tensor *, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int options_kind, int options_device);
void atg_eq(tensor *, tensor self, scalar other);
void atg_eq1(tensor *, tensor self, tensor other);
@ -413,6 +513,9 @@ void atg_erfinv(tensor *, tensor self);
void atg_erfinv_(tensor *, tensor self);
void atg_erfinv_out(tensor *, tensor out, tensor self);
void atg_exp(tensor *, tensor self);
void atg_exp2(tensor *, tensor self);
void atg_exp2_(tensor *, tensor self);
void atg_exp2_out(tensor *, tensor out, tensor self);
void atg_exp_(tensor *, tensor self);
void atg_exp_out(tensor *, tensor out, tensor self);
void atg_expand(tensor *, tensor self, int64_t *size_data, int size_len, int implicit);
@ -441,11 +544,26 @@ void atg_feature_alpha_dropout_(tensor *, tensor self, double p, int train);
void atg_feature_dropout(tensor *, tensor input, double p, int train);
void atg_feature_dropout_(tensor *, tensor self, double p, int train);
void atg_fft(tensor *, tensor self, int64_t signal_ndim, int normalized);
void atg_fft_fft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_fftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_hfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ifft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ifftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ihfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_irfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_irfftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_rfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_rfftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
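/* Editor's note (sketch): the new torch.fft bindings combine both conventions:
   the optional length n comes as n_v / n_null, and the optional `norm` string as a
   pointer plus byte length; a null pointer with length 0 is assumed to select the
   default normalization. */
static inline void example_atg_fft_fft_defaults(tensor self) {
  tensor out__[1];
  atg_fft_fft(out__, self, 0, 1, -1, (char *)0, 0);  /* n omitted, dim = -1, norm = NULL */
}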
void atg_fill_(tensor *, tensor self, scalar value);
void atg_fill_1(tensor *, tensor self, tensor value);
void atg_fill_diagonal_(tensor *, tensor self, scalar fill_value, int wrap);
void atg_fix(tensor *, tensor self);
void atg_fix_(tensor *, tensor self);
void atg_fix_out(tensor *, tensor out, tensor self);
void atg_flatten(tensor *, tensor self, int64_t start_dim, int64_t end_dim);
void atg_flip(tensor *, tensor self, int64_t *dims_data, int dims_len);
void atg_fliplr(tensor *, tensor self);
void atg_flipud(tensor *, tensor self);
void atg_floor(tensor *, tensor self);
void atg_floor_(tensor *, tensor self);
void atg_floor_divide(tensor *, tensor self, tensor other);
@ -474,12 +592,16 @@ void atg_fractional_max_pool3d_out(tensor *, tensor output, tensor indices, tens
void atg_frobenius_norm(tensor *, tensor self);
void atg_frobenius_norm1(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_frobenius_norm_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_from_file(tensor *, char* filename_ptr, int filename_len, int shared, int64_t size, int options_kind, int options_device);
void atg_from_file(tensor *, char* filename_ptr, int filename_len, int shared, int64_t size_v, uint8_t size_null, int options_kind, int options_device);
void atg_full(tensor *, int64_t *size_data, int size_len, scalar fill_value, int options_kind, int options_device);
void atg_full_like(tensor *, tensor self, scalar fill_value);
void atg_full_out(tensor *, tensor out, int64_t *size_data, int size_len, scalar fill_value);
void atg_gather(tensor *, tensor self, int64_t dim, tensor index, int sparse_grad);
void atg_gather_backward(tensor *, tensor grad, tensor self, int64_t dim, tensor index, int sparse_grad);
void atg_gather_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, int sparse_grad);
void atg_gcd(tensor *, tensor self, tensor other);
void atg_gcd_(tensor *, tensor self, tensor other);
void atg_gcd_out(tensor *, tensor out, tensor self, tensor other);
void atg_ge(tensor *, tensor self, scalar other);
void atg_ge1(tensor *, tensor self, tensor other);
void atg_ge_(tensor *, tensor self, scalar other);
@ -498,6 +620,18 @@ void atg_glu_backward(tensor *, tensor grad_output, tensor self, int64_t dim);
void atg_glu_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t dim);
void atg_glu_out(tensor *, tensor out, tensor self, int64_t dim);
void atg_grad(tensor *, tensor self);
void atg_greater(tensor *, tensor self, scalar other);
void atg_greater1(tensor *, tensor self, tensor other);
void atg_greater_(tensor *, tensor self, scalar other);
void atg_greater_1(tensor *, tensor self, tensor other);
void atg_greater_equal(tensor *, tensor self, scalar other);
void atg_greater_equal1(tensor *, tensor self, tensor other);
void atg_greater_equal_(tensor *, tensor self, scalar other);
void atg_greater_equal_1(tensor *, tensor self, tensor other);
void atg_greater_equal_out(tensor *, tensor out, tensor self, scalar other);
void atg_greater_equal_out1(tensor *, tensor out, tensor self, tensor other);
void atg_greater_out(tensor *, tensor out, tensor self, scalar other);
void atg_greater_out1(tensor *, tensor out, tensor self, tensor other);
void atg_grid_sampler(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_2d(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_2d_backward(tensor *, tensor grad_output, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
@ -525,16 +659,31 @@ void atg_hardsigmoid(tensor *, tensor self);
void atg_hardsigmoid_(tensor *, tensor self);
void atg_hardsigmoid_backward(tensor *, tensor grad_output, tensor self);
void atg_hardsigmoid_out(tensor *, tensor out, tensor self);
void atg_hardswish(tensor *, tensor self);
void atg_hardswish_(tensor *, tensor self);
void atg_hardswish_backward(tensor *, tensor grad_output, tensor self);
void atg_hardswish_out(tensor *, tensor out, tensor self);
void atg_hardtanh(tensor *, tensor self);
void atg_hardtanh_(tensor *, tensor self);
void atg_hardtanh_backward(tensor *, tensor grad_output, tensor self, scalar min_val, scalar max_val);
void atg_hardtanh_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar min_val, scalar max_val);
void atg_hardtanh_out(tensor *, tensor out, tensor self);
void atg_heaviside(tensor *, tensor self, tensor values);
void atg_heaviside_(tensor *, tensor self, tensor values);
void atg_heaviside_out(tensor *, tensor out, tensor self, tensor values);
void atg_hinge_embedding_loss(tensor *, tensor self, tensor target, double margin, int64_t reduction);
void atg_histc(tensor *, tensor self, int64_t bins);
void atg_histc_out(tensor *, tensor out, tensor self, int64_t bins);
void atg_hspmm(tensor *, tensor mat1, tensor mat2);
void atg_hspmm_out(tensor *, tensor out, tensor mat1, tensor mat2);
void atg_hstack(tensor *, tensor *tensors_data, int tensors_len);
void atg_hstack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
void atg_hypot(tensor *, tensor self, tensor other);
void atg_hypot_(tensor *, tensor self, tensor other);
void atg_hypot_out(tensor *, tensor out, tensor self, tensor other);
void atg_i0(tensor *, tensor self);
void atg_i0_(tensor *, tensor self);
void atg_i0_out(tensor *, tensor out, tensor self);
void atg_ifft(tensor *, tensor self, int64_t signal_ndim, int normalized);
void atg_im2col(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg_im2col_backward(tensor *, tensor grad_output, int64_t *input_size_data, int input_size_len, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
@ -553,8 +702,10 @@ void atg_index_fill_1(tensor *, tensor self, int64_t dim, tensor index, tensor v
void atg_index_put(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate);
void atg_index_put_(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate);
void atg_index_select(tensor *, tensor self, int64_t dim, tensor index);
void atg_index_select_backward(tensor *, tensor grad, int64_t *self_sizes_data, int self_sizes_len, int64_t dim, tensor index);
void atg_index_select_out(tensor *, tensor out, tensor self, int64_t dim, tensor index);
void atg_indices(tensor *, tensor self);
void atg_infinitely_differentiable_gelu_backward(tensor *, tensor grad, tensor self);
void atg_instance_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int use_input_stats, double momentum, double eps, int cudnn_enabled);
void atg_int_repr(tensor *, tensor self);
void atg_inverse(tensor *, tensor self);
@ -564,8 +715,17 @@ void atg_isclose(tensor *, tensor self, tensor other, double rtol, double atol,
void atg_isfinite(tensor *, tensor self);
void atg_isinf(tensor *, tensor self);
void atg_isnan(tensor *, tensor self);
void atg_kl_div(tensor *, tensor self, tensor target, int64_t reduction);
void atg_kl_div_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_isneginf(tensor *, tensor self);
void atg_isneginf_out(tensor *, tensor out, tensor self);
void atg_isposinf(tensor *, tensor self);
void atg_isposinf_out(tensor *, tensor out, tensor self);
void atg_isreal(tensor *, tensor self);
void atg_istft(tensor *, tensor self, int64_t n_fft, int64_t hop_length_v, uint8_t hop_length_null, int64_t win_length_v, uint8_t win_length_null, tensor window, int center, int normalized, int onesided, int64_t length_v, uint8_t length_null, int return_complex);
void atg_kaiser_window(tensor *, int64_t window_length, int options_kind, int options_device);
void atg_kaiser_window1(tensor *, int64_t window_length, int periodic, int options_kind, int options_device);
void atg_kaiser_window2(tensor *, int64_t window_length, int periodic, double beta, int options_kind, int options_device);
void atg_kl_div(tensor *, tensor self, tensor target, int64_t reduction, int log_target);
void atg_kl_div_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction, int log_target);
void atg_kthvalue(tensor *, tensor self, int64_t k, int64_t dim, int keepdim);
void atg_kthvalue_out(tensor *, tensor values, tensor indices, tensor self, int64_t k, int64_t dim, int keepdim);
void atg_l1_loss(tensor *, tensor self, tensor target, int64_t reduction);
@ -573,6 +733,9 @@ void atg_l1_loss_backward(tensor *, tensor grad_output, tensor self, tensor targ
void atg_l1_loss_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_l1_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction);
void atg_layer_norm(tensor *, tensor input, int64_t *normalized_shape_data, int normalized_shape_len, tensor weight, tensor bias, double eps, int cudnn_enable);
void atg_lcm(tensor *, tensor self, tensor other);
void atg_lcm_(tensor *, tensor self, tensor other);
void atg_lcm_out(tensor *, tensor out, tensor self, tensor other);
void atg_le(tensor *, tensor self, scalar other);
void atg_le1(tensor *, tensor self, tensor other);
void atg_le_(tensor *, tensor self, scalar other);
@ -589,12 +752,29 @@ void atg_lerp_(tensor *, tensor self, tensor end, scalar weight);
void atg_lerp_1(tensor *, tensor self, tensor end, tensor weight);
void atg_lerp_out(tensor *, tensor out, tensor self, tensor end, scalar weight);
void atg_lerp_out1(tensor *, tensor out, tensor self, tensor end, tensor weight);
void atg_less(tensor *, tensor self, scalar other);
void atg_less1(tensor *, tensor self, tensor other);
void atg_less_(tensor *, tensor self, scalar other);
void atg_less_1(tensor *, tensor self, tensor other);
void atg_less_equal(tensor *, tensor self, scalar other);
void atg_less_equal1(tensor *, tensor self, tensor other);
void atg_less_equal_(tensor *, tensor self, scalar other);
void atg_less_equal_1(tensor *, tensor self, tensor other);
void atg_less_equal_out(tensor *, tensor out, tensor self, scalar other);
void atg_less_equal_out1(tensor *, tensor out, tensor self, tensor other);
void atg_less_out(tensor *, tensor out, tensor self, scalar other);
void atg_less_out1(tensor *, tensor out, tensor self, tensor other);
void atg_lgamma(tensor *, tensor self);
void atg_lgamma_(tensor *, tensor self);
void atg_lgamma_out(tensor *, tensor out, tensor self);
void atg_linalg_det(tensor *, tensor self);
void atg_linalg_norm(tensor *, tensor self, scalar ord, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linalg_norm1(tensor *, tensor self, char* ord_ptr, int ord_len, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linalg_norm_out(tensor *, tensor out, tensor self, scalar ord, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linalg_norm_out1(tensor *, tensor out, tensor self, char* ord_ptr, int ord_len, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linear(tensor *, tensor input, tensor weight, tensor bias);
void atg_linspace(tensor *, scalar start, scalar end, int64_t steps, int options_kind, int options_device);
void atg_linspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps);
void atg_linspace(tensor *, scalar start, scalar end, int64_t steps_v, uint8_t steps_null, int options_kind, int options_device);
void atg_linspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps_v, uint8_t steps_null);
void atg_log(tensor *, tensor self);
void atg_log10(tensor *, tensor self);
void atg_log10_(tensor *, tensor self);
@ -613,6 +793,12 @@ void atg_log_sigmoid_backward(tensor *, tensor grad_output, tensor self, tensor
void atg_log_sigmoid_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor buffer);
void atg_log_sigmoid_out(tensor *, tensor out, tensor self);
void atg_log_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg_logaddexp(tensor *, tensor self, tensor other);
void atg_logaddexp2(tensor *, tensor self, tensor other);
void atg_logaddexp2_out(tensor *, tensor out, tensor self, tensor other);
void atg_logaddexp_out(tensor *, tensor out, tensor self, tensor other);
void atg_logcumsumexp(tensor *, tensor self, int64_t dim);
void atg_logcumsumexp_out(tensor *, tensor out, tensor self, int64_t dim);
void atg_logdet(tensor *, tensor self);
void atg_logical_and(tensor *, tensor self, tensor other);
void atg_logical_and_(tensor *, tensor self, tensor other);
@ -626,8 +812,13 @@ void atg_logical_or_out(tensor *, tensor out, tensor self, tensor other);
void atg_logical_xor(tensor *, tensor self, tensor other);
void atg_logical_xor_(tensor *, tensor self, tensor other);
void atg_logical_xor_out(tensor *, tensor out, tensor self, tensor other);
void atg_logspace(tensor *, scalar start, scalar end, int64_t steps, double base, int options_kind, int options_device);
void atg_logspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps, double base);
void atg_logit(tensor *, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_(tensor *, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_backward(tensor *, tensor grad_output, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_out(tensor *, tensor out, tensor self, double eps_v, uint8_t eps_null);
void atg_logspace(tensor *, scalar start, scalar end, int64_t steps_v, uint8_t steps_null, double base, int options_kind, int options_device);
void atg_logspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps_v, uint8_t steps_null, double base);
void atg_logsumexp(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_logsumexp_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_lstm(tensor *, tensor input, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
@ -651,9 +842,12 @@ void atg_masked_fill_1(tensor *, tensor self, tensor mask, tensor value);
void atg_masked_scatter(tensor *, tensor self, tensor mask, tensor source);
void atg_masked_scatter_(tensor *, tensor self, tensor mask, tensor source);
void atg_masked_select(tensor *, tensor self, tensor mask);
void atg_masked_select_backward(tensor *, tensor grad, tensor input, tensor mask);
void atg_masked_select_out(tensor *, tensor out, tensor self, tensor mask);
void atg_matmul(tensor *, tensor self, tensor other);
void atg_matmul_out(tensor *, tensor out, tensor self, tensor other);
void atg_matrix_exp(tensor *, tensor self);
void atg_matrix_exp_backward(tensor *, tensor self, tensor grad);
void atg_matrix_power(tensor *, tensor self, int64_t n);
void atg_matrix_rank(tensor *, tensor self, int symmetric);
void atg_matrix_rank1(tensor *, tensor self, double tol, int symmetric);
@ -682,7 +876,8 @@ void atg_max_unpool3d(tensor *, tensor self, tensor indices, int64_t *output_siz
void atg_max_unpool3d_backward(tensor *, tensor grad_output, tensor self, tensor indices, int64_t *output_size_data, int output_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg_max_unpool3d_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor indices, int64_t *output_size_data, int output_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg_max_unpool3d_out(tensor *, tensor out, tensor self, tensor indices, int64_t *output_size_data, int output_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg_max_values(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_maximum(tensor *, tensor self, tensor other);
void atg_maximum_out(tensor *, tensor out, tensor self, tensor other);
void atg_mean(tensor *, tensor self, int dtype);
void atg_mean1(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_mean_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
@ -695,7 +890,8 @@ void atg_min1(tensor *, tensor self, tensor other);
void atg_min2(tensor *, tensor self, int64_t dim, int keepdim);
void atg_min_out(tensor *, tensor out, tensor self, tensor other);
void atg_min_out1(tensor *, tensor min, tensor min_indices, tensor self, int64_t dim, int keepdim);
void atg_min_values(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_minimum(tensor *, tensor self, tensor other);
void atg_minimum_out(tensor *, tensor out, tensor self, tensor other);
void atg_miopen_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double exponential_average_factor, double epsilon);
void atg_miopen_batch_norm_backward(tensor *, tensor input, tensor grad_output, tensor weight, tensor running_mean, tensor running_var, tensor save_mean, tensor save_var, double epsilon);
void atg_miopen_convolution(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
@ -715,11 +911,15 @@ void atg_mkldnn_convolution_backward_input(tensor *, int64_t *self_size_data, in
void atg_mkldnn_convolution_backward_weights(tensor *, int64_t *weight_size_data, int weight_size_len, tensor grad_output, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int bias_defined);
void atg_mkldnn_linear(tensor *, tensor input, tensor weight, tensor bias);
void atg_mkldnn_max_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_reorder_conv2d_weight(tensor *, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_mkldnn_reorder_conv3d_weight(tensor *, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_mm(tensor *, tensor self, tensor mat2);
void atg_mm_out(tensor *, tensor out, tensor self, tensor mat2);
void atg_mode(tensor *, tensor self, int64_t dim, int keepdim);
void atg_mode_out(tensor *, tensor values, tensor indices, tensor self, int64_t dim, int keepdim);
void atg_movedim(tensor *, tensor self, int64_t *source_data, int source_len, int64_t *destination_data, int destination_len);
void atg_movedim1(tensor *, tensor self, int64_t source, int64_t destination);
void atg_mse_loss(tensor *, tensor self, tensor target, int64_t reduction);
void atg_mse_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_mse_loss_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction);
@ -737,17 +937,31 @@ void atg_multilabel_margin_loss_backward_out(tensor *, tensor grad_input, tensor
void atg_multilabel_margin_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction);
void atg_multinomial(tensor *, tensor self, int64_t num_samples, int replacement);
void atg_multinomial_out(tensor *, tensor out, tensor self, int64_t num_samples, int replacement);
void atg_multiply(tensor *, tensor self, tensor other);
void atg_multiply1(tensor *, tensor self, scalar other);
void atg_multiply_(tensor *, tensor self, tensor other);
void atg_multiply_1(tensor *, tensor self, scalar other);
void atg_multiply_out(tensor *, tensor out, tensor self, tensor other);
void atg_mv(tensor *, tensor self, tensor vec);
void atg_mv_out(tensor *, tensor out, tensor self, tensor vec);
void atg_mvlgamma(tensor *, tensor self, int64_t p);
void atg_mvlgamma_(tensor *, tensor self, int64_t p);
void atg_nanquantile(tensor *, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_nanquantile1(tensor *, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_nanquantile_out(tensor *, tensor out, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_nanquantile_out1(tensor *, tensor out, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_nansum(tensor *, tensor self, int dtype);
void atg_nansum1(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_nansum_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_narrow(tensor *, tensor self, int64_t dim, int64_t start, int64_t length);
void atg_narrow1(tensor *, tensor self, int64_t dim, tensor start, int64_t length);
void atg_narrow_copy(tensor *, tensor self, int64_t dim, int64_t start, int64_t length);
void atg_native_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg_native_batch_norm_out(tensor *, tensor out, tensor save_mean, tensor save_invstd, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg_native_group_norm(tensor *, tensor input, tensor weight, tensor bias, int64_t n, int64_t C, int64_t HxW, int64_t group, double eps);
void atg_native_layer_norm(tensor *, tensor input, tensor weight, tensor bias, int64_t M, int64_t n, double eps);
void atg_native_norm(tensor *, tensor self);
void atg_native_norm1(tensor *, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_ne(tensor *, tensor self, scalar other);
void atg_ne1(tensor *, tensor self, tensor other);
void atg_ne_(tensor *, tensor self, scalar other);
@ -757,9 +971,15 @@ void atg_ne_out1(tensor *, tensor out, tensor self, tensor other);
void atg_neg(tensor *, tensor self);
void atg_neg_(tensor *, tensor self);
void atg_neg_out(tensor *, tensor out, tensor self);
void atg_negative(tensor *, tensor self);
void atg_negative_(tensor *, tensor self);
void atg_negative_out(tensor *, tensor out, tensor self);
void atg_new_empty(tensor *, tensor self, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_new_full(tensor *, tensor self, int64_t *size_data, int size_len, scalar fill_value, int options_kind, int options_device);
void atg_new_zeros(tensor *, tensor self, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_nextafter(tensor *, tensor self, tensor other);
void atg_nextafter_(tensor *, tensor self, tensor other);
void atg_nextafter_out(tensor *, tensor out, tensor self, tensor other);
void atg_nll_loss(tensor *, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index);
void atg_nll_loss2d(tensor *, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index);
void atg_nll_loss2d_backward(tensor *, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index, tensor total_weight);
@ -783,6 +1003,12 @@ void atg_normal_out(tensor *, tensor out, tensor mean, double std);
void atg_normal_out1(tensor *, tensor out, double mean, tensor std);
void atg_normal_out2(tensor *, tensor out, tensor mean, tensor std);
void atg_normal_out3(tensor *, tensor out, double mean, double std, int64_t *size_data, int size_len);
void atg_not_equal(tensor *, tensor self, scalar other);
void atg_not_equal1(tensor *, tensor self, tensor other);
void atg_not_equal_(tensor *, tensor self, scalar other);
void atg_not_equal_1(tensor *, tensor self, tensor other);
void atg_not_equal_out(tensor *, tensor out, tensor self, scalar other);
void atg_not_equal_out1(tensor *, tensor out, tensor self, tensor other);
void atg_nuclear_norm(tensor *, tensor self, int keepdim);
void atg_nuclear_norm1(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_nuclear_norm_out(tensor *, tensor out, tensor self, int keepdim);
@ -796,6 +1022,8 @@ void atg_orgqr(tensor *, tensor self, tensor input2);
void atg_orgqr_out(tensor *, tensor out, tensor self, tensor input2);
void atg_ormqr(tensor *, tensor self, tensor input2, tensor input3, int left, int transpose);
void atg_ormqr_out(tensor *, tensor out, tensor self, tensor input2, tensor input3, int left, int transpose);
void atg_outer(tensor *, tensor self, tensor vec2);
void atg_outer_out(tensor *, tensor out, tensor self, tensor vec2);
void atg_pairwise_distance(tensor *, tensor x1, tensor x2, double p, double eps, int keepdim);
void atg_pdist(tensor *, tensor self, double p);
void atg_permute(tensor *, tensor self, int64_t *dims_data, int dims_len);
@ -804,6 +1032,8 @@ void atg_pinverse(tensor *, tensor self, double rcond);
void atg_pixel_shuffle(tensor *, tensor self, int64_t upscale_factor);
void atg_poisson(tensor *, tensor self);
void atg_poisson_nll_loss(tensor *, tensor input, tensor target, int log_input, int full, double eps, int64_t reduction);
void atg_polar(tensor *, tensor abs, tensor angle);
void atg_polar_out(tensor *, tensor out, tensor abs, tensor angle);
void atg_polygamma(tensor *, int64_t n, tensor self);
void atg_polygamma_(tensor *, tensor self, int64_t n);
void atg_polygamma_out(tensor *, tensor out, int64_t n, tensor self);
@ -825,18 +1055,23 @@ void atg_q_per_channel_scales(tensor *, tensor self);
void atg_q_per_channel_zero_points(tensor *, tensor self);
void atg_qr(tensor *, tensor self, int some);
void atg_qr_out(tensor *, tensor Q, tensor R, tensor self, int some);
void atg_quantile(tensor *, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_quantile1(tensor *, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_quantile_out(tensor *, tensor out, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_quantile_out1(tensor *, tensor out, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_quantize_per_channel(tensor *, tensor self, tensor scales, tensor zero_points, int64_t axis, int dtype);
void atg_quantize_per_tensor(tensor *, tensor self, double scale, int64_t zero_point, int dtype);
tensor *atg_quantize_per_tensor1(tensor *tensors_data, int tensors_len, tensor scales, tensor zero_points, int dtype);
void atg_quantized_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor mean, tensor var, double eps, double output_scale, int64_t output_zero_point);
void atg_quantized_gru(tensor *, tensor input, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg_quantized_gru1(tensor *, tensor data, tensor batch_sizes, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional);
void atg_quantized_gru_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh, tensor packed_ih, tensor packed_hh, tensor col_offsets_ih, tensor col_offsets_hh, scalar scale_ih, scalar scale_hh, scalar zero_point_ih, scalar zero_point_hh);
void atg_quantized_lstm(tensor *, tensor input, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first, int dtype, int use_dynamic);
void atg_quantized_lstm1(tensor *, tensor data, tensor batch_sizes, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int dtype, int use_dynamic);
void atg_quantized_lstm_cell(tensor *, tensor input, tensor *hx_data, int hx_len, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh, tensor packed_ih, tensor packed_hh, tensor col_offsets_ih, tensor col_offsets_hh, scalar scale_ih, scalar scale_hh, scalar zero_point_ih, scalar zero_point_hh);
void atg_quantized_max_pool1d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_max_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_rnn_relu_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh, tensor packed_ih, tensor packed_hh, tensor col_offsets_ih, tensor col_offsets_hh, scalar scale_ih, scalar scale_hh, scalar zero_point_ih, scalar zero_point_hh);
void atg_quantized_rnn_tanh_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh, tensor packed_ih, tensor packed_hh, tensor col_offsets_ih, tensor col_offsets_hh, scalar scale_ih, scalar scale_hh, scalar zero_point_ih, scalar zero_point_hh);
void atg_rad2deg(tensor *, tensor self);
void atg_rad2deg_(tensor *, tensor self);
void atg_rad2deg_out(tensor *, tensor out, tensor self);
void atg_rand(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_rand_like(tensor *, tensor self);
void atg_rand_out(tensor *, tensor out, int64_t *size_data, int size_len);
@ -851,7 +1086,7 @@ void atg_randn_like(tensor *, tensor self);
void atg_randn_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg_random_(tensor *, tensor self);
void atg_random_1(tensor *, tensor self, int64_t to);
void atg_random_2(tensor *, tensor self, int64_t from, int64_t to);
void atg_random_2(tensor *, tensor self, int64_t from, int64_t to_v, uint8_t to_null);
void atg_randperm(tensor *, int64_t n, int options_kind, int options_device);
void atg_randperm_out(tensor *, tensor out, int64_t n);
void atg_range(tensor *, scalar start, scalar end, int options_kind, int options_device);
@ -882,8 +1117,8 @@ void atg_renorm_(tensor *, tensor self, scalar p, int64_t dim, scalar maxnorm);
void atg_renorm_out(tensor *, tensor out, tensor self, scalar p, int64_t dim, scalar maxnorm);
void atg_repeat(tensor *, tensor self, int64_t *repeats_data, int repeats_len);
void atg_repeat_interleave(tensor *, tensor repeats);
void atg_repeat_interleave1(tensor *, tensor self, tensor repeats, int64_t dim);
void atg_repeat_interleave2(tensor *, tensor self, int64_t repeats, int64_t dim);
void atg_repeat_interleave1(tensor *, tensor self, tensor repeats, int64_t dim_v, uint8_t dim_null);
void atg_repeat_interleave2(tensor *, tensor self, int64_t repeats, int64_t dim_v, uint8_t dim_null);
void atg_replication_pad1d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad1d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad1d_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
@ -896,7 +1131,7 @@ void atg_replication_pad3d(tensor *, tensor self, int64_t *padding_data, int pad
void atg_replication_pad3d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad3d_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad3d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
void atg_requires_grad_(tensor *, tensor self, int _requires_grad);
void atg_requires_grad_(tensor *, tensor self, int requires_grad);
void atg_reshape(tensor *, tensor self, int64_t *shape_data, int shape_len);
void atg_reshape_as(tensor *, tensor self, tensor other);
void atg_resize_(tensor *, tensor self, int64_t *size_data, int size_len);
@ -929,14 +1164,23 @@ void atg_scatter(tensor *, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter1(tensor *, tensor self, int64_t dim, tensor index, scalar value);
void atg_scatter_(tensor *, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_1(tensor *, tensor self, int64_t dim, tensor index, scalar value);
void atg_scatter_2(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len);
void atg_scatter_3(tensor *, tensor self, int64_t dim, tensor index, scalar value, char* reduce_ptr, int reduce_len);
void atg_scatter_add(tensor *, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_add_(tensor *, tensor self, int64_t dim, tensor index, tensor src);
void atg_searchsorted(tensor *, tensor sorted_sequence, tensor self, int out_int32, int right);
void atg_searchsorted1(tensor *, tensor sorted_sequence, scalar self_scalar, int out_int32, int right);
void atg_searchsorted_out(tensor *, tensor out, tensor sorted_sequence, tensor self, int out_int32, int right);
void atg_select(tensor *, tensor self, int64_t dim, int64_t index);
void atg_select_backward(tensor *, tensor grad, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t index);
void atg_selu(tensor *, tensor self);
void atg_selu_(tensor *, tensor self);
void atg_set_(tensor *, tensor self);
void atg_set_1(tensor *, tensor self, tensor source);
void atg_set_requires_grad(tensor *, tensor self, int r);
void atg_sgn(tensor *, tensor self);
void atg_sgn_(tensor *, tensor self);
void atg_sgn_out(tensor *, tensor out, tensor self);
void atg_sigmoid(tensor *, tensor self);
void atg_sigmoid_(tensor *, tensor self);
void atg_sigmoid_backward(tensor *, tensor grad_output, tensor output);
@ -945,6 +1189,12 @@ void atg_sigmoid_out(tensor *, tensor out, tensor self);
void atg_sign(tensor *, tensor self);
void atg_sign_(tensor *, tensor self);
void atg_sign_out(tensor *, tensor out, tensor self);
void atg_signbit(tensor *, tensor self);
void atg_signbit_out(tensor *, tensor out, tensor self);
void atg_silu(tensor *, tensor self);
void atg_silu_(tensor *, tensor self);
void atg_silu_backward(tensor *, tensor grad_output, tensor self);
void atg_silu_out(tensor *, tensor out, tensor self);
void atg_sin(tensor *, tensor self);
void atg_sin_(tensor *, tensor self);
void atg_sin_out(tensor *, tensor out, tensor self);
@ -952,6 +1202,7 @@ void atg_sinh(tensor *, tensor self);
void atg_sinh_(tensor *, tensor self);
void atg_sinh_out(tensor *, tensor out, tensor self);
void atg_slice(tensor *, tensor self, int64_t dim, int64_t start, int64_t end, int64_t step);
void atg_slice_backward(tensor *, tensor grad, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t start, int64_t end, int64_t step);
void atg_slogdet(tensor *, tensor self);
void atg_slow_conv3d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg_slow_conv3d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
@ -962,10 +1213,10 @@ void atg_slow_conv_transpose2d_out(tensor *, tensor out, tensor self, tensor wei
void atg_slow_conv_transpose3d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_transpose3d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *dilation_data, int dilation_len);
void atg_smm(tensor *, tensor self, tensor mat2);
void atg_smooth_l1_loss(tensor *, tensor self, tensor target, int64_t reduction);
void atg_smooth_l1_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_smooth_l1_loss_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_smooth_l1_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction);
void atg_smooth_l1_loss(tensor *, tensor self, tensor target, int64_t reduction, double beta);
void atg_smooth_l1_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction, double beta);
void atg_smooth_l1_loss_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction, double beta);
void atg_smooth_l1_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction, double beta);
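/* Editor's note (sketch): smooth_l1_loss gains an explicit beta in v1.7.0; passing
   1.0 is assumed to match the previous fixed behaviour, and reduction is assumed to
   follow the usual 0 = none / 1 = mean / 2 = sum encoding. */
static inline void example_atg_smooth_l1_loss(tensor self, tensor target) {
  tensor out__[1];
  atg_smooth_l1_loss(out__, self, target, 1, 1.0);  /* mean reduction, beta = 1.0 */
}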
void atg_soft_margin_loss(tensor *, tensor self, tensor target, int64_t reduction);
void atg_soft_margin_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_soft_margin_loss_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction);
@ -1009,12 +1260,17 @@ void atg_std1(tensor *, tensor self, int64_t *dim_data, int dim_len, int unbiase
void atg_std_mean(tensor *, tensor self, int unbiased);
void atg_std_mean1(tensor *, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_std_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_stft(tensor *, tensor self, int64_t n_fft, int64_t hop_length, int64_t win_length, tensor window, int normalized, int onesided);
void atg_stft(tensor *, tensor self, int64_t n_fft, int64_t hop_length_v, uint8_t hop_length_null, int64_t win_length_v, uint8_t win_length_null, tensor window, int normalized, int onesided, int return_complex);
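/* Editor's note (sketch): stft now takes nullable hop_length / win_length and a
   return_complex flag; leaving both lengths omitted and return_complex = 0 is
   assumed to reproduce the pre-1.7 real-valued output layout. */
static inline void example_atg_stft_defaults(tensor self, tensor window) {
  tensor out__[1];
  atg_stft(out__, self, 400, 0, 1, 0, 1, window,
           /*normalized=*/0, /*onesided=*/1, /*return_complex=*/0);
}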
void atg_sub(tensor *, tensor self, tensor other);
void atg_sub1(tensor *, tensor self, scalar other);
void atg_sub_(tensor *, tensor self, tensor other);
void atg_sub_1(tensor *, tensor self, scalar other);
void atg_sub_out(tensor *, tensor out, tensor self, tensor other);
void atg_subtract(tensor *, tensor self, tensor other);
void atg_subtract1(tensor *, tensor self, scalar other);
void atg_subtract_(tensor *, tensor self, tensor other);
void atg_subtract_1(tensor *, tensor self, scalar other);
void atg_subtract_out(tensor *, tensor out, tensor self, tensor other);
void atg_sum(tensor *, tensor self, int dtype);
void atg_sum1(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_sum_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
@ -1026,6 +1282,7 @@ void atg_symeig_out(tensor *, tensor e, tensor V, tensor self, int eigenvectors,
void atg_t(tensor *, tensor self);
void atg_t_(tensor *, tensor self);
void atg_take(tensor *, tensor self, tensor index);
void atg_take_backward(tensor *, tensor grad, tensor input, tensor index);
void atg_take_out(tensor *, tensor out, tensor self, tensor index);
void atg_tan(tensor *, tensor self);
void atg_tan_(tensor *, tensor self);
@ -1055,6 +1312,7 @@ void atg_topk(tensor *, tensor self, int64_t k, int64_t dim, int largest, int so
void atg_topk_out(tensor *, tensor values, tensor indices, tensor self, int64_t k, int64_t dim, int largest, int sorted);
void atg_totype(tensor *, tensor self, int scalar_type);
void atg_trace(tensor *, tensor self);
void atg_trace_backward(tensor *, tensor grad, int64_t *sizes_data, int sizes_len);
void atg_transpose(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg_transpose_(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg_trapz(tensor *, tensor y, tensor x, int64_t dim);
@ -1080,51 +1338,67 @@ void atg_trunc_(tensor *, tensor self);
void atg_trunc_out(tensor *, tensor out, tensor self);
void atg_type_as(tensor *, tensor self, tensor other);
tensor *atg_unbind(tensor self, int64_t dim);
void atg_unflatten(tensor *, tensor self, int64_t dim, int64_t *sizes_data, int sizes_len);
void atg_unfold(tensor *, tensor self, int64_t dimension, int64_t size, int64_t step);
void atg_unfold_backward(tensor *, tensor grad_in, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t size, int64_t step);
void atg_uniform_(tensor *, tensor self, double from, double to);
void atg_unique_consecutive(tensor *, tensor self, int return_inverse, int return_counts, int64_t dim);
void atg_unique_consecutive(tensor *, tensor self, int return_inverse, int return_counts, int64_t dim_v, uint8_t dim_null);
void atg_unique_dim(tensor *, tensor self, int64_t dim, int sorted, int return_inverse, int return_counts);
void atg_unique_dim_consecutive(tensor *, tensor self, int64_t dim, int return_inverse, int return_counts);
tensor *atg_unsafe_chunk(tensor self, int64_t chunks, int64_t dim);
tensor *atg_unsafe_split(tensor self, int64_t split_size, int64_t dim);
tensor *atg_unsafe_split_with_sizes(tensor self, int64_t *split_sizes_data, int split_sizes_len, int64_t dim);
void atg_unsqueeze(tensor *, tensor self, int64_t dim);
void atg_unsqueeze_(tensor *, tensor self, int64_t dim);
void atg_upsample_bicubic2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h, double scales_w);
void atg_upsample_bicubic2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h, double scales_w);
void atg_upsample_bicubic2d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h, double scales_w);
void atg_upsample_bicubic2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h, double scales_w);
void atg_upsample_bilinear2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h, double scales_w);
void atg_upsample_bilinear2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h, double scales_w);
void atg_upsample_bilinear2d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h, double scales_w);
void atg_upsample_bilinear2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h, double scales_w);
void atg_upsample_linear1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales);
void atg_upsample_linear1d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales);
void atg_upsample_linear1d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales);
void atg_upsample_linear1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales);
void atg_upsample_nearest1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales);
void atg_upsample_nearest1d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales);
void atg_upsample_nearest1d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales);
void atg_upsample_nearest1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales);
void atg_upsample_nearest2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_h, double scales_w);
void atg_upsample_nearest2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h, double scales_w);
void atg_upsample_nearest2d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h, double scales_w);
void atg_upsample_nearest2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_h, double scales_w);
void atg_upsample_nearest3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_d, double scales_h, double scales_w);
void atg_upsample_nearest3d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d, double scales_h, double scales_w);
void atg_upsample_nearest3d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d, double scales_h, double scales_w);
void atg_upsample_nearest3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_d, double scales_h, double scales_w);
void atg_upsample_trilinear3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_d, double scales_h, double scales_w);
void atg_upsample_trilinear3d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_d, double scales_h, double scales_w);
void atg_upsample_trilinear3d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_d, double scales_h, double scales_w);
void atg_upsample_trilinear3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_d, double scales_h, double scales_w);
void atg_upsample_bicubic2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bicubic2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bicubic2d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bicubic2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bilinear2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bilinear2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bilinear2d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bilinear2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_linear1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_v, uint8_t scales_null);
void atg_upsample_linear1d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_v, uint8_t scales_null);
void atg_upsample_linear1d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_v, uint8_t scales_null);
void atg_upsample_linear1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_v, uint8_t scales_null);
void atg_upsample_nearest1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_v, uint8_t scales_null);
void atg_upsample_nearest1d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_v, uint8_t scales_null);
void atg_upsample_nearest1d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_v, uint8_t scales_null);
void atg_upsample_nearest1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_v, uint8_t scales_null);
void atg_upsample_nearest2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest2d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest3d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest3d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_trilinear3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_trilinear3d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_trilinear3d_backward_out(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_trilinear3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_value_selecting_reduction_backward(tensor *, tensor grad, int64_t dim, tensor indices, int64_t *sizes_data, int sizes_len, int keepdim);
void atg_values(tensor *, tensor self);
void atg_vander(tensor *, tensor x, int64_t n_v, uint8_t n_null, int increasing);
void atg_var(tensor *, tensor self, int unbiased);
void atg_var1(tensor *, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_var_mean(tensor *, tensor self, int unbiased);
void atg_var_mean1(tensor *, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_var_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_vdot(tensor *, tensor self, tensor other);
void atg_vdot_out(tensor *, tensor out, tensor self, tensor other);
void atg_view(tensor *, tensor self, int64_t *size_data, int size_len);
void atg_view_as(tensor *, tensor self, tensor other);
void atg_view_as_complex(tensor *, tensor self);
void atg_view_as_real(tensor *, tensor self);
void atg_vstack(tensor *, tensor *tensors_data, int tensors_len);
void atg_vstack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
tensor *atg_where(tensor condition);
void atg_where1(tensor *, tensor condition, tensor self, tensor other);
void atg_where2(tensor *, tensor condition, scalar self_scalar, tensor other);
void atg_where3(tensor *, tensor condition, tensor self, scalar other);
void atg_where4(tensor *, tensor condition, scalar self_scalar, scalar other);
void atg_zero_(tensor *, tensor self);
void atg_zeros(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_zeros_like(tensor *, tensor self);

View File

@ -254,6 +254,40 @@ func BatchAccuracyForLogits(vs *VarStore, m ts.ModuleT, xs, ys *ts.Tensor, d got
return sumAccuracy / sampleCount
}
func BatchAccuracyForLogitsOld(vs *VarStore, m ts.ModuleT, xs, ys *ts.Tensor, d gotch.Device, batchSize int) (retVal float64) {
    var (
        sumAccuracy float64 = 0.0
        sampleCount float64 = 0.0
    )

    vs.Freeze()
    defer vs.Unfreeze()

    iter2 := ts.MustNewIter2(xs, ys, int64(batchSize))
    for {
        item, ok := iter2.Next()
        if !ok {
            break
        }

        size := float64(item.Data.MustSize()[0])
        bImages := item.Data.MustTo(d, true)
        bLabels := item.Label.MustTo(d, true)

        logits := m.ForwardT(bImages, false)
        acc := logits.AccuracyForLogits(bLabels)
        sumAccuracy += acc.Float64Values()[0] * size
        sampleCount += size

        bImages.MustDrop()
        bLabels.MustDrop()
        acc.MustDrop()
    }

    return sumAccuracy / sampleCount
}
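
For orientation, a minimal usage sketch of the helper defined above; apart from `nn.BatchAccuracyForLogitsOld` itself, the function and variable names below are assumptions, not part of this diff:

```go
// evalAccuracy is a hypothetical wrapper around the helper above. It expects the
// VarStore holding the weights, a ts.ModuleT model, the evaluation tensors, and a
// target device, and evaluates in batches of 256.
func evalAccuracy(vs *nn.VarStore, model ts.ModuleT, xTest, yTest *ts.Tensor, device gotch.Device) float64 {
    // The helper freezes the VarStore, iterates (xTest, yTest) with Iter2, moves each
    // batch to `device`, and drops the batch tensors once their accuracy is accumulated.
    return nn.BatchAccuracyForLogitsOld(vs, model, xTest, yTest, device, 256)
}
```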
// BatchAccuracyForLogitIdx is an alternative to BatchAccuracyForLogits for
// calculating accuracy on a specified batch using the module's weights. It uses
// tensor indexing instead of Iter2.

View File

@ -1,8 +1,8 @@
#!/bin/bash
# Env
GOTCH_VERSION="${GOTCH_VER:-v0.2.0}"
LIBTORCH_VERSION="${LIBTORCH_VER:-1.5.1}"
GOTCH_VERSION="${GOTCH_VER:-v0.3.0}"
LIBTORCH_VERSION="${LIBTORCH_VER:-1.7.0}"
GOTCH="$GOPATH/pkg/mod/github.com/sugarme/gotch@$GOTCH_VERSION"
LIBTORCH="$GOPATH/pkg/mod/github.com/sugarme/gotch@$GOTCH_VERSION/libtch/libtorch"

View File

@ -1,7 +1,7 @@
#!/bin/bash
GOTCH_VERSION="${GOTCH_VER:-v0.2.0}"
LIBTORCH_VERSION="${LIBTORCH_VER:-1.5.1}"
GOTCH_VERSION="${GOTCH_VER:-v0.3.0}"
LIBTORCH_VERSION="${LIBTORCH_VER:-1.7.0}"
CUDA_VERSION="${CUDA_VER:-10.1}"
CU_VERSION="${CUDA_VERSION//./}"

setup.sh Executable file
View File

@ -0,0 +1,18 @@
#!/bin/bash
export GOTCH_VERSION="${GOTCH_VER:-v0.3.0}"
export LIBTORCH_VERSION="${LIBTORCH_VER:-1.7.0}"
export CUDA_VERSION="${CUDA_VER:-10.1}"
export CU_VERSION="${CUDA_VERSION//./}"
export GOTCH="$HOME/projects/sugarme/gotch"
export LIBTORCH="$HOME/projects/sugarme/gotch/libtch/libtorch"
export LIBRARY_PATH="$LIBTORCH/lib"
export CPATH="$LIBTORCH/lib:$LIBTORCH/include:$LIBTORCH/include/torch/csrc/api/include"
export LD_LIBRARY_PATH="$LIBTORCH/lib:/usr/lib64-nvidia:/usr/local/cuda-${CUDA_VERSION}/lib64"
sudo rm -rf $LIBTORCH
sudo mkdir -p $LIBTORCH
wget -O /tmp/libtorch-cxx11-abi-shared-with-deps-${LIBTORCH_VERSION}%2Bcu${CU_VERSION}.zip https://download.pytorch.org/libtorch/cu${CU_VERSION}/libtorch-cxx11-abi-shared-with-deps-${LIBTORCH_VERSION}%2Bcu${CU_VERSION}.zip
sudo unzip /tmp/libtorch-cxx11-abi-shared-with-deps-${LIBTORCH_VERSION}%2Bcu${CU_VERSION}.zip -d $GOTCH/libtch

File diff suppressed because it is too large

View File

@ -67,7 +67,8 @@ func (co *COptimizer) AddParameters(tensors []Tensor) error {
ntensors := len(tensors)
lib.AtoAddParameters(co.coptimizer, ctensors, ntensors)
// NOTE: temporarily switch back, as the param-group API has not been updated yet.
lib.AtoAddParametersOld(co.coptimizer, ctensors, ntensors)
return TorchErr()
}

View File

@ -19,7 +19,7 @@ func (ts *Tensor) CrossEntropyForLogits(targets *Tensor) (retVal *Tensor) {
// AccuracyForLogits returns the average accuracy for some given logits assuming that
// targets represent ground-truth.
func (ts *Tensor) AccuracyForLogits(targets *Tensor) (retVal *Tensor) {
argmax := ts.MustArgmax(-1, false, true)
argmax := ts.MustArgmax([]int64{-1}, false, true)
eq1 := argmax.MustEq1(targets, true)
return eq1.MustTotype(gotch.Float, true).MustMean(gotch.Float, true)
}
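
A small hedged sketch of calling this method (the helper and variable names are assumptions). Judging from the `del` flags in the body above, the receiver is consumed while `targets` stays alive:

```go
// reportBatchAccuracy is a hypothetical helper. `logits` is [batchSize, nClasses] and
// `labels` holds the ground-truth class indices for the batch.
func reportBatchAccuracy(logits, labels *ts.Tensor) float64 {
    acc := logits.AccuracyForLogits(labels) // `logits` is dropped inside (del = true)
    v := acc.Float64Values()[0]
    acc.MustDrop() // the returned scalar tensor still has to be freed explicitly
    return v       // `labels` remains usable by the caller
}
```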

File diff suppressed because it is too large

View File

@ -1171,7 +1171,7 @@ func (ts *Tensor) Swish() *Tensor {
}
func (ts *Tensor) AvgPool2DDefault(ksize int64, del bool) *Tensor {
return ts.MustAvgPool2d([]int64{ksize, ksize}, []int64{ksize, ksize}, []int64{0, 0}, false, true, 1, del)
return ts.MustAvgPool2d([]int64{ksize, ksize}, []int64{ksize, ksize}, []int64{0, 0}, false, true, []int64{1}, del)
}
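
A hedged sketch showing the convenience wrapper next to the equivalent direct call under the v1.7 signature; the helper and variable names are assumptions:

```go
// poolTwice is a hypothetical helper. `featureMap` is an NCHW tensor.
func poolTwice(featureMap *ts.Tensor) *ts.Tensor {
    // Convenience wrapper: square 2x2 kernel and stride; del = false keeps the input alive.
    pooled := featureMap.AvgPool2DDefault(2, false)

    // Direct call: same pooling again, but note the divisor-override argument is now a
    // slice ([]int64{1}); del = true drops the intermediate `pooled` tensor.
    return pooled.MustAvgPool2d([]int64{2, 2}, []int64{2, 2}, []int64{0, 0}, false, true, []int64{1}, true)
}
```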
// SaveMultiNew saves a slice of named tensors to the given file path.
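
A heavily hedged sketch of how this saver might be used; the `NamedTensor` fields, the error return, and the exact `SaveMultiNew` signature are assumptions and do not appear in this diff:

```go
// Assumed API shape: SaveMultiNew(namedTensors []ts.NamedTensor, path string) error.
// `w1` and `b1` are placeholder tensors created elsewhere.
named := []ts.NamedTensor{
    {Name: "fc1.weight", Tensor: w1},
    {Name: "fc1.bias", Tensor: b1},
}
if err := ts.SaveMultiNew(named, "model-weights.gt"); err != nil {
    log.Fatal(err)
}
```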

View File

@ -103,7 +103,7 @@ func densenet(p *nn.Path, cIn, cOut, bnSize int64, blockConfig []int64, growth i
seq.AddFn(nn.NewFunc(func(xs *ts.Tensor) *ts.Tensor {
tmp1 := xs.MustRelu(false)
tmp2 := tmp1.MustAvgPool2d([]int64{7, 7}, []int64{1, 1}, []int64{0, 0}, false, true, 1, true)
tmp2 := tmp1.MustAvgPool2d([]int64{7, 7}, []int64{1, 1}, []int64{0, 0}, false, true, []int64{1}, true)
res := tmp2.FlatView()
tmp2.MustDrop()
return res

View File

@ -78,7 +78,7 @@ func inceptionA(p *nn.Path, cIn, cPool int64) ts.ModuleT {
b3Ts := b3Tmp2.ApplyT(b33, train)
b3Tmp2.MustDrop()
bpoolTmp := xs.MustAvgPool2d([]int64{3, 3}, []int64{1, 1}, []int64{1, 1}, false, true, 9, false)
bpoolTmp := xs.MustAvgPool2d([]int64{3, 3}, []int64{1, 1}, []int64{1, 1}, false, true, []int64{9}, false)
bpoolTs := bpoolTmp.ApplyT(bpool, train)
res := ts.MustCat([]ts.Tensor{*b1Ts, *b2Ts, *b3Ts, *bpoolTs}, 1)
@ -145,7 +145,7 @@ func inceptionC(p *nn.Path, cIn int64, c7 int64) ts.ModuleT {
b3Ts := b3Tmp4.ApplyT(b35, train)
b3Tmp4.MustDrop()
bpTmp1 := xs.MustAvgPool2d([]int64{3, 3}, []int64{1, 1}, []int64{1, 1}, false, true, 9, false)
bpTmp1 := xs.MustAvgPool2d([]int64{3, 3}, []int64{1, 1}, []int64{1, 1}, false, true, []int64{9}, false)
bpoolTs := bpTmp1.ApplyT(bpool, train)
return ts.MustCat([]ts.Tensor{*b1Ts, *b2Ts, *b3Ts, *bpoolTs}, 1)
@ -211,7 +211,7 @@ func inceptionE(p *nn.Path, cIn int64) ts.ModuleT {
b3bTs := b3Tmp2.ApplyT(b33b, train)
b3Ts := ts.MustCat([]ts.Tensor{*b3aTs, *b3bTs}, 1)
bpTmp1 := xs.MustAvgPool2d([]int64{3, 3}, []int64{1, 1}, []int64{1, 1}, false, true, 9, false)
bpTmp1 := xs.MustAvgPool2d([]int64{3, 3}, []int64{1, 1}, []int64{1, 1}, false, true, []int64{9}, false)
bpoolTs := bpTmp1.ApplyT(bpool, train)
return ts.MustCat([]ts.Tensor{*b1Ts, *b2Ts, *b3Ts, *bpoolTs}, 1)