WIP: torch/device, torch/kind. Clean-up and re-structure

This commit is contained in:
sugarme 2020-05-27 06:44:37 +10:00
parent 313590d87b
commit 816e6109ea
39 changed files with 209 additions and 58987 deletions

View File

@ -3,10 +3,26 @@ package main
import (
t "github.com/sugarme/gotch/torch"
type Tensor struct {
c_tensor *t.C_tensor
func FnOfSlice(data []float64) (retVal Tensor, err error) {
dataLen := len(data)
dat := unsafe.Pointer(data)
c_tensor := t.AtTensorOfData(dat, int64(dataLen), 1, 7, 7)
retVal = Tensor{c_tensor}
return retVal, nil
func main() {
t := t.NewTensor()

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +0,0 @@
extern "C" {
void dummy_cuda_dependency();
namespace at {
namespace cuda {
int warp_size();
void dummy_cuda_dependency() {

View File

@ -1,9 +0,0 @@
package libtch
type C_scalar struct {
_private [1]uint8
type C_tensor struct {
_private [1]uint8

View File

@ -1,5 +0,0 @@
package libtch
type Tensor struct {
C_tensor *C_tensor

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,894 +0,0 @@
#include "torch_api.h"
#include "stb_image.h"
#include "stb_image_write.h"
#include "stb_image_resize.h"
using namespace std;
char *get_and_reset_last_err() {
char *tmp = torch_last_err;
torch_last_err = nullptr;
return tmp;
void at_manual_seed(int64_t seed) {
vector<torch::Tensor> of_carray_tensor(torch::Tensor **vs, int len) {
vector<torch::Tensor> result;
for (int i = 0; i < len; ++i) result.push_back(*(vs[i]));
return result;
at::Device device_of_int(int d) {
if (d < 0) return at::Device(at::kCPU);
return at::Device(at::kCUDA, /*index=*/d);
tensor at_new_tensor() {
return new torch::Tensor();
return nullptr;
tensor at_tensor_of_data(void *vs, int64_t *dims, size_t ndims, size_t element_size_in_bytes, int type) {
torch::Tensor tensor = torch::zeros(torch::IntArrayRef(dims, ndims), torch::ScalarType(type));
if (element_size_in_bytes != tensor.element_size())
throw std::invalid_argument("incoherent element sizes in bytes");
void *tensor_data = tensor.data_ptr();
memcpy(tensor_data, vs, tensor.numel() * element_size_in_bytes);
return new torch::Tensor(tensor);
return nullptr;
void at_copy_data(tensor tensor, void *vs, size_t numel, size_t elt_size_in_bytes) {
if (elt_size_in_bytes != tensor->element_size())
throw std::invalid_argument("incoherent element sizes in bytes");
if (numel > tensor->numel())
throw std::invalid_argument("target numel is larger than tensor numel");
if (tensor->device().type() != at::kCPU) {
torch::Tensor tmp_tensor = tensor->to(at::kCPU).contiguous();
void *tensor_data = tmp_tensor.data_ptr();
memcpy(vs, tensor_data, numel * elt_size_in_bytes);
else {
auto tmp_tensor = tensor->contiguous();
void *tensor_data = tmp_tensor.data_ptr();
memcpy(vs, tensor_data, numel * elt_size_in_bytes);
tensor at_shallow_clone(tensor t) {
PROTECT(return new torch::Tensor(*t);)
return nullptr;
void *at_data_ptr(tensor t) {
PROTECT(return t->data_ptr();)
return nullptr;
int at_defined(tensor t) {
PROTECT(return t->defined();)
return -1;
int at_is_sparse(tensor t) {
PROTECT(return t->is_sparse();)
return -1;
size_t at_dim(tensor t) {
PROTECT(return t->dim();)
return -1;
void at_shape(tensor t, int64_t *dims) {
int i = 0;
for (int64_t dim : t->sizes()) dims[i++] = dim;
int at_scalar_type(tensor t) {
return static_cast<int>(t->scalar_type());
return -1;
int at_device(tensor t) {
auto device = t->device();
if (device.type() == at::kCPU) return -1;
if (device.type() == at::kCUDA) return device.index();
return -2;
void at_backward(tensor t, int keep_graph, int create_graph) {
PROTECT(t->backward({}, keep_graph, create_graph);)
int at_requires_grad(tensor t) {
PROTECT(return t->requires_grad();)
return -1;
int at_grad_set_enabled(int b) {
bool is_enabled = torch::autograd::GradMode::is_enabled();
return is_enabled;
return -1;
tensor at_get(tensor t, int index) {
PROTECT(return new torch::Tensor((*t)[index]);)
return nullptr;
template<typename T>
T at_value_at_indexes(tensor t, int64_t *indexes, int indexes_len) {
torch::Tensor tensor = *t;
for (int i = 0; i < indexes_len; ++i) {
tensor = tensor[indexes[i]];
return tensor.item<T>();
return T();
double at_double_value_at_indexes(tensor t, int64_t *indexes, int indexes_len) {
return at_value_at_indexes<double>(t, indexes, indexes_len);
int64_t at_int64_value_at_indexes(tensor t, int64_t *indexes, int indexes_len) {
return at_value_at_indexes<int64_t>(t, indexes, indexes_len);
template<typename T>
void at_set_value_at_indexes(tensor t, int *indexes, int indexes_len, T v) {
torch::Tensor tensor = *t;
for (int i = 0; i < indexes_len; ++i) {
tensor = tensor[indexes[i]];
void at_set_double_value_at_indexes(tensor t, int *indexes, int indexes_len, double v) {
at_set_value_at_indexes<double>(t, indexes, indexes_len, v);
void at_set_int64_value_at_indexes(tensor t, int *indexes, int indexes_len, int64_t v) {
at_set_value_at_indexes<int64_t>(t, indexes, indexes_len, v);
void at_fill_double(tensor t, double v) {
void at_fill_int64(tensor t, int64_t v) {
void at_print(tensor t) {
torch::Tensor *tensor = (torch::Tensor*)t;
cout << *tensor << endl;
char *at_to_string(tensor t, int line_size) {
std::ostringstream oss;
torch::print(oss, *t, line_size);
return strdup(oss.str().c_str());
return nullptr;
void at_copy_(tensor dst, tensor src) {
void at_save(tensor t, char *filename) {
PROTECT(torch::save(*t, filename);)
void at_save_multi(tensor *tensors, char **tensor_names, int ntensors, char *filename) {
torch::serialize::OutputArchive archive;
for (int i = 0; i < ntensors; ++i)
archive.write(std::string(tensor_names[i]), *(tensors[i]), /* buffer=*/ false);
void at_load_multi(tensor *tensors, char **tensor_names, int ntensors, char *filename) {
torch::serialize::InputArchive archive;
vector<torch::Tensor> ts(ntensors);
for (int i = 0; i < ntensors; ++i)
archive.read(std::string(tensor_names[i]), ts[i]);
// Only allocate the new tensor now so that if there is an exception raised during
// [read], no memory has to be freed.
for (int i = 0; i < ntensors; ++i)
tensors[i] = new torch::Tensor(ts[i]);
void at_load_callback(char *filename, void *data, void (*f)(void *, char *, tensor)) {
auto module = torch::jit::load(filename);
for (const auto &p : module.named_parameters()) {
auto v = p.value;
f(data, (char*)p.name.c_str(), new torch::Tensor(v));
void at_load_callback_with_device(char *filename, void *data, void (*f)(void *, char *, tensor), int device_id) {
auto module = torch::jit::load(filename, device_of_int(device_id));
for (const auto &p : module.named_parameters()) {
auto v = p.value;
f(data, (char*)p.name.c_str(), new torch::Tensor(v));
void at_load_multi_(tensor *tensors, char **tensor_names, int ntensors, char *filename) {
torch::NoGradGuard no_grad;
torch::serialize::InputArchive archive;
for (int i = 0; i < ntensors; ++i) {
if (tensors[i]->device().type() == at::kCPU)
archive.read(std::string(tensor_names[i]), *(tensors[i]));
else {
torch::Tensor tmp_tensor = torch::empty_like(*(tensors[i]), at::device(at::kCPU));
archive.read(std::string(tensor_names[i]), tmp_tensor);
tensor at_load(char *filename) {
torch::Tensor tensor;
torch::load(tensor, filename);
return new torch::Tensor(tensor);
return nullptr;
tensor at_load_image(char *filename) {
int w = -1;
int h = -1;
int c = -1;
void *data = stbi_load(filename, &w, &h, &c, 3);
if (data == nullptr)
throw std::invalid_argument(stbi_failure_reason());
torch::Tensor tensor = torch::zeros({ h, w, 3 }, at::ScalarType::Byte);
memcpy(tensor.data_ptr(), data, h * w * 3);
return new torch::Tensor(tensor);
return nullptr;
bool ends_with(const char *str, const char *suffix) {
int suffix_len = strlen(suffix);
int str_len = strlen(str);
if (str_len < suffix_len) return false;
for (int i = 1; i <= suffix_len; ++i)
if (str[str_len-i] != suffix[suffix_len-i]) return false;
return true;
int at_save_image(tensor tensor, char *filename) {
auto sizes = tensor->sizes();
if (sizes.size() != 3)
throw std::invalid_argument("invalid number of dimensions, should be 3");
int h = sizes[0];
int w = sizes[1];
int c = sizes[2];
auto tmp_tensor = tensor->contiguous();
void *tensor_data = tmp_tensor.data_ptr();
if (ends_with(filename, ".jpg"))
return stbi_write_jpg(filename, w, h, c, tensor_data, 90);
if (ends_with(filename, ".bmp"))
return stbi_write_bmp(filename, w, h, c, tensor_data);
if (ends_with(filename, ".tga"))
return stbi_write_tga(filename, w, h, c, tensor_data);
return stbi_write_png(filename, w, h, c, tensor_data, 0);
return -1;
int at_get_num_interop_threads() {
PROTECT(return at::get_num_interop_threads();)
return -1;
int at_get_num_threads() {
PROTECT(return at::get_num_threads();)
return -1;
void at_set_num_interop_threads(int n_threads) {
void at_set_num_threads(int n_threads) {
tensor at_resize_image(tensor tensor, int out_w, int out_h) {
auto sizes = tensor->sizes();
if (sizes.size() != 3)
throw std::invalid_argument("invalid number of dimensions, should be 3");
int h = sizes[0];
int w = sizes[1];
int c = sizes[2];
auto tmp_tensor = tensor->contiguous();
const unsigned char *tensor_data = (unsigned char*)tmp_tensor.data_ptr();
torch::Tensor out = torch::zeros({ out_h, out_w, c }, at::ScalarType::Byte);
stbir_resize_uint8(tensor_data, w, h, 0, (unsigned char*)out.data_ptr(), out_w, out_h, 0, c);
return new torch::Tensor(out);
return nullptr;
void at_free(tensor t) {
void at_run_backward(tensor *tensors,
int ntensors,
tensor *inputs,
int ninputs,
tensor *outputs,
int keep_graph,
int create_graph) {
vector<torch::autograd::Edge> roots;
for (int i = 0; i < ntensors; ++i)
vector<torch::autograd::Edge> inputs_;
for (int i = 0; i < ninputs; ++i) {
if (!inputs[i]->requires_grad())
throw std::invalid_argument("one of the input tensor does not use set_requires_grad");
vector<torch::autograd::Variable> grads;
for (int i = 0; i < ntensors; ++i)
auto vl = torch::autograd::Engine::get_default_engine().execute(roots, grads, keep_graph, create_graph, inputs_);
for (int i = 0; i < ninputs; ++i) {
outputs[i] = static_cast<tensor>(new torch::autograd::Variable(vl[i]));
optimizer ato_adam(double learning_rate,
double beta1,
double beta2,
double weight_decay) {
auto options =
.betas(std::tuple<double, double>(beta1, beta2))
return new torch::optim::Adam(vector<torch::Tensor>(), options);
return nullptr;
optimizer ato_rms_prop(double learning_rate,
double alpha,
double eps,
double weight_decay,
double momentum,
int centered) {
auto options =
.centered(centered != 0);
return new torch::optim::RMSprop(vector<torch::Tensor>(), options);
return nullptr;
optimizer ato_sgd(double learning_rate,
double momentum,
double dampening,
double weight_decay,
int nesterov) {
auto options =
return new torch::optim::SGD(vector<torch::Tensor>(), options);
return nullptr;
void ato_add_parameters(optimizer t, tensor *tensors, int ntensors) {
for (int i = 0; i < ntensors; ++i)
void ato_set_learning_rate(optimizer t, double learning_rate) {
torch::optim::OptimizerOptions* d = &(t->defaults());
if (auto adam = dynamic_cast<torch::optim::AdamOptions*>(d))
else if (auto rms = dynamic_cast<torch::optim::RMSpropOptions*>(d))
else if (auto sgd = dynamic_cast<torch::optim::SGDOptions*>(d))
throw std::invalid_argument("unexpected optimizer");
void ato_set_momentum(optimizer t, double momentum) {
torch::optim::OptimizerOptions* d = &(t->defaults());
if (auto adam = dynamic_cast<torch::optim::AdamOptions*>(d)) {
auto betas = adam->betas();
adam->betas(std::tuple<double, double>(momentum, get<1>(betas)));
else if (auto rms = dynamic_cast<torch::optim::RMSpropOptions*>(d))
else if (auto sgd = dynamic_cast<torch::optim::SGDOptions*>(d))
throw std::invalid_argument("unexpected optimizer");
void ato_zero_grad(optimizer t) {
void ato_step(optimizer t) {
void ato_free(optimizer t) {
scalar ats_int(int64_t v) {
PROTECT(return new torch::Scalar(v);)
return nullptr;
scalar ats_float(double v) {
PROTECT(return new torch::Scalar(v);)
return nullptr;
int64_t ats_to_int(scalar s) {
PROTECT(return s->toLong();)
return -1;
double ats_to_float(scalar s) {
PROTECT(return s->toDouble();)
return 0.;
char *ats_to_string(scalar s) {
using namespace at;
std::ostringstream oss;
oss << (*s);
return strdup(oss.str().c_str());
return nullptr;
void ats_free(scalar s) {
int atc_cuda_device_count() {
PROTECT(return torch::cuda::device_count();)
return -1;
int atc_cuda_is_available() {
PROTECT(return torch::cuda::is_available();)
return -1;
int atc_cudnn_is_available() {
PROTECT(return torch::cuda::cudnn_is_available();)
return -1;
void atc_set_benchmark_cudnn(int b) {
module atm_load(char *filename) {
return new torch::jit::script::Module(torch::jit::load(filename));
return nullptr;
module atm_load_str(char *data, size_t sz) {
std::istringstream stream(std::string(data, sz));
return new torch::jit::script::Module(torch::jit::load(stream));
return nullptr;
tensor atm_forward(module m, tensor *tensors, int ntensors) {
std::vector<torch::jit::IValue> inputs;
for (int i = 0; i < ntensors; ++i)
torch::jit::IValue output = m->forward(inputs);
if (!output.isTensor())
throw std::invalid_argument("forward did not return a tensor");
return new torch::Tensor(output.toTensor());
return nullptr;
ivalue atm_forward_(module m,
ivalue *ivalues,
int nivalues) {
std::vector<torch::jit::IValue> inputs;
for (int i = 0; i < nivalues; ++i)
torch::jit::IValue output = m->forward(inputs);
return new torch::jit::IValue(output);
return nullptr;
void atm_free(module m) {
void atm_to(module m, int device, int dtype, bool non_blocking) {
m->to(device_of_int(device), at::ScalarType(dtype), non_blocking);
ivalue ati_tensor(tensor t) {
return new torch::jit::IValue(*t);
return nullptr;
ivalue ati_int(int64_t i) {
return new torch::jit::IValue(i);
return nullptr;
ivalue ati_double(double d) {
return new torch::jit::IValue(d);
return nullptr;
ivalue ati_bool(int i) {
return new torch::jit::IValue((bool)i);
return nullptr;
ivalue ati_string(char *s) {
string str(s);
return new torch::jit::IValue(str);
return nullptr;
ivalue ati_none() {
return new torch::jit::IValue();
return nullptr;
ivalue ati_tuple(ivalue *is, int nvalues) {
vector<torch::jit::IValue> vec;
for (int i = 0; i < nvalues; ++i) vec.push_back(*(is[i]));
return new torch::jit::IValue(torch::ivalue::Tuple::create(vec));
return nullptr;
ivalue ati_generic_list(ivalue *is, int nvalues) {
c10::List<torch::jit::IValue> vec(c10::AnyType::get());
for (int i = 0; i < nvalues; ++i) vec.push_back(*(is[i]));
return new torch::jit::IValue(c10::List<torch::jit::IValue>(vec));
return nullptr;
ivalue ati_generic_dict(ivalue *is, int nvalues) {
c10::Dict<torch::jit::IValue, torch::jit::IValue> dict(c10::AnyType::get(), c10::AnyType::get());
for (int i = 0; i < nvalues; ++i) dict.insert(*(is[2*i]), *(is[2*i+1]));
return new torch::jit::IValue(dict);
return nullptr;
ivalue ati_int_list(int64_t *is, int nvalues) {
c10::List<int64_t> vec;
for (int i = 0; i < nvalues; ++i) vec.push_back(is[i]);
return new torch::jit::IValue(vec);
return nullptr;
ivalue ati_double_list(double *is, int nvalues) {
c10::List<double> vec;
for (int i = 0; i < nvalues; ++i) vec.push_back(is[i]);
return new torch::jit::IValue(vec);
return nullptr;
ivalue ati_bool_list(char *is, int nvalues) {
c10::List<bool> vec;
for (int i = 0; i < nvalues; ++i) vec.push_back(is[i] != 0);
return new torch::jit::IValue(vec);
return nullptr;
ivalue ati_tensor_list(tensor *is, int nvalues) {
c10::List<at::Tensor> vec;
for (int i = 0; i < nvalues; ++i) vec.push_back(*(is[i]));
return new torch::jit::IValue(vec);
return nullptr;
int ati_tag(ivalue i) {
if (i->isNone()) return 0;
else if (i->isTensor()) return 1;
else if (i->isDouble()) return 2;
else if (i->isInt()) return 3;
else if (i->isBool()) return 4;
else if (i->isTuple()) return 5;
else if (i->isIntList()) return 6;
else if (i->isDoubleList()) return 7;
else if (i->isBoolList()) return 8;
else if (i->isString()) return 9;
else if (i->isTensorList()) return 10;
else if (i->isList()) return 12;
else if (i->isGenericDict()) return 13;
throw std::invalid_argument(("unsupported tag" + i->tagKind()).c_str());
return -1;
return -1;
int64_t ati_to_int(ivalue i) {
return i->toInt();
return -1;
double ati_to_double(ivalue i) {
return i->toDouble();
return 0;
int ati_to_bool(ivalue i) {
return i->toBool();
return -1;
char *ati_to_string(ivalue i) {
auto str = i->toStringRef();
return strdup(str.c_str());
return nullptr;
tensor ati_to_tensor(ivalue i) {
return new torch::Tensor(i->toTensor());
return nullptr;
int ati_length(ivalue i) {
if (i->isTuple()) return i->toTuple()->elements().size();
else if (i->isIntList()) return i->toIntList().size();
else if (i->isDoubleList()) return i->toDoubleList().size();
else if (i->isBoolList()) return i->toBoolList().size();
else if (i->isString()) return i->toStringRef().size();
else if (i->isTensorList()) return i->toTensorList().size();
else if (i->isList()) return i->toList().size();
else if (i->isGenericDict()) return i->toGenericDict().size();
throw std::invalid_argument(("unsupported tag for length " + i->tagKind()).c_str());
return -1;
return -1;
int ati_tuple_length(ivalue i) {
return i->toTuple()->elements().size();
return -1;
void ati_to_tuple(ivalue i,
ivalue *outputs,
int noutputs) {
auto vec = i->toTuple()->elements();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected tuple size");
for (int i = 0; i < noutputs; ++i)
outputs[i] = new torch::jit::IValue(vec[i]);
void ati_to_generic_list(ivalue i,
ivalue *outputs,
int noutputs) {
auto vec = i->toList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected list size");
for (int i = 0; i < noutputs; ++i)
outputs[i] = new torch::jit::IValue(vec[i]);
void ati_to_generic_dict(ivalue i,
ivalue *outputs,
int noutputs) {
auto dict = i->toGenericDict();
if (dict.size() != noutputs) {
throw std::invalid_argument("unexpected dict size");
int k = 0;
for (auto it = dict.begin(); it != dict.end(); ++it) {
outputs[k++] = new torch::jit::IValue(it->key());
outputs[k++] = new torch::jit::IValue(it->value());
void ati_to_int_list(ivalue i,
int64_t *outputs,
int noutputs) {
auto vec = i->toIntList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected list size");
for (int i = 0; i < noutputs; ++i)
outputs[i] = vec[i];
void ati_to_double_list(ivalue i,
double *outputs,
int noutputs) {
auto vec = i->toDoubleList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected list size");
for (int i = 0; i < noutputs; ++i)
outputs[i] = vec[i];
void ati_to_bool_list(ivalue i,
char *outputs,
int noutputs) {
auto vec = i->toBoolList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected list size");
for (int i = 0; i < noutputs; ++i)
outputs[i] = vec[i];
void ati_to_tensor_list(ivalue i,
tensor *outputs,
int noutputs) {
auto vec = i->toTensorList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected tuple size");
for (int i = 0; i < noutputs; ++i)
outputs[i] = new torch::Tensor(vec[i]);
void ati_free(ivalue i) {
#include "torch_api_generated.cpp.h"

View File

@ -1,175 +0,0 @@
#ifndef __TORCH_API_H__
#define __TORCH_API_H__
#ifdef __cplusplus
thread_local char *torch_last_err = nullptr;
extern "C" {
typedef torch::Tensor *tensor;
typedef torch::Scalar *scalar;
typedef torch::optim::Optimizer *optimizer;
typedef torch::jit::script::Module *module;
typedef torch::jit::IValue *ivalue;
#define PROTECT(x) \
try { \
x \
} catch (const exception& e) { \
torch_last_err = strdup(e.what()); \
typedef void *tensor;
typedef void *optimizer;
typedef void *scalar;
typedef void *module;
typedef void *ivalue;
char *get_and_reset_last_err(); // thread-local
void at_manual_seed(int64_t);
tensor at_new_tensor();
tensor at_tensor_of_data(void *vs, int64_t *dims, size_t ndims, size_t element_size_in_bytes, int type);
void at_copy_data(tensor tensor, void *vs, size_t numel, size_t element_size_in_bytes);
tensor at_shallow_clone(tensor);
void *at_data_ptr(tensor);
int at_defined(tensor);
int at_is_sparse(tensor);
int at_device(tensor);
size_t at_dim(tensor);
void at_shape(tensor, int64_t *);
int at_scalar_type(tensor);
void at_backward(tensor, int, int);
int at_requires_grad(tensor);
int at_grad_set_enabled(int);
tensor at_get(tensor, int index);
void at_fill_double(tensor, double);
void at_fill_int64(tensor, int64_t);
double at_double_value_at_indexes(tensor, int64_t *indexes, int indexes_len);
int64_t at_int64_value_at_indexes(tensor, int64_t *indexes, int indexes_len);
void at_set_double_value_at_indexes(tensor, int *indexes, int indexes_len, double v);
void at_set_int64_value_at_indexes(tensor, int *indexes, int indexes_len, int64_t v);
void at_copy_(tensor dst, tensor src);
void at_print(tensor);
char *at_to_string(tensor, int line_size);
void at_save(tensor, char *filename);
tensor at_load(char *filename);
tensor at_load_image(char *filename);
int at_save_image(tensor, char *filename);
tensor at_resize_image(tensor, int w, int h);
void at_save_multi(tensor *tensors, char **tensor_names, int ntensors, char *filename);
/* [at_load_multi] takes as input an array of nullptr for [tensors]. */
void at_load_multi(tensor *tensors, char **tensor_names, int ntensors, char *filename);
/* [at_load_multi_] takes as input an array of allocation [tensors]. */
void at_load_multi_(tensor *tensors, char **tensor_names, int ntensors, char *filename);
void at_load_callback(char *filename, void *data, void (*f)(void *, char *, tensor));
void at_load_callback_with_device(char *filename, void *data, void (*f)(void *, char *, tensor), int device_id);
int at_get_num_interop_threads();
int at_get_num_threads();
void at_set_num_interop_threads(int n_threads);
void at_set_num_threads(int n_threads);
void at_free(tensor);
void at_run_backward(tensor *tensors,
int ntensors,
tensor *inputs,
int ninputs,
tensor *outputs,
int keep_graph,
int create_graph);
optimizer ato_adam(double learning_rate,
double beta1,
double beta2,
double weight_decay);
optimizer ato_rms_prop(double learning_rate,
double alpha,
double eps,
double weight_decay,
double momentum,
int centered);
optimizer ato_sgd(double learning_rate,
double momentum,
double dampening,
double weight_decay,
int nesterov);
void ato_add_parameters(optimizer, tensor *, int ntensors);
void ato_set_learning_rate(optimizer, double learning_rate);
void ato_set_momentum(optimizer, double momentum);
void ato_zero_grad(optimizer);
void ato_step(optimizer);
void ato_free(optimizer);
scalar ats_int(int64_t);
scalar ats_float(double);
int64_t ats_to_int(scalar);
double ats_to_float(scalar);
char *ats_to_string(scalar);
void ats_free(scalar);
int atc_cuda_device_count();
int atc_cuda_is_available();
int atc_cudnn_is_available();
void atc_set_benchmark_cudnn(int b);
module atm_load(char *);
module atm_load_str(char *, size_t sz);
tensor atm_forward(module, tensor *tensors, int ntensors);
ivalue atm_forward_(module,
ivalue *ivalues,
int nivalues);
void atm_free(module);
void atm_to(module m, int device, int dtype, bool non_blocking);
ivalue ati_none();
ivalue ati_tensor(tensor);
ivalue ati_int(int64_t);
ivalue ati_double(double);
ivalue ati_bool(int);
ivalue ati_string(char *);
ivalue ati_tuple(ivalue *, int);
ivalue ati_generic_list(ivalue *, int);
ivalue ati_generic_dict(ivalue *, int);
ivalue ati_int_list(int64_t *, int);
ivalue ati_double_list(double *, int);
ivalue ati_bool_list(char *, int);
ivalue ati_tensor_list(tensor *, int);
tensor ati_to_tensor(ivalue);
int64_t ati_to_int(ivalue);
double ati_to_double(ivalue);
char *ati_to_string(ivalue);
int ati_to_bool(ivalue);
int ati_length(ivalue);
int ati_tuple_length(ivalue);
void ati_to_tuple(ivalue, ivalue *, int);
void ati_to_generic_list(ivalue, ivalue *, int);
void ati_to_generic_dict(ivalue, ivalue *, int);
void ati_to_int_list(ivalue, int64_t *, int);
void ati_to_double_list(ivalue, double *, int);
void ati_to_bool_list(ivalue, char *, int);
void ati_to_tensor_list(ivalue, tensor *, int);
int ati_tag(ivalue);
void ati_free(ivalue);
#include "torch_api_generated.h"
#ifdef __cplusplus

View File

@ -1,12 +0,0 @@
extern "C" {
void dummy_cuda_dependency();
namespace at {
namespace cuda {
int warp_size();
void dummy_cuda_dependency() {

View File

@ -1,6 +0,0 @@
extern "C" {
void dummy_cuda_dependency();
void dummy_cuda_dependency() {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,894 +0,0 @@
#include "torch_api.h"
#include "stb_image.h"
#include "stb_image_write.h"
#include "stb_image_resize.h"
using namespace std;
char *get_and_reset_last_err() {
char *tmp = torch_last_err;
torch_last_err = nullptr;
return tmp;
void at_manual_seed(int64_t seed) {
vector<torch::Tensor> of_carray_tensor(torch::Tensor **vs, int len) {
vector<torch::Tensor> result;
for (int i = 0; i < len; ++i) result.push_back(*(vs[i]));
return result;
at::Device device_of_int(int d) {
if (d < 0) return at::Device(at::kCPU);
return at::Device(at::kCUDA, /*index=*/d);
tensor at_new_tensor() {
return new torch::Tensor();
return nullptr;
tensor at_tensor_of_data(void *vs, int64_t *dims, size_t ndims, size_t element_size_in_bytes, int type) {
torch::Tensor tensor = torch::zeros(torch::IntArrayRef(dims, ndims), torch::ScalarType(type));
if (element_size_in_bytes != tensor.element_size())
throw std::invalid_argument("incoherent element sizes in bytes");
void *tensor_data = tensor.data_ptr();
memcpy(tensor_data, vs, tensor.numel() * element_size_in_bytes);
return new torch::Tensor(tensor);
return nullptr;
void at_copy_data(tensor tensor, void *vs, size_t numel, size_t elt_size_in_bytes) {
if (elt_size_in_bytes != tensor->element_size())
throw std::invalid_argument("incoherent element sizes in bytes");
if (numel > tensor->numel())
throw std::invalid_argument("target numel is larger than tensor numel");
if (tensor->device().type() != at::kCPU) {
torch::Tensor tmp_tensor = tensor->to(at::kCPU).contiguous();
void *tensor_data = tmp_tensor.data_ptr();
memcpy(vs, tensor_data, numel * elt_size_in_bytes);
else {
auto tmp_tensor = tensor->contiguous();
void *tensor_data = tmp_tensor.data_ptr();
memcpy(vs, tensor_data, numel * elt_size_in_bytes);
tensor at_shallow_clone(tensor t) {
PROTECT(return new torch::Tensor(*t);)
return nullptr;
void *at_data_ptr(tensor t) {
PROTECT(return t->data_ptr();)
return nullptr;
int at_defined(tensor t) {
PROTECT(return t->defined();)
return -1;
int at_is_sparse(tensor t) {
PROTECT(return t->is_sparse();)
return -1;
size_t at_dim(tensor t) {
PROTECT(return t->dim();)
return -1;
void at_shape(tensor t, int64_t *dims) {
int i = 0;
for (int64_t dim : t->sizes()) dims[i++] = dim;
int at_scalar_type(tensor t) {
return static_cast<int>(t->scalar_type());
return -1;
int at_device(tensor t) {
auto device = t->device();
if (device.type() == at::kCPU) return -1;
if (device.type() == at::kCUDA) return device.index();
return -2;
void at_backward(tensor t, int keep_graph, int create_graph) {
PROTECT(t->backward({}, keep_graph, create_graph);)
int at_requires_grad(tensor t) {
PROTECT(return t->requires_grad();)
return -1;
int at_grad_set_enabled(int b) {
bool is_enabled = torch::autograd::GradMode::is_enabled();
return is_enabled;
return -1;
tensor at_get(tensor t, int index) {
PROTECT(return new torch::Tensor((*t)[index]);)
return nullptr;
template<typename T>
T at_value_at_indexes(tensor t, int64_t *indexes, int indexes_len) {
torch::Tensor tensor = *t;
for (int i = 0; i < indexes_len; ++i) {
tensor = tensor[indexes[i]];
return tensor.item<T>();
return T();
double at_double_value_at_indexes(tensor t, int64_t *indexes, int indexes_len) {
return at_value_at_indexes<double>(t, indexes, indexes_len);
int64_t at_int64_value_at_indexes(tensor t, int64_t *indexes, int indexes_len) {
return at_value_at_indexes<int64_t>(t, indexes, indexes_len);
template<typename T>
void at_set_value_at_indexes(tensor t, int *indexes, int indexes_len, T v) {
torch::Tensor tensor = *t;
for (int i = 0; i < indexes_len; ++i) {
tensor = tensor[indexes[i]];
void at_set_double_value_at_indexes(tensor t, int *indexes, int indexes_len, double v) {
at_set_value_at_indexes<double>(t, indexes, indexes_len, v);
void at_set_int64_value_at_indexes(tensor t, int *indexes, int indexes_len, int64_t v) {
at_set_value_at_indexes<int64_t>(t, indexes, indexes_len, v);
void at_fill_double(tensor t, double v) {
void at_fill_int64(tensor t, int64_t v) {
void at_print(tensor t) {
torch::Tensor *tensor = (torch::Tensor*)t;
cout << *tensor << endl;
char *at_to_string(tensor t, int line_size) {
std::ostringstream oss;
torch::print(oss, *t, line_size);
return strdup(oss.str().c_str());
return nullptr;
void at_copy_(tensor dst, tensor src) {
void at_save(tensor t, char *filename) {
PROTECT(torch::save(*t, filename);)
void at_save_multi(tensor *tensors, char **tensor_names, int ntensors, char *filename) {
torch::serialize::OutputArchive archive;
for (int i = 0; i < ntensors; ++i)
archive.write(std::string(tensor_names[i]), *(tensors[i]), /* buffer=*/ false);
void at_load_multi(tensor *tensors, char **tensor_names, int ntensors, char *filename) {
torch::serialize::InputArchive archive;
vector<torch::Tensor> ts(ntensors);
for (int i = 0; i < ntensors; ++i)
archive.read(std::string(tensor_names[i]), ts[i]);
// Only allocate the new tensor now so that if there is an exception raised during
// [read], no memory has to be freed.
for (int i = 0; i < ntensors; ++i)
tensors[i] = new torch::Tensor(ts[i]);
void at_load_callback(char *filename, void *data, void (*f)(void *, char *, tensor)) {
auto module = torch::jit::load(filename);
for (const auto &p : module.named_parameters()) {
auto v = p.value;
f(data, (char*)p.name.c_str(), new torch::Tensor(v));
void at_load_callback_with_device(char *filename, void *data, void (*f)(void *, char *, tensor), int device_id) {
auto module = torch::jit::load(filename, device_of_int(device_id));
for (const auto &p : module.named_parameters()) {
auto v = p.value;
f(data, (char*)p.name.c_str(), new torch::Tensor(v));
void at_load_multi_(tensor *tensors, char **tensor_names, int ntensors, char *filename) {
torch::NoGradGuard no_grad;
torch::serialize::InputArchive archive;
for (int i = 0; i < ntensors; ++i) {
if (tensors[i]->device().type() == at::kCPU)
archive.read(std::string(tensor_names[i]), *(tensors[i]));
else {
torch::Tensor tmp_tensor = torch::empty_like(*(tensors[i]), at::device(at::kCPU));
archive.read(std::string(tensor_names[i]), tmp_tensor);
tensor at_load(char *filename) {
torch::Tensor tensor;
torch::load(tensor, filename);
return new torch::Tensor(tensor);
return nullptr;
tensor at_load_image(char *filename) {
int w = -1;
int h = -1;
int c = -1;
void *data = stbi_load(filename, &w, &h, &c, 3);
if (data == nullptr)
throw std::invalid_argument(stbi_failure_reason());
torch::Tensor tensor = torch::zeros({ h, w, 3 }, at::ScalarType::Byte);
memcpy(tensor.data_ptr(), data, h * w * 3);
return new torch::Tensor(tensor);
return nullptr;
bool ends_with(const char *str, const char *suffix) {
int suffix_len = strlen(suffix);
int str_len = strlen(str);
if (str_len < suffix_len) return false;
for (int i = 1; i <= suffix_len; ++i)
if (str[str_len-i] != suffix[suffix_len-i]) return false;
return true;
int at_save_image(tensor tensor, char *filename) {
auto sizes = tensor->sizes();
if (sizes.size() != 3)
throw std::invalid_argument("invalid number of dimensions, should be 3");
int h = sizes[0];
int w = sizes[1];
int c = sizes[2];
auto tmp_tensor = tensor->contiguous();
void *tensor_data = tmp_tensor.data_ptr();
if (ends_with(filename, ".jpg"))
return stbi_write_jpg(filename, w, h, c, tensor_data, 90);
if (ends_with(filename, ".bmp"))
return stbi_write_bmp(filename, w, h, c, tensor_data);
if (ends_with(filename, ".tga"))
return stbi_write_tga(filename, w, h, c, tensor_data);
return stbi_write_png(filename, w, h, c, tensor_data, 0);
return -1;
int at_get_num_interop_threads() {
PROTECT(return at::get_num_interop_threads();)
return -1;
int at_get_num_threads() {
PROTECT(return at::get_num_threads();)
return -1;
void at_set_num_interop_threads(int n_threads) {
void at_set_num_threads(int n_threads) {
// Resizes a 3-dimensional image tensor to out_w x out_h with
// stbir_resize_uint8 and returns the result as a newly allocated Byte tensor
// of shape {out_h, out_w, c}, where c is the third size of the input.
// Throws std::invalid_argument when the input does not have exactly 3
// dimensions.
tensor at_resize_image(tensor tensor, int out_w, int out_h) {
auto sizes = tensor->sizes();
if (sizes.size() != 3)
throw std::invalid_argument("invalid number of dimensions, should be 3");
// The three sizes are read as height, width and channel count, in that order.
int h = sizes[0];
int w = sizes[1];
int c = sizes[2];
// The input buffer is taken from the contiguous() result and read as
// unsigned bytes.
auto tmp_tensor = tensor->contiguous();
const unsigned char *tensor_data = (unsigned char*)tmp_tensor.data_ptr();
torch::Tensor out = torch::zeros({ out_h, out_w, c }, at::ScalarType::Byte);
stbir_resize_uint8(tensor_data, w, h, 0, (unsigned char*)out.data_ptr(), out_w, out_h, 0, c);
return new torch::Tensor(out);
return nullptr;
void at_free(tensor t) {
void at_run_backward(tensor *tensors,
int ntensors,
tensor *inputs,
int ninputs,
tensor *outputs,
int keep_graph,
int create_graph) {
vector<torch::autograd::Edge> roots;
for (int i = 0; i < ntensors; ++i)
vector<torch::autograd::Edge> inputs_;
for (int i = 0; i < ninputs; ++i) {
if (!inputs[i]->requires_grad())
throw std::invalid_argument("one of the input tensor does not use set_requires_grad");
vector<torch::autograd::Variable> grads;
for (int i = 0; i < ntensors; ++i)
auto vl = torch::autograd::Engine::get_default_engine().execute(roots, grads, keep_graph, create_graph, inputs_);
for (int i = 0; i < ninputs; ++i) {
outputs[i] = static_cast<tensor>(new torch::autograd::Variable(vl[i]));
optimizer ato_adam(double learning_rate,
double beta1,
double beta2,
double weight_decay) {
auto options =
.betas(std::tuple<double, double>(beta1, beta2))
return new torch::optim::Adam(vector<torch::Tensor>(), options);
return nullptr;
optimizer ato_rms_prop(double learning_rate,
double alpha,
double eps,
double weight_decay,
double momentum,
int centered) {
auto options =
.centered(centered != 0);
return new torch::optim::RMSprop(vector<torch::Tensor>(), options);
return nullptr;
optimizer ato_sgd(double learning_rate,
double momentum,
double dampening,
double weight_decay,
int nesterov) {
auto options =
return new torch::optim::SGD(vector<torch::Tensor>(), options);
return nullptr;
void ato_add_parameters(optimizer t, tensor *tensors, int ntensors) {
for (int i = 0; i < ntensors; ++i)
void ato_set_learning_rate(optimizer t, double learning_rate) {
torch::optim::OptimizerOptions* d = &(t->defaults());
if (auto adam = dynamic_cast<torch::optim::AdamOptions*>(d))
else if (auto rms = dynamic_cast<torch::optim::RMSpropOptions*>(d))
else if (auto sgd = dynamic_cast<torch::optim::SGDOptions*>(d))
throw std::invalid_argument("unexpected optimizer");
void ato_set_momentum(optimizer t, double momentum) {
torch::optim::OptimizerOptions* d = &(t->defaults());
if (auto adam = dynamic_cast<torch::optim::AdamOptions*>(d)) {
auto betas = adam->betas();
adam->betas(std::tuple<double, double>(momentum, get<1>(betas)));
else if (auto rms = dynamic_cast<torch::optim::RMSpropOptions*>(d))
else if (auto sgd = dynamic_cast<torch::optim::SGDOptions*>(d))
throw std::invalid_argument("unexpected optimizer");
void ato_zero_grad(optimizer t) {
void ato_step(optimizer t) {
void ato_free(optimizer t) {
scalar ats_int(int64_t v) {
PROTECT(return new torch::Scalar(v);)
return nullptr;
scalar ats_float(double v) {
PROTECT(return new torch::Scalar(v);)
return nullptr;
int64_t ats_to_int(scalar s) {
PROTECT(return s->toLong();)
return -1;
double ats_to_float(scalar s) {
PROTECT(return s->toDouble();)
return 0.;
// Formats the scalar *s through operator<< into a string stream and returns a
// strdup'd copy of the text. The returned buffer comes from strdup, so the
// caller is responsible for releasing it.
char *ats_to_string(scalar s) {
// Brings the at namespace into scope for the stream insertion below.
using namespace at;
std::ostringstream oss;
oss << (*s);
return strdup(oss.str().c_str());
return nullptr;
void ats_free(scalar s) {
int atc_cuda_device_count() {
PROTECT(return torch::cuda::device_count();)
return -1;
int atc_cuda_is_available() {
PROTECT(return torch::cuda::is_available();)
return -1;
int atc_cudnn_is_available() {
PROTECT(return torch::cuda::cudnn_is_available();)
return -1;
void atc_set_benchmark_cudnn(int b) {
module atm_load(char *filename) {
return new torch::jit::script::Module(torch::jit::load(filename));
return nullptr;
module atm_load_str(char *data, size_t sz) {
std::istringstream stream(std::string(data, sz));
return new torch::jit::script::Module(torch::jit::load(stream));
return nullptr;
tensor atm_forward(module m, tensor *tensors, int ntensors) {
std::vector<torch::jit::IValue> inputs;
for (int i = 0; i < ntensors; ++i)
torch::jit::IValue output = m->forward(inputs);
if (!output.isTensor())
throw std::invalid_argument("forward did not return a tensor");
return new torch::Tensor(output.toTensor());
return nullptr;
ivalue atm_forward_(module m,
ivalue *ivalues,
int nivalues) {
std::vector<torch::jit::IValue> inputs;
for (int i = 0; i < nivalues; ++i)
torch::jit::IValue output = m->forward(inputs);
return new torch::jit::IValue(output);
return nullptr;
void atm_free(module m) {
void atm_to(module m, int device, int dtype, bool non_blocking) {
m->to(device_of_int(device), at::ScalarType(dtype), non_blocking);
ivalue ati_tensor(tensor t) {
return new torch::jit::IValue(*t);
return nullptr;
ivalue ati_int(int64_t i) {
return new torch::jit::IValue(i);
return nullptr;
ivalue ati_double(double d) {
return new torch::jit::IValue(d);
return nullptr;
ivalue ati_bool(int i) {
return new torch::jit::IValue((bool)i);
return nullptr;
ivalue ati_string(char *s) {
string str(s);
return new torch::jit::IValue(str);
return nullptr;
ivalue ati_none() {
return new torch::jit::IValue();
return nullptr;
ivalue ati_tuple(ivalue *is, int nvalues) {
vector<torch::jit::IValue> vec;
for (int i = 0; i < nvalues; ++i) vec.push_back(*(is[i]));
return new torch::jit::IValue(torch::ivalue::Tuple::create(vec));
return nullptr;
ivalue ati_generic_list(ivalue *is, int nvalues) {
c10::List<torch::jit::IValue> vec(c10::AnyType::get());
for (int i = 0; i < nvalues; ++i) vec.push_back(*(is[i]));
return new torch::jit::IValue(c10::List<torch::jit::IValue>(vec));
return nullptr;
// Builds a generic dict IValue with nvalues entries and returns it as a newly
// allocated torch::jit::IValue.
// The input array is read as interleaved key/value pairs: entry i uses
// is[2*i] as the key and is[2*i+1] as the value, so `is` must contain
// 2 * nvalues pointers.
ivalue ati_generic_dict(ivalue *is, int nvalues) {
c10::Dict<torch::jit::IValue, torch::jit::IValue> dict(c10::AnyType::get(), c10::AnyType::get());
for (int i = 0; i < nvalues; ++i) dict.insert(*(is[2*i]), *(is[2*i+1]));
return new torch::jit::IValue(dict);
return nullptr;
ivalue ati_int_list(int64_t *is, int nvalues) {
c10::List<int64_t> vec;
for (int i = 0; i < nvalues; ++i) vec.push_back(is[i]);
return new torch::jit::IValue(vec);
return nullptr;
ivalue ati_double_list(double *is, int nvalues) {
c10::List<double> vec;
for (int i = 0; i < nvalues; ++i) vec.push_back(is[i]);
return new torch::jit::IValue(vec);
return nullptr;
ivalue ati_bool_list(char *is, int nvalues) {
c10::List<bool> vec;
for (int i = 0; i < nvalues; ++i) vec.push_back(is[i] != 0);
return new torch::jit::IValue(vec);
return nullptr;
ivalue ati_tensor_list(tensor *is, int nvalues) {
c10::List<at::Tensor> vec;
for (int i = 0; i < nvalues; ++i) vec.push_back(*(is[i]));
return new torch::jit::IValue(vec);
return nullptr;
// Returns an integer code describing which kind of value the IValue holds:
//   0 none, 1 tensor, 2 double, 3 int, 4 bool, 5 tuple, 6 int list,
//   7 double list, 8 bool list, 9 string, 10 tensor list, 12 generic list,
//   13 generic dict.
// The code 11 is not produced by any branch visible here.
// The checks run in the order written, so the specialised list checks are
// tried before the generic isList() check.
// Throws std::invalid_argument for any other kind.
int ati_tag(ivalue i) {
if (i->isNone()) return 0;
else if (i->isTensor()) return 1;
else if (i->isDouble()) return 2;
else if (i->isInt()) return 3;
else if (i->isBool()) return 4;
else if (i->isTuple()) return 5;
else if (i->isIntList()) return 6;
else if (i->isDoubleList()) return 7;
else if (i->isBoolList()) return 8;
else if (i->isString()) return 9;
else if (i->isTensorList()) return 10;
else if (i->isList()) return 12;
else if (i->isGenericDict()) return 13;
throw std::invalid_argument(("unsupported tag" + i->tagKind()).c_str());
return -1;
return -1;
int64_t ati_to_int(ivalue i) {
return i->toInt();
return -1;
double ati_to_double(ivalue i) {
return i->toDouble();
return 0;
int ati_to_bool(ivalue i) {
return i->toBool();
return -1;
char *ati_to_string(ivalue i) {
auto str = i->toStringRef();
return strdup(str.c_str());
return nullptr;
tensor ati_to_tensor(ivalue i) {
return new torch::Tensor(i->toTensor());
return nullptr;
// Returns the size() of the container held by the IValue: tuple elements,
// int/double/bool/tensor lists, generic lists, generic dicts, or the size()
// of the string for a string value.
// Throws std::invalid_argument for any other kind of value.
int ati_length(ivalue i) {
if (i->isTuple()) return i->toTuple()->elements().size();
else if (i->isIntList()) return i->toIntList().size();
else if (i->isDoubleList()) return i->toDoubleList().size();
else if (i->isBoolList()) return i->toBoolList().size();
else if (i->isString()) return i->toStringRef().size();
else if (i->isTensorList()) return i->toTensorList().size();
else if (i->isList()) return i->toList().size();
else if (i->isGenericDict()) return i->toGenericDict().size();
throw std::invalid_argument(("unsupported tag for length " + i->tagKind()).c_str());
return -1;
return -1;
int ati_tuple_length(ivalue i) {
return i->toTuple()->elements().size();
return -1;
void ati_to_tuple(ivalue i,
ivalue *outputs,
int noutputs) {
auto vec = i->toTuple()->elements();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected tuple size");
for (int i = 0; i < noutputs; ++i)
outputs[i] = new torch::jit::IValue(vec[i]);
void ati_to_generic_list(ivalue i,
ivalue *outputs,
int noutputs) {
auto vec = i->toList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected list size");
for (int i = 0; i < noutputs; ++i)
outputs[i] = new torch::jit::IValue(vec[i]);
// Copies the entries of a generic dict IValue into outputs as newly allocated
// IValues, interleaved as key, value, key, value, ...
// noutputs is compared against the number of dict entries, but two pointers
// are written per entry, so outputs must have room for 2 * noutputs pointers.
// Throws std::invalid_argument when dict.size() differs from noutputs.
void ati_to_generic_dict(ivalue i,
ivalue *outputs,
int noutputs) {
auto dict = i->toGenericDict();
if (dict.size() != noutputs) {
throw std::invalid_argument("unexpected dict size");
// k advances twice per entry: once for the key and once for the value.
int k = 0;
for (auto it = dict.begin(); it != dict.end(); ++it) {
outputs[k++] = new torch::jit::IValue(it->key());
outputs[k++] = new torch::jit::IValue(it->value());
void ati_to_int_list(ivalue i,
int64_t *outputs,
int noutputs) {
auto vec = i->toIntList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected list size");
for (int i = 0; i < noutputs; ++i)
outputs[i] = vec[i];
void ati_to_double_list(ivalue i,
double *outputs,
int noutputs) {
auto vec = i->toDoubleList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected list size");
for (int i = 0; i < noutputs; ++i)
outputs[i] = vec[i];
void ati_to_bool_list(ivalue i,
char *outputs,
int noutputs) {
auto vec = i->toBoolList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected list size");
for (int i = 0; i < noutputs; ++i)
outputs[i] = vec[i];
void ati_to_tensor_list(ivalue i,
tensor *outputs,
int noutputs) {
auto vec = i->toTensorList();
if (vec.size() != noutputs) {
throw std::invalid_argument("unexpected tuple size");
for (int i = 0; i < noutputs; ++i)
outputs[i] = new torch::Tensor(vec[i]);
void ati_free(ivalue i) {
#include "torch_api_generated.cpp.h"

View File

@ -1,175 +0,0 @@
#ifndef __TORCH_API_H__
#define __TORCH_API_H__
#ifdef __cplusplus
thread_local char *torch_last_err = nullptr;
extern "C" {
typedef torch::Tensor *tensor;
typedef torch::Scalar *scalar;
typedef torch::optim::Optimizer *optimizer;
typedef torch::jit::script::Module *module;
typedef torch::jit::IValue *ivalue;
// PROTECT(x) runs the statements x inside a try block. When an exception is
// caught, a strdup'd copy of e.what() is stored in torch_last_err.
#define PROTECT(x) \
try { \
x \
} catch (const exception& e) { \
torch_last_err = strdup(e.what()); \
typedef void *tensor;
typedef void *optimizer;
typedef void *scalar;
typedef void *module;
typedef void *ivalue;
char *get_and_reset_last_err(); // thread-local
void at_manual_seed(int64_t);
tensor at_new_tensor();
tensor at_tensor_of_data(void *vs, int64_t *dims, size_t ndims, size_t element_size_in_bytes, int type);
void at_copy_data(tensor tensor, void *vs, size_t numel, size_t element_size_in_bytes);
tensor at_shallow_clone(tensor);
void *at_data_ptr(tensor);
int at_defined(tensor);
int at_is_sparse(tensor);
int at_device(tensor);
size_t at_dim(tensor);
void at_shape(tensor, int64_t *);
int at_scalar_type(tensor);
void at_backward(tensor, int, int);
int at_requires_grad(tensor);
int at_grad_set_enabled(int);
tensor at_get(tensor, int index);
void at_fill_double(tensor, double);
void at_fill_int64(tensor, int64_t);
double at_double_value_at_indexes(tensor, int64_t *indexes, int indexes_len);
int64_t at_int64_value_at_indexes(tensor, int64_t *indexes, int indexes_len);
void at_set_double_value_at_indexes(tensor, int *indexes, int indexes_len, double v);
void at_set_int64_value_at_indexes(tensor, int *indexes, int indexes_len, int64_t v);
void at_copy_(tensor dst, tensor src);
void at_print(tensor);
char *at_to_string(tensor, int line_size);
void at_save(tensor, char *filename);
tensor at_load(char *filename);
tensor at_load_image(char *filename);
int at_save_image(tensor, char *filename);
tensor at_resize_image(tensor, int w, int h);
void at_save_multi(tensor *tensors, char **tensor_names, int ntensors, char *filename);
/* [at_load_multi] takes as input an array of nullptr for [tensors]. */
void at_load_multi(tensor *tensors, char **tensor_names, int ntensors, char *filename);
/* [at_load_multi_] takes as input an array of allocated [tensors]. */
void at_load_multi_(tensor *tensors, char **tensor_names, int ntensors, char *filename);
void at_load_callback(char *filename, void *data, void (*f)(void *, char *, tensor));
void at_load_callback_with_device(char *filename, void *data, void (*f)(void *, char *, tensor), int device_id);
int at_get_num_interop_threads();
int at_get_num_threads();
void at_set_num_interop_threads(int n_threads);
void at_set_num_threads(int n_threads);
void at_free(tensor);
void at_run_backward(tensor *tensors,
int ntensors,
tensor *inputs,
int ninputs,
tensor *outputs,
int keep_graph,
int create_graph);
optimizer ato_adam(double learning_rate,
double beta1,
double beta2,
double weight_decay);
optimizer ato_rms_prop(double learning_rate,
double alpha,
double eps,
double weight_decay,
double momentum,
int centered);
optimizer ato_sgd(double learning_rate,
double momentum,
double dampening,
double weight_decay,
int nesterov);
void ato_add_parameters(optimizer, tensor *, int ntensors);
void ato_set_learning_rate(optimizer, double learning_rate);
void ato_set_momentum(optimizer, double momentum);
void ato_zero_grad(optimizer);
void ato_step(optimizer);
void ato_free(optimizer);
scalar ats_int(int64_t);
scalar ats_float(double);
int64_t ats_to_int(scalar);
double ats_to_float(scalar);
char *ats_to_string(scalar);
void ats_free(scalar);
int atc_cuda_device_count();
int atc_cuda_is_available();
int atc_cudnn_is_available();
void atc_set_benchmark_cudnn(int b);
module atm_load(char *);
module atm_load_str(char *, size_t sz);
tensor atm_forward(module, tensor *tensors, int ntensors);
ivalue atm_forward_(module,
ivalue *ivalues,
int nivalues);
void atm_free(module);
void atm_to(module m, int device, int dtype, bool non_blocking);
ivalue ati_none();
ivalue ati_tensor(tensor);
ivalue ati_int(int64_t);
ivalue ati_double(double);
ivalue ati_bool(int);
ivalue ati_string(char *);
ivalue ati_tuple(ivalue *, int);
ivalue ati_generic_list(ivalue *, int);
ivalue ati_generic_dict(ivalue *, int);
ivalue ati_int_list(int64_t *, int);
ivalue ati_double_list(double *, int);
ivalue ati_bool_list(char *, int);
ivalue ati_tensor_list(tensor *, int);
tensor ati_to_tensor(ivalue);
int64_t ati_to_int(ivalue);
double ati_to_double(ivalue);
char *ati_to_string(ivalue);
int ati_to_bool(ivalue);
int ati_length(ivalue);
int ati_tuple_length(ivalue);
void ati_to_tuple(ivalue, ivalue *, int);
void ati_to_generic_list(ivalue, ivalue *, int);
void ati_to_generic_dict(ivalue, ivalue *, int);
void ati_to_int_list(ivalue, int64_t *, int);
void ati_to_double_list(ivalue, double *, int);
void ati_to_bool_list(ivalue, char *, int);
void ati_to_tensor_list(ivalue, tensor *, int);
int ati_tag(ivalue);
void ati_free(ivalue);
#include "torch_api_generated.h"
#ifdef __cplusplus

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

torch/device.go Normal file
View File

@ -0,0 +1,97 @@
package torch
import (
lib "github.com/sugarme/gotch/torch/libtch"
type Device struct {
Name string
Value int
type Cuda struct {
Count int
var (
CPU Device = Device{Name: "CPU", Value: -1}
CUDA Cuda = Cuda{Name: "CUDA", Value: 0, Count: 1}
// CudaBuilder returns a Device named "CUDA" whose Value is v converted to int.
func CudaBuilder(v uint) Device {
// TODO: fully initialize cuda here
return Device{Name: "CUDA", Value: int(v)}
// Cuda methods:
// =============
// DeviceCount returns the number of GPU that can be used.
func (cu Cuda) DeviceCount() int64 {
cInt := lib.Atc_cuda_device_count()
return int64(cInt)
// IsAvailable returns true if cuda support is available
func (cu Cuda) IsAvailable() bool {
return lib.Atc_cuda_is_available()
// CudnnIsAvailable returns true if cudnn support is available
func (cu Cuda) CudnnIsAvailable() bool {
return lib.Atc_cudnn_is_available()
// CudnnSetBenchmark sets cudnn benchmark mode
// When set cudnn will try to optimize the generators during the first network
// runs and then use the optimized architecture in the following runs. This can
// result in significant performance improvements.
func (cu Cuda) CudnnSetBenchmark(b bool) {
switch b {
case true:
case false:
// Device methods:
// CInt converts the device into its integer code: -1 when the device is named
// "CPU", and d.Value when it is named "CUDA".
func (d Device) CInt() CInt {
switch {
case d.Name == "CPU":
return -1
case d.Name == "CUDA":
// TODO: create a function to retrieve cuda_index
var deviceIndex int = d.Value
return CInt(deviceIndex)
log.Fatal("Not reachable")
func (d Device) OfCInt(v CInt) Device {
switch {
case v == -1:
return Device{Name: "CPU", Value: 1}
case v >= 0:
return CudaBuilder(uint(v))
log.Fatalf("Unexpected device %v", v)
// CudaIfAvailable returns a GPU device if available, else default to CPU
func (d Device) CudaIfAvailable() Device {
switch {
case CUDA.IsAvailable():
return CudaBuilder(0)
return CPU

View File

@ -1,6 +0,0 @@
extern "C" {
void dummy_cuda_dependency();
void dummy_cuda_dependency() {

torch/kind.go Normal file
View File

@ -0,0 +1,55 @@
package torch
import (
type Kind struct {
type CInt = int32
* Uint8,
* Int8,
* Int16,
* Int,
* Int64,
* Half,
* Float,
* Double,
* ComplexHalf,
* ComplexFloat,
* ComplexDouble,
* Bool,
* */
// TODO: recode these types
var (
Bool = Kind{reflect.TypeOf(true)}
Int = Kind{reflect.TypeOf(int(1))}
Int8 = Kind{reflect.TypeOf(int8(1))}
Int16 = Kind{reflect.TypeOf(int16(1))}
Int32 = Kind{reflect.TypeOf(int32(1))}
Int64 = Kind{reflect.TypeOf(int64(1))}
Uint = Kind{reflect.TypeOf(uint(1))}
Uint8 = Kind{reflect.TypeOf(uint8(1))}
Uint16 = Kind{reflect.TypeOf(uint16(1))}
Uint32 = Kind{reflect.TypeOf(uint32(1))}
Uint64 = Kind{reflect.TypeOf(uint64(1))}
Float32 = Kind{reflect.TypeOf(float32(1))}
Float64 = Kind{reflect.TypeOf(float64(1))}
Complex64 = Kind{reflect.TypeOf(complex64(1))}
Complex128 = Kind{reflect.TypeOf(complex128(1))}
String = Kind{reflect.TypeOf("")}
// aliases
Byte = Uint8
// extras
Uintptr = Kind{reflect.TypeOf(uintptr(0))}
UnsafePointer = Kind{reflect.TypeOf(unsafe.Pointer(&Uintptr))}

View File

@ -0,0 +1,3 @@
// NOTE: this file is meant to be generated automatically by running the OCaml
// code in the `gen` folder.
package libtch

torch/libtch/device.go Normal file
View File

@ -0,0 +1,11 @@
// NOTE: functions in this file would be automatically generated
// and named as `c-generated.go`
package libtch
//#include "stdbool.h"
//#include "torch_api.h"
import "C"
func Atc_cuda_device_count() int {
return C.atc_cuda_device_count()

View File

@ -1,4 +1,4 @@
package torch
package libtch
// #cgo CXXFLAGS: -std=c++17 -I${SRCDIR} -g -O3
// #cgo CFLAGS: -I${SRCDIR} -O3 -Wall -Wno-unused-variable -Wno-deprecated-declarations -Wno-c++11-narrowing -g -Wno-sign-compare -Wno-unused-function

torch/libtch/tensor.go Normal file
View File

@ -0,0 +1,26 @@
package libtch
//#include "stdbool.h"
//#include "torch_api.h"
import "C"
import (
type c_void unsafe.Pointer
type size_t uint
type C_tensor struct {
_private uint8
// NewTensor calls the C function at_new_tensor and wraps the result in a
// C_tensor.
//
// NOTE(review): the expression below reads a single uint8 from the address of
// the local variable t, so only one byte of the returned handle is stored in
// _private - confirm this placeholder is intended before relying on it.
func NewTensor() *C_tensor {
t := C.at_new_tensor()
return &C_tensor{_private: *(*uint8)(unsafe.Pointer(&t))}
func AtTensorOfData(vs c_void, dims int64, ndims size_t, elt_size_in_bytes size_t, kind c_int) *C_tensor {
t := C.at_tensor_of_data(vs, dims, ndims, elt_size_in_bytes, kind)
return &C_tensor{_private: *(*uint8)(unsafe.Pointer(&t))}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,20 +0,0 @@
package torch
//#include "stdbool.h"
//#include "torch_api.h"
import "C"
import (
// "fmt"
// "reflect"
type C_tensor struct {
_private uint8
func NewTensor() *C_tensor {
ct := C.at_new_tensor()
return &C_tensor{_private: *(*uint8)(unsafe.Pointer(&ct))}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff