diff --git a/CHANGELOG.md b/CHANGELOG.md
index a216a06..98b1cdb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixed incorrect indexing at `dutil/Dataset.Next()`
 - Added `nn.MSELoss()`
 - reworked `ts.Format()`
+- Added conv2d benchmark
 
 ## [Nofix]
 - ctype `long` caused compiling error in MacOS as noted on [#44]. Not working on linux box.
diff --git a/ts/README.md b/ts/README.md
new file mode 100644
index 0000000..3207bc8
--- /dev/null
+++ b/ts/README.md
@@ -0,0 +1,63 @@
+# BENCHMARK
+
+## Convolution 2D
+
+Ref.
+1. https://tigress-web.princeton.edu/~jdh4/PyTorchPerformanceTuningGuide_GTC2021.pdf
+2. https://github.com/soumith/convnet-benchmarks
+
+Benchmark tensor operation `conv2d` forward propagation:
+- input shape: `[32, 64, 64, 64]`
+- kernel: `[64, 3, 3]` (the Go weight tensor has shape `[1, 64, 3, 3]`: 1 output channel, 64 input channels, 3x3 window)
+
+goos: linux
+goarch: amd64
+pkg: github.com/sugarme/gotch/ts
+cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
+BenchmarkConv2dCPU-8 100 21198303 ns/op
+BenchmarkConv2dCUDA-8 100 2201213 ns/op
+
+```bash
+name time/op
+Conv2dCPU-8 21.2ms ± 0%
+Conv2dCUDA-8 2.20ms ± 0%
+```
+
+Benchmark against PyTorch 1.11 CUDA 11 (not a like-for-like comparison: the script below uses 64 output channels and, for CUDA, also times the host-to-device copy and the layer construction)
+
+```bash
+conv2d-CPU(x): 56.7 ms
+conv2d-CUDA(x): 38.0 ms
+```
+
+Benchmark Python code below:
+
+```python
+import torch
+import torch.nn.functional as F
+import timeit
+
+x = torch.randn(32, 64, 64, 64)
+
+def conv2dCPU(x):
+    conv1 = torch.nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0, bias=False)
+    return conv1(x)
+
+def conv2dCUDA(x):
+    x = x.cuda()
+    conv1 = torch.nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0, bias=False).cuda()
+    return conv1(x)
+
+t0 = timeit.Timer(
+    stmt='conv2dCPU(x)',
+    setup='from __main__ import conv2dCPU',
+    globals={'x': x})
+
+t1 = timeit.Timer(
+    stmt='conv2dCUDA(x)',
+    setup='from __main__ import conv2dCUDA',
+    globals={'x': x})
+
+print(f'conv2d-CPU(x): {t0.timeit(100) / 100 * 1e3:>5.1f} ms')
+print(f'conv2d-CUDA(x): {t1.timeit(100) / 100 * 1e3:>5.1f} 
ms')
+```
diff --git a/ts/benchmark-conv2d_test.go b/ts/benchmark-conv2d_test.go
new file mode 100644
index 0000000..d53dfc1
--- /dev/null
+++ b/ts/benchmark-conv2d_test.go
@@ -0,0 +1,90 @@
+package ts_test
+
+import (
+	"testing"
+
+	"github.com/sugarme/gotch"
+	"github.com/sugarme/gotch/ts"
+)
+
+// benchmarkConv2d times repeated ts.Conv2d forward calls on device.
+//
+// Input: a random Float tensor of shape [32, 64, 64, 64]. Weight: a Float
+// tensor viewed as [1, 64, 3, 3], built by repeating a 3x3 template (ones
+// around a central -8). Stride 1, padding 0, dilation 1, groups 1; an empty
+// ts.NewTensor() is passed in the bias position.
+//
+// Tensor construction is excluded from the measurement via b.ResetTimer, and
+// every tensor created here is dropped before returning.
+//
+// NOTE(review): on a CUDA device the work may be queued asynchronously and
+// nothing here synchronizes explicitly, so reported times may not cover the
+// full kernel execution. Confirm before comparing against other frameworks.
+func benchmarkConv2d(b *testing.B, device gotch.Device) {
+	b.Helper()
+
+	// Alternative configuration previously tried: shape {64, 3, 224, 224}
+	// with kDims {1, 3, 3, 3}.
+	shape := []int64{32, 64, 64, 64}
+	kDims := []int64{1, 64, 3, 3}
+	kernelTemplate := []int64{
+		1, 1, 1,
+		1, -8, 1,
+		1, 1, 1,
+	}
+
+	x := ts.MustRandn(shape, gotch.Float, device)
+	defer x.MustDrop()
+
+	// One copy of the template for each of the kDims[0]*kDims[1] 3x3 slices.
+	repeats := int(kDims[0] * kDims[1])
+	kernelData := make([]int64, 0, repeats*len(kernelTemplate))
+	for i := 0; i < repeats; i++ {
+		kernelData = append(kernelData, kernelTemplate...)
+	}
+	weight := ts.MustOfSlice(kernelData).MustView(kDims, true).MustTotype(gotch.Float, true).MustTo(device, true)
+	defer weight.MustDrop()
+
+	// Create the empty bias placeholder once and reuse it; allocating it
+	// inside the loop would leave one undropped tensor per iteration.
+	bias := ts.NewTensor()
+	defer bias.MustDrop()
+
+	stride := []int64{1, 1}
+	padding := []int64{0, 0}
+	dilation := []int64{1, 1}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		out, err := ts.Conv2d(x, weight, bias, stride, padding, dilation, 1)
+		if err != nil {
+			b.Fatal(err)
+		}
+		out.MustDrop()
+	}
+}
+
+// BenchmarkConv2dCPU runs the conv2d benchmark on gotch.CPU.
+//
+// GOMAXPROCS=8 go test -bench=BenchmarkConv2d -benchtime=100x -run=^a | tee op-conv-bench.txt
+// benchstat op-conv-bench.txt
+func BenchmarkConv2dCPU(b *testing.B) {
+	benchmarkConv2d(b, gotch.CPU)
+}
+
+// BenchmarkConv2dCUDA runs the conv2d benchmark on the device returned by
+// gotch.CudaIfAvailable.
+//
+// It is skipped when that device equals gotch.CPU (presumably the fallback
+// when CUDA is unavailable; verify against gotch), so that CPU timings are
+// not reported under the CUDA name.
+//
+// GOMAXPROCS=8 go test -bench=BenchmarkConv2d -benchtime=100x -run=^a | tee op-conv-bench.txt
+// benchstat op-conv-bench.txt
+func BenchmarkConv2dCUDA(b *testing.B) {
+	device := gotch.CudaIfAvailable()
+	if device == gotch.CPU {
+		b.Skip("CUDA is not available")
+	}
+	benchmarkConv2d(b, device)
+}