added Conv2D benchmark

2022-05-06 18:10:38 +10:00 · 2022-05-06 18:10:38 +10:00 · 670d1e9cdf
commit 670d1e9cdf
parent 7e4799eb52
3 changed files with 136 additions and 0 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixed incorrect indexing at `dutil/Dataset.Next()`
 - Added `nn.MSELoss()`
 - reworked `ts.Format()`
 - Added conv2d benchmark
 ## [Nofix]
 - ctype `long` caused compiling error in MacOS as noted on [#44]. Not working on linux box.
--- a/ts/README.md
+++ b/ts/README.md
@ -0,0 +1,63 @@
 # BENCHMARK
 ## Convolution 2D
 Ref.
 1. https://tigress-web.princeton.edu/~jdh4/PyTorchPerformanceTuningGuide_GTC2021.pdf
 2. https://github.com/soumith/convnet-benchmarks
 Benchmark tensor operation `conv2d` forward propagation:
 - input shape: `[32, 64, 64, 64]`
 - kernel (weight) shape: `[1, 64, 3, 3]`, i.e. one output channel, 64 input channels, 3x3 window
 ```bash
 goos: linux
 goarch: amd64
 pkg: github.com/sugarme/gotch/ts
 cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
 BenchmarkConv2dCPU-8                 100          21198303 ns/op
 BenchmarkConv2dCUDA-8                100           2201213 ns/op
 ```
 ```bash
 name          time/op
 Conv2dCPU-8   21.2ms ± 0%
 Conv2dCUDA-8  2.20ms ± 0%
 ```
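 Benchmark against PyTorch 1.11 with CUDA 11. Note that the Python script below uses 64 output channels (the Go benchmark uses a `[1, 64, 3, 3]` weight), builds a new layer on every call and, in the CUDA case, copies the input to the GPU inside the timed function, so these figures are not directly comparable with the Go numbers above: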
 ```bash
 conv2d-CPU(x):   56.7 ms
 conv2d-CUDA(x):   38.0 ms
 ```
 The Python benchmark code is shown below:
 ```python
 import torch
 import torch.nn.functional as F
 import timeit
 x = torch.randn(32, 64, 64, 64)
 def conv2dCPU(x):
     """Apply one bias-free 3x3 convolution (64 -> 64 channels) to ``x``.

     A fresh ``torch.nn.Conv2d`` layer is built on every call, so layer
     construction is part of whatever the caller times.

     Args:
         x: input tensor; the benchmark script passes a (32, 64, 64, 64) tensor.

     Returns:
         The output tensor of the convolution.
     """
     layer = torch.nn.Conv2d(
         in_channels=64,
         out_channels=64,
         kernel_size=3,
         stride=1,
         padding=0,
         bias=False,
     )
     y = layer(x)
     return y
 def conv2dCUDA(x):
     """Apply one bias-free 3x3 convolution (64 -> 64 channels) to ``x`` on the GPU.

     The input is copied to the CUDA device and a fresh ``torch.nn.Conv2d``
     layer is built and moved to the device on every call, so both costs are
     part of whatever the caller times.

     Args:
         x: input tensor; the benchmark script passes a (32, 64, 64, 64) CPU tensor.

     Returns:
         The output tensor of the convolution, resident on the CUDA device.
     """
     x = x.cuda()
     conv1 = torch.nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0, bias=False).cuda()
     out = conv1(x)
     # CUDA kernels are launched asynchronously. Wait for the convolution to
     # finish so a surrounding timer measures the computation itself and not
     # just the kernel launch.
     torch.cuda.synchronize()
     return out
 # Time each variant with timeit and report the mean latency per call in
 # milliseconds. `x` is handed to the timed statement through `globals`.
 runs = 100
 cpu_timer = timeit.Timer(
     stmt='conv2dCPU(x)',
     setup='from __main__ import conv2dCPU',
     globals={'x': x},
 )
 cuda_timer = timeit.Timer(
     stmt='conv2dCUDA(x)',
     setup='from __main__ import conv2dCUDA',
     globals={'x': x},
 )
 # The CPU measurement runs (and is printed) before the CUDA one.
 cpu_ms = cpu_timer.timeit(runs) / runs * 1e3
 print(f'conv2d-CPU(x):  {cpu_ms:>5.1f} ms')
 cuda_ms = cuda_timer.timeit(runs) / runs * 1e3
 print(f'conv2d-CUDA(x):  {cuda_ms:>5.1f} ms')
 ```
--- a/ts/benchmark-conv2d_test.go
+++ b/ts/benchmark-conv2d_test.go
@ -0,0 +1,72 @@
 package ts_test
 import (
 	"testing"
 	"github.com/sugarme/gotch"
 	"github.com/sugarme/gotch/ts"
 )
 // BenchmarkConv2dCPU measures the forward pass of ts.Conv2d on the CPU for a
 // [32, 64, 64, 64] float input and a [1, 64, 3, 3] weight, with stride 1,
 // padding 0 and dilation 1.
 //
 // Run with:
 //
 //	GOMAXPROCS=8 go test -bench=BenchmarkConv2d -benchtime=100x -run=^a | tee op-conv-bench.txt
 //	benchstat op-conv-bench.txt
 func BenchmarkConv2dCPU(b *testing.B) {
 	// var shape []int64 = []int64{64, 3, 224, 224}
 	shape := []int64{32, 64, 64, 64}
 	device := gotch.CPU
 	x := ts.MustRandn(shape, gotch.Float, device)
 	defer x.MustDrop()
 	// kDims := []int64{1, 3, 3, 3}
 	kDims := []int64{1, 64, 3, 3}
 	kernelTemplate := []int64{
 		1, 1, 1,
 		1, -8, 1,
 		1, 1, 1,
 	}
 	// Repeat the 3x3 template kDims[0]*kDims[1] times so the flat slice can be
 	// viewed as kDims.
 	nKernels := int(kDims[0] * kDims[1])
 	kernelData := make([]int64, 0, nKernels*len(kernelTemplate))
 	for i := 0; i < nKernels; i++ {
 		kernelData = append(kernelData, kernelTemplate...)
 	}
 	weight := ts.MustOfSlice(kernelData).MustView(kDims, true).MustTotype(gotch.Float, true).MustTo(device, true)
 	defer weight.MustDrop()
 	// The tensor passed in the third position is created once and reused;
 	// allocating it inside the loop would add an allocation to every timed
 	// iteration and leave each of those tensors undropped.
 	// NOTE(review): a fresh ts.NewTensor() presumably means "no bias" - confirm.
 	bias := ts.NewTensor()
 	defer bias.MustDrop()
 	stride := []int64{1, 1}
 	padding := []int64{0, 0}
 	dilation := []int64{1, 1}
 	// Exclude tensor construction above from the measurement.
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		// NOTE(review): the trailing 1 is presumably the group count - confirm.
 		out, err := ts.Conv2d(x, weight, bias, stride, padding, dilation, 1)
 		if err != nil {
 			b.Fatal(err)
 		}
 		out.MustDrop()
 	}
 }
 // BenchmarkConv2dCUDA measures the forward pass of ts.Conv2d on a CUDA device
 // for a [32, 64, 64, 64] float input and a [1, 64, 3, 3] weight, with stride 1,
 // padding 0 and dilation 1. The benchmark is skipped when no CUDA device is
 // selected.
 //
 // Run with:
 //
 //	GOMAXPROCS=8 go test -bench=BenchmarkConv2d -benchtime=100x -run=^a | tee op-conv-bench.txt
 //	benchstat op-conv-bench.txt
 func BenchmarkConv2dCUDA(b *testing.B) {
 	// var shape []int64 = []int64{64, 3, 224, 224}
 	shape := []int64{32, 64, 64, 64}
 	device := gotch.CudaIfAvailable()
 	// NOTE(review): CudaIfAvailable presumably returns gotch.CPU when CUDA is
 	// missing - confirm. Skipping avoids reporting CPU timings under a CUDA
 	// benchmark name.
 	if device == gotch.CPU {
 		b.Skip("CUDA device not available")
 	}
 	x := ts.MustRandn(shape, gotch.Float, device)
 	defer x.MustDrop()
 	// kDims := []int64{1, 3, 3, 3}
 	kDims := []int64{1, 64, 3, 3}
 	kernelTemplate := []int64{
 		1, 1, 1,
 		1, -8, 1,
 		1, 1, 1,
 	}
 	// Repeat the 3x3 template kDims[0]*kDims[1] times so the flat slice can be
 	// viewed as kDims.
 	nKernels := int(kDims[0] * kDims[1])
 	kernelData := make([]int64, 0, nKernels*len(kernelTemplate))
 	for i := 0; i < nKernels; i++ {
 		kernelData = append(kernelData, kernelTemplate...)
 	}
 	weight := ts.MustOfSlice(kernelData).MustView(kDims, true).MustTotype(gotch.Float, true).MustTo(device, true)
 	defer weight.MustDrop()
 	// The tensor passed in the third position is created once and reused;
 	// allocating it inside the loop would add an allocation to every timed
 	// iteration and leave each of those tensors undropped.
 	// NOTE(review): a fresh ts.NewTensor() presumably means "no bias" - confirm.
 	bias := ts.NewTensor()
 	defer bias.MustDrop()
 	stride := []int64{1, 1}
 	padding := []int64{0, 0}
 	dilation := []int64{1, 1}
 	// Exclude tensor construction and the host-to-device copy above from the
 	// measurement.
 	b.ResetTimer()
 	// NOTE(review): CUDA work is typically queued asynchronously; without an
 	// explicit device synchronisation the loop may partly measure launch
 	// latency rather than kernel time - confirm how gotch handles this.
 	for i := 0; i < b.N; i++ {
 		// NOTE(review): the trailing 1 is presumably the group count - confirm.
 		out, err := ts.Conv2d(x, weight, bias, stride, padding, dilation, 1)
 		if err != nil {
 			b.Fatal(err)
 		}
 		out.MustDrop()
 	}
 }