283 lines
8.2 KiB
Go
283 lines
8.2 KiB
Go
package vision
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
nn "github.com/sugarme/gotch/nn"
|
|
ts "github.com/sugarme/gotch/tensor"
|
|
)
|
|
|
|
// ResNet implementation.
|
|
//
|
|
// See "Deep Residual Learning for Image Recognition" He et al. 2015
|
|
// https://arxiv.org/abs/1512.03385
|
|
|
|
func conv2d(path *nn.Path, cIn, cOut, ksize, padding, stride int64) *nn.Conv2D {
|
|
config := nn.DefaultConv2DConfig()
|
|
config.Stride = []int64{stride, stride}
|
|
config.Padding = []int64{padding, padding}
|
|
config.Bias = false
|
|
|
|
return nn.NewConv2D(path, cIn, cOut, ksize, config)
|
|
}
|
|
|
|
func downSample(path *nn.Path, cIn, cOut, stride int64) ts.ModuleT {
|
|
|
|
if stride != 1 || cIn != cOut {
|
|
seq := nn.SeqT()
|
|
seq.Add(conv2d(path.Sub("0"), cIn, cOut, 1, 0, stride))
|
|
seq.Add(nn.BatchNorm2D(path.Sub("1"), cOut, nn.DefaultBatchNormConfig()))
|
|
|
|
return seq
|
|
}
|
|
|
|
return nn.SeqT()
|
|
}
|
|
|
|
func basicBlock(path *nn.Path, cIn, cOut, stride int64) ts.ModuleT {
|
|
|
|
conv1 := conv2d(path.Sub("conv1"), cIn, cOut, 3, 1, stride)
|
|
bn1 := nn.BatchNorm2D(path.Sub("bn1"), cOut, nn.DefaultBatchNormConfig())
|
|
conv2 := conv2d(path.Sub("conv2"), cOut, cOut, 3, 1, 1)
|
|
bn2 := nn.BatchNorm2D(path.Sub("bn2"), cOut, nn.DefaultBatchNormConfig())
|
|
downsample := downSample(path.Sub("downsample"), cIn, cOut, stride)
|
|
|
|
return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor {
|
|
c1 := xs.Apply(conv1)
|
|
bn1 := c1.ApplyT(bn1, train)
|
|
c1.MustDrop()
|
|
relu := bn1.MustRelu(true)
|
|
c2 := relu.Apply(conv2)
|
|
relu.MustDrop()
|
|
bn2 := c2.ApplyT(bn2, train)
|
|
c2.MustDrop()
|
|
|
|
dsl := xs.ApplyT(downsample, train)
|
|
dslAdd := dsl.MustAdd(bn2, true)
|
|
res := dslAdd.MustRelu(true)
|
|
|
|
return res
|
|
})
|
|
}
|
|
|
|
func basicLayer(path *nn.Path, cIn, cOut, stride, cnt int64) ts.ModuleT {
|
|
|
|
layer := nn.SeqT()
|
|
layer.Add(basicBlock(path.Sub("0"), cIn, cOut, stride))
|
|
|
|
for blockIndex := 1; blockIndex < int(cnt); blockIndex++ {
|
|
layer.Add(basicBlock(path.Sub(fmt.Sprint(blockIndex)), cOut, cOut, 1))
|
|
}
|
|
|
|
return layer
|
|
}
|
|
|
|
func resnet(path *nn.Path, nclasses int64, c1, c2, c3, c4 int64) nn.FuncT {
|
|
conv1 := conv2d(path.Sub("conv1"), 3, 64, 7, 3, 2)
|
|
bn1 := nn.BatchNorm2D(path.Sub("bn1"), 64, nn.DefaultBatchNormConfig())
|
|
layer1 := basicLayer(path.Sub("layer1"), 64, 64, 1, c1)
|
|
layer2 := basicLayer(path.Sub("layer2"), 64, 128, 2, c2)
|
|
layer3 := basicLayer(path.Sub("layer3"), 128, 256, 2, c3)
|
|
layer4 := basicLayer(path.Sub("layer4"), 256, 512, 2, c4)
|
|
|
|
if nclasses > 0 {
|
|
// With final layer
|
|
linearConfig := nn.DefaultLinearConfig()
|
|
fc := nn.NewLinear(path.Sub("fc"), 512, nclasses, linearConfig)
|
|
|
|
return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor {
|
|
c1 := xs.Apply(conv1)
|
|
xs.MustDrop()
|
|
bn1 := c1.ApplyT(bn1, train)
|
|
c1.MustDrop()
|
|
relu := bn1.MustRelu(true)
|
|
maxpool := relu.MustMaxPool2d([]int64{3, 3}, []int64{2, 2}, []int64{1, 1}, []int64{1, 1}, false, true)
|
|
l1 := maxpool.ApplyT(layer1, train)
|
|
l2 := l1.ApplyT(layer2, train)
|
|
l1.MustDrop()
|
|
l3 := l2.ApplyT(layer3, train)
|
|
l2.MustDrop()
|
|
l4 := l3.ApplyT(layer4, train)
|
|
l3.MustDrop()
|
|
avgpool := l4.MustAdaptiveAvgPool2d([]int64{1, 1}, false)
|
|
l4.MustDrop()
|
|
fv := avgpool.FlatView()
|
|
avgpool.MustDrop()
|
|
|
|
retVal := fv.ApplyOpt(ts.WithModule(fc))
|
|
fv.MustDrop()
|
|
return retVal
|
|
})
|
|
|
|
} else {
|
|
// No final layer
|
|
return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor {
|
|
c1 := xs.Apply(conv1)
|
|
xs.MustDrop()
|
|
bn1 := c1.ApplyT(bn1, train)
|
|
c1.MustDrop()
|
|
relu := bn1.MustRelu(true)
|
|
maxpool := relu.MustMaxPool2d([]int64{3, 3}, []int64{2, 2}, []int64{1, 1}, []int64{1, 1}, false, true)
|
|
l1 := maxpool.ApplyT(layer1, train)
|
|
maxpool.MustDrop()
|
|
l2 := l1.ApplyT(layer2, train)
|
|
l1.MustDrop()
|
|
l3 := l2.ApplyT(layer3, train)
|
|
l2.MustDrop()
|
|
l4 := l3.ApplyT(layer4, train)
|
|
l3.MustDrop()
|
|
avgpool := l4.MustAdaptiveAvgPool2d([]int64{1, 1}, false)
|
|
l4.MustDrop()
|
|
retVal := avgpool.FlatView()
|
|
avgpool.MustDrop()
|
|
|
|
return retVal
|
|
})
|
|
}
|
|
}
|
|
|
|
// Creates a ResNet-18 model.
|
|
func ResNet18(path *nn.Path, numClasses int64) nn.FuncT {
|
|
return resnet(path, numClasses, 2, 2, 2, 2)
|
|
}
|
|
|
|
func ResNet18NoFinalLayer(path *nn.Path) nn.FuncT {
|
|
return resnet(path, 0, 2, 2, 2, 2)
|
|
}
|
|
|
|
func ResNet34(path *nn.Path, numClasses int64) nn.FuncT {
|
|
return resnet(path, numClasses, 3, 4, 6, 3)
|
|
}
|
|
|
|
func ResNet34NoFinalLayer(path *nn.Path) nn.FuncT {
|
|
return resnet(path, 0, 3, 4, 6, 3)
|
|
}
|
|
|
|
// Bottleneck versions for ResNet 50, 101, and 152.
|
|
func bottleneckBlock(path *nn.Path, cIn, cOut, stride, e int64) ts.ModuleT {
|
|
|
|
eDim := e * cOut
|
|
conv1 := conv2d(path.Sub("conv1"), cIn, cOut, 1, 0, 1)
|
|
bn1 := nn.BatchNorm2D(path.Sub("bn1"), cOut, nn.DefaultBatchNormConfig())
|
|
conv2 := conv2d(path.Sub("conv2"), cOut, cOut, 3, 1, stride)
|
|
bn2 := nn.BatchNorm2D(path.Sub("bn2"), cOut, nn.DefaultBatchNormConfig())
|
|
conv3 := conv2d(path.Sub("conv3"), cOut, eDim, 1, 0, 1)
|
|
bn3 := nn.BatchNorm2D(path.Sub("bn3"), eDim, nn.DefaultBatchNormConfig())
|
|
downsample := downSample(path.Sub("downsample"), cIn, eDim, stride)
|
|
|
|
return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor {
|
|
c1 := xs.Apply(conv1)
|
|
bn1 := c1.ApplyT(bn1, train)
|
|
c1.MustDrop()
|
|
relu1 := bn1.MustRelu(true)
|
|
c2 := relu1.Apply(conv2)
|
|
relu1.MustDrop()
|
|
bn2 := c2.ApplyT(bn2, train)
|
|
relu2 := bn2.MustRelu(true)
|
|
c3 := relu2.Apply(conv3)
|
|
relu2.MustDrop()
|
|
bn3 := c3.ApplyT(bn3, train)
|
|
|
|
dsl := xs.ApplyT(downsample, train)
|
|
add := dsl.MustAdd(bn3, true)
|
|
bn3.MustDrop()
|
|
res := add.MustRelu(true)
|
|
return res
|
|
})
|
|
}
|
|
|
|
func bottleneckLayer(path *nn.Path, cIn, cOut, stride, cnt int64) ts.ModuleT {
|
|
|
|
layer := nn.SeqT()
|
|
layer.Add(bottleneckBlock(path.Sub("0"), cIn, cOut, stride, 4))
|
|
for blockIndex := 1; blockIndex < int(cnt); blockIndex++ {
|
|
layer.Add(bottleneckBlock(path.Sub(fmt.Sprint(blockIndex)), (cOut * 4), cOut, 1, 4))
|
|
}
|
|
|
|
return layer
|
|
}
|
|
|
|
func bottleneckResnet(path *nn.Path, nclasses int64, c1, c2, c3, c4 int64) ts.ModuleT {
|
|
conv1 := conv2d(path.Sub("conv1"), 3, 64, 7, 3, 2)
|
|
bn1 := nn.BatchNorm2D(path.Sub("bn1"), 64, nn.DefaultBatchNormConfig())
|
|
layer1 := bottleneckLayer(path.Sub("layer1"), 64, 64, 1, c1)
|
|
layer2 := bottleneckLayer(path.Sub("layer2"), 4*64, 128, 2, c2)
|
|
layer3 := bottleneckLayer(path.Sub("layer3"), 4*128, 256, 2, c3)
|
|
layer4 := bottleneckLayer(path.Sub("layer4"), 4*256, 512, 2, c4)
|
|
|
|
if nclasses > 0 {
|
|
fc := nn.NewLinear(path.Sub("fc"), 4*512, nclasses, nn.DefaultLinearConfig())
|
|
|
|
return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor {
|
|
c1 := xs.Apply(conv1)
|
|
xs.MustDrop()
|
|
bn1 := c1.ApplyT(bn1, train)
|
|
c1.MustDrop()
|
|
relu := bn1.MustRelu(true)
|
|
maxpool := relu.MustMaxPool2d([]int64{3, 3}, []int64{2, 2}, []int64{1, 1}, []int64{1, 1}, false, true)
|
|
l1 := maxpool.ApplyT(layer1, train)
|
|
l2 := l1.ApplyT(layer2, train)
|
|
l1.MustDrop()
|
|
l3 := l2.ApplyT(layer3, train)
|
|
l2.MustDrop()
|
|
l4 := l3.ApplyT(layer4, train)
|
|
l3.MustDrop()
|
|
avgpool := l4.MustAdaptiveAvgPool2d([]int64{1, 1}, false)
|
|
l4.MustDrop()
|
|
fv := avgpool.FlatView()
|
|
avgpool.MustDrop()
|
|
|
|
retVal := fv.ApplyOpt(ts.WithModule(fc))
|
|
fv.MustDrop()
|
|
return retVal
|
|
})
|
|
} else {
|
|
return nn.NewFuncT(func(xs *ts.Tensor, train bool) *ts.Tensor {
|
|
c1 := xs.Apply(conv1)
|
|
xs.MustDrop()
|
|
bn1 := c1.ApplyT(bn1, train)
|
|
c1.MustDrop()
|
|
relu := bn1.MustRelu(true)
|
|
maxpool := relu.MustMaxPool2d([]int64{3, 3}, []int64{2, 2}, []int64{1, 1}, []int64{1, 1}, false, true)
|
|
l1 := maxpool.ApplyT(layer1, train)
|
|
maxpool.MustDrop()
|
|
l2 := l1.ApplyT(layer2, train)
|
|
l1.MustDrop()
|
|
l3 := l2.ApplyT(layer3, train)
|
|
l2.MustDrop()
|
|
l4 := l3.ApplyT(layer4, train)
|
|
l3.MustDrop()
|
|
avgpool := l4.MustAdaptiveAvgPool2d([]int64{1, 1}, false)
|
|
l4.MustDrop()
|
|
retVal := avgpool.FlatView()
|
|
avgpool.MustDrop()
|
|
|
|
return retVal
|
|
})
|
|
}
|
|
}
|
|
|
|
func ResNet50(path *nn.Path, numClasses int64) ts.ModuleT {
|
|
return bottleneckResnet(path, numClasses, 3, 4, 6, 3)
|
|
}
|
|
|
|
func ResNet50NoFinalLayer(path *nn.Path) ts.ModuleT {
|
|
return bottleneckResnet(path, 0, 3, 4, 6, 3)
|
|
}
|
|
|
|
func ResNet101(path *nn.Path, numClasses int64) ts.ModuleT {
|
|
return bottleneckResnet(path, numClasses, 3, 4, 23, 3)
|
|
}
|
|
|
|
func ResNet101NoFinalLayer(path *nn.Path) ts.ModuleT {
|
|
return bottleneckResnet(path, 0, 3, 4, 23, 3)
|
|
}
|
|
|
|
func ResNet152(path *nn.Path, numClasses int64) ts.ModuleT {
|
|
return bottleneckResnet(path, numClasses, 3, 8, 36, 3)
|
|
}
|
|
|
|
func ResNet150NoFinalLayer(path *nn.Path) ts.ModuleT {
|
|
return bottleneckResnet(path, 0, 3, 8, 36, 3)
|
|
}
|