example/yolo - added drawing label and added some example images

This commit is contained in:
sugarme 2020-11-01 13:15:33 +11:00
parent a6d09580aa
commit ccfea76483
16 changed files with 579 additions and 9 deletions

1
.gitignore vendored
View File

@ -13,7 +13,6 @@
*.json
*.pt
*.ot
*.jpg
target/
_build/

BIN
example/jit/emu.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 367 KiB

BIN
example/jit/image.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 138 KiB

BIN
example/jit/kangaroo.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 330 KiB

BIN
example/jit/koala.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 124 KiB

BIN
example/jit/pig.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 274 KiB

BIN
example/jit/wombat.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 MiB

16
example/yolo/README.md Normal file
View File

@ -0,0 +1,16 @@
# YOLO model
This is an example of implementing the YOLO v3 model.
The model weights can be [downloaded here](https://drive.google.com/file/d/16eO9o4rclD929LHweCPW_-7HjKfNKVnA/view?usp=sharing).
Here is an example of image inference using the YOLO v3 model.
## Original Image
![Bondi Beach - Original](bondi.jpg "Bondi Beach")
## Yolo v3 inference
![Bondi Beach - Yolo inference](yolo_bondi.jpg "Bondi Beach - YOLO v3")

BIN
example/yolo/bondi.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 224 KiB

147
example/yolo/draw.go Normal file
View File

@ -0,0 +1,147 @@
package main
import (
"image"
"image/color"
// "image/jpeg"
"io/ioutil"
"flag"
"log"
"os"
"path/filepath"
"golang.org/x/image/draw"
"golang.org/x/image/font"
"github.com/sugarme/gotch/example/yolo/freetype"
ts "github.com/sugarme/gotch/tensor"
)
// Command-line flags controlling how label text is rendered by textToImageTs.
var (
	// fontfile is the path of the TrueType font file to load.
	fontfile = flag.String("fontfile", "luxisr.ttf", "filename of the ttf font")
	// size is the font size, in points.
	size = flag.Float64("size", 12, "font size in points")
	// dpi is the resolution used to convert points into pixels.
	dpi = flag.Float64("dpi", 72, "screen resolution in Dots Per Inch")
	// hinting selects the font hinting policy; any value other than "full"
	// is treated as no hinting.
	hinting = flag.String("hinting", "none", "none | full")
	// spacing is the line height expressed as a multiple of the font size.
	spacing = flag.Float64("spacing", 1.2, "line spacing (e.g. 2 means double spaced)")

	// wonb switches the rendering to white text on a black background.
	wonb = flag.Bool("whiteonblack", false, "white text on a black background")
	// bound makes the rendered image just large enough for the text; when it
	// is false a fixed 640x480 canvas is used.
	bound = flag.Bool("bound", true, "generates image with minimum size for the text")
)
// loadImage opens the image stored at file and decodes it.
//
// file may be relative; it is resolved to an absolute path before opening.
// Decoding goes through image.Decode, so only formats whose decoders have
// been registered (typically via a blank import such as `_ "image/jpeg"`)
// can be read. On any failure the returned image is nil and err describes
// the problem.
func loadImage(file string) (retVal image.Image, err error) {
	imagePath, err := filepath.Abs(file)
	if err != nil {
		return nil, err
	}

	f, err := os.Open(imagePath)
	if err != nil {
		return nil, err
	}
	// The file is only read, so a Close error carries no useful information;
	// closing is still required to avoid leaking the descriptor.
	defer f.Close()

	img, _, err := image.Decode(f)
	if err != nil {
		return nil, err
	}

	return img, nil
}
// textToImageTs renders the given lines of text with the font and layout
// selected by the package flags and returns the picture as a tensor.
//
// The returned tensor is a view of shape {3, height, width} holding the red,
// green and blue planes (in that order) as float64 values scaled to [0, 1];
// the alpha channel is discarded. When the font cannot be read or parsed, or
// when measuring/drawing a line fails, the error is logged and nil is
// returned, so callers must check for nil.
func textToImageTs(text []string) *ts.Tensor {
	const offset = 0

	// Flags only need to be parsed once per process; this function is called
	// for every label that is drawn.
	if !flag.Parsed() {
		flag.Parse()
	}

	// Read font data
	fontBytes, err := ioutil.ReadFile(*fontfile)
	if err != nil {
		log.Println(err)
		return nil
	}
	f, err := freetype.ParseFont(fontBytes)
	if err != nil {
		log.Println(err)
		return nil
	}

	// Initialize the context.
	c := freetype.NewContext()
	c.SetDPI(*dpi)
	c.SetFont(f)
	c.SetFontSize(*size)
	switch *hinting {
	case "full":
		c.SetHinting(font.HintingFull)
	default:
		c.SetHinting(font.HintingNone)
	}

	// Height of one line of text in whole pixels (26.6 fixed point -> int).
	lineHeight := int(c.PointToFixed(*size) >> 6)

	var width, height int
	if *bound {
		// Measure the text to calculate the minimum size of the image.
		pt := freetype.Pt(offset, offset+lineHeight)
		for _, s := range text {
			end, err := c.MeasureString(s, pt)
			if err != nil {
				log.Println(err)
				return nil
			}
			pt.Y += c.PointToFixed(*size * *spacing)
			if x := int(end.X >> 6); x > width {
				width = x
			}
		}
		width += offset
		height = int(pt.Y)>>6 - lineHeight
	} else {
		// Use default size for the image.
		width = 640
		height = 480
	}

	// Create the image with the computed size and fill the background.
	fg, bg := image.Black, image.White
	ruler := color.RGBA{0xdd, 0xdd, 0xdd, 0xff}
	if *wonb {
		fg, bg = image.White, image.Black
		ruler = color.RGBA{0x22, 0x22, 0x22, 0xff}
	}
	rgba := image.NewRGBA(image.Rect(0, 0, width, height))
	draw.Draw(rgba, rgba.Bounds(), bg, image.Point{}, draw.Src)
	c.SetClip(rgba.Bounds())
	c.SetDst(rgba)
	c.SetSrc(fg)

	// Draw the guidelines along the top and left edges. Set ignores
	// coordinates that fall outside the image, so small images are safe.
	for i := 0; i < 200; i++ {
		rgba.Set(offset, offset+i, ruler)
		rgba.Set(offset+i, offset, ruler)
	}

	// Draw the text.
	pt := freetype.Pt(offset, offset+lineHeight)
	for _, s := range text {
		if _, err = c.DrawString(s, pt); err != nil {
			log.Println(err)
			return nil
		}
		pt.Y += c.PointToFixed(*size * *spacing)
	}

	// Convert the interleaved RGBA bytes into three consecutive planes
	// (all R, then all G, then all B). The buffer is sized up front so each
	// value is written exactly once.
	n := len(rgba.Pix) / 4
	rgb := make([]float64, 3*n)
	for i := 0; i < n; i++ {
		px := rgba.Pix[4*i:]
		rgb[i] = float64(px[0]) / 255.0
		rgb[n+i] = float64(px[1]) / 255.0
		rgb[2*n+i] = float64(px[2]) / 255.0
	}

	w := int64(rgba.Rect.Dx())
	h := int64(rgba.Rect.Dy())

	return ts.MustOfSlice(rgb).MustView([]int64{3, h, w}, false)
}

View File

@ -0,0 +1,366 @@
// Copyright 2010 The Freetype-Go Authors. All rights reserved.
// Use of this source code is governed by your choice of either the
// FreeType License or the GNU General Public License version 2 (or
// any later version), both of which can be found in the LICENSE file.
// The freetype package provides a convenient API to draw text onto an image.
// Use the freetype/raster and freetype/truetype packages for lower level
// control over rasterization and TrueType parsing.
package freetype // import "github.com/golang/freetype"
import (
"errors"
"image"
"image/draw"
"github.com/golang/freetype/raster"
"github.com/golang/freetype/truetype"
"golang.org/x/image/font"
"golang.org/x/image/math/fixed"
)
// Glyph cache dimensions. A cache slot is selected by the glyph index modulo
// nGlyphs together with the quantized sub-pixel position of the mask image.
const (
	// nGlyphs is the number of distinct glyph buckets in the cache.
	nGlyphs = 256
	// nXFractions is the number of quantized sub-pixel positions along x.
	nXFractions = 4
	// nYFractions is the number of quantized sub-pixel positions along y.
	nYFractions = 1
)
// cacheEntry is one slot of the glyph cache. It is keyed explicitly by the
// glyph index and implicitly (through its position in the cache array) by the
// quantized x and y fractional offset. It maps to a mask image and an offset.
// The zero value is an empty slot.
type cacheEntry struct {
// valid reports whether the slot holds a rasterized glyph.
valid bool
// glyph is the index of the cached glyph; it is compared on lookup because
// several glyph indices share the same slot.
glyph truetype.Index
// advanceWidth is the advance width returned by rasterize for the glyph.
advanceWidth fixed.Int26_6
// mask is the rasterized alpha mask of the glyph.
mask *image.Alpha
// offset is the integer-pixel offset at which mask must be placed.
offset image.Point
}
// ParseFont parses TrueType font data by delegating to truetype.Parse. It
// exists so that importers of this package do not have to import the
// freetype/truetype package themselves just to load a font.
func ParseFont(b []byte) (*truetype.Font, error) {
	parsed, err := truetype.Parse(b)
	return parsed, err
}
// Pt converts a co-ordinate pair measured in whole pixels into a
// fixed.Point26_6, whose co-ordinates are in fixed.Int26_6 units
// (64 units per pixel).
func Pt(x, y int) fixed.Point26_6 {
	var p fixed.Point26_6
	p.X = fixed.Int26_6(x << 6)
	p.Y = fixed.Int26_6(y << 6)
	return p
}
// A Context holds the state for drawing text in a given font and size.
// Create one with NewContext and configure it through the Set* methods.
type Context struct {
// r is the rasterizer used to turn glyph outlines into alpha masks.
r *raster.Rasterizer
// f is the current font; it is nil until SetFont is called.
f *truetype.Font
// glyphBuf receives the outline of the glyph currently being rasterized.
glyphBuf truetype.GlyphBuf
// clip is the clip rectangle for drawing.
clip image.Rectangle
// dst and src are the destination and source images for drawing.
dst draw.Image
src image.Image
// fontSize and dpi are used to calculate scale. scale is the number of
// 26.6 fixed point units in 1 em. hinting is the hinting policy.
fontSize, dpi float64
scale fixed.Int26_6
hinting font.Hinting
// cache is the glyph cache.
cache [nGlyphs * nXFractions * nYFractions]cacheEntry
}
// PointToFixed converts a length given in points (as in "a 12 point font")
// into a number of pixels in 26.6 fixed point, using the context's current
// DPI. The fractional part of the result is truncated.
func (c *Context) PointToFixed(x float64) fixed.Int26_6 {
	scaled := x * c.dpi
	return fixed.Int26_6(scaled * (64.0 / 72.0))
}
// drawContour feeds one closed contour to the rasterizer, translated by
// (dx, dy). An empty contour is ignored.
//
// The low bit of each point's Flags value says whether the point lies on the
// curve. TrueType outlines only contain quadratic Bézier curves, so two
// consecutive off-curve points imply an on-curve point halfway between them.
// See http://chanae.walon.org/pub/ttf/ttf_glyphs.htm for more details.
//
// Input points have positive Y going upwards; the points handed to the
// rasterizer have positive Y going downwards, hence the "dy - Y" below.
func (c *Context) drawContour(ps []truetype.Point, dx, dy fixed.Int26_6) {
	n := len(ps)
	if n == 0 {
		return
	}
	const onCurve = 0x01

	// Pick the starting point and the remaining points to walk.
	first := fixed.Point26_6{X: dx + ps[0].X, Y: dy - ps[0].Y}
	start := first
	var rest []truetype.Point
	switch {
	case ps[0].Flags&onCurve != 0:
		// The first point is on the curve: start there.
		rest = ps[1:]
	case ps[n-1].Flags&onCurve != 0:
		// Otherwise start at the last point when that one is on the curve.
		start = fixed.Point26_6{X: dx + ps[n-1].X, Y: dy - ps[n-1].Y}
		rest = ps[:n-1]
	default:
		// Both ends are off-curve: start at the implied midpoint and visit
		// every point.
		last := fixed.Point26_6{X: dx + ps[n-1].X, Y: dy - ps[n-1].Y}
		start = fixed.Point26_6{
			X: (first.X + last.X) / 2,
			Y: (first.Y + last.Y) / 2,
		}
		rest = ps
	}

	c.r.Start(start)
	prev, prevOn := start, true
	for _, p := range rest {
		cur := fixed.Point26_6{X: dx + p.X, Y: dy - p.Y}
		curOn := p.Flags&onCurve != 0
		switch {
		case curOn && prevOn:
			// Straight segment between two on-curve points.
			c.r.Add1(cur)
		case curOn:
			// prev was the control point of a curve ending at cur.
			c.r.Add2(prev, cur)
		case !prevOn:
			// Two off-curve points in a row: end the curve at their midpoint.
			mid := fixed.Point26_6{
				X: (prev.X + cur.X) / 2,
				Y: (prev.Y + cur.Y) / 2,
			}
			c.r.Add2(prev, mid)
		}
		// An off-curve point after an on-curve one emits nothing yet; it is
		// remembered as the pending control point.
		prev, prevOn = cur, curOn
	}

	// Close the curve.
	if prevOn {
		c.r.Add1(start)
		return
	}
	c.r.Add2(prev, start)
}
// rasterize loads the given glyph at the current scale and hinting and
// renders it into a fresh alpha mask. It returns the glyph's advance width,
// the mask, and the integer-pixel offset at which the mask has to be placed.
// The 26.6 fixed point arguments fx and fy are the sub-pixel offsets and
// must be in the range [0, 1).
func (c *Context) rasterize(glyph truetype.Index, fx, fy fixed.Int26_6) (
	fixed.Int26_6, *image.Alpha, image.Point, error) {

	err := c.glyphBuf.Load(c.f, c.scale, glyph, c.hinting)
	if err != nil {
		return 0, nil, image.Point{}, err
	}

	// Integer-pixel bounds of the glyph; the maxima are rounded up.
	bounds := c.glyphBuf.Bounds
	left := int(fx+bounds.Min.X) >> 6
	top := int(fy-bounds.Max.Y) >> 6
	right := int(fx+bounds.Max.X+0x3f) >> 6
	bottom := int(fy-bounds.Min.Y+0x3f) >> 6
	if left > right || top > bottom {
		return 0, nil, image.Point{}, errors.New("freetype: negative sized glyph")
	}

	// A TrueType glyph's nodes can have negative co-ordinates, but the
	// rasterizer clips anything left of x=0 or above y=0. Shifting by
	// (left, top), which are typically <= 0, moves the whole outline into
	// non-negative rasterizer space.
	fx -= fixed.Int26_6(left << 6)
	fy -= fixed.Int26_6(top << 6)

	// Rasterize every contour; Ends holds the end index of each one.
	c.r.Clear()
	begin := 0
	for _, end := range c.glyphBuf.Ends {
		c.drawContour(c.glyphBuf.Points[begin:end], fx, fy)
		begin = end
	}

	mask := image.NewAlpha(image.Rect(0, 0, right-left, bottom-top))
	c.r.Rasterize(raster.NewAlphaSrcPainter(mask))
	return c.glyphBuf.AdvanceWidth, mask, image.Pt(left, top), nil
}
// glyph is the caching front end of rasterize. It returns the advance width,
// the alpha mask and the integer-pixel offset needed to render the given
// glyph at the sub-pixel point p. Unlike rasterize, p's co-ordinates are not
// restricted to the range [0, 1).
func (c *Context) glyph(glyph truetype.Index, p fixed.Point26_6) (
	fixed.Int26_6, *image.Alpha, image.Point, error) {

	const fracMask = 0x3f

	// Separate the integer pixel position from the sub-pixel fraction.
	whole := image.Point{X: int(p.X >> 6), Y: int(p.Y >> 6)}
	fracX, fracY := p.X&fracMask, p.Y&fracMask

	// Cache slot: glyph bucket first, then quantized x, then quantized y.
	slot := int(glyph) % nGlyphs
	slot = slot*nXFractions + int(fracX)/(64/nXFractions)
	slot = slot*nYFractions + int(fracY)/(64/nYFractions)

	// Cache hit: the slot is filled and belongs to this very glyph.
	if hit := c.cache[slot]; hit.valid && hit.glyph == glyph {
		return hit.advanceWidth, hit.mask, hit.offset.Add(whole), nil
	}

	// Cache miss: rasterize and remember the result.
	advance, mask, offset, err := c.rasterize(glyph, fracX, fracY)
	if err != nil {
		return 0, nil, image.Point{}, err
	}
	c.cache[slot] = cacheEntry{
		valid:        true,
		glyph:        glyph,
		advanceWidth: advance,
		mask:         mask,
		offset:       offset,
	}
	return advance, mask, offset.Add(whole), nil
}
// DrawString draws s at p and returns p advanced by the text extent. The text
// is placed so that the left edge of the em square of the first character of s
// and the baseline intersect at p. The majority of the affected pixels will be
// above and to the right of the point, but some may be below or to the left.
// For example, drawing a string that starts with a 'J' in an italic font may
// affect pixels below and left of the point.
//
// p is a fixed.Point26_6 and can therefore represent sub-pixel positions.
//
// Only the part of each glyph that lies inside the clip rectangle is drawn.
// An error is returned when no font has been set or when a glyph cannot be
// rasterized; in that case the returned point is the zero value.
func (c *Context) DrawString(s string, p fixed.Point26_6) (fixed.Point26_6, error) {
	if c.f == nil {
		return fixed.Point26_6{}, errors.New("freetype: DrawString called with a nil font")
	}
	prev, hasPrev := truetype.Index(0), false
	for _, ch := range s {
		index := c.f.Index(ch)
		if hasPrev {
			kern := c.f.Kern(c.scale, prev, index)
			if c.hinting != font.HintingNone {
				// With hinting enabled, round the kerning to whole pixels.
				kern = (kern + 32) &^ 63
			}
			p.X += kern
		}
		advanceWidth, mask, offset, err := c.glyph(index, p)
		if err != nil {
			return fixed.Point26_6{}, err
		}
		p.X += advanceWidth
		glyphRect := mask.Bounds().Add(offset)
		dr := c.clip.Intersect(glyphRect)
		if !dr.Empty() {
			// The mask's origin corresponds to glyphRect.Min in the
			// destination, so the mask pixel matching dr.Min is their
			// difference in BOTH axes. Using 0 for X would draw a glyph that
			// is clipped on the left shifted instead of cropped.
			mp := dr.Min.Sub(glyphRect.Min)
			draw.DrawMask(c.dst, dr, c.src, image.Point{}, mask, mp, draw.Over)
		}
		prev, hasPrev = index, true
	}
	return p, nil
}
// MeasureString advances p exactly as DrawString would for s, applying the
// same kerning and advance widths, but draws nothing. It returns the advanced
// point, or the zero point and an error when no font has been set or a glyph
// cannot be rasterized.
func (c *Context) MeasureString(s string, p fixed.Point26_6) (fixed.Point26_6, error) {
	if c.f == nil {
		return fixed.Point26_6{}, errors.New("freetype: MeasureString called with a nil font")
	}

	var previous truetype.Index
	first := true
	for _, ch := range s {
		current := c.f.Index(ch)
		if !first {
			kern := c.f.Kern(c.scale, previous, current)
			if c.hinting != font.HintingNone {
				kern = (kern + 32) &^ 63
			}
			p.X += kern
		}

		advance, _, _, err := c.glyph(current, p)
		if err != nil {
			return fixed.Point26_6{}, err
		}
		p.X += advance

		previous, first = current, false
	}
	return p, nil
}
// recalc recomputes scale from the font size and screen resolution, resizes
// the rasterizer to fit the font's bounding box (or to zero when no font is
// set), and empties the glyph cache.
func (c *Context) recalc() {
	c.scale = fixed.Int26_6(c.fontSize * c.dpi * (64.0 / 72.0))

	width, height := 0, 0
	if c.f != nil {
		// Make the rasterizer big enough to handle the largest glyph.
		b := c.f.Bounds(c.scale)
		left := int(b.Min.X) >> 6
		top := (-int(b.Max.Y)) >> 6
		right := int(b.Max.X+63) >> 6
		bottom := (-int(b.Min.Y-63)) >> 6
		width, height = right-left, bottom-top
	}
	c.r.SetBounds(width, height)

	// Every cached mask was rendered with the old parameters.
	for i := 0; i < len(c.cache); i++ {
		c.cache[i] = cacheEntry{}
	}
}
// SetDPI sets the screen resolution in dots per inch. When the value
// actually changes, the scale is recalculated and the glyph cache is reset.
func (c *Context) SetDPI(dpi float64) {
	if c.dpi != dpi {
		c.dpi = dpi
		c.recalc()
	}
}
// SetFont sets the font used to draw text. When a different font is given,
// the rasterizer bounds are recalculated and the glyph cache is reset.
func (c *Context) SetFont(f *truetype.Font) {
	if c.f != f {
		c.f = f
		c.recalc()
	}
}
// SetFontSize sets the font size in points (as in "a 12 point font"). When
// the size actually changes, the scale is recalculated and the glyph cache is
// reset.
func (c *Context) SetFontSize(fontSize float64) {
	if c.fontSize != fontSize {
		c.fontSize = fontSize
		c.recalc()
	}
}
// SetHinting sets the hinting policy and empties the glyph cache, since
// cached masks were rendered with the previous policy.
func (c *Context) SetHinting(hinting font.Hinting) {
	c.hinting = hinting
	for i := 0; i < len(c.cache); i++ {
		c.cache[i] = cacheEntry{}
	}
}
// SetDst sets the destination image for draw operations, i.e. the image that
// DrawString draws glyphs onto.
func (c *Context) SetDst(dst draw.Image) {
c.dst = dst
}
// SetSrc sets the source image for draw operations, i.e. the image whose
// colors DrawString paints through each glyph mask. This is typically an
// image.Uniform.
func (c *Context) SetSrc(src image.Image) {
c.src = src
}
// SetClip sets the clip rectangle for drawing. DrawString only touches
// destination pixels inside this rectangle.
func (c *Context) SetClip(clip image.Rectangle) {
c.clip = clip
}
// TODO(nigeltao): implement Context.SetGamma.
// NewContext creates a new Context with a 12 point font size at 72 DPI and a
// zero-sized rasterizer. No font is set; call SetFont before drawing or
// measuring text.
func NewContext() *Context {
	c := new(Context)
	c.r = raster.NewRasterizer(0, 0)
	c.fontSize = 12
	c.dpi = 72
	// 12 pt at 72 DPI is 12 pixels per em, expressed in 26.6 fixed point.
	c.scale = 12 << 6
	return c
}

BIN
example/yolo/luxisr.ttf Normal file

Binary file not shown.

View File

@ -21,8 +21,8 @@ const (
)
var (
model string
image string
model string
imageFile string
)
type Bbox struct {
@ -71,6 +71,38 @@ func drawRect(t *ts.Tensor, x1, x2, y1, y2 int64) {
color.MustDrop()
}
// drawLabel renders text as a small image and copies it into t with its
// top-left corner at column x, row y.
//
// t is indexed as (channel, row, column) here, matching the {3, h, w} tensor
// produced by textToImageTs. NOTE(review): confirm t has 3 channels and a
// dtype that Copy_ can convert to.
//
// Negative x or y are clamped to 0 so that labels of boxes touching the
// top/left border are still drawn, and a label extending past the right or
// bottom border is cropped to the part that fits. If the text cannot be
// rendered (textToImageTs already logged why) or nothing fits, t is left
// untouched.
func drawLabel(t *ts.Tensor, text []string, x, y int64) {
	device, err := t.Device()
	if err != nil {
		log.Fatal(err)
	}

	rendered := textToImageTs(text)
	if rendered == nil {
		return
	}
	label := rendered.MustTo(device, true)

	labelSize := label.MustSize()
	imageSize := t.MustSize()

	// narrow rejects negative start offsets.
	if x < 0 {
		x = 0
	}
	if y < 0 {
		y = 0
	}

	// Clip against the far edges using position + size, not size alone.
	lenY := labelSize[1]
	if y+lenY > imageSize[1] {
		lenY = imageSize[1] - y
	}
	lenX := labelSize[2]
	if x+lenX > imageSize[2] {
		lenX = imageSize[2] - x
	}
	if lenX <= 0 || lenY <= 0 {
		label.MustDrop()
		return
	}

	// Crop the label to the visible size so source and destination shapes
	// agree. Each narrow drops the handle it was called on.
	cropped := label.MustNarrow(2, 0, lenX, true).MustNarrow(1, 0, lenY, true)

	// NOTE: `narrow` will create a tensor (view) that share same storage with
	// original one, so copying into the view writes into t.
	tmp1 := t.MustNarrow(2, x, lenX, false)
	tmp2 := tmp1.MustNarrow(1, y, lenY, true)
	tmp2.Copy_(cropped)

	tmp2.MustDrop()
	cropped.MustDrop()
}
func report(pred *ts.Tensor, img *ts.Tensor, w int64, h int64) *ts.Tensor {
size2, err := pred.Size2()
if err != nil {
@ -176,6 +208,9 @@ func report(pred *ts.Tensor, img *ts.Tensor, w int64, h int64) *ts.Tensor {
drawRect(image, xmin, xmax, max(ymin, ymax-2), ymax)
drawRect(image, xmin, min(xmax, xmin+2), ymin, ymax)
drawRect(image, max(xmin, xmax-2), xmax, ymin, ymax)
label := fmt.Sprintf("%v; %.3f\n", CocoClasses[classIndex], b.confidence)
drawLabel(image, []string{label}, xmin, ymin-15)
}
}
@ -187,7 +222,7 @@ func report(pred *ts.Tensor, img *ts.Tensor, w int64, h int64) *ts.Tensor {
func init() {
flag.StringVar(&model, "model", "../../data/yolo/yolo-v3.pt", "Yolo model weights file")
flag.StringVar(&image, "image", "../../data/yolo/bondi.jpg", "image file to infer")
flag.StringVar(&imageFile, "image", "../../data/yolo/bondi.jpg", "image file to infer")
}
func main() {
@ -203,7 +238,7 @@ func main() {
log.Fatal(err)
}
imagePath, err := filepath.Abs(image)
imagePath, err := filepath.Abs(imageFile)
if err != nil {
log.Fatal(err)
}
@ -256,10 +291,6 @@ func main() {
if err != nil {
log.Fatal(err)
}
// TODO: write label/confidence val next to bouding boxes.
// Naive way is write 'write text on image' rather than on tensor.
// See this: https://stackoverflow.com/questions/38299930
}
func max(v1, v2 int64) (retVal int64) {

BIN
example/yolo/yolo_bondi.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 297 KiB

5
go.mod
View File

@ -1,3 +1,8 @@
module github.com/sugarme/gotch
go 1.14
require (
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0
golang.org/x/image v0.0.0-20200927104501-e162460cd6b5
)

6
go.sum
View File

@ -0,0 +1,6 @@
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/sugarme/playgo v0.0.0-20200730185408-03b868cebe81 h1:s43waOvGVYyjw8i+Ll2Qb/ASt+etXG7LhWetEGTLjbc=
golang.org/x/image v0.0.0-20200927104501-e162460cd6b5 h1:QelT11PB4FXiDEXucrfNckHoFxwt8USGY1ajP1ZF5lM=
golang.org/x/image v0.0.0-20200927104501-e162460cd6b5/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=