example/yolo - added drawing label and added some example images
1
.gitignore
vendored
|
@ -13,7 +13,6 @@
|
||||||
*.json
|
*.json
|
||||||
*.pt
|
*.pt
|
||||||
*.ot
|
*.ot
|
||||||
*.jpg
|
|
||||||
|
|
||||||
target/
|
target/
|
||||||
_build/
|
_build/
|
||||||
|
|
BIN
example/jit/emu.jpg
Normal file
After Width: | Height: | Size: 367 KiB |
BIN
example/jit/image.jpg
Normal file
After Width: | Height: | Size: 138 KiB |
BIN
example/jit/kangaroo.jpg
Normal file
After Width: | Height: | Size: 330 KiB |
BIN
example/jit/koala.jpg
Normal file
After Width: | Height: | Size: 124 KiB |
BIN
example/jit/pig.jpg
Normal file
After Width: | Height: | Size: 274 KiB |
BIN
example/jit/wombat.jpg
Normal file
After Width: | Height: | Size: 1.8 MiB |
16
example/yolo/README.md
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
# YOLO model
|
||||||
|
|
||||||
|
This is an example implementation of the YOLO v3 model.
|
||||||
|
|
||||||
|
The model weights can be [downloaded here](https://drive.google.com/file/d/16eO9o4rclD929LHweCPW_-7HjKfNKVnA/view?usp=sharing).
|
||||||
|
|
||||||
|
Here is an example of image inference using the YOLO v3 model.
|
||||||
|
|
||||||
|
## Original Image
|
||||||
|
|
||||||
|
![Bondi Beach - Original](bondi.jpg "Bondi Beach")
|
||||||
|
|
||||||
|
## Yolo v3 inference
|
||||||
|
|
||||||
|
![Bondi Beach - Yolo inference](yolo_bondi.jpg "Bondi Beach - YOLO v3")
|
||||||
|
|
BIN
example/yolo/bondi.jpg
Normal file
After Width: | Height: | Size: 224 KiB |
147
example/yolo/draw.go
Normal file
|
@ -0,0 +1,147 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"image"
|
||||||
|
"image/color"
|
||||||
|
// "image/jpeg"
|
||||||
|
"io/ioutil"
|
||||||
|
|
||||||
|
"flag"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"golang.org/x/image/draw"
|
||||||
|
"golang.org/x/image/font"
|
||||||
|
|
||||||
|
"github.com/sugarme/gotch/example/yolo/freetype"
|
||||||
|
ts "github.com/sugarme/gotch/tensor"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Command-line flags that control how label text is rendered.

// dpi is the screen resolution, in dots per inch, used to scale the font.
var dpi = flag.Float64("dpi", 72, "screen resolution in Dots Per Inch")

// fontfile is the path of the TrueType font used for the labels.
var fontfile = flag.String("fontfile", "luxisr.ttf", "filename of the ttf font")

// hinting selects the font hinting policy: "none" or "full".
var hinting = flag.String("hinting", "none", "none | full")

// size is the font size in points.
var size = flag.Float64("size", 12, "font size in points")

// spacing is the line spacing expressed as a multiple of the font size.
var spacing = flag.Float64("spacing", 1.2, "line spacing (e.g. 2 means double spaced)")

// wonb switches the rendering to white text on a black background.
var wonb = flag.Bool("whiteonblack", false, "white text on a black background")

// bound makes the rendered image just large enough to hold the text.
var bound = flag.Bool("bound", true, "generates image with minimum size for the text")
|
||||||
|
|
||||||
|
// loadImage opens the file at the given path and decodes it into an
// image.Image.
//
// The path is made absolute with filepath.Abs before it is opened. Decoding
// goes through image.Decode, so only formats whose decoders have been
// registered with the image package can be read.
//
// On failure the returned image is nil and err reports what went wrong.
func loadImage(file string) (retVal image.Image, err error) {
	imagePath, err := filepath.Abs(file)
	if err != nil {
		return nil, err
	}

	f, err := os.Open(imagePath)
	if err != nil {
		return nil, err
	}
	// The decoded image does not reference the file, so release the
	// descriptor as soon as decoding is finished.
	defer f.Close()

	img, _, err := image.Decode(f)
	if err != nil {
		return nil, err
	}

	return img, nil
}
|
||||||
|
|
||||||
|
func textToImageTs(text []string) *ts.Tensor {
|
||||||
|
offset := 0
|
||||||
|
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
// Read font data
|
||||||
|
fontBytes, err := ioutil.ReadFile(*fontfile)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := freetype.ParseFont(fontBytes)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var width, height int
|
||||||
|
// Initialize the context.
|
||||||
|
c := freetype.NewContext()
|
||||||
|
c.SetDPI(*dpi)
|
||||||
|
c.SetFont(f)
|
||||||
|
c.SetFontSize(*size)
|
||||||
|
|
||||||
|
switch *hinting {
|
||||||
|
default:
|
||||||
|
c.SetHinting(font.HintingNone)
|
||||||
|
case "full":
|
||||||
|
c.SetHinting(font.HintingFull)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Measure the text to calculate the minimum size of the image
|
||||||
|
if *bound {
|
||||||
|
pt := freetype.Pt(offset, offset+int(c.PointToFixed(*size)>>6))
|
||||||
|
for _, s := range text {
|
||||||
|
ptr, err := c.MeasureString(s, pt)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
pt.Y += c.PointToFixed(*size * *spacing)
|
||||||
|
x := int(ptr.X >> 6)
|
||||||
|
if x > width {
|
||||||
|
width = x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
width += offset
|
||||||
|
height = int(pt.Y)>>6 - int(c.PointToFixed(*size)>>6)
|
||||||
|
// Use default size for the image
|
||||||
|
} else {
|
||||||
|
width = 640
|
||||||
|
height = 480
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates image with the specified size
|
||||||
|
fg, bg := image.Black, image.White
|
||||||
|
ruler := color.RGBA{0xdd, 0xdd, 0xdd, 0xff}
|
||||||
|
if *wonb {
|
||||||
|
fg, bg = image.White, image.Black
|
||||||
|
ruler = color.RGBA{0x22, 0x22, 0x22, 0xff}
|
||||||
|
}
|
||||||
|
rgba := image.NewRGBA(image.Rect(0, 0, width, height))
|
||||||
|
draw.Draw(rgba, rgba.Bounds(), bg, image.ZP, draw.Src)
|
||||||
|
c.SetClip(rgba.Bounds())
|
||||||
|
c.SetDst(rgba)
|
||||||
|
c.SetSrc(fg)
|
||||||
|
|
||||||
|
// Draw the guidelines
|
||||||
|
for i := 0; i < 200; i++ {
|
||||||
|
rgba.Set(offset, offset+i, ruler)
|
||||||
|
rgba.Set(offset+i, offset, ruler)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Draw the text.
|
||||||
|
pt := freetype.Pt(offset, offset+int(c.PointToFixed(*size)>>6))
|
||||||
|
for _, s := range text {
|
||||||
|
_, err = c.DrawString(s, pt)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
pt.Y += c.PointToFixed(*size * *spacing)
|
||||||
|
}
|
||||||
|
|
||||||
|
var rgb []float64
|
||||||
|
var r, g, b []float64
|
||||||
|
for i := 0; i < len(rgba.Pix); i += 4 {
|
||||||
|
start := i
|
||||||
|
r = append(r, float64(rgba.Pix[start])/255.0)
|
||||||
|
g = append(g, float64(rgba.Pix[start+1])/255.0)
|
||||||
|
b = append(b, float64(rgba.Pix[start+2])/255.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
rgb = append(rgb, r...)
|
||||||
|
rgb = append(rgb, g...)
|
||||||
|
rgb = append(rgb, b...)
|
||||||
|
|
||||||
|
w := int64(rgba.Rect.Dx())
|
||||||
|
h := int64(rgba.Rect.Dy())
|
||||||
|
|
||||||
|
return ts.MustOfSlice(rgb).MustView([]int64{3, h, w}, false)
|
||||||
|
}
|
366
example/yolo/freetype/freetype.go
Normal file
|
@ -0,0 +1,366 @@
|
||||||
|
// Copyright 2010 The Freetype-Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by your choice of either the
|
||||||
|
// FreeType License or the GNU General Public License version 2 (or
|
||||||
|
// any later version), both of which can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package freetype provides a convenient API to draw text onto an image.
|
||||||
|
// Use the freetype/raster and freetype/truetype packages for lower level
|
||||||
|
// control over rasterization and TrueType parsing.
|
||||||
|
package freetype // import "github.com/golang/freetype"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"image"
|
||||||
|
"image/draw"
|
||||||
|
|
||||||
|
"github.com/golang/freetype/raster"
|
||||||
|
"github.com/golang/freetype/truetype"
|
||||||
|
"golang.org/x/image/font"
|
||||||
|
"golang.org/x/image/math/fixed"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Glyph cache dimensions. A cache slot is chosen from the glyph index modulo
// nGlyphs together with the quantized sub-pixel position of the mask image.
const (
	// nGlyphs is the number of distinct glyph-index buckets.
	nGlyphs = 256
	// nXFractions is the number of quantized sub-pixel positions along x.
	nXFractions = 4
	// nYFractions is the number of quantized sub-pixel positions along y.
	nYFractions = 1
)
|
||||||
|
|
||||||
|
// An entry in the glyph cache is keyed explicitly by the glyph index and
|
||||||
|
// implicitly by the quantized x and y fractional offset. It maps to a mask
|
||||||
|
// image and an offset.
|
||||||
|
type cacheEntry struct {
|
||||||
|
valid bool
|
||||||
|
glyph truetype.Index
|
||||||
|
advanceWidth fixed.Int26_6
|
||||||
|
mask *image.Alpha
|
||||||
|
offset image.Point
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseFont just calls the Parse function from the freetype/truetype package.
|
||||||
|
// It is provided here so that code that imports this package doesn't need
|
||||||
|
// to also include the freetype/truetype package.
|
||||||
|
func ParseFont(b []byte) (*truetype.Font, error) {
|
||||||
|
return truetype.Parse(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pt converts from a co-ordinate pair measured in pixels to a fixed.Point26_6
|
||||||
|
// co-ordinate pair measured in fixed.Int26_6 units.
|
||||||
|
func Pt(x, y int) fixed.Point26_6 {
|
||||||
|
return fixed.Point26_6{
|
||||||
|
X: fixed.Int26_6(x << 6),
|
||||||
|
Y: fixed.Int26_6(y << 6),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A Context holds the state for drawing text in a given font and size.
|
||||||
|
type Context struct {
|
||||||
|
r *raster.Rasterizer
|
||||||
|
f *truetype.Font
|
||||||
|
glyphBuf truetype.GlyphBuf
|
||||||
|
// clip is the clip rectangle for drawing.
|
||||||
|
clip image.Rectangle
|
||||||
|
// dst and src are the destination and source images for drawing.
|
||||||
|
dst draw.Image
|
||||||
|
src image.Image
|
||||||
|
// fontSize and dpi are used to calculate scale. scale is the number of
|
||||||
|
// 26.6 fixed point units in 1 em. hinting is the hinting policy.
|
||||||
|
fontSize, dpi float64
|
||||||
|
scale fixed.Int26_6
|
||||||
|
hinting font.Hinting
|
||||||
|
// cache is the glyph cache.
|
||||||
|
cache [nGlyphs * nXFractions * nYFractions]cacheEntry
|
||||||
|
}
|
||||||
|
|
||||||
|
// PointToFixed converts the given number of points (as in "a 12 point font")
|
||||||
|
// into a 26.6 fixed point number of pixels.
|
||||||
|
func (c *Context) PointToFixed(x float64) fixed.Int26_6 {
|
||||||
|
return fixed.Int26_6(x * float64(c.dpi) * (64.0 / 72.0))
|
||||||
|
}
|
||||||
|
|
||||||
|
// drawContour draws the given closed contour with the given offset.
|
||||||
|
func (c *Context) drawContour(ps []truetype.Point, dx, dy fixed.Int26_6) {
|
||||||
|
if len(ps) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// The low bit of each point's Flags value is whether the point is on the
|
||||||
|
// curve. Truetype fonts only have quadratic Bézier curves, not cubics.
|
||||||
|
// Thus, two consecutive off-curve points imply an on-curve point in the
|
||||||
|
// middle of those two.
|
||||||
|
//
|
||||||
|
// See http://chanae.walon.org/pub/ttf/ttf_glyphs.htm for more details.
|
||||||
|
|
||||||
|
// ps[0] is a truetype.Point measured in FUnits and positive Y going
|
||||||
|
// upwards. start is the same thing measured in fixed point units and
|
||||||
|
// positive Y going downwards, and offset by (dx, dy).
|
||||||
|
start := fixed.Point26_6{
|
||||||
|
X: dx + ps[0].X,
|
||||||
|
Y: dy - ps[0].Y,
|
||||||
|
}
|
||||||
|
others := []truetype.Point(nil)
|
||||||
|
if ps[0].Flags&0x01 != 0 {
|
||||||
|
others = ps[1:]
|
||||||
|
} else {
|
||||||
|
last := fixed.Point26_6{
|
||||||
|
X: dx + ps[len(ps)-1].X,
|
||||||
|
Y: dy - ps[len(ps)-1].Y,
|
||||||
|
}
|
||||||
|
if ps[len(ps)-1].Flags&0x01 != 0 {
|
||||||
|
start = last
|
||||||
|
others = ps[:len(ps)-1]
|
||||||
|
} else {
|
||||||
|
start = fixed.Point26_6{
|
||||||
|
X: (start.X + last.X) / 2,
|
||||||
|
Y: (start.Y + last.Y) / 2,
|
||||||
|
}
|
||||||
|
others = ps
|
||||||
|
}
|
||||||
|
}
|
||||||
|
c.r.Start(start)
|
||||||
|
q0, on0 := start, true
|
||||||
|
for _, p := range others {
|
||||||
|
q := fixed.Point26_6{
|
||||||
|
X: dx + p.X,
|
||||||
|
Y: dy - p.Y,
|
||||||
|
}
|
||||||
|
on := p.Flags&0x01 != 0
|
||||||
|
if on {
|
||||||
|
if on0 {
|
||||||
|
c.r.Add1(q)
|
||||||
|
} else {
|
||||||
|
c.r.Add2(q0, q)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if on0 {
|
||||||
|
// No-op.
|
||||||
|
} else {
|
||||||
|
mid := fixed.Point26_6{
|
||||||
|
X: (q0.X + q.X) / 2,
|
||||||
|
Y: (q0.Y + q.Y) / 2,
|
||||||
|
}
|
||||||
|
c.r.Add2(q0, mid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
q0, on0 = q, on
|
||||||
|
}
|
||||||
|
// Close the curve.
|
||||||
|
if on0 {
|
||||||
|
c.r.Add1(start)
|
||||||
|
} else {
|
||||||
|
c.r.Add2(q0, start)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// rasterize returns the advance width, glyph mask and integer-pixel offset
|
||||||
|
// to render the given glyph at the given sub-pixel offsets.
|
||||||
|
// The 26.6 fixed point arguments fx and fy must be in the range [0, 1).
|
||||||
|
func (c *Context) rasterize(glyph truetype.Index, fx, fy fixed.Int26_6) (
|
||||||
|
fixed.Int26_6, *image.Alpha, image.Point, error) {
|
||||||
|
|
||||||
|
if err := c.glyphBuf.Load(c.f, c.scale, glyph, c.hinting); err != nil {
|
||||||
|
return 0, nil, image.Point{}, err
|
||||||
|
}
|
||||||
|
// Calculate the integer-pixel bounds for the glyph.
|
||||||
|
xmin := int(fx+c.glyphBuf.Bounds.Min.X) >> 6
|
||||||
|
ymin := int(fy-c.glyphBuf.Bounds.Max.Y) >> 6
|
||||||
|
xmax := int(fx+c.glyphBuf.Bounds.Max.X+0x3f) >> 6
|
||||||
|
ymax := int(fy-c.glyphBuf.Bounds.Min.Y+0x3f) >> 6
|
||||||
|
if xmin > xmax || ymin > ymax {
|
||||||
|
return 0, nil, image.Point{}, errors.New("freetype: negative sized glyph")
|
||||||
|
}
|
||||||
|
// A TrueType's glyph's nodes can have negative co-ordinates, but the
|
||||||
|
// rasterizer clips anything left of x=0 or above y=0. xmin and ymin are
|
||||||
|
// the pixel offsets, based on the font's FUnit metrics, that let a
|
||||||
|
// negative co-ordinate in TrueType space be non-negative in rasterizer
|
||||||
|
// space. xmin and ymin are typically <= 0.
|
||||||
|
fx -= fixed.Int26_6(xmin << 6)
|
||||||
|
fy -= fixed.Int26_6(ymin << 6)
|
||||||
|
// Rasterize the glyph's vectors.
|
||||||
|
c.r.Clear()
|
||||||
|
e0 := 0
|
||||||
|
for _, e1 := range c.glyphBuf.Ends {
|
||||||
|
c.drawContour(c.glyphBuf.Points[e0:e1], fx, fy)
|
||||||
|
e0 = e1
|
||||||
|
}
|
||||||
|
a := image.NewAlpha(image.Rect(0, 0, xmax-xmin, ymax-ymin))
|
||||||
|
c.r.Rasterize(raster.NewAlphaSrcPainter(a))
|
||||||
|
return c.glyphBuf.AdvanceWidth, a, image.Point{xmin, ymin}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// glyph returns the advance width, glyph mask and integer-pixel offset to
|
||||||
|
// render the given glyph at the given sub-pixel point. It is a cache for the
|
||||||
|
// rasterize method. Unlike rasterize, p's co-ordinates do not have to be in
|
||||||
|
// the range [0, 1).
|
||||||
|
func (c *Context) glyph(glyph truetype.Index, p fixed.Point26_6) (
|
||||||
|
fixed.Int26_6, *image.Alpha, image.Point, error) {
|
||||||
|
|
||||||
|
// Split p.X and p.Y into their integer and fractional parts.
|
||||||
|
ix, fx := int(p.X>>6), p.X&0x3f
|
||||||
|
iy, fy := int(p.Y>>6), p.Y&0x3f
|
||||||
|
// Calculate the index t into the cache array.
|
||||||
|
tg := int(glyph) % nGlyphs
|
||||||
|
tx := int(fx) / (64 / nXFractions)
|
||||||
|
ty := int(fy) / (64 / nYFractions)
|
||||||
|
t := ((tg*nXFractions)+tx)*nYFractions + ty
|
||||||
|
// Check for a cache hit.
|
||||||
|
if e := c.cache[t]; e.valid && e.glyph == glyph {
|
||||||
|
return e.advanceWidth, e.mask, e.offset.Add(image.Point{ix, iy}), nil
|
||||||
|
}
|
||||||
|
// Rasterize the glyph and put the result into the cache.
|
||||||
|
advanceWidth, mask, offset, err := c.rasterize(glyph, fx, fy)
|
||||||
|
if err != nil {
|
||||||
|
return 0, nil, image.Point{}, err
|
||||||
|
}
|
||||||
|
c.cache[t] = cacheEntry{true, glyph, advanceWidth, mask, offset}
|
||||||
|
return advanceWidth, mask, offset.Add(image.Point{ix, iy}), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// DrawString draws s at p and returns p advanced by the text extent. The text
|
||||||
|
// is placed so that the left edge of the em square of the first character of s
|
||||||
|
// and the baseline intersect at p. The majority of the affected pixels will be
|
||||||
|
// above and to the right of the point, but some may be below or to the left.
|
||||||
|
// For example, drawing a string that starts with a 'J' in an italic font may
|
||||||
|
// affect pixels below and left of the point.
|
||||||
|
//
|
||||||
|
// p is a fixed.Point26_6 and can therefore represent sub-pixel positions.
|
||||||
|
func (c *Context) DrawString(s string, p fixed.Point26_6) (fixed.Point26_6, error) {
|
||||||
|
if c.f == nil {
|
||||||
|
return fixed.Point26_6{}, errors.New("freetype: DrawString called with a nil font")
|
||||||
|
}
|
||||||
|
prev, hasPrev := truetype.Index(0), false
|
||||||
|
for _, rune := range s {
|
||||||
|
index := c.f.Index(rune)
|
||||||
|
if hasPrev {
|
||||||
|
kern := c.f.Kern(c.scale, prev, index)
|
||||||
|
if c.hinting != font.HintingNone {
|
||||||
|
kern = (kern + 32) &^ 63
|
||||||
|
}
|
||||||
|
p.X += kern
|
||||||
|
}
|
||||||
|
advanceWidth, mask, offset, err := c.glyph(index, p)
|
||||||
|
if err != nil {
|
||||||
|
return fixed.Point26_6{}, err
|
||||||
|
}
|
||||||
|
p.X += advanceWidth
|
||||||
|
glyphRect := mask.Bounds().Add(offset)
|
||||||
|
dr := c.clip.Intersect(glyphRect)
|
||||||
|
if !dr.Empty() {
|
||||||
|
mp := image.Point{0, dr.Min.Y - glyphRect.Min.Y}
|
||||||
|
draw.DrawMask(c.dst, dr, c.src, image.ZP, mask, mp, draw.Over)
|
||||||
|
}
|
||||||
|
prev, hasPrev = index, true
|
||||||
|
}
|
||||||
|
return p, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MeasureString is identical to DrawString but only measure the text.
|
||||||
|
func (c *Context) MeasureString(s string, p fixed.Point26_6) (fixed.Point26_6, error) {
|
||||||
|
if c.f == nil {
|
||||||
|
return fixed.Point26_6{}, errors.New("freetype: MeasureString called with a nil font")
|
||||||
|
}
|
||||||
|
prev, hasPrev := truetype.Index(0), false
|
||||||
|
for _, rune := range s {
|
||||||
|
index := c.f.Index(rune)
|
||||||
|
if hasPrev {
|
||||||
|
kern := c.f.Kern(c.scale, prev, index)
|
||||||
|
if c.hinting != font.HintingNone {
|
||||||
|
kern = (kern + 32) &^ 63
|
||||||
|
}
|
||||||
|
p.X += kern
|
||||||
|
}
|
||||||
|
advanceWidth, _, _, err := c.glyph(index, p)
|
||||||
|
if err != nil {
|
||||||
|
return fixed.Point26_6{}, err
|
||||||
|
}
|
||||||
|
p.X += advanceWidth
|
||||||
|
prev, hasPrev = index, true
|
||||||
|
}
|
||||||
|
return p, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// recalc recalculates scale and bounds values from the font size, screen
|
||||||
|
// resolution and font metrics, and invalidates the glyph cache.
|
||||||
|
func (c *Context) recalc() {
|
||||||
|
c.scale = fixed.Int26_6(c.fontSize * c.dpi * (64.0 / 72.0))
|
||||||
|
if c.f == nil {
|
||||||
|
c.r.SetBounds(0, 0)
|
||||||
|
} else {
|
||||||
|
// Set the rasterizer's bounds to be big enough to handle the largest glyph.
|
||||||
|
b := c.f.Bounds(c.scale)
|
||||||
|
xmin := +int(b.Min.X) >> 6
|
||||||
|
ymin := -int(b.Max.Y) >> 6
|
||||||
|
xmax := +int(b.Max.X+63) >> 6
|
||||||
|
ymax := -int(b.Min.Y-63) >> 6
|
||||||
|
c.r.SetBounds(xmax-xmin, ymax-ymin)
|
||||||
|
}
|
||||||
|
for i := range c.cache {
|
||||||
|
c.cache[i] = cacheEntry{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetDPI sets the screen resolution in dots per inch.
|
||||||
|
func (c *Context) SetDPI(dpi float64) {
|
||||||
|
if c.dpi == dpi {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c.dpi = dpi
|
||||||
|
c.recalc()
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetFont sets the font used to draw text.
|
||||||
|
func (c *Context) SetFont(f *truetype.Font) {
|
||||||
|
if c.f == f {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c.f = f
|
||||||
|
c.recalc()
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetFontSize sets the font size in points (as in "a 12 point font").
|
||||||
|
func (c *Context) SetFontSize(fontSize float64) {
|
||||||
|
if c.fontSize == fontSize {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c.fontSize = fontSize
|
||||||
|
c.recalc()
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetHinting sets the hinting policy.
|
||||||
|
func (c *Context) SetHinting(hinting font.Hinting) {
|
||||||
|
c.hinting = hinting
|
||||||
|
for i := range c.cache {
|
||||||
|
c.cache[i] = cacheEntry{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetDst sets the destination image for draw operations.
|
||||||
|
func (c *Context) SetDst(dst draw.Image) {
|
||||||
|
c.dst = dst
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetSrc sets the source image for draw operations. This is typically an
|
||||||
|
// image.Uniform.
|
||||||
|
func (c *Context) SetSrc(src image.Image) {
|
||||||
|
c.src = src
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetClip sets the clip rectangle for drawing.
|
||||||
|
func (c *Context) SetClip(clip image.Rectangle) {
|
||||||
|
c.clip = clip
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(nigeltao): implement Context.SetGamma.
|
||||||
|
|
||||||
|
// NewContext creates a new Context.
|
||||||
|
func NewContext() *Context {
|
||||||
|
return &Context{
|
||||||
|
r: raster.NewRasterizer(0, 0),
|
||||||
|
fontSize: 12,
|
||||||
|
dpi: 72,
|
||||||
|
scale: 12 << 6,
|
||||||
|
}
|
||||||
|
}
|
BIN
example/yolo/luxisr.ttf
Normal file
|
@ -21,8 +21,8 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
model string
|
model string
|
||||||
image string
|
imageFile string
|
||||||
)
|
)
|
||||||
|
|
||||||
type Bbox struct {
|
type Bbox struct {
|
||||||
|
@ -71,6 +71,38 @@ func drawRect(t *ts.Tensor, x1, x2, y1, y2 int64) {
|
||||||
color.MustDrop()
|
color.MustDrop()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func drawLabel(t *ts.Tensor, text []string, x, y int64) {
|
||||||
|
device, err := t.Device()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
label := textToImageTs(text).MustTo(device, true)
|
||||||
|
|
||||||
|
labelSize := label.MustSize()
|
||||||
|
height := labelSize[1]
|
||||||
|
width := labelSize[2]
|
||||||
|
|
||||||
|
imageSize := t.MustSize()
|
||||||
|
lenY := height
|
||||||
|
if lenY > imageSize[1] {
|
||||||
|
lenY = imageSize[1] - y
|
||||||
|
}
|
||||||
|
|
||||||
|
lenX := width
|
||||||
|
if lenX > imageSize[2] {
|
||||||
|
lenX = imageSize[2] - x
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: `narrow` will create a tensor (view) that share same storage with
|
||||||
|
// original one.
|
||||||
|
|
||||||
|
tmp1 := t.MustNarrow(2, x, lenX, false)
|
||||||
|
tmp2 := tmp1.MustNarrow(1, y, lenY, true)
|
||||||
|
tmp2.Copy_(label)
|
||||||
|
tmp2.MustDrop()
|
||||||
|
label.MustDrop()
|
||||||
|
}
|
||||||
|
|
||||||
func report(pred *ts.Tensor, img *ts.Tensor, w int64, h int64) *ts.Tensor {
|
func report(pred *ts.Tensor, img *ts.Tensor, w int64, h int64) *ts.Tensor {
|
||||||
size2, err := pred.Size2()
|
size2, err := pred.Size2()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -176,6 +208,9 @@ func report(pred *ts.Tensor, img *ts.Tensor, w int64, h int64) *ts.Tensor {
|
||||||
drawRect(image, xmin, xmax, max(ymin, ymax-2), ymax)
|
drawRect(image, xmin, xmax, max(ymin, ymax-2), ymax)
|
||||||
drawRect(image, xmin, min(xmax, xmin+2), ymin, ymax)
|
drawRect(image, xmin, min(xmax, xmin+2), ymin, ymax)
|
||||||
drawRect(image, max(xmin, xmax-2), xmax, ymin, ymax)
|
drawRect(image, max(xmin, xmax-2), xmax, ymin, ymax)
|
||||||
|
|
||||||
|
label := fmt.Sprintf("%v; %.3f\n", CocoClasses[classIndex], b.confidence)
|
||||||
|
drawLabel(image, []string{label}, xmin, ymin-15)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -187,7 +222,7 @@ func report(pred *ts.Tensor, img *ts.Tensor, w int64, h int64) *ts.Tensor {
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
flag.StringVar(&model, "model", "../../data/yolo/yolo-v3.pt", "Yolo model weights file")
|
flag.StringVar(&model, "model", "../../data/yolo/yolo-v3.pt", "Yolo model weights file")
|
||||||
flag.StringVar(&image, "image", "../../data/yolo/bondi.jpg", "image file to infer")
|
flag.StringVar(&imageFile, "image", "../../data/yolo/bondi.jpg", "image file to infer")
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
@ -203,7 +238,7 @@ func main() {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
imagePath, err := filepath.Abs(image)
|
imagePath, err := filepath.Abs(imageFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -256,10 +291,6 @@ func main() {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: write label/confidence val next to bouding boxes.
|
|
||||||
// Naive way is write 'write text on image' rather than on tensor.
|
|
||||||
// See this: https://stackoverflow.com/questions/38299930
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func max(v1, v2 int64) (retVal int64) {
|
func max(v1, v2 int64) (retVal int64) {
|
||||||
|
|
BIN
example/yolo/yolo_bondi.jpg
Normal file
After Width: | Height: | Size: 297 KiB |
5
go.mod
|
@ -1,3 +1,8 @@
|
||||||
module github.com/sugarme/gotch
|
module github.com/sugarme/gotch
|
||||||
|
|
||||||
go 1.14
|
go 1.14
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0
|
||||||
|
golang.org/x/image v0.0.0-20200927104501-e162460cd6b5
|
||||||
|
)
|
||||||
|
|
6
go.sum
|
@ -0,0 +1,6 @@
|
||||||
|
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
|
||||||
|
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
|
||||||
|
github.com/sugarme/playgo v0.0.0-20200730185408-03b868cebe81 h1:s43waOvGVYyjw8i+Ll2Qb/ASt+etXG7LhWetEGTLjbc=
|
||||||
|
golang.org/x/image v0.0.0-20200927104501-e162460cd6b5 h1:QelT11PB4FXiDEXucrfNckHoFxwt8USGY1ajP1ZF5lM=
|
||||||
|
golang.org/x/image v0.0.0-20200927104501-e162460cd6b5/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||||
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|