From 28143397d625cce1f89f4161cba97c0dddd9004c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Thu, 29 Aug 2019 10:18:51 +0200 Subject: [PATCH] Add image.Exif Note that we will probably need to add some metadata cache for this to scale. Fixes #4600 --- go.mod | 1 + go.sum | 2 + resources/image.go | 49 ++++++ resources/image_test.go | 26 +++ resources/images/config.go | 30 ++++ resources/images/config_test.go | 10 ++ resources/images/exif/exif.go | 242 ++++++++++++++++++++++++++++ resources/images/exif/exif_test.go | 83 ++++++++++ resources/images/image.go | 29 +++- resources/resource/resourcetypes.go | 2 + resources/resource_spec.go | 6 +- resources/transform.go | 5 + 12 files changed, 483 insertions(+), 2 deletions(-) create mode 100644 resources/images/exif/exif.go create mode 100644 resources/images/exif/exif_test.go diff --git a/go.mod b/go.mod index b309366e..2bb6d8f3 100644 --- a/go.mod +++ b/go.mod @@ -41,6 +41,7 @@ require ( github.com/pkg/errors v0.8.1 github.com/rogpeppe/go-internal v1.3.0 github.com/russross/blackfriday v1.5.3-0.20190124082335-a477dd164691 + github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd github.com/sanity-io/litter v1.1.0 github.com/spf13/afero v1.2.2 github.com/spf13/cast v1.3.0 diff --git a/go.sum b/go.sum index e1056d54..685d846a 100644 --- a/go.sum +++ b/go.sum @@ -263,6 +263,8 @@ github.com/russross/blackfriday v1.5.2 h1:HyvC0ARfnZBqnXwABFeSZHpKvJHJJfPz81GNue github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/russross/blackfriday v1.5.3-0.20190124082335-a477dd164691 h1:auJkuUc4uOuZNoH9jGLvqVaDLiuCOh/LY+Qw5NBFo4I= github.com/russross/blackfriday v1.5.3-0.20190124082335-a477dd164691/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= +github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd h1:CmH9+J6ZSsIjUK3dcGsnCnO41eRBOnY12zwkn5qVwgc= +github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod 
h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= github.com/sanity-io/litter v1.1.0 h1:BllcKWa3VbZmOZbDCoszYLk7zCsKHz5Beossi8SUcTc= github.com/sanity-io/litter v1.1.0/go.mod h1:CJ0VCw2q4qKU7LaQr3n7UOSHzgEMgcGco7N/SkZQPjw= github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ= diff --git a/resources/image.go b/resources/image.go index 7113284f..b06a8452 100644 --- a/resources/image.go +++ b/resources/image.go @@ -21,6 +21,9 @@ import ( _ "image/png" "os" "strings" + "sync" + + "github.com/gohugoio/hugo/resources/images/exif" "github.com/gohugoio/hugo/resources/internal" @@ -48,12 +51,56 @@ var ( type imageResource struct { *images.Image + // When an image is processed in a chain, this holds the reference to the + // original (first). + root *imageResource + + exifInit sync.Once + exifInitErr error + exif *exif.Exif + baseResource } +// ImageData contains image related data, typically Exif. +type ImageData map[string]interface{} + +func (i *imageResource) Exif() (*exif.Exif, error) { + return i.root.getExif() +} + +func (i *imageResource) getExif() (*exif.Exif, error) { + + i.exifInit.Do(func() { + supportsExif := i.Format == images.JPEG || i.Format == images.TIFF + if !supportsExif { + return + } + + f, err := i.root.ReadSeekCloser() + if err != nil { + i.exifInitErr = err + return + } + defer f.Close() + + x, err := i.getSpec().imaging.DecodeExif(f) + if err != nil { + i.exifInitErr = err + return + } + + i.exif = x + + }) + + return i.exif, i.exifInitErr +} + func (i *imageResource) Clone() resource.Resource { gr := i.baseResource.Clone().(baseResource) return &imageResource{ + root: i.root, Image: i.WithSpec(gr), baseResource: gr, } @@ -74,6 +121,7 @@ func (i *imageResource) cloneWithUpdates(u *transformationUpdate) (baseResource, } return &imageResource{ + root: i.root, Image: img, baseResource: base, }, nil @@ -217,6 +265,7 @@ func (i *imageResource) clone(img image.Image) *imageResource { return &imageResource{ Image: image, + root: 
i.root, baseResource: spec, } } diff --git a/resources/image_test.go b/resources/image_test.go index 330a3af4..4968190e 100644 --- a/resources/image_test.go +++ b/resources/image_test.go @@ -332,6 +332,32 @@ func TestSVGImageContent(t *testing.T) { c.Assert(content.(string), qt.Contains, ``) } +func TestImageExif(t *testing.T) { + c := qt.New(t) + image := fetchImage(c, "sunset.jpg") + + x, err := image.Exif() + c.Assert(err, qt.IsNil) + c.Assert(x, qt.Not(qt.IsNil)) + + c.Assert(x.Date.Format("2006-01-02"), qt.Equals, "2017-10-27") + + // Malaga: https://goo.gl/taazZy + c.Assert(x.Lat, qt.Equals, float64(36.59744166666667)) + c.Assert(x.Long, qt.Equals, float64(-4.50846)) + + v, found := x.Values["LensModel"] + c.Assert(found, qt.Equals, true) + lensModel, ok := v.(string) + c.Assert(ok, qt.Equals, true) + c.Assert(lensModel, qt.Equals, "smc PENTAX-DA* 16-50mm F2.8 ED AL [IF] SDM") + + resized, _ := image.Resize("300x200") + x2, _ := resized.Exif() + c.Assert(x2, qt.Equals, x) + +} + func TestImageOperationsGolden(t *testing.T) { c := qt.New(t) c.Parallel() diff --git a/resources/images/config.go b/resources/images/config.go index b6121efa..a290922a 100644 --- a/resources/images/config.go +++ b/resources/images/config.go @@ -119,6 +119,11 @@ func DecodeConfig(m map[string]interface{}) (Imaging, error) { i.ResampleFilter = filter } + if strings.TrimSpace(i.Exif.IncludeFields) == "" && strings.TrimSpace(i.Exif.ExcludeFields) == "" { + // Don't change this for no good reason. Please don't. + i.Exif.ExcludeFields = "GPS|Exif|Exposure[M|P|B]|Contrast|Resolution|Sharp|JPEG|Metering|Sensing|Saturation|ColorSpace|Flash|WhiteBalance" + } + return i, nil } @@ -279,4 +284,29 @@ type Imaging struct { // The anchor to use in Fill. Default is "smart", i.e. Smart Crop. Anchor string + + Exif ExifConfig +} + +type ExifConfig struct { + + // Regexp matching the Exif fields you want from the (massive) set of Exif info + // available. 
As we cache this info to disk, this is for performance and + // disk space reasons more than anything. + // If you want it all, put ".*" in this config setting. + // Note that if neither this nor ExcludeFields is set, Hugo will return a small + // default set. + IncludeFields string + + // Regexp matching the Exif fields you want to exclude. This may be easier to use + // than IncludeFields above, depending on what you want. + ExcludeFields string + + // Hugo extracts the "photo taken" date/time into .Date by default. + // Set this to true to turn it off. + DisableDate bool + + // Hugo extracts the "photo taken where" (GPS latitude and longitude) into + // .Long and .Lat. Set this to true to turn it off. + DisableLatLong bool } diff --git a/resources/images/config_test.go b/resources/images/config_test.go index 91f4b663..46b0c985 100644 --- a/resources/images/config_test.go +++ b/resources/images/config_test.go @@ -64,6 +64,16 @@ func TestDecodeConfig(t *testing.T) { }) c.Assert(err, qt.IsNil) c.Assert(imaging.Anchor, qt.Equals, "smart") + + imaging, err = DecodeConfig(map[string]interface{}{ + "exif": map[string]interface{}{ + "disableLatLong": true, + }, + }) + c.Assert(err, qt.IsNil) + c.Assert(imaging.Exif.DisableLatLong, qt.Equals, true) + c.Assert(imaging.Exif.ExcludeFields, qt.Equals, "GPS|Exif|Exposure[M|P|B]|Contrast|Resolution|Sharp|JPEG|Metering|Sensing|Saturation|ColorSpace|Flash|WhiteBalance") + } func TestDecodeImageConfig(t *testing.T) { diff --git a/resources/images/exif/exif.go b/resources/images/exif/exif.go new file mode 100644 index 00000000..7a3c982c --- /dev/null +++ b/resources/images/exif/exif.go @@ -0,0 +1,242 @@ +// Copyright 2019 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package exif + +import ( + "bytes" + "fmt" + "io" + "math/big" + "regexp" + "strings" + "time" + "unicode" + "unicode/utf8" + + _exif "github.com/rwcarlsen/goexif/exif" + "github.com/rwcarlsen/goexif/tiff" +) + +const exifTimeLayout = "2006:01:02 15:04:05" + +type Exif struct { + Lat float64 + Long float64 + Date time.Time + Values map[string]interface{} +} + +type Decoder struct { + includeFieldsRe *regexp.Regexp + excludeFieldsrRe *regexp.Regexp + noDate bool + noLatLong bool +} + +func IncludeFields(expression string) func(*Decoder) error { + return func(d *Decoder) error { + re, err := compileRegexp(expression) + if err != nil { + return err + } + d.includeFieldsRe = re + return nil + } +} + +func ExcludeFields(expression string) func(*Decoder) error { + return func(d *Decoder) error { + re, err := compileRegexp(expression) + if err != nil { + return err + } + d.excludeFieldsrRe = re + return nil + } +} + +func WithLatLongDisabled(disabled bool) func(*Decoder) error { + return func(d *Decoder) error { + d.noLatLong = disabled + return nil + } +} + +func WithDateDisabled(disabled bool) func(*Decoder) error { + return func(d *Decoder) error { + d.noDate = disabled + return nil + } +} + +func compileRegexp(expression string) (*regexp.Regexp, error) { + expression = strings.TrimSpace(expression) + if expression == "" { + return nil, nil + } + if !strings.HasPrefix(expression, "(") { + // Make it case insensitive + expression = "(?i)" + expression + } + + return regexp.Compile(expression) + +} + +func NewDecoder(options ...func(*Decoder) error) 
(*Decoder, error) { + d := &Decoder{} + for _, opt := range options { + if err := opt(d); err != nil { + return nil, err + } + } + + return d, nil +} + +func (d *Decoder) Decode(r io.Reader) (ex *Exif, err error) { + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("Exif failed: %v", r) + } + }() + + var x *_exif.Exif + x, err = _exif.Decode(r) + if err != nil { + if err.Error() == "EOF" { + + // Found no Exif + return nil, nil + } + return + } + + var tm time.Time + var lat, long float64 + + if !d.noDate { + tm, _ = x.DateTime() + } + + if !d.noLatLong { + lat, long, _ = x.LatLong() + } + + walker := &exifWalker{x: x, vals: make(map[string]interface{}), includeMatcher: d.includeFieldsRe, excludeMatcher: d.excludeFieldsrRe} + if err = x.Walk(walker); err != nil { + return + } + + ex = &Exif{Lat: lat, Long: long, Date: tm, Values: walker.vals} + + return +} + +func decodeTag(x *_exif.Exif, f _exif.FieldName, t *tiff.Tag) (interface{}, error) { + switch t.Format() { + case tiff.StringVal, tiff.UndefVal: + s := nullString(t.Val) + if strings.Contains(string(f), "DateTime") { + if d, err := tryParseDate(x, s); err == nil { + return d, nil + } + } + return s, nil + case tiff.OtherVal: + return "unknown", nil + } + + var rv []interface{} + + for i := 0; i < int(t.Count); i++ { + switch t.Format() { + case tiff.RatVal: + n, d, _ := t.Rat2(i) + rat := big.NewRat(n, d) + if n == 1 { + rv = append(rv, rat) + } else { + f, _ := rat.Float64() + rv = append(rv, f) + } + + case tiff.FloatVal: + v, _ := t.Float(i) + rv = append(rv, v) + case tiff.IntVal: + v, _ := t.Int(i) + rv = append(rv, v) + } + } + + if t.Count == 1 { + if len(rv) == 1 { + return rv[0], nil + } + } + + return rv, nil + +} + +// Code borrowed from exif.DateTime and adjusted. +func tryParseDate(x *_exif.Exif, s string) (time.Time, error) { + dateStr := strings.TrimRight(s, "\x00") + // TODO(bep): look for timezone offset, GPS time, etc. 
+ timeZone := time.Local + if tz, _ := x.TimeZone(); tz != nil { + timeZone = tz + } + return time.ParseInLocation(exifTimeLayout, dateStr, timeZone) + +} + +type exifWalker struct { + x *_exif.Exif + vals map[string]interface{} + includeMatcher *regexp.Regexp + excludeMatcher *regexp.Regexp +} + +func (e *exifWalker) Walk(f _exif.FieldName, tag *tiff.Tag) error { + name := string(f) + if e.excludeMatcher != nil && e.excludeMatcher.MatchString(name) { + return nil + } + if e.includeMatcher != nil && !e.includeMatcher.MatchString(name) { + return nil + } + val, err := decodeTag(e.x, f, tag) + if err != nil { + return err + } + e.vals[name] = val + return nil +} + +func nullString(in []byte) string { + var rv bytes.Buffer + for _, b := range in { + if unicode.IsPrint(rune(b)) { + rv.WriteByte(b) + } + } + rvs := rv.String() + if utf8.ValidString(rvs) { + return rvs + } + + return "" +} diff --git a/resources/images/exif/exif_test.go b/resources/images/exif/exif_test.go new file mode 100644 index 00000000..eee60c08 --- /dev/null +++ b/resources/images/exif/exif_test.go @@ -0,0 +1,83 @@ +// Copyright 2019 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package exif + +import ( + "os" + "path/filepath" + "testing" + "time" + + "github.com/gohugoio/hugo/htesting/hqt" + + qt "github.com/frankban/quicktest" +) + +func TestExif(t *testing.T) { + c := qt.New(t) + f, err := os.Open(filepath.FromSlash("../../testdata/sunset.jpg")) + c.Assert(err, qt.IsNil) + defer f.Close() + + d, err := NewDecoder(IncludeFields("Lens|Date")) + c.Assert(err, qt.IsNil) + x, err := d.Decode(f) + c.Assert(err, qt.IsNil) + c.Assert(x.Date.Format("2006-01-02"), qt.Equals, "2017-10-27") + + // Malaga: https://goo.gl/taazZy + c.Assert(x.Lat, qt.Equals, float64(36.59744166666667)) + c.Assert(x.Long, qt.Equals, float64(-4.50846)) + + v, found := x.Values["LensModel"] + c.Assert(found, qt.Equals, true) + lensModel, ok := v.(string) + c.Assert(ok, qt.Equals, true) + c.Assert(lensModel, qt.Equals, "smc PENTAX-DA* 16-50mm F2.8 ED AL [IF] SDM") + + v, found = x.Values["DateTime"] + c.Assert(found, qt.Equals, true) + c.Assert(v, hqt.IsSameType, time.Time{}) + +} + +func TestExifPNG(t *testing.T) { + c := qt.New(t) + + f, err := os.Open(filepath.FromSlash("../../testdata/gohugoio.png")) + c.Assert(err, qt.IsNil) + defer f.Close() + + d, err := NewDecoder() + c.Assert(err, qt.IsNil) + _, err = d.Decode(f) + c.Assert(err, qt.Not(qt.IsNil)) +} + +func BenchmarkDecodeExif(b *testing.B) { + c := qt.New(b) + f, err := os.Open(filepath.FromSlash("../../testdata/sunset.jpg")) + c.Assert(err, qt.IsNil) + defer f.Close() + + d, err := NewDecoder() + c.Assert(err, qt.IsNil) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err = d.Decode(f) + c.Assert(err, qt.IsNil) + f.Seek(0, 0) + } +} diff --git a/resources/images/image.go b/resources/images/image.go index d04c1e93..aa7d567a 100644 --- a/resources/images/image.go +++ b/resources/images/image.go @@ -22,6 +22,8 @@ import ( "io" "sync" + "github.com/gohugoio/hugo/resources/images/exif" + "github.com/disintegration/gift" "golang.org/x/image/bmp" "golang.org/x/image/tiff" @@ -154,8 +156,33 @@ func (i *Image) 
initConfig() error { return nil } +func NewImageProcessor(cfg Imaging) (*ImageProcessor, error) { + e := cfg.Exif + exifDecoder, err := exif.NewDecoder( + exif.WithDateDisabled(e.DisableDate), + exif.WithLatLongDisabled(e.DisableLatLong), + exif.ExcludeFields(e.ExcludeFields), + exif.IncludeFields(e.IncludeFields), + ) + + if err != nil { + return nil, err + } + + return &ImageProcessor{ + Cfg: cfg, + exifDecoder: exifDecoder, + }, nil + +} + type ImageProcessor struct { - Cfg Imaging + Cfg Imaging + exifDecoder *exif.Decoder +} + +func (p *ImageProcessor) DecodeExif(r io.Reader) (*exif.Exif, error) { + return p.exifDecoder.Decode(r) } func (p *ImageProcessor) ApplyFiltersFromConfig(src image.Image, conf ImageConfig) (image.Image, error) { diff --git a/resources/resource/resourcetypes.go b/resources/resource/resourcetypes.go index 4322b3c1..f6b6d2af 100644 --- a/resources/resource/resourcetypes.go +++ b/resources/resource/resourcetypes.go @@ -17,6 +17,7 @@ import ( "github.com/disintegration/gift" "github.com/gohugoio/hugo/langs" "github.com/gohugoio/hugo/media" + "github.com/gohugoio/hugo/resources/images/exif" "github.com/gohugoio/hugo/common/hugio" ) @@ -49,6 +50,7 @@ type ImageOps interface { Fit(spec string) (Image, error) Resize(spec string) (Image, error) Filter(filters ...gift.Filter) (Image, error) + Exif() (*exif.Exif, error) } type ResourceTypesProvider interface { diff --git a/resources/resource_spec.go b/resources/resource_spec.go index 528a2bd5..cd8d6147 100644 --- a/resources/resource_spec.go +++ b/resources/resource_spec.go @@ -47,7 +47,10 @@ func NewSpec( return nil, err } - imaging := &images.ImageProcessor{Cfg: imgConfig} + imaging, err := images.NewImageProcessor(imgConfig) + if err != nil { + return nil, err + } if logger == nil { logger = loggers.NewErrorLogger() @@ -273,6 +276,7 @@ func (r *Spec) newResource(sourceFs afero.Fs, fd ResourceSourceDescriptor) (reso Image: images.NewImage(imgFormat, r.imaging, nil, gr), baseResource: gr, } + 
ir.root = ir return newResourceAdapter(gr.spec, fd.LazyPublish, ir), nil } diff --git a/resources/transform.go b/resources/transform.go index 0792515c..eb282eab 100644 --- a/resources/transform.go +++ b/resources/transform.go @@ -22,6 +22,7 @@ import ( "sync" "github.com/disintegration/gift" + "github.com/gohugoio/hugo/resources/images/exif" "github.com/spf13/afero" bp "github.com/gohugoio/hugo/bufferpool" @@ -181,6 +182,10 @@ func (r *resourceAdapter) Height() int { return r.getImageOps().Height() } +func (r *resourceAdapter) Exif() (*exif.Exif, error) { + return r.getImageOps().Exif() +} + func (r *resourceAdapter) Key() string { r.init(false, false) return r.target.(resource.Identifier).Key()