Skip to content

Commit 7926dce

Browse files
author
Carlos Gomez
committed
Add smaller duplicate check to google takeout
1 parent 1c65d89 commit 7926dce

File tree

5 files changed

+102
-26
lines changed

5 files changed

+102
-26
lines changed

adapters/googlePhotos/googlephotos.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@ package gp
33
import (
44
"bytes"
55
"context"
6+
"fmt"
67
"io/fs"
78
"log/slog"
9+
"math"
810
"path"
911
"path/filepath"
1012
"sort"
@@ -352,16 +354,81 @@ func (toc *TakeoutCmd) passTwo(ctx context.Context, gOut chan *assets.Group) err
352354
// image *assetFile
353355
// }
354356

357+
// formatBytes renders a byte count as a short human-readable string using
// binary (1024-based) units: "B", "KB", "MB" or "GB".
//
// Values below 1024 (negative values included) are printed as a whole number
// of bytes, e.g. "512 B". Larger values are printed with one decimal, e.g.
// "1.5 MB". "GB" is the largest unit known to this function, so values of
// 1024 GB and above remain expressed in GB, e.g. "5120.0 GB".
func formatBytes(s int64) string {
	const base = 1024.0
	suffixes := []string{"B", "KB", "MB", "GB"}

	// Deliberately not named "bytes": that would shadow the bytes package
	// imported by this file.
	size := float64(s)
	if size < base {
		return fmt.Sprintf("%.0f %s", size, suffixes[0])
	}

	// Scale down until the value fits the unit or the largest unit is reached.
	exp := 0
	for size >= base && exp < len(suffixes)-1 {
		size /= base
		exp++
	}

	rounded := math.Round(size*10) / 10
	// Rounding to one decimal can lift a value such as 1023.96 up to 1024.0.
	// Promote it to the next unit so the result reads "1.0 MB" rather than
	// "1024.0 KB".
	if rounded >= base && exp < len(suffixes)-1 {
		rounded /= base
		exp++
	}
	return fmt.Sprintf("%.1f %s", rounded, suffixes[exp])
}
372+
373+
// FileInfo gathers the catalog entries of one directory that share the same
// file name once the extension is removed. handleDir fills one FileInfo per
// such name and uses it to spot entries smaller than the largest one.
type FileInfo struct {
	// matches holds one record per entry sharing the name.
	matches []struct {
		extension string // extension taken from the entry's original file name
		size      int64  // size of the entry, formatted as a byte count in logs
	}
	// largestSize is the biggest size recorded in matches.
	largestSize int64
}
380+
355381
func (toc *TakeoutCmd) handleDir(ctx context.Context, dir string, gOut chan *assets.Group) error {
356382
catalog := toc.catalogs[dir]
357383

384+
fileSizeByExt := map[string]*FileInfo{}
385+
for name, a := range catalog.matchedFiles {
386+
fileName := strings.TrimSuffix(name, path.Ext(name))
387+
fileInfo, ok := fileSizeByExt[fileName]
388+
if !ok {
389+
fileInfo = &FileInfo{}
390+
fileSizeByExt[fileName] = fileInfo
391+
}
392+
fileInfo.matches = append(fileInfo.matches, struct {
393+
extension string
394+
size int64
395+
}{
396+
extension: path.Ext(a.OriginalFileName),
397+
size: int64(a.FileSize),
398+
})
399+
if int64(a.FileSize) > fileInfo.largestSize {
400+
fileInfo.largestSize = int64(a.FileSize)
401+
}
402+
}
403+
358404
dirEntries := make([]*assets.Asset, 0, len(catalog.matchedFiles))
359405

360406
// Filter and sort the files
361407
for name := range catalog.matchedFiles {
362408
a := catalog.matchedFiles[name]
363409
key := fileKeyTracker{baseName: name, size: int64(a.FileSize)}
364410
track, _ := toc.fileTracker.Load(key) // track := to.fileTracker[key]
411+
412+
fileName := strings.TrimSuffix(name, path.Ext(name))
413+
fileInfo := fileSizeByExt[fileName]
414+
if fileInfo != nil && int64(a.FileSize) < fileInfo.largestSize {
415+
sortedMatches := make([]struct {
416+
extension string
417+
size int64
418+
}, len(fileInfo.matches))
419+
copy(sortedMatches, fileInfo.matches)
420+
sort.Slice(sortedMatches, func(i, j int) bool {
421+
return sortedMatches[i].size > sortedMatches[j].size
422+
})
423+
424+
var matchesInfo []string
425+
for _, m := range sortedMatches {
426+
matchesInfo = append(matchesInfo, fmt.Sprintf("[%s-%s]", m.extension, formatBytes(m.size)))
427+
}
428+
429+
toc.logMessage(ctx, fileevent.AnalysisSmallerLocalDuplicate, a.File, strings.Join(matchesInfo, "; "))
430+
continue
431+
}
365432
if track.status == fileevent.Uploaded {
366433
a.Close()
367434
toc.logMessage(ctx, fileevent.AnalysisLocalDuplicate, a.File, "local duplicate")

app/upload/ui.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,10 +270,11 @@ func (uc *UpCmd) newUI(ctx context.Context, a *app.Application) *uiPage {
270270
ui.addCounter(ui.prepareCounts, 3, "Discarded files", fileevent.DiscoveredDiscarded)
271271
ui.addCounter(ui.prepareCounts, 4, "Unsupported files", fileevent.DiscoveredUnsupported)
272272
ui.addCounter(ui.prepareCounts, 5, "Duplicates in the input", fileevent.AnalysisLocalDuplicate)
273-
ui.addCounter(ui.prepareCounts, 6, "Files with a sidecar", fileevent.AnalysisAssociatedMetadata)
274-
ui.addCounter(ui.prepareCounts, 7, "Files without sidecar", fileevent.AnalysisMissingAssociatedMetadata)
273+
ui.addCounter(ui.prepareCounts, 6, "Smaller Duplicates", fileevent.AnalysisSmallerLocalDuplicate)
274+
ui.addCounter(ui.prepareCounts, 7, "Files with a sidecar", fileevent.AnalysisAssociatedMetadata)
275+
ui.addCounter(ui.prepareCounts, 8, "Files without sidecar", fileevent.AnalysisMissingAssociatedMetadata)
275276

276-
ui.prepareCounts.SetSize(8, 2, 1, 1).SetColumns(30, 10)
277+
ui.prepareCounts.SetSize(9, 2, 1, 1).SetColumns(30, 10)
277278

278279
ui.uploadCounts = tview.NewGrid()
279280
ui.uploadCounts.SetBorder(true).SetTitle("Uploading")

app/upload/upload.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ func NewUploadCommand(ctx context.Context, app *app.Application) *cobra.Command
132132

133133
// Run is called back by the actual asset reader
134134
func (uc *UpCmd) Run(cmd *cobra.Command, adapter adapters.Reader) error {
135-
uc.Mode = UpModeFolder // TODO
135+
uc.Mode = UpModeGoogleTakeout // TODO
136136

137137
// ready to run
138138
ctx := cmd.Context()

internal/fileevent/fileevents.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ const (
3434
AnalysisAssociatedMetadata
3535
AnalysisMissingAssociatedMetadata
3636
AnalysisLocalDuplicate
37+
AnalysisSmallerLocalDuplicate
3738

3839
UploadNotSelected
3940
UploadUpgraded // = "Server's asset upgraded"
@@ -70,6 +71,7 @@ var _code = map[Code]string{
7071
AnalysisAssociatedMetadata: "associated metadata file",
7172
AnalysisMissingAssociatedMetadata: "missing associated metadata file",
7273
AnalysisLocalDuplicate: "file duplicated in the input",
74+
AnalysisSmallerLocalDuplicate: "smaller duplicate",
7375

7476
UploadNotSelected: "file not selected",
7577
UploadUpgraded: "server's asset upgraded with the input",
@@ -100,6 +102,7 @@ var _logLevels = map[Code]slog.Level{
100102
AnalysisAssociatedMetadata: slog.LevelInfo,
101103
AnalysisMissingAssociatedMetadata: slog.LevelWarn,
102104
AnalysisLocalDuplicate: slog.LevelWarn,
105+
AnalysisSmallerLocalDuplicate: slog.LevelWarn,
103106
UploadNotSelected: slog.LevelWarn,
104107
UploadUpgraded: slog.LevelInfo,
105108
UploadServerBetter: slog.LevelInfo,
@@ -178,6 +181,7 @@ func (r *Recorder) Report() string {
178181
DiscoveredDiscarded,
179182
DiscoveredUnsupported,
180183
AnalysisLocalDuplicate,
184+
AnalysisSmallerLocalDuplicate,
181185
AnalysisAssociatedMetadata,
182186
AnalysisMissingAssociatedMetadata,
183187
} {
@@ -195,6 +199,7 @@ func (r *Recorder) Report() string {
195199
DiscoveredDiscarded,
196200
DiscoveredUnsupported,
197201
AnalysisLocalDuplicate,
202+
AnalysisSmallerLocalDuplicate,
198203
AnalysisAssociatedMetadata,
199204
AnalysisMissingAssociatedMetadata,
200205
} {
@@ -258,7 +263,8 @@ func (r *Recorder) TotalProcessed(forcedMissingJSON bool) int64 {
258263
atomic.LoadInt64(&r.counts[UploadServerDuplicate]) +
259264
atomic.LoadInt64(&r.counts[UploadServerBetter]) +
260265
atomic.LoadInt64(&r.counts[DiscoveredDiscarded]) +
261-
atomic.LoadInt64(&r.counts[AnalysisLocalDuplicate])
266+
atomic.LoadInt64(&r.counts[AnalysisLocalDuplicate]) +
267+
atomic.LoadInt64(&r.counts[AnalysisSmallerLocalDuplicate])
262268
if !forcedMissingJSON {
263269
v += atomic.LoadInt64(&r.counts[AnalysisMissingAssociatedMetadata])
264270
}

internal/journal/journal.go

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,26 +14,27 @@ type Journal struct {
1414
type Action string
1515

1616
const (
17-
DiscoveredFile Action = "File"
18-
ScannedImage Action = "Scanned image"
19-
ScannedVideo Action = "Scanned video"
20-
Discarded Action = "Discarded"
21-
Uploaded Action = "Uploaded"
22-
Upgraded Action = "Server's asset upgraded"
23-
ERROR Action = "Error"
24-
LocalDuplicate Action = "Local duplicate"
25-
ServerDuplicate Action = "Server has photo"
26-
Stacked Action = "Stacked"
27-
ServerBetter Action = "Server's asset is better"
28-
Album Action = "Added to an album"
29-
LivePhoto Action = "Live photo"
30-
FailedVideo Action = "Failed video"
31-
Unsupported Action = "File type not supported"
32-
Metadata Action = "Metadata files"
33-
AssociatedMetadata Action = "Associated with metadata"
34-
INFO Action = "Info"
35-
NotSelected Action = "Not selected because options"
36-
ServerError Action = "Server error"
17+
DiscoveredFile Action = "File"
18+
ScannedImage Action = "Scanned image"
19+
ScannedVideo Action = "Scanned video"
20+
Discarded Action = "Discarded"
21+
Uploaded Action = "Uploaded"
22+
Upgraded Action = "Server's asset upgraded"
23+
ERROR Action = "Error"
24+
LocalDuplicate Action = "Local duplicate"
25+
SmallerLocalDuplicate Action = "Smaller Local duplicate"
26+
ServerDuplicate Action = "Server has photo"
27+
Stacked Action = "Stacked"
28+
ServerBetter Action = "Server's asset is better"
29+
Album Action = "Added to an album"
30+
LivePhoto Action = "Live photo"
31+
FailedVideo Action = "Failed video"
32+
Unsupported Action = "File type not supported"
33+
Metadata Action = "Metadata files"
34+
AssociatedMetadata Action = "Associated with metadata"
35+
INFO Action = "Info"
36+
NotSelected Action = "Not selected because options"
37+
ServerError Action = "Server error"
3738
)
3839

3940
func NewJournal(log Logger) *Journal {
@@ -71,7 +72,7 @@ func (j *Journal) AddEntry(file string, action Action, comment ...string) {
7172

7273
func (j *Journal) Report() {
7374
checkFiles := j.counts[ScannedImage] + j.counts[ScannedVideo] + j.counts[Metadata] + j.counts[Unsupported] + j.counts[FailedVideo] + j.counts[Discarded]
74-
handledFiles := j.counts[NotSelected] + j.counts[LocalDuplicate] + j.counts[ServerDuplicate] + j.counts[ServerBetter] + j.counts[Uploaded] + j.counts[Upgraded] + j.counts[ServerError]
75+
handledFiles := j.counts[NotSelected] + j.counts[LocalDuplicate] + j.counts[SmallerLocalDuplicate] + j.counts[ServerDuplicate] + j.counts[ServerBetter] + j.counts[Uploaded] + j.counts[Upgraded] + j.counts[ServerError]
7576
j.Log.OK("Scan of the sources:")
7677
j.Log.OK("%6d files in the input", j.counts[DiscoveredFile])
7778
j.Log.OK("--------------------------------------------------------")
@@ -91,6 +92,7 @@ func (j *Journal) Report() {
9192
j.Log.OK("%6d files already on the server", j.counts[ServerDuplicate])
9293
j.Log.OK("%6d discarded files because of options", j.counts[NotSelected])
9394
j.Log.OK("%6d discarded files because duplicated in the input", j.counts[LocalDuplicate])
95+
j.Log.OK("%6d discarded files because smaller duplicate", j.counts[SmallerLocalDuplicate])
9496
j.Log.OK("%6d discarded files because server has a better image", j.counts[ServerBetter])
9597
j.Log.OK("%6d errors when uploading", j.counts[ServerError])
9698

0 commit comments

Comments
 (0)