Skip to content

Commit a12c6e3

Browse files
authored
Merge pull request #46 from rofinn/rf/sync-fix
Fix sync to always respect walkpath order.
2 parents fb115af + 3dd463f commit a12c6e3

File tree

2 files changed

+163
-48
lines changed

2 files changed

+163
-48
lines changed

src/path.jl

Lines changed: 77 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -514,37 +514,93 @@ function Base.mv(src::AbstractPath, dst::AbstractPath; force=false)
514514
end
515515

516516
"""
517-
sync(src::AbstractPath, dst::AbstractPath; delete=false)
517+
sync([f::Function,] src::AbstractPath, dst::AbstractPath; delete=false, overwrite=true)
518518
519-
Recursively copy new and updated files from the source path to the
520-
destination. If delete is true then files at the destination that don't
521-
exist at the source will be removed.
519+
Recursively copy new and updated files from the source path to the destination.
520+
If delete is true then files at the destination that don't exist at the source will be removed.
521+
By default, source files are sent to the destination if they have different sizes or the source has a newer
522+
last modified date.
523+
524+
Optionally, you can specify a function `f` which will take a `src` and `dst` path and return
525+
true if the `src` should be sent. This may be useful if you'd like to use a checksum for
526+
comparison.
522527
"""
523-
function sync(src::AbstractPath, dst::AbstractPath; delete=false)
524-
# Create an index of all of the source files
525-
index = Dict(Tuple(setdiff(p.segments, src.segments)) => p for p in walkpath(src))
528+
function sync(src::AbstractPath, dst::AbstractPath; kwargs...)
529+
sync(should_sync, src, dst; kwargs...)
530+
end
526531

527-
if exists(dst)
528-
for p in walkpath(dst)
529-
k = Tuple(setdiff(p.segments, dst.segments))
532+
function sync(f::Function, src::AbstractPath, dst::AbstractPath; delete=false, overwrite=true)
533+
# Throw an error if the source path doesn't exist at all
534+
exists(src) || throw(ArgumentError("$src does not exist"))
535+
536+
# If the top level source is just a file then try to just sync that
537+
# without calling walkpath
538+
if isfile(src)
539+
# If the destination exists then we should make sure it is a file and check
540+
# if we should copy the source over.
541+
if exists(dst)
542+
isfile(dst) || throw(ArgumentError("$dst is not a file"))
543+
if overwrite && f(src, dst)
544+
cp(src, dst; force=true)
545+
end
546+
else
547+
cp(src, dst)
548+
end
549+
else
550+
isdir(src) || throw(ArgumentError("$src is neither a file or directory."))
551+
if exists(dst) && !isdir(dst)
552+
throw(ArgumentError("$dst is not a directory while $src is"))
553+
end
554+
555+
# Create an index of all of the source files
556+
src_paths = collect(walkpath(src))
557+
index = Dict(
558+
Tuple(setdiff(p.segments, src.segments)) => i for (i, p) in enumerate(src_paths)
559+
)
530560

531-
if haskey(index, k)
532-
if modified(index[k]) > modified(p)
533-
cp(index[k], p; force=true)
561+
if exists(dst)
562+
for p in walkpath(dst)
563+
k = Tuple(setdiff(p.segments, dst.segments))
564+
565+
if haskey(index, k)
566+
src_path = src_paths[index[k]]
567+
if overwrite && f(src_path, p)
568+
cp(src_path, p; force=true)
569+
end
570+
571+
delete!(index, k)
572+
elseif delete
573+
rm(p; recursive=true)
534574
end
575+
end
535576

536-
delete!(index, k)
537-
elseif delete
538-
rm(p; recursive=true)
577+
# Finally, copy over files that don't exist at the destination
578+
# But we need to iterate through it in a way that respects the original
579+
# walkpath order; otherwise we may end up trying to copy a file before its parents.
580+
index_pairs = collect(pairs(index))
581+
index_pairs = index_pairs[sortperm(last.(index_pairs))]
582+
for (seg, i) in index_pairs
583+
cp(src_paths[i], Path(dst, tuple(dst.segments..., seg...)); force=true)
539584
end
585+
else
586+
cp(src, dst)
540587
end
588+
end
589+
end
541590

542-
# Finally, copy over files that don't exist at the destination
543-
for (seg, p) in index
544-
cp(p, Path(dst, tuple(dst.segments..., seg...)); force=true)
545-
end
591+
function should_sync(src::AbstractPath, dst::AbstractPath)
592+
src_stat = stat(src)
593+
dst_stat = stat(dst)
594+
595+
if src_stat.size != dst_stat.size || src_stat.mtime > dst_stat.mtime
596+
@debug(
597+
"syncing: $src -> $dst, " *
598+
"size: $(src_stat.size) -> $(dst_stat.size), " *
599+
"modified_time: $(src_stat.mtime) -> $(dst_stat.mtime)"
600+
)
601+
return true
546602
else
547-
cp(src, dst)
603+
return false
548604
end
549605
end
550606

src/test.jl

Lines changed: 86 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -612,34 +612,93 @@ module TestPaths
612612

613613
function test_sync(ps::PathSet)
614614
@testset "sync" begin
615-
# Base cp case
616-
sync(ps.foo, ps.qux / "foo")
617-
@test exists(ps.qux / "foo" / "baz.txt")
615+
@testset "empty destination" begin
616+
sync(ps.foo, ps.qux / "foo")
617+
@test exists(ps.qux / "foo" / "baz.txt")
618618

619-
# Test that the copied baz file has a newer modified time
620-
baz_t = modified(ps.qux / "foo" / "baz.txt")
621-
@test modified(ps.baz) < baz_t
622-
623-
# Don't cp unchanged files when a new file is added
624-
# NOTE: sleep before we make a new file, so it's clear tha the
625-
# modified time has changed.
626-
sleep(1)
627-
write(ps.foo / "test.txt", "New File")
628-
sync(ps.foo, ps.qux / "foo")
629-
@test exists(ps.qux / "foo" / "test.txt")
630-
@test read(ps.qux / "foo" / "test.txt", String) == "New File"
631-
@test modified(ps.qux / "foo" / "baz.txt") == baz_t
632-
@test modified(ps.qux / "foo" / "test.txt") > baz_t
633-
634-
# Test not deleting a file on sync
635-
rm(ps.foo / "test.txt")
636-
sync(ps.foo, ps.qux / "foo")
637-
@test exists(ps.qux / "foo" / "test.txt")
638-
639-
# Test passing delete flag
640-
sync(ps.foo, ps.qux / "foo"; delete=true)
641-
@test !exists(ps.qux / "foo" / "test.txt")
642-
rm(ps.qux / "foo"; recursive=true)
619+
# Test that the copied baz file has a newer modified time
620+
baz_t = modified(ps.qux / "foo" / "baz.txt")
621+
@test modified(ps.baz) < baz_t
622+
end
623+
624+
@testset "empty source" begin
625+
@test_throws ArgumentError sync(ps.root / "quux", ps.foo)
626+
end
627+
628+
@testset "new source" begin
629+
# Don't cp unchanged files when a new file is added
630+
# NOTE: sleep before we make a new file, so it's clear that the
631+
# modified time has changed.
632+
baz_t = modified(ps.qux / "foo" / "baz.txt")
633+
sleep(1)
634+
write(ps.foo / "test.txt", "New src")
635+
sync(ps.foo, ps.qux / "foo")
636+
@test exists(ps.qux / "foo" / "test.txt")
637+
@test read(ps.qux / "foo" / "test.txt", String) == "New src"
638+
@test modified(ps.qux / "foo" / "baz.txt") == baz_t
639+
@test modified(ps.qux / "foo" / "test.txt") > baz_t
640+
end
641+
642+
@testset "new destination" begin
643+
# Newer file of the same size is likely the result of an upload which
644+
# will always have a newer last modified time.
645+
test_t = modified(ps.foo / "test.txt")
646+
sleep(1)
647+
write(ps.qux / "foo" / "test.txt", "New dst")
648+
@test modified(ps.qux / "foo" / "test.txt") > test_t
649+
sync(ps.foo, ps.qux / "foo")
650+
@test read(ps.qux / "foo" / "test.txt", String) == "New dst"
651+
@test modified(ps.qux / "foo" / "test.txt") > test_t
652+
end
653+
654+
@testset "no delete" begin
655+
# Test not deleting a file on sync
656+
rm(ps.foo / "test.txt")
657+
sync(ps.foo, ps.qux / "foo")
658+
@test exists(ps.qux / "foo" / "test.txt")
659+
end
660+
661+
@testset "delete" begin
662+
# Test passing delete flag
663+
sync(ps.foo, ps.qux / "foo"; delete=true)
664+
@test !exists(ps.qux / "foo" / "test.txt")
665+
rm(ps.qux / "foo"; recursive=true)
666+
end
667+
668+
@testset "mixed types" begin
669+
@testset "directory -> file" begin
670+
@test_throws ArgumentError sync(ps.foo, ps.quux)
671+
end
672+
673+
@testset "file -> directory" begin
674+
@test_throws ArgumentError sync(ps.quux, ps.foo)
675+
end
676+
end
677+
678+
@testset "walkpath order" begin
679+
# Test a condition where the index could reorder the walkpath order.
680+
tmp_src = ps.root / "tmp-src"
681+
mkdir(tmp_src)
682+
src_file = tmp_src / "file1"
683+
write(src_file, "Hello World!")
684+
685+
src_folder = tmp_src / "folder1"
686+
mkdir(src_folder)
687+
src_folder_file = src_folder / "file2"
688+
write(src_folder_file, "") # empty file
689+
690+
src_folder2 = src_folder / "folder2" # nested folders
691+
mkdir(src_folder2)
692+
src_folder2_file = src_folder2 / "file3"
693+
write(src_folder2_file, "Test")
694+
695+
tmp_dst = ps.root / "tmp_dst"
696+
mkdir(tmp_dst)
697+
sync(tmp_src, tmp_dst)
698+
@test exists(tmp_dst / "folder1" / "folder2" / "file3")
699+
rm(tmp_src; recursive=true)
700+
rm(tmp_dst; recursive=true)
701+
end
643702
end
644703
end
645704

0 commit comments

Comments
 (0)