Skip to content

Commit 7aac9c2

Browse files
committed
Add String() to BinaryFragment
Implement base85 encoding and improve how binary patches are displayed. This fails the tests due to differences between the Git and Go implementations of zlib. I need to decide how to resolve this.
1 parent ac2e2c5 commit 7aac9c2

File tree

6 files changed

+239
-9
lines changed

6 files changed

+239
-9
lines changed

gitdiff/base85.go

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ func init() {
1919
}
2020

2121
// base85Decode decodes Base85-encoded data from src into dst. It uses the
22-
// alphabet defined by base85.c in the Git source tree, which appears to be
23-
// unique. src must contain at least len(dst) bytes of encoded data.
22+
// alphabet defined by base85.c in the Git source tree. src must contain at
23+
// least len(dst) bytes of encoded data.
2424
func base85Decode(dst, src []byte) error {
2525
var v uint32
2626
var n, ndst int
@@ -50,3 +50,42 @@ func base85Decode(dst, src []byte) error {
5050
}
5151
return nil
5252
}
53+
54+
// base85Encode encodes src in Base85, writing the result to dst. It uses the
55+
// alphabet defined by base85.c in the Git source tree.
56+
func base85Encode(dst, src []byte) {
57+
var di, si int
58+
59+
encode := func(v uint32) {
60+
dst[di+0] = b85Alpha[(v/(85*85*85*85))%85]
61+
dst[di+1] = b85Alpha[(v/(85*85*85))%85]
62+
dst[di+2] = b85Alpha[(v/(85*85))%85]
63+
dst[di+3] = b85Alpha[(v/85)%85]
64+
dst[di+4] = b85Alpha[v%85]
65+
}
66+
67+
n := (len(src) / 4) * 4
68+
for si < n {
69+
encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3]))
70+
si += 4
71+
di += 5
72+
}
73+
74+
var v uint32
75+
switch len(src) - si {
76+
case 3:
77+
v |= uint32(src[si+2]) << 8
78+
fallthrough
79+
case 2:
80+
v |= uint32(src[si+1]) << 16
81+
fallthrough
82+
case 1:
83+
v |= uint32(src[si+0]) << 24
84+
encode(v)
85+
}
86+
}
87+
88+
// base85Len returns the number of Base85 characters produced by encoding n
// bytes of input. Every group of up to four input bytes becomes five output
// characters, so the result is always a multiple of five.
func base85Len(n int) int {
	return (n + 3) / 4 * 5
}

gitdiff/base85_test.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package gitdiff
22

33
import (
4+
"bytes"
5+
"fmt"
6+
"math/rand"
47
"testing"
58
)
69

@@ -58,3 +61,60 @@ func TestBase85Decode(t *testing.T) {
5861
})
5962
}
6063
}
64+
65+
func TestBase85Encode(t *testing.T) {
66+
tests := map[string]struct {
67+
Input []byte
68+
Output string
69+
}{
70+
"zeroBytes": {
71+
Input: []byte{},
72+
Output: "",
73+
},
74+
"twoBytes": {
75+
Input: []byte{0xCA, 0xFE},
76+
Output: "%KiWV",
77+
},
78+
"fourBytes": {
79+
Input: []byte{0x0, 0x0, 0xCA, 0xFE},
80+
Output: "007GV",
81+
},
82+
"sixBytes": {
83+
Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE},
84+
Output: "007GV%KiWV",
85+
},
86+
}
87+
88+
for name, test := range tests {
89+
t.Run(name, func(t *testing.T) {
90+
dst := make([]byte, len(test.Output))
91+
base85Encode(dst, test.Input)
92+
for i, b := range test.Output {
93+
if dst[i] != byte(b) {
94+
t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i])
95+
}
96+
}
97+
})
98+
}
99+
}
100+
101+
func TestBase85Roundtrip(t *testing.T) {
102+
r := rand.New(rand.NewSource(72)) // chosen by fair dice roll
103+
104+
for _, size := range []int{64, 85, 1025} {
105+
t.Run(fmt.Sprintf("size%d", size), func(t *testing.T) {
106+
in := make([]byte, size)
107+
r.Read(in)
108+
109+
dst := make([]byte, base85Len(size))
110+
out := make([]byte, size)
111+
112+
base85Encode(dst, in)
113+
base85Decode(out, dst)
114+
115+
if !bytes.Equal(in, out) {
116+
t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst))
117+
}
118+
})
119+
}
120+
}

gitdiff/gitdiff.go

Lines changed: 98 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
package gitdiff
22

33
import (
4+
"bytes"
5+
"compress/zlib"
46
"errors"
57
"fmt"
68
"os"
9+
"strconv"
710
"strings"
811
)
912

@@ -128,8 +131,21 @@ func (f *File) String() string {
128131
diff.WriteByte('\n')
129132
}
130133

131-
// The "---" and "+++" lines only appear for patches with fragments
132-
if len(f.TextFragments) > 0 || f.BinaryFragment != nil {
134+
if f.IsBinary {
135+
if f.BinaryFragment == nil {
136+
diff.WriteString("Binary files differ\n")
137+
} else {
138+
diff.WriteString("GIT binary patch\n")
139+
diff.WriteString(f.BinaryFragment.String())
140+
if f.ReverseBinaryFragment != nil {
141+
diff.WriteByte('\n')
142+
diff.WriteString(f.ReverseBinaryFragment.String())
143+
}
144+
}
145+
}
146+
147+
// The "---" and "+++" lines only appear for text patches with fragments
148+
if len(f.TextFragments) > 0 {
133149
diff.WriteString("--- ")
134150
if f.OldName == "" {
135151
diff.WriteString("/dev/null")
@@ -145,11 +161,7 @@ func (f *File) String() string {
145161
writeQuotedName(&diff, "b/"+f.NewName)
146162
}
147163
diff.WriteByte('\n')
148-
}
149164

150-
if f.IsBinary {
151-
// TODO(bkeyes): add string method for BinaryFragments
152-
} else {
153165
for _, frag := range f.TextFragments {
154166
diff.WriteString(frag.String())
155167
}
@@ -344,3 +356,83 @@ const (
344356
// BinaryPatchLiteral indicates the data is the exact file content
345357
BinaryPatchLiteral
346358
)
359+
360+
func (f *BinaryFragment) String() string {
361+
const (
362+
maxBytesPerLine = 52
363+
)
364+
365+
var diff strings.Builder
366+
367+
switch f.Method {
368+
case BinaryPatchDelta:
369+
diff.WriteString("delta ")
370+
case BinaryPatchLiteral:
371+
diff.WriteString("literal ")
372+
}
373+
diff.Write(strconv.AppendInt(nil, f.Size, 10))
374+
diff.WriteByte('\n')
375+
376+
data := deflateBinaryChunk(f.Data)
377+
n := (len(data) / maxBytesPerLine) * maxBytesPerLine
378+
379+
buf := make([]byte, base85Len(maxBytesPerLine))
380+
for i := 0; i < n; i += maxBytesPerLine {
381+
base85Encode(buf, data[i:i+maxBytesPerLine])
382+
diff.WriteByte('z')
383+
diff.Write(buf)
384+
diff.WriteByte('\n')
385+
}
386+
if remainder := len(data) - n; remainder > 0 {
387+
buf = buf[0:base85Len(remainder)]
388+
389+
sizeChar := byte(remainder)
390+
if remainder <= 26 {
391+
sizeChar = 'A' + sizeChar - 1
392+
} else {
393+
sizeChar = 'a' + sizeChar - 27
394+
}
395+
396+
base85Encode(buf, data[n:])
397+
diff.WriteByte(sizeChar)
398+
diff.Write(buf)
399+
diff.WriteByte('\n')
400+
}
401+
402+
return diff.String()
403+
}
404+
405+
// deflateBinaryChunk wraps data in a zlib stream written at level
// zlib.NoCompression and returns the bytes of that stream.
//
// TODO(bkeyes): The 'compress/flate' package does not produce minimal output
// streams. Instead of flagging that the last block of data represents the end
// of the stream, it always writes a final empty block to mark the end. Git's
// implementation using the 'zlib' C library does not do this, which means that
// what we produce for binary patches does not match the input, even though it
// is valid.
//
// This is mostly a problem for my tests, where I compare the input and output
// bytes. This comparison isn't required, but is helpful to catch invalid
// output that might otherwise still parse.
//
// Options for fixing this:
//
//  1. Fix the tests to compare parsed objects instead of raw patches, at least
//     for binary patches. This means writing something to do reasonable
//     comparisons of File structs.
//
//  2. Add my own deflate function. By default, Git appears to use no
//     compression on binary patch data, which means "deflate" is just adding
//     the appropriate headers and checksums around the data. This would fix my
//     tests but means we could never emit compressed data, so we'd differ from
//     Git in other situations.
//
// Either way, there will be situations in which re-formatted binary patches
// differ from the original inputs.
func deflateBinaryChunk(data []byte) []byte {
	var b bytes.Buffer

	// NewWriterLevel only returns an error for an invalid level, and
	// zlib.NoCompression is a valid level.
	zw, _ := zlib.NewWriterLevel(&b, zlib.NoCompression)

	// The destination is an in-memory bytes.Buffer, whose Write always
	// returns a nil error, so Write and Close have no I/O error to report.
	_, _ = zw.Write(data)
	_ = zw.Close()

	return b.Bytes()
}

gitdiff/gitdiff_string_test.go

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ import (
88

99
func TestFile_String(t *testing.T) {
1010
sources := []string{
11+
"testdata/string/binary_modify.patch",
12+
"testdata/string/binary_new.patch",
1113
"testdata/string/copy.patch",
14+
"testdata/string/copy_modify.patch",
1215
"testdata/string/delete.patch",
1316
"testdata/string/mode.patch",
1417
"testdata/string/mode_modify.patch",
@@ -18,7 +21,6 @@ func TestFile_String(t *testing.T) {
1821
"testdata/string/new_mode.patch",
1922
"testdata/string/rename.patch",
2023
"testdata/string/rename_modify.patch",
21-
"testdata/string/copy_modify.patch",
2224
}
2325

2426
for _, src := range sources {
@@ -46,3 +48,20 @@ func assertParseSingleFile(t *testing.T, src string, b []byte) *File {
4648
}
4749
return files[0]
4850
}
51+
52+
/*
53+
func TestDecode(t *testing.T) {
54+
actual := []byte("cmV-O")
55+
mine := []byte("cmV)N")
56+
57+
dst := make([]byte, 4)
58+
59+
base85Decode(dst, actual)
60+
t.Logf("actual: %x / %b", dst, dst)
61+
62+
base85Decode(dst, mine)
63+
t.Logf(" mine: %x / %b", dst, dst)
64+
65+
t.FailNow()
66+
}
67+
*/
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
diff --git a/file.bin b/file.bin
2+
index a7f4d5d6975ec021016c02b6d58345ebf434f38c..bdc9a70f055892146612dcdb413f0e339faaa0df 100644
3+
GIT binary patch
4+
delta 66
5+
QcmeZhVVvM$!$1K50C&Ox;s5{u
6+
7+
delta 5
8+
McmZo+^qAlQ00i9urT_o{
9+
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
diff --git a/file.bin b/file.bin
2+
new file mode 100644
3+
index 0000000000000000000000000000000000000000..a7f4d5d6975ec021016c02b6d58345ebf434f38c
4+
GIT binary patch
5+
literal 72
6+
zcmV-O0Jr~td-`u6JcK&{KDK=<a#;v1^LR5&K)zQ0=Goz82(?nJ6_nD`f#8O9p}}{P
7+
eiXim+rDI+BDadMQmMsO5Sw@;DbrCA+PamP;Ng_@F
8+
9+
literal 0
10+
HcmV?d00001
11+

0 commit comments

Comments
 (0)