From 1b319c61c9db35bb65c0aed61e373cd826fe9de8 Mon Sep 17 00:00:00 2001 From: Sandeep Kalra Date: Tue, 30 May 2017 22:35:32 -0500 Subject: [PATCH 1/8] Create shortUrl.go Added Go Impl. --- Go/shortUrl.go | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 Go/shortUrl.go diff --git a/Go/shortUrl.go b/Go/shortUrl.go new file mode 100644 index 0000000..df577d9 --- /dev/null +++ b/Go/shortUrl.go @@ -0,0 +1,54 @@ +/** + * ShortURL: Bijective conversion between natural numbers (IDs) and short strings + * Licensed under the MIT License (https://opensource.org/licenses/MIT) + * + * ShortURL::encode() takes an ID and turns it into a short string + * ShortURL::decode() takes a short string and turns it into an ID + * + * Features: + * + large alphabet (51 chars) and thus very short resulting strings + * + proof against offensive words (removed 'a', 'e', 'i', 'o' and 'u') + * + unambiguous (removed 'I', 'l', '1', 'O' and '0') + **/ +package ShortUrl + +import ( + "fmt" + "strings" +) + +const ( + Alphabets = "23456789bcdfghjkmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ-_" + Base = len(Alphabets) +) + +type Codec struct{} + +func (c *Codec) Encode(n int) string { + var s string + for n > 0 { + c := string(Alphabets[n%Base]) + fmt.Println(c) + s = c + s + n /= Base + } + return s +} + +func (c *Codec) Decode(path string) (int, error) { + n := 0 + for _, c := range path { + i := strings.Index(Alphabets, string(c)) + if i < 0 { + return 0, fmt.Errorf("Invalid input %s", path) + } else { + n = n*Base + i + } + + } + return n, nil +} +func InitShortingCodec() *Codec { + codec := Codec{} + return &codec +} From db8b1e37b73b352452d72c8e0e0b20cfa926116c Mon Sep 17 00:00:00 2001 From: Deepshikha Kalra Date: Wed, 31 May 2017 18:15:49 -0500 Subject: [PATCH 2/8] fixed typos, and made error more clearer --- Go/shortUrl.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Go/shortUrl.go b/Go/shortUrl.go index df577d9..38366ff 100644 --- a/Go/shortUrl.go +++ b/Go/shortUrl.go @@ -10,7 +10,7 @@ * + proof against offensive words (removed 'a', 'e', 'i', 'o' and 'u') * + unambiguous (removed 'I', 'l', '1', 'O' and '0') **/ -package ShortUrl +package ShortURL import ( "fmt" @@ -27,9 +27,7 @@ type Codec struct{} func (c *Codec) Encode(n int) string { var s string for n > 0 { - c := string(Alphabets[n%Base]) - fmt.Println(c) - s = c + s + s = string(Alphabets[n%Base]) + s n /= Base } return s @@ -40,7 +38,7 @@ func (c *Codec) Decode(path string) (int, error) { for _, c := range path { i := strings.Index(Alphabets, string(c)) if i < 0 { - return 0, fmt.Errorf("Invalid input %s", path) + return 0, fmt.Errorf("Invalid character %s in input %s", string(c), path) } else { n = n*Base + i } @@ -48,7 +46,8 @@ func (c *Codec) Decode(path string) (int, error) { } return n, nil } -func InitShortingCodec() *Codec { + +func InitShorteningCodec() *Codec { codec := Codec{} return &codec } From d43fc83493977c156c067aa97db3c82517b035e3 Mon Sep 17 00:00:00 2001 From: Sandeep Kalra Date: Thu, 14 Jun 2018 12:31:08 -0500 Subject: [PATCH 3/8] Optimized Encode function; rather than using string() to create string object everytime, rely on strings.builder for doing the work on the given object. The builder does not provide a way to insert in front, so we have to reverse the whole thing later. --- Go/shortUrl.go | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/Go/shortUrl.go b/Go/shortUrl.go index 38366ff..87a67f4 100644 --- a/Go/shortUrl.go +++ b/Go/shortUrl.go @@ -24,6 +24,35 @@ const ( type Codec struct{} +// reverseChar Utility to reverse string with only UTF8 +func reverseChars(s string) string { + bytes := []byte(s) + for i, j := 0, len(bytes)-1; i < j; i, j = i+1, j-1 { + bytes[i], bytes[j] = bytes[j], bytes[i] + } + return string(bytes) +} + +func reverseRunes(s string) string { + runes := []rune(s) + for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 { + runes[i], runes[j] = runes[j], runes[i] + } + return string(runes) +} + +// Encode: given a generated number, get the URL back +func (c *Codec) Encode(n int) string { + sb := strings.Builder{} + for n > 0 { + sb.WriteByte(Alphabets[n%Base]) + n /= Base + } + // We know that Alphabet set is UTF8, so we will use reverseChars. + return reverseChars(sb.String()) +} + +/* Deprecated: Faster version is available func (c *Codec) Encode(n int) string { var s string for n > 0 { @@ -32,7 +61,8 @@ func (c *Codec) Encode(n int) string { } return s } - +*/ +// Decode: given a URL(path), the decoder decodes it to a unique number. func (c *Codec) Decode(path string) (int, error) { n := 0 for _, c := range path { @@ -47,6 +77,7 @@ func (c *Codec) Decode(path string) (int, error) { return n, nil } +//InitShorteningCodec : Codec for shortening URL and doing the reverse. func InitShorteningCodec() *Codec { codec := Codec{} return &codec From 794b14081ce46f629c825003b5ba31c4b76a178e Mon Sep 17 00:00:00 2001 From: Sandeep Kalra Date: Thu, 14 Jun 2018 15:07:47 -0500 Subject: [PATCH 4/8] removed reverseRunes. Made each function available outside a common Codec structure --- Go/shortUrl.go | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/Go/shortUrl.go b/Go/shortUrl.go index 87a67f4..844273e 100644 --- a/Go/shortUrl.go +++ b/Go/shortUrl.go @@ -22,8 +22,6 @@ const ( Base = len(Alphabets) ) -type Codec struct{} - // reverseChar Utility to reverse string with only UTF8 func reverseChars(s string) string { bytes := []byte(s) @@ -33,16 +31,8 @@ func reverseChars(s string) string { return string(bytes) } -func reverseRunes(s string) string { - runes := []rune(s) - for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 { - runes[i], runes[j] = runes[j], runes[i] - } - return string(runes) -} - // Encode: given a generated number, get the URL back -func (c *Codec) Encode(n int) string { +func Encode(n int) string { sb := strings.Builder{} for n > 0 { sb.WriteByte(Alphabets[n%Base]) @@ -52,18 +42,8 @@ func (c *Codec) Encode(n int) string { return reverseChars(sb.String()) } -/* Deprecated: Faster version is available -func (c *Codec) Encode(n int) string { - var s string - for n > 0 { - s = string(Alphabets[n%Base]) + s - n /= Base - } - return s -} -*/ // Decode: given a URL(path), the decoder decodes it to a unique number. -func (c *Codec) Decode(path string) (int, error) { +func Decode(path string) (int, error) { n := 0 for _, c := range path { i := strings.Index(Alphabets, string(c)) @@ -76,9 +56,3 @@ func (c *Codec) Decode(path string) (int, error) { } return n, nil } - -//InitShorteningCodec : Codec for shortening URL and doing the reverse. -func InitShorteningCodec() *Codec { - codec := Codec{} - return &codec -} From 3531efeab4544722aa98d87fac4fbb30b3a9e938 Mon Sep 17 00:00:00 2001 From: Sandeep Kalra Date: Sat, 16 Jun 2018 23:35:11 -0500 Subject: [PATCH 5/8] added testing, and more cases to see benchmark results. --- Go/shortUrl.go | 68 +++++++++++++++++++++++++++++++++++---------- Go/shortUrl_test.go | 61 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 14 deletions(-) create mode 100644 Go/shortUrl_test.go diff --git a/Go/shortUrl.go b/Go/shortUrl.go index 844273e..4912f48 100644 --- a/Go/shortUrl.go +++ b/Go/shortUrl.go @@ -10,7 +10,8 @@ * + proof against offensive words (removed 'a', 'e', 'i', 'o' and 'u') * + unambiguous (removed 'I', 'l', '1', 'O' and '0') **/ -package ShortURL + +package shorturl import ( "fmt" @@ -18,12 +19,14 @@ import ( ) const ( + // Alphabets is "set of allowed alphabets" Alphabets = "23456789bcdfghjkmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ-_" - Base = len(Alphabets) + // Base is const size of alphabets string + Base = len(Alphabets) ) -// reverseChar Utility to reverse string with only UTF8 -func reverseChars(s string) string { +// ReverseChars Utility to reverse string with only UTF8 +func ReverseChars(s string) string { bytes := []byte(s) for i, j := 0, len(bytes)-1; i < j; i, j = i+1, j-1 { bytes[i], bytes[j] = bytes[j], bytes[i] @@ -31,28 +34,65 @@ func reverseChars(s string) string { return string(bytes) } -// Encode: given a generated number, get the URL back +//Reverse string assuming that its all runes. +func Reverse(s string) string { + runes := []rune(s) + for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 { + runes[i], runes[j] = runes[j], runes[i] + } + return string(runes) +} + +// EncodeNew Given a generated number, get the URL back +func EncodeNew(n int) string { + b := make([]byte, 20, 20) + for n > 0 { + b = append([]byte{Alphabets[n%Base]}, b...) + n /= Base + } + return string(b) +} + +// EncodeFast Given a generated number, get the URL back +func EncodeFast(n int) string { + sb := strings.Builder{} + for n > 0 { + sb.WriteByte(Alphabets[n%Base]) + n /= Base + } + // we know that alphabets are all chars + return ReverseChars(sb.String()) +} + +// Encode Given a generated number, get the URL back func Encode(n int) string { sb := strings.Builder{} for n > 0 { sb.WriteByte(Alphabets[n%Base]) n /= Base } - // We know that Alphabet set is UTF8, so we will use reverseChars. - return reverseChars(sb.String()) + return Reverse(sb.String()) } -// Decode: given a URL(path), the decoder decodes it to a unique number. +//EncodeOld gives the old implementation +func EncodeOld(n int) string { + s := "" + for n > 0 { + s = string(Alphabets[n%Base]) + s + n /= Base + } + return s +} + +// Decode Given a URL(path), the decoder decodes it to a unique number. func Decode(path string) (int, error) { n := 0 for _, c := range path { - i := strings.Index(Alphabets, string(c)) - if i < 0 { - return 0, fmt.Errorf("Invalid character %s in input %s", string(c), path) - } else { - n = n*Base + i + index := strings.IndexRune(Alphabets, c) + if index < 0 { + return 0, fmt.Errorf("Invalid character %c in input %s", c, path) } - + n = n*Base + index } return n, nil } diff --git a/Go/shortUrl_test.go b/Go/shortUrl_test.go new file mode 100644 index 0000000..8371562 --- /dev/null +++ b/Go/shortUrl_test.go @@ -0,0 +1,61 @@ +package shorturl + +import ( + "fmt" + "testing" +) + +//go test -run Encode +func TestEncodeDecode(t *testing.T) { + path := []string{ + "tvwxyzBF", + "2BCDFGHJP", + } + for _, v := range path { + i, e := Decode(v) + if e != nil { + t.Fail() + } + s := Encode(i) + + if v != s { + t.Fail() + fmt.Println("expected :", v, "Got: ", s) + } + } +} + +//BenchmarkEncodeNew run command: go test -bench=Fast +func BenchmarkEncodeNew(t *testing.B) { + for i := 0; i < 10000000; i++ { + EncodeNew(i) + } +} + +//BenchmarkEncodeFast run command : go test -bench=Fast +func BenchmarkEncodeFast(t *testing.B) { + for i := 0; i < 10000000; i++ { + EncodeFast(i) + } +} + +// BenchmarkEncodeOriginal run command : go test -bench=Original +func BenchmarkEncodeOriginal(t *testing.B) { + for i := 0; i < 10000000; i++ { + Encode(i) + } +} + +// BenchmarkEncodeOld run command : go test -bench=Old +func BenchmarkEncodeOld(t *testing.B) { + for i := 0; i < 10000000; i++ { + EncodeOld(i) + } +} + +// BenchmarkDecode run command; go test -bench=Decode +func BenchmarkDecode(t *testing.B) { + for i := 0; i < 10000000; i++ { + Decode("BCDFGHJP") + } +} From 3dec4f8eb1d8ce70e81d256de33003c5b6a6029c Mon Sep 17 00:00:00 2001 From: Sandeep Kalra Date: Sun, 17 Jun 2018 00:00:57 -0500 Subject: [PATCH 6/8] fixed bug#33 --- Go/shortUrl.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Go/shortUrl.go b/Go/shortUrl.go index 4912f48..d4729ba 100644 --- a/Go/shortUrl.go +++ b/Go/shortUrl.go @@ -67,9 +67,9 @@ func EncodeFast(n int) string { // Encode Given a generated number, get the URL back func Encode(n int) string { sb := strings.Builder{} - for n > 0 { + for n > 1 { sb.WriteByte(Alphabets[n%Base]) - n /= Base + n = n / Base } return Reverse(sb.String()) } @@ -79,14 +79,14 @@ func EncodeOld(n int) string { s := "" for n > 0 { s = string(Alphabets[n%Base]) + s - n /= Base + n = n / Base } return s } // Decode Given a URL(path), the decoder decodes it to a unique number. func Decode(path string) (int, error) { - n := 0 + n := 1 for _, c := range path { index := strings.IndexRune(Alphabets, c) if index < 0 { From a4d4ff71c15ff48215a39753af77986f0d3e56dc Mon Sep 17 00:00:00 2001 From: Sandeep Kalra Date: Sun, 17 Jun 2018 23:43:53 -0500 Subject: [PATCH 7/8] undo fix #33, as it was a invalid test, and also removed code for alternate implementation and the corresponding benchmark tests --- Go/shortUrl.go | 44 ++------------------------------------------ Go/shortUrl_test.go | 30 ++++++------------------------ 2 files changed, 8 insertions(+), 66 deletions(-) diff --git a/Go/shortUrl.go b/Go/shortUrl.go index d4729ba..bf6c28b 100644 --- a/Go/shortUrl.go +++ b/Go/shortUrl.go @@ -25,15 +25,6 @@ const ( Base = len(Alphabets) ) -// ReverseChars Utility to reverse string with only UTF8 -func ReverseChars(s string) string { - bytes := []byte(s) - for i, j := 0, len(bytes)-1; i < j; i, j = i+1, j-1 { - bytes[i], bytes[j] = bytes[j], bytes[i] - } - return string(bytes) -} - //Reverse string assuming that its all runes. func Reverse(s string) string { runes := []rune(s) @@ -43,50 +34,19 @@ func Reverse(s string) string { return string(runes) } -// EncodeNew Given a generated number, get the URL back -func EncodeNew(n int) string { - b := make([]byte, 20, 20) - for n > 0 { - b = append([]byte{Alphabets[n%Base]}, b...) - n /= Base - } - return string(b) -} - -// EncodeFast Given a generated number, get the URL back -func EncodeFast(n int) string { - sb := strings.Builder{} - for n > 0 { - sb.WriteByte(Alphabets[n%Base]) - n /= Base - } - // we know that alphabets are all chars - return ReverseChars(sb.String()) -} - // Encode Given a generated number, get the URL back func Encode(n int) string { sb := strings.Builder{} - for n > 1 { + for n > 0 { sb.WriteByte(Alphabets[n%Base]) n = n / Base } return Reverse(sb.String()) } -//EncodeOld gives the old implementation -func EncodeOld(n int) string { - s := "" - for n > 0 { - s = string(Alphabets[n%Base]) + s - n = n / Base - } - return s -} - // Decode Given a URL(path), the decoder decodes it to a unique number. func Decode(path string) (int, error) { - n := 1 + n := 0 for _, c := range path { index := strings.IndexRune(Alphabets, c) if index < 0 { diff --git a/Go/shortUrl_test.go b/Go/shortUrl_test.go index 8371562..77c6287 100644 --- a/Go/shortUrl_test.go +++ b/Go/shortUrl_test.go @@ -8,8 +8,9 @@ import ( //go test -run Encode func TestEncodeDecode(t *testing.T) { path := []string{ - "tvwxyzBF", + "tvwxyzBF2", "2BCDFGHJP", + "", } for _, v := range path { i, e := Decode(v) @@ -19,26 +20,14 @@ func TestEncodeDecode(t *testing.T) { s := Encode(i) if v != s { - t.Fail() - fmt.Println("expected :", v, "Got: ", s) + if v != string(Alphabets[0])+s { // v may start with Alphabet[0], which in base51 can mean 0. + t.Fail() + fmt.Println("expected :", v, "Got: ", s) + } } } } -//BenchmarkEncodeNew run command: go test -bench=Fast -func BenchmarkEncodeNew(t *testing.B) { - for i := 0; i < 10000000; i++ { - EncodeNew(i) - } -} - -//BenchmarkEncodeFast run command : go test -bench=Fast -func BenchmarkEncodeFast(t *testing.B) { - for i := 0; i < 10000000; i++ { - EncodeFast(i) - } -} - // BenchmarkEncodeOriginal run command : go test -bench=Original func BenchmarkEncodeOriginal(t *testing.B) { for i := 0; i < 10000000; i++ { @@ -46,13 +35,6 @@ func BenchmarkEncodeOriginal(t *testing.B) { } } -// BenchmarkEncodeOld run command : go test -bench=Old -func BenchmarkEncodeOld(t *testing.B) { - for i := 0; i < 10000000; i++ { - EncodeOld(i) - } -} - // BenchmarkDecode run command; go test -bench=Decode func BenchmarkDecode(t *testing.B) { for i := 0; i < 10000000; i++ { From 10605eeb858b8a38292067c5183112523acb0b8c Mon Sep 17 00:00:00 2001 From: Sandeep Kalra Date: Fri, 22 Jun 2018 21:12:20 -0500 Subject: [PATCH 8/8] added basic test pgK8p->123456789 --- Go/shortUrl_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Go/shortUrl_test.go b/Go/shortUrl_test.go index 77c6287..899adc8 100644 --- a/Go/shortUrl_test.go +++ b/Go/shortUrl_test.go @@ -10,6 +10,7 @@ func TestEncodeDecode(t *testing.T) { path := []string{ "tvwxyzBF2", "2BCDFGHJP", + "pgK8p", "", } for _, v := range path { @@ -18,7 +19,7 @@ func TestEncodeDecode(t *testing.T) { t.Fail() } s := Encode(i) - + fmt.Println(i, "<==>", s) if v != s { if v != string(Alphabets[0])+s { // v may start with Alphabet[0], which in base51 can mean 0. t.Fail()