Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: UTF-8 and base256emoji #46

Merged
merged 3 commits into from
Jun 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions base256emoji.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package multibase

import (
"strconv"
"strings"
"unicode/utf8"
)

var base256emojiTable = [256]rune{
// Curated list: these are just things that are *somewhat* related to our community.
'🚀', '🪐', '☄', '🛰', '🌌', // Space
'🌑', '🌒', '🌓', '🌔', '🌕', '🌖', '🌗', '🌘', // Moon
'🌍', '🌏', '🌎', // Our Home, for now (earth)
'☉', '☀', // Our Garden, for now (sol)
'💻', '🖥', '💾', '💿', // Computer
// The rest is completed from https://home.unicode.org/emoji/emoji-frequency/ at the time of creation (December 2021; the data is from 2019), most used first until we reach 256.
// We exclude modifier-based emojis (such as flags) as they are bigger than one single codepoint.
// Some other emojis were removed ad hoc for various reasons.
'😂', '❤', '😍', '🤣', '😊', '🙏', '💕', '😭', '😘', '👍',
'😅', '👏', '😁', '🔥', '🥰', '💔', '💖', '💙', '😢', '🤔',
'😆', '🙄', '💪', '😉', '☺', '👌', '🤗', '💜', '😔', '😎',
'😇', '🌹', '🤦', '🎉', '💞', '✌', '✨', '🤷', '😱', '😌',
'🌸', '🙌', '😋', '💗', '💚', '😏', '💛', '🙂', '💓', '🤩',
'😄', '😀', '🖤', '😃', '💯', '🙈', '👇', '🎶', '😒', '🤭',
'❣', '😜', '💋', '👀', '😪', '😑', '💥', '🙋', '😞', '😩',
'😡', '🤪', '👊', '🥳', '😥', '🤤', '👉', '💃', '😳', '✋',
'😚', '😝', '😴', '🌟', '😬', '🙃', '🍀', '🌷', '😻', '😓',
'⭐', '✅', '🥺', '🌈', '😈', '🤘', '💦', '✔', '😣', '🏃',
'💐', '☹', '🎊', '💘', '😠', '☝', '😕', '🌺', '🎂', '🌻',
'😐', '🖕', '💝', '🙊', '😹', '🗣', '💫', '💀', '👑', '🎵',
'🤞', '😛', '🔴', '😤', '🌼', '😫', '⚽', '🤙', '☕', '🏆',
'🤫', '👈', '😮', '🙆', '🍻', '🍃', '🐶', '💁', '😲', '🌿',
'🧡', '🎁', '⚡', '🌞', '🎈', '❌', '✊', '👋', '😰', '🤨',
'😶', '🤝', '🚶', '💰', '🍓', '💢', '🤟', '🙁', '🚨', '💨',
'🤬', '✈', '🎀', '🍺', '🤓', '😙', '💟', '🌱', '😖', '👶',
'🥴', '▶', '➡', '❓', '💎', '💸', '⬇', '😨', '🌚', '🦋',
'😷', '🕺', '⚠', '🙅', '😟', '😵', '👎', '🤲', '🤠', '🤧',
'📌', '🔵', '💅', '🧐', '🐾', '🍒', '😗', '🤑', '🌊', '🤯',
'🐷', '☎', '💧', '😯', '💆', '👆', '🎤', '🙇', '🍑', '❄',
'🌴', '💣', '🐸', '💌', '📍', '🥀', '🤢', '👅', '💡', '💩',
'👐', '📸', '👻', '🤐', '🤮', '🎼', '🥵', '🚩', '🍎', '🍊',
'👼', '💍', '📣', '🥂',
}

// base256emojiReverseTable maps every emoji in base256emojiTable back to the
// byte value (its index in the table) that it encodes. It is filled in by init.
var base256emojiReverseTable map[rune]byte

// init builds base256emojiReverseTable by inverting base256emojiTable.
func init() {
	reverse := make(map[rune]byte, len(base256emojiTable))
	for idx := 0; idx < len(base256emojiTable); idx++ {
		reverse[base256emojiTable[idx]] = byte(idx)
	}
	base256emojiReverseTable = reverse
}

// base256emojiEncode returns a string containing one emoji per byte of in,
// using each byte as an index into base256emojiTable.
func base256emojiEncode(in []byte) string {
	// First pass: add up the UTF-8 size of the output so the builder can
	// reserve the whole buffer up front.
	size := 0
	for i := range in {
		size += utf8.RuneLen(base256emojiTable[in[i]])
	}

	// Second pass: emit the emoji for every input byte.
	var sb strings.Builder
	sb.Grow(size)
	for i := range in {
		sb.WriteRune(base256emojiTable[in[i]])
	}
	return sb.String()
}

// base256emojiCorruptInputError reports a character that is not valid
// base256emoji data.
type base256emojiCorruptInputError struct {
	index int  // position reported as the "input byte" in the message
	char  rune // the offending character
}

// Error implements the error interface, naming the position and the
// character that was rejected.
func (e base256emojiCorruptInputError) Error() string {
	position := strconv.Itoa(e.index)
	offender := string(e.char)
	return "illegal base256emoji data at input byte " + position + ", char: '" + offender + "'"
}

// String returns the same text as Error.
func (e base256emojiCorruptInputError) String() string {
	return e.Error()
}

// base256emojiDecode turns a string of base256emoji characters back into the
// bytes they encode, one byte per character.
//
// If a character is not present in base256emojiReverseTable, it returns a nil
// slice and a base256emojiCorruptInputError carrying the byte offset of that
// character within in and the character itself.
func base256emojiDecode(in string) ([]byte, error) {
	decoded := make([]byte, 0, utf8.RuneCountInString(in))
	// Ranging over a string yields the starting byte offset of each rune,
	// which is the position reported on failure.
	for offset, r := range in {
		b, known := base256emojiReverseTable[r]
		if !known {
			return nil, base256emojiCorruptInputError{index: offset, char: r}
		}
		decoded = append(decoded, b)
	}
	return decoded, nil
}
26 changes: 26 additions & 0 deletions base256emoji_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package multibase

import "testing"

// TestBase256EmojiAlphabet verifies that base256emojiTable has exactly 256
// non-zero entries, i.e. that no slot of the alphabet was left unset.
func TestBase256EmojiAlphabet(t *testing.T) {
	var filled uint
	for i := range base256emojiTable {
		if base256emojiTable[i] == rune(0) {
			continue
		}
		filled++
	}
	if filled != 256 {
		t.Errorf("Base256Emoji count is wrong, expected 256, got %d.", filled)
	}
}

// TestBase256EmojiUniq verifies that no emoji appears more than once in
// base256emojiTable, reporting every repeated entry together with its index.
func TestBase256EmojiUniq(t *testing.T) {
	seen := make(map[rune]struct{}, len(base256emojiTable))
	for idx, emoji := range base256emojiTable {
		if _, dup := seen[emoji]; dup {
			t.Errorf("Base256Emoji duplicate %s at index %d.", string(emoji), idx)
		}
		seen[emoji] = struct{}{}
	}
}
6 changes: 4 additions & 2 deletions encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package multibase

import (
"fmt"
"unicode/utf8"
)

// Encoder is a multibase encoding that is verified to be supported and
Expand Down Expand Up @@ -36,8 +37,9 @@ func EncoderByName(str string) (Encoder, error) {
var ok bool
if len(str) == 0 {
return Encoder{-1}, fmt.Errorf("empty multibase encoding")
} else if len(str) == 1 {
base = Encoding(str[0])
} else if utf8.RuneCountInString(str) == 1 {
r, _ := utf8.DecodeRuneInString(str)
Jorropo marked this conversation as resolved.
Show resolved Hide resolved
base = Encoding(r)
_, ok = EncodingToStr[base]
} else {
base, ok = Encodings[str]
Expand Down
6 changes: 4 additions & 2 deletions encoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package multibase

import (
"testing"
"unicode/utf8"
)

func TestInvalidCode(t *testing.T) {
Expand Down Expand Up @@ -43,9 +44,10 @@ func TestEncoder(t *testing.T) {
}
// Test that an encoder can be created from the single letter
// prefix
_, err = EncoderByName(str[0:1])
r, _ := utf8.DecodeRuneInString(str)
_, err = EncoderByName(string(r))
if err != nil {
t.Fatalf("EncoderByName(%s) failed: %v", str[0:1], err)
t.Fatalf("EncoderByName(%s) failed: %v", string(r), err)
}
}
}
51 changes: 30 additions & 21 deletions multibase.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"encoding/base64"
"encoding/hex"
"fmt"
"unicode/utf8"

b58 "github.com/mr-tron/base58/base58"
b32 "github.com/multiformats/go-base32"
Expand Down Expand Up @@ -38,31 +39,33 @@ const (
Base64url = 'u'
Base64pad = 'M'
Base64urlPad = 'U'
Base256Emoji = '🚀'
)

// EncodingToStr is a map of the supported encoding, unsupported encoding
// specified in standard are left out
var EncodingToStr = map[Encoding]string{
0x00: "identity",
'0': "base2",
'f': "base16",
'F': "base16upper",
'b': "base32",
'B': "base32upper",
'c': "base32pad",
'C': "base32padupper",
'v': "base32hex",
'V': "base32hexupper",
't': "base32hexpad",
'T': "base32hexpadupper",
'k': "base36",
'K': "base36upper",
'z': "base58btc",
'Z': "base58flickr",
'm': "base64",
'u': "base64url",
'M': "base64pad",
'U': "base64urlpad",
0x00: "identity",
'0': "base2",
'f': "base16",
'F': "base16upper",
'b': "base32",
'B': "base32upper",
'c': "base32pad",
'C': "base32padupper",
'v': "base32hex",
'V': "base32hexupper",
't': "base32hexpad",
'T': "base32hexpadupper",
'k': "base36",
'K': "base36upper",
'z': "base58btc",
'Z': "base58flickr",
'm': "base64",
'u': "base64url",
'M': "base64pad",
'U': "base64urlpad",
Base256Emoji: "base256emoji",
}

var Encodings = map[string]Encoding{}
Expand Down Expand Up @@ -123,6 +126,8 @@ func Encode(base Encoding, data []byte) (string, error) {
return string(Base64url) + base64.RawURLEncoding.EncodeToString(data), nil
case Base64:
return string(Base64) + base64.RawStdEncoding.EncodeToString(data), nil
case Base256Emoji:
return string(Base256Emoji) + base256emojiEncode(data), nil
default:
return "", ErrUnsupportedEncoding
}
Expand All @@ -135,7 +140,8 @@ func Decode(data string) (Encoding, []byte, error) {
return 0, nil, fmt.Errorf("cannot decode multibase for zero length string")
}

enc := Encoding(data[0])
r, _ := utf8.DecodeRuneInString(data)
enc := Encoding(r)

switch enc {
case Identity:
Expand Down Expand Up @@ -179,6 +185,9 @@ func Decode(data string) (Encoding, []byte, error) {
case Base64url:
bytes, err := base64.RawURLEncoding.DecodeString(data[1:])
return Base64url, bytes, err
case Base256Emoji:
bytes, err := base256emojiDecode(data[4:])
return Base256Emoji, bytes, err
default:
return -1, nil, ErrUnsupportedEncoding
}
Expand Down
1 change: 1 addition & 0 deletions multibase_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ var encodedSamples = map[Encoding]string{
Base64url: "uRGVjZW50cmFsaXplIGV2ZXJ5dGhpbmchISE",
Base64pad: "MRGVjZW50cmFsaXplIGV2ZXJ5dGhpbmchISE=",
Base64urlPad: "URGVjZW50cmFsaXplIGV2ZXJ5dGhpbmchISE=",
Base256Emoji: "🚀💛✋💃✋😻😈🥺🤤🍀🌟💐✋😅✋💦✋🥺🏃😈😴🌟😻😝👏👏👏",
}

func testEncode(t *testing.T, encoding Encoding, bytes []byte, expected string) {
Expand Down
2 changes: 1 addition & 1 deletion spec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func TestSpec(t *testing.T) {
}
}
func TestSpecVectors(t *testing.T) {
files, err := filepath.Glob("spec/tests/test[0-9]*.csv")
files, err := filepath.Glob("spec/tests/*.csv")
if err != nil {
t.Fatal(err)
}
Expand Down
2 changes: 1 addition & 1 deletion version.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"version": "v0.0.3"
"version": "v0.1.0"
}