Base64 encode in Cadence

I am currently trying to find a way to Base64 encode the NFT metadata. I’ve created some code (contract and script) to try it out.
Please take a look at the code in this Playground.

Contract

// Note: This utility does not support multibyte strings.

pub contract Base64Util {

    // Encodes a string as standard Base64 text.
    //
    // The input is turned into a string of "0"/"1" bits, right-padded with zero bits to a
    // multiple of 6, mapped 6 bits at a time to the Base64 alphabet, and finally padded with
    // "=" so the result length is a multiple of 4.
    //
    // Only the characters listed in `toBinaryStr` (space through "~") are supported;
    // any other character makes the call panic.
    pub fun encode(_ str: String): String {
        let binaryStr = Base64Util.toBinaryStr(str)

        let binaryStrArray = Base64Util.splitToArray(binaryStr, splitNum: 6, padStr: "0")

        let base64Map: {String: String} = {
            "000000": "A", "000001": "B", "000010": "C", "000011": "D",
            "000100": "E", "000101": "F", "000110": "G", "000111": "H",
            "001000": "I", "001001": "J", "001010": "K", "001011": "L",
            "001100": "M", "001101": "N", "001110": "O", "001111": "P",
            "010000": "Q", "010001": "R", "010010": "S", "010011": "T",
            "010100": "U", "010101": "V", "010110": "W", "010111": "X",
            "011000": "Y", "011001": "Z", "011010": "a", "011011": "b",
            "011100": "c", "011101": "d", "011110": "e", "011111": "f",
            "100000": "g", "100001": "h", "100010": "i", "100011": "j",
            "100100": "k", "100101": "l", "100110": "m", "100111": "n",
            "101000": "o", "101001": "p", "101010": "q", "101011": "r",
            "101100": "s", "101101": "t", "101110": "u", "101111": "v",
            "110000": "w", "110001": "x", "110010": "y", "110011": "z",
            "110100": "0", "110101": "1", "110110": "2", "110111": "3",
            "111000": "4", "111001": "5", "111010": "6", "111011": "7",
            "111100": "8", "111101": "9", "111110": "+", "111111": "/"
        }
        var res = ""
        for binStr in binaryStrArray {
            // Every chunk is exactly six "0"/"1" characters, so the lookup always succeeds
            res = res.concat(base64Map[binStr]!)
        }

        // Base64 text is emitted in groups of four characters; fill the last group with "="
        while res.length % 4 != 0 {
            res = res.concat("=")
        }
        return res
    }

    // Serializes the dictionary to a flat JSON object of string values
    // and returns the Base64 encoding of that JSON text.
    pub fun encodeFromDict(_ dict: {String: String}): String {
        let jsonStr = Base64Util.dictToJsonStr(dict)
        return Base64Util.encode(jsonStr)
    }

    // Converts each character of `str` to its 8-character "0"/"1" representation
    // and returns the concatenation. Panics on characters missing from the table.
    priv fun toBinaryStr(_ str: String): String {
        let asciiMap = {
            " ": "00100000", "!": "00100001", "\"": "00100010", "#": "00100011",
            "$": "00100100", "%": "00100101", "&": "00100110", "'": "00100111",
            "(": "00101000", ")": "00101001", "*": "00101010", "+": "00101011",
            ",": "00101100", "-": "00101101", ".": "00101110", "/": "00101111",
            "0": "00110000", "1": "00110001", "2": "00110010", "3": "00110011",
            "4": "00110100", "5": "00110101", "6": "00110110", "7": "00110111",
            "8": "00111000", "9": "00111001", ":": "00111010", ";": "00111011",
            "<": "00111100", "=": "00111101", ">": "00111110", "?": "00111111",
            "@": "01000000", "A": "01000001", "B": "01000010", "C": "01000011",
            "D": "01000100", "E": "01000101", "F": "01000110", "G": "01000111",
            "H": "01001000", "I": "01001001", "J": "01001010", "K": "01001011",
            "L": "01001100", "M": "01001101", "N": "01001110", "O": "01001111",
            "P": "01010000", "Q": "01010001", "R": "01010010", "S": "01010011",
            "T": "01010100", "U": "01010101", "V": "01010110", "W": "01010111",
            "X": "01011000", "Y": "01011001", "Z": "01011010", "[": "01011011",
            "\\": "01011100", "]": "01011101", "^": "01011110", "_": "01011111",
            "`": "01100000", "a": "01100001", "b": "01100010", "c": "01100011",
            "d": "01100100", "e": "01100101", "f": "01100110", "g": "01100111",
            "h": "01101000", "i": "01101001", "j": "01101010", "k": "01101011",
            "l": "01101100", "m": "01101101", "n": "01101110", "o": "01101111",
            "p": "01110000", "q": "01110001", "r": "01110010", "s": "01110011",
            "t": "01110100", "u": "01110101", "v": "01110110", "w": "01110111",
            "x": "01111000", "y": "01111001", "z": "01111010", "{": "01111011",
            "|": "01111100", "}": "01111101", "~": "01111110"
        }
        var res = ""
        var i = 0
        while i < str.length {
            let s = str.slice(from: i, upTo: i + 1)
            // Abort with a descriptive message instead of a bare force-unwrap failure
            let bits = asciiMap[s]
                ?? panic("Base64Util: unsupported character: ".concat(s))
            res = res.concat(bits)
            i = i + 1
        }
        return res
    }

    // Pads `originalStr` with `padStr` up to a multiple of `splitNum` characters
    // and cuts the result into consecutive pieces of `splitNum` characters each.
    priv fun splitToArray(_ originalStr: String, splitNum: Int, padStr: String): [String] {
        let str = Base64Util.pad(originalStr, splitNum, padStr)
        var res: [String] = []
        var i = 0
        while i < str.length {
            res.append(str.slice(from: i, upTo: i + splitNum))
            i = i + splitNum
        }
        return res
    }

    // Appends `padStr` until the length of `str` is a multiple of `splitNum`.
    // A string whose length is already a multiple (including the empty string)
    // is returned unchanged.
    priv fun pad(_ str: String, _ splitNum: Int, _ padStr: String): String {
        let remainder = str.length % splitNum
        if remainder == 0 {
            return str
        }

        // `remainder` is in 1..<splitNum here, so between 1 and splitNum - 1 pads are added
        let padNum = splitNum - remainder
        var res = str
        var i = 0
        while i < padNum {
            res = res.concat(padStr)
            i = i + 1
        }
        return res
    }

    // Builds a JSON object string such as {"key":"value",...} from the dictionary,
    // backslash-escaping quotes and backslashes in both keys and values.
    priv fun dictToJsonStr(_ dict: {String: String}): String {
        var res = "{"
        var flag = false
        for key in dict.keys {
            // `flag` is false only for the first entry, which needs no leading comma
            if !flag {
                flag = true
            } else {
                res = res.concat(",")
            }
            res = res.concat("\"")
                    .concat(Base64Util.escape(key))
                    .concat("\":\"")
                    .concat(Base64Util.escape(dict[key]!))
                    .concat("\"")
        }
        res = res.concat("}")
        return res
    }

    // Returns `str` with a backslash inserted before every double quote and backslash.
    priv fun escape(_ str: String): String {
        var res = ""
        var i = 0
        while i < str.length {
            let s = str.slice(from: i, upTo: i + 1)
            if s == "\"" || s == "\\" {
                res = res.concat("\\")
            }
            res = res.concat(s)
            i = i + 1
        }
        return res
    }
}

Script:

import Base64Util from 0x01

pub fun main() {
    // Encode a small string-to-string dictionary via its JSON form and log the result
    let sampleMetadata = {
        "key1": "value1",
        "key2": "value2"
    }
    let encodedMetadata = Base64Util.encodeFromDict(sampleMetadata)
    log(encodedMetadata)

    // Encode a plain string directly and log the result
    let plainText = "test str"
    let encodedText = Base64Util.encode(plainText)
    log(encodedText)
}

This code works well for ascii strings, but not for multi-byte characters, which is probably difficult with the current specification of Cadence.

I would like to return a Base64-encoded string of NFT metadata when I call the tokenURI() function, like Uniswap v3’s NFT does. (Uniswap v3 NFT contract is very interesting, and I think other projects may use this format as well.)

In order to support metadata containing multibyte characters, I would like to see, for example, the standard function for String type include a function to convert the value to ByteArray.
Alternatively, if the current Cadence specification also allows for Base64 conversion of multibyte characters, I would like to discuss how to do that.

This is really interesting! I’m glad you made something like this. Why did you choose to encode it like this? Just because that is how uniswap does it? When you say it doesn’t work for multibyte characters, do you just mean like [UInt8] types?

1 Like

Thanks for the comment. Yes, the reason is that Uniswap uses it, and simply my interest.
The Uniswap v3 code is very strange, the result of tokenURI() function is a Base64 string (not a URI lol). I think it is Base64 encoded to make it a string that can be used for URI, but I don’t know why they use tokenURI().

The problem I had with implementing this was that there was no way in Cadence to convert from a string to a byte array. In my code above, I simply converted the ascii string to byte code using a map.
I am thinking that there should be a standard function to convert a UTF-8 string to a byte array ([UInt8]). (For example, a function equivalent to bytes(message, 'UTF-8') in Python)

@turbolent What do you think about this?

Argh, I had written up a comment, but didn’t hit reply :smiley:

I think we could easily add Base64 encoding and decoding to Cadence’s standard library.

Just to clarify, strings in Cadence are always UTF-8 encoded, and each character is a grapheme cluster, one or more code points, which in turn are encoded as multiple bytes.

I assume that the Base64 encoding would be directly on raw bytes, i.e. the UTF-8 encoding, rather than on characters (grapheme clusters).

I also agree that we could expose a string’s UTF-8 encoding, e.g. as a field let utf8: [UInt8].

As for the JSON encoding: As we already have a spec to encode Cadence values in JSON, I think that could be exposed to Cadence programs, or alternatively it might be best to implement this like in your example in Cadence itself (dictToJsonStr).

Please feel free to open feature requests in the Cadence repo for:

  • Base64 encoding and decoding
  • Exposing the UTF-8 encoding of a string
2 Likes

I have implemented the utf8 field in Add a `String.utf8` field by turbolent · Pull Request #954 · onflow/cadence · GitHub

1 Like

Here’s a Cadence port of the base 64 encoding function documented here: Algorithm Implementation/Miscellaneous/Base64 - Wikibooks, open books for an open world.

It uses the new String.utf8 property


// Base64 alphabet as a lookup table: the element at index k is the character for 6-bit value k.
pub let baseChars: [String] = [
    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
    "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f",
    "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v",
    "w", "x", "y", "z", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "/"
]

// Encodes the given bytes as Base64 text, using `baseChars` as the alphabet
// and "=" as the trailing padding character.
pub fun base64encode(_ data: [UInt8]): String {

    // Extend the input with zero bytes until it splits evenly into 3-byte groups,
    // recording one "=" for every byte that had to be added
    var suffix = ""
    while data.length % 3 != 0 {
        suffix = suffix.concat("=")
        data.append(0)
    }

    var output = ""
    var offset = 0
    while offset < data.length {

        // Combine the three bytes of this group into a single 24-bit value
        let high = UInt32(data[offset]) << 16 as UInt32
        let mid = UInt32(data[offset + 1 as Int]) << 8 as UInt32
        let low = UInt32(data[offset + 2 as Int])
        let group = high + mid + low

        // Cut the 24-bit value into four 6-bit alphabet indices, most significant first
        let sextets = [
            (group >> 18 as UInt32) & 63 as UInt32,
            (group >> 12 as UInt32) & 63 as UInt32,
            (group >> 6 as UInt32) & 63 as UInt32,
            group & 63 as UInt32
        ]

        for sextet in sextets {
            output = output.concat(baseChars[sextet])
        }

        offset = offset + 3
    }

    // Characters produced from the added zero bytes are replaced by the "=" suffix
    let keep = output.length - suffix.length
    let trimmed = output.slice(from: 0, upTo: keep)
    return trimmed.concat(suffix)
}

pub fun main() {

    // UTF-8 bytes of the sample text, taken from the `String.utf8` property
    let s = "this is a test".utf8

    // The encoded bytes must match the known Base64 form of the sample text
    assert(base64encode(s) == "dGhpcyBpcyBhIHRlc3Q=")
}

I’ve opened Improve type inferrence for binary expressions by turbolent · Pull Request #957 · onflow/cadence · GitHub to improve type inference for binary expressions. Once merged, the code above can be simplified a lot:

// Each shift needs its own parentheses: `+` binds tighter than `<<`,
// so without them the shift amounts would absorb the following operands.
let n = (UInt32(data[i]) << 16)
    + (UInt32(data[i + 1]) << 8)
    + UInt32(data[i + 2])

// Split the 24-bit value into four 6-bit indices, most significant first
let n1 = (n >> 18) & 63
let n2 = (n >> 12) & 63
let n3 = (n >> 6) & 63
let n4 = n & 63

I’m glad you created these PRs. Thank you.