diff --git a/.github/workflows/busted.yml b/.github/workflows/busted.yml new file mode 100644 index 0000000..406982e --- /dev/null +++ b/.github/workflows/busted.yml @@ -0,0 +1,45 @@ +name: Busted + +on: [ push, pull_request ] + +jobs: + + busted: + strategy: + fail-fast: false + matrix: + luaVersion: [ "5.4" ] + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup 'lua' + uses: leafo/gh-actions-lua@v9 + with: + luaVersion: ${{ matrix.luaVersion }} + + - name: Setup 'luarocks' + uses: leafo/gh-actions-luarocks@v4 + + - name: Setup dependencies + run: | + luarocks install --deps-only sqids-lua-*.rockspec + luarocks install busted + luarocks install luacov + luarocks install luacov-coveralls + + - name: Build 'sqids' + run: | + luarocks make + + - name: Run 'busted' + # disable project-local path prefixes to force use of system installation + run: busted --coverage --output=gtest -Xoutput --color + + - name: Report test coverage + if: ${{ success() }} + continue-on-error: true + run: luacov-coveralls -i sqids.lua + env: + COVERALLS_REPO_TOKEN: ${{ github.token }} \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a8e6b9a --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +/luarocks +/lua +/lua_modules +/.luarocks +/luacov.stats.out +/luacov.report.out +*.rock diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..bc9e070 --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +all: build + +build: + luarocks make + +publish: + luarocks pack sqids-lua + luarocks upload --api-key=${LUAROCKS_API_KEY} sqids-lua-*.rockspec + +clean: + rm -rf sqids-lua-*.rock diff --git a/README.md b/README.md index 64cb61f..5b1774d 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,60 @@ Sqids (pronounced "squids") is a small library that lets you generate YouTube-looking IDs from numbers. 
It's good for link shortening, fast & URL-safe ID generation and decoding back into numbers for quicker database lookups. -## Getting started +Features: -@todo +- **Encode multiple numbers** - generate short IDs from one or several non-negative numbers +- **Quick decoding** - easily decode IDs back into numbers +- **Unique IDs** - generate unique IDs by shuffling the alphabet once +- **ID padding** - provide minimum length to make IDs more uniform +- **URL safe** - auto-generated IDs do not contain common profanity +- **Randomized output** - Sequential input provides nonconsecutive IDs +- **Many implementations** - Support for [40+ programming languages](https://sqids.org/) -## Examples +## 🧰 Use-cases -@todo +Good for: -## License +- Generating IDs for public URLs (eg: link shortening) +- Generating IDs for internal systems (eg: event tracking) +- Decoding for quicker database lookups (eg: by primary keys) + +Not good for: + +- Sensitive data (this is not an encryption library) +- User IDs (can be decoded revealing user count) + +## 🚀 Getting started + +Sqids is available on [LuaRocks](https://luarocks.org/modules/nascarsayan/sqids-lua): + +```bash +luarocks install sqids-lua +``` + +## 👩‍💻 Examples + + +```lua +local Sqids = require("sqids") +local sqids = Sqids.new() + +local encoded = sqids:encode({ 1, 2, 3 }) -- 86Rf07 +local decoded = sqids:decode(encoded) -- 1, 2, 3 +``` + +Enforce a *minimum* length for IDs: + +```lua +local Sqids = require("sqids") +local sqids = Sqids.new({ + minLength = 10, +}) + +local id = sqids:encode({ 1, 2, 3 }) -- 86Rf07xd4z +local numbers = sqids:decode(id) -- [1, 2, 3] +``` + +## 📝 License [MIT](LICENSE) diff --git a/blocklist.json b/blocklist.json new file mode 100644 index 0000000..87779eb --- /dev/null +++ b/blocklist.json @@ -0,0 +1,562 @@ +[ + "0rgasm", + "1d10t", + "1d1ot", + "1di0t", + "1diot", + "1eccacu10", + "1eccacu1o", + "1eccacul0", + "1eccaculo", + "1mbec11e", + "1mbec1le", + "1mbeci1e", + "1mbecile", + "a11upat0", 
+ "a11upato", + "a1lupat0", + "a1lupato", + "aand", + "ah01e", + "ah0le", + "aho1e", + "ahole", + "al1upat0", + "al1upato", + "allupat0", + "allupato", + "ana1", + "ana1e", + "anal", + "anale", + "anus", + "arrapat0", + "arrapato", + "arsch", + "arse", + "ass", + "b00b", + "b00be", + "b01ata", + "b0ceta", + "b0iata", + "b0ob", + "b0obe", + "b0sta", + "b1tch", + "b1te", + "b1tte", + "ba1atkar", + "balatkar", + "bastard0", + "bastardo", + "batt0na", + "battona", + "bitch", + "bite", + "bitte", + "bo0b", + "bo0be", + "bo1ata", + "boceta", + "boiata", + "boob", + "boobe", + "bosta", + "bran1age", + "bran1er", + "bran1ette", + "bran1eur", + "bran1euse", + "branlage", + "branler", + "branlette", + "branleur", + "branleuse", + "c0ck", + "c0g110ne", + "c0g11one", + "c0g1i0ne", + "c0g1ione", + "c0gl10ne", + "c0gl1one", + "c0gli0ne", + "c0glione", + "c0na", + "c0nnard", + "c0nnasse", + "c0nne", + "c0u111es", + "c0u11les", + "c0u1l1es", + "c0u1lles", + "c0ui11es", + "c0ui1les", + "c0uil1es", + "c0uilles", + "c11t", + "c11t0", + "c11to", + "c1it", + "c1it0", + "c1ito", + "cabr0n", + "cabra0", + "cabrao", + "cabron", + "caca", + "cacca", + "cacete", + "cagante", + "cagar", + "cagare", + "cagna", + "cara1h0", + "cara1ho", + "caracu10", + "caracu1o", + "caracul0", + "caraculo", + "caralh0", + "caralho", + "cazz0", + "cazz1mma", + "cazzata", + "cazzimma", + "cazzo", + "ch00t1a", + "ch00t1ya", + "ch00tia", + "ch00tiya", + "ch0d", + "ch0ot1a", + "ch0ot1ya", + "ch0otia", + "ch0otiya", + "ch1asse", + "ch1avata", + "ch1er", + "ch1ng0", + "ch1ngadaz0s", + "ch1ngadazos", + "ch1ngader1ta", + "ch1ngaderita", + "ch1ngar", + "ch1ngo", + "ch1ngues", + "ch1nk", + "chatte", + "chiasse", + "chiavata", + "chier", + "ching0", + "chingadaz0s", + "chingadazos", + "chingader1ta", + "chingaderita", + "chingar", + "chingo", + "chingues", + "chink", + "cho0t1a", + "cho0t1ya", + "cho0tia", + "cho0tiya", + "chod", + "choot1a", + "choot1ya", + "chootia", + "chootiya", + "cl1t", + "cl1t0", + "cl1to", + 
"clit", + "clit0", + "clito", + "cock", + "cog110ne", + "cog11one", + "cog1i0ne", + "cog1ione", + "cogl10ne", + "cogl1one", + "cogli0ne", + "coglione", + "cona", + "connard", + "connasse", + "conne", + "cou111es", + "cou11les", + "cou1l1es", + "cou1lles", + "coui11es", + "coui1les", + "couil1es", + "couilles", + "cracker", + "crap", + "cu10", + "cu1att0ne", + "cu1attone", + "cu1er0", + "cu1ero", + "cu1o", + "cul0", + "culatt0ne", + "culattone", + "culer0", + "culero", + "culo", + "cum", + "cunt", + "d11d0", + "d11do", + "d1ck", + "d1ld0", + "d1ldo", + "damn", + "de1ch", + "deich", + "depp", + "di1d0", + "di1do", + "dick", + "dild0", + "dildo", + "dyke", + "encu1e", + "encule", + "enema", + "enf01re", + "enf0ire", + "enfo1re", + "enfoire", + "estup1d0", + "estup1do", + "estupid0", + "estupido", + "etr0n", + "etron", + "f0da", + "f0der", + "f0ttere", + "f0tters1", + "f0ttersi", + "f0tze", + "f0utre", + "f1ca", + "f1cker", + "f1ga", + "fag", + "fica", + "ficker", + "figa", + "foda", + "foder", + "fottere", + "fotters1", + "fottersi", + "fotze", + "foutre", + "fr0c10", + "fr0c1o", + "fr0ci0", + "fr0cio", + "fr0sc10", + "fr0sc1o", + "fr0sci0", + "fr0scio", + "froc10", + "froc1o", + "froci0", + "frocio", + "frosc10", + "frosc1o", + "frosci0", + "froscio", + "fuck", + "g00", + "g0o", + "g0u1ne", + "g0uine", + "gandu", + "go0", + "goo", + "gou1ne", + "gouine", + "gr0gnasse", + "grognasse", + "haram1", + "harami", + "haramzade", + "hund1n", + "hundin", + "id10t", + "id1ot", + "idi0t", + "idiot", + "imbec11e", + "imbec1le", + "imbeci1e", + "imbecile", + "j1zz", + "jerk", + "jizz", + "k1ke", + "kam1ne", + "kamine", + "kike", + "leccacu10", + "leccacu1o", + "leccacul0", + "leccaculo", + "m1erda", + "m1gn0tta", + "m1gnotta", + "m1nch1a", + "m1nchia", + "m1st", + "mam0n", + "mamahuev0", + "mamahuevo", + "mamon", + "masturbat10n", + "masturbat1on", + "masturbate", + "masturbati0n", + "masturbation", + "merd0s0", + "merd0so", + "merda", + "merde", + "merdos0", + "merdoso", + 
"mierda", + "mign0tta", + "mignotta", + "minch1a", + "minchia", + "mist", + "musch1", + "muschi", + "n1gger", + "neger", + "negr0", + "negre", + "negro", + "nerch1a", + "nerchia", + "nigger", + "orgasm", + "p00p", + "p011a", + "p01la", + "p0l1a", + "p0lla", + "p0mp1n0", + "p0mp1no", + "p0mpin0", + "p0mpino", + "p0op", + "p0rca", + "p0rn", + "p0rra", + "p0uff1asse", + "p0uffiasse", + "p1p1", + "p1pi", + "p1r1a", + "p1rla", + "p1sc10", + "p1sc1o", + "p1sci0", + "p1scio", + "p1sser", + "pa11e", + "pa1le", + "pal1e", + "palle", + "pane1e1r0", + "pane1e1ro", + "pane1eir0", + "pane1eiro", + "panele1r0", + "panele1ro", + "paneleir0", + "paneleiro", + "patakha", + "pec0r1na", + "pec0rina", + "pecor1na", + "pecorina", + "pen1s", + "pendej0", + "pendejo", + "penis", + "pip1", + "pipi", + "pir1a", + "pirla", + "pisc10", + "pisc1o", + "pisci0", + "piscio", + "pisser", + "po0p", + "po11a", + "po1la", + "pol1a", + "polla", + "pomp1n0", + "pomp1no", + "pompin0", + "pompino", + "poop", + "porca", + "porn", + "porra", + "pouff1asse", + "pouffiasse", + "pr1ck", + "prick", + "pussy", + "put1za", + "puta", + "puta1n", + "putain", + "pute", + "putiza", + "puttana", + "queca", + "r0mp1ba11e", + "r0mp1ba1le", + "r0mp1bal1e", + "r0mp1balle", + "r0mpiba11e", + "r0mpiba1le", + "r0mpibal1e", + "r0mpiballe", + "rand1", + "randi", + "rape", + "recch10ne", + "recch1one", + "recchi0ne", + "recchione", + "retard", + "romp1ba11e", + "romp1ba1le", + "romp1bal1e", + "romp1balle", + "rompiba11e", + "rompiba1le", + "rompibal1e", + "rompiballe", + "ruff1an0", + "ruff1ano", + "ruffian0", + "ruffiano", + "s1ut", + "sa10pe", + "sa1aud", + "sa1ope", + "sacanagem", + "sal0pe", + "salaud", + "salope", + "saugnapf", + "sb0rr0ne", + "sb0rra", + "sb0rrone", + "sbattere", + "sbatters1", + "sbattersi", + "sborr0ne", + "sborra", + "sborrone", + "sc0pare", + "sc0pata", + "sch1ampe", + "sche1se", + "sche1sse", + "scheise", + "scheisse", + "schlampe", + "schwachs1nn1g", + "schwachs1nnig", + "schwachsinn1g", + 
"schwachsinnig", + "schwanz", + "scopare", + "scopata", + "sexy", + "sh1t", + "shit", + "slut", + "sp0mp1nare", + "sp0mpinare", + "spomp1nare", + "spompinare", + "str0nz0", + "str0nza", + "str0nzo", + "stronz0", + "stronza", + "stronzo", + "stup1d", + "stupid", + "succh1am1", + "succh1ami", + "succhiam1", + "succhiami", + "sucker", + "t0pa", + "tapette", + "test1c1e", + "test1cle", + "testic1e", + "testicle", + "tette", + "topa", + "tr01a", + "tr0ia", + "tr0mbare", + "tr1ng1er", + "tr1ngler", + "tring1er", + "tringler", + "tro1a", + "troia", + "trombare", + "turd", + "twat", + "vaffancu10", + "vaffancu1o", + "vaffancul0", + "vaffanculo", + "vag1na", + "vagina", + "verdammt", + "verga", + "w1chsen", + "wank", + "wichsen", + "x0ch0ta", + "x0chota", + "xana", + "xoch0ta", + "xochota", + "z0cc01a", + "z0cc0la", + "z0cco1a", + "z0ccola", + "z1z1", + "z1zi", + "ziz1", + "zizi", + "zocc01a", + "zocc0la", + "zocco1a", + "zoccola" +] \ No newline at end of file diff --git a/spec/alphabet_spec.lua b/spec/alphabet_spec.lua new file mode 100644 index 0000000..2463be3 --- /dev/null +++ b/spec/alphabet_spec.lua @@ -0,0 +1,59 @@ +require 'busted.runner' () + +describe("Alphabet tests", function() + local Sqids = require("sqids") + + it("simple", function() + local sqids = Sqids.new({ + alphabet = "0123456789abcdef" + }) + + local numbers = { 1, 2, 3 } + local id = "489158" + + assert.are.equal(sqids:encode(numbers), id) + assert.are.same(sqids:decode(id), numbers) + end) + + it("short alphabet", function() + local sqids = Sqids.new({ + alphabet = "abc" + }) + + local numbers = { 1, 2, 3 } + assert.are.same(sqids:decode(sqids:encode(numbers)), numbers) + end) + + it("long alphabet", function() + local sqids = Sqids.new({ + alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()-_+|{}[];:'\"/?.>,<`~" + }) + + local numbers = { 1, 2, 3 } + assert.are.same(sqids:decode(sqids:encode(numbers)), numbers) + end) + + it("multibyte characters", function() + 
assert.has_error(function() + Sqids.new({ + alphabet = "ë1092" + }) + end, "Alphabet cannot contain multibyte characters") + end) + + it("repeating alphabet characters", function() + assert.has_error(function() + Sqids.new({ + alphabet = "aabcdefg" + }) + end, "Alphabet must contain unique characters") + end) + + it("too short of an alphabet", function() + assert.has_error(function() + Sqids.new({ + alphabet = "ab" + }) + end, "Alphabet length must be at least 3") + end) +end) diff --git a/spec/blocklist_spec.lua b/spec/blocklist_spec.lua new file mode 100644 index 0000000..58d103f --- /dev/null +++ b/spec/blocklist_spec.lua @@ -0,0 +1,121 @@ +describe("Blocklist tests", function() + local Sqids = require("sqids") + + it("if no custom blocklist param, use the default blocklist", function() + local sqids = Sqids.new() + + assert.are.same(sqids:decode('aho1e'), {4572721}) + assert.are.equal(sqids:encode({4572721}), 'JExTR') + end) + + it("if an empty blocklist param passed, don't use any blocklist", function() + local sqids = Sqids.new({ + blocklist = {} + }) + + assert.are.same(sqids:decode('aho1e'), {4572721}) + assert.are.equal(sqids:encode({4572721}), 'aho1e') + end) + + it("if a non-empty blocklist param passed, use only that", function() + local sqids = Sqids.new({ + blocklist = { + 'ArUO' -- originally encoded [100000] + } + }) + + -- make sure we don't use the default blocklist + assert.are.same(sqids:decode('aho1e'), {4572721}) + assert.are.equal(sqids:encode({4572721}), 'aho1e') + + -- make sure we are using the passed blocklist + assert.are.same(sqids:decode('ArUO'), {100000}) + assert.are.equal(sqids:encode({100000}), 'QyG4') + assert.are.same(sqids:decode('QyG4'), {100000}) + end) + + it("blocklist", function() + local sqids = Sqids.new({ + blocklist = { + 'JSwXFaosAN', -- normal result of 1st encoding, let's block that word on purpose + 'OCjV9JK64o', -- result of 2nd encoding + 'rBHf', -- result of 3rd encoding is `4rBHfOiqd3`, let's block a substring + 
'79SM', -- result of 4th encoding is `dyhgw479SM`, let's block the postfix + '7tE6' -- result of 4th encoding is `7tE6jdAHLe`, let's block the prefix + } + }) + + local encodedResult = sqids:encode({1000000, 2000000}) + local decodedResult = sqids:decode('1aYeB7bRUt') + + assert.are.equal(encodedResult, '1aYeB7bRUt') + assert.are.same(decodedResult, {1000000, 2000000}) + end) + + it("decoding blocklist words should still work", function() + local sqids = Sqids.new({ + blocklist = { + '86Rf07', 'se8ojk', 'ARsz1p', 'Q8AI49', '5sQRZO' + } + }) + + local decodedResult1 = sqids:decode('86Rf07') + local decodedResult2 = sqids:decode('se8ojk') + local decodedResult3 = sqids:decode('ARsz1p') + local decodedResult4 = sqids:decode('Q8AI49') + local decodedResult5 = sqids:decode('5sQRZO') + + assert.are.same(decodedResult1, {1, 2, 3}) + assert.are.same(decodedResult2, {1, 2, 3}) + assert.are.same(decodedResult3, {1, 2, 3}) + assert.are.same(decodedResult4, {1, 2, 3}) + assert.are.same(decodedResult5, {1, 2, 3}) + end) + + it("match against a short blocklist word test", function() + local sqids = Sqids.new({ + blocklist = { + 'pnd' + } + }) + + local decodedResult = sqids:decode(sqids:encode({1000})) + + assert.are.same(decodedResult, {1000}) + end) + + it("blocklist filtering in constructor test", function() + local sqids = Sqids.new({ + alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + blocklist = { + 'sxnzkl' -- lowercase blocklist in only-uppercase alphabet + } + }) + + local id = sqids:encode({1, 2, 3}) + local numbers = sqids:decode(id) + + assert.are.equal(id, 'IBSHOZ') -- without blocklist, would've been "SXNZKL" + assert.are.same(numbers, {1, 2, 3}) + end) + + it("max encoding attempts test", function() + local alphabet = 'abc' + local minLength = 3 + local blocklist = { + 'cab', 'abc', 'bca' + } + + local sqids = Sqids.new({ + alphabet = alphabet, + minLength = minLength, + blocklist = blocklist + }) + + assert.are.equal(#alphabet, minLength) + assert.are.equal(#blocklist, 
minLength) + + local success, _ = pcall(function() sqids:encode({0}) end) + assert.is_false(success) + end) +end) diff --git a/spec/encoding_spec.lua b/spec/encoding_spec.lua new file mode 100644 index 0000000..f9f521a --- /dev/null +++ b/spec/encoding_spec.lua @@ -0,0 +1,129 @@ +describe("Encoding tests", function() + local Sqids = require("sqids") + + it("match against a short blocklist word", function() + local sqids = Sqids.new({ + blocklist = { + 'pnd' + } + }) + + local decodedResult = sqids:decode(sqids:encode({ 1000 })) + + assert.are.same(decodedResult, { 1000 }) + end) + + it("blocklist filtering in constructor", function() + local sqids = Sqids.new({ + alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + blocklist = { + 'sxnzkl' -- lowercase blocklist in only-uppercase alphabet + } + }) + + local id = sqids:encode({ 1, 2, 3 }) + local numbers = sqids:decode(id) + + assert.are.equal(id, 'IBSHOZ') -- without blocklist, would've been "SXNZKL" + assert.are.same(numbers, { 1, 2, 3 }) + end) + + it("max encoding attempts", function() + local alphabet = 'abc' + local minLength = 3 + local blocklist = { + 'cab', 'abc', 'bca' + } + + local sqids = Sqids.new({ + alphabet = alphabet, + minLength = minLength, + blocklist = blocklist + }) + + assert.are.equal(#alphabet, minLength) + assert.are.equal(#blocklist, minLength) + + local success, _ = pcall(function() sqids:encode({ 0 }) end) + assert.is_false(success) + end) + + it("incremental numbers, same index 0", function() + local sqids = Sqids.new() + + local ids = { + SvIz = { 0, 0 }, + n3qa = { 0, 1 }, + tryF = { 0, 2 }, + eg6q = { 0, 3 }, + rSCF = { 0, 4 }, + sR8x = { 0, 5 }, + uY2M = { 0, 6 }, + ['74dI'] = { 0, 7 }, + ['30WX'] = { 0, 8 }, + moxr = { 0, 9 } + } + + for id, numbers in pairs(ids) do + assert.are.equal(sqids:encode(numbers), id) + assert.are.same(sqids:decode(id), numbers) + end + end) + + it("incremental numbers, same index 1", function() + local sqids = Sqids.new() + + local ids = { + SvIz = { 0, 0 }, + 
nWqP = { 1, 0 }, + tSyw = { 2, 0 }, + eX68 = { 3, 0 }, + rxCY = { 4, 0 }, + sV8a = { 5, 0 }, + uf2K = { 6, 0 }, + ['7Cdk'] = { 7, 0 }, + ['3aWP'] = { 8, 0 }, + m2xn = { 9, 0 } + } + + for id, numbers in pairs(ids) do + assert.are.equal(sqids:encode(numbers), id) + assert.are.same(sqids:decode(id), numbers) + end + end) + + it("multi input", function() + local sqids = Sqids.new() + + local numbers = {} + for i = 0, 99 do + table.insert(numbers, i) + end + + local output = sqids:decode(sqids:encode(numbers)) + assert.are.same(numbers, output) + end) + + it("decoding empty string", function() + local sqids = Sqids.new() + assert.are.same(sqids:decode(''), {}) + end) + + it("decoding an ID with an invalid character", function() + local sqids = Sqids.new() + assert.are.same(sqids:decode('*'), {}) + end) + + it("encode out-of-range numbers", function() + local encodingError = + "Encoding supports numbers between 0 and 2 ^ 53 - 1" + + local sqids = Sqids.new() + local success, _ = pcall(function() sqids:encode({ -1 }) end) + assert.is_false(success) + + local success2, _ = pcall(function() sqids:encode({ 2 ^ 53 }) end) + assert.is_false(success2) + end) + +end) diff --git a/spec/minlength_spec.lua b/spec/minlength_spec.lua new file mode 100644 index 0000000..8d3ddc0 --- /dev/null +++ b/spec/minlength_spec.lua @@ -0,0 +1,111 @@ +describe("MinLength tests", function() + local Sqids = require("sqids") + local defaultOptions = { + alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + } + it("simple", function() + local sqids = Sqids.new({ + minLength = #defaultOptions.alphabet + }) + + local numbers = { 1, 2, 3 } + local id = "86Rf07xd4zBmiJXQG6otHEbew02c3PWsUOLZxADhCpKj7aVFv9I8RquYrNlSTM" + + assert.are.equal(sqids:encode(numbers), id) + assert.are.same(sqids:decode(id), numbers) + end) + + it("incremental", function() + + local numbers = { 1, 2, 3 } + + local map = { + [6] = "86Rf07", + [7] = "86Rf07x", + [8] = "86Rf07xd", + [9] = "86Rf07xd4", + [10] 
= "86Rf07xd4z", + [11] = "86Rf07xd4zB", + [12] = "86Rf07xd4zBm", + [13] = "86Rf07xd4zBmi", + [#defaultOptions.alphabet + 0] = "86Rf07xd4zBmiJXQG6otHEbew02c3PWsUOLZxADhCpKj7aVFv9I8RquYrNlSTM", + [#defaultOptions.alphabet + 1] = "86Rf07xd4zBmiJXQG6otHEbew02c3PWsUOLZxADhCpKj7aVFv9I8RquYrNlSTMy", + [#defaultOptions.alphabet + 2] = "86Rf07xd4zBmiJXQG6otHEbew02c3PWsUOLZxADhCpKj7aVFv9I8RquYrNlSTMyf", + [#defaultOptions.alphabet + 3] = "86Rf07xd4zBmiJXQG6otHEbew02c3PWsUOLZxADhCpKj7aVFv9I8RquYrNlSTMyf1" + } + + for minLength, id in pairs(map) do + local sqids = Sqids.new({ + minLength = tonumber(minLength) + }) + + assert.are.equal(sqids:encode(numbers), id) + assert.are.equal(#sqids:encode(numbers), tonumber(minLength)) + assert.are.same(sqids:decode(id), numbers) + end + end) + + it("incremental numbers", function() + + local sqids = Sqids.new({ + minLength = #defaultOptions.alphabet + }) + + local ids = { + ["SvIzsqYMyQwI3GWgJAe17URxX8V924Co0DaTZLtFjHriEn5bPhcSkfmvOslpBu"] = { 0, 0 }, + ["n3qafPOLKdfHpuNw3M61r95svbeJGk7aAEgYn4WlSjXURmF8IDqZBy0CT2VxQc"] = { 0, 1 }, + ["tryFJbWcFMiYPg8sASm51uIV93GXTnvRzyfLleh06CpodJD42B7OraKtkQNxUZ"] = { 0, 2 }, + ["eg6ql0A3XmvPoCzMlB6DraNGcWSIy5VR8iYup2Qk4tjZFKe1hbwfgHdUTsnLqE"] = { 0, 3 }, + ["rSCFlp0rB2inEljaRdxKt7FkIbODSf8wYgTsZM1HL9JzN35cyoqueUvVWCm4hX"] = { 0, 4 }, + ["sR8xjC8WQkOwo74PnglH1YFdTI0eaf56RGVSitzbjuZ3shNUXBrqLxEJyAmKv2"] = { 0, 5 }, + ["uY2MYFqCLpgx5XQcjdtZK286AwWV7IBGEfuS9yTmbJvkzoUPeYRHr4iDs3naN0"] = { 0, 6 }, + ["74dID7X28VLQhBlnGmjZrec5wTA1fqpWtK4YkaoEIM9SRNiC3gUJH0OFvsPDdy"] = { 0, 7 }, + ["30WXpesPhgKiEI5RHTY7xbB1GnytJvXOl2p0AcUjdF6waZDo9Qk8VLzMuWrqCS"] = { 0, 8 }, + ["moxr3HqLAK0GsTND6jowfZz3SUx7cQ8aC54Pl1RbIvFXmEJuBMYVeW9yrdOtin"] = { 0, 9 } + } + + for id, numbers in pairs(ids) do + assert.are.equal(sqids:encode(numbers), id) + assert.are.same(sqids:decode(id), numbers) + end + end) + + it("min lengths", function() + for _, minLength in ipairs({ 0, 1, 5, 10, #defaultOptions.alphabet }) do + for _, numbers in 
ipairs({ + { 0 }, + { 0, 0, 0, 0, 0 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, + { 100, 200, 300 }, + { 1000, 2000, 3000 }, + { 1000000 }, + { 2 ^ 53 - 1 } + }) do + local sqids = Sqids.new({ + minLength = minLength + }) + + local id = sqids:encode(numbers) + assert.is_true(#id >= minLength) + assert.are.same(sqids:decode(id), numbers) + end + end + end) + + it("out-of-range invalid min length", function() + local minLengthLimit = 255 + local minLengthError = "Minimum length has to be between 0 and " .. minLengthLimit + + assert.has_error(function() + Sqids.new({ + minLength = -1 + }) + end, minLengthError) + + assert.has_error(function() + Sqids.new({ + minLength = minLengthLimit + 1 + }) + end, minLengthError) + end) +end) diff --git a/sqids-lua-0.1-6.rockspec b/sqids-lua-0.1-6.rockspec new file mode 100644 index 0000000..b4eb2b4 --- /dev/null +++ b/sqids-lua-0.1-6.rockspec @@ -0,0 +1,26 @@ +package = "sqids-lua" +version = "0.1-6" +source = { + url = "git://github.com/nascarsayan/sqids-lua.git", +} +description = { + summary = "Sqids (pronounced \"squids\") is a small library that lets you generate YouTube-looking IDs from numbers.", + detailed = "Sqids (pronounced \"squids\") is a small library that lets you generate YouTube-looking IDs from numbers. It's good for link shortening, fast & URL-safe ID generation and decoding back into numbers for quicker database lookups.", + homepage = "https://github.com/sqids/sqids-lua", + license = "MIT" +} +dependencies = { + "lua >= 5.1", + "lua-cjson" +} +build = { + type = "builtin", + modules = { + sqids = "sqids.lua" + }, + install = { + lua = { + ["blocklist"] = "blocklist.json" + } + } +} diff --git a/sqids.lua b/sqids.lua new file mode 100644 index 0000000..80452f5 --- /dev/null +++ b/sqids.lua @@ -0,0 +1,349 @@ +-- package.cpath = "lua_modules/lib/lua/5.4/?.so;" .. package.path +-- package.path = "lua_modules/share/lua/5.4/?.lua;" .. 
-- package.path   <- end of the commented-out search-path override that starts on the previous line

--- Sqids: generate short IDs from lists of non-negative integers and decode
-- them back into the same numbers.
--
-- Loading this module requires `cjson` and reads `blocklist.json` from the
-- directory that contains this file; both failures raise an error at
-- `require` time.

local cjson = require "cjson"

-- Defaults applied by Sqids.new when an option is omitted. Declared `local`
-- so that requiring the module does not create a `DefaultOptions` global.
-- The default blocklist is not kept here: it is loaded from blocklist.json.
local DefaultOptions = {
  alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
  minLength = 0,
}

-- Largest value accepted for the `minLength` option.
local minLengthLimit = 255

--- Directory of the file that calls this helper.
-- @treturn string directory path including the trailing separator, or "./"
--   when the chunk name has no directory part
local function get_script_path()
  -- Level 2 is the caller of this function; `source` carries a one-character
  -- marker in front of the file name, which `sub(2)` removes.
  local source = debug.getinfo(2, "S").source:sub(2)

  -- package.config starts with the platform's directory separator. Only treat
  -- "\" as a separator where it actually is one, so POSIX file names that
  -- contain a backslash keep working.
  local dirPattern = "(.*/)"
  if (package.config or "/"):sub(1, 1) == "\\" then
    dirPattern = "(.*[/\\])"
  end

  return source:match(dirPattern) or "./"
end

local current_directory = get_script_path()
local blocklistFilePath = current_directory .. "blocklist.json"

-- Load the default blocklist from blocklist.json
local blocklistFile = io.open(blocklistFilePath, "r")
if not blocklistFile then
  error("Cannot open blocklist.json")
end

local blocklistData = blocklistFile:read("*a")
blocklistFile:close()

local success, defaultBlocklist = pcall(cjson.decode, blocklistData)
if not success or type(defaultBlocklist) ~= "table" then
  error("Invalid blocklist data in blocklist.json")
end

local Sqids = {}
Sqids.__index = Sqids

--- Write `num` in base `#alphabet`, using the characters of `alphabet` as digits.
-- @tparam number num non-negative whole number
-- @tparam string alphabet digit characters, least value first
-- @treturn string at least one character long (0 maps to the first character)
local function toId(num, alphabet)
  local chars = {}
  for i = 1, #alphabet do
    chars[i] = alphabet:sub(i, i)
  end

  local id = {}
  local result = num
  repeat
    -- most significant digit ends up first, so prepend
    table.insert(id, 1, chars[result % #chars + 1])
    result = math.floor(result / #chars)
  until result <= 0

  return table.concat(id)
end

--- Tell whether `id` collides with a blocklist word.
-- Short words (or short IDs) must match the whole ID, words containing a digit
-- must match at the start or the end, all other words match anywhere.
-- @tparam string id candidate ID (compared case-insensitively)
-- @tparam table blocklist sequence of lowercase words
-- @treturn boolean
local function isBlockedId(id, blocklist)
  id = id:lower()

  for _, word in ipairs(blocklist) do
    if #word <= #id then
      if #id <= 3 or #word <= 3 then
        if id == word then
          return true
        end
      elseif word:match("%d") then
        if id:sub(1, #word) == word or id:sub(#id - #word + 1) == word then
          return true
        end
      -- plain search: blocklist words are literal text, not Lua patterns
      elseif id:find(word, 1, true) then
        return true
      end
    end
  end

  return false
end

--- Largest number accepted by Sqids:encode (2^53 - 1).
-- @treturn number
local function maxValue()
  return 2 ^ 53 - 1
end

--- Inverse of toId: read `id` as a number written in base `#alphabet`.
-- Every character of `id` must occur in `alphabet`.
-- @tparam string id
-- @tparam string alphabet digit characters, least value first
-- @treturn number
local function toNumber(id, alphabet)
  local charIdx = {}
  for i = 1, #alphabet do
    charIdx[alphabet:sub(i, i)] = i - 1
  end

  local result = 0
  for i = 1, #id do
    result = result * #alphabet + charIdx[id:sub(i, i)]
  end

  return result
end

--- Check that no byte occurs twice in `str`.
-- @tparam string str
-- @treturn boolean
local function hasUniqueChars(str)
  local seen = {}
  for i = 1, #str do
    local c = str:sub(i, i)
    if seen[c] then
      return false
    end
    seen[c] = true
  end
  return true
end

--- Consistent shuffle: always produces the same permutation for the same input.
-- @tparam string alphabet
-- @treturn string permutation of `alphabet`
local function shuffle(alphabet)
  local chars = {}
  for i = 1, #alphabet do
    chars[i] = alphabet:sub(i, i)
  end

  for i = 1, #chars - 1 do
    local j = #chars - i + 1
    local r = (i * j + chars[i]:byte() + chars[j]:byte()) % #chars + 1
    chars[i], chars[r] = chars[r], chars[i]
  end

  return table.concat(chars)
end

--- Internal encoder.
-- @tparam table sq Sqids instance (reads `alphabet`, `minLength`, `blocklist`)
-- @tparam table numbers non-empty sequence of validated numbers
-- @tparam[opt=0] number increment retry counter used after a blocklist hit
-- @treturn string the generated ID
-- Raises an error once `increment` exceeds the alphabet length.
local function encodeNumbers(sq, numbers, increment)
  increment = increment or 0
  local size = #sq.alphabet

  -- if increment is greater than alphabet length, we've reached max attempts
  if increment > size then
    error('Reached max attempts to re-generate the ID')
  end

  -- get a semi-random offset from input numbers
  local offset = 0
  for i, v in ipairs(numbers) do
    offset = sq.alphabet:byte(v % size + 1) + i + offset
  end
  offset = offset % size

  -- a non-zero `increment` means this is an internal attempt to re-generate the ID
  offset = (offset + increment) % size

  -- re-arrange alphabet so that second-half goes in front of the first-half
  local alphabet = sq.alphabet:sub(offset + 1) .. sq.alphabet:sub(1, offset)

  -- `prefix` is the first character in the generated ID, used for randomization
  local prefix = alphabet:sub(1, 1)

  -- reverse alphabet (otherwise for [0, x] `offset` and `separator` will be the same char)
  alphabet = alphabet:reverse()

  -- final ID will always have the `prefix` character at the beginning
  local ret = { prefix }

  for i, num in ipairs(numbers) do
    -- the first character of the alphabet is reserved for the `separator`
    ret[#ret + 1] = toId(num, alphabet:sub(2))

    if i < #numbers then
      -- `separator` isolates numbers within the ID; shuffle after each one
      ret[#ret + 1] = alphabet:sub(1, 1)
      alphabet = shuffle(alphabet)
    end
  end

  local id = table.concat(ret)

  -- handle `minLength` requirement, if the ID is too short
  if sq.minLength > #id then
    -- append a separator, then as much (re-shuffled) alphabet as is needed
    id = id .. alphabet:sub(1, 1)

    while sq.minLength - #id > 0 do
      alphabet = shuffle(alphabet)
      id = id .. alphabet:sub(1, math.min(sq.minLength - #id, #alphabet))
    end
  end

  -- if ID has a blocked word anywhere, restart with a +1 increment
  if isBlockedId(id, sq.blocklist) then
    return encodeNumbers(sq, numbers, increment + 1)
  end

  return id
end

--- Create a Sqids encoder/decoder.
-- @tparam[opt] table options
--   `alphabet`  (string) at least 3 unique single-byte characters
--   `minLength` (number) between 0 and 255
--   `blocklist` (table)  sequence of words; defaults to blocklist.json
-- @treturn table instance with `encode` and `decode` methods
-- Raises an error when the alphabet or the minimum length is invalid.
function Sqids.new(options)
  options = options or {}

  local alphabet = options.alphabet or DefaultOptions.alphabet
  local minLength = options.minLength or DefaultOptions.minLength
  local blocklist = options.blocklist or defaultBlocklist

  -- Any byte >= 128 belongs to a multibyte (or invalid) sequence. This byte
  -- test does not depend on the `utf8` library, which only exists in Lua 5.3+.
  if alphabet:find("[\128-\255]") then
    error('Alphabet cannot contain multibyte characters')
  end

  if #alphabet < 3 then
    error('Alphabet length must be at least 3')
  end

  if not hasUniqueChars(alphabet) then
    error('Alphabet must contain unique characters')
  end

  if type(minLength) ~= 'number' or minLength < 0 or minLength > minLengthLimit then
    error('Minimum length has to be between 0 and ' .. minLengthLimit)
  end

  -- Keep only blocklist words that are at least 3 characters long and consist
  -- solely of (lowercased) alphabet characters; store them lowercased.
  local alphabetSet = {}
  local loweredAlphabet = alphabet:lower()
  for i = 1, #loweredAlphabet do
    alphabetSet[loweredAlphabet:sub(i, i)] = true
  end

  local filteredBlocklist = {}
  for _, word in ipairs(blocklist) do
    if #word >= 3 then
      local wordLowercased = word:lower()
      local usable = true
      for i = 1, #wordLowercased do
        if not alphabetSet[wordLowercased:sub(i, i)] then
          usable = false
          break
        end
      end
      if usable then
        filteredBlocklist[#filteredBlocklist + 1] = wordLowercased
      end
    end
  end

  local instance = {
    alphabet = shuffle(alphabet),
    minLength = minLength,
    blocklist = filteredBlocklist
  }

  return setmetatable(instance, Sqids)
end

--- Encode a sequence of numbers into an ID.
-- @tparam table numbers sequence of whole numbers between 0 and 2^53 - 1
-- @treturn string the ID, or '' when `numbers` is empty
-- Raises an error for out-of-range or non-integer input, or when no
-- unblocked ID could be generated.
function Sqids:encode(numbers)
  -- if no numbers passed, return an empty string
  if #numbers == 0 then
    return ''
  end

  local limit = maxValue()
  for _, n in ipairs(numbers) do
    -- `not (a and b)` form also rejects NaN
    if type(n) ~= "number" or not (n >= 0 and n <= limit) then
      -- %.0f prints the exact bound; plain concatenation would round it
      error("Encoding supports numbers between 0 and " .. string.format("%.0f", limit))
    end
    if n % 1 ~= 0 then
      error("Encoding supports only whole numbers")
    end
  end

  return encodeNumbers(self, numbers)
end

--- Decode an ID back into the sequence of numbers it was generated from.
-- @tparam string id
-- @treturn table sequence of numbers; empty when `id` is '' or contains a
--   character that is not in the alphabet
function Sqids:decode(id)
  local ret = {}

  -- if an empty string, return an empty array
  if id == '' then
    return ret
  end

  -- if a character is not in the alphabet, return an empty array
  local known = {}
  for i = 1, #self.alphabet do
    known[self.alphabet:sub(i, i)] = true
  end
  for i = 1, #id do
    if not known[id:sub(i, i)] then
      return ret
    end
  end

  -- first character is always the `prefix`
  local prefix = id:sub(1, 1)

  -- `offset` is the semi-random position that was generated during encoding
  local offset = self.alphabet:find(prefix, 1, true)

  -- re-arrange alphabet back into the form used while encoding, then reverse it
  local alphabet = self.alphabet:sub(offset) .. self.alphabet:sub(1, offset - 1)
  alphabet = alphabet:reverse()

  -- now it's safe to remove the prefix character from ID, it's not needed anymore
  id = id:sub(2)

  while #id > 0 do
    local separator = alphabet:sub(1, 1)

    -- Plain search for the separator: it may be a pattern-magic character
    -- such as `%`, and the result must not depend on how a given Lua version
    -- treats empty pattern matches.
    local sepPos = id:find(separator, 1, true)
    local chunk = sepPos and id:sub(1, sepPos - 1) or id

    -- if chunk is empty, we are done (the rest are junk characters)
    if chunk == '' then
      return ret
    end

    -- decode the number without using the `separator` character
    ret[#ret + 1] = toNumber(chunk, alphabet:sub(2))

    if not sepPos then
      -- nothing to the right of this chunk
      break
    end

    -- more input follows: shuffle the alphabet because that's what encoding did
    alphabet = shuffle(alphabet)

    -- `id` is now everything to the right of the `separator`
    id = id:sub(sepPos + 1)
  end

  return ret
end

return Sqids