Skip to content

Commit

Permalink
use (packed) array instead of typed array
Browse files Browse the repository at this point in the history
  • Loading branch information
cometkim committed Nov 29, 2024
1 parent 5094e7a commit 94ed937
Show file tree
Hide file tree
Showing 8 changed files with 37 additions and 29 deletions.
10 changes: 10 additions & 0 deletions .changeset/modern-wasps-grin.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
"unicode-segmenter": patch
---

Slightly improved performance and bundle size.

It seems that using `TypedArray` isn't helpful,
and dereferencing many prototypes may cause deoptimization.

A plain `Array` is good enough, as long as it stays packed.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb

| Name | Unicode® | ESM? | Size | Size (min) | Size (min+gzip) | Size (min+br) |
|------------------------------|----------|------|----------:|-----------:|----------------:|--------------:|
| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 17,352 | 12,822 | 5,307 | 4,089 |
| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 17,313 | 12,783 | 5,285 | 3,946 |
| `graphemer` | 15.0.0 | ✖️ | 410,435 | 95,104 | 15,752 | 10,660 |
| `grapheme-splitter` | 10.0.0 | ✖️ | 122,252 | 23,680 | 7,852 | 4,841 |
| `@formatjs/intl-segmenter`* | 15.0.0 | ✖️ | 491,043 | 318,721 | 54,248 | 34,380 |
Expand All @@ -270,7 +270,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb

| Name | Bytecode size | Bytecode size (gzip)* |
|------------------------------|--------------:|----------------------:|
| `unicode-segmenter/grapheme` | 24,538 | 12,788 |
| `unicode-segmenter/grapheme` | 24,386 | 12,690 |
| `graphemer` | 133,949 | 31,710 |
| `grapheme-splitter` | 63,810 | 19,125 |

Expand Down
18 changes: 9 additions & 9 deletions scripts/unicode.js
Original file line number Diff line number Diff line change
Expand Up @@ -573,19 +573,19 @@ export const ${typeName} = {

f.write(`
export const ${name}_buffer = initUnicodeRangeBuffer(
new Uint32Array(${breakTable.length * 2}),
Array(${breakTable.length * 2}),
/** @type {UnicodeRangeEncoding} */
('${breakTable.map(x => `${x[0] === 0 ? '' : x[0].toString(36)},${x[1] === 0 ? '' : x[1].toString(36)}`).join(',')}')
);
export const ${name}_cats = initLookupTableBuffer(
new Uint8Array(${breakTable.length}),
Array(${breakTable.length}),
/** @type {LookupTableEncoding} */
('${breakTable.map(x => inversed[x[2]].toString(36)).join('')}')
);
const ${name}_lookup = initLookupTableBuffer(
new Uint16Array(${lookupTable.length}),
Array(${lookupTable.length}),
/** @type {LookupTableEncoding} */
('${lookupTable.map(x => x === 0 ? '' : x.toString(36)).join(',')}'),
','
Expand Down Expand Up @@ -637,7 +637,7 @@ import { initUnicodeRangeBuffer } from './core.js';
* The Unicode \`Indic_Conjunct_Break=Consonant\` derived property table
*/
export const consonant_buffer = initUnicodeRangeBuffer(
new Uint16Array(${table.length * 2}),
Array(${table.length * 2}),
/** @type {UnicodeRangeEncoding} */
('${table.map(x => `${x[0] ? x[0].toString(36) : ''},${x[1] ? x[1].toString(36) : ''}`).join(',')}')
);
Expand Down Expand Up @@ -675,7 +675,7 @@ import { initUnicodeRangeBuffer } from './core.js';
* @type {UnicodeRangeBuffer}
*/
export const letter_buffer = initUnicodeRangeBuffer(
new Uint32Array(${gencats['L'].length * 2}),
Array(${gencats['L'].length * 2}),
/** @type {UnicodeRangeEncoding} */
('${encodeRanges(gencats['L'])}')
);
Expand All @@ -686,7 +686,7 @@ export const letter_buffer = initUnicodeRangeBuffer(
* @type {UnicodeRangeBuffer}
*/
export const numeric_buffer = initUnicodeRangeBuffer(
new Uint32Array(${gencats['N'].length * 2}),
Array(${gencats['N'].length * 2}),
/** @type {UnicodeRangeEncoding} */
('${encodeRanges(gencats['N'])}')
);
Expand All @@ -697,7 +697,7 @@ export const numeric_buffer = initUnicodeRangeBuffer(
* @type {UnicodeRangeBuffer}
*/
export const alphabetic_buffer = initUnicodeRangeBuffer(
new Uint32Array(${derived['Alphabetic'].length * 2}),
Array(${derived['Alphabetic'].length * 2}),
/** @type {UnicodeRangeEncoding} */
('${encodeRanges(derived['Alphabetic'])}')
);
Expand Down Expand Up @@ -727,7 +727,7 @@ import { initUnicodeRangeBuffer } from './core.js';
* @type {UnicodeRangeBuffer}
*/
export const emoji_presentation_buffer = initUnicodeRangeBuffer(
new Uint32Array(${emojiProps['Emoji_Presentation'].length * 2}),
Array(${emojiProps['Emoji_Presentation'].length * 2}),
/** @type {UnicodeRangeEncoding} */
('${encodeRanges(emojiProps['Emoji_Presentation'])}')
);
Expand All @@ -738,7 +738,7 @@ export const emoji_presentation_buffer = initUnicodeRangeBuffer(
* @type {UnicodeRangeBuffer}
*/
export const extended_pictographic_buffer = initUnicodeRangeBuffer(
new Uint32Array(${emojiProps['Extended_Pictographic'].length * 2}),
Array(${emojiProps['Extended_Pictographic'].length * 2}),
/** @type {UnicodeRangeEncoding} */
('${encodeRanges(emojiProps['Extended_Pictographic'])}')
);
Expand Down
4 changes: 2 additions & 2 deletions src/_emoji_data.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import { initUnicodeRangeBuffer } from './core.js';
* @type {UnicodeRangeBuffer}
*/
export const emoji_presentation_buffer = initUnicodeRangeBuffer(
new Uint32Array(160),
Array(160),
/** @type {UnicodeRangeEncoding} */
('6xm,1,73d,3,73k,,73n,,7i5,1,7is,1,7k8,b,7lr,,7mb,,7mp,,7my,1,7nh,1,7no,1,7ny,,7o4,,7oq,,7oy,1,7p1,,7p6,,7p9,,7ph,,7pm,1,7qg,,7rg,,7ri,,7rn,2,7rr,,7th,2,7u8,,7un,,8ij,1,8k0,,8k5,,2pz8,,2q4v,,2qa6,,2qa9,9,2qcm,p,2qdd,,2qe2,,2qen,,2qeq,4,2qew,2,2qfk,1,2qkg,w,2qlp,8,2qlz,1x,2qny,l,2qow,16,2qq7,4,2qqo,g,2qr8,,2qrc,1y,2qtc,,2qte,56,2qyn,1q,2r0r,3,2r0w,n,2r22,,2r2t,1,2r38,,2r5n,2c,2r9c,1x,2rbg,,2rbk,2,2rbp,2,2rbw,3,2rcb,1,2rck,8,2rj4,b,2rjk,,2rrg,1a,2rss,9,2rt3,54,2s1c,c,2s1s,9,2s27,1j,2s3y,e,2s4f,a,2s4w,8')
);
Expand All @@ -27,7 +27,7 @@ export const emoji_presentation_buffer = initUnicodeRangeBuffer(
* @type {UnicodeRangeBuffer}
*/
export const extended_pictographic_buffer = initUnicodeRangeBuffer(
new Uint32Array(156),
Array(156),
/** @type {UnicodeRangeEncoding} */
('4p,,4u,,6d8,,6dl,,6jm,,6k9,,6ms,5,6nd,1,6xm,1,6y0,,70o,,72n,,73d,a,73s,2,79e,,7fu,1,7g6,,7gg,,7i3,3,7i8,5,7if,b,7is,35,7m8,39,7pk,a,7pw,,7py,,7q5,,7q9,,7qg,,7qr,1,7r8,,7rb,,7rg,,7ri,,7rn,2,7rr,,7s3,4,7th,2,7tt,,7u8,,7un,,850,1,8hx,2,8ij,1,8k0,,8k5,,9io,,9j1,,9zr,,9zt,,2pz4,73,2q6l,2,2q7j,,2q98,5,2q9q,1,2qa6,,2qa9,9,2qb1,1k,2qdd,e,2qe2,,2qen,,2qeq,8,2qf0,3,2qfd,c1,2qrk,8t,2r0m,7d,2r9c,3j,2rg4,b,2rit,16,2rkc,3,2rm0,7,2rmi,5,2rns,7,2rou,29,2rrg,1a,2rss,9,2rt3,c8,2scg,sd')
);
6 changes: 3 additions & 3 deletions src/_general_data.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 94ed937

Please sign in to comment.