diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml new file mode 100644 index 0000000..0f47db1 --- /dev/null +++ b/.github/dependabot.yaml @@ -0,0 +1,5 @@ +version: 2 +updates: + - package-ecosystem: npm + directory: "/" + schedule: {interval: weekly} diff --git a/.npmignore b/.npmignore index 1d5fb8c..c3620b8 100644 --- a/.npmignore +++ b/.npmignore @@ -9,6 +9,7 @@ yarn.lock .gitattributes .prettierrc.yaml rollup.config.js +vitest.config.ts # Platform specified files. .DS_Store diff --git a/.npmrc b/.npmrc deleted file mode 100644 index ae64359..0000000 --- a/.npmrc +++ /dev/null @@ -1 +0,0 @@ -//registry.npmjs.org/:_authToken=${NPM_TOKEN} diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e1bf5a..e5b37bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,9 @@ -## v0.1.1 +## v1.0.0 - Add MIT License to node manifest. -- GitHub Actions for check. +- GitHub Actions for check and dependabot. +- Support all registered wordless languages by default. +- Vitest to validate basic functions. - Readme manifest details. ## v0.1.0 diff --git a/README.md b/README.md index c61c4af..9c5e2ac 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,110 @@ # Markdown-it Wordless -A markdown-it plugin to optimize wordless multi-language space render. +A markdown-it plugin to optimize wordless multi-language line-break render. -When a paragraph is long in markdown, we usually separate them into lines. +When a paragraph is long in markdown, we usually separate them into lines, +and it will finally be rendered into a single line inside HTML. But for wordless languages (such as Chinese and Japanese), -an extra line break will cause an unnecessary white space. -You can definitely set: +they do not use spaces to separate words, +that they don't need a space to be added when processing line-break. 
+ +If you are only working with a single wordless language, +you can definitely use the following code, +which will disable all spaces at line breaks +(render single `\n` into an empty string rather than a space): ```ts +import md from "markdown-it" md.renderer.rules.softbreak = () => "" ``` -to disable all spaces when line break, -but how about the condition when resolving multi-languages? +But once working with multi-languages, +especially when there's a mix of wordless and wordful languages, +such as using Chinese and English in a single markdown document, +such options cannot handle all cases. +So here comes this `"markdown-it-wordless"` plugin, +and you can use it like this: + +```ts +import md from "markdown-it" +import {wordless} from "markdown-it-wordless" +md.use(wordless) +``` + +## Basic functions -You can use this plugin to resolve the problem. -In this plugin, you can config in details -to resolve line break in multi-languages. -For example, when working with Chinese and English, -you can enable the softbreak for English but disable it for Chinese -by following configurations: +1. Wordful languages (such as English and Arabic) will be rendered as usual. +2. It won't add a space at a line break within the same wordless language. +3. It will add a space at a line break between different wordless languages. +4. As a special case, Chinese and Japanese are treated as the same language, + as there are many shared characters between them, + and their character styles are almost the same. +5. Although Korean characters are like Chinese and Japanese (CJK), + Korean is not a wordless language: it uses spaces to separate words. + +## Use it with VitePress + +[VitePress](https://vitepress.dev) is an excellent static site generator, +and this package was inspired while the author was using VitePress. +It's strongly recommended to add this plugin to VitePress +if you are using wordless languages. 
And here's how to configure it: ```ts -import {Options, chineseAndJapanese, wordless} from "markdown-it-wordless" +// /.vitepress/config.ts +import {defineConfig} from "vitepress" +import {wordless} from "markdown-it-wordless" + +export default defineConfig({ + markdown: { + config(md) { + md.use(wordless) + }, + }, + // Other configs... +}) +``` + +## Customize to optimize performance + +The default option will enable optimization +for all registered wordless languages inside this package. +If you want to optimize performance, +you can specify exactly which wordless languages you are using. +You may also specify which wordful languages you are using, +because by default there's only optimization for wordful languages +whose Unicode code points are no greater than `0x0dff`. + +Here's a simple example +if you will only use Chinese or Japanese as wordless languages: + +```ts +import md from "markdown-it" +import {wordless, chineseAndJapanese, Options} from "markdown-it-wordless" md.use(wordless, {supportWordless: [chineseAndJapanese]}) ``` + +Such optimization is unnecessary in most cases, +because this plugin will not slow down the rendering process a lot +in common cases (only a few milliseconds). +And if you do want to customize, +please make sure you've understood the source code. +Please refer to [`data.ts`](./data.ts) for more details, +where each item is documented in detail. + +## About the supported languages + +You can find all supported languages +in the source code of [`data.ts`](./data.ts). +Each language or language series is an exported const +that you can import and use. + +The language series are based on [Unicode](https://unicode.org/charts/). +Most of the languages are coded manually and some of them are +generated by several AI models, so there might be mistakes, +and the author cannot guarantee the accuracy of the data +because it's almost impossible for a single person to learn all such languages. 
+ +If you are a native speaker of one of those wordless languages +and you find there are some mistakes, +or if there are wordless languages not included in this package, +please feel free to open an issue. diff --git a/data.ts b/data.ts index fc56f63..57871bf 100644 --- a/data.ts +++ b/data.ts @@ -1,17 +1,30 @@ /** Ensure an array is not empty. */ type NonEmptyArray<T> = [T, ...T[]] -/** A range of unicode numbers, mark its begin and end, the end is included. */ +/** + * A range of unicode numbers, + * mark its begin and end. + * The end is included (using `<=` rather than `<` in source code). + */ export type Range = [number, number] -/** Unicode {@link Range}s of a single language. */ +/** + * Unicode {@link Range}s of a single language. + * It is a non-empty array of {@link Range} + * because a single language might contain multiple ranges in unicode. + */ export type LanguageRanges = NonEmptyArray<Range> /** - * The default value is empty, you need to add it manually. - * Parsing wordless languages costs a lot. - * It's strongly recommended to only introduce the required series. - * For example: + * The default value will enable all languages registered inside + * the {@link allWordless} const, and enable optimization for + * {@link commonWords} by default. + * + * If you'd like to customize the supported languages to improve performance, + * you can configure it as follows. + * The following example only enables wordless languages optimization + * for Chinese and Japanese, all other wordless languages will be omitted. + * * ```ts * import {wordless, chineseAndJapanese, Options} from 'markdown-it-wordless' * md.use(wordless, {supportWordless: [chineseAndJapanese]}) @@ -30,46 +43,6 @@ export type Options = { supportWordless: LanguageRanges[] } -/** - * @param code unicode number of a character. - * @param options {@link Options} for the wordless languages and - * a series of non-wordless languages for optimization. 
- * @returns Index of the character in the given wordless language series, - * if there's not {@link Range} contains such code, - * it means this is not a character of a wordless language, - * and it will return -1. And if it's an emoji, it will return -2. - */ -export function langIndexOf(code: number, options?: Options): number { - options = { - optimizeWords: options?.optimizeWords ?? [commonWords], - supportWordless: options?.supportWordless ?? [], - } - - // Process optimizations. - for (const ranges of options!.optimizeWords!) { - for (const range of ranges) { - if (code >= range[0] && code <= range[1]) return -1 - } - } - - // Process Emoji. - for (const ranges of emoji) { - for (const range of ranges) { - if (code >= range[0] && code <= range[1]) return -2 - } - } - - // Process wordless language index. - const wordless = options!.supportWordless! - for (let index = 0; index < wordless.length; index++) { - const ranges = wordless[index] - for (const range of ranges) { - if (code >= range[0] && code <= range[1]) return index - } - } - return -1 -} - /** Unicode from zero to 0x0dff, commonly used language with words. */ export const commonWords: LanguageRanges = [[0x0000, 0x0dff]] @@ -104,7 +77,9 @@ export const chineseAndJapanese: LanguageRanges = [ // [0x3040, 0x309f], // 日文平假名/平仮名ひらがな // [0x30a0, 0x30ff], // 日文片假名/片仮名カタカナ [0x3040, 0x30ff], + [0x3100, 0x312f], // 传统拼音注音符号(ㄆㄧㄣ ㄧㄣ) [0x3190, 0x319f], // 甲乙丙丁天地人... 
+ [0x31a0, 0x31bf], // 传统拼音注音字母(ㄆㄧㄣ ㄧㄣ) // [0x31c0, 0x31ef], // 笔画/筆画 // [0x31f0, 0x31ff], // 日文片假名扩展/片仮名カタカナの拡張 @@ -168,12 +143,6 @@ export const xishuangbannaOldDai: LanguageRanges = [[0x1a20, 0x1aaf]] /** 江永女书 */ export const jiangyongWomanScript: LanguageRanges = [[0x1b170, 0x1b2ff]] -/** 旧版拼音 */ -export const oldChinesePinyin: LanguageRanges = [ - [0x3100, 0x312f], - [0x31a0, 0x31bf], -] - /** 契丹小字 */ export const khitanSmallScript: LanguageRanges = [[0x18b00, 0x18cff]] @@ -192,3 +161,62 @@ export const cuneiform: LanguageRanges = [ /** Ancient Egyptian hieroglyphs. */ export const hieroglyphics: LanguageRanges = [[0x13000, 0x1345f]] + +/** Enable optimization for all registered wordless languages. */ +export const allWordless: LanguageRanges[] = [ + chineseAndJapanese, + tibetan, + thai, + lao, + cambodian, + burmese, + yi, + dehongDai, + xishuangbannaNewDai, + xishuangbannaOldDai, + jiangyongWomanScript, + khitanSmallScript, + tangut, + cuneiform, + hieroglyphics, +] + +/** + * @param code unicode number of a character. + * @param options {@link Options} for the wordless languages and + * a series of non-wordless languages for optimization. + * @returns Index of the character in the given wordless language series, + * if there's not {@link Range} contains such code, + * it means this is not a character of a wordless language, + * and it will return -1. And if it's an emoji, it will return -2. + */ +export function langIndexOf(code: number, options?: Options): number { + options = { + optimizeWords: options?.optimizeWords ?? [commonWords], + supportWordless: options?.supportWordless ?? allWordless, + } + + // Process optimizations. + for (const ranges of options!.optimizeWords!) { + for (const range of ranges) { + if (code >= range[0] && code <= range[1]) return -1 + } + } + + // Process Emoji. + for (const ranges of emoji) { + for (const range of ranges) { + if (code >= range[0] && code <= range[1]) return -2 + } + } + + // Process wordless language index. 
+ const wordless = options!.supportWordless! + for (let index = 0; index < wordless.length; index++) { + const ranges = wordless[index] + for (const range of ranges) { + if (code >= range[0] && code <= range[1]) return index + } + } + return -1 +} diff --git a/index.ts b/index.ts index 5118940..0295174 100644 --- a/index.ts +++ b/index.ts @@ -1,4 +1,5 @@ import md from "markdown-it" +import MarkdownIt from "markdown-it/index.mjs" import type {Options} from "./data" import {langIndexOf} from "./data" @@ -9,16 +10,34 @@ export * from "./data" const space = " " /** - * The default {@link Options} contains no wordless languages, - * that you need to add required optimization manually. - * Render wordless languages cost a lot, - * it's recommended to only add required language ranges. + * A markdown-it plugin to optimize wordless multi-language line-break render. + * See [readme](./README.md) of this package for more details. + * Here's a minimal example of how to use it: * - * For example, if you are using Chinese or Japanese with English, - * you may consider code like this: * ```ts - * import {wordless, chineseAndJapanese, Options} from 'markdown-it-wordless' - * md.use(wordless, {supportWordless: [chineseAndJapanese]}) + * import md from "markdown-it" + * import {wordless} from 'markdown-it-wordless' + * md.use(wordless) + * ``` + * + * ## For VitePress users + * + * If you are using [VitePress](https://vitepress.dev), + * you may configure it like this: + * + * ```ts + * // /.vitepress/config.ts + * import {defineConfig} from "vitepress" + * import {wordless} from "markdown-it-wordless" + * + * export default defineConfig({ + * markdown: { + * config(md) { + * md.use(wordless) + * }, + * }, + * // Other configs... + * }) * ``` */ export function wordless(md: md, options?: Options) { @@ -31,3 +50,13 @@ export function wordless(md: md, options?: Options) { return before === after && before !== -1 && before != -2 ? 
"" : space } } + +if (import.meta.vitest) { + const {expect, test} = import.meta.vitest + test("basic function", function () { + const raw = "English\nにほんご\n中文\n中文\nབོད་ཡིག།\nབོད་ཡིག།" + expect(new MarkdownIt().use(wordless).render(raw)).toBe( + "<p>English にほんご中文中文 བོད་ཡིག།བོད་ཡིག།</p>\n", + ) + }) +} diff --git a/package.json b/package.json index a1db9ca..db68b2c 100644 --- a/package.json +++ b/package.json @@ -1,15 +1,15 @@ { "name": "markdown-it-wordless", - "description": "A markdown-it plugin for wordless languages optimization.", - "version": "0.1.1", + "description": "A markdown-it plugin for wordless languages line-break.", + "version": "1.0.0", "type": "module", "license": "MIT", + "homepage": "https://github.com/treeinfra/markdown-it-wordless", "author": { "name": "James Aprosail", "email": "aprosail@outlook.com", "url": "https://github.com/aprosail" }, - "homepage": "https://github.com/treeinfra/markdown-it-wordless", "exports": { ".": { "require": "./index.cjs", @@ -21,12 +21,15 @@ "format.check": "prettier . --check", "format": "prettier . --write", "build": "rollup --config rollup.config.js", - "review": "npm run format.check && npm run build" + "review": "npm run format.check && npm test && npm run build", + "prepublishOnly": "npm run review", + "test": "vitest run" }, "dependencies": { "markdown-it": "^14.1.0" }, "devDependencies": { + "@rollup/plugin-replace": "^5.0.7", "@rollup/plugin-terser": "^0.4.4", "@rollup/plugin-typescript": "^11.1.6", "@types/markdown-it": "^14.1.1", @@ -34,6 +37,7 @@ "rollup": "^4.18.0", "rollup-plugin-dts": "^6.1.1", "tslib": "^2.6.3", - "typescript": "^5.5.2" + "typescript": "^5.5.2", + "vitest": "^1.6.0" } } diff --git a/rollup.config.js b/rollup.config.js index db9ee99..2e0bf4e 100644 --- a/rollup.config.js +++ b/rollup.config.js @@ -1,3 +1,4 @@ +import replace from "@rollup/plugin-replace" import terser from "@rollup/plugin-terser" import typescript from "@rollup/plugin-typescript" import {defineConfig} from "rollup" @@ -6,12 +7,20 @@ import dts from "rollup-plugin-dts" export default defineConfig([ { plugins: [ + replace({"import.meta.vitest": "undefined", preventAssignment: true}), typescript({ compilerOptions: {allowSyntheticDefaultImports: true}, sourceMap: true, }), terser(), ], + 
external(id) { + const prefixes = ["markdown-it"] + for (const prefix of prefixes) { + if (id.startsWith(prefix)) return true + } + return false + }, input: "index.ts", output: [ {file: "index.js", format: "esm", sourcemap: true}, diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..4b5d5d8 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,10 @@ +{ + "compilerOptions": { + "module": "ESNext", + "target": "ESNext", + "esModuleInterop": true, + "types": ["vitest/importMeta"] + }, + "include": ["index.ts"], + "exclude": ["node_modules", "*.js", "*.mjs"] +} diff --git a/vitest.config.ts b/vitest.config.ts new file mode 100644 index 0000000..73bf9ad --- /dev/null +++ b/vitest.config.ts @@ -0,0 +1,2 @@ +import {defineConfig} from "vitest/config" +export default defineConfig({test: {includeSource: ["*.ts"]}})