prepare for stable version

treeinfra · Jun 23, 2024 · f156d44 · f156d44
2 parents efe1092 + 943edc3
commit f156d44
Show file tree

Hide file tree

Showing 11 changed files with 252 additions and 81 deletions.
diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml
@@ -0,0 +1,5 @@
+version: 2
+updates:
+  - package-ecosystem: npm
+    directory: "/"
+    schedule: {interval: weekly}
diff --git a/.npmignore b/.npmignore
@@ -9,6 +9,7 @@ yarn.lock
 .gitattributes
 .prettierrc.yaml
 rollup.config.js
+vitest.config.ts
 
 # Platform specified files.
 .DS_Store

diff --git a/.npmrc b/.npmrc
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,7 +1,9 @@
-## v0.1.1
+## v1.0.0
 
 - Add MIT License to node manifest.
-- GitHub Actions for check.
+- GitHub Actions for check and dependabot.
+- Support all registered wordless languages by default.
+- Vitest to validate basic functions.
 - Readme manifest details.
 
 ## v0.1.0

diff --git a/README.md b/README.md
@@ -1,28 +1,110 @@
 # Markdown-it Wordless
 
-A markdown-it plugin to optimize wordless multi-language space render.
+A markdown-it plugin to optimize wordless multi-language line-break render.
 
-When a paragraph is long in markdown, we usually separate them into lines.
+When a paragraph is long in markdown, we usually separate them into lines,
+and it will finally be rendered into a single line inside HTML.
 But for wordless languages (such as Chinese and Japanese),
-an extra line break will cause an unnecessary white space.
-You can definitely set:
+they do not use spaces to separate words,
+so no space needs to be added when a line break is processed.
+
+If you are only working with a single wordless language,
+you can definitely use the following code,
+which will disable all spaces at line breaks
+(render single `\n` into an empty string rather than a space):
 
 ```ts
+import md from "markdown-it"
 md.renderer.rules.softbreak = () => ""
 ```
 
-to disable all spaces when line break,
-but how about the condition when resolving multi-languages?
+But once you work with multiple languages,
+especially with a mix of wordless and wordful languages,
+such as using Chinese and English in a single markdown document,
+such an option cannot handle all cases.
+So here comes this `"markdown-it-wordless"` plugin,
+and you can use it like this:
+
+```ts
+import md from "markdown-it"
+import {wordless} from "markdown-it-wordless"
+md.use(wordless)
+```
+
+## Basic functions
 
-You can use this plugin to resolve the problem.
-In this plugin, you can config in details
-to resolve line break in multi-languages.
-For example, when working with Chinese and English,
-you can enable the softbreak for English but disable it for Chinese
-by following configurations:
+1. Wordful languages (such as English and Arabic) will be rendered as usual.
+2. It won't add a space at a line break within the same wordless language.
+3. It will add a space at a line break between different wordless languages.
+4. Specially, Chinese and Japanese will be treated as the same language,
+   as there are many shared characters between them,
+   and their character styles are almost the same.
+5. Although Korean characters are like Chinese and Japanese (CJK),
+   Korean is not a wordless language, it uses spaces to separate words.
+
+## Use it with VitePress
+
+[VitePress](https://vitepress.dev) is an excellent static site generator,
+and this package was inspired by the author's experience with VitePress.
+It's strongly recommended to add this plugin to VitePress
+if you are using wordless languages. And here's how to configure it:
 
 ```ts
-import {Options, chineseAndJapanese, wordless} from "markdown-it-wordless"
+// <root>/.vitepress/config.ts
+import {defineConfig} from "vitepress"
+import {wordless} from "markdown-it-wordless"
+
+export default defineConfig({
+  markdown: {
+    config(md) {
+      md.use(wordless)
+    },
+  },
+  // Other configs...
+})
+```
 
+## Customize to optimize performance
+
+The default option will enable optimization
+for all registered wordless languages inside this package.
+If you want to optimize performance,
+you can specify exactly which wordless languages you are using.
+You may also specify which wordful languages you are using,
+because by default there's only optimization for wordful languages
+whose Unicode code points are not greater than `0x0dff`.
+
+Here's a simple example
+if you will only use Chinese or Japanese as wordless languages:
+
+```ts
+import md from "markdown-it"
+import {wordless, chineseAndJapanese, Options} from "markdown-it-wordless"
 md.use<Options>(wordless, {supportWordless: [chineseAndJapanese]})
 ```
+
+Such optimization is unnecessary in most cases,
+because this plugin will not slow down the rendering process a lot
+in common cases (only a few milliseconds).
+And if you do want to customize,
+please make sure you understand the source code.
+Please refer to [`data.ts`](./data.ts) for more details;
+it contains documentation for each item.
+
+## About the supported languages
+
+You can find all supported languages
+in the source code of [`data.ts`](./data.ts).
+Each language or language series is an exported const
+that you can import and call.
+
+The language series are based on [Unicode](https://unicode.org/charts/).
+Most of the languages are coded manually and some of them are
+generated by several AI models, so there might be mistakes,
+and the author cannot guarantee the accuracy of the data
+because it's almost impossible for a single person to learn all such languages.
+
+If you are a native speaker of one of those wordless languages
+and you find some mistakes,
+or if there are wordless languages not included in this package,
+please feel free to open an issue.
diff --git a/data.ts b/data.ts
@@ -1,17 +1,30 @@
 /** Ensure an array is not empty. */
 type NonEmptyArray<T> = [T, ...T[]]
 
-/** A range of unicode numbers, mark its begin and end, the end is included. */
+/**
+ * A range of unicode numbers,
+ * mark its begin and end.
+ * The end is included (using `<=` rather than `<` in source code).
+ */
 export type Range = [number, number]
 
-/** Unicode {@link Range}s of a single language. */
+/**
+ * Unicode {@link Range}s of a single language.
+ * It is a non-empty array of {@link Range}
+ * because a single language might contain multiple ranges in unicode.
+ */
 export type LanguageRanges = NonEmptyArray<Range>
 
 /**
- * The default value is empty, you need to add it manually.
- * Parsing wordless languages costs a lot.
- * It's strongly recommended to only introduce the required series.
- * For example:
+ * The default value will enable all languages registered inside
+ * the {@link allWordless} const, and enable optimization for
+ * {@link commonWords} by default.
+ *
+ * If you'd like to customize the supported languages to improve performance,
+ * you can configure it like the following example,
+ * which only enables wordless language optimization
+ * for Chinese and Japanese; all other wordless languages will be omitted.
+ *
  * ```ts
  * import {wordless, chineseAndJapanese, Options} from 'markdown-it-wordless'
  * md.use<Options>(wordless, {supportWordless: [chineseAndJapanese]})
@@ -30,46 +43,6 @@ export type Options = {
   supportWordless: LanguageRanges[]
 }
 
-/**
- * @param code unicode number of a character.
- * @param options {@link Options} for the wordless languages and
- * a series of non-wordless languages for optimization.
- * @returns Index of the character in the given wordless language series,
- * if there's not {@link Range} contains such code,
- * it means this is not a character of a wordless language,
- * and it will return -1. And if it's an emoji, it will return -2.
- */
-export function langIndexOf(code: number, options?: Options): number {
-  options = {
-    optimizeWords: options?.optimizeWords ?? [commonWords],
-    supportWordless: options?.supportWordless ?? [],
-  }
-
-  // Process optimizations.
-  for (const ranges of options!.optimizeWords!) {
-    for (const range of ranges) {
-      if (code >= range[0] && code <= range[1]) return -1
-    }
-  }
-
-  // Process Emoji.
-  for (const ranges of emoji) {
-    for (const range of ranges) {
-      if (code >= range[0] && code <= range[1]) return -2
-    }
-  }
-
-  // Process wordless language index.
-  const wordless = options!.supportWordless!
-  for (let index = 0; index < wordless.length; index++) {
-    const ranges = wordless[index]
-    for (const range of ranges) {
-      if (code >= range[0] && code <= range[1]) return index
-    }
-  }
-  return -1
-}
-
 /** Unicode from zero to 0x0dff, commonly used language with words. */
 export const commonWords: LanguageRanges = [[0x0000, 0x0dff]]
 
@@ -104,7 +77,9 @@ export const chineseAndJapanese: LanguageRanges = [
   // [0x3040, 0x309f], // 日文平假名/平仮名ひらがな
   // [0x30a0, 0x30ff], // 日文片假名/片仮名カタカナ
   [0x3040, 0x30ff],
+  [0x3100, 0x312f], // 传统拼音注音符号(ㄆㄧㄣ ㄧㄣ)
   [0x3190, 0x319f], // 甲乙丙丁天地人...
+  [0x31a0, 0x31bf], // 传统拼音注音字母(ㄆㄧㄣ ㄧㄣ)
 
   // [0x31c0, 0x31ef], // 笔画/筆画
   // [0x31f0, 0x31ff], // 日文片假名扩展/片仮名カタカナの拡張
@@ -168,12 +143,6 @@ export const xishuangbannaOldDai: LanguageRanges = [[0x1a20, 0x1aaf]]
 /** 江永女书 */
 export const jiangyongWomanScript: LanguageRanges = [[0x1b170, 0x1b2ff]]
 
-/** 旧版拼音 */
-export const oldChinesePinyin: LanguageRanges = [
-  [0x3100, 0x312f],
-  [0x31a0, 0x31bf],
-]
-
 /** 契丹小字 */
 export const khitanSmallScript: LanguageRanges = [[0x18b00, 0x18cff]]
 
@@ -192,3 +161,62 @@ export const cuneiform: LanguageRanges = [
 
 /** Ancient Egyptian hieroglyphs. */
 export const hieroglyphics: LanguageRanges = [[0x13000, 0x1345f]]
+
+/**
+ * The wordless language series that are enabled by default.
+ * {@link langIndexOf} falls back to this list when `supportWordless`
+ * is not provided, and the position of a series inside this array
+ * is the index that {@link langIndexOf} returns for its characters.
+ */
+export const allWordless: LanguageRanges[] = [
+  chineseAndJapanese,
+  tibetan,
+  thai,
+  lao,
+  cambodian,
+  burmese,
+  yi,
+  dehongDai,
+  xishuangbannaNewDai,
+  xishuangbannaOldDai,
+  jiangyongWomanScript,
+  khitanSmallScript,
+  tangut,
+  cuneiform,
+  hieroglyphics,
+]
+
+/**
+ * Classify a character by the language series its code point falls in.
+ *
+ * The checks run in this order, and the first match wins:
+ * the `optimizeWords` ranges (defaults to {@link commonWords}),
+ * then the `emoji` ranges, then the `supportWordless` series
+ * (defaults to {@link allWordless}).
+ * Both ends of every {@link Range} are included.
+ *
+ * @param code unicode number of a character.
+ * @param options {@link Options} for the wordless languages and
+ * a series of non-wordless languages for optimization.
+ * @returns Index of the matching series inside `supportWordless`.
+ * If the code is inside an `optimizeWords` range,
+ * or no {@link Range} of any wordless series contains the code,
+ * it is not treated as a character of a wordless language,
+ * and it will return -1. And if it's an emoji, it will return -2.
+ */
+export function langIndexOf(code: number, options?: Options): number {
+  options = {
+    optimizeWords: options?.optimizeWords ?? [commonWords],
+    supportWordless: options?.supportWordless ?? allWordless,
+  }
+
+  // Early exit: a code inside an optimization range is reported as -1.
+  for (const ranges of options!.optimizeWords!) {
+    for (const range of ranges) {
+      if (code >= range[0] && code <= range[1]) return -1
+    }
+  }
+
+  // Emoji get their own marker (-2), checked before any wordless series.
+  for (const ranges of emoji) {
+    for (const range of ranges) {
+      if (code >= range[0] && code <= range[1]) return -2
+    }
+  }
+
+  // Return the position of the first wordless series containing the code.
+  const wordless = options!.supportWordless!
+  for (let index = 0; index < wordless.length; index++) {
+    const ranges = wordless[index]
+    for (const range of ranges) {
+      if (code >= range[0] && code <= range[1]) return index
+    }
+  }
+  return -1
+}
diff --git a/index.ts b/index.ts
@@ -1,4 +1,5 @@
 import md from "markdown-it"
+import MarkdownIt from "markdown-it/index.mjs"
 
 import type {Options} from "./data"
 import {langIndexOf} from "./data"
@@ -9,16 +10,34 @@ export * from "./data"
 const space = " "
 
 /**
- * The default {@link Options} contains no wordless languages,
- * that you need to add required optimization manually.
- * Render wordless languages cost a lot,
- * it's recommended to only add required language ranges.
+ * A markdown-it plugin to optimize wordless multi-language line-break render.
+ * See [readme](./README.md) of this package for more details.
+ * Here's a minimal example of how to use it:
  *
- * For example, if you are using Chinese or Japanese with English,
- * you may consider code like this:
  * ```ts
- * import {wordless, chineseAndJapanese, Options} from 'markdown-it-wordless'
- * md.use<Options>(wordless, {supportWordless: [chineseAndJapanese]})
+ * import md from "markdown-it"
+ * import {wordless} from 'markdown-it-wordless'
+ * md.use(wordless)
+ * ```
+ *
+ * ## For VitePress users
+ *
+ * If you are using [VitePress](https://vitepress.dev),
+ * you may config like this:
+ *
+ * ```ts
+ * // <root>/.vitepress/config.ts
+ * import {defineConfig} from "vitepress"
+ * import {wordless} from "markdown-it-wordless"
+ *
+ * export default defineConfig({
+ *   markdown: {
+ *     config(md) {
+ *       md.use(wordless)
+ *     },
+ *   },
+ *   // Other configs...
+ * })
  * ```
  */
 export function wordless(md: md, options?: Options) {
@@ -31,3 +50,13 @@ export function wordless(md: md, options?: Options) {
     return before === after && before !== -1 && before != -2 ? "" : space
   }
 }
+
+if (import.meta.vitest) { // In-source test: only runs when `import.meta.vitest` is set.
+  const {expect, test} = import.meta.vitest
+  test("basic function", function () { // English, Japanese, Chinese x2, Tibetan x2, one per line.
+    const raw = "English\nにほんご\n中文\n中文\nབོད་ཡིག།\nབོད་ཡིག།"
+    expect(new MarkdownIt().use(wordless).render(raw)).toBe(
+      "<p>English にほんご中文中文 བོད་ཡིག།བོད་ཡིག།</p>\n", // Spaces remain only after English and before Tibetan.
+    )
+  })
+}