diff --git a/.vscode/settings.json b/.vscode/settings.json index 4c96c16..42b0849 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,7 +8,7 @@ "**/package-lock.json": true, "package.json.lerna_backup": true, "packages/**/node_modules": true, - "packages/**/tsconfig.json": true + // "packages/**/tsconfig.json": true }, "editor.wordSeparators": "`~!@#%^&*()-=+[{]}\\|;:'\",.<>/?", } \ No newline at end of file diff --git a/build/publish.js b/build/publish.js index 42b6224..caa4999 100644 --- a/build/publish.js +++ b/build/publish.js @@ -4,7 +4,7 @@ const chalk = require('chalk') const semver = require('semver') const cp = require('child_process') const program = require('commander') -const { exec, execSync } = require('./util') +const { exec } = require('./util') function toVersion(version) { return `${version.major}.${version.minor}.${version.patch}` @@ -13,12 +13,11 @@ function toVersion(version) { class Package { constructor(name) { this.current = require(`../packages/${name}/package.json`) - this.previous = JSON.parse(cp.execSync(`git show HEAD:packages/${name}/package.json`).toString('utf8')) + this.previous = JSON.parse(cp.execSync(`git show HEAD:packages/${name}/package.json`).toString().trim()) this.major = semver.major(this.previous.version) this.minor = semver.minor(this.previous.version) this.patch = semver.patch(this.previous.version) this.newVersion = this.current.version - delete this.current.gitHead } bump(flag) { @@ -48,10 +47,10 @@ packageNames.forEach(name => packages[name] = new Package(name)) program .usage('[options] [names...]') .option('-a, --all') - .option('-M, --major') - .option('-m, --minor') - .option('-p, --patch') - .option('-N, --no-npm') + .option('-1, --major') + .option('-2, --minor') + .option('-3, --patch') + .option('-p, --publish') .parse(process.argv) const flag = program.major ? 'major' : program.minor ? 'minor' : 'patch' @@ -83,25 +82,30 @@ ${chalk.cyanBright(packages[name].newVersion)}`) let counter = 0, promise = Promise.resolve(), failed = false -if (program.npm) { - packageNames.forEach((name) => { - if (packages[name].newVersion !== packages[name].previous.version) { - if (packages[name].current.private) return - const npmVersion = execSync(`npm show ${packages[name].current.name} version`) - if (semver.gte(npmVersion, packages[name].newVersion)) return - counter += 1 - fs.writeFileSync( - path.join(__dirname, `../packages/${name}/package.json`), - JSON.stringify(packages[name], null, 2), - ) - promise = promise.then((code) => { - failed = failed || code - return exec(`cd packages/${name} && npm publish`) - }) - } - }) +if (program.publish) { + console.log('\nWaiting for packages to publish ...') } +packageNames.forEach((name) => { + if (packages[name].newVersion !== packages[name].previous.version) { + fs.writeFileSync( + path.join(__dirname, `../packages/${name}/package.json`), + JSON.stringify(packages[name], null, 2), + ) + if (packages[name].current.private || !program.publish) return + const npmVersion = cp.execSync(`npm show ${packages[name].current.name} version`).toString().trim() + if (semver.gte(npmVersion, packages[name].newVersion)) return + console.log(` - ${name} (${packages[name].current.name}): \ +${chalk.green(npmVersion)} => \ +${chalk.greenBright(packages[name].newVersion)}`) + counter += 1 + promise = promise.then((code) => { + failed = failed || code + return exec(`cd packages/${name} && npm publish`) + }) + } +}) + promise.then(() => { if (!counter) { console.log('No packages to publish.') diff --git a/packages/cli/package.json b/packages/cli/package.json index 29f6b57..0acb1ba 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,6 +1,6 @@ { "name": "@marklet/cli", - "version": "1.1.4", + "version": "1.1.5", "description": "A command line interface for marklet.", "author": "jjyyxx <1449843302@qq.com>", "contributors": [ @@ -19,8 +19,8 @@ "url": "https://github.com/obstudio/Marklet/issues" }, "dependencies": { - "@marklet/dev-server": "^1.0.11", - "@marklet/parser": "^1.0.4", + "@marklet/dev-server": "^1.0.12", + "@marklet/parser": "^1.1.0", "chalk": "^2.4.1", "commander": "^2.18.0", "js-yaml": "^3.12.0" diff --git a/packages/core/package.json b/packages/core/package.json index 6a5da86..c294c16 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,7 +1,7 @@ { "name": "@marklet/core", - "version": "1.0.9", - "description": "A lexer for marklet.", + "version": "2.0.0", + "description": "Some core conceptions of marklet.", "author": "shigma <1700011071@pku.edu.cn>", "contributors": [ "jjyyxx <1449843302@qq.com>" diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 1c481e1..9f71c60 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -1,27 +1,9 @@ -type ResultMap any>> = { - [key in keyof T]: ReturnType -} +export type StringLike = string | RegExp -type StringLike = string | RegExp -type Capture = RegExpExecArray & ResultMap -type GetterFunction = (this: Lexer, capture: RegExpExecArray) => any -type GetterFunctionMap = Record -export interface LexerConfig { [key: string]: any } -export interface LexerOptions { - /** lexer capture getters */ - getters?: GetterFunctionMap - /** lexer rule regex macros */ - macros?: Record - /** entrance context */ - entrance?: string - /** default context */ - default?: string - /** assign start/end to tokens */ - requireBound?: boolean - /** other configurations */ - config?: LexerConfig -} +export type LexerConfig = Record +export type LexerMacros = Record +export type TokenLike = string | LexerToken export interface LexerToken { type?: string text?: string @@ -31,9 +13,18 @@ export interface LexerToken { [key: string]: any } -export type TokenLike = string | LexerToken -interface LexerIncludeRule { include: string } -interface LexerRegexRule { +export type LexerRule< + S extends StringLike = RegExp, + T extends LexerInstance = LexerInstance, + R extends RegExpExecArray = RegExpExecArray, +> = LexerIncludeRule | LexerRegexRule + +export interface LexerIncludeRule { include: string } +export interface LexerRegexRule< + S extends StringLike = RegExp, + T extends LexerInstance = LexerInstance, + R extends RegExpExecArray = RegExpExecArray, +> { /** the regular expression to execute */ regex?: S /** @@ -42,23 +33,24 @@ interface LexerRegexRule { * - `e`: match end of line * - `i`: ignore case * - `p`: pop from the current context + * - `s`: pop when no match is found * - `t`: match top level context */ flags?: string /** default type of the token */ type?: string /** whether the rule is to be executed */ - test?: string | boolean | ((config: LexerConfig) => boolean) + test?: string | boolean | ((this: T, config: LexerConfig) => boolean) /** a result token */ token?: TokenLike | TokenLike[] | (( - this: Lexer, capture: Capture, content: TokenLike[], rule: this + this: T, capture: R, content: TokenLike[] ) => TokenLike | TokenLike[]) /** the inner context */ - push?: string | LexerRule[] | (( - this: Lexer, capture: Capture - ) => string | LexerRule[] | false) + push?: string | LexerRule[] /** pop from the current context */ pop?: boolean + /** pop when no match is found */ + strict?: boolean /** match when the context begins */ context_begins?: boolean /** match top level context */ @@ -69,221 +61,58 @@ interface LexerRegexRule { eol?: boolean } -interface LexerWarning { - message: string -} - -type LexerContext = string | NativeLexerRule[] -type LexerRule = LexerRegexRule | LexerIncludeRule -type LooseLexerRule = LexerRule -type NativeLexerRule = LexerRule -export type LexerRules = Record - -function getString(string: StringLike): string { +/** Transform a string-like object into a raw string. */ +export function getString(string: StringLike): string { return string instanceof RegExp ? string.source : string } -export class Lexer { - config: LexerConfig - private rules: Record = {} - private getters: GetterFunctionMap - private entrance: string - private default: string - private requireBound: boolean - private _warnings: LexerWarning[] - private _isRunning: boolean = false - - constructor(rules: LexerRules, options: LexerOptions = {}) { - this.getters = options.getters || {} - this.config = options.config || {} - this.entrance = options.entrance || 'main' - this.default = options.default || 'text' - this.requireBound = !!options.requireBound - - const _macros = options.macros || {} - const macros: Record = {} - for (const key in _macros) { - macros[key] = getString(_macros[key]) +export function parseRule(rule: LexerRule, macros: LexerMacros = {}): LexerRule { + if (!('include' in rule)) { + if (rule.regex === undefined) { + rule.regex = /(?=[\s\S])/ + if (!rule.type) rule.type = 'default' } - - function resolve(rule: LooseLexerRule): NativeLexerRule { - if (!('include' in rule)) { - if (rule.regex === undefined) { - rule.regex = /(?=[\s\S])/ - if (!rule.type) rule.type = 'default' - } - if (rule.test === undefined) rule.test = true - let src = getString(rule.regex) - let flags = '' - for (const key in macros) { - src = src.replace(new RegExp(`{{${key}}}`, 'g'), `(?:${macros[key]})`) - } - rule.flags = rule.flags || '' - if (rule.flags.replace(/[biept]/g, '')) { - throw new Error(`'${rule.flags}' contains invalid rule flags.`) - } - if (rule.flags.includes('p')) rule.pop = true - if (rule.flags.includes('b')) rule.context_begins = true - if (rule.flags.includes('t')) rule.top_level = true - if (rule.flags.includes('e') || rule.eol) src += ' *(?:\\n+|$)' - if (rule.flags.includes('i') || rule.ignore_case) flags += 'i' - rule.regex = new RegExp('^(?:' + src + ')', flags) - if (rule.push instanceof Array) rule.push.forEach(resolve) - } - return rule + if (rule.test === undefined) rule.test = true + let src = getString(rule.regex) + let flags = '' + for (const key in macros) { + src = src.replace(new RegExp(`{{${key}}}`, 'g'), `(?:${macros[key]})`) } - - for (const key in rules) { - this.rules[key] = rules[key].map(resolve) + rule.flags = rule.flags || '' + if (rule.flags.replace(/[biepst]/g, '')) { + throw new Error(`'${rule.flags}' contains invalid rule flags.`) } + if (rule.flags.includes('p')) rule.pop = true + if (rule.flags.includes('s')) rule.strict = true + if (rule.flags.includes('b')) rule.context_begins = true + if (rule.flags.includes('t')) rule.top_level = true + if (rule.flags.includes('e') || rule.eol) src += ' *(?:\\n+|$)' + if (rule.flags.includes('i') || rule.ignore_case) flags += 'i' + rule.regex = new RegExp('^(?:' + src + ')', flags) + if (rule.push instanceof Array) rule.push.forEach(_rule => parseRule(_rule, macros)) } + return rule as LexerRule +} - private getContext(context: LexerContext): LexerRegexRule[] { - const result = typeof context === 'string' ? this.rules[context] : context - if (!result) throw new Error(`Context '${context}' was not found.`) - for (let i = result.length - 1; i >= 0; i -= 1) { - const rule: NativeLexerRule = result[i] - if ('include' in rule) { - result.splice(i, 1, ...this.getContext(rule.include)) - } - } - return []>result - } - - private _parse(source: string, context: LexerContext, isTopLevel: boolean = false): { - index: number - result: TokenLike[] - warnings: LexerWarning[] - } { - let index = 0, unmatch = '' - const result: TokenLike[] = [] - const rules = this.getContext(context) - const warnings: LexerWarning[] = this._warnings = [] - source = source.replace(/\r\n/g, '\n') - while (source) { - /** - * Matching status: - * 0. No match was found - * 1. Found match and continue - * 2. Found match and pop - */ - let status = 0 - for (const rule of rules) { - if (rule.top_level && !isTopLevel) continue - if (rule.context_begins && index) continue - - // test - let test = rule.test - if (typeof test === 'string') { - if (test.charAt(0) === '!') { - test = !this.config[test.slice(1)] - } else { - test = this.config[test] - } - } else if (typeof test === 'function') { - test = test.call(this, this.config) - } - if (!test) continue - - // regex - const capture = rule.regex.exec(source) - if (!capture) continue - source = source.slice(capture[0].length) - const start = index - index += capture[0].length - - // pop - const pop = rule.pop - status = pop ? 2 : 1 - - // push - let content: TokenLike[] = [], push = rule.push - if (typeof push === 'function') push = push.call(this, capture) - if (push) { - const subtoken = this._parse(source, push) - content = subtoken.result.map((tok) => { - if (this.requireBound && typeof tok === 'object') { - tok.start += index - tok.end += index - } - return tok - }) - warnings.concat(subtoken.warnings) - source = source.slice(subtoken.index) - index += subtoken.index - } - - // detect error - if (!pop && index === start) { - throw new Error(`Endless loop at '${ - source.slice(0, 10) - } ${ - source.length > 10 ? '...' : '' - }'.`) - } - - // resolve unmatch - if (unmatch) { - result.push(unmatch) - unmatch = '' - } - - // token - let token = rule.token - if (typeof token === 'function') { - for (const key in this.getters) { // redundant define led to some efficiency loss, consider monkey-patch RegExpExecArray or try other solutions? - Object.defineProperty(capture, key, { - get: () => this.getters[key].call(this, capture) - }) - } - token = token.call(this, capture, content) - } else if (token === undefined) { - if (push) { - token = content - } else if (!pop) { - token = capture[0] - } - } - if (token instanceof Array) token = { content: token } - if (token) { - if (typeof token === 'object') { - token.type = token.type || rule.type - if (this.requireBound) { - token.start = start - token.end = index - } - } - result.push(token) - } - - break - } - - if (!status) { - unmatch += source.charAt(0) - source = source.slice(1) - index += 1 - } - if (status === 2) break - } +export interface LexerInstance { + config: LexerConfig + parse(source: string): any +} - if (unmatch) result.push(unmatch) - return { index, result, warnings } - } +export interface InlineLexerResult { + index: number + output: string +} - pushWarning(message: string) { - this._warnings.push({ message }) - } +export interface InlineLexerInstance extends LexerInstance { + parse(source: string): InlineLexerResult +} - parse(source: string, context?: string): TokenLike[] { - let result - if (this._isRunning) { - result = this._parse(source, context || this.default).result - } else { - this._isRunning = true - result = this._parse(source, context || this.entrance, true).result - this._isRunning = false - } - return result - } -} \ No newline at end of file +export enum MatchStatus { + /** No match was found */ + NO_MATCH, + /** Found match and continue */ + CONTINUE, + /** Found match and pop */ + POP, +} diff --git a/packages/core/tsconfig.json b/packages/core/tsconfig.json index e305f8d..8faa3c3 100644 --- a/packages/core/tsconfig.json +++ b/packages/core/tsconfig.json @@ -1,5 +1,5 @@ { - "extends": "../../tsconfig.json", + "extends": "../../tsconfig.base.json", "include": [ "src" ], diff --git a/packages/detok/package.json b/packages/detok/package.json index 4284baa..dfdb3af 100644 --- a/packages/detok/package.json +++ b/packages/detok/package.json @@ -1,6 +1,6 @@ { "name": "@marklet/detok", - "version": "1.0.9", + "version": "1.0.10", "description": "A detokenizer for marklet.", "author": "jjyyxx <1449843302@qq.com>", "contributors": [ @@ -24,6 +24,6 @@ "cheerio": "^1.0.0-rc.2" }, "devDependencies": { - "@marklet/core": "^1.0.4" + "@marklet/core": "^2.0.0" } } \ No newline at end of file diff --git a/packages/detok/tsconfig.json b/packages/detok/tsconfig.json index 4e48185..9677f08 100644 --- a/packages/detok/tsconfig.json +++ b/packages/detok/tsconfig.json @@ -1,5 +1,5 @@ { - "extends": "../../tsconfig.json", + "extends": "../../tsconfig.base.json", "include": [ "src" ], diff --git a/packages/dev-server/package.json b/packages/dev-server/package.json index 23a25ae..4b2c846 100644 --- a/packages/dev-server/package.json +++ b/packages/dev-server/package.json @@ -1,6 +1,6 @@ { "name": "@marklet/dev-server", - "version": "1.0.11", + "version": "1.0.12", "description": "A develop server for marklet.", "author": "jjyyxx <1449843302@qq.com>", "contributors": [ @@ -21,7 +21,7 @@ "url": "https://github.com/obstudio/Marklet/issues" }, "dependencies": { - "@marklet/parser": "^1.0.4", + "@marklet/parser": "^1.1.0", "@marklet/renderer": "^1.1.2", "vue": "^2.5.17", "ws": "^6.0.0" diff --git a/packages/dev-server/tsconfig.json b/packages/dev-server/tsconfig.json index 59a996a..30874db 100644 --- a/packages/dev-server/tsconfig.json +++ b/packages/dev-server/tsconfig.json @@ -1,5 +1,5 @@ { - "extends": "../../tsconfig.json", + "extends": "../../tsconfig.base.json", "include": [ "src" ], diff --git a/packages/inline/package.json b/packages/inline/package.json new file mode 100644 index 0000000..e6038e5 --- /dev/null +++ b/packages/inline/package.json @@ -0,0 +1,26 @@ +{ + "name": "@marklet/inline", + "version": "1.0.0", + "description": "A fast inline lexer for marklet.", + "author": "shigma <1700011071@pku.edu.cn>", + "contributors": [ + "jjyyxx <1449843302@qq.com>" + ], + "homepage": "https://github.com/obstudio/Marklet", + "license": "MIT", + "main": "dist/index.js", + "typings": "dist/index.d.ts", + "files": [ + "dist" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/obstudio/Marklet.git" + }, + "bugs": { + "url": "https://github.com/obstudio/Marklet/issues" + }, + "dependencies": { + "@marklet/core": "^2.0.0" + } +} diff --git a/packages/inline/src/index.ts b/packages/inline/src/index.ts new file mode 100644 index 0000000..b52b68d --- /dev/null +++ b/packages/inline/src/index.ts @@ -0,0 +1,112 @@ +import { + StringLike, + LexerConfig, + LexerRegexRule, + InlineLexerInstance, + InlineLexerResult, + MatchStatus, + parseRule, +} from '@marklet/core' + +class InlineCapture extends Array implements RegExpExecArray { + index: number + input: string + lexer: InlineLexer + + constructor(lexer: InlineLexer, array: RegExpExecArray) { + super(...array) + this.lexer = lexer + this.index = array.index + this.input = array.input + } + + get inner(): string { + const match = this.reverse().find(item => !!item) + return match ? this.lexer.parse(match).output : '' + } +} + +type InlineLexerRule = LexerRegexRule + +export type InlineLexerRules = InlineLexerRule[] + +export class InlineLexer implements InlineLexerInstance { + config: LexerConfig + private rules: InlineLexerRule[] + + constructor(rules: InlineLexerRules, config: LexerConfig = {}) { + this.rules = rules.map(rule => parseRule(rule) as InlineLexerRule) + this.config = config || {} + } + + private _parse(source: string): InlineLexerResult { + let index = 0, unmatch = '', output = '' + while (source) { + let status: MatchStatus = MatchStatus.NO_MATCH + for (const rule of this.rules) { + if (rule.context_begins && index) continue + + // test + let test = rule.test + if (typeof test === 'string') { + if (test.charAt(0) === '!') { + test = !this.config[test.slice(1)] + } else { + test = !!this.config[test] + } + } else if (typeof test === 'function') { + test = test.call(this, this.config) + } + if (!test) continue + + // regex + const match = rule.regex.exec(source) + if (!match) continue + if (!match[0].length && !rule.pop) { + throw new Error(`Endless loop at '${ + source.slice(0, 10) + } ${ + source.length > 10 ? '...' : '' + }'.`) + } + const capture = new InlineCapture(this, match) + source = source.slice(capture[0].length) + index += capture[0].length + + // pop + status = rule.pop ? MatchStatus.POP : MatchStatus.CONTINUE + + // resolve unmatch + if (unmatch) { + output += unmatch + unmatch = '' + } + + // token + let token = rule.token + if (typeof token === 'function') { + token = token.call(this, capture) + } else if (token === undefined) { + token = capture[0] + } + output += token + + break + } + + if (status === MatchStatus.POP) break + if (status === MatchStatus.NO_MATCH) { + unmatch += source.charAt(0) + source = source.slice(1) + index += 1 + } + } + + if (unmatch) output += unmatch + return { index, output } + } + + parse(source: string): InlineLexerResult { + return this._parse(source.replace(/\r\n/g, '\n')) + } +} \ No newline at end of file diff --git a/packages/inline/tsconfig.json b/packages/inline/tsconfig.json new file mode 100644 index 0000000..9677f08 --- /dev/null +++ b/packages/inline/tsconfig.json @@ -0,0 +1,13 @@ +{ + "extends": "../../tsconfig.base.json", + "include": [ + "src" + ], + "compilerOptions": { + "outDir": "dist", + "rootDir": "src" + }, + "references": [ + { "path": "../core" } + ] +} \ No newline at end of file diff --git a/packages/lexer/package.json b/packages/lexer/package.json new file mode 100644 index 0000000..5cf1771 --- /dev/null +++ b/packages/lexer/package.json @@ -0,0 +1,26 @@ +{ + "name": "@marklet/lexer", + "version": "1.0.10", + "description": "A document lexer for marklet.", + "author": "shigma <1700011071@pku.edu.cn>", + "contributors": [ + "jjyyxx <1449843302@qq.com>" + ], + "homepage": "https://github.com/obstudio/Marklet", + "license": "MIT", + "main": "dist/index.js", + "typings": "dist/index.d.ts", + "files": [ + "dist" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/obstudio/Marklet.git" + }, + "bugs": { + "url": "https://github.com/obstudio/Marklet/issues" + }, + "dependencies": { + "@marklet/core": "^2.0.0" + } +} \ No newline at end of file diff --git a/packages/lexer/src/index.ts b/packages/lexer/src/index.ts new file mode 100644 index 0000000..27faa37 --- /dev/null +++ b/packages/lexer/src/index.ts @@ -0,0 +1,216 @@ +import { + StringLike, + LexerMacros, + LexerConfig, + LexerRule, + LexerInstance, + LexerRegexRule, + InlineLexerInstance, + TokenLike, + MatchStatus, + parseRule, + getString, +} from '@marklet/core' + +export interface LexerOptions { + /** lexer rule regex macros */ + macros?: LexerMacros + /** entrance context */ + entrance?: string + /** default inline context */ + inlineEntrance?: string + /** assign start/end to tokens */ + requireBound?: boolean + /** other configurations */ + config?: LexerConfig +} + +type NativeLexerContext = LexerRegexRule[] | InlineLexerInstance +export type LexerContexts = Record[] | InlineLexerInstance> + +interface LexerResult { + index: number + result: TokenLike[] +} + +export class Lexer implements LexerInstance { + config: LexerConfig + private contexts: Record = {} + private entrance: string + private inlineEntrance: string + private requireBound: boolean + + constructor(contexts: LexerContexts, options: LexerOptions = {}) { + this.config = options.config || {} + this.entrance = options.entrance || 'main' + this.inlineEntrance = options.inlineEntrance || 'text' + this.requireBound = !!options.requireBound + + const _macros = options.macros || {} + const macros: Record = {} + for (const key in _macros) { + macros[key] = getString(_macros[key]) + } + for (const key in contexts) { + const context = contexts[key] + this.contexts[key] = context instanceof Array + ? context.map(rule => parseRule(rule, macros)) + : context + } + } + + private getContext(context: string | InlineLexerInstance | LexerRule[], strictMode?: boolean) { + const result = typeof context === 'string' ? this.contexts[context] : context + if (!result) throw new Error(`Context '${context}' was not found.`) + if (result instanceof Array) { + for (let i = result.length - 1; i >= 0; i -= 1) { + const rule: LexerRule = result[i] + if ('include' in rule) { + const includes = this.getContext(rule.include) + if (includes instanceof Array) { + result.splice(i, 1, ...includes) + } else { + result.splice(i, 1, { + regex: /^(?=[\s\S])/, + push: rule.include, + strict: true, + }) + } + } + } + if (strictMode) { + result.push({ + regex: /^(?=[\s\S])/, + pop: true, + }) + } + } + return result as NativeLexerContext + } + + private _parse(source: string, context: NativeLexerContext, isTopLevel?: boolean): LexerResult { + let index = 0, unmatch = '' + const result: TokenLike[] = [] + + // apply inline lexer + if (!(context instanceof Array)) { + const result = context.parse(source) + return { + index: result.index, + result: [result.output], + } + } + + while (source) { + let status: MatchStatus = MatchStatus.NO_MATCH + for (const rule of context) { + if (rule.top_level && !isTopLevel) continue + if (rule.context_begins && index) continue + + // test + let test = rule.test + if (typeof test === 'string') { + if (test.charAt(0) === '!') { + test = !this.config[test.slice(1)] + } else { + test = !!this.config[test] + } + } else if (typeof test === 'function') { + test = test.call(this, this.config) + } + if (!test) continue + + // regex + const capture = rule.regex.exec(source) + if (!capture) continue + source = source.slice(capture[0].length) + const start = index + index += capture[0].length + + // pop + const pop = rule.pop + status = pop ? MatchStatus.POP : MatchStatus.CONTINUE + + // push + let content: TokenLike[] = [], push = rule.push + if (push) { + const context = this.getContext(push, rule.strict) + const subtoken = this._parse(source, context) + content = subtoken.result.map((tok) => { + if (this.requireBound && typeof tok === 'object') { + tok.start += index + tok.end += index + } + return tok + }) + source = source.slice(subtoken.index) + index += subtoken.index + } + + // detect error + if (!pop && index === start) { + throw new Error(`Endless loop at '${ + source.slice(0, 10) + } ${ + source.length > 10 ? '...' : '' + }'.`) + } + + // resolve unmatch + if (unmatch) { + result.push(unmatch) + unmatch = '' + } + + // token + let token = rule.token + if (typeof token === 'function') { + token = token.call(this, capture, content) + } else if (token === undefined) { + if (push) { + token = content + } else if (!pop) { + token = capture[0] + } + } + if (token instanceof Array) token = { content: token } + if (token) { + if (typeof token === 'object') { + token.type = token.type || rule.type + if (this.requireBound) { + token.start = start + token.end = index + } + } + result.push(token) + } + + break + } + + if (status === MatchStatus.POP) break + if (status === MatchStatus.NO_MATCH) { + unmatch += source.charAt(0) + source = source.slice(1) + index += 1 + } + } + + if (unmatch) result.push(unmatch) + return { index, result } + } + + inline(source: string, context: string = this.inlineEntrance): string { + const inlineContext = this.getContext(context) + if (inlineContext instanceof Array) { + throw new Error(`'${context}' is not a inline context.`) + } + return inlineContext.parse(source).output + } + + parse(source: string, context: string = this.entrance): TokenLike[] { + const initialContext = this.getContext(context) + source = source.replace(/\r\n/g, '\n') + return this._parse(source, initialContext, true).result + } +} diff --git a/packages/lexer/tsconfig.json b/packages/lexer/tsconfig.json new file mode 100644 index 0000000..9677f08 --- /dev/null +++ b/packages/lexer/tsconfig.json @@ -0,0 +1,13 @@ +{ + "extends": "../../tsconfig.base.json", + "include": [ + "src" + ], + "compilerOptions": { + "outDir": "dist", + "rootDir": "src" + }, + "references": [ + { "path": "../core" } + ] +} \ No newline at end of file diff --git a/packages/marklet/package.json b/packages/marklet/package.json index 2a64db9..e9262d3 100644 --- a/packages/marklet/package.json +++ b/packages/marklet/package.json @@ -1,6 +1,6 @@ { "name": "markletjs", - "version": "1.1.12", + "version": "1.1.13", "description": "A markup language designed for API manual pages.", "author": "jjyyxx <1449843302@qq.com>", "contributors": [ @@ -28,8 +28,8 @@ "url": "https://github.com/obstudio/Marklet/issues" }, "dependencies": { - "@marklet/cli": "^1.1.4", - "@marklet/parser": "^1.0.4", + "@marklet/cli": "^1.1.5", + "@marklet/parser": "^1.1.0", "@marklet/renderer": "^1.1.2" } } \ No newline at end of file diff --git a/packages/parser/package.json b/packages/parser/package.json index ad9afcc..a6e8865 100644 --- a/packages/parser/package.json +++ b/packages/parser/package.json @@ -1,6 +1,6 @@ { "name": "@marklet/parser", - "version": "1.0.9", + "version": "1.1.0", "description": "A document lexer for marklet.", "author": "shigma <1700011071@pku.edu.cn>", "contributors": [ @@ -21,6 +21,8 @@ "url": "https://github.com/obstudio/Marklet/issues" }, "dependencies": { - "@marklet/core": "^1.0.4" + "@marklet/core": "^2.0.0", + "@marklet/lexer": "^1.0.10", + "@marklet/inline": "^1.0.0" } } \ No newline at end of file diff --git a/packages/parser/src/index.ts b/packages/parser/src/index.ts index 4cee43f..488a4ef 100644 --- a/packages/parser/src/index.ts +++ b/packages/parser/src/index.ts @@ -1,4 +1,6 @@ -import { Lexer, LexerConfig, TokenLike } from '@marklet/core' +import { LexerConfig, TokenLike } from '@marklet/core' +import { InlineLexer } from '@marklet/inline' +import { Lexer } from '@marklet/lexer' function escape(html: string): string { return html @@ -22,183 +24,59 @@ interface MarkletLexerConfig extends LexerConfig { default_language?: string } -class MarkletLexer extends Lexer { +class MarkletInlineLexer extends InlineLexer { constructor(config: MarkletLexerConfig = {}) { - super({ - main: [{ - type: 'newline', - regex: /\n+/, - token: null - }, { - type: 'heading', - regex: /(#{1,4}) +([^\n]+?)( +#)?/, - eol: true, - token(cap) { - let text, center - if (this.config.header_align && cap[3]) { - text = this.parse(cap[2], 'text').join('') - center = true - } else { - text = this.parse(cap[2] + (cap[3] || ''), 'text').join('') - center = false - } - return { level: cap[1].length, text, center } - } - }, { - type: 'section', - test: 'allow_section', - regex: /(\^{1,4}) +([^\n]+?)/, - eol: true, - push: 'main', - token(cap) { - const text = this.parse(cap[2], 'text').join('') - return { level: cap[1].length, text } - } - }, { - type: 'quote', - regex: />([\w-]*) +/, - push: 'block', - token: (cap, content) => ({ style: cap[1], content }) - }, { - type: 'separator', - regex: / *([-=])(\1|\.\1| \1)\2+/, - eol: true, - token: (cap) => ({ - thick: cap[1] === '=', - style: cap[2].length === 1 ? 'normal' - : cap[2][0] === ' ' ? 'dashed' : 'dotted' - }) - }, { - type: 'codeblock', - regex: / *(`{3,}) *([\w-]+)? *\n([\s\S]*?)\n? *\1/, - eol: true, - token(cap) { - return { - lang: cap[2] || this.config.default_language, - text: cap[3] || '', - } - } - }, { - type: 'usages', - regex: /(?= *\? +\S)/, - push: [{ - type: 'usage', - regex: / *\? +([^\n]+?)/, - eol: true, - push: [{ - regex: /(?= *\? )/, - pop: true - }, { - include: 'text' - }], - token(cap, cont) { - return { - text: this.parse(cap[1], 'text').join(''), - content: cont - } - } - }, { - pop: true - }] - }, { - type: 'list', - regex: / *(?={{bullet}} +[^\n]+)/, - push: [{ - type: 'item', - regex: /( *)({{bullet}}) +(?=[^\n]+)/, - push: [{ - regex: /\n? *(?={{bullet}} +[^\n]+)/, - pop: true - }, { - include: 'text' - }], - token(cap, cont) { - return { - text: cont.join(''), - ordered: cap[2].length > 1, - indent: cap[1].length, - } - } - }, { - pop: true - }], - token: (_, cont) => collect(cont) - }, { - type: 'inlinelist', - regex: /(?=\+)/, - push: [{ - type: 'item', - regex: /\+/, - push: [{ - regex: /\+?$|\+\n(?=\+)|\+?(?=\n)|(?=\+)/, - pop: true - }, { - include: 'text' - }], - token(_, cont) { - return cont.join('') - } - }, { - regex: /\n|$/, - pop: true - }], - token: (_, cont) => ({ content: cont }) - }, { - type: 'table', - regex: /$^/, // FIXME: placeholder for syntax discussion - push: [], - token: (_, cont) => ({ content: cont }) - }, { - type: 'paragraph', - push: 'text', - token: (_, cont) => ({ text: cont.join('') }) - }], - block: [{ - regex: /\n[ \t]*\n/, + super([ + { + regex: /(?=\n[ \t]*(\n|$))/, pop: true - }, { - include: 'main' - }], - text: [{ + }, + { type: 'escape', regex: /\\([\s\S])/, token: (cap) => cap[1] - }, { - regex: /(?=\n[ \t]*(\n|$))/, - pop: true - }, { + }, + { type: 'newline', regex: /\n/, token: '
' - }, { + }, + { type: 'code', regex: /(`+)\s*([\s\S]*?[^`]?)\s*\1(?!`)/, token: (cap) => `${escape(cap[2])}` - }, { + }, + { type: 'strikeout', regex: /-(?=\S)([\s\S]*?\S)-(?!-)/, - token: (cap) => `${cap.next}` - }, { + token: (cap) => `${cap.inner}` + }, + { type: 'underline', regex: /_(?=\S)([\s\S]*?\S)_(?!_)/, - token: (cap) => `${cap.next}` - }, { + token: (cap) => `${cap.inner}` + }, + { type: 'bold', regex: /\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/, - token: (cap) => `${cap.next}` - }, { + token: (cap) => `${cap.inner}` + }, + { type: 'italic', regex: /\*(?=\S)([\s\S]*?\S)\*(?!\*)/, - token: (cap) => `${cap.next}` - }, { + token: (cap) => `${cap.inner}` + }, + { type: 'comment', regex: /\(\((?=\S)([\s\S]*?\S)\)\)(?!\))/, - token: (cap) => `${cap.next}` - }, { + token: (cap) => `${cap.inner}` + }, + { type: 'package', regex: /{{(?=\S)([\s\S]*?\S)}}(?!})/, - token: (cap) => `${cap.next}` - }, { + token: (cap) => `${cap.inner}` + }, + { type: 'link', regex: /\[(?:([^\]|]+)\|)?([^\]]+)\]/, token(cap) { @@ -218,17 +96,181 @@ class MarkletLexer extends Lexer { `${text}` : // TODO: special treatment like necessary? `${text}` } - }] + } + ], config) + } +} + +class MarkletLexer extends Lexer { + constructor(config: MarkletLexerConfig = {}) { + super({ + text: new MarkletInlineLexer(config), + main: [ + { + type: 'newline', + regex: /\n+/, + token: null + }, + { + type: 'heading', + regex: /(#{1,4}) +([^\n]+?)( +#)?/, + eol: true, + token(cap) { + let text, center + if (this.config.header_align && cap[3]) { + text = this.inline(cap[2]) + center = true + } else { + text = this.inline(cap[2] + (cap[3] || '')) + center = false + } + return { level: cap[1].length, text, center } + } + }, + { + type: 'section', + test: 'allow_section', + regex: /(\^{1,4}) +([^\n]+?)/, + eol: true, + push: 'main', + token(cap) { + return { + level: cap[1].length, + text: this.inline(cap[2]), + } + } + }, + { + type: 'quote', + regex: />([\w-]*) +/, + push: 'block', + token: (cap, content) => ({ style: cap[1], content }) + }, + { + type: 'separator', + regex: / *([-=])(\1|\.\1| \1)\2+/, + eol: true, + token: (cap) => ({ + thick: cap[1] === '=', + style: cap[2].length === 1 ? 'normal' + : cap[2][0] === ' ' ? 'dashed' : 'dotted' + }) + }, + { + type: 'codeblock', + regex: / *(`{3,}) *([\w-]+)? *\n([\s\S]*?)\n? *\1/, + eol: true, + token(cap) { + return { + lang: cap[2] || this.config.default_language, + text: cap[3] || '', + } + } + }, + { + type: 'usages', + regex: /(?= *\? +\S)/, + strict: true, + push: [ + { + type: 'usage', + regex: / *\? +([^\n]+?)/, + eol: true, + push: [ + { + regex: /(?= *\? )/, + pop: true + }, + { + include: 'text' + } + ], + token(cap, cont) { + return { + text: this.inline(cap[1]), + content: cont, + } + } + } + ] + }, + { + type: 'list', + regex: / *(?={{bullet}} +[^\n]+)/, + strict: true, + push: [ + { + type: 'item', + regex: /( *)({{bullet}}) +(?=[^\n]+)/, + push: [{ + regex: /\n? *(?={{bullet}} +[^\n]+)/, + pop: true + }, { + include: 'text' + }], + token(cap, cont) { + return { + text: cont.join(''), + ordered: cap[2].length > 1, + indent: cap[1].length, + } + } + } + ], + token: (_, cont) => collect(cont) + }, + { + type: 'inlinelist', + regex: /(?=\+)/, + push: [ + { + type: 'item', + regex: /\+/, + push: [ + { + regex: /\+?$|\+\n(?=\+)|\+?(?=\n)|(?=\+)/, + pop: true + }, + { + include: 'text' + } + ], + token(_, cont) { + return cont.join('') + } + }, + { + regex: /\n|$/, + pop: true + } + ], + token: (_, cont) => ({ content: cont }) + }, + { + type: 'table', + regex: /$^/, // FIXME: placeholder for syntax discussion + push: [], + token: (_, cont) => ({ content: cont }) + }, + { + type: 'paragraph', + push: 'text', + token: (_, cont) => ({ text: cont.join('') }) + } + ], + block: [ + { + regex: /\n[ \t]*\n/, + pop: true + }, + { + include: 'main' + } + ], }, { macros: { bullet: /-|\d+\./, }, - getters: { - next(capture) { - const result = this.parse(capture.reverse().find(item => !!item) || '') - return result/* .map(token => token.text || token) */.join('') - }, - }, config: { header_align: true, allow_section: true, @@ -256,5 +298,6 @@ export function parse(options: ParseOptions): TokenLike[] { export { MarkletLexer as Lexer, + MarkletInlineLexer as InlineLexer, MarkletLexerConfig as LexerConfig, } diff --git a/packages/parser/tsconfig.json b/packages/parser/tsconfig.json index 4e48185..8a214b6 100644 --- a/packages/parser/tsconfig.json +++ b/packages/parser/tsconfig.json @@ -1,5 +1,5 @@ { - "extends": "../../tsconfig.json", + "extends": "../../tsconfig.base.json", "include": [ "src" ], @@ -8,6 +8,8 @@ "rootDir": "src" }, "references": [ - { "path": "../core" } + { "path": "../core" }, + { "path": "../inline" }, + { "path": "../lexer" } ] } \ No newline at end of file diff --git a/packages/renderer/package.json b/packages/renderer/package.json index e8928a7..15538f3 100644 --- a/packages/renderer/package.json +++ b/packages/renderer/package.json @@ -21,7 +21,7 @@ "url": "https://github.com/obstudio/Marklet/issues" }, "dependencies": { - "@marklet/core": "^1.0.9" + "@marklet/core": "^2.0.0-beta.0" }, "peerDependencies": { "vue": "^2.5.17" diff --git a/packages/syntax/package.json b/packages/syntax/package.json index d676b55..9000d29 100644 --- a/packages/syntax/package.json +++ b/packages/syntax/package.json @@ -1,6 +1,6 @@ { "name": "@marklet/syntax", - "version": "1.0.9", + "version": "1.0.10", "description": "A common language lexer for marklet.", "author": "shigma <1700011071@pku.edu.cn>", "homepage": "https://github.com/obstudio/Marklet", @@ -18,7 +18,7 @@ "url": "https://github.com/obstudio/Marklet/issues" }, "dependencies": { - "@marklet/core": "^1.0.4", + "@marklet/lexer": "^1.0.10", "js-yaml": "^3.12.0" } } \ No newline at end of file diff --git a/packages/syntax/src/index.ts b/packages/syntax/src/index.ts index 5993151..7ee1b73 100644 --- a/packages/syntax/src/index.ts +++ b/packages/syntax/src/index.ts @@ -1,4 +1,4 @@ -import { Lexer, LexerRules } from '@marklet/core' +import { Lexer, LexerContexts } from '@marklet/lexer' type SyntaxRule = SyntaxMetaRule | SyntaxIncludeRule | SyntaxRegexRule interface SyntaxToken { scope: string, text: string } @@ -48,6 +48,6 @@ export class SyntaxLexer extends Lexer { }) } for (const key in contexts) traverse(contexts[key]) - super(contexts, { macros }) + super(contexts as LexerContexts, { macros }) } } diff --git a/packages/syntax/tsconfig.json b/packages/syntax/tsconfig.json index 4e48185..004deda 100644 --- a/packages/syntax/tsconfig.json +++ b/packages/syntax/tsconfig.json @@ -1,5 +1,5 @@ { - "extends": "../../tsconfig.json", + "extends": "../../tsconfig.base.json", "include": [ "src" ], @@ -8,6 +8,6 @@ "rootDir": "src" }, "references": [ - { "path": "../core" } + { "path": "../lexer" } ] } \ No newline at end of file diff --git a/packages/test/package.json b/packages/test/package.json index ec964a9..8a6ecc3 100644 --- a/packages/test/package.json +++ b/packages/test/package.json @@ -1,6 +1,6 @@ { "name": "@marklet/test", - "version": "1.0.9", + "version": "1.0.10", "private": true, "author": "jjyyxx <1449843302@qq.com>", "homepage": "https://github.com/obstudio/Marklet", @@ -17,6 +17,6 @@ }, "dependencies": { "@marklet/detok": "^1.0.4", - "markletjs": "^1.1.4" + "markletjs": "^1.1.13" } } \ No newline at end of file diff --git a/tsconfig.base.json b/tsconfig.base.json new file mode 100644 index 0000000..77c864d --- /dev/null +++ b/tsconfig.base.json @@ -0,0 +1,10 @@ +{ + "compilerOptions": { + "module": "commonjs", + "declaration": true, + "target": "es2018", + "noImplicitAny": true, + "composite": true, + "esModuleInterop": true + } +} \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json index 6f38192..6dc9dbf 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,16 +1,10 @@ { - "compilerOptions": { - "module": "commonjs", - "declaration": true, - "target": "es2018", - "noImplicitAny": true, - "composite": true, - "esModuleInterop": true - }, "references": [ { "path": "./packages/core" }, { "path": "./packages/dev-server" }, { "path": "./packages/detok" }, + { "path": "./packages/inline" }, + { "path": "./packages/lexer" }, { "path": "./packages/parser" }, { "path": "./packages/syntax" } ],