From a2ee372ed0dbb686ce5d252f945d4831b68b4bd5 Mon Sep 17 00:00:00 2001
From: Sunshine <sunshine_40@qq.com>
Date: Wed, 5 Jun 2024 07:31:36 +0800
Subject: [PATCH] Implement "fallback" searching strategy and remove dedicated
 Chinese search support.

---
 Cargo.lock                                   |  40 -
 Cargo.toml                                   |   5 +-
 guide/src/format/configuration/general.md    |   3 +-
 guide/src/format/configuration/renderers.md  |   3 +-
 src/renderer/html_handlebars/hbs_renderer.rs |   8 +-
 src/renderer/html_handlebars/search.rs       | 173 ++--
 src/renderer/html_handlebars/search/lang.rs  |  48 +
 src/theme/css/chrome.css                     |   4 +
 src/theme/searcher/languages/lunr.zh.js      | 145 ---
 src/theme/searcher/mod.rs                    |   2 +-
 src/theme/searcher/searcher.fallback.js      | 934 +++++++++++++++++++
 src/utils/mod.rs                             |  13 +-
 12 files changed, 1126 insertions(+), 252 deletions(-)
 create mode 100644 src/renderer/html_handlebars/search/lang.rs
 delete mode 100644 src/theme/searcher/languages/lunr.zh.js
 create mode 100644 src/theme/searcher/searcher.fallback.js

diff --git a/Cargo.lock b/Cargo.lock
index 1986f598c0..7859cf3dec 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -248,15 +248,6 @@ version = "1.0.97"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4"
 
-[[package]]
-name = "cedarwood"
-version = "0.4.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d910bedd62c24733263d0bed247460853c9d22e8956bd4cd964302095e04e90"
-dependencies = [
- "smallvec",
-]
-
 [[package]]
 name = "cfg-if"
 version = "1.0.0"
@@ -506,7 +497,6 @@ version = "3.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "41e83863a500656dfa214fee6682de9c5b9f03de6860fec531235ed2ae9f6571"
 dependencies = [
- "jieba-rs",
  "lindera",
  "lindera-core",
  "regex",
@@ -757,15 +747,6 @@ dependencies = [
  "slab",
 ]
 
-[[package]]
-name = "fxhash"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
-dependencies = [
- "byteorder",
-]
-
 [[package]]
 name = "generic-array"
 version = "0.14.7"
@@ -1106,21 +1087,6 @@ version = "1.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
 
-[[package]]
-name = "jieba-rs"
-version = "0.6.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "93f0c1347cd3ac8d7c6e3a2dc33ac496d365cf09fc0831aa61111e1a6738983e"
-dependencies = [
- "cedarwood",
- "fxhash",
- "hashbrown 0.14.5",
- "lazy_static",
- "phf 0.11.2",
- "phf_codegen 0.11.2",
- "regex",
-]
-
 [[package]]
 name = "js-sys"
 version = "0.3.69"
@@ -1150,12 +1116,6 @@ dependencies = [
  "libc",
 ]
 
-[[package]]
-name = "lazy_static"
-version = "1.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
-
 [[package]]
 name = "libc"
 version = "0.2.154"
diff --git a/Cargo.toml b/Cargo.toml
index 7c866ab23f..1aced5975b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -65,7 +65,10 @@ default = ["watch", "serve", "search"]
 watch = ["dep:notify", "dep:notify-debouncer-mini", "dep:ignore", "dep:pathdiff", "dep:walkdir"]
 serve = ["dep:futures-util", "dep:tokio", "dep:warp"]
 search = ["dep:elasticlunr-rs", "dep:ammonia"]
-search-non-english = ["search", "elasticlunr-rs/languages"]
+search-non-english = ["search", "elasticlunr-rs/ar", "elasticlunr-rs/da", "elasticlunr-rs/de", "elasticlunr-rs/du",
+    "elasticlunr-rs/es", "elasticlunr-rs/fi", "elasticlunr-rs/fr", "elasticlunr-rs/hu", "elasticlunr-rs/it",
+    "elasticlunr-rs/ja", "elasticlunr-rs/ko", "elasticlunr-rs/no", "elasticlunr-rs/pt", "elasticlunr-rs/ro",
+    "elasticlunr-rs/ru", "elasticlunr-rs/sv", "elasticlunr-rs/tr"]
 
 [[bin]]
 doc = false
diff --git a/guide/src/format/configuration/general.md b/guide/src/format/configuration/general.md
index 3a10898c1f..3930219cf3 100644
--- a/guide/src/format/configuration/general.md
+++ b/guide/src/format/configuration/general.md
@@ -47,7 +47,8 @@ This is general information about your book.
   key in the configuration file.
 - **language:** The main language of the book, which is used as a language attribute `<html lang="en">` for example.
   This is also used to derive the direction of text (RTL, LTR) within the book.
-  When `search-non-english` feature is enabled, this may change the behavior of the search functionality provided by the HTML renderer.
+  When it is set to a non-English language, an alternative indexing/searching strategy is applied to the search functionality provided by the HTML renderer.
+  When the `search-non-english` feature is enabled, additional language-specific search support may kick in.
 - **text-direction**: The direction of text in the book: Left-to-right (LTR) or Right-to-left (RTL). Possible values: `ltr`, `rtl`.
   When not specified, the text direction is derived from the book's `language` attribute.
 
diff --git a/guide/src/format/configuration/renderers.md b/guide/src/format/configuration/renderers.md
index 04d9912b95..2d6f000d56 100644
--- a/guide/src/format/configuration/renderers.md
+++ b/guide/src/format/configuration/renderers.md
@@ -263,7 +263,8 @@ copy-js = true           # include Javascript code for search
 - **enable:** Enables the search feature. Defaults to `true`.
 - **limit-results:** The maximum number of search results. Defaults to `30`.
 - **teaser-word-count:** The number of words used for a search result teaser.
-  Defaults to `30`.
+  When `book.language` is set to a non-English language, this limit may
+  be exceeded if too many keywords are matched. Defaults to `30`.
 - **use-boolean-and:** Define the logical link between multiple search words. If
   true, all search words must appear in each result. Defaults to `false`.
 - **boost-title:** Boost factor for the search result score if a search word
diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs
index 606e87c65b..6d8573722c 100644
--- a/src/renderer/html_handlebars/hbs_renderer.rs
+++ b/src/renderer/html_handlebars/hbs_renderer.rs
@@ -544,10 +544,10 @@ impl Renderer for HtmlHandlebars {
         {
             let search = html_config.search.clone().unwrap_or_default();
             if search.enable {
-                let language = book_config
-                    .language
-                    .as_deref()
-                    .and_then(|lang| lang.parse().ok());
+                let language = match book_config.language.as_deref() {
+                    None => Err("en".to_string()),
+                    Some(language) => language.parse(),
+                };
                 #[allow(unused_variables)]
                 let extra_language_subtag =
                     super::search::create_files(&search, language, destination, book)?;
diff --git a/src/renderer/html_handlebars/search.rs b/src/renderer/html_handlebars/search.rs
index 66a733a0db..8fcb501476 100644
--- a/src/renderer/html_handlebars/search.rs
+++ b/src/renderer/html_handlebars/search.rs
@@ -1,3 +1,5 @@
+mod lang;
+
 use std::borrow::Cow;
 use std::collections::{HashMap, HashSet};
 use std::fmt::Display;
@@ -32,9 +34,8 @@ fn tokenize(text: &str) -> Vec<String> {
 /// Languages that wouldn't work with the current feature flag config are included.
 #[derive(Debug, Copy, Clone)]
 #[non_exhaustive]
-pub enum SupportedNonEnglishLanguage {
+pub(crate) enum ExtraSupportedLanguage {
     Arabic,
-    Chinese,
     Danish,
     Dutch,
     Finnish,
@@ -53,22 +54,21 @@ pub enum SupportedNonEnglishLanguage {
     Turkish,
 }
 
-impl FromStr for SupportedNonEnglishLanguage {
-    type Err = ();
+impl FromStr for ExtraSupportedLanguage {
+    type Err = String;
 
     /// A language tag can be like "zh" / "zh-CN" / "zh-Hans" / "zh-Hans-CN")
     /// See: https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/lang#language_tag_syntax
+    /// If the language has no extra support, `Err` is returned with the lowercased primary language subtag.
     fn from_str(language_tag: &str) -> Result<Self, Self::Err> {
-        use SupportedNonEnglishLanguage::*;
-        match language_tag
+        use ExtraSupportedLanguage::*;
+        let language_subtag = language_tag
             .split('-')
             .next()
             .expect("splitting a string always returns at least 1 fragment")
-            .to_ascii_lowercase()
-            .as_str()
-        {
+            .to_ascii_lowercase();
+        match language_subtag.as_str() {
             "ar" => Ok(Arabic),
-            "zh" => Ok(Chinese),
             "da" => Ok(Danish),
             "nl" => Ok(Dutch),
             "fi" => Ok(Finnish),
@@ -85,22 +85,21 @@ impl FromStr for SupportedNonEnglishLanguage {
             "es" => Ok(Spanish),
             "sv" => Ok(Swedish),
             "tr" => Ok(Turkish),
-            _ => Err(()),
+            _ => Err(language_subtag),
         }
     }
 }
 
-impl TryFrom<SupportedNonEnglishLanguage> for Box<dyn elasticlunr::Language> {
+impl TryFrom<ExtraSupportedLanguage> for Box<dyn elasticlunr::Language> {
     type Error = ();
 
     #[cfg(feature = "search-non-english")]
     /// Returns `Ok` if and only if `language.lunr_js_content()` returns `Some`.
-    fn try_from(language: SupportedNonEnglishLanguage) -> std::result::Result<Self, Self::Error> {
+    fn try_from(language: ExtraSupportedLanguage) -> std::result::Result<Self, Self::Error> {
         use elasticlunr::lang as el;
-        use SupportedNonEnglishLanguage::*;
+        use ExtraSupportedLanguage::*;
         match language {
             Arabic => Ok(Box::new(el::Arabic::new())),
-            Chinese => Ok(Box::new(el::Chinese::new())),
             Danish => Ok(Box::new(el::Danish::new())),
             Dutch => Ok(Box::new(el::Dutch::new())),
             Finnish => Ok(Box::new(el::Finnish::new())),
@@ -121,18 +120,17 @@ impl TryFrom<SupportedNonEnglishLanguage> for Box<dyn elasticlunr::Language> {
     }
 
     #[cfg(not(feature = "search-non-english"))]
-    fn try_from(_: SupportedNonEnglishLanguage) -> std::result::Result<Self, Self::Error> {
+    fn try_from(_: ExtraSupportedLanguage) -> std::result::Result<Self, Self::Error> {
         Err(())
     }
 }
 
-impl Display for SupportedNonEnglishLanguage {
-    /// Displays as language subtag (e.g. "zh" for Chinese).
+impl Display for ExtraSupportedLanguage {
+    /// Displays as language subtag (e.g. "de" for German).
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        use SupportedNonEnglishLanguage::*;
+        use ExtraSupportedLanguage::*;
         f.write_str(match self {
             Arabic => "ar",
-            Chinese => "zh",
             Danish => "da",
             Dutch => "nl",
             Finnish => "fi",
@@ -154,14 +152,13 @@ impl Display for SupportedNonEnglishLanguage {
 }
 
 #[cfg(feature = "search-non-english")]
-impl SupportedNonEnglishLanguage {
+impl ExtraSupportedLanguage {
     /// Returns `Some` if and only if `self.try_into::<Box<dyn elasticlunr::Language>>()` returns `Ok`.
-    pub(crate) fn lunr_js_content(self) -> Option<&'static [u8]> {
+    pub fn lunr_js_content(self) -> Option<&'static [u8]> {
         use searcher::lang::*;
-        use SupportedNonEnglishLanguage::*;
+        use ExtraSupportedLanguage::*;
         match self {
             Arabic => Some(ARABIC_JS),
-            Chinese => Some(CHINESE_JS),
             Danish => Some(DANISH_JS),
             Dutch => Some(DUTCH_JS),
             Finnish => Some(FINNISH_JS),
@@ -185,40 +182,53 @@ impl SupportedNonEnglishLanguage {
 /// Creates all files required for search.
 /// Returns the language subtag if extra `lunr.stemmer.support.js` &
 /// `lunr.*.js` files should be imported.
-/// E.g., returns "zh" when `lunr.stemmer.support.js` & `lunr.zh.js` should be imported.
+/// E.g., returns "ja" when `lunr.stemmer.support.js` & `lunr.ja.js` should be imported.
 pub fn create_files(
     search_config: &Search,
-    language: Option<SupportedNonEnglishLanguage>,
+    language: Result<ExtraSupportedLanguage, String>,
     destination: &Path,
     book: &Book,
 ) -> Result<Option<String>> {
-    #[allow(unused_variables)]
-    let (mut index, extra_language_subtag) = match language.and_then(|l| l.try_into().ok()) {
-        None => {
-            if let Some(non_english_language) = language {
+    let potentially_supported_language = language.as_ref().ok().copied();
+    let (mut index, extra_language_subtag, use_fallback);
+
+    match language.and_then(|l| l.try_into().map_err(|_| l.to_string())) {
+        Err(subtag) => {
+            if let Some(language) = potentially_supported_language {
                 warn!(
-                    "mdBook compiled without {non_english_language:?}(`{non_english_language}`) \
-                    search support though it's available"
+                    "mdBook compiled without {language:?}(`{language}`) \
+                        search support though it's available"
                 );
                 warn!(
                     "please reinstall with `cargo install mdbook --force --features \
-                     search-non-english`"
+                        search-non-english`"
                 );
-                warn!("to enable {non_english_language:?} search support")
+                warn!("to enable {language:?} search support")
             }
-            (
-                IndexBuilder::new()
-                    .add_field_with_tokenizer("title", Box::new(&tokenize))
-                    .add_field_with_tokenizer("body", Box::new(&tokenize))
-                    .add_field_with_tokenizer("breadcrumbs", Box::new(&tokenize))
-                    .build(),
-                None,
-            )
+            match subtag.as_str() {
+                "en" => {
+                    index = IndexBuilder::new()
+                        .add_field_with_tokenizer("title", Box::new(&tokenize))
+                        .add_field_with_tokenizer("body", Box::new(&tokenize))
+                        .add_field_with_tokenizer("breadcrumbs", Box::new(&tokenize))
+                        .build();
+                    use_fallback = false;
+                }
+                _ => {
+                    index = Index::with_language(
+                        Box::new(lang::Fallback::new()),
+                        &["title", "body", "breadcrumbs"],
+                    );
+                    use_fallback = true;
+                }
+            };
+            extra_language_subtag = None;
+        }
+        Ok(elasticlunr_language) => {
+            index = Index::with_language(elasticlunr_language, &["title", "body", "breadcrumbs"]);
+            extra_language_subtag = potentially_supported_language.map(|l| l.to_string());
+            use_fallback = false;
         }
-        Some(elasticlunr_language) => (
-            Index::with_language(elasticlunr_language, &["title", "body", "breadcrumbs"]),
-            language.map(|l| l.to_string()),
-        ),
     };
 
     let mut doc_urls = Vec::with_capacity(book.sections.len());
@@ -240,7 +250,15 @@ pub fn create_files(
             "searchindex.js",
             format!("Object.assign(window.search, {});", index).as_bytes(),
         )?;
-        utils::fs::write_file(destination, "searcher.js", searcher::JS)?;
+        utils::fs::write_file(
+            destination,
+            "searcher.js",
+            if use_fallback {
+                searcher::FALLBACK_JS
+            } else {
+                searcher::JS
+            },
+        )?;
         utils::fs::write_file(destination, "mark.min.js", searcher::MARK_JS)?;
         utils::fs::write_file(destination, "elasticlunr.min.js", searcher::ELASTICLUNR_JS)?;
         #[cfg(feature = "search-non-english")]
@@ -363,8 +381,9 @@ fn render_item(
                 breadcrumbs.push(heading.clone());
             }
             Event::Start(Tag::FootnoteDefinition(name)) => {
-                let number = footnote_numbers.len() + 1;
-                footnote_numbers.entry(name).or_insert(number);
+                let len = footnote_numbers.len() + 1;
+                let number = footnote_numbers.entry(name).or_insert(len);
+                body.push_str(&format!("[^{}]: ", number))
             }
             Event::Html(html) => {
                 let mut html_block = html.into_string();
@@ -389,15 +408,57 @@ fn render_item(
                 // blocks, and worse case you have some noise in the index.
                 body.push_str(&clean_html(&html));
             }
-            Event::Start(_) | Event::End(_) | Event::Rule | Event::SoftBreak | Event::HardBreak => {
-                // Insert spaces where HTML output would usually separate text
-                // to ensure words don't get merged together
-                if in_heading {
-                    heading.push(' ');
-                } else {
-                    body.push(' ');
+            // Insert separators or markers where HTML output would usually separate text
+            // to ensure words don't get merged together
+            Event::Start(tag) => {
+                let target = if in_heading { &mut heading } else { &mut body };
+                match tag {
+                    Tag::Paragraph
+                    | Tag::Heading { .. }
+                    | Tag::BlockQuote
+                    | Tag::CodeBlock(_)
+                    | Tag::HtmlBlock
+                    | Tag::List(_)
+                    | Tag::Table(_)
+                    | Tag::TableHead
+                    | Tag::TableRow
+                    | Tag::TableCell
+                    | Tag::Emphasis
+                    | Tag::Strong
+                    | Tag::Link { .. }
+                    | Tag::MetadataBlock(_) => {}
+                    Tag::Item => target.push_str("* "),
+                    Tag::Strikethrough => target.push_str("~~"),
+                    Tag::Image { .. } => target.push_str("[image: "),
+                    Tag::FootnoteDefinition(_) => unreachable!(),
+                }
+            }
+            Event::End(tag_end) => {
+                let target = if in_heading { &mut heading } else { &mut body };
+                match tag_end {
+                    TagEnd::Paragraph
+                    | TagEnd::Heading(_)
+                    | TagEnd::BlockQuote
+                    | TagEnd::CodeBlock
+                    | TagEnd::Item
+                    | TagEnd::TableHead
+                    | TagEnd::TableRow => target.push_str("\n"),
+                    TagEnd::HtmlBlock
+                    | TagEnd::List(_)
+                    | TagEnd::FootnoteDefinition
+                    | TagEnd::Table
+                    | TagEnd::Emphasis
+                    | TagEnd::Strong
+                    | TagEnd::Link
+                    | TagEnd::MetadataBlock(_) => {}
+                    TagEnd::TableCell => target.push('\t'),
+                    TagEnd::Strikethrough => target.push_str("~~"),
+                    TagEnd::Image => target.push(']'),
                 }
             }
+            Event::Rule => {}
+            Event::SoftBreak => (if in_heading { &mut heading } else { &mut body }).push(' '),
+            Event::HardBreak => (if in_heading { &mut heading } else { &mut body }).push('\n'),
             Event::Text(text) | Event::Code(text) => {
                 if in_heading {
                     heading.push_str(&text);
diff --git a/src/renderer/html_handlebars/search/lang.rs b/src/renderer/html_handlebars/search/lang.rs
new file mode 100644
index 0000000000..33f877729a
--- /dev/null
+++ b/src/renderer/html_handlebars/search/lang.rs
@@ -0,0 +1,48 @@
+use crate::renderer::html_handlebars::search::MAX_WORD_LENGTH_TO_INDEX;
+use elasticlunr::lang::English;
+use elasticlunr::Pipeline;
+use once_cell::sync::OnceCell;
+use regex::Regex;
+
+pub struct Fallback {
+    tokenize_regex: &'static Regex, // token pattern shared by all instances (built in `new`)
+    english: &'static English, // shared English language; `make_pipeline` starts from it
+}
+
+impl Fallback {
+    pub fn new() -> Self {
+        static TOKENIZE_REGEX: OnceCell<Regex> = OnceCell::new(); // filled on first call
+        static ENGLISH: OnceCell<English> = OnceCell::new(); // filled on first call
+        Self {
+            tokenize_regex: TOKENIZE_REGEX.get_or_init(|| Regex::new(
+                r"[\p{Unified_Ideograph}\p{Hangul}]|[^\p{White_Space}\p{P}\p{Sm}\p{CurrencySymbol}\p{So}\p{Unified_Ideograph}\p{Hangul}\p{Z}\p{C}]+|\p{So}\p{Sk}?(\u200D\p{So}\p{Sk}?)*"
+            ).unwrap()), // would panic only if the pattern above failed to compile
+            english: ENGLISH.get_or_init(English::new), // same instance for every `Fallback`
+        }
+    }
+}
+
+impl elasticlunr::Language for Fallback {
+    fn name(&self) -> String {
+        "English, Chinese, Japanese, Korean, Vietnamese".into()
+    }
+
+    fn code(&self) -> String {
+        "en".into() // reports the English code even though other scripts are tokenized too
+    }
+
+    fn tokenize(&self, text: &str) -> Vec<String> {
+        self.tokenize_regex
+            .find_iter(text) // match = 1 ideograph/Hangul char | other word run | symbol seq
+            .map(|s| s.as_str())
+            .filter(|s| s.len() <= MAX_WORD_LENGTH_TO_INDEX) // byte length, not char count
+            .map(|s| s.to_lowercase()) // index tokens in lowercase
+            .collect()
+    }
+
+    fn make_pipeline(&self) -> Pipeline {
+        let mut pipeline = self.english.make_pipeline(); // start from the English pipeline
+        pipeline.queue.drain(0..2); // removes the first two stages (trimmer/stop words? verify)
+        pipeline
+    }
+}
diff --git a/src/theme/css/chrome.css b/src/theme/css/chrome.css
index 83b7969bce..0c23d1a9b4 100644
--- a/src/theme/css/chrome.css
+++ b/src/theme/css/chrome.css
@@ -370,6 +370,10 @@ ul#searchresults li {
 ul#searchresults li.focus {
     background-color: var(--searchresults-li-bg);
 }
+ul#searchresults li a em {
+    font-weight: bold; /* <em> inside a result link stands out by weight */
+    font-style: normal; /* ...and is not rendered in italics */
+}
 ul#searchresults span.teaser {
     display: block;
     clear: both;
diff --git a/src/theme/searcher/languages/lunr.zh.js b/src/theme/searcher/languages/lunr.zh.js
deleted file mode 100644
index 48f5890d96..0000000000
--- a/src/theme/searcher/languages/lunr.zh.js
+++ /dev/null
@@ -1,145 +0,0 @@
-/*!
- * Lunr languages, `Chinese` language
- * https://github.com/MihaiValentin/lunr-languages
- *
- * Copyright 2019, Felix Lian (repairearth)
- * http://www.mozilla.org/MPL/
- */
-/*!
- * based on
- * Snowball zhvaScript Library v0.3
- * http://code.google.com/p/urim/
- * http://snowball.tartarus.org/
- *
- * Copyright 2010, Oleg Mazko
- * http://www.mozilla.org/MPL/
- */
-
-/**
- * export the module via AMD, CommonJS or as a browser global
- * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
- */
-;
-(function(root, factory) {
-  if (typeof define === 'function' && define.amd) {
-    // AMD. Register as an anonymous module.
-    define(factory)
-  } else if (typeof exports === 'object') {
-    /**
-     * Node. Does not work with strict CommonJS, but
-     * only CommonJS-like environments that support module.exports,
-     * like Node.
-     */
-    module.exports = factory(require('@node-rs/jieba'))
-  } else {
-    // Browser globals (root is window)
-    factory()(root.lunr);
-  }
-}(this, function(nodejieba) {
-  /**
-   * Just return a value to define the module export.
-   * This example returns an object, but the module
-   * can return a function as the exported value.
-   */
-  return function(lunr, nodejiebaDictJson) {
-    /* throw error if lunr is not yet included */
-    if ('undefined' === typeof lunr) {
-      throw new Error('Lunr is not present. Please include / require Lunr before this script.');
-    }
-
-    /* throw error if lunr stemmer support is not yet included */
-    if ('undefined' === typeof lunr.stemmerSupport) {
-      throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.');
-    }
-
-    /*
-    Chinese tokenization is trickier, since it does not
-    take into account spaces.
-    Since the tokenization function is represented different
-    internally for each of the Lunr versions, this had to be done
-    in order to try to try to pick the best way of doing this based
-    on the Lunr version
-     */
-    var isLunr2 = lunr.version[0] == "2";
-
-    /* register specific locale function */
-    lunr.zh = function() {
-      this.pipeline.reset();
-      this.pipeline.add(
-        lunr.zh.trimmer,
-        lunr.zh.stopWordFilter,
-        lunr.zh.stemmer
-      );
-
-      // change the tokenizer for Chinese one
-      if (isLunr2) { // for lunr version 2.0.0
-        this.tokenizer = lunr.zh.tokenizer;
-      } else {
-        if (lunr.tokenizer) { // for lunr version 0.6.0
-          lunr.tokenizer = lunr.zh.tokenizer;
-        }
-        if (this.tokenizerFn) { // for lunr version 0.7.0 -> 1.0.0
-          this.tokenizerFn = lunr.zh.tokenizer;
-        }
-      }
-    };
-
-    lunr.zh.tokenizer = function(obj) {
-      if (!arguments.length || obj == null || obj == undefined) return []
-      if (Array.isArray(obj)) return obj.map(function(t) {
-        return isLunr2 ? new lunr.Token(t.toLowerCase()) : t.toLowerCase()
-      })
-
-      nodejiebaDictJson && nodejieba.load(nodejiebaDictJson)
-
-      var str = obj.toString().trim().toLowerCase();
-      var tokens = [];
-
-      nodejieba.cut(str, true).forEach(function(seg) {
-        tokens = tokens.concat(seg.split(' '))
-      })
-
-      tokens = tokens.filter(function(token) {
-        return !!token;
-      });
-
-      var fromIndex = 0
-
-      return tokens.map(function(token, index) {
-        if (isLunr2) {
-          var start = str.indexOf(token, fromIndex)
-
-          var tokenMetadata = {}
-          tokenMetadata["position"] = [start, token.length]
-          tokenMetadata["index"] = index
-
-          fromIndex = start
-
-          return new lunr.Token(token, tokenMetadata);
-        } else {
-          return token
-        }
-      });
-    }
-
-    /* lunr trimmer function */
-    lunr.zh.wordCharacters = "\\w\u4e00-\u9fa5";
-    lunr.zh.trimmer = lunr.trimmerSupport.generateTrimmer(lunr.zh.wordCharacters);
-    lunr.Pipeline.registerFunction(lunr.zh.trimmer, 'trimmer-zh');
-
-    /* lunr stemmer function */
-    lunr.zh.stemmer = (function() {
-
-      /* TODO Chinese stemmer  */
-      return function(word) {
-        return word;
-      }
-    })();
-    lunr.Pipeline.registerFunction(lunr.zh.stemmer, 'stemmer-zh');
-
-    /* lunr stop word filter. see https://www.ranks.nl/stopwords/chinese-stopwords */
-    lunr.zh.stopWordFilter = lunr.generateStopWordFilter(
-      '的 一 不 在 人 有 是 为 為 以 于 於 上 他 而 后 後 之 来 來 及 了 因 下 可 到 由 这 這 与 與 也 此 但 并 並 个 個 其 已 无 無 小 我 们 們 起 最 再 今 去 好 只 又 或 很 亦 某 把 那 你 乃 它 吧 被 比 别 趁 当 當 从 從 得 打 凡 儿 兒 尔 爾 该 該 各 给 給 跟 和 何 还 還 即 几 幾 既 看 据 據 距 靠 啦 另 么 麽 每 嘛 拿 哪 您 凭 憑 且 却 卻 让 讓 仍 啥 如 若 使 谁 誰 虽 雖 随 隨 同 所 她 哇 嗡 往 些 向 沿 哟 喲 用 咱 则 則 怎 曾 至 致 着 著 诸 諸 自'.split(' '));
-    lunr.Pipeline.registerFunction(lunr.zh.stopWordFilter, 'stopWordFilter-zh');
-  };
-}))
\ No newline at end of file
diff --git a/src/theme/searcher/mod.rs b/src/theme/searcher/mod.rs
index 62b8214f5a..860528b59d 100644
--- a/src/theme/searcher/mod.rs
+++ b/src/theme/searcher/mod.rs
@@ -2,6 +2,7 @@
 //! the "search" cargo feature is disabled.
 
 pub static JS: &[u8] = include_bytes!("searcher.js");
+pub static FALLBACK_JS: &[u8] = include_bytes!("searcher.fallback.js");
 pub static MARK_JS: &[u8] = include_bytes!("mark.min.js");
 pub static ELASTICLUNR_JS: &[u8] = include_bytes!("elasticlunr.min.js");
 
@@ -9,7 +10,6 @@ pub static ELASTICLUNR_JS: &[u8] = include_bytes!("elasticlunr.min.js");
 pub mod lang {
     pub static STEMMER_SUPPORT_JS: &[u8] = include_bytes!("lunr.stemmer.support.js");
     pub static ARABIC_JS: &[u8] = include_bytes!("languages/lunr.ar.js");
-    pub static CHINESE_JS: &[u8] = include_bytes!("languages/lunr.zh.js");
     pub static DANISH_JS: &[u8] = include_bytes!("languages/lunr.da.js");
     pub static DUTCH_JS: &[u8] = include_bytes!("languages/lunr.nl.js");
     pub static FINNISH_JS: &[u8] = include_bytes!("languages/lunr.fi.js");
diff --git a/src/theme/searcher/searcher.fallback.js b/src/theme/searcher/searcher.fallback.js
new file mode 100644
index 0000000000..716a07b769
--- /dev/null
+++ b/src/theme/searcher/searcher.fallback.js
@@ -0,0 +1,934 @@
+"use strict";
+window.search = window.search || {};
+(function search(search) {
+    // Search functionality
+    //
+    // You can use !hasFocus() to prevent keyhandling in your key
+    // event handlers while the user is typing their search.
+
+    if (typeof Mark === 'undefined' || !Mark || typeof elasticlunr === 'undefined' || !elasticlunr) {
+        return; // typeof first: a bare `!Mark` throws a ReferenceError when the library never loaded
+    }
+
+    //IE 11 Compatibility from https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
+    if (!String.prototype.startsWith) { // installed only when the engine has no startsWith of its own
+        String.prototype.startsWith = function(search, pos) {
+            return this.substr(!pos || pos < 0 ? 0 : +pos, search.length) === search; // compare the slice that begins at pos
+        };
+    }
+
+    var search_wrap = document.getElementById('search-wrapper'),
+        searchbar = document.getElementById('searchbar'),
+        searchbar_outer = document.getElementById('searchbar-outer'), // NOTE(review): init() reassigns this from config.searchbar_outer
+        searchresults = document.getElementById('searchresults'),
+        searchresults_outer = document.getElementById('searchresults-outer'),
+        searchresults_header = document.getElementById('searchresults-header'),
+        searchicon = document.getElementById('search-toggle'),
+        content = document.getElementById('content'),
+
+        searchindex = null, // set by init() from config.index
+        doc_urls = [], // replaced by init(); looked up by result.ref in formatSearchResult
+        results_options = { // defaults; init() replaces the whole object with config.results_options
+            teaser_word_count: 30, // word budget that makeTeaser sizes the teaser to
+            limit_results: 30,
+        },
+        search_options = { // defaults; init() replaces the whole object with config.search_options
+            bool: "AND", // makeTeaser requires every term to match when this is 'AND'
+            expand: true,
+            fields: {
+                title: {boost: 1},
+                body: {boost: 1},
+                breadcrumbs: {boost: 0}
+            }
+        },
+        mark_exclude = [],
+        marker = new Mark(content),
+        current_searchterm = "",
+        URL_SEARCH_PARAM = 'search', // url parameter that re-runs a search on load
+        URL_MARK_PARAM = 'highlight', // url parameter listing the words to mark on the page
+        teaser_count = 0, // incremented per rendered teaser to build unique element ids
+
+        SEARCH_HOTKEY_KEYCODE = 83,
+        ESCAPE_KEYCODE = 27,
+        DOWN_KEYCODE = 40,
+        UP_KEYCODE = 38,
+        SELECT_KEYCODE = 13;
+
+    const REGEX_WHITE_SPACE = /\p{White_Space}+/gu,
+        REGEX_SEARCH_SPLITTER = /(?:([\p{Unified_Ideograph}\uAC00-\uD7AF]|[^\p{White_Space}\p{P}\p{Sm}\p{Sc}\p{So}\p{Unified_Ideograph}\uAC00-\uD7AF\p{Z}\p{C}]+|\p{So}\p{Sk}?(?:\u200D\p{So}\p{Sk}?)*)|([\p{P}\p{Sm}\p{Sc}\p{Z}\p{C}]+))\p{White_Space}*/gu,
+        REGEX_STEM = /([a-zA-Z0-9]+)|[^a-zA-Z0-9]+/gu,
+        REGEX_ESCAPE = /[.*+?^${}()|[\]\\]/gu,
+        REGEX_DEFAULT_BEGIN = /^[^\p{White_Space}\p{P}\p{Sm}\p{Sc}\p{So}\p{Unified_Ideograph}\uAC00-\uD7AF\p{Z}\p{C}]/u,
+        REGEX_DEFAULT_END = /[^\p{White_Space}\p{P}\p{Sm}\p{Sc}\p{So}\p{Unified_Ideograph}\uAC00-\uD7AF\p{Z}\p{C}]$/u,
+        REGEX_SENTENCE = /.+?(?:[。？！．](?:(?![\r\n])[\p{White_Space}\p{Po}])*[\r\n]*|(?:[.?!](?:(?![\r\n])[\p{White_Space}\p{Po}])*?(?:(?![\r\n])\p{White_Space})+)+(?=[^\p{L}]*(?!\p{Ll})\p{L})|[\r\n]+)|.+?$/gu, // used by StructuredText to cut text into sentences
+        REGEX_CLAUSE = /.*?(?:(?:[，；]|……)[\p{White_Space}\p{Po}]*|[,;](?:\p{Po}*?\p{White_Space}+)+)|.+?$/gus, // used by Sentence to cut a lower-cased sentence into clauses
+        REGEX_SEGMENT = /([\p{Unified_Ideograph}\uAC00-\uD7AF]+)|([^\p{White_Space}\p{P}\p{Sm}\p{Sc}\p{So}\p{Unified_Ideograph}\uAC00-\uD7AF\p{Z}\p{C}]+)|(\p{So}\p{Sk}?(?:\u200D\p{So}\p{Sk}?)*)|([\p{White_Space}\p{P}\p{Sm}\p{Sc}\p{Z}\p{C}]+)/gu; // groups 1-4 select Ideograph/Default/Emoji/NonWord segments in Clause
+
+    function hasFocus() {
+        return document.activeElement === searchbar; // true while the search input is the focused element
+    }
+
+    function removeChildren(elem) {
+        for (var child = elem.firstChild; child; child = elem.firstChild) {
+            elem.removeChild(child); // detach the current first child until none is left
+        }
+    }
+
+    // Helper to parse a url into its building blocks.
+    function parseURL(url) {
+        var a =  document.createElement('a');
+        a.href = url;
+        return {
+            source: url,
+            protocol: a.protocol.replace(':',''),
+            host: a.hostname,
+            port: a.port,
+            params: (function(){
+                var ret = {};
+                var seg = a.search.replace(/^\?/,'').split('&');
+                var len = seg.length, i = 0, s;
+                for (;i<len;i++) {
+                    if (!seg[i]) { continue; }
+                    s = seg[i].indexOf('='); // cut at the FIRST '=' only, so a value may itself contain '='
+                    ret[s < 0 ? seg[i] : seg[i].slice(0, s)] = s < 0 ? undefined : seg[i].slice(s + 1);
+                }
+                return ret; // values stay percent-encoded; a key without '=' maps to undefined
+            })(),
+            file: (a.pathname.match(/\/([^/?#]+)$/i) || [,''])[1],
+            hash: a.hash.replace('#',''),
+            path: a.pathname.replace(/^([^/])/,'/$1')
+        };
+    }
+
+    // Helper to recreate a url string from its building blocks.
+    function renderURL(urlobject) {
+        var url = urlobject.protocol + "://" + urlobject.host;
+        if (urlobject.port != "") {
+            url += ":" + urlobject.port;
+        }
+        url += urlobject.path;
+        var joiner = "?";
+        // Object.keys() rather than urlobject.params.hasOwnProperty(): the params come from the
+        // url, and a parameter that is itself named "hasOwnProperty" would shadow that method.
+        Object.keys(urlobject.params).forEach(function(prop) {
+            url += joiner + prop + "=" + urlobject.params[prop];
+            joiner = "&";
+        });
+        if (urlobject.hash != "") {
+            url += "#" + urlobject.hash;
+        }
+        return url;
+    }
+
+    // Escapes the html special characters of a teaser; every run of line breaks
+    // collapses into a single <br/>.
+    var escapeHTML = (function() {
+        var ENTITIES = {
+            '&': '&amp;', '<': '&lt;', '>': '&gt;',
+            '"': '&#34;', "'": '&#39;'
+        };
+        return function (s) {
+            // `special` is the captured character, or undefined for a line-break run
+            return s.replace(/([&<>'"])|[\r\n]+/g, function (matched, special) {
+                return special ? ENTITIES[special] : "<br/>";
+            });
+        };
+    })();
+
+    function formatSearchMetric(count, searchterm) {
+        // Chooses the empty, singular or plural wording for the results header.
+        var quoted = "'" + searchterm + "'";
+        if (count == 0) {
+            return "No search results for " + quoted + ".";
+        }
+        var wording = count == 1 ? " search result for " : " search results for ";
+        return count + wording + quoted + ":";
+    }
+
+    function formatSearchResult(result, searchTerms) {
+        // Returns the link + teaser markup for one hit, or undefined when makeTeaser rejects it.
+        var teaser = makeTeaser(result.doc, searchTerms);
+        if (!teaser) return;
+
+        teaser_count++;
+        var teaserId = 'teaser_' + teaser_count;
+
+        // The ?URL_MARK_PARAM= parameter belongs in between the page and the #heading-anchor
+        var pieces = doc_urls[result.ref].split("#");
+        var page = pieces[0], anchor = pieces.length == 1 ? "" : pieces[1]; // no anchor found -> empty
+        var href = path_to_root + page + '?' + URL_MARK_PARAM + '=' + searchTerms.url + '#' + anchor;
+
+        return '<a href="' + href + '" aria-details="' + teaserId + '">' + teaser.breadcrumbs + '</a>'
+            + '<span class="teaser" id="' + teaserId + '" aria-label="Search Result Teaser">'
+            + teaser.body + '</span>';
+    }
+
+    // `targets` is an array of {begin: number, end: number} that has been sorted by begin
+    // in ascending order, and shouldn't overlap.
+    // `range` is {begin: number, end: number}
+    function highlightAndEscape(text, targets, range) {
+        const limit = range ? range.end : text.length;
+        var lastEnd = range ? range.begin : 0;
+
+        // Skip targets that end before the range. The bounds check matters: without it a
+        // range lying after every target reads past the array and throws a TypeError.
+        for (var i = 0; i < targets.length && targets[i].end <= lastEnd; i++) ;
+        if (i === targets.length) return escapeHTML(text.slice(lastEnd, limit));
+        // A target straddling the range start pulls the start back so that it is shown whole.
+        if (lastEnd > targets[i].begin) lastEnd = targets[i].begin;
+        const parts = [];
+        for (; i < targets.length; i++) {
+            const begin = targets[i].begin, end = targets[i].end;
+            if (begin >= limit) break; // omit targets after range
+            parts.push(escapeHTML(text.slice(lastEnd, begin)), '<em>', escapeHTML(text.slice(begin, end)), '</em>');
+            lastEnd = end;
+        }
+        parts.push(escapeHTML(text.slice(lastEnd, limit).trimEnd())); // only the tail is trimmed
+        return parts.join("");
+    }
+
+    // Merge overlapping or contiguous ranges (the input is walked in order, so it must be sorted by begin).
+    function mergeRanges(ranges) {
+        const merged = [];
+        for (const {begin, end} of ranges) {
+            const tail = merged[merged.length - 1];
+            if (!tail || tail.end < begin) {
+                // Gap before this range: start a new entry. A copy is stored, so the
+                // caller's objects are never mutated by a later extension.
+                merged.push({begin, end});
+            } else if (tail.end < end) {
+                // Overlapping or touching: stretch the current entry.
+                tail.end = end;
+            }
+        }
+        return merged;
+    }
+
+    class StructuredText {
+        constructor(text) {
+            this.original = text;
+            this.segments = new Map();
+            this.pos = 0;
+            this.stemmedPos = 0; // `this` is passed to the constructors, and the `pos` fields will be updated there.
+            // matchAll: empty text yields no sentences (match() yields null); `pos` is re-synced because the regex can skip line breaks.
+            this.sentences = Array.from(text.matchAll(REGEX_SENTENCE), match => (this.pos = match.index, new Sentence(match[0], this)));
+            // Array.from + join: Map iterators only gained map() with the ES2025 iterator helpers.
+            this.stemmed = Array.from(this.segments.values(), segment => segment.stemmed.text).join("");
+            delete this.pos;
+            delete this.stemmedPos;
+        }
+
+        // Maps an offset in the stemmed text to an offset in the original text. An offset inside a DefaultSegment
+        // (the only segment type whose stemmed text can differ from its source) maps to the end of that word.
+        originalPos(stemmedPos) {
+            if (stemmedPos <= 0) return stemmedPos;
+            const offset = stemmedPos - this.stemmed.length;
+            if (offset >= 0) return this.original.length + offset;
+            const segment = this.segments.get(stemmedPos);
+            if (segment) return segment.lower.begin;
+            for (var pos = stemmedPos - 1; ; pos--) {
+                const segment = this.segments.get(pos);
+                if (segment) {
+                    return segment.lower.begin + (segment instanceof DefaultSegment ? segment.lower.text.length : stemmedPos - pos);
+                }
+            }
+        }
+
+        // Returns the segment covering `stemmedPos`, or undefined when the offset lies outside the stemmed text.
+        segmentAtStemmed(stemmedPos) {
+            if (stemmedPos < 0) return;
+            if (stemmedPos >= this.stemmed.length) return;
+            const segment = this.segments.get(stemmedPos);
+            if (segment) return segment;
+            for (var pos = stemmedPos - 1; ; pos--) {
+                const segment = this.segments.get(pos);
+                if (segment) return segment;
+            }
+        }
+
+        // `begin` and `end` are indexed on stemmed text.
+        wordCount(begin, end) {
+            var count = 0;
+            while (begin < end) { // a loop rather than one recursive call per segment: long ranges must not exhaust the stack
+                const segment = this.segmentAtStemmed(begin), segmentEnd = segment.stemmed.end;
+                count += segment instanceof IdeographSegment
+                    ? [...this.stemmed.slice(begin, Math.min(end, segmentEnd))].length / 2 : segment.wordCount;
+                begin = segmentEnd;
+            }
+            return count;
+        }
+
+        // `targetsInStemmed` is an array of {begin: number, end: number} that has been sorted by begin in ascending order.
+        // `ranges` is an array of {begin: number, end: number}
+        highlightAndEscapeByStemmed(targetsInStemmed, ranges) {
+            targetsInStemmed = mergeRanges(targetsInStemmed);
+            if (!Array.isArray(ranges)) return this.highlightAndEscapeByStemmedInRange(targetsInStemmed, ranges);
+            ranges = mergeRanges(ranges);
+            if (!ranges.length) return "";
+            const parts = ranges.map(range => this.highlightAndEscapeByStemmedInRange(targetsInStemmed, range));
+            if (ranges[0].begin > 0) parts.unshift("");
+            if (ranges[ranges.length - 1].end < this.stemmed.length) parts.push("");
+            return parts.join("……");
+        }
+
+        highlightAndEscapeByStemmedInRange(targetsInStemmed, range) {
+            return highlightAndEscape(this.original, targetsInStemmed.map(target => {
+                return {begin: this.originalPos(target.begin), end: this.originalPos(target.end)};
+            }), range ? {begin: this.originalPos(range.begin), end: this.originalPos(range.end)} : undefined);
+        }
+
+        // Expands `range`'s end by `wordCount` words.
+        // `range` is like {begin: number, end: number} and is indexed on stemmed text.
+        // The range is modified in-place.
+        // `limit` is where the range would stop expanding even if the required `wordCount` isn't satisfied.
+        // In this case the remaining `wordCount` to be expanded is returned (otherwise undefined is returned)
+        // If `limit` is undefined, expanding would stop at the end of the text.
+        expandEnd(range, wordCount, limit) {
+            if (typeof limit !== "number" || limit > this.stemmed.length) limit = this.stemmed.length;
+            if (range.end < range.begin) range.end = range.begin;
+            if (range.end >= limit) return wordCount < 1 ? undefined : wordCount;
+            const pos = range.end, segment = this.segmentAtStemmed(pos);
+            if (segment instanceof IdeographSegment) {
+                if (wordCount * 2 < 1) return;
+                const end = Math.min(segment.stemmed.end, limit);
+                const slice = [...this.stemmed.slice(pos, end)];
+                const remainingWordCount = wordCount - slice.length / 2;
+                if (remainingWordCount < 0) {
+                    range.end += slice.slice(0, wordCount * 2).join("").length;
+                    return;
+                }
+                range.end = end;
+                return this.expandEnd(range, remainingWordCount, limit);
+            } else {
+                wordCount -= segment.wordCount;
+                if (wordCount < 0) return;
+                range.end = Math.min(segment.stemmed.end, limit);
+                return this.expandEnd(range, wordCount, limit);
+            }
+        }
+
+        // Counterpart to expandEnd
+        expandBegin(range, wordCount, limit) {
+            if (wordCount < 1) return;
+            if (typeof limit !== "number" || limit < 0) limit = 0;
+            if (range.begin > range.end) range.begin = range.end;
+            if (range.begin <= limit) return wordCount; // never below 1 here because of the guard above
+            const pos = range.begin, segment = this.segmentAtStemmed(pos - 1);
+            if (segment instanceof IdeographSegment) {
+                if (wordCount * 2 < 1) return;
+                const begin = Math.max(segment.stemmed.begin, limit);
+                const slice = [...this.stemmed.slice(begin, pos)];
+                const remainingWordCount = wordCount - slice.length / 2;
+                if (remainingWordCount < 0) {
+                    range.begin -= slice.slice(-wordCount * 2).join("").length;
+                    return;
+                }
+                range.begin = begin;
+                return this.expandBegin(range, remainingWordCount, limit);
+            } else {
+                wordCount -= segment.wordCount;
+                range.begin = Math.max(segment.stemmed.begin, limit);
+                return this.expandBegin(range, wordCount, limit);
+            }
+        }
+
+        // Expands `range`'s end to `type`'s boundary.
+        // `range` is like {begin: number, end: number} and is indexed on stemmed text.
+        // The range is modified in-place.
+        // `limit` is where the range would stop expanding even if the required `type`'s boundary isn't reached.
+        // In this case true is returned (otherwise false is returned)
+        // If `limit` is undefined, expanding would stop at the end of the text.
+        expandEndToBoundary(range, type, limit) {
+            if (typeof limit !== "number") limit = this.stemmed.length;
+            if (range.end < range.begin) range.end = range.begin;
+            if (range.end >= limit) return true;
+            var part = this.segmentAtStemmed(range.end);
+            while (!(part instanceof type)) part = part.parent;
+            const partEnd = part.stemmed.end;
+            if (partEnd <= limit) {
+                range.end = partEnd;
+                return false;
+            }
+            range.end = limit;
+            return true;
+        }
+
+        // Counterpart to expandEndToBoundary
+        expandBeginToBoundary(range, type, limit) {
+            if (typeof limit !== "number") limit = 0;
+            if (range.begin > range.end) range.begin = range.end;
+            if (range.begin <= limit) return true;
+            var part = this.segmentAtStemmed(range.begin - 1);
+            while (!(part instanceof type)) part = part.parent;
+            const partBegin = part.stemmed.begin;
+            if (partBegin >= limit) {
+                range.begin = partBegin;
+                return false;
+            }
+            range.begin = limit;
+            return true;
+        }
+
+        // Counterpart to expandEndToBoundary: pulls `range`'s end back to the begin of the `type` part holding it.
+        shrinkEndToBoundary(range, type, limit) {
+            if (typeof limit !== "number") limit = range.begin;
+            if (range.end > this.stemmed.length) range.end = this.stemmed.length;
+            if (range.end <= limit) return true;
+            var part = this.segmentAtStemmed(range.end - 1);
+            while (!(part instanceof type)) part = part.parent;
+            const partBegin = part.stemmed.begin;
+            if (partBegin >= limit) {
+                range.end = partBegin;
+                return false;
+            }
+            range.end = limit;
+            return true;
+        }
+
+        // Counterpart to expandBeginToBoundary: pushes `range`'s begin forward to the end of the `type` part holding it.
+        shrinkBeginToBoundary(range, type, limit) {
+            if (typeof limit !== "number") limit = range.end;
+            if (range.begin < 0) range.begin = 0;
+            if (range.begin >= limit) return true;
+            var part = this.segmentAtStemmed(range.begin);
+            while (!(part instanceof type)) part = part.parent;
+            const partEnd = part.stemmed.end;
+            if (partEnd <= limit) {
+                range.begin = partEnd;
+                return false;
+            }
+            range.begin = limit;
+            return true;
+        }
+    }
+
+    class Sentence {
+        constructor(original, base) { // `base` is the StructuredText under construction; its cursors advance as clauses are built
+            const stemmedBegin = base.stemmedPos;
+            this.original = {text: original, begin: base.pos};
+            this.clauses = original.toLowerCase().match(REGEX_CLAUSE).map(text => new Clause(text, this, base));
+            this.stemmed = {begin: stemmedBegin, end: base.stemmedPos};
+        }
+    }
+
+    class Clause {
+        // `lower` is the lower-cased clause text, `parent` the owning Sentence and `base`
+        // the StructuredText under construction, whose cursors the segments advance.
+        constructor(lower, parent, base) {
+            this.lower = {text: lower, begin: base.pos};
+            const stemmedBegin = base.stemmedPos;
+
+            // Segment class selected by each REGEX_SEGMENT capture group, in group order.
+            const kinds = [IdeographSegment, DefaultSegment, EmojiSegment, NonWordSegment];
+            this.segments = [];
+            for (const match of lower.matchAll(REGEX_SEGMENT)) {
+                const Kind = kinds.find((_, group) => match[group + 1]);
+                if (Kind) this.segments.push(new Kind(match[0], this, base));
+            }
+
+            // By now the stemmed cursor has moved past every segment of this clause.
+            this.stemmed = {begin: stemmedBegin, end: base.stemmedPos};
+            this.parent = parent;
+        }
+    }
+
+    class Segment {
+        constructor(lower, stemmed, parent, base) { // registers itself on `base` and advances both of its cursors
+            const stemmedBegin = base.stemmedPos, stemmedEnd = stemmedBegin + stemmed.length;
+            this.lower = {text: lower, begin: base.pos};
+            this.stemmed = {text: stemmed, begin: stemmedBegin, end: stemmedEnd};
+            this.parent = parent;
+            base.segments.set(stemmedBegin, this); // keyed by the offset where its stemmed text begins
+            base.pos += lower.length;
+            base.stemmedPos = stemmedEnd;
+        }
+    }
+
+    class IdeographSegment extends Segment {
+        constructor(lower, parent, base) {
+            super(lower, lower, parent, base); // the text is used unchanged as its own stemmed form
+            this.wordCount = Array.from(lower).length / 2; // 2 characters count as 1 word
+        }
+    }
+
+    class EmojiSegment extends Segment {
+        // The text is used unchanged as its own stemmed form.
+        constructor(lower, parent, base) {
+            super(lower, lower, parent, base);
+        }
+
+        // Every emoji segment counts as one word.
+        get wordCount() { return 1; }
+    }
+
+    class NonWordSegment extends Segment {
+        // The text is used unchanged as its own stemmed form.
+        constructor(lower, parent, base) {
+            super(lower, lower, parent, base);
+        }
+
+        // Non-word segments never add to a word count.
+        get wordCount() { return 0; }
+    }
+
+    class DefaultSegment extends Segment {
+        // The only segment type whose stemmed form comes from elasticlunr.stemmer.
+        constructor(lower, parent, base) {
+            super(lower, elasticlunr.stemmer(lower), parent, base);
+        }
+
+        // One segment is one word, whatever its length.
+        get wordCount() { return 1; }
+    }
+
+    function makeTeaser(doc, searchTerms) { // returns {body, breadcrumbs} as html, or undefined when the doc fails the term requirements
+        const body = new StructuredText(doc.body), breadcrumbs = new StructuredText(doc.breadcrumbs),
+            requireMatchAll = search_options.bool === 'AND', matchesInBody = [], matchesInBreadcrumbs = [];
+        var termCountInBody = 0;
+        for (const [index, regex] of searchTerms.regex.entries()) {
+            const currentTermInBody = [];
+            for (const match of body.stemmed.matchAll(regex)) {
+                currentTermInBody.push({
+                    begin: match.index, end: match.index + match[0].length, index
+                });
+            }
+            const currentTermInBreadcrumbs = [];
+            for (const match of breadcrumbs.stemmed.matchAll(regex)) {
+                currentTermInBreadcrumbs.push({
+                    begin: match.index, end: match.index + match[0].length
+                });
+            }
+            if (currentTermInBody.length) {
+                termCountInBody++;
+            } else if (requireMatchAll && !currentTermInBreadcrumbs.length) {
+                return; // in AND mode a term found in neither body nor breadcrumbs rejects the doc
+            }
+            matchesInBody.push(...currentTermInBody);
+            matchesInBreadcrumbs.push(...currentTermInBreadcrumbs);
+        }
+        if (!termCountInBody && !matchesInBreadcrumbs.length) return;
+        matchesInBreadcrumbs.sort((a, b) => a.begin - b.begin);
+
+        if (!matchesInBody.length) { // only the breadcrumbs matched: show the opening words of the body
+            const range = {begin: 0, end: 0};
+            body.expandEnd(range, results_options.teaser_word_count);
+            var highlightedBody = body.highlightAndEscapeByStemmed(matchesInBody, [range]);
+            return {
+                body: highlightedBody,
+                breadcrumbs: breadcrumbs.highlightAndEscapeByStemmed(matchesInBreadcrumbs)
+            };
+        }
+        matchesInBody.sort((a, b) => a.begin - b.begin);
+
+        // Find the minimum window that contains at least one occurrence of each search term.
+        // `matches` is an array of { begin: number, end: number, index: number } where index is the index of search term.
+        // `termCount` is the number of unique search terms that occur.
+        function minWindow(matches, termCount) {
+            var begin = 0, end = 0, termCountInRange = 0, result = {begin: 0, end: body.stemmed.length};
+            const termCountTableInRange = [];
+
+            // Contract window's begin until it no longer contains all keywords
+            function contractWindow() {
+                while (true) {
+                    const index = matches[begin].index;
+                    begin++;
+                    termCountTableInRange[index]--;
+                    if (!termCountTableInRange[index]) {
+                        const currentWindow = {
+                            begin: matches[begin - 1].begin, end: matches[end - 1].end
+                        };
+                        if (currentWindow.end - currentWindow.begin < result.end - result.begin) result = currentWindow;
+                        break;
+                    }
+                }
+            }
+
+            // Expand window's end until it contains all keywords
+            while (end < matches.length) {
+                const index = matches[end].index;
+                end++;
+                if (termCountTableInRange[index]) {
+                    termCountTableInRange[index]++;
+                } else {
+                    termCountTableInRange[index] = 1;
+                    termCountInRange++;
+                    if (termCountInRange >= termCount) {
+                        contractWindow();
+                        break;
+                    }
+                }
+            }
+
+            // Expand window's end until it contains all keywords again
+            while (end < matches.length) {
+                const index = matches[end].index;
+                end++;
+                termCountTableInRange[index]++;
+                if (termCountTableInRange[index] === 1) contractWindow();
+            }
+            return result;
+        }
+
+        const range = minWindow(matchesInBody, termCountInBody);
+        const rawBegin = range.begin, rawEnd = range.end;
+        body.expandBeginToBoundary(range, Sentence);
+        body.expandEndToBoundary(range, Sentence);
+
+        const wordCountLimit = results_options.teaser_word_count;
+        var ranges = [], wordCount = body.wordCount(range.begin, range.end);
+        if (wordCount < wordCountLimit) {
+            var oldBegin, oldWordCount;
+            do {
+                oldBegin = range.begin;
+                oldWordCount = wordCount;
+                const reachedLimit = body.expandBeginToBoundary(range, Sentence);
+                wordCount = body.wordCount(range.begin, range.end);
+                if (reachedLimit) break;
+            } while (wordCount < wordCountLimit);
+            if (wordCount > wordCountLimit) {
+                range.begin = oldBegin;
+                wordCount = oldWordCount;
+            }
+            if (wordCount < wordCountLimit) {
+                const remainingWordCount = body.expandEnd(range, wordCountLimit - wordCount);
+                if (remainingWordCount) body.expandBegin(range, remainingWordCount);
+            }
+            ranges.push(range);
+        } else if (wordCount === wordCountLimit) {
+            ranges.push(range);
+        } else {
+            // When `range` can't be shrunk to `wordCountLimit`, the actual wordCount is returned.
+            function tryShrink(range, wordCount, wordCountLimit, rawBegin, rawEnd) {
+                var oldEnd;
+                do {
+                    oldEnd = range.end;
+                    if (body.shrinkEndToBoundary(range, Clause, rawEnd)) {
+                        range.end = oldEnd;
+                        break;
+                    }
+                    wordCount = body.wordCount(range.begin, range.end);
+                } while (wordCount > wordCountLimit);
+                if (wordCount <= wordCountLimit) {
+                    if (wordCount < wordCountLimit) body.expandEnd(range, wordCountLimit - wordCount);
+                    // Success is signalled by returning undefined; adding `range` to `ranges` is left to the caller.
+                } else {
+                    var oldBegin;
+                    do {
+                        oldBegin = range.begin;
+                        if (body.shrinkBeginToBoundary(range, Clause, rawBegin)) {
+                            range.begin = oldBegin;
+                            break;
+                        }
+                        wordCount = body.wordCount(range.begin, range.end);
+                    } while (wordCount > wordCountLimit);
+                    if (wordCount > wordCountLimit) return wordCount;
+                    if (wordCount < wordCountLimit) body.expandBegin(range, wordCountLimit - wordCount);
+                }
+            }
+            wordCount = tryShrink(range, wordCount, wordCountLimit, rawBegin, rawEnd);
+            if (!wordCount) {
+                ranges.push(range);
+            } else {
+                // split the result into pieces and shrink them individually, then join them with ……
+                var freshMatchesInBody = matchesInBody.filter(match => match.begin >= range.begin && match.end <= range.end);
+                for (const sentence of body.sentences) {
+                    const sentenceEnd = sentence.stemmed.end;
+                    if (sentenceEnd > freshMatchesInBody[0].begin) {
+                        ranges.push({begin: sentence.stemmed.begin, end: sentenceEnd});
+                        const currentIndex = freshMatchesInBody[0].index;
+                        freshMatchesInBody = freshMatchesInBody.filter(match => match.index !== currentIndex);
+                        while (freshMatchesInBody.length && sentenceEnd > freshMatchesInBody[0].begin) {
+                            const currentIndex = freshMatchesInBody[0].index;
+                            freshMatchesInBody = freshMatchesInBody.filter(match => match.index !== currentIndex);
+                        }
+                        if (!freshMatchesInBody.length) break;
+                    }
+                }
+                const wordCountList = ranges.map(range => body.wordCount(range.begin, range.end));
+                wordCount = wordCountList.reduce((sum, wordCount) => sum + wordCount);
+                var exceedingWordCount = wordCount - wordCountLimit;
+                if (exceedingWordCount < 0) {
+                    var remainingWordCount = wordCountLimit - wordCount;
+                    for (var i = 0; i < ranges.length - 1; i++) {
+                        remainingWordCount = body.expandEnd(ranges[i], remainingWordCount, ranges[i + 1].begin);
+                        if (!remainingWordCount) break;
+                    }
+                    if (remainingWordCount) body.expandEnd(ranges[i], remainingWordCount);
+                } else if (exceedingWordCount > 0) {
+                    const reversedMatchesInBody = [...matchesInBody];
+                    reversedMatchesInBody.sort((a, b) => b.end - a.end);
+                    for (i = ranges.length - 1; i >= 0; i--) { // shrink from the last piece backwards
+                        const range = ranges[i];
+                        const actualWordCount = tryShrink(range, wordCountList[i], wordCountList[i] - exceedingWordCount,
+                            matchesInBody.find(match => match.begin >= range.begin).begin,
+                            reversedMatchesInBody.find(match => match.end <= range.end).end);
+                        if (!actualWordCount) break;
+                        exceedingWordCount -= wordCountList[i] - actualWordCount;
+                    }
+                }
+            }
+        }
+        return {
+            body: body.highlightAndEscapeByStemmed(matchesInBody, ranges),
+            breadcrumbs: breadcrumbs.highlightAndEscapeByStemmed(matchesInBreadcrumbs)
+        };
+    }
+
+    function init(config) {
+        // Take over the generated settings, in the same order as before.
+        // NOTE(review): searchbar_outer held a DOM element until this point; confirm that
+        // config really carries a replacement for it.
+        ({results_options, search_options, searchbar_outer, doc_urls} = config);
+        searchindex = elasticlunr.Index.load(config.index);
+
+        // Set up events: each listener just delegates to its named handler.
+        searchicon.addEventListener('click', () => { searchIconClickHandler(); }, false);
+        searchbar.addEventListener('keyup', () => { searchbarKeyUpHandler(); }, false);
+        document.addEventListener('keydown', (e) => { globalKeyHandler(e); }, false);
+        // If the user uses the browser buttons, do the same as if a reload happened
+        window.onpopstate = () => { doSearchOrMarkFromUrl(); };
+        // Suppress "submit" events so the page doesn't reload when the user presses Enter
+        document.addEventListener('submit', (e) => { e.preventDefault(); }, false);
+
+        // If reloaded, do the search or mark again, depending on the current url parameters
+        doSearchOrMarkFromUrl();
+    }
+
+    function unfocusSearchbar() {
+        // hacky, but just focusing a div only works once: focus a throwaway input instead
+        var decoy = document.createElement('input');
+        decoy.setAttribute('style', 'position: absolute; opacity: 0;');
+        searchicon.appendChild(decoy);
+        decoy.focus();
+        decoy.remove();
+    }
+
+    // On reload or browser history backwards/forwards events, parse the url and do search or mark
+    function doSearchOrMarkFromUrl() {
+        // Check current URL for search request
+        var url = parseURL(window.location.href);
+        // Borrowed from Object.prototype: a url parameter named "hasOwnProperty" would shadow the method on url.params.
+        var hasParam = function(name) { return Object.prototype.hasOwnProperty.call(url.params, name); };
+        if (hasParam(URL_SEARCH_PARAM) && url.params[URL_SEARCH_PARAM] != "") {
+            showSearch(true);
+            searchbar.value = decodeURIComponent(
+                (url.params[URL_SEARCH_PARAM] + '').replace(/\+/g, '%20'));
+            searchbarKeyUpHandler(); // -> doSearch()
+        } else {
+            showSearch(false);
+        }
+
+        if (hasParam(URL_MARK_PARAM)) {
+            var words = decodeURIComponent(url.params[URL_MARK_PARAM]).split(' ');
+            marker.mark(words, {exclude: mark_exclude});
+
+            var markers = document.querySelectorAll("mark");
+            function hide() {
+                for (var i = 0; i < markers.length; i++) {
+                    markers[i].classList.add("fade-out");
+                }
+                // One timer per click; previously an identical unmark() was scheduled for every marker.
+                window.setTimeout(function() { marker.unmark(); }, 300);
+            }
+            for (var i = 0; i < markers.length; i++) {
+                markers[i].addEventListener('click', hide);
+            }
+        }
+    }
+
+    // Eventhandler for keyevents on `document` (close/open the search, move through the results)
+    function globalKeyHandler(e) {
+        if (e.altKey || e.ctrlKey || e.metaKey || e.shiftKey || e.target.type === 'textarea' || e.target.type === 'text' || !hasFocus() && /^(?:input|select|textarea)$/i.test(e.target.nodeName)) { return; }
+
+        if (e.keyCode === ESCAPE_KEYCODE) {
+            e.preventDefault();
+            searchbar.classList.remove("active");
+            setSearchUrlParameters("", (searchbar.value.trim() !== "") ? "push" : "replace");
+            if (hasFocus()) {
+                unfocusSearchbar();
+            }
+            showSearch(false);
+            marker.unmark();
+        } else if (!hasFocus() && e.keyCode === SEARCH_HOTKEY_KEYCODE) {
+            e.preventDefault();
+            showSearch(true);
+            window.scrollTo(0, 0);
+            searchbar.select();
+        } else if (hasFocus() && e.keyCode === DOWN_KEYCODE) {
+            e.preventDefault();
+            // The result list may be empty; only leave the searchbar if there is an entry to focus
+            var first = searchresults.firstElementChild;
+            if (first) { unfocusSearchbar(); first.classList.add("focus"); }
+        } else if (!hasFocus() && (e.keyCode === DOWN_KEYCODE
+            || e.keyCode === UP_KEYCODE
+            || e.keyCode === SELECT_KEYCODE)) {
+            // not `:focus` because browser does annoying scrolling
+            var focused = searchresults.querySelector("li.focus");
+            if (!focused) return;
+            e.preventDefault();
+            if (e.keyCode === DOWN_KEYCODE) {
+                var next = focused.nextElementSibling;
+                if (next) {
+                    focused.classList.remove("focus");
+                    next.classList.add("focus");
+                }
+            } else if (e.keyCode === UP_KEYCODE) {
+                focused.classList.remove("focus");
+                var prev = focused.previousElementSibling;
+                if (prev) {
+                    prev.classList.add("focus");
+                } else {
+                    searchbar.select();
+                }
+            } else { // SELECT_KEYCODE
+                window.location.assign(focused.querySelector('a'));
+            }
+        }
+    }
+
+    // Open the search area when `yes` is truthy; otherwise close it and clear the result focus.
+    function showSearch(yes) {
+        if (!yes) {
+            search_wrap.classList.add('hidden');
+            searchicon.setAttribute('aria-expanded', 'false');
+            // Drop the keyboard-navigation highlight from every result entry
+            var entries = searchresults.children;
+            for (var idx = 0; idx < entries.length; idx++) { entries[idx].classList.remove("focus"); }
+            return;
+        }
+        search_wrap.classList.remove('hidden');
+        searchicon.setAttribute('aria-expanded', 'true');
+    }
+
+    // Reveal the results container when `yes` is truthy, hide it otherwise.
+    // Visibility is controlled solely through the `hidden` class.
+    function showResults(yes) {
+        var outerClasses = searchresults_outer.classList;
+        var action = yes ? 'remove' : 'add';
+        outerClasses[action]('hidden');
+    }
+
+    // Click handler for the search icon: toggles the search area.
+    // When opening, the page is scrolled to the top and the searchbar text is selected.
+    function searchIconClickHandler() {
+        var isOpen = !search_wrap.classList.contains('hidden');
+        if (isOpen) { showSearch(false); return; }
+        // Currently hidden: open it
+        showSearch(true);
+        window.scrollTo(0, 0);
+        searchbar.select();
+    }
+
+    // Eventhandler for keyevents while the searchbar is focused
+    function searchbarKeyUpHandler() {
+        var searchterm = searchbar.value.trim();
+        if (searchterm != "") {
+            searchbar.classList.add("active");
+            doSearch(searchterm);
+        } else {
+            searchbar.classList.remove("active");
+            showResults(false);
+            removeChildren(searchresults);
+            // The results are gone, so forget the last term; otherwise doSearch() skips it as a repeat
+            current_searchterm = "";
+        }
+        setSearchUrlParameters(searchterm, "push_if_new_search_else_replace");
+        // Remove marks
+        marker.unmark();
+    }
+
+    // Rewrite the current url: set (or drop) ?URL_SEARCH_PARAM=, always drop ?URL_MARK_PARAM, and
+    // clear the #heading-anchor whenever a search parameter is written.
+    // `action` is one of "push", "replace" or "push_if_new_search_else_replace" and selects whether
+    // a new browser history item is pushed or the current one is replaced; the last variant pushes
+    // only if the url had no `?URL_SEARCH_PARAM=abc` yet.
+    function setSearchUrlParameters(searchterm, action) {
+        var url = parseURL(window.location.href);
+        var hadSearchParam = url.params.hasOwnProperty(URL_SEARCH_PARAM);
+        var pushIfNew = action == "push_if_new_search_else_replace";
+        delete url.params[URL_MARK_PARAM];
+        if (searchterm != "" || pushIfNew) {
+            url.params[URL_SEARCH_PARAM] = searchterm;
+            url.hash = "";
+        } else {
+            delete url.params[URL_SEARCH_PARAM];
+        }
+        // A brand-new search gets its own history entry so "back" returns to the pre-search page;
+        // refining an existing search only rewrites the current entry.
+        if (action == "push" || (pushIfNew && !hadSearchParam)) {
+            history.pushState({}, document.title, renderURL(url));
+        } else if (action == "replace" || (pushIfNew && hadSearchParam)) {
+            history.replaceState({}, document.title, renderURL(url));
+        }
+    }
+
+    // Derive from the raw search string: the term lists, the lunr query, the highlight regexes and a url-safe form
+    function preprocessSearchTerms(searchTerms) {
+        const original = searchTerms.split(REGEX_WHITE_SPACE);
+        const stemmed = original.map(term => term.toLowerCase().replace(REGEX_STEM, (match, english) => english ? elasticlunr.stemmer(match) : match));
+        return {
+            original,
+            stemmed,
+            lunr: searchTerms.replace(REGEX_SEARCH_SPLITTER, (_, word) => word ? `${word} ` : ""),
+            regex: stemmed.map(term => {
+                var escaped = term.replace(REGEX_ESCAPE, '\\$&');
+                if (REGEX_DEFAULT_BEGIN.test(term)) {
+                    escaped = "(?<![^\\p{White_Space}\\p{P}\\p{Sm}\\p{Sc}\\p{So}\\p{Unified_Ideograph}\\uAC00-\\uD7AF\\p{Z}\\p{C}])" + escaped;
+                }
+                if (REGEX_DEFAULT_END.test(term)) {
+                    escaped += search_options.expand ? "[^\\p{White_Space}\\p{P}\\p{Sm}\\p{Sc}\\p{So}\\p{Unified_Ideograph}\\uAC00-\\uD7AF\\p{Z}\\p{C}]*" : "(?![^\\p{White_Space}\\p{P}\\p{Sm}\\p{Sc}\\p{So}\\p{Unified_Ideograph}\\uAC00-\\uD7AF\\p{Z}\\p{C}])";
+                }
+                return new RegExp(escaped, 'gu');
+            }),
+            // encodeURIComponent escapes every char that could allow an XSS except ', so ' is replaced
+            // with %27 afterwards (replacing it before encoding would turn the % into %25, i.e. %2527).
+            url: encodeURIComponent(searchTerms).replace(/\'/g, "%27")
+        };
+    }
+
+    // Run a search for `searchterm` and render the matching entries into the result list.
+    function doSearch(searchterm) {
+        // Nothing to do when the term is unchanged since the previous call
+        if (current_searchterm == searchterm) { return; }
+        current_searchterm = searchterm;
+
+        // Without a loaded index there is nothing to query
+        if (searchindex == null) { return; }
+
+        const terms = preprocessSearchTerms(searchterm);
+        const hits = searchindex.search(terms.lunr, search_options);
+
+        // Replace the old entries with the new ones
+        removeChildren(searchresults);
+        let shown = 0;
+        for (const hit of hits) {
+            const html = formatSearchResult(hit, terms);
+            // Hits that produce no markup are skipped and do not count towards the limit
+            if (html) {
+                const item = document.createElement('li');
+                item.innerHTML = html;
+                searchresults.appendChild(item);
+                shown += 1;
+                if (shown >= results_options.limit_results) { break; }
+            }
+        }
+
+        // Update the header line from the number of entries shown and the term
+        searchresults_header.innerText = formatSearchMetric(shown, searchterm);
+        // Finally make the result area visible
+        showResults(true);
+    }
+
+    fetch(path_to_root + 'searchindex.json')
+        .then(response => response.json())
+        .then(init)
+        .catch(() => { // Any failure above: load the index from searchindex.js instead
+            const fallback = document.createElement('script');
+            fallback.onload = () => init(window.search);
+            fallback.src = path_to_root + 'searchindex.js';
+            document.head.appendChild(fallback);
+        });
+
+    // Exported functions
+    search.hasFocus = hasFocus;
+})(window.search);
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 2b17cc7d84..f6e324062d 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -7,7 +7,7 @@ use crate::errors::Error;
 use log::error;
 use once_cell::sync::Lazy;
 use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag, TagEnd};
-use regex::Regex;
+use regex::{Captures, Regex};
 
 use std::borrow::Cow;
 use std::collections::HashMap;
@@ -19,10 +19,17 @@ pub use self::string::{
     take_rustdoc_include_lines,
 };
 
-/// Replaces multiple consecutive whitespace characters with a single space character.
+/// Collapses each run of two or more whitespace characters into a single `"\n"` if the run
+/// contains a line break (`\r` or `\n`), or into a single space otherwise.
 pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
     static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\s\s+").unwrap());
-    RE.replace_all(text, " ")
+    RE.replace_all(text, |caps: &Captures<'_>| {
+        if caps[0].contains(['\r', '\n']) {
+            "\n"
+        } else {
+            " "
+        }
+    })
 }
 
 /// Convert the given string to a valid HTML element ID.