-
-
Notifications
You must be signed in to change notification settings - Fork 68
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ERROR] TypeError: Cannot read properties of undefined (reading 'load') #234
Comments
I fiddled around with the parse.js file from the node_modules lib a bit. Changed the method to include some console.logs function html2text(html, type, url = "?") {
console.log("HTML input:", html ? html.slice(0, 100) : 'null'); // Show start of HTML
console.log("Type:", type);
console.log("URL:", url);
console.log("cheerio_1.default exists:", !!cheerio_1.default); // Check if cheerio is loaded
console.log("cheerio_1.default type:", typeof cheerio_1.default); // What type of object is it?
if (cheerio_1.default) {
console.log("cheerio_1.default keys:", Object.keys(cheerio_1.default)); // What methods are available?
}
const $ = cheerio_1.default.load(html);
console.log("Cheerio loaded:", $ ? "yes" : "no"); And the output is:
|
Update: "use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.getDocusaurusTag = exports.html2text = void 0;
const tslib_1 = require("tslib");
-const cheerio_1 = tslib_1.__importDefault(require("cheerio"));
+const cheerio = require("cheerio");
const logger_1 = tslib_1.__importDefault(require("./logger"));
// Tag names after which `_getText` appends a single space to the extracted
// text. Matching is done on the element's `name` via `BLOCK_TAGS.includes`.
const BLOCK_TAGS = [
"address",
"article",
"aside",
"blockquote",
"canvas",
"dd",
"div",
"dl",
"dt",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"header",
"hr",
"li",
"main",
"nav",
"noscript",
"ol",
"p",
"pre",
"section",
"table",
"tfoot",
"ul",
"video",
// Table cells are not strictly block tags, but they get the same trailing
// space as the tags above.
"td",
"th",
];
/**
 * Recursively concatenates the text found in a node or in a list of nodes.
 *
 * - Arrays are walked in order; a single space is appended after every
 *   `tag` node whose name is listed in `BLOCK_TAGS`, and after every
 *   `<span>` whose `class` attribute is exactly "token-line".
 * - `text` nodes yield their data with each newline replaced by a space.
 * - `tag` nodes yield the text of their contents.
 * - `style`, `script` and `comment` nodes yield an empty string.
 * - Any other node type logs a warning and yields an empty string.
 *
 * @param {Function} $ - Cheerio-style wrapper; called as `$(node)`.
 * @param {object|object[]} el - A node, or an array of nodes.
 * @returns {string} The collected text; whitespace is not collapsed here.
 */
function _getText($, el) {
  if (!Array.isArray(el)) {
    switch (el.type) {
      case "text":
        return el.data.replace(/\n/g, " ");
      case "tag":
        return _getText($, $(el).contents().get());
      case "style":
      case "script":
      case "comment":
        return "";
      default:
        logger_1.default.warn(`Received an unknown element while extracting content from HTML files. This should never happen. Please open an issue at https://github.com/cmfcmf/docusaurus-search-local/issues if you see this message (debug: got type ${el.type}).`);
        return "";
    }
  }

  return el.reduce((collected, node) => {
    let piece = _getText($, node);
    if (node.type === "tag") {
      const isBlockTag = BLOCK_TAGS.includes(node.name);
      // Each line of a code block is rendered as <span class="token-line">.
      const isCodeLine = !isBlockTag &&
        node.name === "span" &&
        $(node).attr("class") === "token-line";
      if (isBlockTag || isCodeLine) {
        piece += " ";
      }
    }
    return collected + piece;
  }, "");
}
/**
 * Extracts the text of `el` via `_getText` and normalizes it: every run of
 * whitespace becomes a single space, and leading/trailing whitespace is removed.
 *
 * @param {Function} $ - Cheerio-style wrapper, forwarded to `_getText`.
 * @param {object|object[]} el - A node, or an array of nodes.
 * @returns {string} The normalized text.
 */
function getText($, el) {
  const rawText = _getText($, el);
  const singleSpaced = rawText.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
/**
 * Extracts the page title and the searchable text sections from a rendered
 * HTML page.
 *
 * @param {*} html - Passed as-is to cheerio's `load` (presumably an HTML string).
 * @param {string} type - "docs", "blog" or "page".
 * @param {string} [url="?"] - Only used in the warning that is logged when a
 *   "page" has no `<main>` element.
 * @returns {object} For "docs" and "blog":
 *   `{ pageTitle, sections, docSidebarParentCategories }`, where
 *   `docSidebarParentCategories` is `undefined` for "blog". For "page":
 *   `{ pageTitle, sections }` with exactly one section. Every section has the
 *   shape `{ title, hash, content, tags }`.
 * @throws {Error} If `type` is none of the values listed above.
 */
function html2text(html, type, url = "?") {
- const $ = cheerio_1.default.load(html);
+ const $ = cheerio.load(html);
// Remove copy buttons from code boxes
$("div[class^=codeBlockContent_] button").remove();
if (type === "docs") {
// Remove version badges
$("span")
.filter((_, element) => $(element).hasClass("badge") &&
$(element).text().startsWith("Version:"))
.remove();
}
if (type === "docs" || type === "blog") {
const HEADINGS = "h1, h2, h3";
const pageTitle = $("article h1").first().text();
const sections = [];
// Parse tags, and add them to the first section.
const tags = $("article footer ul[class^=tags_] li")
.map((_, element) => $(element).text())
.toArray();
// Make sure to also adjust the highlighting functionality in the client
// if you change the top element here.
// One section is pushed per h1/h2/h3 found inside <article>.
$("article")
.find(HEADINGS)
.each((i, heading) => {
const title = $(heading)
.contents()
// Remove elements that are marked as aria-hidden and the hash-link.
// This is mainly done to remove anchors like these:
//
// <a aria-hidden="true" tabindex="-1" class="hash-link" href="#first-subheader" title="Direct link to heading">#</a>
// <a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="first-header"></a>
// <a class="hash-link" href="#first-header" title="Direct link to heading">#</a>
.not("a[aria-hidden=true], a.hash-link")
.text();
// Falls back to an empty hash when the heading has no hash-link anchor.
const hash = $(heading).find("a.hash-link").attr("href") || "";
let $sectionElements;
if ($(heading).parents(".markdown").length === 0) {
// $(heading) is the page title
const $firstElement = $("article")
.children() // div.markdown, header
.not("header") // div.markdown
.children() // h1, p, p, h2, ...
.first(); // h1 || p
if ($firstElement.filter(HEADINGS).length) {
// The first element is a header. This section is empty.
sections.push({
title,
hash,
content: "",
tags: i === 0 ? tags : [],
});
return;
}
// The section is the first element plus everything after it, up to (but
// not including) the next heading or <header>.
$sectionElements = $firstElement
.nextUntil(`${HEADINGS}, header`)
.addBack();
}
else {
// If the user uses an h1 tag as part of the markdown, Docusaurus will generate a header
// around it for some reason, which we need to ignore.
//
// <header>
// <h1 class="h1Heading_27L5">FIRST HEADER</h1>
// </header>
const root = $(heading).parent("header").length
? $(heading).parent()
: $(heading);
// The section is everything after the heading (or its wrapping <header>),
// up to the next heading or <header>.
$sectionElements = root.nextUntil(`${HEADINGS}, header`);
}
const content = getText($, $sectionElements.get());
sections.push({
title,
hash,
content,
tags: i === 0 ? tags : [],
});
});
// Texts of the active sidebar links, without the last one
// (presumably the link of the current page itself - TODO confirm).
const docSidebarParentCategories = type === "docs"
? $(".theme-doc-sidebar-container .menu__link--active")
.map((_, element) => $(element).text())
.get()
.slice(0, -1)
: undefined;
return { pageTitle, sections, docSidebarParentCategories };
}
else if (type === "page") {
$("a[aria-hidden=true]").remove();
// Prefer the first <h1>; fall back to <title> when the page has none.
let $pageTitle = $("h1").first();
if (!$pageTitle.length) {
$pageTitle = $("title");
}
const pageTitle = $pageTitle.text();
// Make sure to also adjust the highlighting functionality in the client
// if you change the top element here.
const $main = $("main").first();
if (!$main.length) {
logger_1.default.warn("Page has no <main>, therefore no content was indexed for this page.", { url });
}
// A "page" always produces a single section; its content is empty when
// there is no <main>.
return {
pageTitle,
sections: [
{
title: pageTitle,
hash: "",
content: $main.length ? getText($, $main.get()) : "",
tags: [],
},
],
};
}
else {
throw new Error(`Cannot index files of unknown type ${type}!`);
}
}
exports.html2text = html2text;
/**
 * Reads the `content` attribute of the `<meta name="docusaurus_tag">` element
 * of a page.
 *
 * @param {*} html - Passed as-is to cheerio's `load` (presumably an HTML string).
 * @returns {string} The non-empty value of the meta tag.
 * @throws {Error} If the meta tag is missing or its content is empty.
 */
function getDocusaurusTag(html) {
- const $ = cheerio_1.default.load(html);
+ const $ = cheerio.load(html);
const tag = $('meta[name="docusaurus_tag"]').attr("content");
if (!tag || tag.length === 0) {
throw new Error("The `docusaurus_tag` meta tag could not be found. Please make sure that your page is wrapped in the `<Layout>` component (from `@theme/Layout`). If it is, then this is a bug, please report it.");
}
return tag;
}
exports.getDocusaurusTag = getDocusaurusTag;
Is there any reason you are using "tslib" to do this? |
meanwhile I published my "fixed" version as @gentledepp/docusaurus-search-local |
Hello @gentledepp , thank you very much for your version, it basically works for me. Even if you type in a search, I don't get any search results displayed. The good thing is that the search seems to work, because if you press ENTER, you end up on a page that contains the search result. Can you perhaps give me a tip on what I'm doing wrong? I am using it with Docusaurus 3.7.0. Many thanks for your help. |
Hi @m1w31l you can fix the ui issue by manually adding missing translation: Create file {
"cmfcmf/d-s-l.searchBar.placeholder": {
"message": "Search...",
"description": "Search..."
},
"cmfcmf/d-s-l.searchBar.noResults": {
"message": "No results found.",
"description": "No results found."
},
"cmfcmf/d-s-l.searchBar.clearButtonTitle": {
"message": "Clear",
"description": "Clear"
},
"cmfcmf/d-s-l.searchBar.detachedCancelButtonText": {
"message": "Cancel",
"description": "Cancel"
},
"cmfcmf/d-s-l.searchBar.submitButtonTitle": {
"message": "Submit",
"description": "Submit"
}
} References: |
Hello @gentledepp , thank you very much for your help. And now only the search bar appears. But the preview still seems to be invisible, if you go down with the cursor keys and then press ENTER, you will also be taken to other pages. Do you know why this is the case or is it the same for you? |
Hi! All I did was take the sources of this plugin from "node_modules", copy them, replace the one line of code that broke the search for us, and publish the result under a different name. That being said, we are still on Docusaurus 2.x. My tip: try to find the issue in the plugin - you can change its source code directly in the node_modules folder and the change will be picked up. (Add console.logs and so on to figure out what is wrong.) I hope that helps :-| |
Hello, |
In August 2024 Cheerio went from RC12 to 1.0.0. This included a bunch of breaking changes, one of which is cmfcmf/docusaurus-search-local#234. There isn't a fix available for the latest version of Cheerio, but updating dependencies pulls in the latest non-RC version automatically. To work around this, we'll pin Cheerio to the RC until the issue is fixed. This change enabled a full update of the dependency tree as well as bumping Docusaurus to latest.
Hello @spikeheap , thank you very much for the update.
Another observation I have made. |
Hi @m1w31l & @gentledepp We came up against the same problem, and diagnosed that the error was introduced by the Cheerio 1.0.0 release. We fixed the issue and upgraded the rest of our dependency tree by pinning Cheerio to 1.0.0-rc.12: "dependencies": {
"@cmfcmf/docusaurus-search-local": "^1.2.0",
"@docusaurus/core": "^3.7.0",
"@docusaurus/preset-classic": "^3.7.0",
"@mdx-js/react": "^3.0.0",
"cheerio": "1.0.0-rc.12"
} This saved us from having to fork the repo, but does mean we're pinned to an older version. I believe we're on React 18 because an unrelated dependency restricts to <19, but that's really helpful to know if/when it does get updated. |
Please describe the bug you are seeing
I am building my docusaurus project, and I get this error
How can we best reproduce the bug?
No response
Which version of Docusaurus and @cmfcmf/docusaurus-search-local are you using?
I am using docusaurus with the following dependencies
This is my tsconfig.json
Which Node.js version are you using?
No response
Which theme are you using?
Yes, I am using @docusaurus/theme-classic
How does your plugin config look like?
Additional context
It seems to have to do with this line:
but I have no clue what is going on there.
When looking at the cheerio index.js that is shown here in the popup:
There is no "default" field or anything like that. (Sorry, I am not a node specialist :-|)
Can you please give me any hint on what I could try to do?
The text was updated successfully, but these errors were encountered: