From ed96f83fc7ae2d14e134073f71beca01fab33fe4 Mon Sep 17 00:00:00 2001 From: jiaojiaodubai <63148861+jiaojiaodubai@users.noreply.github.com> Date: Tue, 20 Aug 2024 13:31:03 +0800 Subject: [PATCH] innerData -> data; getWith -> get --- CNKI thinker.js | 22 +++++++++---------- CNKI.js | 26 +++++++++++------------ Douban.js | 16 +++++++------- National Standards Open System - China.js | 16 +++++++------- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/CNKI thinker.js b/CNKI thinker.js index 553fafc..6b88c03 100644 --- a/CNKI thinker.js +++ b/CNKI thinker.js @@ -96,22 +96,22 @@ async function scrape(doc, url = doc.location.href) { newItem.creators.forEach(creator => creator.fieldMode = 1); let labels = new TextLabels(doc, '.bc_a, .desc-info'); Z.debug(labels.data.map(arr => [arr[0], ZU.trimInternal(arr[1])])); - newItem.edition = labels.getWith('版次'); + newItem.edition = labels.get('版次'); switch (newItem.itemType) { case 'book': - newItem.numPages = labels.getWith('页数'); + newItem.numPages = labels.get('页数'); break; case 'bookSection': newItem.bookTitle = text(doc, '.book-p'); - newItem.pages = labels.getWith('页码'); + newItem.pages = labels.get('页码'); break; } - newItem.publisher = text(doc, '.xqy_g') || labels.getWith('出版社'); - newItem.date = ZU.strToISO(labels.getWith('出版时间').replace(/(\d{4})(0?\d{1,2})(\d{1,2})/, '$1-$2-$3')); + newItem.publisher = text(doc, '.xqy_g') || labels.get('出版社'); + newItem.date = ZU.strToISO(labels.get('出版时间').replace(/(\d{4})(0?\d{1,2})(\d{1,2})/, '$1-$2-$3')); newItem.language = 'zh-CN'; - newItem.ISBN = labels.getWith('国际标准书号ISBN') || tryMatch(url, /bookcode=(\d{10,13})/, 1); + newItem.ISBN = labels.get('国际标准书号ISBN') || tryMatch(url, /bookcode=(\d{10,13})/, 1); newItem.url = url; - newItem.libraryCatalog = labels.getWith('所属分类'); + newItem.libraryCatalog = labels.get('所属分类'); newItem.notes.push(innerText(doc, '.xqy_bd')); extra.add('CNKICite', text(doc, '.book_zb_yy span:last-child')); extra.add('price', text(doc, '#OriginalPrice')); @@ -131,7 +131,7 @@ class TextLabels { .replace(/\n([^】\]::]+?\n)/g, ' $1') .split('\n')); // innerText在详情页表现良好,但在多条目表现欠佳,故统一使用经过处理的text - this.innerData = text(doc, selector) + this.data = text(doc, selector) .replace(/^[\s\n]*/gm, '') .replace(/:\n/g, ': ') .replace(/\n\/\n/g, ' / ') @@ -144,17 +144,17 @@ class TextLabels { ]); } - getWith(label) { + get(label) { if (Array.isArray(label)) { let result = label - .map(aLabel => this.getWith(aLabel)) + .map(aLabel => this.get(aLabel)) .find(value => value); return result ? result : ''; } let pattern = new RegExp(label); - let keyVal = this.innerData.find(element => pattern.test(element[0])); + let keyVal = this.data.find(element => pattern.test(element[0])); return keyVal ? ZU.trimInternal(keyVal[1]) : ''; diff --git a/CNKI.js b/CNKI.js index 292e433..1cc299a 100644 --- a/CNKI.js +++ b/CNKI.js @@ -1004,7 +1004,7 @@ async function parseRefer(referText, doc, url, itemKey) { extra.set('applyDate', labels.get(['实施日期', '實施日期']), true); break; case 'patent': - // item.place = labels.getWith('地址'); + // item.place = labels.get('地址'); item.filingDate = labels.get(['申请日', '申請日', 'ApplicationDate']); item.applicationNumber = labels.get(['申请\\(专利\\)号', '申請\\(專利\\)號', 'ApplicationNumber']); item.issueDate = labels.get(['授权公告日', '授權公告日', 'IssuanceDate']); @@ -1222,20 +1222,20 @@ async function addPubDetail(item, extra, ids, doc) { } const container = { originalContainerTitle: ZU.capitalizeTitle(text(pubDoc, '.infobox > h3 > p')), - innerData: Array.from(pubDoc.querySelectorAll('.listbox li p')) + data: Array.from(pubDoc.querySelectorAll('.listbox li p')) .map(element => [tryMatch(ZU.trimInternal(element.textContent), /^[[【]?[\s\S]+?[】\]::]/).replace(/\s/g, ''), attr(element, 'span', 'title') || text(element, 'span')]) .filter(arr => arr[0]), - getWith: function (label) { + get: function (label) { if (Array.isArray(label)) { let result = label - .map(aLabel => this.getWith(aLabel)) + .map(aLabel => this.get(aLabel)) .find(element => element); return result ? result : ''; } let pattern = new RegExp(label, 'i'); - let keyValPair = this.innerData.find(arr => pattern.test(arr[0])); + let keyValPair = this.data.find(arr => pattern.test(arr[0])); return keyValPair ? ZU.trimInternal(keyValPair[1]) : ''; @@ -1246,27 +1246,27 @@ async function addPubDetail(item, extra, ids, doc) { extra.set('original-container-title', container.originalContainerTitle, true); switch (item.itemType) { case 'journalArticle': { - item.ISSN = container.getWith('ISSN'); + item.ISSN = container.get('ISSN'); extra.set('publicationTag', Array.from(pubDoc.querySelectorAll('.journalType2 > span')).map(element => ZU.trimInternal(element.textContent)).join(', ')); extra.set('CIF', text(pubDoc, '#evaluateInfo span:not([title])', 0)); extra.set('AIF', text(pubDoc, '#evaluateInfo span:not([title])', 1)); break; } case 'conferencePaper': - item.publisher = container.getWith('出版单位'); - item.date = ZU.strToISO(container.getWith(['出版时间', '出版日期', 'PublishingDate'])); - container.getWith(['编者', '編者', 'Editor']).split('、').forEach(creator => item.creators.push({ + item.publisher = container.get('出版单位'); + item.date = ZU.strToISO(container.get(['出版时间', '出版日期', 'PublishingDate'])); + container.get(['编者', '編者', 'Editor']).split('、').forEach(creator => item.creators.push({ firstName: '', lastName: creator.replace(/\(.*?\)$/, ''), creatorType: 'editor', fieldMode: 1 })); - // extra.set('organizer', container.getWith('主办单位'), true); + // extra.set('organizer', container.get('主办单位'), true); break; case 'bookSection': { - item.ISBN = container.getWith('ISBN'); - item.date = ZU.strToISO(container.getWith('出版时间')); - item.publisher = container.getWith('出版者'); + item.ISBN = container.get('ISBN'); + item.date = ZU.strToISO(container.get('出版时间')); + item.publisher = container.get('出版者'); } } } diff --git a/Douban.js b/Douban.js index 065c0ca..7f0e7c3 100644 --- a/Douban.js +++ b/Douban.js @@ -252,39 +252,39 @@ async function scrape(doc, url = doc.location.href) { class TextLabels { constructor(doc, selector, label) { - this.innerData = []; + this.data = []; let arr = text(doc, selector) .replace(/^\s*/gm, '') .replace(/\n+/g, '\n') .split('\n'); for (let i = 0; i < arr.length; i++) { if (i > 0 && !label.test(arr[i])) { - this.innerData.push(this.innerData.pop() + arr[i]); + this.data.push(this.data.pop() + arr[i]); } else { - this.innerData.push(arr[i]); + this.data.push(arr[i]); } } - Z.debug(this.innerData); + Z.debug(this.data); // innerText在详情页表现良好,但在多条目表现欠佳,故统一使用经过处理的text - this.innerData = this.innerData + this.data = this.data .map(keyVal => [ tryMatch(keyVal, new RegExp(`^${label.source}`)).replace(/\s/g, ''), tryMatch(keyVal, new RegExp(`^${label.source}(.+)`), 1) ]); } - getWith(label) { + get(label) { if (Array.isArray(label)) { let result = label - .map(aLabel => this.getWith(aLabel)) + .map(aLabel => this.get(aLabel)) .find(value => value); return result ? result : ''; } let pattern = new RegExp(label); - let keyVal = this.innerData.find(element => pattern.test(element[0])); + let keyVal = this.data.find(element => pattern.test(element[0])); return keyVal ? ZU.trimInternal(keyVal[1]) : ''; diff --git a/National Standards Open System - China.js b/National Standards Open System - China.js index 1c6a98b..1f1e048 100644 --- a/National Standards Open System - China.js +++ b/National Standards Open System - China.js @@ -79,15 +79,15 @@ async function scrape(doc, url = doc.location.href) { let labels = new Cells(doc, '.row div.col-xs-12'); Z.debug(labels.data.map(arr => [arr[0], ZU.trimInternal(arr[1].innerText)])); let textLabels = new TextLabels(doc, '.container table:nth-child(2)'); - Z.debug(textLabels.innerData); - newItem.title = textLabels.getWith('中文标准名称'); + Z.debug(textLabels.data); + newItem.title = textLabels.get('中文标准名称'); newItem.number = tryMatch(text(doc, 'td > h1'), /:([\w /-]+)/, 1).replace('-', '—'); - newItem.status = textLabels.getWith('标准状态').split(' ')[0]; + newItem.status = textLabels.get('标准状态').split(' ')[0]; newItem.date = labels.get(['发布日期', '实施日期']); newItem.url = url; newItem.language = 'zh-CN'; newItem.libraryCatalog = '国家标准全文公开系统'; - newItem.extra += addExtra('original-title', textLabels.getWith('英文标准名称')); + newItem.extra += addExtra('original-title', textLabels.get('英文标准名称')); newItem.extra += addExtra('CCS', labels.get('CCS')); newItem.extra += addExtra('ICS', labels.get('ICS')); newItem.extra += addExtra('applyDate', labels.get('实施日期')); @@ -141,7 +141,7 @@ class Cells { class TextLabels { constructor(doc, selector) { // innerText在详情页表现良好,但在多条目表现欠佳,故统一使用经过处理的text - this.innerData = text(doc, selector) + this.data = text(doc, selector) .replace(/^[\s\n]*/gm, '') .replace(/:\n/g, ': ') .replace(/\n([^】\]::]+?\n)/g, ' $1') @@ -152,17 +152,17 @@ class TextLabels { ]); } - getWith(label) { + get(label) { if (Array.isArray(label)) { let result = label - .map(aLabel => this.getWith(aLabel)) + .map(aLabel => this.get(aLabel)) .find(value => value); return result ? result : ''; } let pattern = new RegExp(label); - let keyVal = this.innerData.find(element => pattern.test(element[0])); + let keyVal = this.data.find(element => pattern.test(element[0])); return keyVal ? ZU.trimInternal(keyVal[1]) : '';