Skip to content

Commit

Permalink
innerData -> data; getWith -> get
Browse files: browse the repository at this point in the history
  • Loading branch information
jiaojiaodubai committed Aug 20, 2024
1 parent 309fd0c commit ed96f83
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 40 deletions.
22 changes: 11 additions & 11 deletions CNKI thinker.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,22 +96,22 @@ async function scrape(doc, url = doc.location.href) {
newItem.creators.forEach(creator => creator.fieldMode = 1);
let labels = new TextLabels(doc, '.bc_a, .desc-info');
Z.debug(labels.data.map(arr => [arr[0], ZU.trimInternal(arr[1])]));
newItem.edition = labels.getWith('版次');
newItem.edition = labels.get('版次');
switch (newItem.itemType) {
case 'book':
newItem.numPages = labels.getWith('页数');
newItem.numPages = labels.get('页数');
break;
case 'bookSection':
newItem.bookTitle = text(doc, '.book-p');
newItem.pages = labels.getWith('页码');
newItem.pages = labels.get('页码');
break;
}
newItem.publisher = text(doc, '.xqy_g') || labels.getWith('出版社');
newItem.date = ZU.strToISO(labels.getWith('出版时间').replace(/(\d{4})(0?\d{1,2})(\d{1,2})/, '$1-$2-$3'));
newItem.publisher = text(doc, '.xqy_g') || labels.get('出版社');
newItem.date = ZU.strToISO(labels.get('出版时间').replace(/(\d{4})(0?\d{1,2})(\d{1,2})/, '$1-$2-$3'));
newItem.language = 'zh-CN';
newItem.ISBN = labels.getWith('国际标准书号ISBN') || tryMatch(url, /bookcode=(\d{10,13})/, 1);
newItem.ISBN = labels.get('国际标准书号ISBN') || tryMatch(url, /bookcode=(\d{10,13})/, 1);
newItem.url = url;
newItem.libraryCatalog = labels.getWith('所属分类');
newItem.libraryCatalog = labels.get('所属分类');
newItem.notes.push(innerText(doc, '.xqy_bd'));
extra.add('CNKICite', text(doc, '.book_zb_yy span:last-child'));
extra.add('price', text(doc, '#OriginalPrice'));
Expand All @@ -131,7 +131,7 @@ class TextLabels {
.replace(/\n([^】\]::]+?\n)/g, ' $1')
.split('\n'));
// innerText在详情页表现良好,但在多条目表现欠佳,故统一使用经过处理的text
this.innerData = text(doc, selector)
this.data = text(doc, selector)
.replace(/^[\s\n]*/gm, '')
.replace(/:\n/g, ': ')
.replace(/\n\/\n/g, ' / ')
Expand All @@ -144,17 +144,17 @@ class TextLabels {
]);
}

getWith(label) {
get(label) {
if (Array.isArray(label)) {
let result = label
.map(aLabel => this.getWith(aLabel))
.map(aLabel => this.get(aLabel))
.find(value => value);
return result
? result
: '';
}
let pattern = new RegExp(label);
let keyVal = this.innerData.find(element => pattern.test(element[0]));
let keyVal = this.data.find(element => pattern.test(element[0]));
return keyVal
? ZU.trimInternal(keyVal[1])
: '';
Expand Down
26 changes: 13 additions & 13 deletions CNKI.js
Original file line number Diff line number Diff line change
Expand Up @@ -1004,7 +1004,7 @@ async function parseRefer(referText, doc, url, itemKey) {
extra.set('applyDate', labels.get(['实施日期', '實施日期']), true);
break;
case 'patent':
// item.place = labels.getWith('地址');
// item.place = labels.get('地址');
item.filingDate = labels.get(['申请日', '申請日', 'ApplicationDate']);
item.applicationNumber = labels.get(['申请\\(专利\\)号', '申請\\(專利\\)號', 'ApplicationNumber']);
item.issueDate = labels.get(['授权公告日', '授權公告日', 'IssuanceDate']);
Expand Down Expand Up @@ -1222,20 +1222,20 @@ async function addPubDetail(item, extra, ids, doc) {
}
const container = {
originalContainerTitle: ZU.capitalizeTitle(text(pubDoc, '.infobox > h3 > p')),
innerData: Array.from(pubDoc.querySelectorAll('.listbox li p'))
data: Array.from(pubDoc.querySelectorAll('.listbox li p'))
.map(element => [tryMatch(ZU.trimInternal(element.textContent), /^[[【]?[\s\S]+?[】\]::]/).replace(/\s/g, ''), attr(element, 'span', 'title') || text(element, 'span')])
.filter(arr => arr[0]),
getWith: function (label) {
get: function (label) {
if (Array.isArray(label)) {
let result = label
.map(aLabel => this.getWith(aLabel))
.map(aLabel => this.get(aLabel))
.find(element => element);
return result
? result
: '';
}
let pattern = new RegExp(label, 'i');
let keyValPair = this.innerData.find(arr => pattern.test(arr[0]));
let keyValPair = this.data.find(arr => pattern.test(arr[0]));
return keyValPair
? ZU.trimInternal(keyValPair[1])
: '';
Expand All @@ -1246,27 +1246,27 @@ async function addPubDetail(item, extra, ids, doc) {
extra.set('original-container-title', container.originalContainerTitle, true);
switch (item.itemType) {
case 'journalArticle': {
item.ISSN = container.getWith('ISSN');
item.ISSN = container.get('ISSN');
extra.set('publicationTag', Array.from(pubDoc.querySelectorAll('.journalType2 > span')).map(element => ZU.trimInternal(element.textContent)).join(', '));
extra.set('CIF', text(pubDoc, '#evaluateInfo span:not([title])', 0));
extra.set('AIF', text(pubDoc, '#evaluateInfo span:not([title])', 1));
break;
}
case 'conferencePaper':
item.publisher = container.getWith('出版单位');
item.date = ZU.strToISO(container.getWith(['出版时间', '出版日期', 'PublishingDate']));
container.getWith(['编者', '編者', 'Editor']).split('、').forEach(creator => item.creators.push({
item.publisher = container.get('出版单位');
item.date = ZU.strToISO(container.get(['出版时间', '出版日期', 'PublishingDate']));
container.get(['编者', '編者', 'Editor']).split('、').forEach(creator => item.creators.push({
firstName: '',
lastName: creator.replace(/\(.*?\)$/, ''),
creatorType: 'editor',
fieldMode: 1
}));
// extra.set('organizer', container.getWith('主办单位'), true);
// extra.set('organizer', container.get('主办单位'), true);
break;
case 'bookSection': {
item.ISBN = container.getWith('ISBN');
item.date = ZU.strToISO(container.getWith('出版时间'));
item.publisher = container.getWith('出版者');
item.ISBN = container.get('ISBN');
item.date = ZU.strToISO(container.get('出版时间'));
item.publisher = container.get('出版者');
}
}
}
Expand Down
16 changes: 8 additions & 8 deletions Douban.js
Original file line number Diff line number Diff line change
Expand Up @@ -252,39 +252,39 @@ async function scrape(doc, url = doc.location.href) {

class TextLabels {
constructor(doc, selector, label) {
this.innerData = [];
this.data = [];
let arr = text(doc, selector)
.replace(/^\s*/gm, '')
.replace(/\n+/g, '\n')
.split('\n');
for (let i = 0; i < arr.length; i++) {
if (i > 0 && !label.test(arr[i])) {
this.innerData.push(this.innerData.pop() + arr[i]);
this.data.push(this.data.pop() + arr[i]);
}
else {
this.innerData.push(arr[i]);
this.data.push(arr[i]);
}
}
Z.debug(this.innerData);
Z.debug(this.data);
// innerText在详情页表现良好,但在多条目表现欠佳,故统一使用经过处理的text
this.innerData = this.innerData
this.data = this.data
.map(keyVal => [
tryMatch(keyVal, new RegExp(`^${label.source}`)).replace(/\s/g, ''),
tryMatch(keyVal, new RegExp(`^${label.source}(.+)`), 1)
]);
}

getWith(label) {
get(label) {
if (Array.isArray(label)) {
let result = label
.map(aLabel => this.getWith(aLabel))
.map(aLabel => this.get(aLabel))
.find(value => value);
return result
? result
: '';
}
let pattern = new RegExp(label);
let keyVal = this.innerData.find(element => pattern.test(element[0]));
let keyVal = this.data.find(element => pattern.test(element[0]));
return keyVal
? ZU.trimInternal(keyVal[1])
: '';
Expand Down
16 changes: 8 additions & 8 deletions National Standards Open System - China.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,15 @@ async function scrape(doc, url = doc.location.href) {
let labels = new Cells(doc, '.row div.col-xs-12');
Z.debug(labels.data.map(arr => [arr[0], ZU.trimInternal(arr[1].innerText)]));
let textLabels = new TextLabels(doc, '.container table:nth-child(2)');
Z.debug(textLabels.innerData);
newItem.title = textLabels.getWith('中文标准名称');
Z.debug(textLabels.data);
newItem.title = textLabels.get('中文标准名称');
newItem.number = tryMatch(text(doc, 'td > h1'), /:([\w /-]+)/, 1).replace('-', '—');
newItem.status = textLabels.getWith('标准状态').split(' ')[0];
newItem.status = textLabels.get('标准状态').split(' ')[0];
newItem.date = labels.get(['发布日期', '实施日期']);
newItem.url = url;
newItem.language = 'zh-CN';
newItem.libraryCatalog = '国家标准全文公开系统';
newItem.extra += addExtra('original-title', textLabels.getWith('英文标准名称'));
newItem.extra += addExtra('original-title', textLabels.get('英文标准名称'));
newItem.extra += addExtra('CCS', labels.get('CCS'));
newItem.extra += addExtra('ICS', labels.get('ICS'));
newItem.extra += addExtra('applyDate', labels.get('实施日期'));
Expand Down Expand Up @@ -141,7 +141,7 @@ class Cells {
class TextLabels {
constructor(doc, selector) {
// innerText在详情页表现良好,但在多条目表现欠佳,故统一使用经过处理的text
this.innerData = text(doc, selector)
this.data = text(doc, selector)
.replace(/^[\s\n]*/gm, '')
.replace(/:\n/g, ': ')
.replace(/\n([^】\]::]+?\n)/g, ' $1')
Expand All @@ -152,17 +152,17 @@ class TextLabels {
]);
}

getWith(label) {
get(label) {
if (Array.isArray(label)) {
let result = label
.map(aLabel => this.getWith(aLabel))
.map(aLabel => this.get(aLabel))
.find(value => value);
return result
? result
: '';
}
let pattern = new RegExp(label);
let keyVal = this.innerData.find(element => pattern.test(element[0]));
let keyVal = this.data.find(element => pattern.test(element[0]));
return keyVal
? ZU.trimInternal(keyVal[1])
: '';
Expand Down

0 comments on commit ed96f83

Please sign in to comment.