Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: replace moment with dayjs #737

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,11 @@
"@postlight/ci-failed-test-reporter": "^1.0",
"browser-request": "github:postlight/browser-request#feat-add-headers-to-response",
"cheerio": "^0.22.0",
"dayjs": "^1.11.7",
"difflib": "github:postlight/difflib.js",
"ellipsize": "0.1.0",
"iconv-lite": "0.5.0",
"jquery": "^3.5.0",
"moment": "^2.23.0",
"moment-parseformat": "3.0.0",
"moment-timezone": "0.5.37",
"postman-request": "^2.88.1-postman.31",
"string-direction": "^0.1.2",
"turndown": "^7.1.1",
Expand All @@ -134,16 +132,14 @@
},
"bundleDependencies": [
"jquery",
"moment-timezone",
"browser-request"
],
"browser": {
"main": "./dist/mercury.web.js",
"cheerio": "./src/shims/cheerio-query",
"jquery": "./node_modules/jquery/dist/jquery.min.js",
"postman-request": "browser-request",
"iconv-lite": "./src/shims/iconv-lite",
"moment-timezone": "./node_modules/moment-timezone/builds/moment-timezone-with-data-2012-2022.min.js"
"iconv-lite": "./src/shims/iconv-lite"
},
"husky": {
"hooks": {
Expand Down
38 changes: 26 additions & 12 deletions src/cleaners/date-published.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import moment from 'moment-timezone';
import parseFormat from 'moment-parseformat';
// Is there a compelling reason to use moment here?
// Mostly only being used for the isValid() method,
// but could just check for 'Invalid Date' string.
import dayjs from 'dayjs';
import customParseFormat from 'dayjs/plugin/customParseFormat';
import utc from 'dayjs/plugin/utc';
import timezonePlugin from 'dayjs/plugin/timezone';
import advancedFormat from 'dayjs/plugin/advancedFormat';

import {
MS_DATE_STRING,
Expand All @@ -16,6 +16,11 @@ import {
TIME_WITH_OFFSET_RE,
} from './constants';

dayjs.extend(customParseFormat);
dayjs.extend(utc);
dayjs.extend(timezonePlugin);
dayjs.extend(advancedFormat);

export function cleanDateString(dateString) {
return (dateString.match(SPLIT_DATE_STRING) || [])
.join(' ')
Expand All @@ -27,21 +32,30 @@ export function cleanDateString(dateString) {

export function createDate(dateString, timezone, format) {
if (TIME_WITH_OFFSET_RE.test(dateString)) {
return moment(new Date(dateString));
return dayjs(new Date(dateString));
}

if (TIME_AGO_STRING.test(dateString)) {
const fragments = TIME_AGO_STRING.exec(dateString);
return moment().subtract(fragments[1], fragments[2]);
return dayjs().subtract(fragments[1], fragments[2]);
}

if (TIME_NOW_STRING.test(dateString)) {
return moment();
return dayjs();
}

return timezone
? moment.tz(dateString, format || parseFormat(dateString), timezone)
: moment(dateString, format || parseFormat(dateString));
if (timezone) {
try {
return format
? dayjs.tz(dateString, format, timezone)
: dayjs.tz(dayjs(dateString).format('YYYY-MM-DD HH:mm:ss'), timezone);
} catch (error) {
// return an intentionally invalid dayjs object,
// in case the input needs to be cleaned first
return dayjs('');
}
}
return format ? dayjs(dateString, format) : dayjs(dateString);
}

// Take a date published string, and hopefully return a date out of
Expand All @@ -62,7 +76,7 @@ export default function cleanDatePublished(

if (!date.isValid()) {
dateString = cleanDateString(dateString);
date = createDate(dateString, timezone, format);
date = createDate(dateString, timezone);
}

return date.isValid() ? date.toISOString() : null;
Expand Down
32 changes: 16 additions & 16 deletions src/cleaners/date-published.test.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import assert from 'assert';
import moment from 'moment-timezone';
import dayjs from 'dayjs';

import cleanDatePublished, { cleanDateString } from './date-published';

describe('cleanDatePublished(dateString)', () => {
it('returns a date', () => {
const datePublished = cleanDatePublished('published: 1/1/2020');

assert.equal(datePublished, moment('1/1/2020', 'MM/DD/YYYY').toISOString());
assert.equal(datePublished, dayjs('1/1/2020', 'M/D/YYYY').toISOString());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this is how the old tests were too, but can we change all these expected dates into hard-coded strings? I think that would make the functionality we're testing more clear: we expect strings of varying format and quality, and we always return a string in a specific format.

So this line would become:

assert.equal(datePublished, '2020-01-01T00:00:00.000Z');

I think with that change we could probably remove dayjs as a dependency from this test file entirely.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

exactly, i'll have a commit with these changes shortly ✅

});

it('returns null if date is invalid', () => {
Expand All @@ -28,37 +28,37 @@ describe('cleanDatePublished(dateString)', () => {
it('accepts a custom date format', () => {
// The JS date parser is forgiving, but
// it needs am/pm separated from a time
const datePublished = cleanDatePublished('Mon Aug 03 12:45:00 EDT 2015', {
const datePublished = cleanDatePublished('Aug 03 12:45:00 EDT 2015', {
timezone: 'America/New_York',
format: 'ddd MMM DD HH:mm:ss zz YYYY',
format: 'MMM DD HH:mm:ss z YYYY',
});
assert.equal(datePublished, '2015-08-03T16:45:00.000Z');
});

it('can handle dates formatted as "[just|right] now"', () => {
const date1 = cleanDatePublished('now');
const newDate1 = moment(date1)
const newDate1 = dayjs(date1)
.format()
.split('T')[0];
const expectedDate1 = moment()
const expectedDate1 = dayjs()
.format()
.split('T')[0];
assert.equal(newDate1, expectedDate1);

const date2 = cleanDatePublished('just now');
const newDate2 = moment(date2)
const newDate2 = dayjs(date2)
.format()
.split('T')[0];
const expectedDate2 = moment()
const expectedDate2 = dayjs()
.format()
.split('T')[0];
assert.equal(newDate2, expectedDate2);

const date3 = cleanDatePublished('right now');
const newDate3 = moment(date3)
const newDate3 = dayjs(date3)
.format()
.split('T')[0];
const expectedDate3 = moment()
const expectedDate3 = dayjs()
.format()
.split('T')[0];
assert.equal(newDate3, expectedDate3);
Expand All @@ -69,30 +69,30 @@ describe('cleanDatePublished(dateString)', () => {
// "X days ago" will not be accurate down to the exact time
// "X months ago" will not be accurate down to the exact day
const date1 = cleanDatePublished('1 hour ago');
const newDate1 = moment(date1)
const newDate1 = dayjs(date1)
.format()
.split('T')[0];
const expectedDate1 = moment()
const expectedDate1 = dayjs()
.subtract(1, 'hour')
.format()
.split('T')[0];
assert.equal(newDate1, expectedDate1);

const date2 = cleanDatePublished('5 days ago');
const newDate2 = moment(date2)
const newDate2 = dayjs(date2)
.format()
.split('T')[0];
const expectedDate2 = moment()
const expectedDate2 = dayjs()
.subtract(5, 'days')
.format()
.split('T')[0];
assert.equal(newDate2, expectedDate2);

const date3 = cleanDatePublished('10 months ago');
const newDate3 = moment(date3)
const newDate3 = dayjs(date3)
.format()
.split('T')[0];
const expectedDate3 = moment()
const expectedDate3 = dayjs()
.subtract(10, 'months')
.format()
.split('T')[0];
Expand Down
1 change: 1 addition & 0 deletions src/extractors/custom/clinicaltrials.gov/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export const ClinicaltrialsGovExtractor = {
date_published: {
// selectors: ['span.term[data-term="Last Update Posted"]'],
selectors: ['div:has(> span.term[data-term="Last Update Posted"])'],
timezone: 'UTC',
},

content: {
Expand Down
3 changes: 1 addition & 2 deletions src/extractors/custom/clinicaltrials.gov/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment-timezone';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -59,7 +58,7 @@ describe('ClinicaltrialsGovExtractor', () => {

// Update these values with the expected values from
// the article.
assert.equal(moment(date_published).format('YYYY-MM-DD'), '2018-11-21');
assert.equal(date_published, '2018-11-21T00:00:00.000Z');
});

it('returns the content', async () => {
Expand Down
2 changes: 0 additions & 2 deletions src/extractors/custom/fortune.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ export const FortuneComExtractor = {

date_published: {
selectors: ['.MblGHNMJ'],

timezone: 'UTC',
},

lead_image_url: {
Expand Down
1 change: 1 addition & 0 deletions src/extractors/custom/genius.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export const GeniusComExtractor = {
},
],
],
timezone: 'UTC',
},

dek: {
Expand Down
4 changes: 1 addition & 3 deletions src/extractors/custom/genius.com/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -51,11 +50,10 @@ describe('GeniusComExtractor', () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/genius.com/index.js.
const { date_published } = await result;
const newDatePublished = moment(date_published).format();

// Update these values with the expected values from
// the article.
assert.equal(newDatePublished.split('T')[0], '1984-06-25');
assert.equal(date_published, '1984-06-25T00:00:00.000Z');
});

it('returns the lead_image_url', async () => {
Expand Down
2 changes: 0 additions & 2 deletions src/extractors/custom/gothamist.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ export const GothamistComExtractor = {
'abbr',
'abbr.published',
],

timezone: 'America/New_York',
},

dek: {
Expand Down
2 changes: 0 additions & 2 deletions src/extractors/custom/news.nationalgeographic.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ export const NewsNationalgeographicComExtractor = {

date_published: {
selectors: [['meta[name="article:published_time"]', 'value']],
format: 'ddd MMM DD HH:mm:ss zz YYYY',
timezone: 'EST',
},

dek: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ describe('NewsNationalgeographicComExtractor', () => {

// Update these values with the expected values from
// the article.
assert.equal(date_published, '2015-08-03T17:45:00.000Z');
assert.equal(date_published, '2015-08-03T16:45:00.000Z');
});

it('returns the dek', async () => {
Expand Down
1 change: 1 addition & 0 deletions src/extractors/custom/people.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export const PeopleComExtractor = {
'.mntl-attribution__item-date',
['meta[name="article:published_time"]', 'value'],
],
timezone: 'UTC',
},

lead_image_url: {
Expand Down
7 changes: 1 addition & 6 deletions src/extractors/custom/people.com/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment-timezone';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -55,13 +54,9 @@ describe('PeopleComExtractor', () => {
// in ./src/extractors/custom/people.com/index.js.
const { date_published } = await result;

const new_date_published = moment(date_published)
.format()
.split('T')[0];

// Update these values with the expected values from
// the article.
assert.equal(new_date_published, '2016-12-12');
assert.equal(date_published, '2016-12-12T09:22:00.000Z');
});

it('returns the lead_image_url', async () => {
Expand Down
1 change: 1 addition & 0 deletions src/extractors/custom/pitchfork.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export const PitchforkComExtractor = {

date_published: {
selectors: ['div[class^="InfoSliceWrapper-"]', ['.pub-date', 'datetime']],
timezone: 'UTC',
},

dek: {
Expand Down
6 changes: 1 addition & 5 deletions src/extractors/custom/pitchfork.com/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment-timezone';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -41,11 +40,8 @@ describe('PitchforkComExtractor', () => {

it('returns the date_published', async () => {
const { date_published } = await result;
const new_date_published = moment(date_published)
.format()
.split('T')[0];

assert.equal(new_date_published, '2019-06-07');
assert.equal(date_published, '2019-06-07T00:00:00.000Z');
});

it('returns the dek', async () => {
Expand Down
4 changes: 1 addition & 3 deletions src/extractors/custom/takagi-hiromitsu.jp/index.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import moment from 'moment';

import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
Expand Down Expand Up @@ -57,11 +56,10 @@ describe('TakagihiromitsuJpExtractor', () => {
// To pass this test, fill out the date_published selector
// in ./src/extractors/custom/takagi-hiromitsu.jp/index.js.
const { date_published } = await result;
const newDatePublished = moment(date_published).format();

// Update these values with the expected values from
// the article.
assert.equal(newDatePublished.split('T')[0], '2019-02-17');
assert.equal(date_published, '2019-02-17T14:34:06.000Z');
});

it('returns the dek', async () => {
Expand Down
2 changes: 1 addition & 1 deletion src/extractors/custom/www.al.com/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export const WwwAlComExtractor = {

date_published: {
selectors: [['meta[name="article_date_original"]', 'value']],
timezone: 'EST',
timezone: 'America/Chicago',
},

lead_image_url: {
Expand Down
2 changes: 1 addition & 1 deletion src/extractors/custom/www.al.com/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ describe('WwwAlComExtractor', () => {

// Update these values with the expected values from
// the article.
assert.equal(date_published, '2016-12-22T23:47:00.000Z');
assert.equal(date_published, '2016-12-23T00:47:00.000Z');
});

it('returns the lead_image_url', async () => {
Expand Down
Loading