Skip to content

Commit

Permalink
fix: date parsing improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
KiraLT committed Jan 23, 2024
1 parent efdade1 commit 626e143
Show file tree
Hide file tree
Showing 10 changed files with 55 additions and 21 deletions.
4 changes: 2 additions & 2 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "isomorphic-htmlparser",
"image": "mcr.microsoft.com/devcontainers/universal:2",
"features": {
"ghcr.io/devcontainers/features/node:1": {}
"ghcr.io/devcontainers/features/node:1": {},
},
"updateContentCommand": "npm install"
"updateContentCommand": "npm install",
}
2 changes: 1 addition & 1 deletion .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:

- uses: actions/setup-node@v3
with:
node-version: 18
node-version: 20
cache: 'npm'

- run: npm ci
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/publish-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ jobs:

- uses: actions/setup-node@v3
with:
node-version: 18
node-version: 20
cache: 'npm'

- run: npm ci

- run: npm run build

- name: Setup Pages
uses: actions/configure-pages@v3

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:

- uses: actions/setup-node@v3
with:
node-version: 18
node-version: 20
cache: 'npm'

- run: npm ci
Expand All @@ -30,7 +30,7 @@ jobs:

- uses: actions/setup-node@v3
with:
node-version: 18
node-version: 20
cache: 'npm'

- run: npm ci
Expand Down
6 changes: 2 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
# [1.2.0](https://github.com/KiraLT/isomorphic-htmlparser/compare/v1.1.1...v1.2.0) (2023-11-02)


### Bug Fixes

* improved extraction filters ([a4ab272](https://github.com/KiraLT/isomorphic-htmlparser/commit/a4ab272e066b62a443e827bbb6259e2ce6a8a3ef))

- improved extraction filters ([a4ab272](https://github.com/KiraLT/isomorphic-htmlparser/commit/a4ab272e066b62a443e827bbb6259e2ce6a8a3ef))

### Features

* number parser extraction filters ([4948811](https://github.com/KiraLT/isomorphic-htmlparser/commit/4948811ea4cc3aca3eb36e879f92a854fa13e0e8))
- number parser extraction filters ([4948811](https://github.com/KiraLT/isomorphic-htmlparser/commit/4948811ea4cc3aca3eb36e879f92a854fa13e0e8))

## [1.1.1](https://github.com/KiraLT/isomorphic-htmlparser/compare/v1.1.0...v1.1.1) (2023-03-22)

Expand Down
8 changes: 7 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
},
"dependencies": {
"cheerio": "^1.0.0-rc.12",
"common-stuff": "^1.10.3"
"common-stuff": "^1.10.3",
"dayjs": "^1.11.10"
}
}
14 changes: 11 additions & 3 deletions spec/parser.spec.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { defaultFilters } from '../src/extraction'
import { parseHTML } from '../src/node'

const html = `
Expand All @@ -7,8 +8,6 @@ const html = `
<h1>
My First Heading
</h1>
<p>My first paragraph.</p>
<div class="body">
Expand All @@ -24,6 +23,9 @@ const html = `
<div id="date">
2023-11-10 10:30
</div>
<div id="date2">
Dec. 20th '23
</div>
</body>
</html>
`
Expand Down Expand Up @@ -132,6 +134,12 @@ describe('parseHTML', () => {
it('extracts date', () => {
const dom = parseHTML(html)

expect(dom.extract('#date @ text | parseDate')).toBe(1699605000000)
// Date parsing depends on the current timezone
expect(dom.extract('#date @ text | parseDate')).toBeGreaterThan(
1699600000000,
)
expect(
dom.extract('#date2 @ text | parseDate:"MMM. Do \'YY"'),
).toBeGreaterThan(1699600000000)
})
})
27 changes: 24 additions & 3 deletions src/extraction.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
import { ensureArray, getByKey, titleCase, parseSize } from 'common-stuff'
import dayjs from 'dayjs'
import customParseFormat from 'dayjs/plugin/customParseFormat'
import advancedFormat from 'dayjs/plugin/advancedFormat'

import 'dayjs/locale/en'

dayjs.extend(customParseFormat)
dayjs.extend(advancedFormat)

interface ExtractExpression {
selector: string
Expand Down Expand Up @@ -172,7 +180,7 @@ export const defaultFilters = {
* const output = `el`
* ```
*/
slice: (value: unknown, start: unknown, end: unknown): string => {
slice: (value: unknown, start?: unknown, end?: unknown): string => {
return String(value).slice(
parseInt(String(start), 10) || undefined,
parseInt(String(end)) || undefined,
Expand Down Expand Up @@ -229,8 +237,21 @@ export const defaultFilters = {
* const output = 1699605000000
* ```
*/
parseDate: (value: unknown): number | undefined => {
const parsed = Date.parse(defaultFilters.trim(value))
parseDate: (
value: unknown,
format?: unknown,
locale?: unknown,
): number | undefined => {
// Converts `value` to a millisecond timestamp via dayjs.
// `format` and `locale` are optional and are stringified before being
// handed to dayjs as its second and third arguments. Returns `undefined`
// when the resulting time value is NaN.

// Replaces every "." with a space. It is applied to both the input text and
// the format string, so the two stay aligned with each other.
// NOTE(review): the replacement also runs on the input when no format is
// supplied, so strings containing dots (e.g. fractional seconds such as
// `10:30:00.123`) are altered before parsing — confirm this is intended.
const cleanValue = (v: string) => v.replace(/[.]/g, ' ')

const parsed = dayjs(
cleanValue(defaultFilters.trim(value)),
// Falsy format/locale values (including an empty string) are forwarded as
// `undefined` rather than being stringified.
format ? cleanValue(String(format)) : undefined,
locale ? String(locale) : undefined,
)
.toDate()
.getTime()

// `getTime()` is NaN for an invalid Date; report that as `undefined`.
return isNaN(parsed) ? undefined : parsed
},
}
4 changes: 2 additions & 2 deletions tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
/* Additional Checks */
"noImplicitReturns": true /* Report error when not all code paths in function return a value. */,
"noFallthroughCasesInSwitch": true /* Report errors for fallthrough cases in switch statement. */,
"noUncheckedIndexedAccess": true /* Turning on noUncheckedIndexedAccess will add undefined to any un-declared field in the type. */
"noUncheckedIndexedAccess": true /* Turning on noUncheckedIndexedAccess will add undefined to any un-declared field in the type. */,
},
"include": ["src/**/*"]
"include": ["src/**/*"],
}

0 comments on commit 626e143

Please sign in to comment.