1
- import render from 'dom-serializer'
2
- import { selectAll , selectOne } from 'css-select'
3
- import { DomUtils , parseDocument } from 'htmlparser2'
4
-
5
- const { getAttributeValue, textContent } = DomUtils
1
+ import Docpa from 'docpa'
6
2
7
3
/**
 * Collects the cells of every matched row as plain `{ value, colspan, rowspan }`
 * records.
 *
 * @param tableDoc - Docpa node whose descendants are searched for rows.
 * @param selectors - `[rowSelector, columnSelector]`. A falsy entry (e.g. an
 *   empty string) falls back to `'tr'` and `'td,th'` respectively.
 * @param shouldBeText - When true a cell's value is read from `textContent`;
 *   otherwise from `innerHTML`.
 * @param trim - When true, each value is passed through `String.prototype.trim`.
 * @returns One array per matched row, holding one record per matched cell of
 *   that row. A falsy row yields an empty array; a falsy cell yields
 *   `{ value: '', colspan: 1, rowspan: 1 }`.
 */
function getRowWithColumns(
  tableDoc: Docpa,
  selectors: [string, string],
  shouldBeText: boolean,
  trim: boolean
) {
  const rowSelector = selectors[0] || 'tr'
  const colSelector = selectors[1] || 'td,th'

  return tableDoc.querySelectorAll(rowSelector).map((tr) => {
    if (!tr) return []

    return tr.querySelectorAll(colSelector).map((td) => {
      if (!td) return { value: '', colspan: 1, rowspan: 1 }

      // A missing, empty, non-numeric or zero span attribute counts as 1:
      // the inner `|| 1` covers null/'' and the outer one covers NaN/0.
      const rowspan = +(td.getAttribute('rowspan') || 1) || 1
      const colspan = +(td.getAttribute('colspan') || 1) || 1

      // Falsy content (null/undefined/'') is normalised to an empty string.
      const value = (shouldBeText ? td.textContent : td.innerHTML) || ''

      return { value: trim ? value.trim() : value, colspan, rowspan }
    })
  })
}
26
26
27
27
interface ParseTableOptions {
@@ -39,8 +39,8 @@ function parseTable(html: string, options: ParseTableOptions) {
39
39
trim = true ,
40
40
} = options
41
41
42
- const document = parseDocument ( html )
43
- const table = selectOne ( tableSelector , document )
42
+ const document = new Docpa ( html )
43
+ const table = document . querySelector ( tableSelector )
44
44
45
45
if ( ! table ) throw new Error ( `${ tableSelector } not found in document.` )
46
46
0 commit comments