'use strict';

// index.js
//
// Batch driver: for every `.pdf` file found in the `pdf` directory next to
// this script, run the conversion pipeline
//   1. pdf -> xml        (tools/pdf2txt.py, run with `python`)
//   2. xml -> json + txt (scripts/xml2txt.js, run with node)
// Step 3 (translation) is currently disabled; see the commented-out calls at
// the end of the loop body.
//
// Every step is synchronous. The child_process calls throw when a child
// exits with a non-zero status, which aborts the whole run.

// modules
const fs = require('fs');
const path = require('path');
const child_process = require('child_process');

// command line tools
// Resolved against __dirname (not the current working directory) so the
// script behaves the same regardless of where it is launched from.
const pythonTool = path.join(__dirname, 'tools', 'pdf2txt.py');
const xml2txtTool = path.join(__dirname, 'scripts', 'xml2txt.js');


// translation source files
const pdfDir = path.join(__dirname, 'pdf');
const sourceFiles = fs.readdirSync(pdfDir)
  .filter(base => path.parse(base).ext === '.pdf');


// translating
sourceFiles.forEach(file => {
  // path
  // Each intermediate artifact lives in its own sibling directory and shares
  // the PDF's base name.
  const name = path.parse(file).name;
  const pdfPath = path.join(pdfDir, file);
  const xmlPath = path.join(__dirname, 'xml', name + '.xml');
  const jsonPath = path.join(__dirname, 'json', name + '.json');
  const txtPath = path.join(__dirname, 'txt', name + '.txt');


  // 1. pdf to xml
  // Arguments are passed as an array (no shell), so file names containing
  // spaces or shell metacharacters reach the tool unmodified.
  // NOTE(review): assumes `python` on PATH is a real executable rather than a
  // .bat/.cmd shim, which execFileSync cannot launch without a shell — confirm
  // on Windows setups.
  console.log(`Transforming and parsing ${name} .pdf ...`);
  child_process.execFileSync('python', [pythonTool, '-o', xmlPath, pdfPath]);


  // 2. xml to txt
  // process.execPath is the node binary running this script, so the helper
  // runs under the same runtime without relying on `node` being on PATH.
  console.log(`Extracting structural information from ${name} .pdf ...`);
  child_process.execFileSync(process.execPath, [xml2txtTool, xmlPath, jsonPath, txtPath]);


  // 3. translation (currently disabled); either of the two forms below works
  // const stream = child_process.execSync('node ./scripts/translate.js', { encoding: 'utf8' });
  // console.log(stream);

  // const stream = child_process.spawnSync('node', ['./scripts/translate.js', txtPath, 'en', 'zh', 'md'], { encoding: 'utf8' }); // result object of the finished child
  // console.log(stream.stdout); // print the child's captured output
});