Skip to content

Commit

Permalink
#5 Add directory parser.
Browse files Browse the repository at this point in the history
  • Loading branch information
fumikito committed Apr 5, 2023
1 parent c501346 commit 8a5c4ff
Show file tree
Hide file tree
Showing 14 changed files with 496 additions and 16 deletions.
1 change: 0 additions & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,3 @@ bin/ export-ignore
.gitignore export-ignore
composer.lock export-ignore
phpunit.xml export-ignore
hamepub export-ignore
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ foreach( $contents as $key => $html ){
$factory->opf->setLang( 'en_US' );
$factory->opf->setTitle( 'My First eBook', 'main-title' );
$factory->opf->putXML();
$factory->container->pubXML();
$factory->container->putXML();
// Save it!
$factory->compile('path/to/epub');
```
Expand All @@ -71,6 +71,10 @@ $factory->compile('path/to/epub');

This library is in alpha and highly experimental. Do not rely on it until it reaches beta!

## Acknowledgement

The sample pictures are courtesy of [Public Domain Pictures](https://www.pexels.com/ja-jp/photo/87742/) and [Nadi Lindsay](https://www.pexels.com/ja-jp/photo/3078831/).

## License

As stated in the LICENSE file, this library is released under the [MIT License](https://opensource.org/licenses/MIT).
9 changes: 9 additions & 0 deletions bin/hamepub
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ $app->registerCommand('generate', function( CommandCall $input ) use ($app, $roo
// This is not file.
$app->getPrinter()->error('Setting file not found.', false);
}
// Generate temp directory.
$tmp_dir = $input->hasParam( 'tmp' ) ? $input->getParam( 'tmp' ) : tempnam( sys_get_temp_dir(), 'hamepub-' );
try {
$packager = \Hametuha\HamePub\Packager::get();
$path = $packager->parse( $file, $tmp_dir );
$app->getPrinter()->success("Generated ePub: {$path}", false);
} catch ( \Exception $e ) {
$app->getPrinter()->error($e->getMessage(), false);
}

});

Expand Down
28 changes: 28 additions & 0 deletions setting.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"root": "./tests/dist/",
"id": "my-sample-ebook",
"isbn": "1234567890123",
"title": "My Sample Book",
"author": [
{
"name": "Kazuo Ishiguro",
"id": "author-1",
"role": "aut"
},
{
"name": "Haruki Murakami",
"id": "author-2",
"role": "aut"
},
{
"name": "Mike Jacob",
"id": "translator-1",
"role": "trl",
"type": "contributor"
}
],
"target": "./tests/tmp",
"published": "2023-01-01T23:00:00Z",
"direction": "ltr",
"cover": "./tests/dist/img/cover.jpg"
}
31 changes: 28 additions & 3 deletions src/Hametuha/HamePub/Oebps/Content.php
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,39 @@ public function setTitle($string, $id, $type = 'main', $sequence = 1)
$meta['property'] = 'display-seq';
}

/**
 * Add an author (creator or contributor) to the metadata.
 *
 * Appends an element named by $tag in the Schemas::DC namespace and, when a
 * role is given, a companion <meta> element that refines it with that role.
 *
 * @param string $value Name of author.
 * @param string $id    ID of author. Also used as the "refines" target of the role meta.
 * @param string $tag   Element name. Default is 'creator'; 'contributor' is the alternative.
 * @param string $role  Role code. See {https://www.loc.gov/marc/relators/relaterm.html}
 *
 * @return void
 */
public function addAuthor( $value, $id, $tag = 'creator', $role = '' ) {
    $creator       = $this->dom->metadata->addChild( $tag, $this->h( $value ), Schemas::DC );
    $creator['id'] = $id;
    if ( ! $role ) {
        // No role supplied: the author element alone is enough.
        return;
    }
    // Escape the role exactly like the author name above, so both values
    // reach addChild() through the same sanitising helper.
    $meta             = $this->dom->metadata->addChild( 'meta', $this->h( $role ) );
    $meta['refines']  = '#' . $id;
    $meta['property'] = 'role';
    $meta['scheme']   = 'marc:relators';
    $meta['id']       = 'role-of-' . $id;
}

/**
 * Add modified date (dcterms:modified).
 *
 * @param int|string $timestamp If int, treated as a UTC timestamp and formatted
 *                              as "Y-m-d\TH:i:s\Z". Any other value is passed
 *                              to addMeta() unchanged.
 */
public function setModifiedDate($timestamp)
{
    if ( is_int( $timestamp ) ) {
        // gmdate(), not date(): the format hard-codes the "Z" (UTC) suffix, so
        // the clock fields must be rendered in UTC regardless of the default
        // timezone configured for PHP.
        $timestamp = gmdate('Y-m-d\TH:i:s\Z', $timestamp);
    }
    $this->addMeta('meta', $timestamp, [
        'property' => 'dcterms:modified',
    ]);
}
Expand All @@ -104,7 +129,7 @@ public function addMeta($tag, $value, array $attributes = [])
}

/**
* Add item to
* Add item to Content OPF.
*
* @param string $relative_path
* @param string $id If empty, path will convert to id
Expand Down
2 changes: 1 addition & 1 deletion src/Hametuha/HamePub/Oebps/Toc.php
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public function getHTML($header = '', $footer = '')
</head>
<body>
HTML;
$html .= $this->getNavHTML();
$html .= $this->getNavHTML( $title );
$html .= <<<HTML
{$footer}
</body>
Expand Down
147 changes: 145 additions & 2 deletions src/Hametuha/HamePub/Packager.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,154 @@

namespace Hametuha\HamePub;

use Hametuha\HamePub\Parser\SettingParser;
use Hametuha\HamePub\Pattern\Singleton;
use PHPUnit\Framework\Error\Error;

/**
* Package ePub from directory.
*/
class Packager extends Singleton
{
class Packager extends Singleton {

use SettingParser;

/**
* @var array Setting. Assigned by parse() from the setting file; empty until then.
*/
protected $setting = [];

/**
* @var string[] HTML strings, keyed by each file's base name without the ".html" extension.
*/
protected $htmls = [];

/**
* @var int Newest modification time (as returned by filemtime()) among the loaded HTML files. 0 until a file is loaded.
*/
protected $newest = 0;

/**
 * Build an ePub from the directory described by a setting file.
 *
 * Loads the setting file, reads every HTML file under the configured root,
 * registers metadata, table of contents, assets and cover on the factory,
 * then compiles the result into the configured target directory.
 *
 * @param string $file    Path to setting file.
 * @param string $tmp_dir Temporary directory. If empty, a fresh directory is
 *                        created under the system temporary directory.
 *
 * @return string ePub file path.
 * @throws \Exception If the setting file is invalid, no HTML file is found,
 *                    or the working/target directory cannot be prepared.
 */
public function parse( $file = 'setting.json', $tmp_dir = '' ) {
    $this->setting = $this->getSettingFromFile( $file );
    // This object is obtained through Packager::get(); drop whatever a previous
    // run collected so that two consecutive builds do not mix their HTML.
    $this->htmls  = [];
    $this->newest = 0;
    // Load HTMLs first, so nothing is created on disk when there is nothing to build.
    $this->loadHtml();
    if ( empty( $this->htmls ) ) {
        throw new \Exception( 'No HTML files found.' );
    }
    // Set temporary directory.
    if ( empty( $tmp_dir ) ) {
        $tmp_dir = $this->createTemporaryDirectory();
    }
    // Start parsing.
    $factory = Factory::init( $this->setting['id'], $tmp_dir );
    // Set metadata.
    $factory->opf->setLang( $this->setting['lang'] );
    $factory->opf->setTitle( $this->setting['title'], 'main-title' );
    $factory->opf->setModifiedDate( $this->setting['published'] );
    $factory->opf->direction = $this->setting['direction'];
    // Set authors.
    if ( is_array( $this->setting['author'] ) ) {
        foreach ( $this->setting['author'] as $index => $author ) {
            if ( is_string( $author ) ) {
                // Allow a plain list of names as well as a list of objects.
                $author = [ 'name' => $author ];
            }
            $is_contributor = isset( $author['type'] ) && 'contributor' === $author['type'];
            $factory->opf->addAuthor(
                $author['name'],
                ( $author['id'] ?? sprintf( 'creator-%d', $index + 1 ) ),
                ( $is_contributor ? 'contributor' : 'creator' ),
                ( $author['role'] ?? '' )
            );
        }
    } else {
        // Single author given as a string; 'creator' is used as the element ID here.
        $factory->opf->addAuthor( $this->setting['author'], 'creator' );
    }
    foreach ( $this->htmls as $key => $html ) {
        // Register toc
        $toc = $factory->toc->addChild( $key, $key . '.xhtml' );
        // Grab all headers and add them to toc.
        $dom = $factory->parser->html5->loadHTML( $html );
        // Grab header and add ID attributes.
        // NOTE(review): 'max_level' is passed for both trailing arguments while the
        // 'depth' setting is never read anywhere in this class. Confirm against the
        // signature of grabHeaders() whether the last argument should be 'depth'.
        $factory->parser->grabHeaders( $toc, $dom, true, $this->setting['header']['max_level'], $this->setting['header']['max_level'] );
        // Convert from dom object to string.
        $html = $factory->parser->convertToString( $dom );
        // Recreate DOM.
        $dom = $factory->registerHTML( $key, $html, $this->getLinear( $key ) );
        // Grab all images
        foreach ( $factory->parser->extractAssets( $dom, 'img', 'src', $this->setting['url_base'], $this->setting['root'] ) as $path ) {
            $factory->opf->addItem( $path, '' );
        }
        // Grab all CSS
        foreach ( $factory->parser->extractAssets( $dom, 'link', 'href', $this->setting['url_base'], $this->setting['root'] ) as $path ) {
            $factory->opf->addItem( $path, '' );
        }
        // Register to OPF
        $factory->opf->addItem( "Text/{$key}.xhtml", "{$key}.xhtml" );
        // Save HTML
        $factory->parser->saveDom( $dom, "{$key}.xhtml" );
    }
    // If TOC is set, save it.
    if ( ! empty( $this->setting['toc'] ) ) {
        $factory->toc->label = $this->setting['toc'];
        $toc_html            = $factory->toc->getHTML();
        $factory->opf->addItem( 'Text/toc.xhtml', 'toc.xhtml', [ 'nav' ] );
        $factory->parser->saveDom( $factory->registerHTML( 'toc', $toc_html, 'no' ), 'toc.xhtml' );
    }
    // Set OPF.
    if ( ! empty( $this->setting['isbn'] ) ) {
        $factory->opf->setIdentifier( $this->setting['isbn'] );
    }
    // If cover is set, add it.
    if ( $this->setting['cover'] ) {
        $factory->addCover( $this->setting['cover'] );
    }
    $factory->opf->putXML();
    $factory->container->putXML();
    // Save it!
    $target = $this->reserveTargetPath();
    $factory->compile( $target );
    return $target;
}

/**
 * Create a uniquely named, empty directory under the system temporary directory.
 *
 * @return string Path of the created directory.
 * @throws \Exception If the directory cannot be created.
 */
private function createTemporaryDirectory() {
    // tempnam() reserves a unique name by creating a *file*; swap it for a directory.
    $path = tempnam( sys_get_temp_dir(), 'hamepub-' );
    if ( false === $path ) {
        throw new \Exception( 'Failed to reserve a temporary directory.' );
    }
    if ( ! unlink( $path ) || ! mkdir( $path, 0700 ) ) {
        throw new \Exception( 'Failed to create temporary directory: ' . $path );
    }
    return $path;
}

/**
 * Pick a unique "<id>-xxxxxx.epub" path inside the configured target directory.
 *
 * @return string Path the ePub should be written to.
 * @throws \Exception If the target directory is missing and cannot be created, or is not writable.
 */
private function reserveTargetPath() {
    $dir = $this->setting['target'];
    // Check the directory before calling tempnam(): given a missing directory,
    // tempnam() falls back to the system temporary directory and the book would
    // end up somewhere other than the configured target.
    if ( ! is_dir( $dir ) && ! mkdir( $dir, 0755, true ) && ! is_dir( $dir ) ) {
        throw new \Exception( 'Target directory is not writable: ' . $dir );
    }
    if ( ! is_writable( $dir ) ) {
        throw new \Exception( 'Target directory is not writable: ' . $dir );
    }
    $placeholder = tempnam( $dir, $this->setting['id'] . '-' );
    if ( false === $placeholder ) {
        throw new \Exception( 'Target directory is not writable: ' . $dir );
    }
    // Only the unique name is needed; remove the empty file tempnam() created
    // so it is not left behind next to the generated ePub.
    unlink( $placeholder );
    return $placeholder . '.epub';
}

/**
 * Get linear property.
 *
 * Returns 'no' when the given HTML name is listed in the 'hidden' setting
 * and 'yes' otherwise.
 *
 * @param string|int $key HTML name. May arrive as an int: PHP converts
 *                        numeric-string array keys (e.g. from "1.html") to integers.
 * @return string 'yes' or 'no'.
 */
protected function getLinear( $key ) {
    // Normalise both sides to strings so the strict comparison still matches
    // numeric names, and accept a single name given as a plain string.
    $hidden = array_map( 'strval', (array) ( $this->setting['hidden'] ?? [] ) );
    if ( in_array( (string) $key, $hidden, true ) ) {
        return 'no';
    }
    return 'yes';
}

/**
 * Load HTML and save it in $htmls.
 *
 * Reads every "*.html" file directly under the 'root' setting, stores its
 * content keyed by the file name without extension, and records the newest
 * modification time in $newest.
 *
 * @return void
 * @throws \RuntimeException If a matched file cannot be read.
 */
public function loadHtml() {
    $files = glob( $this->setting['root'] . '/*.html' );
    if ( false === $files ) {
        // glob() reports errors with false; there is nothing to iterate over.
        return;
    }
    foreach ( $files as $file ) {
        $content = file_get_contents( $file );
        if ( false === $content ) {
            // Do not store false as if it were a document.
            throw new \RuntimeException( 'Failed to read HTML file: ' . $file );
        }
        $this->htmls[ basename( $file, '.html' ) ] = $content;
        // Save modified time.
        $time = filemtime( $file );
        if ( $time > $this->newest ) {
            $this->newest = $time;
        }
    }
}

/**
 * Print the loaded setting and the collected HTML strings for debugging.
 *
 * @return void
 */
public function dumpSetting() {
    foreach ( [ $this->setting, $this->htmls ] as $value ) {
        var_dump( $value );
    }
}
}
67 changes: 67 additions & 0 deletions src/Hametuha/HamePub/Parser/SettingParser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?php

namespace Hametuha\HamePub\Parser;

/**
 * Parse setting JSON file and ensure it has all required keys.
 */
trait SettingParser {

    /**
     * Get setting from file.
     *
     * The decoded JSON is merged over defaultSetting(), then the 'id',
     * 'published' and 'author' fields are validated.
     *
     * @param string $file_path Path to setting file.
     * @return array Setting with every key of defaultSetting() present.
     * @throws \Exception If the file is missing, is not valid JSON, or fails validation.
     */
    public function getSettingFromFile( $file_path = './setting.json' ) {
        if ( ! is_file( $file_path ) ) {
            throw new \Exception( 'Setting file not found.' );
        }
        $json = file_get_contents( $file_path );
        $user = ( false === $json ) ? null : json_decode( $json, true );
        if ( ! is_array( $user ) ) {
            throw new \Exception( 'Setting file is not valid JSON.' );
        }
        $setting = array_replace_recursive( $this->defaultSetting(), $user );
        // 'hidden' is a plain list: merging it index by index would make it
        // impossible to override the default with an empty list, so a list
        // given in the file replaces the default as a whole.
        if ( isset( $user['hidden'] ) && is_array( $user['hidden'] ) ) {
            $setting['hidden'] = array_values( $user['hidden'] );
        }
        // Validate if id is set.
        if ( empty( $setting['id'] ) ) {
            throw new \Exception( 'id field must not be empty.' );
        }
        // Validate published date. [0-9] keeps the match to ASCII digits and \z
        // rejects a trailing newline, which "$" would let through.
        $published = $setting['published'];
        if ( ! is_string( $published ) || ! preg_match( '/\A[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z\z/', $published ) ) {
            $shown = is_scalar( $published ) ? (string) $published : gettype( $published );
            throw new \Exception( 'published field is malformed. Should be GMT in ISO8601 e.g. "2000-01-01T00:00:00Z": ' . $shown );
        }
        // Validate if author is set.
        if ( empty( $setting['author'] ) ) {
            throw new \Exception( 'At least 1 author should be set.' );
        }
        return $setting;
    }

    /**
     * Default setting.
     *
     * Values here are used for every key the setting file does not provide.
     *
     * @return array
     */
    public function defaultSetting() {
        return [
            'lang'      => 'en',
            'id'        => '',
            'isbn'      => '',
            'title'     => '',
            'author'    => '',
            'published' => '',
            'root'      => './dist/',
            'header'    => [
                'max_level' => 3,
                'depth'     => 2,
            ],
            'toc'       => 'Table of Contents',
            'url_base'  => '#\./#u',
            'target'    => './tmp',
            'direction' => 'default',
            'hidden'    => [ 'toc' ],
            'cover'     => '',
        ];
    }
}
Loading

0 comments on commit 8a5c4ff

Please sign in to comment.