Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Support for JWPUB format #4

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .abstruse.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ matrix:
- image: mrcyjanek/goprod:core-android
env: M=android
script:
- make $M
- if [[ "$M" == "android" && "$ABSTRUSE_BRANCH" != "master" ]]; then true; else make $M; fi
- cp build/deb/*.deb /apt/ || true
- cp build/bin/* /archive || true
- cp build/apk/* /archive || true
17 changes: 10 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
VERSION=2.0.0

APPENDIX=$(shell ./genappendix.sh)
install:
cp build/bin/${BINNAME}_${GOOS}_${GOARCH} /usr/bin/jwstudy
cp dist/debian/logo.png /usr/share/icons/hicolor/scalable/apps/jwstudy.png
cp dist/debian/logo.png /usr/share/pixmaps/jwstudy.png
cp dist/debian/jwstudy.desktop /usr/share/applications

show_appendix:
@echo ${APPENDIX}

android:
goprod -combo="android/386;android/amd64;android/arm;android/arm64;android/all" -tags="nogui" -shouldpkg=true -binname="jwstudy" -version="${VERSION}" -appurl="http://127.0.0.1:4365/" -ldflags="-X main.dataDir=/data/data/x.x.jwstudy/ -X git.mrcyjanek.net/mrcyjanek/jwapi/webui.SPort=4365"
goprod -combo="android/386;android/amd64;android/arm;android/arm64;android/all" -tags="nogui" -shouldpkg=true -binname="jwstudy${APPENDIX}" -version="${VERSION}" -appurl="http://127.0.0.1:4365/" -ldflags="-X main.dataDir=/data/data/x.x.jwstudy/ -X git.mrcyjanek.net/mrcyjanek/jwapi/webui.SPort=4365"

linux-lorca:
goprod -combo="linux/amd64;linux/arm;linux/arm64;linux/386" -binname="jwstudy-lorca" -tags="guilorca" -version="${VERSION}"
goprod -combo="linux/amd64;linux/arm;linux/arm64;linux/386" -binname="jwstudy${APPENDIX}-lorca" -tags="guilorca" -version="${VERSION}"

linux-browser:
goprod -combo="linux/amd64;linux/arm;linux/arm64;linux/386" -binname="jwstudy-browser" -tags="guibrowser" -version="${VERSION}"
goprod -combo="linux/amd64;linux/arm;linux/arm64;linux/386" -binname="jwstudy${APPENDIX}-browser" -tags="guibrowser" -version="${VERSION}"

windows-lorca:
goprod -combo="windows/amd64;windows/386" -binname="jwstudy-lorca" -tags="guilorca" -version="${VERSION}"
goprod -combo="windows/amd64;windows/386" -binname="jwstudy${APPENDIX}-lorca" -tags="guilorca" -version="${VERSION}"

windows-browser:
goprod -combo="windows/amd64;windows/386" -binname="jwstudy-browser" -tags="guibrowser" -version="${VERSION}"
goprod -combo="windows/amd64;windows/386" -binname="jwstudy${APPENDIX}-browser" -tags="guibrowser" -version="${VERSION}"
7 changes: 7 additions & 0 deletions genappendix.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Prints the suffix appended to binary names by the Makefile:
# nothing when ABSTRUSE_BRANCH is unset, empty or "master",
# otherwise "-<branch>" (no trailing newline in either case).
case "$ABSTRUSE_BRANCH" in
    "" | "master")
        echo -n -e ""
        ;;
    *)
        echo -n -e "-$ABSTRUSE_BRANCH"
        ;;
esac
206 changes: 206 additions & 0 deletions libjw/jwpub.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
package libjw

// Most of this file is inspired by
// https://github.com/Miaosi001/JW-Library-macOS/blob/main/JWLibrary/Utility/JWPubExtractor.swift

import (
"database/sql"
"fmt"
"log"

_ "github.com/mattn/go-sqlite3" // sqlite driver

"git.mrcyjanek.net/mrcyjanek/jwapi/helpers"
)

// THIS DOESN'T WORK
// DO NOT USE

// JWPUBWordMap couples one row of the Word table with the search-index blobs
// read for that word from the SearchIndexDocument table.
type JWPUBWordMap struct {
	WordID int    // value of the WordId column
	Word   string // value of the Word column

	// These counters are declared for completeness; JWPUBtoMarkdown does not
	// populate them.
	SearchIndexDocumentID, TextUnitCount, WordOccurrenceCount int

	// Raw blob columns of SearchIndexDocument, consumed byte by byte while
	// the text is being reconstructed.
	TextUnitIndices, PositionalList, PositionalListIndex []byte
}

// JWPUBtoMarkdown unpacks the .jwpub archive at path jwpub and tries to
// rebuild the text of every document from the publication's search index,
// printing each reconstructed document to standard output.
//
// NOTE: This function has a lot of hardcoded values (among them the database
// file name w_E_202110.db) and is *not* ready for production usage. Despite
// its name it does not produce Markdown yet, and every error ends the process
// through log.Fatal.
func JWPUBtoMarkdown(jwpub string) {
	//var wadd = make(map[string]int)
	path := helpers.GetDataDir() + "/_tmp_jwpub"
	log.Println(jwpub)

	// A .jwpub file is a zip archive holding a second zip named "contents".
	err := helpers.Unzip(jwpub, path)
	if err != nil {
		log.Fatal(err)
	}
	err = helpers.Unzip(path+"/contents", path+"/c")
	if err != nil {
		log.Fatal(err)
	}
	db, err := sql.Open("sqlite3", path+"/c/w_E_202110.db")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	rows, err := db.Query("SELECT WordId, Word FROM Word WHERE 1")
	if err != nil {
		log.Fatal(err)
	}
	defer rows.Close()

	// Load every word together with its search-index blobs.
	var wordsmap []JWPUBWordMap
	for rows.Next() {
		var wid int
		var w string
		err = rows.Scan(&wid, &w)
		if err != nil {
			log.Fatal(err)
		}
		r := db.QueryRow("SELECT TextUnitIndices, PositionalList, PositionalListIndex FROM SearchIndexDocument WHERE WordId=?", wid)
		var tui []byte
		var pl []byte
		var pli []byte
		err = r.Scan(&tui, &pl, &pli)
		if err != nil {
			log.Fatal(err)
		}
		wordsmap = append(wordsmap, JWPUBWordMap{
			WordID:              wid,
			Word:                w,
			TextUnitIndices:     tui,
			PositionalList:      pl,
			PositionalListIndex: pli,
		})
	}
	// Next returning false can also mean the iteration failed half-way.
	if err = rows.Err(); err != nil {
		log.Fatal(err)
	}

	sIndexes := wordsmap
	var docID = 0
	var curDocIndex = []byte{128}
	var fullText = make(map[int]string)

	// The body always runs at least once, even when no words were loaded.
	for {
		// Look for the word whose next recorded position equals curDocIndex.
		var found = false
		for i := range sIndexes {
			if !byteStartsWith(sIndexes[i].TextUnitIndices, []byte{128}) {
				continue
			}
			if !byteStartsWith(sIndexes[i].PositionalList, curDocIndex) {
				continue
			}
			// An empty index blob has no counter to read; skip it instead of
			// panicking on the [0] access below.
			if len(sIndexes[i].PositionalListIndex) == 0 {
				continue
			}
			var rem = sIndexes[i].PositionalListIndex[0]
			if rem <= 128 {
				continue
			}
			found = true
			var wd = sIndexes[i].Word
			//if wd != String(fullText[docID]?.split(separator: " ").last ?? "").unaccent() {
			//	print(curDocIndex, wd)
			//	fullText[docID]!.append(wd + " ")
			//}
			fullText[docID] += " " + wd

			// Consume the matched position and decrement the leading counter
			// byte of the positional list index.
			sIndexes[i].PositionalList = sIndexes[i].PositionalList[len(curDocIndex):]
			rem = rem - 1
			sIndexes[i].PositionalListIndex = sIndexes[i].PositionalListIndex[1:]
			sIndexes[i].PositionalListIndex = insertbyte(sIndexes[i].PositionalListIndex, rem, 0)

			// Advance curDocIndex to the next position. repo appears to act
			// as a carry flag between bytes — NOTE(review): confirm the
			// encoding against the Swift original.
			var curDocIndexArray = curDocIndex
			var repo = false
			for j := range curDocIndexArray {
				if j == 0 {
					if (curDocIndexArray[j] == 255 && len(curDocIndexArray) == 1) || (curDocIndexArray[j] == 127 && len(curDocIndexArray) > 1) {
						repo = true
						curDocIndex = []byte{0}
						if repo && j == len(curDocIndexArray)-1 {
							curDocIndex = append(curDocIndex, 129)
							repo = false
						}
					} else {
						curDocIndex = []byte{curDocIndexArray[j] + 1}
						repo = false
					}
				} else {
					if repo {
						if curDocIndexArray[j] == 255 {
							repo = true
							curDocIndex = append(curDocIndex, 129)
							if repo && j == len(curDocIndexArray)-1 {
								curDocIndex = append(curDocIndex, 129)
								repo = false
							}
						} else {
							curDocIndex = append(curDocIndex, curDocIndexArray[j]+1)
							repo = false
						}
					} else {
						curDocIndex = append(curDocIndex, curDocIndexArray[j])
					}
				}
			}
			break
		}

		if !found {
			// No word matched: the current document is finished. Shift every
			// word's indices to the next document and drop exhausted words.
			var toRem []int
			for i := range sIndexes {
				// Nothing left to shift (e.g. an empty blob); drop the word
				// instead of panicking on the [0] access below.
				if len(sIndexes[i].TextUnitIndices) == 0 {
					toRem = append(toRem, i)
					continue
				}
				var docI = sIndexes[i].TextUnitIndices[0]
				sIndexes[i].TextUnitIndices = sIndexes[i].TextUnitIndices[1:]
				if docI == 128 {
					if len(sIndexes[i].TextUnitIndices) != 0 {
						docI = sIndexes[i].TextUnitIndices[0]
						sIndexes[i].TextUnitIndices = sIndexes[i].TextUnitIndices[1:]
						docI = docI - 1
						sIndexes[i].TextUnitIndices = insertbyte(sIndexes[i].TextUnitIndices, docI, 0)
					}
				} else {
					docI = docI - 1
					sIndexes[i].TextUnitIndices = insertbyte(sIndexes[i].TextUnitIndices, docI, 0)
				}
				if len(sIndexes[i].TextUnitIndices) == 0 {
					toRem = append(toRem, i)
				}
				// Strip a leading 128 counter byte; skip empty blobs.
				if len(sIndexes[i].PositionalListIndex) > 0 && sIndexes[i].PositionalListIndex[0] == 128 {
					sIndexes[i].PositionalListIndex = sIndexes[i].PositionalListIndex[1:]
				}
			}
			// Remove back to front so the earlier indices stay valid.
			for i := len(toRem) - 1; i >= 0; i-- {
				sIndexes = append(sIndexes[:toRem[i]], sIndexes[toRem[i]+1:]...)
			}
			fmt.Println(fullText[docID])
			docID++
			curDocIndex = []byte{128}
		}
		if len(sIndexes) == 0 {
			break
		}
	}
	//print(fullText)
	//for (id, text) in fullText where text != "" {
	//	let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent("w_I_202110/contents/\(id).txt")
	//	do {
	//		print(dir)
	//		try text.write(to: dir, atomically: true, encoding: String.Encoding.utf8)
	//	} catch {
	//		print("Error")
	//	}
	//}
}

// insertbyte returns a new slice holding the bytes of a with c inserted at
// index i (0 <= i <= len(a)).
//
// The result is always built in a freshly allocated buffer: appending onto
// a[:i] directly would overwrite a's backing array whenever it has spare
// capacity, corrupting any other slice that shares that array.
func insertbyte(a []byte, c byte, i int) []byte {
	out := make([]byte, 0, len(a)+1)
	out = append(out, a[:i]...)
	out = append(out, c)
	return append(out, a[i:]...)
}

// byteStartsWith reports whether the leading bytes of bs equal with.
// An empty with is a prefix of every slice, including a nil one.
func byteStartsWith(bs []byte, with []byte) bool {
	n := len(with)
	if n > len(bs) {
		return false
	}
	// Comparing through string conversions does not copy the data.
	return string(bs[:n]) == string(with)
}
14 changes: 10 additions & 4 deletions libjw/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -724,10 +724,16 @@ func GetPublication(publication string, language string, format string, issue st
}
f.Write(body)
f.Sync()
fmt.Println("[libjw][GetPublication] Extracting...", pub.Title)
err = helpers.Unzip(f.Name(), extractpath)
if err != nil {
return structs.PublicationV2{}, errors.New("[libjw][GetPublication] " + err.Error() + " (zipslip of something? Maybe corrupted download, failed to uzip)")
defer f.Close()
if format == "EPUB" {
fmt.Println("[libjw][GetPublication] Extracting...", pub.Title)
err = helpers.Unzip(f.Name(), extractpath)
if err != nil {
return structs.PublicationV2{}, errors.New("[libjw][GetPublication] " + err.Error() + " (zipslip of something? Maybe corrupted download, failed to uzip)")
}
} else if format == "JWPUB" {
fmt.Println("[libjw][GetPublication] Parsing publication...[JWPUB]", pub.Title)
JWPUBtoMarkdown(f.Name())
}
struc = structs.PublicationV2{
Title: pub.Title,
Expand Down
2 changes: 1 addition & 1 deletion utils/getwoljwlangs/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,6 @@ func main() {
code := strings.Split(strings.Split(l, `data-rsconf="`)[1], `"`)[0]
title := strings.Split(strings.Split(l, `data-title="`)[1], `"`)[0]

log.Println("code:", code)
log.Println("code:", code, title)
}
}
21 changes: 21 additions & 0 deletions utils/jwpub-test/parse.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package main

import (
"log"
"os"

"git.mrcyjanek.net/mrcyjanek/jwapi/helpers"
"git.mrcyjanek.net/mrcyjanek/jwapi/libjw"
)

// main prepares the data directory and runs the experimental JWPUB parser on
// a file named pub.jwpub located in the current working directory.
func main() {
	const pubFile = "pub.jwpub"

	dir := helpers.GetDataDir()
	helpers.SetDataDir(dir)
	helpers.Mkdir(dir + "/raw")
	helpers.DBInit(dir)

	//libjw.GetPublication("w", "E", "JWPUB", "202110")

	// Tell the user where to get a sample publication if none is present.
	_, statErr := os.Stat(pubFile)
	if os.IsNotExist(statErr) {
		log.Fatal("Hey! Please put `pub.jwpub' in this directory, you can get one from this link: https://www.jw.org/download/?issue=202107&output=html&pub=g&fileformat=JWPUB&alllangs=0&langwritten=E&txtCMSLang=E&isBible=0")
	}
	libjw.JWPUBtoMarkdown(pubFile)
}
21 changes: 2 additions & 19 deletions webui/apiDB.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,14 @@ package webui

import (
"fmt"
"log"
"net/http"
"net/url"
"strings"

"git.mrcyjanek.net/mrcyjanek/jwapi/helpers"
)

func apiDBget(w http.ResponseWriter, req *http.Request) {
url := req.URL.Path
splited := strings.Split(string(url), "/")
if len(splited) < 5 {
fmt.Fprintln(w, "/api/db/get/<key>")
return
}
key := splited[4]
if key == "" {
w.Write([]byte("0"))
return
}
w.Write(helpers.Get(key))
w.Write(helpers.Get(req.URL.RawQuery))
}

func apiDBset(w http.ResponseWriter, req *http.Request) {
Expand All @@ -38,10 +25,6 @@ func apiDBset(w http.ResponseWriter, req *http.Request) {
return
}
query := req.URL.RawQuery
value, err := url.QueryUnescape(query)
if err != nil {
log.Fatal(err)
}
helpers.Set(key, []byte(value))
helpers.Set(key, []byte(query))

}
2 changes: 1 addition & 1 deletion webui/html/static/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ function dbGet(key) {
return localStorage[key]
}
var xhr = new XMLHttpRequest();
xhr.open("GET", "/api/db/get/"+encodeURIComponent(key), false);
xhr.open("GET", "/api/db/get?"+encodeURIComponent(key), false);
xhr.onerror = function (e) {
console.error(xhr.statusText);
};
Expand Down