From 724fb67ca95c7c7a71135cc98a5c073f900e1520 Mon Sep 17 00:00:00 2001
From: Czarek Nakamoto <cyjan@mrcyjanek.net>
Date: Fri, 13 Aug 2021 11:33:01 +0200
Subject: [PATCH 1/8] WIP: Support for JWPUB format  * Make abstruse generate
 binaries with correct branches  * Fix small compile errors

---
 Makefile                    |  15 +--
 genappendix.sh              |   7 ++
 libjw/jwpub.go              | 200 ++++++++++++++++++++++++++++++++++++
 libjw/main.go               |  14 ++-
 utils/getwoljwlangs/main.go |   2 +-
 utils/jwpub-test/parse.go   |  15 +++
 6 files changed, 242 insertions(+), 11 deletions(-)
 create mode 100755 genappendix.sh
 create mode 100644 libjw/jwpub.go
 create mode 100644 utils/jwpub-test/parse.go

diff --git a/Makefile b/Makefile
index 8a28cd1..43f20c5 100755
--- a/Makefile
+++ b/Makefile
@@ -1,21 +1,24 @@
 VERSION=2.0.0
-
+APPENDIX=$(shell ./genappendix.sh)
 install:
 	cp build/bin/${BINNAME}_${GOOS}_${GOARCH} /usr/bin/jwstudy
 	cp dist/debian/logo.png /usr/share/icons/hicolor/scalable/apps/jwstudy.png
 	cp dist/debian/jwstudy.desktop /usr/share/applications
 
+show_appendix:
+	@echo ${APPENDIX}
+
 android:
-	goprod -combo="android/386;android/amd64;android/arm;android/arm64;android/all" -tags="nogui" -shouldpkg=true -binname="jwstudy" -version="${VERSION}" -appurl="http://127.0.0.1:4365/"  -ldflags="-X main.dataDir=/data/data/x.x.jwstudy/ -X git.mrcyjanek.net/mrcyjanek/jwapi/webui.SPort=4365"
+	goprod -combo="android/386;android/amd64;android/arm;android/arm64;android/all" -tags="nogui" -shouldpkg=true -binname="jwstudy${APPENDIX}" -version="${VERSION}" -appurl="http://127.0.0.1:4365/"  -ldflags="-X main.dataDir=/data/data/x.x.jwstudy/ -X git.mrcyjanek.net/mrcyjanek/jwapi/webui.SPort=4365"
 
 linux-lorca:
-	goprod -combo="linux/amd64;linux/arm;linux/arm64;linux/386" -binname="jwstudy-lorca" -tags="guilorca" -version="${VERSION}"
+	goprod -combo="linux/amd64;linux/arm;linux/arm64;linux/386" -binname="jwstudy${APPENDIX}-lorca" -tags="guilorca" -version="${VERSION}"
 
 linux-browser:
-	goprod -combo="linux/amd64;linux/arm;linux/arm64;linux/386" -binname="jwstudy-browser" -tags="guibrowser" -version="${VERSION}"
+	goprod -combo="linux/amd64;linux/arm;linux/arm64;linux/386" -binname="jwstudy${APPENDIX}-browser" -tags="guibrowser" -version="${VERSION}"
 
 windows-lorca:
-	goprod -combo="windows/amd64;windows/386" -binname="jwstudy-lorca" -tags="guilorca" -version="${VERSION}"
+	goprod -combo="windows/amd64;windows/386" -binname="jwstudy${APPENDIX}-lorca" -tags="guilorca" -version="${VERSION}"
 
 windows-browser:
-	goprod -combo="windows/amd64;windows/386" -binname="jwstudy-browser" -tags="guibrowser" -version="${VERSION}"
+	goprod -combo="windows/amd64;windows/386" -binname="jwstudy${APPENDIX}-browser" -tags="guibrowser" -version="${VERSION}"
diff --git a/genappendix.sh b/genappendix.sh
new file mode 100755
index 0000000..3a147b2
--- /dev/null
+++ b/genappendix.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+if [[ "X$ABSTRUSE_BRANCH" == "X" || "X$ABSTRUSE_BRANCH" == "Xmaster" ]];
+then # ABSTRUSE_BRANCH is unset/empty or "master": print nothing, so binary names get no suffix
+    echo -n -e ""
+else # any other branch: print "-<branch>" (no trailing newline); the Makefile appends it to -binname
+    echo -n -e "-$ABSTRUSE_BRANCH"
+fi;
\ No newline at end of file
diff --git a/libjw/jwpub.go b/libjw/jwpub.go
new file mode 100644
index 0000000..8e4ae73
--- /dev/null
+++ b/libjw/jwpub.go
@@ -0,0 +1,200 @@
+package libjw
+
+// Most of this file is inspired by
+// https://github.com/Miaosi001/JW-Library-macOS/blob/main/JWLibrary/Utility/JWPubExtractor.swift
+
+import (
+	"database/sql"
+	"log"
+	"time"
+
+	_ "github.com/mattn/go-sqlite3" // sqlite driver
+
+	"git.mrcyjanek.net/mrcyjanek/jwapi/helpers"
+)
+
+// THIS DOESN'T WORK
+// DO NOT USE
+
+type JWPUBWordMap struct { // one row of the Word table plus the SearchIndexDocument bytes looked up by its WordId
+	WordID                int    // Word.WordId column
+	Word                  string // Word.Word column
+	SearchIndexDocumentID int    // left at its zero value by JWPUBtoMarkdown
+	TextUnitCount         int    // left at its zero value by JWPUBtoMarkdown
+	WordOccurrenceCount   int    // left at its zero value by JWPUBtoMarkdown
+	TextUnitIndices       []byte // raw bytes of SearchIndexDocument.TextUnitIndices; consumed byte-by-byte while decoding
+	PositionalList        []byte // raw bytes of SearchIndexDocument.PositionalList; matched against the current position prefix
+	PositionalListIndex   []byte // raw bytes of SearchIndexDocument.PositionalListIndex; first byte is compared against 128
+}
+
+// NOTE: This function has a lot of hardcoded values.
+// It is *not* ready for production usage.
+func JWPUBtoMarkdown(jwpub string) {
+	//var wadd = make(map[string]int)
+	path := helpers.GetDataDir() + "/_tmp_jwpub"
+	log.Println(jwpub)
+	err := helpers.Unzip(jwpub, path)
+	if err != nil {
+		log.Fatal(err)
+	}
+	err = helpers.Unzip(path+"/contents", path+"/c")
+	if err != nil {
+		log.Fatal(err)
+	}
+	db, err := sql.Open("sqlite3", path+"/c/fg_E.db")
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer db.Close()
+	row, err := db.Query("SELECT WordId, Word FROM Word WHERE 1")
+	if err != nil {
+		log.Fatal(err)
+	}
+	var wordsmap []JWPUBWordMap
+	for row.Next() {
+		var wid int
+		var w string
+		err = row.Scan(&wid, &w)
+		if err != nil {
+			log.Fatal(err)
+		}
+		r := db.QueryRow("SELECT TextUnitIndices, PositionalListIndex, PositionalList FROM SearchIndexDocument WHERE WordId=?", wid)
+		var tui []byte
+		var pli []byte
+		var pl []byte
+		err = r.Scan(&tui, &pli, &pl)
+		if err != nil {
+			log.Fatal(err)
+		}
+		wordsmap = append(wordsmap, JWPUBWordMap{
+			WordID:              wid,
+			Word:                w,
+			TextUnitIndices:     tui,
+			PositionalList:      pl,
+			PositionalListIndex: pli,
+		})
+	}
+
+	var loop = true
+	var docID = 0
+	var curDocIndex = []byte{128}
+	var fullText = make(map[int]string)
+
+	sIndexes := wordsmap
+	for loop {
+		var finded = false
+		for i := range sIndexes {
+			//log.Println("for i:= range sIndexes")
+			if sIndexes[i].TextUnitIndices[0] == 128 {
+				//log.Println("if sIndexes[i].TextUnitIndices[0] == 128 {")
+				//log.Println("byteStartsWith(sIndexes[i].PositionalList, curDocIndex): ", byteStartsWith(sIndexes[i].PositionalList, curDocIndex))
+				if byteStartsWith(sIndexes[i].PositionalList, curDocIndex) {
+					var rem = sIndexes[i].PositionalListIndex[0]
+					if rem > 128 {
+						finded = true
+						wd := sIndexes[i].Word
+						//if wd != String(fullText[docID]?.split(separator: " ").last ?? "").unaccent() {
+						//	print(curDocIndex, wd)
+						//	fullText[docID]!.append(wd + " ")
+						//}
+						fullText[docID] += " " + wd
+						log.Println("fullText[docID]:", fullText[docID])
+						time.Sleep(time.Second)
+						sIndexes[i].PositionalList = sIndexes[i].PositionalList[len(curDocIndex):]
+						rem = rem - 1
+						sIndexes[i].PositionalListIndex[0] = rem
+						curDocIndexArray := curDocIndex
+						var repo = false
+						for j := range curDocIndexArray {
+							if j == 0 {
+								if (curDocIndexArray[j] == 255 && len(curDocIndexArray) == 1) || (curDocIndexArray[j] == 127 && len(curDocIndexArray) > 1) {
+									repo = true
+									curDocIndex = []byte{0}
+									if repo && j == len(curDocIndexArray)-1 {
+										curDocIndex = append(curDocIndex, 129)
+										repo = false
+									}
+								} else {
+									curDocIndex = []byte{curDocIndexArray[j] + 1}
+									repo = false
+								}
+							} else {
+								if repo {
+									if curDocIndexArray[j] == 255 {
+										repo = true
+										curDocIndex = append(curDocIndex, 129)
+										if repo && j == len(curDocIndexArray)-1 {
+											curDocIndex = append(curDocIndex, 129)
+											repo = false
+										}
+									} else {
+										curDocIndex = append(curDocIndex, curDocIndexArray[j]+1)
+										repo = false
+									}
+								} else {
+									curDocIndex = append(curDocIndex, curDocIndexArray[j])
+								}
+							}
+						}
+						break
+					} else {
+						sIndexes[i].PositionalListIndex = sIndexes[i].PositionalListIndex[1:]
+					}
+				}
+			}
+		}
+		if !finded {
+			var toRem []int
+			for i := range sIndexes {
+				//var docI = sIndexes[i].TextUnitIndices.prefix(3)
+				//sIndexes[i].TextUnitIndices.removeFirst(3)
+				if sIndexes[i].TextUnitIndices[0] == 128 {
+					if len(sIndexes[i].TextUnitIndices) != 1 {
+						sIndexes[i].TextUnitIndices[1]--
+					}
+				} else {
+					sIndexes[i].TextUnitIndices[0]--
+				}
+				if len(sIndexes[i].TextUnitIndices) == 0 {
+					log.Println("toRem", i)
+					toRem = append(toRem, i)
+				}
+
+				if len(sIndexes[i].PositionalListIndex) > 0 && sIndexes[i].PositionalListIndex[0] == 128 {
+					sIndexes[i].PositionalListIndex = sIndexes[i].PositionalListIndex[1:]
+				}
+			}
+			for i := len(toRem); i > 0; i-- {
+				log.Println("toRem2", sIndexes[toRem[i]])
+				sIndexes = append(sIndexes[:toRem[i]], sIndexes[toRem[i]+1:]...)
+			}
+			docID++
+			curDocIndex = []byte{128}
+		}
+		if len(sIndexes) == 0 {
+			loop = false
+		}
+	}
+	//	for (id, text) in fullText where text != "" {
+	//		let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent("w_I_202110/contents/\(id).txt")
+	//		do {
+	//			print(dir)
+	//			try text.write(to: dir, atomically: true, encoding: String.Encoding.utf8)
+	//		} catch {
+	//			print("Error")
+	//		}
+	//	}
+
+}
+
+func byteStartsWith(bs []byte, with []byte) bool {
+	if len(bs) != len(with) {
+		return false
+	}
+	for i := range bs {
+		if bs[i] != with[i] {
+			return false
+		}
+	}
+	return true
+}
diff --git a/libjw/main.go b/libjw/main.go
index 50cd2cc..2633581 100644
--- a/libjw/main.go
+++ b/libjw/main.go
@@ -724,10 +724,16 @@ func GetPublication(publication string, language string, format string, issue st
 		}
 		f.Write(body)
 		f.Sync()
-		fmt.Println("[libjw][GetPublication] Extracting...", pub.Title)
-		err = helpers.Unzip(f.Name(), extractpath)
-		if err != nil {
-			return structs.PublicationV2{}, errors.New("[libjw][GetPublication] " + err.Error() + " (zipslip of something? Maybe corrupted download, failed to uzip)")
+		defer f.Close()
+		if format == "EPUB" {
+			fmt.Println("[libjw][GetPublication] Extracting...", pub.Title)
+			err = helpers.Unzip(f.Name(), extractpath)
+			if err != nil {
+				return structs.PublicationV2{}, errors.New("[libjw][GetPublication] " + err.Error() + " (zipslip of something? Maybe corrupted download, failed to uzip)")
+			}
+		} else if format == "JWPUB" {
+			fmt.Println("[libjw][GetPublication] Parsing publication...[JWPUB]", pub.Title)
+			JWPUBtoMarkdown(f.Name())
 		}
 		struc = structs.PublicationV2{
 			Title:  pub.Title,
diff --git a/utils/getwoljwlangs/main.go b/utils/getwoljwlangs/main.go
index b31a789..f2923b5 100644
--- a/utils/getwoljwlangs/main.go
+++ b/utils/getwoljwlangs/main.go
@@ -26,6 +26,6 @@ func main() {
 		code := strings.Split(strings.Split(l, `data-rsconf="`)[1], `"`)[0]
 		title := strings.Split(strings.Split(l, `data-title="`)[1], `"`)[0]
 
-		log.Println("code:", code)
+		log.Println("code:", code, title)
 	}
 }
diff --git a/utils/jwpub-test/parse.go b/utils/jwpub-test/parse.go
new file mode 100644
index 0000000..bac2e5c
--- /dev/null
+++ b/utils/jwpub-test/parse.go
@@ -0,0 +1,15 @@
+package main
+
+import (
+	"git.mrcyjanek.net/mrcyjanek/jwapi/helpers"
+	"git.mrcyjanek.net/mrcyjanek/jwapi/libjw"
+)
+
+func main() {
+	dataDir := helpers.GetDataDir()
+	helpers.SetDataDir(dataDir)
+	helpers.Mkdir(dataDir + "/raw")
+	helpers.DBInit(dataDir)
+	libjw.GetPublication("fg", "E", "JWPUB", "")
+	// libjw.JWPUBtoMarkdown("fg_E.jwpub.orig")
+}

From c08e38f5af712d32f0a2078877ddeb363e3dd38e Mon Sep 17 00:00:00 2001
From: Czarek Nakamoto <cyjan@mrcyjanek.net>
Date: Fri, 13 Aug 2021 11:34:58 +0200
Subject: [PATCH 2/8] Add correct path for icon on linux

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 43f20c5..c14f9ec 100755
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ VERSION=2.0.0
 APPENDIX=$(shell ./genappendix.sh)
 install:
 	cp build/bin/${BINNAME}_${GOOS}_${GOARCH} /usr/bin/jwstudy
-	cp dist/debian/logo.png /usr/share/icons/hicolor/scalable/apps/jwstudy.png
+	cp dist/debian/logo.png /usr/share/pixmaps/jwstudy.png
 	cp dist/debian/jwstudy.desktop /usr/share/applications
 
 show_appendix:

From ac53afea72442825de86b8bd65ad9ee5d46891e8 Mon Sep 17 00:00:00 2001
From: Czarek Nakamoto <cyjan@mrcyjanek.net>
Date: Fri, 13 Aug 2021 12:33:33 +0200
Subject: [PATCH 3/8] Fixes for the docID = 0

Rest is *still* not working
---
 libjw/jwpub.go            | 61 +++++++++++++++++++++++++--------------
 utils/jwpub-test/parse.go |  2 +-
 2 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/libjw/jwpub.go b/libjw/jwpub.go
index 8e4ae73..0c7d5f6 100644
--- a/libjw/jwpub.go
+++ b/libjw/jwpub.go
@@ -5,8 +5,8 @@ package libjw
 
 import (
 	"database/sql"
+	"fmt"
 	"log"
-	"time"
 
 	_ "github.com/mattn/go-sqlite3" // sqlite driver
 
@@ -41,7 +41,7 @@ func JWPUBtoMarkdown(jwpub string) {
 	if err != nil {
 		log.Fatal(err)
 	}
-	db, err := sql.Open("sqlite3", path+"/c/fg_E.db")
+	db, err := sql.Open("sqlite3", path+"/c/w_E_202110.db")
 	if err != nil {
 		log.Fatal(err)
 	}
@@ -58,11 +58,11 @@ func JWPUBtoMarkdown(jwpub string) {
 		if err != nil {
 			log.Fatal(err)
 		}
-		r := db.QueryRow("SELECT TextUnitIndices, PositionalListIndex, PositionalList FROM SearchIndexDocument WHERE WordId=?", wid)
+		r := db.QueryRow("SELECT TextUnitIndices, PositionalList, PositionalListIndex FROM SearchIndexDocument WHERE WordId=?", wid)
 		var tui []byte
-		var pli []byte
 		var pl []byte
-		err = r.Scan(&tui, &pli, &pl)
+		var pli []byte
+		err = r.Scan(&tui, &pl, &pli)
 		if err != nil {
 			log.Fatal(err)
 		}
@@ -85,8 +85,7 @@ func JWPUBtoMarkdown(jwpub string) {
 		var finded = false
 		for i := range sIndexes {
 			//log.Println("for i:= range sIndexes")
-			if sIndexes[i].TextUnitIndices[0] == 128 {
-				//log.Println("if sIndexes[i].TextUnitIndices[0] == 128 {")
+			if len(sIndexes[i].TextUnitIndices) > 0 && sIndexes[i].TextUnitIndices[0] == 128 {
 				//log.Println("byteStartsWith(sIndexes[i].PositionalList, curDocIndex): ", byteStartsWith(sIndexes[i].PositionalList, curDocIndex))
 				if byteStartsWith(sIndexes[i].PositionalList, curDocIndex) {
 					var rem = sIndexes[i].PositionalListIndex[0]
@@ -98,8 +97,6 @@ func JWPUBtoMarkdown(jwpub string) {
 						//	fullText[docID]!.append(wd + " ")
 						//}
 						fullText[docID] += " " + wd
-						log.Println("fullText[docID]:", fullText[docID])
-						time.Sleep(time.Second)
 						sIndexes[i].PositionalList = sIndexes[i].PositionalList[len(curDocIndex):]
 						rem = rem - 1
 						sIndexes[i].PositionalListIndex[0] = rem
@@ -143,28 +140,43 @@ func JWPUBtoMarkdown(jwpub string) {
 				}
 			}
 		}
+
+		if fullText[docID] != "" {
+			fmt.Println("fullText[docID:", docID, "]:", fullText[docID])
+		}
 		if !finded {
+			log.Println("finded!")
 			var toRem []int
 			for i := range sIndexes {
 				//var docI = sIndexes[i].TextUnitIndices.prefix(3)
 				//sIndexes[i].TextUnitIndices.removeFirst(3)
-				if sIndexes[i].TextUnitIndices[0] == 128 {
-					if len(sIndexes[i].TextUnitIndices) != 1 {
-						sIndexes[i].TextUnitIndices[1]--
-					}
-				} else {
-					sIndexes[i].TextUnitIndices[0]--
+				var docI byte = 0
+				if len(sIndexes[i].TextUnitIndices) > 0 {
+					docI = sIndexes[i].TextUnitIndices[0]
 				}
 				if len(sIndexes[i].TextUnitIndices) == 0 {
 					log.Println("toRem", i)
 					toRem = append(toRem, i)
-				}
+				} else {
+					sIndexes[i].TextUnitIndices = sIndexes[i].TextUnitIndices[1:]
+					if docI == 128 {
+						log.Println("finded! 1")
+						if len(sIndexes[i].TextUnitIndices) != 0 {
+							sIndexes[i].TextUnitIndices = insertbyte(sIndexes[i].TextUnitIndices, docI-1, 0)
+						}
+					} else {
+						docI--
+						sIndexes[i].TextUnitIndices = insertbyte(sIndexes[i].TextUnitIndices, docI, 0)
+					}
 
-				if len(sIndexes[i].PositionalListIndex) > 0 && sIndexes[i].PositionalListIndex[0] == 128 {
-					sIndexes[i].PositionalListIndex = sIndexes[i].PositionalListIndex[1:]
+					if len(sIndexes[i].PositionalListIndex) > 0 && sIndexes[i].PositionalListIndex[0] == 128 {
+						sIndexes[i].PositionalListIndex = sIndexes[i].PositionalListIndex[1:]
+						log.Println("finded! 3")
+					}
 				}
 			}
-			for i := len(toRem); i > 0; i-- {
+			for i := len(toRem) - 1; i > 0; i-- {
+				log.Println("Removing...")
 				log.Println("toRem2", sIndexes[toRem[i]])
 				sIndexes = append(sIndexes[:toRem[i]], sIndexes[toRem[i]+1:]...)
 			}
@@ -174,6 +186,9 @@ func JWPUBtoMarkdown(jwpub string) {
 		if len(sIndexes) == 0 {
 			loop = false
 		}
+		if docID > 10000 {
+			log.Fatal("docID > 10000, this should not happen.")
+		}
 	}
 	//	for (id, text) in fullText where text != "" {
 	//		let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent("w_I_202110/contents/\(id).txt")
@@ -187,11 +202,15 @@ func JWPUBtoMarkdown(jwpub string) {
 
 }
 
+func insertbyte(a []byte, c byte, i int) []byte { // returns a with c inserted at index i; the tail is copied first, but the outer append may write into a's backing array if it has spare capacity
+	return append(a[:i], append([]byte{c}, a[i:]...)...)
+}
+
 func byteStartsWith(bs []byte, with []byte) bool {
-	if len(bs) != len(with) {
+	if len(bs) < len(with) {
 		return false
 	}
-	for i := range bs {
+	for i := range with {
 		if bs[i] != with[i] {
 			return false
 		}
diff --git a/utils/jwpub-test/parse.go b/utils/jwpub-test/parse.go
index bac2e5c..36d26e9 100644
--- a/utils/jwpub-test/parse.go
+++ b/utils/jwpub-test/parse.go
@@ -10,6 +10,6 @@ func main() {
 	helpers.SetDataDir(dataDir)
 	helpers.Mkdir(dataDir + "/raw")
 	helpers.DBInit(dataDir)
-	libjw.GetPublication("fg", "E", "JWPUB", "")
+	libjw.GetPublication("w", "E", "JWPUB", "202110")
 	// libjw.JWPUBtoMarkdown("fg_E.jwpub.orig")
 }

From 5fc63bab1e818fbec24ebb37b5185156468b4947 Mon Sep 17 00:00:00 2001
From: Czarek Nakamoto <cyjan@mrcyjanek.net>
Date: Fri, 13 Aug 2021 12:34:53 +0200
Subject: [PATCH 4/8] Don't build APKs on non-master branches

---
 .abstruse.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.abstruse.yml b/.abstruse.yml
index 0662322..4c2ecbb 100644
--- a/.abstruse.yml
+++ b/.abstruse.yml
@@ -11,7 +11,7 @@ matrix:
   - image: mrcyjanek/goprod:core-android
     env: M=android
 script: 
-  - make $M
+  - if [[ "$M" == "android" && "$ABSTRUSE_BRANCH" != "master" ]]; then true; else make $M; fi
   - cp build/deb/*.deb /apt/ || true
   - cp build/bin/* /archive || true
   - cp build/apk/* /archive || true
\ No newline at end of file

From 531ea0dfa1d5639645d32d7936d85155574a0abd Mon Sep 17 00:00:00 2001
From: Czarek Nakamoto <cyjan@mrcyjanek.net>
Date: Fri, 13 Aug 2021 13:13:12 +0200
Subject: [PATCH 5/8] some fixes that do not fix anything

---
 libjw/jwpub.go | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/libjw/jwpub.go b/libjw/jwpub.go
index 0c7d5f6..f0762fb 100644
--- a/libjw/jwpub.go
+++ b/libjw/jwpub.go
@@ -101,6 +101,7 @@ func JWPUBtoMarkdown(jwpub string) {
 						rem = rem - 1
 						sIndexes[i].PositionalListIndex[0] = rem
 						curDocIndexArray := curDocIndex
+						fmt.Println(wd)
 						var repo = false
 						for j := range curDocIndexArray {
 							if j == 0 {
@@ -145,8 +146,7 @@ func JWPUBtoMarkdown(jwpub string) {
 			fmt.Println("fullText[docID:", docID, "]:", fullText[docID])
 		}
 		if !finded {
-			log.Println("finded!")
-			var toRem []int
+			var toRem []int = []int{}
 			for i := range sIndexes {
 				//var docI = sIndexes[i].TextUnitIndices.prefix(3)
 				//sIndexes[i].TextUnitIndices.removeFirst(3)
@@ -155,12 +155,11 @@ func JWPUBtoMarkdown(jwpub string) {
 					docI = sIndexes[i].TextUnitIndices[0]
 				}
 				if len(sIndexes[i].TextUnitIndices) == 0 {
-					log.Println("toRem", i)
+					log.Println("toRem", i, sIndexes[i].Word)
 					toRem = append(toRem, i)
 				} else {
 					sIndexes[i].TextUnitIndices = sIndexes[i].TextUnitIndices[1:]
 					if docI == 128 {
-						log.Println("finded! 1")
 						if len(sIndexes[i].TextUnitIndices) != 0 {
 							sIndexes[i].TextUnitIndices = insertbyte(sIndexes[i].TextUnitIndices, docI-1, 0)
 						}
@@ -168,16 +167,13 @@ func JWPUBtoMarkdown(jwpub string) {
 						docI--
 						sIndexes[i].TextUnitIndices = insertbyte(sIndexes[i].TextUnitIndices, docI, 0)
 					}
-
 					if len(sIndexes[i].PositionalListIndex) > 0 && sIndexes[i].PositionalListIndex[0] == 128 {
 						sIndexes[i].PositionalListIndex = sIndexes[i].PositionalListIndex[1:]
-						log.Println("finded! 3")
 					}
 				}
 			}
-			for i := len(toRem) - 1; i > 0; i-- {
-				log.Println("Removing...")
-				log.Println("toRem2", sIndexes[toRem[i]])
+			for i := len(toRem) - 1; i >= 0; i-- {
+				log.Println(i, docID, "toRem2", sIndexes[toRem[i]].Word)
 				sIndexes = append(sIndexes[:toRem[i]], sIndexes[toRem[i]+1:]...)
 			}
 			docID++
@@ -185,6 +181,8 @@ func JWPUBtoMarkdown(jwpub string) {
 		}
 		if len(sIndexes) == 0 {
 			loop = false
+		} else {
+			log.Println("len(sIndexes):", len(sIndexes))
 		}
 		if docID > 10000 {
 			log.Fatal("docID > 10000, this should not happen.")

From c327b4e03b222e88e51d1073bd695142bbee017f Mon Sep 17 00:00:00 2001
From: Czarek Nakamoto <cyjan@mrcyjanek.net>
Date: Sat, 14 Aug 2021 19:14:29 +0200
Subject: [PATCH 6/8] No - it still doesn't work, I'm starting from scratch
 again and I want to save the code somewhere

---
 libjw/jwpub.go            | 16 ++++++----------
 utils/jwpub-test/parse.go | 10 ++++++++--
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/libjw/jwpub.go b/libjw/jwpub.go
index f0762fb..598e078 100644
--- a/libjw/jwpub.go
+++ b/libjw/jwpub.go
@@ -78,30 +78,27 @@ func JWPUBtoMarkdown(jwpub string) {
 	var loop = true
 	var docID = 0
 	var curDocIndex = []byte{128}
-	var fullText = make(map[int]string)
+	var fullText = make(map[int]string, 255)
 
 	sIndexes := wordsmap
 	for loop {
+
 		var finded = false
 		for i := range sIndexes {
-			//log.Println("for i:= range sIndexes")
-			if len(sIndexes[i].TextUnitIndices) > 0 && sIndexes[i].TextUnitIndices[0] == 128 {
-				//log.Println("byteStartsWith(sIndexes[i].PositionalList, curDocIndex): ", byteStartsWith(sIndexes[i].PositionalList, curDocIndex))
+			if sIndexes[i].WordID == 123 {
+				log.Println(sIndexes[i].Word, sIndexes[i].TextUnitIndices, byteStartsWith(sIndexes[i].TextUnitIndices, []byte{128}), sIndexes[i].PositionalList, byteStartsWith(sIndexes[i].PositionalList, curDocIndex), curDocIndex)
+			}
+			if byteStartsWith(sIndexes[i].TextUnitIndices, []byte{128}) {
 				if byteStartsWith(sIndexes[i].PositionalList, curDocIndex) {
 					var rem = sIndexes[i].PositionalListIndex[0]
 					if rem > 128 {
 						finded = true
 						wd := sIndexes[i].Word
-						//if wd != String(fullText[docID]?.split(separator: " ").last ?? "").unaccent() {
-						//	print(curDocIndex, wd)
-						//	fullText[docID]!.append(wd + " ")
-						//}
 						fullText[docID] += " " + wd
 						sIndexes[i].PositionalList = sIndexes[i].PositionalList[len(curDocIndex):]
 						rem = rem - 1
 						sIndexes[i].PositionalListIndex[0] = rem
 						curDocIndexArray := curDocIndex
-						fmt.Println(wd)
 						var repo = false
 						for j := range curDocIndexArray {
 							if j == 0 {
@@ -155,7 +152,6 @@ func JWPUBtoMarkdown(jwpub string) {
 					docI = sIndexes[i].TextUnitIndices[0]
 				}
 				if len(sIndexes[i].TextUnitIndices) == 0 {
-					log.Println("toRem", i, sIndexes[i].Word)
 					toRem = append(toRem, i)
 				} else {
 					sIndexes[i].TextUnitIndices = sIndexes[i].TextUnitIndices[1:]
diff --git a/utils/jwpub-test/parse.go b/utils/jwpub-test/parse.go
index 36d26e9..3661303 100644
--- a/utils/jwpub-test/parse.go
+++ b/utils/jwpub-test/parse.go
@@ -1,6 +1,9 @@
 package main
 
 import (
+	"log"
+	"os"
+
 	"git.mrcyjanek.net/mrcyjanek/jwapi/helpers"
 	"git.mrcyjanek.net/mrcyjanek/jwapi/libjw"
 )
@@ -10,6 +13,9 @@ func main() {
 	helpers.SetDataDir(dataDir)
 	helpers.Mkdir(dataDir + "/raw")
 	helpers.DBInit(dataDir)
-	libjw.GetPublication("w", "E", "JWPUB", "202110")
-	// libjw.JWPUBtoMarkdown("fg_E.jwpub.orig")
+	//libjw.GetPublication("w", "E", "JWPUB", "202110")
+	if _, err := os.Stat("pub.jwpub"); os.IsNotExist(err) {
+		log.Fatal("Hey! Please put `pub.jwpub' in this directory, you can get one from this link: https://www.jw.org/download/?issue=202107&output=html&pub=g&fileformat=JWPUB&alllangs=0&langwritten=E&txtCMSLang=E&isBible=0")
+	}
+	libjw.JWPUBtoMarkdown("pub.jwpub")
 }

From 7e5ba58f6a92a671cfb2666a6dd24cd709d4a21f Mon Sep 17 00:00:00 2001
From: Czarek Nakamoto <cyjan@mrcyjanek.net>
Date: Sat, 14 Aug 2021 19:36:39 +0200
Subject: [PATCH 7/8] **FIXED** jwpub is now reading correctly Yay

---
 libjw/jwpub.go | 91 +++++++++++++++++++++++---------------------------
 1 file changed, 42 insertions(+), 49 deletions(-)

diff --git a/libjw/jwpub.go b/libjw/jwpub.go
index 598e078..89a95c6 100644
--- a/libjw/jwpub.go
+++ b/libjw/jwpub.go
@@ -75,30 +75,33 @@ func JWPUBtoMarkdown(jwpub string) {
 		})
 	}
 
+	sIndexes := wordsmap
 	var loop = true
 	var docID = 0
 	var curDocIndex = []byte{128}
-	var fullText = make(map[int]string, 255)
+	var fullText = make(map[int]string)
 
-	sIndexes := wordsmap
 	for loop {
-
 		var finded = false
 		for i := range sIndexes {
-			if sIndexes[i].WordID == 123 {
-				log.Println(sIndexes[i].Word, sIndexes[i].TextUnitIndices, byteStartsWith(sIndexes[i].TextUnitIndices, []byte{128}), sIndexes[i].PositionalList, byteStartsWith(sIndexes[i].PositionalList, curDocIndex), curDocIndex)
-			}
 			if byteStartsWith(sIndexes[i].TextUnitIndices, []byte{128}) {
 				if byteStartsWith(sIndexes[i].PositionalList, curDocIndex) {
 					var rem = sIndexes[i].PositionalListIndex[0]
 					if rem > 128 {
 						finded = true
-						wd := sIndexes[i].Word
+						var wd = sIndexes[i].Word
+						//if wd != String(fullText[docID]?.split(separator: " ").last ?? "").unaccent() {
+						//	print(curDocIndex, wd)
+						//	fullText[docID]!.append(wd + " ")
+						//}
 						fullText[docID] += " " + wd
 						sIndexes[i].PositionalList = sIndexes[i].PositionalList[len(curDocIndex):]
+						//sIndexes[i].PositionalList = sIndexes[i].PositionalList.trimmingCharacters(in: .whitespacesAndNewlines)
 						rem = rem - 1
-						sIndexes[i].PositionalListIndex[0] = rem
-						curDocIndexArray := curDocIndex
+						sIndexes[i].PositionalListIndex = sIndexes[i].PositionalListIndex[1:]
+						sIndexes[i].PositionalListIndex = insertbyte(sIndexes[i].PositionalListIndex, rem, 0)
+						//sIndexes[i].PositionalListIndex = rem + sIndexes[i].PositionalListIndex
+						var curDocIndexArray = curDocIndex
 						var repo = false
 						for j := range curDocIndexArray {
 							if j == 0 {
@@ -132,67 +135,57 @@ func JWPUBtoMarkdown(jwpub string) {
 							}
 						}
 						break
-					} else {
-						sIndexes[i].PositionalListIndex = sIndexes[i].PositionalListIndex[1:]
 					}
 				}
 			}
 		}
-
-		if fullText[docID] != "" {
-			fmt.Println("fullText[docID:", docID, "]:", fullText[docID])
-		}
 		if !finded {
-			var toRem []int = []int{}
+			var toRem []int
 			for i := range sIndexes {
-				//var docI = sIndexes[i].TextUnitIndices.prefix(3)
-				//sIndexes[i].TextUnitIndices.removeFirst(3)
-				var docI byte = 0
-				if len(sIndexes[i].TextUnitIndices) > 0 {
-					docI = sIndexes[i].TextUnitIndices[0]
+				var docI = sIndexes[i].TextUnitIndices[0]
+				sIndexes[i].TextUnitIndices = sIndexes[i].TextUnitIndices[1:]
+				if docI == 128 {
+					//sIndexes[i].TextUnitIndices = sIndexes[i].TextUnitIndices.trimmingCharacters(in: .whitespacesAndNewlines)
+					if len(sIndexes[i].TextUnitIndices) != 0 {
+						docI = sIndexes[i].TextUnitIndices[0]
+						sIndexes[i].TextUnitIndices = sIndexes[i].TextUnitIndices[1:]
+						docI = docI - 1
+						sIndexes[i].TextUnitIndices = insertbyte(sIndexes[i].TextUnitIndices, docI, 0)
+					}
+				} else {
+					docI = docI - 1
+					sIndexes[i].TextUnitIndices = insertbyte(sIndexes[i].TextUnitIndices, docI, 0)
 				}
 				if len(sIndexes[i].TextUnitIndices) == 0 {
 					toRem = append(toRem, i)
-				} else {
-					sIndexes[i].TextUnitIndices = sIndexes[i].TextUnitIndices[1:]
-					if docI == 128 {
-						if len(sIndexes[i].TextUnitIndices) != 0 {
-							sIndexes[i].TextUnitIndices = insertbyte(sIndexes[i].TextUnitIndices, docI-1, 0)
-						}
-					} else {
-						docI--
-						sIndexes[i].TextUnitIndices = insertbyte(sIndexes[i].TextUnitIndices, docI, 0)
-					}
-					if len(sIndexes[i].PositionalListIndex) > 0 && sIndexes[i].PositionalListIndex[0] == 128 {
-						sIndexes[i].PositionalListIndex = sIndexes[i].PositionalListIndex[1:]
-					}
+				}
+				var rem = sIndexes[i].PositionalListIndex[0]
+				if rem == 128 {
+					sIndexes[i].PositionalListIndex = sIndexes[i].PositionalListIndex[1:]
+					//sIndexes[i].PositionalListIndex = sIndexes[i].PositionalListIndex.trimmingCharacters(in: .whitespacesAndNewlines)
 				}
 			}
 			for i := len(toRem) - 1; i >= 0; i-- {
-				log.Println(i, docID, "toRem2", sIndexes[toRem[i]].Word)
 				sIndexes = append(sIndexes[:toRem[i]], sIndexes[toRem[i]+1:]...)
 			}
-			docID++
+			fmt.Println(fullText[docID])
+			docID += 1
 			curDocIndex = []byte{128}
 		}
 		if len(sIndexes) == 0 {
 			loop = false
-		} else {
-			log.Println("len(sIndexes):", len(sIndexes))
-		}
-		if docID > 10000 {
-			log.Fatal("docID > 10000, this should not happen.")
 		}
 	}
-	//	for (id, text) in fullText where text != "" {
-	//		let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent("w_I_202110/contents/\(id).txt")
-	//		do {
-	//			print(dir)
-	//			try text.write(to: dir, atomically: true, encoding: String.Encoding.utf8)
-	//		} catch {
-	//			print("Error")
-	//		}
+	//print(fullText)
+	//for (id, text) in fullText where text != "" {
+	//	let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent("w_I_202110/contents/\(id).txt")
+	//	do {
+	//		print(dir)
+	//		try text.write(to: dir, atomically: true, encoding: String.Encoding.utf8)
+	//	} catch {
+	//		print("Error")
 	//	}
+	//}
 
 }
 

From cdb2c5b509ce3c4a3e944f6ce80d606da5eff728 Mon Sep 17 00:00:00 2001
From: Czarek Nakamoto <cyjan@mrcyjanek.net>
Date: Sat, 14 Aug 2021 20:02:27 +0200
Subject: [PATCH 8/8] FIX: highlights were not stored correctly

---
 webui/apiDB.go              | 21 ++-------------------
 webui/html/static/common.js |  2 +-
 2 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/webui/apiDB.go b/webui/apiDB.go
index 225b685..cd198f9 100644
--- a/webui/apiDB.go
+++ b/webui/apiDB.go
@@ -2,27 +2,14 @@ package webui
 
 import (
 	"fmt"
-	"log"
 	"net/http"
-	"net/url"
 	"strings"
 
 	"git.mrcyjanek.net/mrcyjanek/jwapi/helpers"
 )
 
 func apiDBget(w http.ResponseWriter, req *http.Request) {
-	url := req.URL.Path
-	splited := strings.Split(string(url), "/")
-	if len(splited) < 5 {
-		fmt.Fprintln(w, "/api/db/get/<key>")
-		return
-	}
-	key := splited[4]
-	if key == "" {
-		w.Write([]byte("0"))
-		return
-	}
-	w.Write(helpers.Get(key))
+	w.Write(helpers.Get(req.URL.RawQuery))
 }
 
 func apiDBset(w http.ResponseWriter, req *http.Request) {
@@ -38,10 +25,6 @@ func apiDBset(w http.ResponseWriter, req *http.Request) {
 		return
 	}
 	query := req.URL.RawQuery
-	value, err := url.QueryUnescape(query)
-	if err != nil {
-		log.Fatal(err)
-	}
-	helpers.Set(key, []byte(value))
+	helpers.Set(key, []byte(query))
 
 }
diff --git a/webui/html/static/common.js b/webui/html/static/common.js
index f60b0a4..f0c31f3 100644
--- a/webui/html/static/common.js
+++ b/webui/html/static/common.js
@@ -55,7 +55,7 @@ function dbGet(key) {
     return localStorage[key]
   }
   var xhr = new XMLHttpRequest();
-  xhr.open("GET", "/api/db/get/"+encodeURIComponent(key), false);
+  xhr.open("GET", "/api/db/get?"+encodeURIComponent(key), false);
   xhr.onerror = function (e) {
     console.error(xhr.statusText);
   };