From 3e18a92ecbb333955845265614a6ee7273453319 Mon Sep 17 00:00:00 2001 From: catus felis Date: Thu, 20 Oct 2022 03:35:10 +0000 Subject: [PATCH] Update folder structure --- Pipfile | 11 +++++ bin/convert2dic.sh | 3 -- bin/install_linux_packages.sh | 3 ++ bin/lib/dsl_lib.rb | 86 ----------------------------------- bin/requirements.txt | 5 +- bin/tab2dsl/README.md | 33 -------------- bin/tab2dsl/tab2dsl.rb | 76 ------------------------------- 7 files changed, 17 insertions(+), 200 deletions(-) create mode 100644 Pipfile delete mode 100644 bin/convert2dic.sh create mode 100644 bin/install_linux_packages.sh delete mode 100644 bin/lib/dsl_lib.rb delete mode 100644 bin/tab2dsl/README.md delete mode 100644 bin/tab2dsl/tab2dsl.rb diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..c398b0d --- /dev/null +++ b/Pipfile @@ -0,0 +1,11 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] + +[dev-packages] + +[requires] +python_version = "3.10" diff --git a/bin/convert2dic.sh b/bin/convert2dic.sh deleted file mode 100644 index 4645b6d..0000000 --- a/bin/convert2dic.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -perl ./bin/pocketbookdic.pl ./output/stardict/Ngu-vung-Danh-tu-Thien-hoc.ifo \ No newline at end of file diff --git a/bin/install_linux_packages.sh b/bin/install_linux_packages.sh new file mode 100644 index 0000000..df7bcaf --- /dev/null +++ b/bin/install_linux_packages.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +sudo apt-get install dictzip diff --git a/bin/lib/dsl_lib.rb b/bin/lib/dsl_lib.rb deleted file mode 100644 index e0f5f5c..0000000 --- a/bin/lib/dsl_lib.rb +++ /dev/null @@ -1,86 +0,0 @@ -def save_dsl(dsl_name, header_stuff, dict_content) - File.open(dsl_name + ".dsl", "wt", encoding: "UTF-16LE") do |f| - f << header_stuff - f << dict_content - end -end - -def format_header(dict_name, index_lang, contents_lang) - "#NAME \"#{dict_name}\"\n#INDEX_LANGUAGE \"#{index_lang}\"\n#CONTENTS_LANGUAGE \"#{contents_lang}\"\n\n" -end - -def get_header(options) - dict_name = "Dictionary Name" - index_lang = "Source" - contents_lang = "Target" - if options[:dict_name] - dict_name = options[:dict_name] - end - if options[:from_lang] - index_lang = options[:from_lang] - end - if options[:to_lang] - contents_lang = options[:to_lang] - end - - format_header(dict_name, index_lang, contents_lang) -end - -def zipsave(dsl_name, header_stuff, dict_content) - save_dsl(dsl_name, header_stuff, dict_content) - - `dictzip #{dsl_name}.dsl` - - puts "Done! Your dictionary is now available in the file #{dsl_name}.dsl.dz" -end - -def handle_output(options, dsl_name, header_stuff, dict_content) - if options[:debug] - puts header_stuff - puts dict_content - else - zipsave(dsl_name, header_stuff, dict_content) - end -end - -def get_dict_name(dict_source) - dsl_name = File.basename(dict_source, File.extname(dict_source)) -end - -def read_dict_source(dict_source) - if !File.exist?(dict_source) - abort( " Dictionary file not found: '#{dict_source}'") - end - File.read(dict_source) -end - -def skip_lines(line) - skip = false - if line.match(/^==/) - skip = true - end - if line.match(/^$/) - skip = true - end - if line.match(/^\t/) - skip = true - end - skip -end - -def read_stoplist(options, lang) - json = [] - stoplist_dir = "" - - if options[:stopdir] - stoplist_dir = options[:stopdir] - end - - stop_filename = stoplist_dir + lang + ".json" - if !File.exist?(stop_filename) - abort(" Stopfile not found: '#{stop_filename}'") - end - stopwords_file = File.read(stop_filename) - JSON.parse(stopwords_file) -end - diff --git a/bin/requirements.txt b/bin/requirements.txt index c57660c..72988e2 100644 --- a/bin/requirements.txt +++ b/bin/requirements.txt @@ -1,5 +1,6 @@ -Pillow >= 9.2.0 -numpy >= 1.19 PyGlossary >= 3.3.0 +iso_language_codes >= 1.1.0 +pattern.en >= 3.6.0 + # sudo apt-get install dictzip \ No newline at end of file diff --git a/bin/tab2dsl/README.md b/bin/tab2dsl/README.md deleted file mode 100644 index e781129..0000000 --- a/bin/tab2dsl/README.md +++ /dev/null @@ -1,33 +0,0 @@ -tab2dsl - a very basic script to convert tab-separated files into DSL-format dictionaries. - -For a similar program written in Python see [tsv2dsl](https://github.com/fastrizwaan/tsv2dsl). I wrote this script because tsv2dsl can't handle a tab separated file with only 2 columns. Right now tab2dsl has the opposite situation - it only handles files with two columns of tab-separated text. Configurable number of columns and different parameters (e.g. parts of speech, location of the headword, fancy formatting) may be added later. - -# Requirements - -The script requires that the dictzip program is installed in order to compress the final dictionary (dsl.dz files are a small fraction of the size of the uncompressed originals and are supported by most dictionary programs). - -# Usage - -If run without command-line arguments, the script will ask for the name of a source file to process: - - ruby tab2dsl.rb - -Alternatively, the location of the source file can be specified as an argument: - - ruby tab2dsl.rb /path/to/mydictionary.txt - -In either case, you will then be prompted to supply basic information about the dictionary (dictionary name, index language, and contents language). These are required by the DSL format and will be used to construct the dictionary header information. - -This works fine for one-off dictionary conversions. For batch processing, some options for providing the header information on the command-line or from a separate file should eventually be added. - -# Source format - -tab2dsl expects a source file containing two tab-separated columns in which the first column is the headword, and the second column is the body of the entry or definition, e.g.: - - Headword Entry - -These will converted into DSL-format entries with minimal formatting (italicized headwords and indented definitions). - -# License - -MIT -- see LICENSE file for details. diff --git a/bin/tab2dsl/tab2dsl.rb b/bin/tab2dsl/tab2dsl.rb deleted file mode 100644 index 4ed5416..0000000 --- a/bin/tab2dsl/tab2dsl.rb +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/ruby -KuU -# encoding: utf-8 - -# requires: dictzip - -require 'optparse' -require 'json' - -require_relative '../lib/dsl_lib.rb' - -def interactive_mode(dict_source) - if !dict_source - puts "DSL file name:" - dict_source = gets.chomp - end - - tab_data = read_dict_source(dict_source) - dsl_name = get_dict_name(dict_source) - - puts "Dictionary name:" - dict_name = $stdin.gets.chomp - puts "Index Language (from):" - index_lang = $stdin.gets.chomp - puts "Contents Language (to):" - contents_lang = $stdin.gets.chomp - - header_stuff = format_header(dict_name, index_lang, contents_lang) - - puts "Thank you. Your dictionary header looks like this:\n\n" - puts header_stuff - puts "Please wait. Processing dictionary data..." - - dict_content = format_dictionary(tab_data) -end - -def format_dictionary(tab_data) - dict_content = "" - - tab_data.each_line do |line| - if skip_lines(line) - next - end - if line.include? "\t" - line = line.gsub(/\[/, "\\[").gsub(/\]/, "\\]") - - tab1,tab2 = line.chomp.split("\t") - tab2_format = tab2.gsub(tab1, "[i]~[/i]") - dict_entry = tab1 + "\n" + tab2 + "\n\t[m1][b]" + tab1 + "[/b][/m]\n\t[m1]" + tab2_format + "[/m]\n\n" - dict_content << dict_entry - end - end - dict_content -end - -options = {} -OptionParser.new do |opts| - opts.banner = "Usage: tab2dsl.rb [options] [filename]" - - opts.on('-d', '--debug', 'Output dictionary text to stdin (without creating compressed file)') { options[:debug] = true } - opts.on('-f', '--from-lang LANG', 'Name of source language') { |v| options[:from_lang] = v } - opts.on('-m', '--monolingual', 'Only provide unidirectional lookups (default is bidirectional)') { options[:monolingual] = true } - opts.on('-n', '--dict-name NAME', 'Full name of dictionary') { |v| options[:dict_name] = v } - opts.on('-t', '--to-lang LANG', 'Name of target language') { |v| options[:to_lang] = v } - opts.on('-s', '--stoplist LANG', 'Specify a stoplist language to filter keywords') { |v| options[:stoplist] = v } - opts.on('-S', '--stop-dir DIR', 'Specify path of stoplist directory') { |v| options[:stopdir] = v } - -end.parse! - -if ARGV[0] then dict_source = ARGV[0] end - -tab_data = read_dict_source(dict_source) -dsl_name = get_dict_name(dict_source) -header_stuff = get_header(options) -dict_content = format_dictionary(tab_data) - -handle_output(options, dsl_name, header_stuff, dict_content)