ieasybooks · AliOsm · Aug 25, 2023 · Aug 25, 2023 · Aug 25, 2023 · Aug 25, 2023
diff --git a/README.md b/README.md
@@ -22,7 +22,7 @@
   <li>تفريغ المواد المرئي والمسموع إلى نصوص باستخدام أحدث تقنيات الذكاء الاصطناعي المقدمة من شركة OpenAI</li>
   <li>إمكانية تفريغ المواد باستخدام تقنيات wit.ai المقدمة من شركة Facebook</li>
   <li>تحميل المحتوى المرئي بشكل مباشر من منصة YouTube سواءً كان المستهدف مادة واحدة أو قائمة تشغيل كاملة</li>
-  <li>توفير صيَغ مخرجات مختلفة كـ <code>txt</code> و <code>srt</code> و <code>vtt</code> و <code>json</code></li>
+  <li>توفير صيَغ مخرجات مختلفة كـ <code>txt</code> و <code>srt</code> و <code>vtt</code> و <code>csv</code> و <code>tsv</code> و <code>json</code></li>
 </ul>
 
 <h2 dir="rtl">متطلبات الاستخدام</h2>
@@ -134,6 +134,8 @@
           <li><code dir="ltr">txt</code></li>
           <li><code dir="ltr">srt</code></li>
           <li><code dir="ltr">vtt</code></li>
+          <li><code dir="ltr">csv</code></li>
+          <li><code dir="ltr">tsv</code></li>
           <li><code dir="ltr">json</code></li>
           <li><code dir="ltr">all</code> <strong>(الاختيار الإفتراضي)</strong></li>
           <li><code dir="ltr">none</code> (لن يتم إنشاء ملف في حال تمرير هذه الصيغة)</li>
@@ -146,15 +148,16 @@
 
 ```
 ➜ tafrigh --help
-usage: tafrigh [-h] [--skip_if_output_exist | --no-skip_if_output_exist] [--playlist_items PLAYLIST_ITEMS] [--verbose | --no-verbose] [-m MODEL_NAME_OR_PATH] [-t {transcribe,translate}]
+usage: tafrigh [-h] [--version] [--skip_if_output_exist | --no-skip_if_output_exist] [--playlist_items PLAYLIST_ITEMS] [--verbose | --no-verbose] [-m MODEL_NAME_OR_PATH] [-t {transcribe,translate}]
                [-l {af,am,ar,as,az,ba,be,bg,bn,bo,br,bs,ca,cs,cy,da,de,el,en,es,et,eu,fa,fi,fo,fr,gl,gu,ha,haw,he,hi,hr,ht,hu,hy,id,is,it,ja,jw,ka,kk,km,kn,ko,la,lb,ln,lo,lt,lv,mg,mi,mk,ml,mn,mr,ms,mt,my,ne,nl,nn,no,oc,pa,pl,ps,pt,ro,ru,sa,sd,si,sk,sl,sn,so,sq,sr,su,sv,sw,ta,te,tg,th,tk,tl,tr,tt,uk,ur,uz,vi,yi,yo,zh}]
                [--use_faster_whisper | --no-use_faster_whisper] [--beam_size BEAM_SIZE] [--ct2_compute_type {default,int8,int8_float16,int16,float16}] [-w WIT_CLIENT_ACCESS_TOKENS [WIT_CLIENT_ACCESS_TOKENS ...]]
                [--max_cutting_duration [1-17]] [--min_words_per_segment MIN_WORDS_PER_SEGMENT] [--save_files_before_compact | --no-save_files_before_compact] [--save_yt_dlp_responses | --no-save_yt_dlp_responses]
-               [--output_sample OUTPUT_SAMPLE] [-f {all,txt,srt,vtt,json,none} [{all,txt,srt,vtt,json,none} ...]] [-o OUTPUT_DIR]
+               [--output_sample OUTPUT_SAMPLE] [-f {all,txt,srt,vtt,csv,tsv,json,none} [{all,txt,srt,vtt,csv,tsv,json,none} ...]] [-o OUTPUT_DIR]
                urls_or_paths [urls_or_paths ...]
 
 options:
   -h, --help            show this help message and exit
+  --version             show program's version number and exit
 
 Input:
   urls_or_paths         Video/Playlist URLs or local folder/file(s) to transcribe.
@@ -194,7 +197,7 @@ Output:
                         Whether to save the yt-dlp library JSON responses or not. (default: False)
   --output_sample OUTPUT_SAMPLE
                         Samples random compacted segments from the output and generates a CSV file contains the sampled data. Pass 0 to disable this behavior.
-  -f {all,txt,srt,vtt,json,none} [{all,txt,srt,vtt,json,none} ...], --output_formats {all,txt,srt,vtt,json,none} [{all,txt,srt,vtt,json,none} ...]
+  -f {all,txt,srt,vtt,csv,tsv,json,none} [{all,txt,srt,vtt,csv,tsv,json,none} ...], --output_formats {all,txt,srt,vtt,csv,tsv,json,none} [{all,txt,srt,vtt,csv,tsv,json,none} ...]
                         Format of the output file; if not specified, all available formats will be produced.
   -o OUTPUT_DIR, --output_dir OUTPUT_DIR
                         Directory to save the outputs.

diff --git a/colab_notebook.ipynb b/colab_notebook.ipynb
@@ -69,8 +69,7 @@
         "print('جارٍ تجهيز بيئة العمل.')\n",
         "\n",
         "# Setup Tafrigh.\n",
-        "%pip uninstall -y tafrigh\n",
-        "%pip install -U tafrigh[wit,whisper]==1.0.1 -qqq\n",
+        "%pip install -U tafrigh[wit,whisper]==1.1.0 > install_logs.txt\n",
         "\n",
         "# Get inputs.\n",
         "\n",

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tafrigh"
-version = "1.0.1"
-description = "تفريغ النصوص وإنشاء ملفات SRT و VTT باستخدام نماذج Whisper وتقنية OpenAI."
+version = "1.1.0"
+description = "تفريغ النصوص وإنشاء ملفات SRT و VTT باستخدام نماذج Whisper وتقنية wit.ai."
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"

diff --git a/tafrigh/types/transcript_type.py b/tafrigh/types/transcript_type.py
@@ -6,6 +6,8 @@ class TranscriptType(Enum):
     TXT = 'txt'
     SRT = 'srt'
     VTT = 'vtt'
+    CSV = 'csv'
+    TSV = 'tsv'
     JSON = 'json'
     NONE = 'none'
 

diff --git a/tafrigh/writer.py b/tafrigh/writer.py
@@ -1,3 +1,4 @@
+import csv
 import json
 import os
 
@@ -46,6 +47,10 @@ def write(
             self.write_srt(file_path, segments)
         elif format == TranscriptType.VTT:
             self.write_vtt(file_path, segments)
+        elif format == TranscriptType.CSV:
+            self.write_csv(file_path, segments)
+        elif format == TranscriptType.TSV:
+            self.write_csv(file_path, segments, '\t')
         elif format == TranscriptType.JSON:
             self.write_json(file_path, segments)
 
@@ -70,12 +75,23 @@ def write_vtt(
     ) -> None:
         self._write_to_file(file_path, self.generate_vtt(segments))
 
+    def write_csv(  # Write segments as delimited rows with a text/start/end header (',' -> CSV, '\t' -> TSV).
+        self,
+        file_path: str,
+        segments: list[dict[str, Union[str, float]]],
+        delimiter: str = ',',
+    ) -> None:
+        with open(file_path, 'w', encoding='utf-8', newline='') as fp:  # newline='' lets csv own line endings (avoids blank rows on Windows).
+            writer = csv.DictWriter(fp, fieldnames=['text', 'start', 'end'], delimiter=delimiter)
+            writer.writeheader()
+            writer.writerows(segments)
+
     def write_json(
         self,
         file_path: str,
         segments: list[dict[str, Union[str, float]]],
     ) -> None:
-        with open(file_path, 'w') as fp:
+        with open(file_path, 'w', encoding='utf-8') as fp:
             json.dump(segments, fp, ensure_ascii=False, indent=2)
 
     def generate_txt(self, segments: list[dict[str, Union[str, float]]]) -> str: