Skip to content

Commit

Permalink
Add custom split logic for scanner
Browse files Browse the repository at this point in the history
  • Loading branch information
justin-taylor committed Dec 17, 2024
1 parent 646078d commit 12a84c9
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 6 deletions.
3 changes: 1 addition & 2 deletions srt.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package astisub

import (
"bufio"
"fmt"
"io"
"strconv"
Expand Down Expand Up @@ -36,7 +35,7 @@ func parseDurationSRT(i string) (d time.Duration, err error) {
func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
// Init
o = NewSubtitles()
var scanner = bufio.NewScanner(i)
var scanner = NewScanner(i)

// Scan
var line string
Expand Down
3 changes: 1 addition & 2 deletions ssa.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package astisub

import (
"bufio"
"fmt"
"io"
"log"
Expand Down Expand Up @@ -134,7 +133,7 @@ func ReadFromSSA(i io.Reader) (o *Subtitles, err error) {
func ReadFromSSAWithOptions(i io.Reader, opts SSAOptions) (o *Subtitles, err error) {
// Init
o = NewSubtitles()
var scanner = bufio.NewScanner(i)
var scanner = NewScanner(i)
var si = &ssaScriptInfo{}
var ss = []*ssaStyle{}
var es = []*ssaEvent{}
Expand Down
32 changes: 32 additions & 0 deletions subtitles.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package astisub

import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"math"
"os"
"path/filepath"
Expand Down Expand Up @@ -927,3 +930,32 @@ func escapeHTML(i string) string {
// unescapeHTML returns i after passing it through the package-level
// htmlUnescaper's Replace method.
// NOTE(review): htmlUnescaper is declared outside this view; presumably it is
// the inverse of the replacer used by escapeHTML — confirm.
func unescapeHTML(i string) string {
	unescaped := htmlUnescaper.Replace(i)
	return unescaped
}

// NewScanner wraps i in a bufio.Scanner that tokenizes with splitLines rather
// than the default bufio.ScanLines, so "\n", "\r\n" and a lone "\r" are all
// treated as line terminators.
func NewScanner(i io.Reader) *bufio.Scanner {
	s := bufio.NewScanner(i)
	s.Split(splitLines)
	return s
}

// splitLines is a bufio.SplitFunc that breaks the input into lines terminated
// by "\n", "\r\n" or a lone "\r". The terminator is never part of the returned
// token. A final line without a terminator is returned once atEOF is true.
//
// It returns (0, nil, nil) to ask the scanner for more data when no complete
// line can be determined yet.
func splitLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}

	i := bytes.IndexAny(data, "\r\n")
	switch {
	case i < 0:
		// No terminator buffered: at EOF this is the final, non-terminated
		// line; otherwise wait for more data.
		if atEOF {
			return len(data), data, nil
		}
		return 0, nil, nil

	case data[i] == '\n':
		// Line terminated by a single newline.
		return i + 1, data[:i], nil

	case i+1 < len(data):
		// Carriage return with a known successor: swallow a directly
		// following newline so that "\r\n" counts as one terminator.
		if data[i+1] == '\n' {
			return i + 2, data[:i], nil
		}
		return i + 1, data[:i], nil

	case !atEOF:
		// The carriage return is the last buffered byte. The next read may
		// start with '\n'; consuming only the '\r' now would turn that
		// newline into a spurious empty line, so wait for more data.
		return 0, nil, nil

	default:
		// Lone carriage return at the very end of the input.
		return i + 1, data[:i], nil
	}
}
23 changes: 23 additions & 0 deletions subtitles_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,3 +340,26 @@ func TestHTMLEntity(t *testing.T) {
assert.Equal(t, string(c), w.String())
}
}

// TestNewScanner opens the example-in-scan-line fixture of every supported
// extension and checks that each one parses into the same three two-line items.
//
// Each extension runs as its own subtest. The subtest returns early when the
// file cannot be opened or the item count is wrong, so a broken fixture is
// reported as an assertion failure instead of a nil-dereference or
// index-out-of-range panic that would also abort the remaining extensions.
func TestNewScanner(t *testing.T) {
	type want struct {
		startAt, endAt time.Duration
		lines          [2]string
	}
	expected := []want{
		{
			startAt: time.Duration(0),
			endAt:   3*time.Second + 766*time.Millisecond,
			lines: [2]string{
				"Did one of the last stories strike you as",
				"more interesting than the other?",
			},
		},
		{
			startAt: 3*time.Second + 767*time.Millisecond,
			endAt:   10*time.Second + 732*time.Millisecond,
			lines: [2]string{
				"That's true. You don’t often find 632",
				"pieces of gum stuck on a sidewalk",
			},
		},
		{
			startAt: 10*time.Second + 733*time.Millisecond,
			endAt:   14*time.Second + 66*time.Millisecond,
			lines: [2]string{
				"at a busy bus stop or anywhere",
				"else for that matter.",
			},
		},
	}

	for _, ext := range []string{"vtt", "srt", "ssa"} {
		t.Run(ext, func(t *testing.T) {
			s, err := astisub.OpenFile("./testdata/example-in-scan-line." + ext)
			if !assert.NoError(t, err) {
				return
			}
			if !assert.Len(t, s.Items, len(expected)) {
				return
			}
			for idx, w := range expected {
				item := s.Items[idx]
				assert.Equal(t, w.startAt, item.StartAt)
				assert.Equal(t, w.endAt, item.EndAt)
				// Guard the line indexing below for the same reason as above.
				if !assert.GreaterOrEqual(t, len(item.Lines), len(w.lines)) {
					continue
				}
				assert.Equal(t, w.lines[0], item.Lines[0].String())
				assert.Equal(t, w.lines[1], item.Lines[1].String())
			}
		})
	}
}
1 change: 1 addition & 0 deletions testdata/example-in-scan-line.srt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
100:00:00.000 --> 00:00:03.766Did one of the last stories strike you asmore interesting than the other?200:00:03.767 --> 00:00:10.732That's true. You don’t often find 632pieces of gum stuck on a sidewalk300:00:10.733 --> 00:00:14.066at a busy bus stop or anywhereelse for that matter.
Expand Down
1 change: 1 addition & 0 deletions testdata/example-in-scan-line.ssa
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[Script Info]; Comment 1; Comment 2Collisions: NormalOriginal Script: asticodePlayDepth: 0PlayResY: 600ScriptType: v4.00Script Updated By: version 2.8.01Timer: 100Title: SSA test[V4 Styles]Format: Name, Alignment, AlphaLevel, BackColour, Bold, BorderStyle, Encoding, Fontname, Fontsize, Italic, MarginL, MarginR, MarginV, Outline, OutlineColour, PrimaryColour, SecondaryColour, ShadowStyle: 1,7,0.100,&H80000008,1,7,0,f1,4.000,0,1,4,7,1.000,&H0000ffff,&H0000ffff,&H0000ffff,4.000Style: 2,8,0.200,&H000f0f0f,1,8,1,f2,5.000,0,2,5,8,2.000,&H0000ffff,&H00efefef,&H0000ffff,5.000Style: 3,9,0.300,&H00000008,0,9,2,f3,6.000,0,3,6,9,3.000,&H00000008,&H00b4fcfc,&H00b4fcfc,6.000[Events]Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, TextDialogue: Marked=0,00:00:00.00,00:00:03.766,1,Cher,1234,2345,3456,test,{\pos(400,570)}Did one of the last stories strike you as\nmore interesting than the other?Dialogue: Marked=1,00:00:03.767,00:00:10.732,2,autre,0,0,0,,That's true. You don’t often find 632\npieces of gum stuck on a sidewalkDialogue: Marked=1,00:00:10.733,00:00:14.066,3,autre,0,0,0,,at a busy bus stop or anywhere\nelse for that matter.
Expand Down
1 change: 1 addition & 0 deletions testdata/example-in-scan-line.vtt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
WEBVTT100:00:00.000 --> 00:00:03.766Did one of the last stories strike you asmore interesting than the other?200:00:03.767 --> 00:00:10.732That's true. You don’t often find 632pieces of gum stuck on a sidewalk300:00:10.733 --> 00:00:14.066at a busy bus stop or anywhereelse for that matter.
Expand Down
4 changes: 2 additions & 2 deletions webvtt.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package astisub

import (
"bufio"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -118,7 +117,8 @@ func parseWebVTTTimestampMap(line string) (timestampMap *WebVTTTimestampMap, err
func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
// Init
o = NewSubtitles()
var scanner = bufio.NewScanner(i)
var scanner = NewScanner(i)

var line string
var lineNum int

Expand Down

0 comments on commit 12a84c9

Please sign in to comment.