Skip to content

Commit

Permalink
Merge pull request #32 from terratensor/develop
Browse files Browse the repository at this point in the history
Улучшение обработки цитируемого отрезка
  • Loading branch information
audetv authored Aug 4, 2024
2 parents e98d1ae + ebc09e7 commit 6048e94
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 2 deletions.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ down:
docker compose down --remove-orphans
up:
docker compose up --build -d

test:
go test -v ./...
check:
golangci-lint run -v

Expand Down
20 changes: 19 additions & 1 deletion consumer/internal/infra/msgparser/msgparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,25 @@ func (p *Parser) truncateText(text string) string {
}

if utf8.RuneCountInString(truncatedText) < count {
return truncatedText + "…"
return ModifyString(strings.TrimSpace(truncatedText))
}
return truncatedText
}

// ModifyString returns input with its trailing punctuation replaced by a
// single ellipsis ("…").
//
// Every trailing rune that is a space or one of . , : ; … - – — = + is
// removed, so a run such as "..." or " ," collapses completely instead of
// leaving residue in front of the ellipsis. If input ends with none of these
// runes, "…" is simply appended. An empty input, or one made up solely of
// these runes, yields "…".
func ModifyString(input string) string {
	// Runes treated as trailing punctuation (the same set the function has
	// always recognised).
	const trailingPunctuation = " .,:;…-–—=+"

	// TrimRight walks the string rune by rune from the end and returns a
	// slice of the original, so no []rune copy of the whole input is made
	// and bytes before the trimmed tail are left exactly as they were.
	return strings.TrimRight(input, trailingPunctuation) + "…"
}
181 changes: 181 additions & 0 deletions consumer/internal/infra/msgparser/msgparser_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
package msgparser

import (
"testing"
)

// TestTruncateText runs Parser.truncateText over a table of inputs using a
// parser configured with maxWords=6 and maxChars=20. Every table entry is
// executed as its own subtest, named after the scenario it describes.
func TestTruncateText(t *testing.T) {
	type scenario struct {
		name     string
		input    string
		expected string
	}

	scenarios := []scenario{
		{"within max words limit", "hello world this is a test", "hello world this is a test"},
		{"within max words limit utf-8", "привет мир запускаем тест для строки", "привет мир запускаем тест для строки"},
		{"exceeds max words limit, within max chars limit", "hello world this is a test with more words", "hello world this is…"},
		{"exceeds max words limit, within max chars limit utf-8", "привет мир запускаем тест для строки, в которой еще больше слов", "привет мир…"},
		{"exceeds both max words and max chars limits", "hello world this is a very long test with many words and characters", "hello world this is…"},
		{"empty input text", "", ""},
		{"single-word input text", "hello", "hello"},
		// NOTE(review): as it appears here this input is identical to the
		// "within max words limit" case yet expects a different result;
		// presumably the intended input contains runs of several spaces that
		// were collapsed somewhere along the way — confirm against the repo.
		{"input text with multiple spaces between words", "hello world this is a test", "hello world this…"},
		// NOTE(review): same concern as the previous case — confirm the
		// intended spacing of this input.
		{"input text with multiple spaces between words utf-8", "привет мир запускаем тест для строки", "привет мир…"},
	}

	parser := &Parser{maxWords: 6, maxChars: 20}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			if got := parser.truncateText(sc.input); got != sc.expected {
				t.Errorf("truncateText(%q) = %q, want %q", sc.input, got, sc.expected)
			}
		})
	}
}

// TestModifyString verifies that ModifyString turns a phrase followed by any
// single recognised trailing mark (or by nothing at all) into that phrase
// followed by exactly one ellipsis. The cases are the cross product of two
// base phrases (Cyrillic and Latin) and twelve endings, checked in order.
func TestModifyString(t *testing.T) {
	phrases := []string{"Привет, мир", "Hello, world"}

	// Endings appended to each phrase; the final empty string covers input
	// that has no trailing mark.
	endings := []string{" ", ".", ",", ":", ";", "…", "-", "–", "—", "=", "+", ""}

	for _, phrase := range phrases {
		want := phrase + "…"
		for _, ending := range endings {
			in := phrase + ending
			got := ModifyString(in)
			if got == want {
				continue
			}
			t.Errorf("ModifyString(%q) = %q, expected %q", in, got, want)
		}
	}
}

0 comments on commit 6048e94

Please sign in to comment.