Skip to content

Commit

Permalink
Merge pull request #30 from terratensor/develop
Browse files Browse the repository at this point in the history
Исправлен счетчик символов для utf-8 кириллицы
  • Loading branch information
audetv authored Aug 3, 2024
2 parents 8f044ec + 1db7ae8 commit 5fbc001
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions consumer/internal/infra/msgparser/msgparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,17 +206,24 @@ func getInnerText(node *html.Node) string {
// It takes a string input text and truncates it based on the maximum characters and words allowed.
// Returns the truncated text.
func (p *Parser) truncateText(text string) string {
count := utf8.RuneCountInString(text)
words := strings.Split(text, " ")
if len(words) <= p.maxWords {
return text
}
truncatedText := ""
// TODO улучшить формулу расчета, т.к. символы "", ) и ( и др, установить какие еще.
// в рунах эти символы занимают 3 позиции, надо определять эти символы и пересчитывать count
for _, word := range words {
if len(truncatedText)+len(word)+1 <= p.maxChars {
if utf8.RuneCountInString(truncatedText)+utf8.RuneCountInString(word)+1 <= p.maxChars+10 {
truncatedText += word + " "
} else {
break
}
}
return truncatedText + "…"

if utf8.RuneCountInString(truncatedText) < count {
return truncatedText + "…"
}
return truncatedText
}

0 comments on commit 5fbc001

Please sign in to comment.