From a4c2232041ea3520c452222a15f2484ee384e06a Mon Sep 17 00:00:00 2001 From: Kendall Garner <17521368+kgarner7@users.noreply.github.com> Date: Thu, 2 May 2024 01:54:46 +0000 Subject: [PATCH] Sort repeated lyrics that may be out of order (#2989) With synchronized lyrics with repeated text, there is not a guarantee that the repeat is in order (e.g. `[00:00.00][00:10.00] a\n[00:05.00]b`). This change will post-process lyrics with repeated timestamps in one line to ensure that it is always sorted. --- model/lyrics.go | 101 ++++++++++++++++++++++++++----------------- model/lyrics_test.go | 14 ++++++ 2 files changed, 76 insertions(+), 39 deletions(-) diff --git a/model/lyrics.go b/model/lyrics.go index 4ea68d1e6..e45f53d62 100644 --- a/model/lyrics.go +++ b/model/lyrics.go @@ -1,7 +1,9 @@ package model import ( + "cmp" "regexp" + "slices" "strconv" "strings" @@ -46,6 +48,7 @@ func ToLyrics(language, text string) (*Lyrics, error) { synced := syncRegex.MatchString(text) priorLine := "" validLine := false + repeated := false var timestamps []int64 for _, line := range lines { @@ -82,6 +85,10 @@ func ToLyrics(language, text string) (*Lyrics, error) { } times := timeRegex.FindAllStringSubmatchIndex(line, -1) + if len(times) > 1 { + repeated = true + } + // The second condition is for when there is a timestamp in the middle of // a line (after any text) if times == nil || times[0][0] != 0 { @@ -105,9 +112,6 @@ func ToLyrics(language, text string) (*Lyrics, error) { // [fullStart, fullEnd, hourStart, hourEnd, minStart, minEnd, secStart, secEnd, msStart, msEnd] for _, match := range times { - var hours, millis int64 - var err error - // for multiple matches, we need to check that later matches are not // in the middle of the string if end != 0 { @@ -118,46 +122,11 @@ func ToLyrics(language, text string) (*Lyrics, error) { } end = match[1] - - hourStart := match[2] - if hourStart != -1 { - // subtract 1 because group has : at the end - hourEnd := match[3] - 1 - hours, err = strconv.ParseInt(line[hourStart:hourEnd], 10, 64) - if err != nil { - return nil, err - } - } - - minutes, err := strconv.ParseInt(line[match[4]:match[5]], 10, 64) + timeInMillis, err := parseTime(line, match) if err != nil { return nil, err } - sec, err := strconv.ParseInt(line[match[6]:match[7]], 10, 64) - if err != nil { - return nil, err - } - - msStart := match[8] - if msStart != -1 { - msEnd := match[9] - // +1 offset since this capture group contains . - millis, err = strconv.ParseInt(line[msStart+1:msEnd], 10, 64) - if err != nil { - return nil, err - } - - length := msEnd - msStart - - if length == 3 { - millis *= 10 - } else if length == 2 { - millis *= 100 - } - } - - timeInMillis := (((((hours * 60) + minutes) * 60) + sec) * 1000) + millis timestamps = append(timestamps, timeInMillis) } @@ -186,6 +155,14 @@ func ToLyrics(language, text string) (*Lyrics, error) { } } + // If there are repeated values, there is no guarantee that they are in order + // In this, case, sort the lyrics by start time + if repeated { + slices.SortFunc(structuredLines, func(a, b Line) int { + return cmp.Compare(*a.Start, *b.Start) + }) + } + lyrics := Lyrics{ DisplayArtist: artist, DisplayTitle: title, @@ -198,4 +175,50 @@ func ToLyrics(language, text string) (*Lyrics, error) { return &lyrics, nil } +func parseTime(line string, match []int) (int64, error) { + var hours, millis int64 + var err error + + hourStart := match[2] + if hourStart != -1 { + // subtract 1 because group has : at the end + hourEnd := match[3] - 1 + hours, err = strconv.ParseInt(line[hourStart:hourEnd], 10, 64) + if err != nil { + return 0, err + } + } + + minutes, err := strconv.ParseInt(line[match[4]:match[5]], 10, 64) + if err != nil { + return 0, err + } + + sec, err := strconv.ParseInt(line[match[6]:match[7]], 10, 64) + if err != nil { + return 0, err + } + + msStart := match[8] + if msStart != -1 { + msEnd := match[9] + // +1 offset since this capture group contains . + millis, err = strconv.ParseInt(line[msStart+1:msEnd], 10, 64) + if err != nil { + return 0, err + } + + length := msEnd - msStart + + if length == 3 { + millis *= 10 + } else if length == 2 { + millis *= 100 + } + } + + timeInMillis := (((((hours * 60) + minutes) * 60) + sec) * 1000) + millis + return timeInMillis, nil +} + type LyricList []Lyrics diff --git a/model/lyrics_test.go b/model/lyrics_test.go index 6dedeea2b..54352f77c 100644 --- a/model/lyrics_test.go +++ b/model/lyrics_test.go @@ -101,4 +101,18 @@ var _ = Describe("ToLyrics", func() { {Start: &c, Value: "c"}, })) }) + + It("Properly sorts repeated lyrics out of order", func() { + a, b, c, d, e := int64(0), int64(10000), int64(40000), int64(13*60*1000), int64(1000*60*60*51) + lyrics, err := ToLyrics("xxx", "[00:00.00] [13:00]Repeated\n[00:10.00][51:00:00.00]Test\n[00:40.00]Not repeated") + Expect(err).ToNot(HaveOccurred()) + Expect(lyrics.Synced).To(BeTrue()) + Expect(lyrics.Line).To(Equal([]Line{ + {Start: &a, Value: "Repeated"}, + {Start: &b, Value: "Test"}, + {Start: &c, Value: "Not repeated"}, + {Start: &d, Value: "Repeated"}, + {Start: &e, Value: "Test"}, + })) + }) })