From 46be041e7b86f3fe2bc6cf5f1f64a80c757c0f75 Mon Sep 17 00:00:00 2001 From: Deluan Date: Wed, 18 Sep 2024 19:18:07 -0400 Subject: [PATCH] fix(scanner): improve M3U playlist import times (#2706) --- core/playlists.go | 91 +++++++++++++---------------- core/playlists_test.go | 9 +++ model/mediafile.go | 1 + persistence/mediafile_repository.go | 9 +++ scanner/playlist_importer.go | 6 +- scanner/playlist_importer_test.go | 9 +++ tests/fixtures/empty.txt | 0 utils/slice/slice.go | 61 +++++++++++++++++++ utils/slice/slice_test.go | 32 ++++++++++ 9 files changed, 167 insertions(+), 51 deletions(-) create mode 100644 tests/fixtures/empty.txt diff --git a/core/playlists.go b/core/playlists.go index b9d9fc75b..12e684e48 100644 --- a/core/playlists.go +++ b/core/playlists.go @@ -1,8 +1,6 @@ package core import ( - "bufio" - "bytes" "context" "encoding/json" "errors" @@ -20,6 +18,7 @@ import ( "github.com/navidrome/navidrome/model" "github.com/navidrome/navidrome/model/criteria" "github.com/navidrome/navidrome/model/request" + "github.com/navidrome/navidrome/utils/slice" ) type Playlists interface { @@ -133,34 +132,39 @@ func (s *playlists) parseNSP(ctx context.Context, pls *model.Playlist, file io.R func (s *playlists) parseM3U(ctx context.Context, pls *model.Playlist, baseDir string, reader io.Reader) (*model.Playlist, error) { mediaFileRepository := s.ds.MediaFile(ctx) - scanner := bufio.NewScanner(reader) - scanner.Split(scanLines) var mfs model.MediaFiles - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - if strings.HasPrefix(line, "#PLAYLIST:") { - if split := strings.Split(line, ":"); len(split) >= 2 { - pls.Name = split[1] + for lines := range slice.CollectChunks[string](400, slice.LinesFrom(reader)) { + var filteredLines []string + for _, line := range lines { + line := strings.TrimSpace(line) + if strings.HasPrefix(line, "#PLAYLIST:") { + if split := strings.Split(line, ":"); len(split) >= 2 { + pls.Name = split[1] + } + continue } - continue + // Skip empty lines and extended info + if line == "" || strings.HasPrefix(line, "#") { + continue + } + if strings.HasPrefix(line, "file://") { + line = strings.TrimPrefix(line, "file://") + line, _ = url.QueryUnescape(line) + } + if baseDir != "" && !filepath.IsAbs(line) { + line = filepath.Join(baseDir, line) + } + filteredLines = append(filteredLines, line) } - // Skip empty lines and extended info - if line == "" || strings.HasPrefix(line, "#") { - continue - } - if strings.HasPrefix(line, "file://") { - line = strings.TrimPrefix(line, "file://") - line, _ = url.QueryUnescape(line) - } - if baseDir != "" && !filepath.IsAbs(line) { - line = filepath.Join(baseDir, line) - } - mf, err := mediaFileRepository.FindByPath(line) + found, err := mediaFileRepository.FindByPaths(filteredLines) if err != nil { - log.Warn(ctx, "Path in playlist not found", "playlist", pls.Name, "path", line, err) + log.Warn(ctx, "Error reading files from DB", "playlist", pls.Name, err) continue } - mfs = append(mfs, *mf) + if len(found) != len(filteredLines) { + logMissingFiles(ctx, pls, filteredLines, found) + } + mfs = append(mfs, found...) } if pls.Name == "" { pls.Name = time.Now().Format(time.RFC3339) @@ -168,7 +172,20 @@ func (s *playlists) parseM3U(ctx context.Context, pls *model.Playlist, baseDir s pls.Tracks = nil pls.AddMediaFiles(mfs) - return pls, scanner.Err() + return pls, nil +} + +func logMissingFiles(ctx context.Context, pls *model.Playlist, lines []string, found model.MediaFiles) { + missing := make(map[string]bool) + for _, line := range lines { + missing[line] = true + } + for _, mf := range found { + delete(missing, mf.Path) + } + for path := range missing { + log.Warn(ctx, "Path in playlist not found", "playlist", pls.Name, "path", path) + } } func (s *playlists) updatePlaylist(ctx context.Context, newPls *model.Playlist) error { @@ -199,30 +216,6 @@ func (s *playlists) updatePlaylist(ctx context.Context, newPls *model.Playlist) return s.ds.Playlist(ctx).Put(newPls) } -// From https://stackoverflow.com/a/41433698 -func scanLines(data []byte, atEOF bool) (advance int, token []byte, err error) { - if atEOF && len(data) == 0 { - return 0, nil, nil - } - if i := bytes.IndexAny(data, "\r\n"); i >= 0 { - if data[i] == '\n' { - // We have a line terminated by single newline. - return i + 1, data[0:i], nil - } - advance = i + 1 - if len(data) > i+1 && data[i+1] == '\n' { - advance += 1 - } - return advance, data[0:i], nil - } - // If we're at EOF, we have a final, non-terminated line. Return it. - if atEOF { - return len(data), data, nil - } - // Request more data. - return 0, nil, nil -} - func (s *playlists) Update(ctx context.Context, playlistID string, name *string, comment *string, public *bool, idsToAdd []string, idxToRemove []int) error { diff --git a/core/playlists_test.go b/core/playlists_test.go index 2d17a48f4..54f391cee 100644 --- a/core/playlists_test.go +++ b/core/playlists_test.go @@ -118,6 +118,15 @@ func (r *mockedMediaFile) FindByPath(s string) (*model.MediaFile, error) { }, nil } +func (r *mockedMediaFile) FindByPaths(paths []string) (model.MediaFiles, error) { + var mfs model.MediaFiles + for _, path := range paths { + mf, _ := r.FindByPath(path) + mfs = append(mfs, *mf) + } + return mfs, nil +} + type mockedPlaylist struct { last *model.Playlist model.PlaylistRepository diff --git a/model/mediafile.go b/model/mediafile.go index 8a6979534..ed7b063e6 100644 --- a/model/mediafile.go +++ b/model/mediafile.go @@ -265,6 +265,7 @@ type MediaFileRepository interface { // Queries by path to support the scanner, no Annotations or Bookmarks required in the response FindAllByPath(path string) (MediaFiles, error) FindByPath(path string) (*MediaFile, error) + FindByPaths(paths []string) (MediaFiles, error) FindPathsRecursively(basePath string) ([]string, error) DeleteByPath(path string) (int64, error) diff --git a/persistence/mediafile_repository.go b/persistence/mediafile_repository.go index 6c6380b59..8e21b3625 100644 --- a/persistence/mediafile_repository.go +++ b/persistence/mediafile_repository.go @@ -127,6 +127,15 @@ func (r *mediaFileRepository) FindByPath(path string) (*model.MediaFile, error) return &res[0], nil } +func (r *mediaFileRepository) FindByPaths(paths []string) (model.MediaFiles, error) { + sel := r.newSelect().Columns("*").Where(Eq{"path collate nocase": paths}) + var res model.MediaFiles + if err := r.queryAll(sel, &res); err != nil { + return nil, err + } + return res, nil +} + func cleanPath(path string) string { path = filepath.Clean(path) if !strings.HasSuffix(path, string(os.PathSeparator)) { diff --git a/scanner/playlist_importer.go b/scanner/playlist_importer.go index 0e3e22855..dccf292fa 100644 --- a/scanner/playlist_importer.go +++ b/scanner/playlist_importer.go @@ -5,6 +5,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/mattn/go-zglob" "github.com/navidrome/navidrome/conf" @@ -36,6 +37,7 @@ func (s *playlistImporter) processPlaylists(ctx context.Context, dir string) int return count } for _, f := range files { + started := time.Now() if strings.HasPrefix(f.Name(), ".") { continue } @@ -47,9 +49,9 @@ func (s *playlistImporter) processPlaylists(ctx context.Context, dir string) int continue } if pls.IsSmartPlaylist() { - log.Debug("Imported smart playlist", "name", pls.Name, "lastUpdated", pls.UpdatedAt, "path", pls.Path, "numTracks", pls.SongCount) + log.Debug("Imported smart playlist", "name", pls.Name, "lastUpdated", pls.UpdatedAt, "path", pls.Path, "elapsed", time.Since(started)) } else { - log.Debug("Imported playlist", "name", pls.Name, "lastUpdated", pls.UpdatedAt, "path", pls.Path, "numTracks", pls.SongCount) + log.Debug("Imported playlist", "name", pls.Name, "lastUpdated", pls.UpdatedAt, "path", pls.Path, "numTracks", len(pls.Tracks), "elapsed", time.Since(started)) } s.cacheWarmer.PreCache(pls.CoverArtID()) count++ diff --git a/scanner/playlist_importer_test.go b/scanner/playlist_importer_test.go index 3e4836f52..d33e5250d 100644 --- a/scanner/playlist_importer_test.go +++ b/scanner/playlist_importer_test.go @@ -77,6 +77,15 @@ func (r *mockedMediaFile) FindByPath(s string) (*model.MediaFile, error) { }, nil } +func (r *mockedMediaFile) FindByPaths(paths []string) (model.MediaFiles, error) { + var mfs model.MediaFiles + for _, path := range paths { + mf, _ := r.FindByPath(path) + mfs = append(mfs, *mf) + } + return mfs, nil +} + type mockedPlaylist struct { model.PlaylistRepository } diff --git a/tests/fixtures/empty.txt b/tests/fixtures/empty.txt new file mode 100644 index 000000000..e69de29bb diff --git a/utils/slice/slice.go b/utils/slice/slice.go index 78169685f..4ba55edf5 100644 --- a/utils/slice/slice.go +++ b/utils/slice/slice.go @@ -1,5 +1,12 @@ package slice +import ( + "bufio" + "bytes" + "io" + "iter" +) + func Map[T any, R any](t []T, mapFunc func(T) R) []R { r := make([]R, len(t)) for i, e := range t { @@ -79,3 +86,57 @@ func RangeByChunks[T any](items []T, chunkSize int, cb func([]T) error) error { } return nil } + +func LinesFrom(reader io.Reader) iter.Seq[string] { + return func(yield func(string) bool) { + scanner := bufio.NewScanner(reader) + scanner.Split(scanLines) + for scanner.Scan() { + if !yield(scanner.Text()) { + return + } + } + } +} + +// From https://stackoverflow.com/a/41433698 +func scanLines(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + if i := bytes.IndexAny(data, "\r\n"); i >= 0 { + if data[i] == '\n' { + // We have a line terminated by single newline. + return i + 1, data[0:i], nil + } + advance = i + 1 + if len(data) > i+1 && data[i+1] == '\n' { + advance += 1 + } + return advance, data[0:i], nil + } + // If we're at EOF, we have a final, non-terminated line. Return it. + if atEOF { + return len(data), data, nil + } + // Request more data. + return 0, nil, nil +} + +func CollectChunks[T any](n int, it iter.Seq[T]) iter.Seq[[]T] { + return func(yield func([]T) bool) { + var s []T + for x := range it { + s = append(s, x) + if len(s) >= n { + if !yield(s) { + return + } + s = nil + } + } + if len(s) > 0 { + yield(s) + } + } +} diff --git a/utils/slice/slice_test.go b/utils/slice/slice_test.go index 9f6e4d803..ccb937f46 100644 --- a/utils/slice/slice_test.go +++ b/utils/slice/slice_test.go @@ -1,15 +1,19 @@ package slice_test import ( + "os" + "slices" "strconv" "testing" + "github.com/navidrome/navidrome/tests" "github.com/navidrome/navidrome/utils/slice" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) func TestSlice(t *testing.T) { + tests.Init(t, false) RegisterFailHandler(Fail) RunSpecs(t, "Slice Suite") } @@ -90,4 +94,32 @@ var _ = Describe("Slice Utils", func() { Expect(chunks[1]).To(HaveExactElements("d", "e")) }) }) + + DescribeTable("LinesFrom", + func(path string, expected int) { + count := 0 + file, _ := os.Open(path) + defer file.Close() + for _ = range slice.LinesFrom(file) { + count++ + } + Expect(count).To(Equal(expected)) + }, + Entry("returns empty slice for an empty input", "tests/fixtures/empty.txt", 0), + Entry("returns the lines of a file", "tests/fixtures/playlists/pls1.m3u", 3), + Entry("returns empty if file does not exist", "tests/fixtures/NON-EXISTENT", 0), + ) + + DescribeTable("CollectChunks", + func(input []int, n int, expected [][]int) { + result := [][]int{} + for chunks := range slice.CollectChunks[int](n, slices.Values(input)) { + result = append(result, chunks) + } + Expect(result).To(Equal(expected)) + }, + Entry("returns empty slice for an empty input", []int{}, 1, [][]int{}), + Entry("returns the slice in one chunk if len < chunkSize", []int{1, 2, 3}, 10, [][]int{{1, 2, 3}}), + Entry("breaks up the slice if len > chunkSize", []int{1, 2, 3, 4, 5}, 3, [][]int{{1, 2, 3}, {4, 5}}), + ) })