fix(scanner): improve M3U playlist import times (#2706)

This commit is contained in:
Deluan 2024-09-18 19:18:07 -04:00
parent ee2e04b832
commit 46be041e7b
9 changed files with 167 additions and 51 deletions

View file

@ -1,8 +1,6 @@
package core
import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
@ -20,6 +18,7 @@ import (
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/model/criteria"
"github.com/navidrome/navidrome/model/request"
"github.com/navidrome/navidrome/utils/slice"
)
type Playlists interface {
@ -133,34 +132,39 @@ func (s *playlists) parseNSP(ctx context.Context, pls *model.Playlist, file io.R
func (s *playlists) parseM3U(ctx context.Context, pls *model.Playlist, baseDir string, reader io.Reader) (*model.Playlist, error) {
mediaFileRepository := s.ds.MediaFile(ctx)
scanner := bufio.NewScanner(reader)
scanner.Split(scanLines)
var mfs model.MediaFiles
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if strings.HasPrefix(line, "#PLAYLIST:") {
if split := strings.Split(line, ":"); len(split) >= 2 {
pls.Name = split[1]
for lines := range slice.CollectChunks[string](400, slice.LinesFrom(reader)) {
var filteredLines []string
for _, line := range lines {
line := strings.TrimSpace(line)
if strings.HasPrefix(line, "#PLAYLIST:") {
if split := strings.Split(line, ":"); len(split) >= 2 {
pls.Name = split[1]
}
continue
}
continue
// Skip empty lines and extended info
if line == "" || strings.HasPrefix(line, "#") {
continue
}
if strings.HasPrefix(line, "file://") {
line = strings.TrimPrefix(line, "file://")
line, _ = url.QueryUnescape(line)
}
if baseDir != "" && !filepath.IsAbs(line) {
line = filepath.Join(baseDir, line)
}
filteredLines = append(filteredLines, line)
}
// Skip empty lines and extended info
if line == "" || strings.HasPrefix(line, "#") {
continue
}
if strings.HasPrefix(line, "file://") {
line = strings.TrimPrefix(line, "file://")
line, _ = url.QueryUnescape(line)
}
if baseDir != "" && !filepath.IsAbs(line) {
line = filepath.Join(baseDir, line)
}
mf, err := mediaFileRepository.FindByPath(line)
found, err := mediaFileRepository.FindByPaths(filteredLines)
if err != nil {
log.Warn(ctx, "Path in playlist not found", "playlist", pls.Name, "path", line, err)
log.Warn(ctx, "Error reading files from DB", "playlist", pls.Name, err)
continue
}
mfs = append(mfs, *mf)
if len(found) != len(filteredLines) {
logMissingFiles(ctx, pls, filteredLines, found)
}
mfs = append(mfs, found...)
}
if pls.Name == "" {
pls.Name = time.Now().Format(time.RFC3339)
@ -168,7 +172,20 @@ func (s *playlists) parseM3U(ctx context.Context, pls *model.Playlist, baseDir s
pls.Tracks = nil
pls.AddMediaFiles(mfs)
return pls, scanner.Err()
return pls, nil
}
func logMissingFiles(ctx context.Context, pls *model.Playlist, lines []string, found model.MediaFiles) {
missing := make(map[string]bool)
for _, line := range lines {
missing[line] = true
}
for _, mf := range found {
delete(missing, mf.Path)
}
for path := range missing {
log.Warn(ctx, "Path in playlist not found", "playlist", pls.Name, "path", path)
}
}
func (s *playlists) updatePlaylist(ctx context.Context, newPls *model.Playlist) error {
@ -199,30 +216,6 @@ func (s *playlists) updatePlaylist(ctx context.Context, newPls *model.Playlist)
return s.ds.Playlist(ctx).Put(newPls)
}
// From https://stackoverflow.com/a/41433698
func scanLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
if i := bytes.IndexAny(data, "\r\n"); i >= 0 {
if data[i] == '\n' {
// We have a line terminated by single newline.
return i + 1, data[0:i], nil
}
advance = i + 1
if len(data) > i+1 && data[i+1] == '\n' {
advance += 1
}
return advance, data[0:i], nil
}
// If we're at EOF, we have a final, non-terminated line. Return it.
if atEOF {
return len(data), data, nil
}
// Request more data.
return 0, nil, nil
}
func (s *playlists) Update(ctx context.Context, playlistID string,
name *string, comment *string, public *bool,
idsToAdd []string, idxToRemove []int) error {

View file

@ -118,6 +118,15 @@ func (r *mockedMediaFile) FindByPath(s string) (*model.MediaFile, error) {
}, nil
}
func (r *mockedMediaFile) FindByPaths(paths []string) (model.MediaFiles, error) {
var mfs model.MediaFiles
for _, path := range paths {
mf, _ := r.FindByPath(path)
mfs = append(mfs, *mf)
}
return mfs, nil
}
type mockedPlaylist struct {
last *model.Playlist
model.PlaylistRepository

View file

@ -265,6 +265,7 @@ type MediaFileRepository interface {
// Queries by path to support the scanner, no Annotations or Bookmarks required in the response
FindAllByPath(path string) (MediaFiles, error)
FindByPath(path string) (*MediaFile, error)
FindByPaths(paths []string) (MediaFiles, error)
FindPathsRecursively(basePath string) ([]string, error)
DeleteByPath(path string) (int64, error)

View file

@ -127,6 +127,15 @@ func (r *mediaFileRepository) FindByPath(path string) (*model.MediaFile, error)
return &res[0], nil
}
func (r *mediaFileRepository) FindByPaths(paths []string) (model.MediaFiles, error) {
sel := r.newSelect().Columns("*").Where(Eq{"path collate nocase": paths})
var res model.MediaFiles
if err := r.queryAll(sel, &res); err != nil {
return nil, err
}
return res, nil
}
func cleanPath(path string) string {
path = filepath.Clean(path)
if !strings.HasSuffix(path, string(os.PathSeparator)) {

View file

@ -5,6 +5,7 @@ import (
"os"
"path/filepath"
"strings"
"time"
"github.com/mattn/go-zglob"
"github.com/navidrome/navidrome/conf"
@ -36,6 +37,7 @@ func (s *playlistImporter) processPlaylists(ctx context.Context, dir string) int
return count
}
for _, f := range files {
started := time.Now()
if strings.HasPrefix(f.Name(), ".") {
continue
}
@ -47,9 +49,9 @@ func (s *playlistImporter) processPlaylists(ctx context.Context, dir string) int
continue
}
if pls.IsSmartPlaylist() {
log.Debug("Imported smart playlist", "name", pls.Name, "lastUpdated", pls.UpdatedAt, "path", pls.Path, "numTracks", pls.SongCount)
log.Debug("Imported smart playlist", "name", pls.Name, "lastUpdated", pls.UpdatedAt, "path", pls.Path, "elapsed", time.Since(started))
} else {
log.Debug("Imported playlist", "name", pls.Name, "lastUpdated", pls.UpdatedAt, "path", pls.Path, "numTracks", pls.SongCount)
log.Debug("Imported playlist", "name", pls.Name, "lastUpdated", pls.UpdatedAt, "path", pls.Path, "numTracks", len(pls.Tracks), "elapsed", time.Since(started))
}
s.cacheWarmer.PreCache(pls.CoverArtID())
count++

View file

@ -77,6 +77,15 @@ func (r *mockedMediaFile) FindByPath(s string) (*model.MediaFile, error) {
}, nil
}
func (r *mockedMediaFile) FindByPaths(paths []string) (model.MediaFiles, error) {
var mfs model.MediaFiles
for _, path := range paths {
mf, _ := r.FindByPath(path)
mfs = append(mfs, *mf)
}
return mfs, nil
}
type mockedPlaylist struct {
model.PlaylistRepository
}

0
tests/fixtures/empty.txt vendored Normal file
View file

View file

@ -1,5 +1,12 @@
package slice
import (
"bufio"
"bytes"
"io"
"iter"
)
func Map[T any, R any](t []T, mapFunc func(T) R) []R {
r := make([]R, len(t))
for i, e := range t {
@ -79,3 +86,57 @@ func RangeByChunks[T any](items []T, chunkSize int, cb func([]T) error) error {
}
return nil
}
func LinesFrom(reader io.Reader) iter.Seq[string] {
return func(yield func(string) bool) {
scanner := bufio.NewScanner(reader)
scanner.Split(scanLines)
for scanner.Scan() {
if !yield(scanner.Text()) {
return
}
}
}
}
// From https://stackoverflow.com/a/41433698
func scanLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
if i := bytes.IndexAny(data, "\r\n"); i >= 0 {
if data[i] == '\n' {
// We have a line terminated by single newline.
return i + 1, data[0:i], nil
}
advance = i + 1
if len(data) > i+1 && data[i+1] == '\n' {
advance += 1
}
return advance, data[0:i], nil
}
// If we're at EOF, we have a final, non-terminated line. Return it.
if atEOF {
return len(data), data, nil
}
// Request more data.
return 0, nil, nil
}
func CollectChunks[T any](n int, it iter.Seq[T]) iter.Seq[[]T] {
return func(yield func([]T) bool) {
var s []T
for x := range it {
s = append(s, x)
if len(s) >= n {
if !yield(s) {
return
}
s = nil
}
}
if len(s) > 0 {
yield(s)
}
}
}

View file

@ -1,15 +1,19 @@
package slice_test
import (
"os"
"slices"
"strconv"
"testing"
"github.com/navidrome/navidrome/tests"
"github.com/navidrome/navidrome/utils/slice"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestSlice(t *testing.T) {
tests.Init(t, false)
RegisterFailHandler(Fail)
RunSpecs(t, "Slice Suite")
}
@ -90,4 +94,32 @@ var _ = Describe("Slice Utils", func() {
Expect(chunks[1]).To(HaveExactElements("d", "e"))
})
})
DescribeTable("LinesFrom",
func(path string, expected int) {
count := 0
file, _ := os.Open(path)
defer file.Close()
for _ = range slice.LinesFrom(file) {
count++
}
Expect(count).To(Equal(expected))
},
Entry("returns empty slice for an empty input", "tests/fixtures/empty.txt", 0),
Entry("returns the lines of a file", "tests/fixtures/playlists/pls1.m3u", 3),
Entry("returns empty if file does not exist", "tests/fixtures/NON-EXISTENT", 0),
)
DescribeTable("CollectChunks",
func(input []int, n int, expected [][]int) {
result := [][]int{}
for chunks := range slice.CollectChunks[int](n, slices.Values(input)) {
result = append(result, chunks)
}
Expect(result).To(Equal(expected))
},
Entry("returns empty slice for an empty input", []int{}, 1, [][]int{}),
Entry("returns the slice in one chunk if len < chunkSize", []int{1, 2, 3}, 10, [][]int{{1, 2, 3}}),
Entry("breaks up the slice if len > chunkSize", []int{1, 2, 3, 4, 5}, 3, [][]int{{1, 2, 3}, {4, 5}}),
)
})