navidrome/scanner/phase_1_folders.go
Deluan Quintão c795bcfcf7
feat(bfr): Big Refactor: new scanner, lots of new fields and tags, improvements and DB schema changes (#2709)
* fix(server): more race conditions when updating artist/album from external sources

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(scanner): add .gitignore syntax to .ndignore. Resolves #1394

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(ui): null

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner): pass configfile option to child process

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner): resume interrupted fullScans

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner): remove old scanner code

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner): rename old metadata package

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner): move old metadata package

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: tests

Signed-off-by: Deluan <deluan@navidrome.org>

* chore(deps): update Go to 1.23.4

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: logs

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(test):

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: log level

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: remove log message

Signed-off-by: Deluan <deluan@navidrome.org>

* feat: add config for scanner watcher

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: children playlists

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: replace `interface{}` with `any`

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: smart playlists with genres

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: allow any tags in smart playlists

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: artist names in playlists

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: smart playlist's sort by tags

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add moods to child

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add moods to AlbumID3

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(subsonic): use generic JSONArray for OS arrays

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(subsonic): use https in test

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add releaseTypes to AlbumID3

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add recordLabels to AlbumID3

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(subsonic): rename JSONArray to Array

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add artists to AlbumID3

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add artists to Child

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner): do not pre-populate smart playlists

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): implement a simplified version of ArtistID3.

See https://github.com/opensubsonic/open-subsonic-api/discussions/120

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add artists to album child

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add contributors to mediafile Child

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add albumArtists to mediafile Child

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add displayArtist and displayAlbumArtist

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add displayComposer to Child

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add roles to ArtistID3

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(subsonic): use " • " separator for displayComposer

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor:

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(subsonic):

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(subsonic): respect `PreferSortTags` config option

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(subsonic):

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: optimize purging non-unused tags

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: don't run 'refresh artist stats' concurrently with other transactions

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor:

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: log message

Signed-off-by: Deluan <deluan@navidrome.org>

* feat: add Scanner.ScanOnStartup config option, default true

Signed-off-by: Deluan <deluan@navidrome.org>

* feat: better json parsing error msg when importing NSPs

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: don't update album's imported_time when updating external_metadata

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: handle interrupted scans and full scans after migrations

Signed-off-by: Deluan <deluan@navidrome.org>

* feat: run `analyze` when migration requires a full rescan

Signed-off-by: Deluan <deluan@navidrome.org>

* feat: run `PRAGMA optimize` at the end of the scan

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: don't update artist's updated_at when updating external_metadata

Signed-off-by: Deluan <deluan@navidrome.org>

* feat: handle multiple artists and roles in smart playlists

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): dim missing tracks

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: album missing logic

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: error encoding in gob

Signed-off-by: Deluan <deluan@navidrome.org>

* feat: separate warnings from errors

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: mark albums as missing if they were contained in a deleted folder

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: add participant names to media_file and album tables

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: use participations in criteria, instead of m2m relationship

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: rename participations to participants

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add moods to album child

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: albumartist role case

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(scanner): run scanner as an external process by default

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(ui): show albumArtist names

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(ui): dim out missing albums

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: flaky test

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(server): scrobble buffer mapping. fix #3583

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: more participations renaming

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: listenbrainz scrobbling

Signed-off-by: Deluan <deluan@navidrome.org>

* feat: send release_group_mbid to listenbrainz

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): implement OpenSubsonic explicitStatus field (#3597)

* feat: implement OpenSubsonic explicitStatus field

* fix(subsonic): fix failing snapshot tests

* refactor: create helper for setting explicitStatus

* fix: store smaller values for explicit-status on database

* test: ToAlbum explicitStatus

* refactor: rename explicitStatus helper function

---------

Co-authored-by: Deluan Quintão <deluan@navidrome.org>

* fix: handle album and track tags in the DB based on the mappings.yaml file

Signed-off-by: Deluan <deluan@navidrome.org>

* save similar artists as JSONB

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: getAlbumList byGenre

Signed-off-by: Deluan <deluan@navidrome.org>

* detect changes in PID configuration

Signed-off-by: Deluan <deluan@navidrome.org>

* set default album PID to legacy_pid

Signed-off-by: Deluan <deluan@navidrome.org>

* fix tests

Signed-off-by: Deluan <deluan@navidrome.org>

* fix SIGSEGV

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: don't lose album stars/ratings when migrating

Signed-off-by: Deluan <deluan@navidrome.org>

* store full PID conf in properties

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: keep album annotations when changing PID.Album config

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: reassign album annotations

Signed-off-by: Deluan <deluan@navidrome.org>

* feat: use (display) albumArtist and add links to each artist

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: not showing albums by albumartist

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: error msgs

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: hide PID from Native API

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: album cover art resolution

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: trim participant names

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: reduce watcher log spam

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: panic when initializing the watcher

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: various artists

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: don't store empty lyrics in the DB

Signed-off-by: Deluan <deluan@navidrome.org>

* remove unused methods

Signed-off-by: Deluan <deluan@navidrome.org>

* drop full_text indexes, as they are not being used by SQLite

Signed-off-by: Deluan <deluan@navidrome.org>

* keep album created_at when upgrading

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(ui): null pointer

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: album artwork cache

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: don't expose missing files in Subsonic API

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: searchable interface

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: filter out missing items from subsonic search

* fix: filter out missing items from playlists

* fix: filter out missing items from shares

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): add filter by artist role

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): only return albumartists in getIndexes and getArtists endpoints

Signed-off-by: Deluan <deluan@navidrome.org>

* sort roles alphabetically

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: artist playcounts

Signed-off-by: Deluan <deluan@navidrome.org>

* change default Album PID conf

Signed-off-by: Deluan <deluan@navidrome.org>

* fix albumartist link when it does not match any albumartists values

Signed-off-by: Deluan <deluan@navidrome.org>

* fix `Ignoring filter not whitelisted` (role) message

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: trim any names/titles being imported

Signed-off-by: Deluan <deluan@navidrome.org>

* remove unused genre code

Signed-off-by: Deluan <deluan@navidrome.org>

* serialize calls to Last.fm's getArtist

Signed-off-by: Deluan <deluan@navidrome.org>

xxx

Signed-off-by: Deluan <deluan@navidrome.org>

* add counters to genres

Signed-off-by: Deluan <deluan@navidrome.org>

* nit: fix migration `notice` message

Signed-off-by: Deluan <deluan@navidrome.org>

* optimize similar artists query

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: last.fm.getInfo when mbid does not exist

Signed-off-by: Deluan <deluan@navidrome.org>

* ui only show missing items for admins

Signed-off-by: Deluan <deluan@navidrome.org>

* don't allow interaction with missing items

Signed-off-by: Deluan <deluan@navidrome.org>

* Add Missing Files view (WIP)

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: merged tag_counts into tag table

Signed-off-by: Deluan <deluan@navidrome.org>

* add option to completely disable automatic scanner

Signed-off-by: Deluan <deluan@navidrome.org>

* add delete missing files functionality

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: playlists not showing for regular users

Signed-off-by: Deluan <deluan@navidrome.org>

* reduce updateLastAccess frequency to once every minute

Signed-off-by: Deluan <deluan@navidrome.org>

* reduce update player frequency to once every minute

Signed-off-by: Deluan <deluan@navidrome.org>

* add timeout when updating player

Signed-off-by: Deluan <deluan@navidrome.org>

* remove dead code

Signed-off-by: Deluan <deluan@navidrome.org>

* fix duplicated roles in stats

Signed-off-by: Deluan <deluan@navidrome.org>

* add `; ` to artist splitters

Signed-off-by: Deluan <deluan@navidrome.org>

* fix stats query

Signed-off-by: Deluan <deluan@navidrome.org>

* more logs

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: support legacy clients (DSub) by removing OpenSubsonic extra fields - WIP

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: support legacy clients (DSub) by removing OpenSubsonic extra fields - WIP

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: support legacy clients (DSub) by removing OpenSubsonic extra fields - WIP

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: support legacy clients (DSub) by removing OpenSubsonic extra fields - WIP

Signed-off-by: Deluan <deluan@navidrome.org>

* add record label filter

Signed-off-by: Deluan <deluan@navidrome.org>

* add release type filter

Signed-off-by: Deluan <deluan@navidrome.org>

* fix purgeUnused tags

Signed-off-by: Deluan <deluan@navidrome.org>

* add grouping filter to albums

Signed-off-by: Deluan <deluan@navidrome.org>

* allow any album tags to be used in as filters in the API

Signed-off-by: Deluan <deluan@navidrome.org>

* remove empty tags from album info

Signed-off-by: Deluan <deluan@navidrome.org>

* comments in the migration

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: Cannot read properties of undefined

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: listenbrainz scrobbling (#3640)

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: remove duplicated tag values

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: don't ignore the taglib folder!

Signed-off-by: Deluan <deluan@navidrome.org>

* feat: show track subtitle tag

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: show artists stats based on selected role

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: inspect

Signed-off-by: Deluan <deluan@navidrome.org>

* add media type to album info/filters

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: change format of subtitle in the UI

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: subtitle in Subsonic API and search

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: subtitle in UI's player

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: split strings should be case-insensitive

Signed-off-by: Deluan <deluan@navidrome.org>

* disable ScanSchedule

Signed-off-by: Deluan <deluan@navidrome.org>

* increase default sessiontimeout

Signed-off-by: Deluan <deluan@navidrome.org>

* add sqlite command line tool to docker image

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: resources override

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: album PID conf

Signed-off-by: Deluan <deluan@navidrome.org>

* change migration to mark current artists as albumArtists

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): Allow filtering on multiple genres (#3679)

* feat(ui): Allow filtering on multiple genres

Signed-off-by: Henrik Nordvik <henrikno@gmail.com>
Signed-off-by: Deluan <deluan@navidrome.org>

* add multi-genre filter in Album list

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Henrik Nordvik <henrikno@gmail.com>
Signed-off-by: Deluan <deluan@navidrome.org>
Co-authored-by: Henrik Nordvik <henrikno@gmail.com>

* add more multi-valued tag filters to Album and Song views

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(ui): unselect missing files after removing

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(ui): song filter

Signed-off-by: Deluan <deluan@navidrome.org>

* fix sharing tracks. fix #3687

Signed-off-by: Deluan <deluan@navidrome.org>

* use rowids when using search for sync (ex: Symfonium)

Signed-off-by: Deluan <deluan@navidrome.org>

* fix "Report Real Paths" option for subsonic clients

Signed-off-by: Deluan <deluan@navidrome.org>

* fix "Report Real Paths" option for subsonic clients for search

Signed-off-by: Deluan <deluan@navidrome.org>

* add libraryPath to Native API /songs endpoint

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): add album version

Signed-off-by: Deluan <deluan@navidrome.org>

* made all tags lowercase as they are case-insensitive anyways.

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): Show full paths, extended properties for album/song (#3691)

* feat(ui): Show full paths, extended properties for album/song

- uses library path + os separator + path
- show participants (album/song) and tags (song)
- make album/participant clickable in show info

* add source to path

* fix pathSeparator in UI

Signed-off-by: Deluan <deluan@navidrome.org>

* fix local artist artwork (#3695)

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: parse vorbis performers

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: clean function into smaller functions

Signed-off-by: Deluan <deluan@navidrome.org>

* fix translations for en and pt

Signed-off-by: Deluan <deluan@navidrome.org>

* add trace log to show annotations reassignment

Signed-off-by: Deluan <deluan@navidrome.org>

* add trace log to show annotations reassignment

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: allow performers without instrument/subrole

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: metadata clean function again

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: optimize split function

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: split function is now a method of TagConf

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: humanize Artist total size

Signed-off-by: Deluan <deluan@navidrome.org>

* add album version to album details

Signed-off-by: Deluan <deluan@navidrome.org>

* don't display album-level tags in SongInfo

Signed-off-by: Deluan <deluan@navidrome.org>

* fix genre clicking in Album Page

Signed-off-by: Deluan <deluan@navidrome.org>

* don't use mbids in Last.fm api calls.

From 1337574018:

With MBID:
```
GET https://ws.audioscrobbler.com/2.0/?api_key=XXXX&artist=Van+Morrison&format=json&lang=en&mbid=a41ac10f-0a56-4672-9161-b83f9b223559&method=artist.getInfo

{
artist: {
name: "Bee Gees",
mbid: "bf0f7e29-dfe1-416c-b5c6-f9ebc19ea810",
url: "https://www.last.fm/music/Bee+Gees",
}
```

Without MBID:
```
GET https://ws.audioscrobbler.com/2.0/?api_key=XXXX&artist=Van+Morrison&format=json&lang=en&method=artist.getInfo

{
artist: {
name: "Van Morrison",
mbid: "a41ac10f-0a56-4672-9161-b83f9b223559",
url: "https://www.last.fm/music/Van+Morrison",
}
```

Signed-off-by: Deluan <deluan@navidrome.org>

* better logging for when the artist folder is not found

Signed-off-by: Deluan <deluan@navidrome.org>

* fix various issues with artist image resolution

Signed-off-by: Deluan <deluan@navidrome.org>

* hide "Additional Tags" header if there are none.

Signed-off-by: Deluan <deluan@navidrome.org>

* simplify tag rendering

Signed-off-by: Deluan <deluan@navidrome.org>

* enhance logging for artist folder detection

Signed-off-by: Deluan <deluan@navidrome.org>

* make folderID consistent for relative and absolute folderPaths

Signed-off-by: Deluan <deluan@navidrome.org>

* handle more folder paths scenarios

Signed-off-by: Deluan <deluan@navidrome.org>

* filter out other roles when SubsonicArtistParticipations = true

Signed-off-by: Deluan <deluan@navidrome.org>

* fix "Cannot read properties of undefined"

Signed-off-by: Deluan <deluan@navidrome.org>

* fix lyrics and comments being truncated (#3701)

* fix lyrics and comments being truncated

* specifically test for lyrics and comment length

* reorder assertions

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
Co-authored-by: Deluan <deluan@navidrome.org>

* fix(server): Expose library_path for playlist (#3705)

Allows showing absolute path for UI, and makes "report real path" work for playlists (Subsonic)

* fix BFR on Windows (#3704)

* fix potential reflected cross-site scripting vulnerability

Signed-off-by: Deluan <deluan@navidrome.org>

* hack to make it work on Windows

* ignore windows executables

* try fixing the pipeline

Signed-off-by: Deluan <deluan@navidrome.org>

* allow MusicFolder in other drives

* move windows local drive logic to local storage implementation

---------

Signed-off-by: Deluan <deluan@navidrome.org>

* increase pagination sizes for missing files

Signed-off-by: Deluan <deluan@navidrome.org>

* reduce level of "already scanning" watcher log message

Signed-off-by: Deluan <deluan@navidrome.org>

* only count folders with audio files in it

See https://github.com/navidrome/navidrome/discussions/3676#discussioncomment-11990930

Signed-off-by: Deluan <deluan@navidrome.org>

* add album version and catalog number to search

Signed-off-by: Deluan <deluan@navidrome.org>

* add `organization` alias for `recordlabel`

Signed-off-by: Deluan <deluan@navidrome.org>

* remove mbid from Last.fm agent

Signed-off-by: Deluan <deluan@navidrome.org>

* feat: support inspect in ui (#3726)

* inspect in ui

* address round 1

* add catalogNum to AlbumInfo

Signed-off-by: Deluan <deluan@navidrome.org>

* remove dependency on metadata_old (deprecated) package

Signed-off-by: Deluan <deluan@navidrome.org>

* add `RawTags` to model

Signed-off-by: Deluan <deluan@navidrome.org>

* support parsing MBIDs for roles (from the https://github.com/kgarner7/picard-all-mbids plugin) (#3698)


* parse standard roles, vorbis/m4a work for now

* fix djmixer

* working roles, use DJ-mix

* add performers to file

* map mbids

* add a few more tests

* add test

Signed-off-by: Deluan <deluan@navidrome.org>

* try to simplify the performers logic

Signed-off-by: Deluan <deluan@navidrome.org>

* stylistic changes

---------

Signed-off-by: Deluan <deluan@navidrome.org>
Co-authored-by: Deluan <deluan@navidrome.org>

* remove param mutation

Signed-off-by: Deluan <deluan@navidrome.org>

* run automated SQLite optimizations

Signed-off-by: Deluan <deluan@navidrome.org>

* fix playlists import/export on Windows

* fix import playlists

* fix export playlists

* better handling of Windows volumes

Signed-off-by: Deluan <deluan@navidrome.org>

* handle more album ID reassignments

Signed-off-by: Deluan <deluan@navidrome.org>

* allow adding/overriding tags in the config file

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(ui): Fix playlist track id, handle missing tracks better (#3734)

- Use `mediaFileId` instead of `id` for playlist tracks
- Only fetch if the file is not missing
- If extractor fails to get the file, also error (rather than panic)

* optimize DB after each scan.

Signed-off-by: Deluan <deluan@navidrome.org>

* remove sortable from AlbumSongs columns

Signed-off-by: Deluan <deluan@navidrome.org>

* simplify query to get missing tracks

Signed-off-by: Deluan <deluan@navidrome.org>

* mark Scanner.Extractor as deprecated

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
Signed-off-by: Henrik Nordvik <henrikno@gmail.com>
Co-authored-by: Caio Cotts <caio@cotts.com.br>
Co-authored-by: Henrik Nordvik <henrikno@gmail.com>
Co-authored-by: Kendall Garner <17521368+kgarner7@users.noreply.github.com>
2025-02-19 20:35:17 -05:00

471 lines
17 KiB
Go

package scanner
import (
"cmp"
"context"
"errors"
"fmt"
"maps"
"path"
"slices"
"sync"
"sync/atomic"
"time"
"github.com/Masterminds/squirrel"
ppl "github.com/google/go-pipeline/pkg/pipeline"
"github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/consts"
"github.com/navidrome/navidrome/core/artwork"
"github.com/navidrome/navidrome/core/storage"
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/model/metadata"
"github.com/navidrome/navidrome/utils"
"github.com/navidrome/navidrome/utils/pl"
"github.com/navidrome/navidrome/utils/slice"
)
func createPhaseFolders(ctx context.Context, state *scanState, ds model.DataStore, cw artwork.CacheWarmer, libs []model.Library) *phaseFolders {
var jobs []*scanJob
for _, lib := range libs {
if lib.LastScanStartedAt.IsZero() {
err := ds.Library(ctx).ScanBegin(lib.ID, state.fullScan)
if err != nil {
log.Error(ctx, "Scanner: Error updating last scan started at", "lib", lib.Name, err)
state.sendWarning(err.Error())
continue
}
// Reload library to get updated state
l, err := ds.Library(ctx).Get(lib.ID)
if err != nil {
log.Error(ctx, "Scanner: Error reloading library", "lib", lib.Name, err)
state.sendWarning(err.Error())
continue
}
lib = *l
} else {
log.Debug(ctx, "Scanner: Resuming previous scan", "lib", lib.Name, "lastScanStartedAt", lib.LastScanStartedAt, "fullScan", lib.FullScanInProgress)
}
job, err := newScanJob(ctx, ds, cw, lib, state.fullScan)
if err != nil {
log.Error(ctx, "Scanner: Error creating scan context", "lib", lib.Name, err)
state.sendWarning(err.Error())
continue
}
jobs = append(jobs, job)
}
return &phaseFolders{jobs: jobs, ctx: ctx, ds: ds, state: state}
}
type scanJob struct {
lib model.Library
fs storage.MusicFS
cw artwork.CacheWarmer
lastUpdates map[string]time.Time
lock sync.Mutex
numFolders atomic.Int64
}
func newScanJob(ctx context.Context, ds model.DataStore, cw artwork.CacheWarmer, lib model.Library, fullScan bool) (*scanJob, error) {
lastUpdates, err := ds.Folder(ctx).GetLastUpdates(lib)
if err != nil {
return nil, fmt.Errorf("getting last updates: %w", err)
}
fileStore, err := storage.For(lib.Path)
if err != nil {
log.Error(ctx, "Error getting storage for library", "library", lib.Name, "path", lib.Path, err)
return nil, fmt.Errorf("getting storage for library: %w", err)
}
fsys, err := fileStore.FS()
if err != nil {
log.Error(ctx, "Error getting fs for library", "library", lib.Name, "path", lib.Path, err)
return nil, fmt.Errorf("getting fs for library: %w", err)
}
lib.FullScanInProgress = lib.FullScanInProgress || fullScan
return &scanJob{
lib: lib,
fs: fsys,
cw: cw,
lastUpdates: lastUpdates,
}, nil
}
func (j *scanJob) popLastUpdate(folderID string) time.Time {
j.lock.Lock()
defer j.lock.Unlock()
lastUpdate := j.lastUpdates[folderID]
delete(j.lastUpdates, folderID)
return lastUpdate
}
// phaseFolders represents the first phase of the scanning process, which is responsible
// for scanning all libraries and importing new or updated files. This phase involves
// traversing the directory tree of each library, identifying new or modified media files,
// and updating the database with the relevant information.
//
// The phaseFolders struct holds the context, data store, and jobs required for the scanning
// process. Each job represents a library being scanned, and contains information about the
// library, file system, and the last updates of the folders.
//
// The phaseFolders struct implements the phase interface, providing methods to produce
// folder entries, process folders, persist changes to the database, and log the results.
type phaseFolders struct {
jobs []*scanJob
ds model.DataStore
ctx context.Context
state *scanState
prevAlbumPIDConf string
}
func (p *phaseFolders) description() string {
return "Scan all libraries and import new/updated files"
}
func (p *phaseFolders) producer() ppl.Producer[*folderEntry] {
return ppl.NewProducer(func(put func(entry *folderEntry)) error {
var err error
p.prevAlbumPIDConf, err = p.ds.Property(p.ctx).DefaultGet(consts.PIDAlbumKey, "")
if err != nil {
return fmt.Errorf("getting album PID conf: %w", err)
}
// TODO Parallelize multiple job when we have multiple libraries
var total int64
var totalChanged int64
for _, job := range p.jobs {
if utils.IsCtxDone(p.ctx) {
break
}
outputChan, err := walkDirTree(p.ctx, job)
if err != nil {
log.Warn(p.ctx, "Scanner: Error scanning library", "lib", job.lib.Name, err)
}
for folder := range pl.ReadOrDone(p.ctx, outputChan) {
job.numFolders.Add(1)
p.state.sendProgress(&ProgressInfo{
LibID: job.lib.ID,
FileCount: uint32(len(folder.audioFiles)),
Path: folder.path,
Phase: "1",
})
if folder.isOutdated() {
if !p.state.fullScan {
if folder.hasNoFiles() && folder.isNew() {
log.Trace(p.ctx, "Scanner: Skipping new folder with no files", "folder", folder.path, "lib", job.lib.Name)
continue
}
log.Trace(p.ctx, "Scanner: Detected changes in folder", "folder", folder.path, "lastUpdate", folder.modTime, "lib", job.lib.Name)
}
totalChanged++
folder.elapsed.Stop()
put(folder)
}
}
total += job.numFolders.Load()
}
log.Debug(p.ctx, "Scanner: Finished loading all folders", "numFolders", total, "numChanged", totalChanged)
return nil
}, ppl.Name("traverse filesystem"))
}
func (p *phaseFolders) measure(entry *folderEntry) func() time.Duration {
entry.elapsed.Start()
return func() time.Duration { return entry.elapsed.Stop() }
}
func (p *phaseFolders) stages() []ppl.Stage[*folderEntry] {
return []ppl.Stage[*folderEntry]{
ppl.NewStage(p.processFolder, ppl.Name("process folder"), ppl.Concurrency(conf.Server.DevScannerThreads)),
ppl.NewStage(p.persistChanges, ppl.Name("persist changes")),
ppl.NewStage(p.logFolder, ppl.Name("log results")),
}
}
func (p *phaseFolders) processFolder(entry *folderEntry) (*folderEntry, error) {
defer p.measure(entry)()
// Load children mediafiles from DB
cursor, err := p.ds.MediaFile(p.ctx).GetCursor(model.QueryOptions{
Filters: squirrel.And{squirrel.Eq{"folder_id": entry.id}},
})
if err != nil {
log.Error(p.ctx, "Scanner: Error loading mediafiles from DB", "folder", entry.path, err)
return entry, err
}
dbTracks := make(map[string]*model.MediaFile)
for mf, err := range cursor {
if err != nil {
log.Error(p.ctx, "Scanner: Error loading mediafiles from DB", "folder", entry.path, err)
return entry, err
}
dbTracks[mf.Path] = &mf
}
// Get list of files to import, based on modtime (or all if fullScan),
// leave in dbTracks only tracks that are missing (not found in the FS)
filesToImport := make(map[string]*model.MediaFile, len(entry.audioFiles))
for afPath, af := range entry.audioFiles {
fullPath := path.Join(entry.path, afPath)
dbTrack, foundInDB := dbTracks[fullPath]
if !foundInDB || p.state.fullScan {
filesToImport[fullPath] = dbTrack
} else {
info, err := af.Info()
if err != nil {
log.Warn(p.ctx, "Scanner: Error getting file info", "folder", entry.path, "file", af.Name(), err)
p.state.sendWarning(fmt.Sprintf("Error getting file info for %s/%s: %v", entry.path, af.Name(), err))
return entry, nil
}
if info.ModTime().After(dbTrack.UpdatedAt) || dbTrack.Missing {
filesToImport[fullPath] = dbTrack
}
}
delete(dbTracks, fullPath)
}
// Remaining dbTracks are tracks that were not found in the FS, so they should be marked as missing
entry.missingTracks = slices.Collect(maps.Values(dbTracks))
// Load metadata from files that need to be imported
if len(filesToImport) > 0 {
err = p.loadTagsFromFiles(entry, filesToImport)
if err != nil {
log.Warn(p.ctx, "Scanner: Error loading tags from files. Skipping", "folder", entry.path, err)
p.state.sendWarning(fmt.Sprintf("Error loading tags from files in %s: %v", entry.path, err))
return entry, nil
}
p.createAlbumsFromMediaFiles(entry)
p.createArtistsFromMediaFiles(entry)
}
return entry, nil
}
const filesBatchSize = 200
// loadTagsFromFiles reads metadata from the files in the given list and populates
// the entry's tracks and tags with the results.
func (p *phaseFolders) loadTagsFromFiles(entry *folderEntry, toImport map[string]*model.MediaFile) error {
tracks := make([]model.MediaFile, 0, len(toImport))
uniqueTags := make(map[string]model.Tag, len(toImport))
for chunk := range slice.CollectChunks(maps.Keys(toImport), filesBatchSize) {
allInfo, err := entry.job.fs.ReadTags(chunk...)
if err != nil {
log.Warn(p.ctx, "Scanner: Error extracting metadata from files. Skipping", "folder", entry.path, err)
return err
}
for filePath, info := range allInfo {
md := metadata.New(filePath, info)
track := md.ToMediaFile(entry.job.lib.ID, entry.id)
tracks = append(tracks, track)
for _, t := range track.Tags.FlattenAll() {
uniqueTags[t.ID] = t
}
// Keep track of any album ID changes, to reassign annotations later
prevAlbumID := ""
if prev := toImport[filePath]; prev != nil {
prevAlbumID = prev.AlbumID
} else {
prevAlbumID = md.AlbumID(track, p.prevAlbumPIDConf)
}
_, ok := entry.albumIDMap[track.AlbumID]
if prevAlbumID != track.AlbumID && !ok {
entry.albumIDMap[track.AlbumID] = prevAlbumID
}
}
}
entry.tracks = tracks
entry.tags = slices.Collect(maps.Values(uniqueTags))
return nil
}
// createAlbumsFromMediaFiles groups the entry's tracks by album ID and creates albums
func (p *phaseFolders) createAlbumsFromMediaFiles(entry *folderEntry) {
grouped := slice.Group(entry.tracks, func(mf model.MediaFile) string { return mf.AlbumID })
albums := make(model.Albums, 0, len(grouped))
for _, group := range grouped {
songs := model.MediaFiles(group)
album := songs.ToAlbum()
albums = append(albums, album)
}
entry.albums = albums
}
// createArtistsFromMediaFiles creates artists from the entry's tracks
func (p *phaseFolders) createArtistsFromMediaFiles(entry *folderEntry) {
participants := make(model.Participants, len(entry.tracks)*3) // preallocate ~3 artists per track
for _, track := range entry.tracks {
participants.Merge(track.Participants)
}
entry.artists = participants.AllArtists()
}
func (p *phaseFolders) persistChanges(entry *folderEntry) (*folderEntry, error) {
defer p.measure(entry)()
p.state.changesDetected.Store(true)
err := p.ds.WithTx(func(tx model.DataStore) error {
// Instantiate all repositories just once per folder
folderRepo := tx.Folder(p.ctx)
tagRepo := tx.Tag(p.ctx)
artistRepo := tx.Artist(p.ctx)
libraryRepo := tx.Library(p.ctx)
albumRepo := tx.Album(p.ctx)
mfRepo := tx.MediaFile(p.ctx)
// Save folder to DB
folder := entry.toFolder()
err := folderRepo.Put(folder)
if err != nil {
log.Error(p.ctx, "Scanner: Error persisting folder to DB", "folder", entry.path, err)
return err
}
// Save all tags to DB
err = tagRepo.Add(entry.tags...)
if err != nil {
log.Error(p.ctx, "Scanner: Error persisting tags to DB", "folder", entry.path, err)
return err
}
// Save all new/modified artists to DB. Their information will be incomplete, but they will be refreshed later
for i := range entry.artists {
err = artistRepo.Put(&entry.artists[i], "name", "mbz_artist_id", "sort_artist_name", "order_artist_name")
if err != nil {
log.Error(p.ctx, "Scanner: Error persisting artist to DB", "folder", entry.path, "artist", entry.artists[i].Name, err)
return err
}
err = libraryRepo.AddArtist(entry.job.lib.ID, entry.artists[i].ID)
if err != nil {
log.Error(p.ctx, "Scanner: Error adding artist to library", "lib", entry.job.lib.ID, "artist", entry.artists[i].Name, err)
return err
}
if entry.artists[i].Name != consts.UnknownArtist && entry.artists[i].Name != consts.VariousArtists {
entry.job.cw.PreCache(entry.artists[i].CoverArtID())
}
}
// Save all new/modified albums to DB. Their information will be incomplete, but they will be refreshed later
for i := range entry.albums {
err = p.persistAlbum(albumRepo, &entry.albums[i], entry.albumIDMap)
if err != nil {
log.Error(p.ctx, "Scanner: Error persisting album to DB", "folder", entry.path, "album", entry.albums[i], err)
return err
}
if entry.albums[i].Name != consts.UnknownAlbum {
entry.job.cw.PreCache(entry.albums[i].CoverArtID())
}
}
// Save all tracks to DB
for i := range entry.tracks {
err = mfRepo.Put(&entry.tracks[i])
if err != nil {
log.Error(p.ctx, "Scanner: Error persisting mediafile to DB", "folder", entry.path, "track", entry.tracks[i], err)
return err
}
}
// Mark all missing tracks as not available
if len(entry.missingTracks) > 0 {
err = mfRepo.MarkMissing(true, entry.missingTracks...)
if err != nil {
log.Error(p.ctx, "Scanner: Error marking missing tracks", "folder", entry.path, err)
return err
}
// Touch all albums that have missing tracks, so they get refreshed in later phases
groupedMissingTracks := slice.ToMap(entry.missingTracks, func(mf *model.MediaFile) (string, struct{}) {
return mf.AlbumID, struct{}{}
})
albumsToUpdate := slices.Collect(maps.Keys(groupedMissingTracks))
err = albumRepo.Touch(albumsToUpdate...)
if err != nil {
log.Error(p.ctx, "Scanner: Error touching album", "folder", entry.path, "albums", albumsToUpdate, err)
return err
}
}
return nil
})
if err != nil {
log.Error(p.ctx, "Scanner: Error persisting changes to DB", "folder", entry.path, err)
}
return entry, err
}
// persistAlbum persists the given album to the database, and reassigns annotations from the previous album ID
func (p *phaseFolders) persistAlbum(repo model.AlbumRepository, a *model.Album, idMap map[string]string) error {
prevID := idMap[a.ID]
log.Trace(p.ctx, "Persisting album", "album", a.Name, "albumArtist", a.AlbumArtist, "id", a.ID, "prevID", cmp.Or(prevID, "nil"))
if err := repo.Put(a); err != nil {
return fmt.Errorf("persisting album %s: %w", a.ID, err)
}
if prevID == "" {
return nil
}
// Reassign annotation from previous album to new album
log.Trace(p.ctx, "Reassigning album annotations", "from", prevID, "to", a.ID, "album", a.Name)
if err := repo.ReassignAnnotation(prevID, a.ID); err != nil {
log.Warn(p.ctx, "Scanner: Could not reassign annotations", "from", prevID, "to", a.ID, "album", a.Name, err)
p.state.sendWarning(fmt.Sprintf("Could not reassign annotations from %s to %s ('%s'): %v", prevID, a.ID, a.Name, err))
}
// Keep created_at field from previous instance of the album
if err := repo.CopyAttributes(prevID, a.ID, "created_at"); err != nil {
// Silently ignore when the previous album is not found
if !errors.Is(err, model.ErrNotFound) {
log.Warn(p.ctx, "Scanner: Could not copy fields", "from", prevID, "to", a.ID, "album", a.Name, err)
p.state.sendWarning(fmt.Sprintf("Could not copy fields from %s to %s ('%s'): %v", prevID, a.ID, a.Name, err))
}
}
// Don't keep track of this mapping anymore
delete(idMap, a.ID)
return nil
}
func (p *phaseFolders) logFolder(entry *folderEntry) (*folderEntry, error) {
logCall := log.Info
if entry.hasNoFiles() {
logCall = log.Trace
}
logCall(p.ctx, "Scanner: Completed processing folder",
"audioCount", len(entry.audioFiles), "imageCount", len(entry.imageFiles), "plsCount", entry.numPlaylists,
"elapsed", entry.elapsed.Elapsed(), "tracksMissing", len(entry.missingTracks),
"tracksImported", len(entry.tracks), "library", entry.job.lib.Name, consts.Zwsp+"folder", entry.path)
return entry, nil
}
func (p *phaseFolders) finalize(err error) error {
errF := p.ds.WithTx(func(tx model.DataStore) error {
for _, job := range p.jobs {
// Mark all folders that were not updated as missing
if len(job.lastUpdates) == 0 {
continue
}
folderIDs := slices.Collect(maps.Keys(job.lastUpdates))
err := tx.Folder(p.ctx).MarkMissing(true, folderIDs...)
if err != nil {
log.Error(p.ctx, "Scanner: Error marking missing folders", "lib", job.lib.Name, err)
return err
}
err = tx.MediaFile(p.ctx).MarkMissingByFolder(true, folderIDs...)
if err != nil {
log.Error(p.ctx, "Scanner: Error marking tracks in missing folders", "lib", job.lib.Name, err)
return err
}
// Touch all albums that have missing folders, so they get refreshed in later phases
_, err = tx.Album(p.ctx).TouchByMissingFolder()
if err != nil {
log.Error(p.ctx, "Scanner: Error touching albums with missing folders", "lib", job.lib.Name, err)
return err
}
}
return nil
})
return errors.Join(err, errF)
}
var _ phase[*folderEntry] = (*phaseFolders)(nil)