From 94e0b7e8b83ea90d7c19542aacf0b462e257ab3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Thu, 12 Jun 2025 18:43:08 +0200 Subject: [PATCH 01/15] Fix immediate problems after previous commit - Remove `util._unicode` usage - The `musicbrainzngs` implementation does not seem useful in our case - Fix `browse_recordings` called with incorrect arguments --- beetsplug/musicbrainz.py | 11 +++++------ poetry.lock | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index dc49d7f261..649f47749b 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -17,10 +17,10 @@ from __future__ import annotations import json +import re import threading import time import traceback -import re from collections import Counter from functools import cached_property from itertools import product @@ -248,15 +248,13 @@ def _make_query(self, query="", fields={}): query_parts = [] if query: - clean_query = util._unicode(query) if fields: - clean_query = re.sub(lucene_special, r"\\\1", clean_query) + clean_query = re.sub(lucene_special, r"\\\1", query) query_parts.append(clean_query.lower()) else: - query_parts.append(clean_query) + query_parts.append(query) for key, value in fields.items(): # Escape Lucene's special characters. - value = util._unicode(value) value = re.sub(lucene_special, r"\\\1", value) if value: value = value.lower() # avoid AND / OR @@ -804,7 +802,8 @@ def album_info(self, release: JSONDict) -> beets.autotag.hooks.AlbumInfo: self._log.debug("Retrieving tracks starting at {}", i) recording_list.extend( MbInterface().browse_recordings( - release=release["id"], + "release", + release["id"], limit=BROWSE_CHUNKSIZE, includes=BROWSE_INCLUDES, offset=i, diff --git a/poetry.lock b/poetry.lock index b47ed5060e..b1584d8591 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3301,4 +3301,4 @@ web = ["flask", "flask-cors"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "bb4f7352f9d55ffa22f09d3482d045cf50a5995714628bddff1419902cdd661f" +content-hash = "0ee33e6710cc82e6fec6c58aa0d451b5058f1979a8dce70c7f3eeb6aa682de88" From 8251c49a66f2c03aaa084046e3296133d70c1420 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Thu, 12 Jun 2025 19:00:01 +0200 Subject: [PATCH 02/15] Fix `BASE_URL` handling `BASE_URL` was changed to point to the API URL as `mbzero` expects it instead of the base URL. However, this change causes two problems: - We pass the API URL by default, but if the config uses a different hostname, then the base URL is still passed to `mbzero` - `BASE_URL` is not only used for `mbzero` but also in `track_url` and `album_url`, which are used to fill a `TrackInfo` - And it seems to me that this does not expects an API URL, but instead the human-readable web page URL To fix these, and to also better name the constants: - We set `BASE_URL` back to its original value - The constant `BASE_HOSTNAME` is added - `MbInterface.hostname` is set to `BASE_HOSTNAME` instead of `BASE_URL` - In `MbInterface._send`, we add the `/ws/2` API URL path --- beetsplug/musicbrainz.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index 649f47749b..4f59bd8a44 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -46,7 +46,8 @@ VARIOUS_ARTISTS_ID = "89ad4ac3-39f7-470e-963a-56509c546377" -BASE_URL = "musicbrainz.org/ws/2" +BASE_HOSTNAME = "musicbrainz.org" +BASE_URL = f"https://{BASE_HOSTNAME}" SKIPPED_TRACKS = ["[data track]"] @@ -177,7 +178,7 @@ class MbInterface: BEETS_USERAGENT = "beets/{} (https://beets.io/)".format(beets.__version__) def __init__(self, useragent=BEETS_USERAGENT): - self.hostname = BASE_URL + self.hostname = BASE_HOSTNAME self.https = True self.useragent = useragent pass @@ -218,11 +219,8 @@ def _search(self, entity, query, limit=None, offset=None, **fields): def _send(self, mbr, limit=None, offset=None): if self.hostname: - mbr.set_url( - "{}://{}".format( - "https" if self.https else "http", self.hostname - ) - ) + scheme = "https" if self.https else "http" + mbr.set_url(f"{scheme}://{self.hostname}/ws/2") opts = {} if limit: opts["limit"] = limit From 7285c72423f84b108100b6f9e21b93ff181166ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Thu, 12 Jun 2025 19:00:29 +0200 Subject: [PATCH 03/15] Fix alias retrieval `alias["alias"]` was not present, but taking the `"name"` seems like the logic thing to do. --- beetsplug/musicbrainz.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index 4f59bd8a44..9b809927e6 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -446,7 +446,7 @@ def _multi_artist_credit( # An artist. if alias: - cur_artist_name = alias["alias"] + cur_artist_name = alias["name"] else: cur_artist_name = el["artist"]["name"] artist_parts.append(cur_artist_name) @@ -1071,8 +1071,13 @@ def _search_api( self._log.debug( "Searching for MusicBrainz {}s with: {!r}", query_type, filters ) + mb_interface = MbInterface() + method = ( + mb_interface.search_recordings + if query_type == "recording" + else mb_interface.search_releases + ) try: - method = getattr(MbInterface(), f"search_{query_type}s") res = method(limit=self.config["searchlimit"].get(int), **filters) except MusicBrainzError as exc: raise MusicBrainzAPIError( From 34772566e2cbf110013d739bd77ad51d0c776688 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Thu, 12 Jun 2025 19:04:55 +0200 Subject: [PATCH 04/15] Reuse `MbInterface` to respect config values `MbInterface().set_hostname(...)` is called with the plugin config, that allows users to use another URL than the default one. The problem is that `set_hostname(...)` modifies the object values, but since we were creating a new `MbInterface` each time, the hostname & https would get overwritten to use the default values --- beetsplug/musicbrainz.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index 9b809927e6..6c9eebb449 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -573,6 +573,7 @@ def _is_translation(r): def _find_actual_release_from_pseudo_release( + mb_interface: MbInterface, pseudo_rel: JSONDict, ) -> JSONDict | None: try: @@ -588,7 +589,7 @@ def _find_actual_release_from_pseudo_release( actual_id = translations[0]["target"] - return MbInterface().get_release_by_id(actual_id, includes=RELEASE_INCLUDES) + return mb_interface.get_release_by_id(actual_id, includes=RELEASE_INCLUDES) def _merge_pseudo_and_actual_album( @@ -657,12 +658,13 @@ def __init__(self): "extra_tags": [], }, ) + self.mb_interface = MbInterface() hostname = self.config["host"].as_str() https = self.config["https"].get(bool) # Only call set_hostname when a custom server is configured. Since # musicbrainz-ngs connects to musicbrainz.org with HTTPS by default if hostname != "musicbrainz.org": - MbInterface().set_hostname(hostname, https) + self.mb_interface.set_hostname(hostname, https) _RateLimitsSingleton().set_rate_limit( self.config["ratelimit_interval"].as_number(), self.config["ratelimit"].get(int), @@ -799,7 +801,7 @@ def album_info(self, release: JSONDict) -> beets.autotag.hooks.AlbumInfo: for i in range(0, ntracks, BROWSE_CHUNKSIZE): self._log.debug("Retrieving tracks starting at {}", i) recording_list.extend( - MbInterface().browse_recordings( + self.mb_interface.browse_recordings( "release", release["id"], limit=BROWSE_CHUNKSIZE, @@ -1071,11 +1073,10 @@ def _search_api( self._log.debug( "Searching for MusicBrainz {}s with: {!r}", query_type, filters ) - mb_interface = MbInterface() method = ( - mb_interface.search_recordings + self.mb_interface.search_recordings if query_type == "recording" - else mb_interface.search_releases + else self.mb_interface.search_releases ) try: res = method(limit=self.config["searchlimit"].get(int), **filters) @@ -1120,13 +1121,15 @@ def album_for_id( return None try: - res = MbInterface().get_release_by_id(albumid, includes=RELEASE_INCLUDES) + res = self.mb_interface.get_release_by_id(albumid, includes=RELEASE_INCLUDES) # resolve linked release relations actual_res = None if res.get("status") == "Pseudo-Release": - actual_res = _find_actual_release_from_pseudo_release(res) + actual_res = _find_actual_release_from_pseudo_release( + self.mb_interface, res + ) except MbResponseError: self._log.debug("Album ID match failed.") @@ -1157,7 +1160,7 @@ def track_for_id( return None try: - res = MbInterface().get_recording_by_id(trackid, TRACK_INCLUDES) + res = self.mb_interface.get_recording_by_id(trackid, TRACK_INCLUDES) except MbResponseError: self._log.debug("Track ID match failed.") return None From 638dcbe9bb80795131496daf8987252fdd344202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Wed, 11 Jun 2025 20:44:33 +0000 Subject: [PATCH 05/15] Clean MbInterface API - Remove the 'params' parameter from the '_lookup' method - It was not used and was always an empty dict - And thus also remove the now unused parameters of 'get_release_by_id' and 'get_recording_by_id' - Remove the 'limit' and 'offset' parameters from the '_lookup' method - They do not make sense for this method since a lookup is to get a single entity - Remove the 'query' parameter from the '_make_query' method - It was not used and thus made the method more complex to read and verify - Add a 'json.loads' to 'browse_recordings' - All mbzero requests return bytes, so this one should also be decoded as json - Rename and reorder some parameters to avoid confusion - In particular the '_browse' method where it was easy to make a mistake between the two entity types parameters - Add more type information - Add more documentation --- beetsplug/musicbrainz.py | 213 +++++++++++++++++++++++++++++---------- 1 file changed, 160 insertions(+), 53 deletions(-) diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index 6c9eebb449..4afbdde9e6 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -175,6 +175,8 @@ class MbResponseError(mbzerror.MbzWebServiceError): class MbInterface: + """An interface for sending requests using MusicBrainz API""" + BEETS_USERAGENT = "beets/{} (https://beets.io/)".format(beets.__version__) def __init__(self, useragent=BEETS_USERAGENT): @@ -183,41 +185,103 @@ def __init__(self, useragent=BEETS_USERAGENT): self.useragent = useragent pass - def set_hostname(self, hostname, https): + def set_hostname(self, hostname: str, https: bool): self.hostname = hostname self.https = https @_RateLim def _lookup( - self, entity, mbid, includes, limit=None, offset=None, params={} - ): + self, + entity_type: str, + mbid: str, + includes: list[str], + ) -> bytes: + """Send a lookup request to the configured MusicBrainz API to get information + on a single entity + + :param entity_type: The type of entity to look up + :param mbid: The MusicBrainz ID of the entity to look up + :param includes: List of parameters to request more information to be included + about the entity + :return: The response as bytes + """ return self._send( - mbzr.MbzRequestLookup(self.useragent, entity, mbid, includes), - limit=limit, - offset=offset, + mbzr.MbzRequestLookup(self.useragent, entity_type, mbid, includes), ) @_RateLim def _browse( - self, entity, bw_entity, mbid, includes=[], limit=None, offset=None - ): + self, + lookup_entity_type: str, + mbid: str, + linked_entities_type: str, + includes: list[str] = [], + limit: int | None = None, + offset: int | None = None, + ) -> bytes: + """Send a browse request to the configured MusicBrainz API to get entities + linked to looked up one + + :param lookup_entity_type: The type of entity to look up + :param mbid: The MusicBrainz ID of the entity to look up + :param linked_entities_type: The type of linked entities to find + :param includes: List of parameters to request more information to be included + about the entity + :param limit: The number of entities that should be returned + :param offset: Offset used for paging through more than one page of results + :return: The response as bytes + """ return self._send( mbzr.MbzRequestBrowse( - self.useragent, entity, bw_entity, mbid, includes + self.useragent, + linked_entities_type, + lookup_entity_type, + mbid, + includes, ), limit=limit, offset=offset, ) @_RateLim - def _search(self, entity, query, limit=None, offset=None, **fields): + def _search( + self, + entity_type: str, + query: str, + limit: int | None = None, + offset: int | None = None, + **fields, + ) -> bytes: + """Send a search request to the configured MusicBrainz API to search entities + based on a query + + :param entity_type: The type of entity to look up + :param query: The query in the Lucene Search syntax + :param limit: The number of entities that should be returned + :param offset: Offset used for paging through more than one page of results + :return: The response as bytes + """ return self._send( - mbzr.MbzRequestSearch(self.useragent, entity, query), + mbzr.MbzRequestSearch(self.useragent, entity_type, query), limit=limit, offset=offset, ) - def _send(self, mbr, limit=None, offset=None): + def _send( + self, + mbr: mbzr.MbzRequestLookup + | mbzr.MbzRequestSearch + | mbzr.MbzRequestBrowse, + limit: int | None = None, + offset: int | None = None, + ) -> bytes: + """Send the request + + :param mbr: The request object + :param limit: The number of entities that should be returned + :param offset: Offset used for paging through more than one page of results + :return: The response as bytes + """ if self.hostname: scheme = "https" if self.https else "http" mbr.set_url(f"{scheme}://{self.hostname}/ws/2") @@ -228,35 +292,23 @@ def _send(self, mbr, limit=None, offset=None): opts["offset"] = offset return mbr.send(opts=opts) - def _make_params(self, release_status=[], release_type=[]): - params = {} - if len(release_status): - params["status"] = "|".join(release_status) - if len(release_type): - params["type"] = "|".join(release_type) - return params - - def _make_query(self, query="", fields={}): - """`query` is a lucene query string when no fields are set, - but is escaped when any fields are given. `fields` is a dictionary - of key/value query parameters. + def _make_query(self, fields: dict[str, str] = {}) -> str: + """Make a Lucene Query string from a dict of fields + + :param fields: Dict of field keys and values used to build the query. + Values will be properly escaped. + :return: The built Lucene Query string """ # Encode the query terms as a Lucene query string. lucene_special = r'([+\-&|!(){}\[\]\^"~*?:\\\/])' query_parts = [] - if query: - if fields: - clean_query = re.sub(lucene_special, r"\\\1", query) - query_parts.append(clean_query.lower()) - else: - query_parts.append(query) for key, value in fields.items(): # Escape Lucene's special characters. value = re.sub(lucene_special, r"\\\1", value) if value: value = value.lower() # avoid AND / OR - query_parts.append("%s:(%s)" % (key, value)) + query_parts.append(f"{key}:({value})") full_query = " ".join(query_parts).strip() if not full_query: @@ -265,63 +317,118 @@ def _make_query(self, query="", fields={}): return full_query def browse_recordings( - self, bw_entity, mbid, includes=[], limit=None, offset=None - ): - """Get all recordings linked to an artist or a release. - You need to give one MusicBrainz ID.""" - return self._browse( - self, - "recording", - bw_entity, - mbid, - includes, - limit=limit, - offset=offset, + self, + lookup_entity_type: Literal["artist", "collection", "release", "work"], + mbid: str, + includes: list[str] = [], + limit: int | None = None, + offset: int | None = None, + ) -> JSONDict: + """Browse recordings linked to an entity + + :param lookup_entity_type: The type of entity whose recordings are to be browsed + :param mbid: The MusicBrainz ID of the entity + :param includes: List of parameters to request more information to be included + about the recordings + :param limit: The number of recordings that should be returned + :param offset: Offset used for paging through more than one page of results + :return: The JSON-decoded response as an object + """ + return json.loads( + self._browse( + self, + lookup_entity_type, + mbid, + "recording", + includes, + limit=limit, + offset=offset, + ) ) def get_release_by_id( - self, mbid, includes=[], release_status=[], release_type=[] - ): + self, + mbid: str, + includes: list[str] = [], + ) -> JSONDict: + """Get a release from its ID + + :param mbid: The MusicBrainz ID of the release + :param includes: List of parameters to request more information to be included + about the release + :return: The JSON-decoded response as an object + """ return json.loads( self._lookup( self, "release", mbid, includes, - params=self._make_params(release_status, release_type), ) ) def get_recording_by_id( - self, mbid, includes=[], release_status=[], release_type=[] - ): + self, + mbid: str, + includes: list[str] = [], + ) -> JSONDict: + """Get a recording from its ID + + :param mbid: The MusicBrainz ID of the entity + :param includes: List of parameters to request more information to be included + about the recording + :return: The JSON-decoded response as an object + """ return json.loads( self._lookup( self, "recording", mbid, includes, - params=self._make_params(release_status, release_type), ) ) - def search_releases(self, query="", limit=None, offset=None, **fields): + def search_releases( + self, + limit: int | None = None, + offset: int | None = None, + **fields: str, + ) -> JSONDict: + """Search for releases using a query + + :param limit: The number of releases that should be returned + :param offset: Offset used for paging through more than one page of results + :param fields: Dict of fields composing the search query + :return: The JSON-decoded response as an object + """ return json.loads( self._search( self, "release", - query=self._make_query(query, fields), + query=self._make_query(fields), limit=limit, offset=offset, ) ) - def search_recordings(self, query="", limit=None, offset=None, **fields): + def search_recordings( + self, + limit: int | None = None, + offset: int | None = None, + **fields: str, + ) -> JSONDict: + """Search for recordings using a query + + :param limit: The number of recordings that should be returned + :param offset: Offset used for paging through more than one page of results + :param fields: Dict of fields composing the search query + :return: The JSON-decoded response as an object + """ return json.loads( self._search( self, "recording", - query=self._make_query(query, fields), + query=self._make_query(fields), limit=limit, offset=offset, ) From 92952361278f9c61d9d8cccd60b77e91fd33466e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Wed, 11 Jun 2025 22:15:50 +0000 Subject: [PATCH 06/15] Fix JSON data containing many None values When MusicBrainz returns data, it can do so in two formats: - XML, which was what used the previous musicbrainzngs lib - JSON, which is what mbzero uses The thing is that for non-present values in the database, the two formats handle these differently: - XML: The keys are simply not included in the data - JSON: The keys are included and set with the None value Since the rest of the code worked with the XML way of doing things, and also because it is easier to work with (e.g. "key in dict" to check if set), I have added a cleanup step after the JSON parsing that removes all keys with None values recursively. --- beetsplug/musicbrainz.py | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index 4afbdde9e6..bf4ef6dfad 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -316,6 +316,34 @@ def _make_query(self, fields: dict[str, str] = {}) -> str: return full_query + @staticmethod + def _remove_none_values(data): + """Iterate recursively over a Python object to remove all None values in + dicts + """ + if isinstance(data, dict): + return { + key: MbInterface._remove_none_values(value) + for key, value in data.items() + if value is not None + } + elif isinstance(data, list): + return [MbInterface._remove_none_values(item) for item in data] + else: + return data + + @staticmethod + def _parse_and_clean_json(data: bytes) -> JSONDict: + """Parse the JSON data and remove all None values in dicts. + This is needed as the MusicBrainz JSON data contains None values instead of + simply not setting them in dictionaries. + This is also different from the their XML data which only contains filled + values. + + :param data: JSON data as bytes + """ + return MbInterface._remove_none_values(json.loads(data)) + def browse_recordings( self, lookup_entity_type: Literal["artist", "collection", "release", "work"], @@ -334,7 +362,7 @@ def browse_recordings( :param offset: Offset used for paging through more than one page of results :return: The JSON-decoded response as an object """ - return json.loads( + return MbInterface._parse_and_clean_json( self._browse( self, lookup_entity_type, @@ -358,7 +386,7 @@ def get_release_by_id( about the release :return: The JSON-decoded response as an object """ - return json.loads( + return MbInterface._parse_and_clean_json( self._lookup( self, "release", @@ -379,7 +407,7 @@ def get_recording_by_id( about the recording :return: The JSON-decoded response as an object """ - return json.loads( + return MbInterface._parse_and_clean_json( self._lookup( self, "recording", @@ -401,7 +429,7 @@ def search_releases( :param fields: Dict of fields composing the search query :return: The JSON-decoded response as an object """ - return json.loads( + return MbInterface._parse_and_clean_json( self._search( self, "release", @@ -424,7 +452,7 @@ def search_recordings( :param fields: Dict of fields composing the search query :return: The JSON-decoded response as an object """ - return json.loads( + return MbInterface._parse_and_clean_json( self._search( self, "recording", From 8b68e480f8801422bd7b23a64f3f6186f31bd112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Thu, 12 Jun 2025 19:39:58 +0000 Subject: [PATCH 07/15] Fix `ArtistFlatteningTest::_add_alias` As can be seen in the MusicBrainz JSON response extract, the `alias` key should be replaced with the `name` key. Also, the `primary` key is a `bool`, and not a `str`. MusicBrainz JSON response extract: ```json { ... "artist-credit": [ { "artist": { ... { "name": "Daft Punk", "begin": null, "locale": "en", "type": "Artist name", "primary": true, "end": null, "type-id": "894afba6-2816-3c24-8072-eadb66bd04bc", "ended": false, "sort-name": "Daft Punk" }, ... }, ... }, ... ], ... } ``` --- test/plugins/test_musicbrainz.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py index c04f1e2ec0..9d3a47f33f 100644 --- a/test/plugins/test_musicbrainz.py +++ b/test/plugins/test_musicbrainz.py @@ -678,12 +678,12 @@ def _credit_dict(self, suffix=""): def _add_alias(self, credit_dict, suffix="", locale="", primary=False): alias = { - "alias": "ALIAS" + suffix, + "name": "ALIAS" + suffix, "locale": locale, "sort-name": "ALIASSORT" + suffix, } if primary: - alias["primary"] = "primary" + alias["primary"] = True if "aliases" not in credit_dict["artist"]: credit_dict["artist"]["aliases"] = [] credit_dict["artist"]["aliases"].append(alias) From 5e5d7b034e34bf4f8f9c180b6ef0915c9a31c6c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Thu, 12 Jun 2025 20:22:00 +0000 Subject: [PATCH 08/15] Fix `mbzero` exception handling The previous exception handling did not work correctly: for example, trying to match by ID and giving an invalid ID would raise an exception and stop the program. As per the `mbzero` documentation and source code, all the requests we make through `mbzero` either succeed, or raise a `mbzerror.MbzWebServiceError` (more specifically a `MbzRequestError` but it does not seem public as per the documentation, and does not brind any useful information). The problem is that this single exception type is not fine-grained enough for our use-cases. I have added custom error types and a conversion step to allow for more precise exception handling. An upstream issue has been created for implementing something of the sort directly in `mbzero` as we need to use things that are not documented and thus may not have strong stability guarantees. --- beetsplug/musicbrainz.py | 75 ++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 11 deletions(-) diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index bf4ef6dfad..ed51a04952 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -27,6 +27,7 @@ from typing import TYPE_CHECKING, Any from urllib.parse import urljoin +import requests.exceptions # For mbzero exception handling from mbzero import mbzerror from mbzero import mbzrequest as mbzr @@ -162,18 +163,60 @@ def __call__(self, *args, **kwargs): # Musicbrainz library interface -class MbWebServiceError(mbzerror.MbzWebServiceError): +class MbInterfaceError(Exception): + """Base class for exceptions raised by MbInterface""" + + pass + + +class MbInterfaceBadRequestError(MbInterfaceError): + """Exception raised when the request is ill-formed""" + pass -class MusicBrainzError(mbzerror.MbzError): +class MbInterfaceUnauthorizedError(MbInterfaceError): + """Exception raised when the request does not have valid and sufficient + authentication for accessing the resource""" + pass -class MbResponseError(mbzerror.MbzWebServiceError): +class MbInterfaceNotFoundError(MbInterfaceError): + """Exception raised when an entity is not found""" + pass +def _convert_mbzero_exception_to_local( + mbz_ex: mbzerror.MbzWebServiceError, +) -> MbInterfaceError: + """Convert the mbzero exception to a MbInterfaceError + + :param mbz_ex: The mbzero exception to convert + :return: The converted exception. Either the error could be converted into a + specific error, or an instance of the base class is returned. + """ + + # Upstream issue to have fine-grained exception handling: + # - https://gitlab.com/mbzero/python-mbzero/-/issues/2 + + # mbz_ex.message is not a str but the exception cause, while mbz_ex.cause is None... + if isinstance(mbz_ex.message, requests.exceptions.HTTPError): + http_error: requests.exceptions.HTTPError = mbz_ex.message + status_code = http_error.response.status_code + + if status_code == 400: + return MbInterfaceBadRequestError(mbz_ex) + elif status_code == 401: + return MbInterfaceUnauthorizedError(mbz_ex) + elif status_code == 404: + return MbInterfaceNotFoundError(mbz_ex) + + # Base exception if no specific one could be found + return MbInterfaceError(mbz_ex) + + class MbInterface: """An interface for sending requests using MusicBrainz API""" @@ -204,6 +247,7 @@ def _lookup( :param includes: List of parameters to request more information to be included about the entity :return: The response as bytes + :raises MbInterfaceError: if the request did not succeed """ return self._send( mbzr.MbzRequestLookup(self.useragent, entity_type, mbid, includes), @@ -230,6 +274,7 @@ def _browse( :param limit: The number of entities that should be returned :param offset: Offset used for paging through more than one page of results :return: The response as bytes + :raises MbInterfaceError: if the request did not succeed """ return self._send( mbzr.MbzRequestBrowse( @@ -260,6 +305,7 @@ def _search( :param limit: The number of entities that should be returned :param offset: Offset used for paging through more than one page of results :return: The response as bytes + :raises MbInterfaceError: if the request did not succeed """ return self._send( mbzr.MbzRequestSearch(self.useragent, entity_type, query), @@ -281,6 +327,7 @@ def _send( :param limit: The number of entities that should be returned :param offset: Offset used for paging through more than one page of results :return: The response as bytes + :raises MbInterfaceError: if the request did not succeed """ if self.hostname: scheme = "https" if self.https else "http" @@ -290,7 +337,10 @@ def _send( opts["limit"] = limit if offset: opts["offset"] = offset - return mbr.send(opts=opts) + try: + return mbr.send(opts=opts) + except mbzerror.MbzWebServiceError as ex: + raise _convert_mbzero_exception_to_local(ex) def _make_query(self, fields: dict[str, str] = {}) -> str: """Make a Lucene Query string from a dict of fields @@ -361,6 +411,7 @@ def browse_recordings( :param limit: The number of recordings that should be returned :param offset: Offset used for paging through more than one page of results :return: The JSON-decoded response as an object + :raises MbInterfaceError: if the request did not succeed """ return MbInterface._parse_and_clean_json( self._browse( @@ -385,6 +436,7 @@ def get_release_by_id( :param includes: List of parameters to request more information to be included about the release :return: The JSON-decoded response as an object + :raises MbInterfaceError: if the request did not succeed """ return MbInterface._parse_and_clean_json( self._lookup( @@ -406,6 +458,7 @@ def get_recording_by_id( :param includes: List of parameters to request more information to be included about the recording :return: The JSON-decoded response as an object + :raises MbInterfaceError: if the request did not succeed """ return MbInterface._parse_and_clean_json( self._lookup( @@ -428,6 +481,7 @@ def search_releases( :param offset: Offset used for paging through more than one page of results :param fields: Dict of fields composing the search query :return: The JSON-decoded response as an object + :raises MbInterfaceError: if the request did not succeed """ return MbInterface._parse_and_clean_json( self._search( @@ -451,6 +505,7 @@ def search_recordings( :param offset: Offset used for paging through more than one page of results :param fields: Dict of fields composing the search query :return: The JSON-decoded response as an object + :raises MbInterfaceError: if the request did not succeed """ return MbInterface._parse_and_clean_json( self._search( @@ -470,8 +525,6 @@ class MusicBrainzAPIError(util.HumanReadableError): def __init__(self, reason, verb, query, tb=None): self.query = query - if isinstance(reason, MbWebServiceError): - reason = "MusicBrainz not reachable" super().__init__(reason, verb, tb) def get_message(self): @@ -1215,7 +1268,7 @@ def _search_api( ) try: res = method(limit=self.config["searchlimit"].get(int), **filters) - except MusicBrainzError as exc: + except MbInterfaceError as exc: raise MusicBrainzAPIError( exc, f"{query_type} search", filters, traceback.format_exc() ) @@ -1266,10 +1319,10 @@ def album_for_id( self.mb_interface, res ) - except MbResponseError: + except MbInterfaceNotFoundError: self._log.debug("Album ID match failed.") return None - except MusicBrainzError as exc: + except MbInterfaceError as exc: raise MusicBrainzAPIError( exc, "get release by ID", albumid, traceback.format_exc() ) @@ -1296,10 +1349,10 @@ def track_for_id( try: res = self.mb_interface.get_recording_by_id(trackid, TRACK_INCLUDES) - except MbResponseError: + except MbInterfaceNotFoundError: self._log.debug("Track ID match failed.") return None - except MusicBrainzError as exc: + except MbInterfaceError as exc: raise MusicBrainzAPIError( exc, "get recording by ID", trackid, traceback.format_exc() ) From 038679d32ea4323b03aa68771832d17ddf1e45b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Thu, 12 Jun 2025 22:02:22 +0000 Subject: [PATCH 09/15] Fix `mbzero` not properly handling special characters in queries Issue created upstream: https://gitlab.com/mbzero/python-mbzero/-/issues/1 Fixed locally until it is resolved upstream and a new version is released. --- beetsplug/musicbrainz.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index ed51a04952..b8c201ca53 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -25,7 +25,7 @@ from functools import cached_property from itertools import product from typing import TYPE_CHECKING, Any -from urllib.parse import urljoin +from urllib.parse import quote, urljoin import requests.exceptions # For mbzero exception handling from mbzero import mbzerror @@ -307,8 +307,14 @@ def _search( :return: The response as bytes :raises MbInterfaceError: if the request did not succeed """ + + # mbzero does not properly handle queries with special characters. + # Quote the query beforehand to avoid this problem. + # See: https://gitlab.com/mbzero/python-mbzero/-/issues/1 + quoted_query = quote(query) + return self._send( - mbzr.MbzRequestSearch(self.useragent, entity_type, query), + mbzr.MbzRequestSearch(self.useragent, entity_type, quoted_query), limit=limit, offset=offset, ) From 56fce6cda8d49710fbab92a2c7b02c2c01680a12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Sat, 14 Jun 2025 13:42:59 +0000 Subject: [PATCH 10/15] Clean, fix and move musicbrainz rate limiter to utils ** Clean The rate limiter code that was previously in the `python-musicbrainzngs` was copied locally in the first commit because `mbzero` does not include one. This rate limiter uses a global state (singleton) which means that it can only be used by the musicbrainz plugin, wheras this rate-limiting code may be useful elsewhere. To avoid using a singleton and instead store the configuration in an object, the rate limiter has been changed from a decorator to an object that is used in a `with` block, similarly to a `threading.Lock`. ** Move Since it is now not limited to the musicbrainz plugin, it has been moved to `utils/`. ** Fix While writing tests, I found a bug with the `musicbrainzngs` implementation. When we are rate-limited, we sleep until we should not be limited anymore. The code to compute this duration was: `(1.0 - self.remaining_requests) * (self.reqs_per_interval / self.interval_sec)` This is wrong and should instead be: `(1.0 - self.remaining_requests) * (self.interval_sec / self.reqs_per_interval)` To verify this, suppose that we want 10 reqs per 0.1 seconds: - We should wait around 0.01 second to be able to do 1 req - `self.reqs_per_interval / self.interval_sec = 10 / 0.1 = 100` - `self.interval_sec / self.reqs_per_interval = 0.1 / 10 = 0.01` - Also looking at the units, the first one returns a `req/s^2` while the second returns a `s` --- beets/util/rate_limiter.py | 84 ++++++++++++++++ beetsplug/musicbrainz.py | 170 +++++++-------------------------- test/util/test_rate_limiter.py | 93 ++++++++++++++++++ 3 files changed, 212 insertions(+), 135 deletions(-) create mode 100644 beets/util/rate_limiter.py create mode 100644 test/util/test_rate_limiter.py diff --git a/beets/util/rate_limiter.py b/beets/util/rate_limiter.py new file mode 100644 index 0000000000..c381b5e262 --- /dev/null +++ b/beets/util/rate_limiter.py @@ -0,0 +1,84 @@ +import threading +import time + + +class RateLimiter: + """Limits the rate at which one or multiple sections of code may be called. + + The limiting is thread-safe: threads that are rate-limited will sleep until they + are not anymore. + + Important: The rate limiter only limits the start of execution of the rate-limited + code. This means for example that for rate-limited web queries of one per second, + it is assured that at most one request is started per second, but there may be + multiple queries running concurrently if the first one takes time to execute. + """ + + def __init__(self, reqs_per_interval: int, interval_sec: float): + """Create the rate limiter with the specified rate + + :param reqs_per_interval: Number of requests that can be done per interval. + Must be strictly positive + :param interval_sec: The interval in seconds. Must be strictly positive + """ + + if reqs_per_interval <= 0.0: + raise ValueError("reqs_per_interval can't be less than 0") + if interval_sec <= 0: + raise ValueError("interval_sec can't be less than 0") + + # Configuration variables + self.reqs_per_interval = reqs_per_interval + self.interval_sec = interval_sec + + # Current state + self.lock = threading.Lock() + self.last_call = 0.0 + self.remaining_requests = None + + def _update_remaining(self): + """Update the number of remaining requests that can be done and the time of + last call + """ + if self.remaining_requests is None: + # On first invocation, we have the number of requests available + self.remaining_requests = float(self.reqs_per_interval) + + else: + # On following invocations, increase the number of requests available + # based on the time since last invocation + since_last_call = time.time() - self.last_call + self.remaining_requests += since_last_call * ( + self.reqs_per_interval / self.interval_sec + ) + # Number of requests cannot exceed the max number per interval + self.remaining_requests = min( + self.remaining_requests, float(self.reqs_per_interval) + ) + + self.last_call = time.time() + + def __enter__(self): + with self.lock: + self._update_remaining() + + # Assert to avoid typing errors + assert self.remaining_requests is not None + + # Delay if necessary + while self.remaining_requests < 0.999: + time.sleep( + (1.0 - self.remaining_requests) + * (self.interval_sec / self.reqs_per_interval) + ) + self._update_remaining() + + # "Pay" for the execution of the rate limited code section + self.remaining_requests -= 1.0 + + return self + + def __exit__(self, exc_type, exc_value, traceback): + # Nothing to do: limiting is only done on the start of execution of the + # rate-limited code + pass diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index b8c201ca53..1ebadd6a17 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -18,8 +18,6 @@ import json import re -import threading -import time import traceback from collections import Counter from functools import cached_property @@ -36,6 +34,7 @@ from beets import config, plugins, util from beets.plugins import BeetsPlugin from beets.util.id_extractors import extract_release_id +from beets.util.rate_limiter import RateLimiter if TYPE_CHECKING: from collections.abc import Iterator, Sequence @@ -61,105 +60,6 @@ "year": "date", } - -# Rate limiting - - -class _RateLimitsSingleton: - limit_interval = 1.0 - limit_requests = 1 - do_rate_limit = True - - def __new__(cls): - """Makes that class a singleton""" - if not hasattr(cls, "instance"): - cls.instance = super(_RateLimitsSingleton, cls).__new__(cls) - return cls.instance - - def get(self): - return (self.limit_interval, self.limit_requests, self.do_rate_limit) - - def set_rate_limit(self, limit_or_interval=1.0, new_requests=1): - """Sets the rate limiting behavior of the module. Must be invoked - before the first Web service call. - If the `limit_or_interval` parameter is set to False then - rate limiting will be disabled. If it is a number then only - a set number of requests (`new_requests`) will be made per - given interval (`limit_or_interval`). - """ - if isinstance(limit_or_interval, bool): - self.do_rate_limit = limit_or_interval - else: - if limit_or_interval <= 0.0: - raise ValueError("limit_or_interval can't be less than 0") - if new_requests <= 0: - raise ValueError("new_requests can't be less than 0") - self.do_rate_limit = True - self.limit_interval = limit_or_interval - self.limit_requests = new_requests - - -class _RateLim(object): - """A decorator that limits the rate at which the function may be - called. The rate is controlled by the `limit_interval` and - `limit_requests` global variables. The limiting is thread-safe; - only one thread may be in the function at a time (acts like a - monitor in this sense). The globals must be set before the first - call to the limited function. - """ - - def __init__(self, fun): - self.fun = fun - self.last_call = 0.0 - self.lock = threading.Lock() - self.remaining_requests = None # Set on first invocation. - - def _update_remaining(self): - """Update remaining requests based on the elapsed time since - they were last calculated. - """ - (limit_interval, limit_requests, do_rate_limit) = ( - _RateLimitsSingleton().get() - ) - - # On first invocation, we have the maximum number of requests - # available. - if self.remaining_requests is None: - self.remaining_requests = float(limit_requests) - - else: - since_last_call = time.time() - self.last_call - self.remaining_requests += since_last_call * ( - limit_requests / limit_interval - ) - self.remaining_requests = min( - self.remaining_requests, float(limit_requests) - ) - - self.last_call = time.time() - - def __call__(self, *args, **kwargs): - (limit_interval, limit_requests, do_rate_limit) = ( - _RateLimitsSingleton().get() - ) - - with self.lock: - if do_rate_limit: - self._update_remaining() - - # Delay if necessary. - while self.remaining_requests < 0.999: - time.sleep( - (1.0 - self.remaining_requests) - * (limit_requests / limit_interval) - ) - self._update_remaining() - - # Call the original function, "paying" for this call. - self.remaining_requests -= 1.0 - return self.fun(*args, **kwargs) - - # Musicbrainz library interface @@ -222,17 +122,16 @@ class MbInterface: BEETS_USERAGENT = "beets/{} (https://beets.io/)".format(beets.__version__) - def __init__(self, useragent=BEETS_USERAGENT): + def __init__(self, rate_limiter: RateLimiter, useragent=BEETS_USERAGENT): self.hostname = BASE_HOSTNAME self.https = True self.useragent = useragent - pass + self.rate_limiter = rate_limiter def set_hostname(self, hostname: str, https: bool): self.hostname = hostname self.https = https - @_RateLim def _lookup( self, entity_type: str, @@ -249,11 +148,13 @@ def _lookup( :return: The response as bytes :raises MbInterfaceError: if the request did not succeed """ - return self._send( - mbzr.MbzRequestLookup(self.useragent, entity_type, mbid, includes), - ) + with self.rate_limiter: + return self._send( + mbzr.MbzRequestLookup( + self.useragent, entity_type, mbid, includes + ), + ) - @_RateLim def _browse( self, lookup_entity_type: str, @@ -276,19 +177,19 @@ def _browse( :return: The response as bytes :raises MbInterfaceError: if the request did not succeed """ - return self._send( - mbzr.MbzRequestBrowse( - self.useragent, - linked_entities_type, - lookup_entity_type, - mbid, - includes, - ), - limit=limit, - offset=offset, - ) + with self.rate_limiter: + return self._send( + mbzr.MbzRequestBrowse( + self.useragent, + linked_entities_type, + lookup_entity_type, + mbid, + includes, + ), + limit=limit, + offset=offset, + ) - @_RateLim def _search( self, entity_type: str, @@ -313,11 +214,14 @@ def _search( # See: https://gitlab.com/mbzero/python-mbzero/-/issues/1 quoted_query = quote(query) - return self._send( - mbzr.MbzRequestSearch(self.useragent, entity_type, quoted_query), - limit=limit, - offset=offset, - ) + with self.rate_limiter: + return self._send( + mbzr.MbzRequestSearch( + self.useragent, entity_type, quoted_query + ), + limit=limit, + offset=offset, + ) def _send( self, @@ -421,7 +325,6 @@ def browse_recordings( """ return MbInterface._parse_and_clean_json( self._browse( - self, lookup_entity_type, mbid, "recording", @@ -446,7 +349,6 @@ def get_release_by_id( """ return MbInterface._parse_and_clean_json( self._lookup( - self, "release", mbid, includes, @@ -468,7 +370,6 @@ def get_recording_by_id( """ return MbInterface._parse_and_clean_json( self._lookup( - self, "recording", mbid, includes, @@ -491,7 +392,6 @@ def search_releases( """ return MbInterface._parse_and_clean_json( self._search( - self, "release", query=self._make_query(fields), limit=limit, @@ -515,7 +415,6 @@ def search_recordings( """ return MbInterface._parse_and_clean_json( self._search( - self, "recording", query=self._make_query(fields), limit=limit, @@ -852,17 +751,18 @@ def __init__(self): "extra_tags": [], }, ) - self.mb_interface = MbInterface() + self.mb_interface = MbInterface( + RateLimiter( + reqs_per_interval=self.config["ratelimit"].get(int), + interval_sec=self.config["ratelimit_interval"].as_number(), + ) + ) hostname = self.config["host"].as_str() https = self.config["https"].get(bool) # Only call set_hostname when a custom server is configured. Since # musicbrainz-ngs connects to musicbrainz.org with HTTPS by default if hostname != "musicbrainz.org": self.mb_interface.set_hostname(hostname, https) - _RateLimitsSingleton().set_rate_limit( - self.config["ratelimit_interval"].as_number(), - self.config["ratelimit"].get(int), - ) def track_info( self, diff --git a/test/util/test_rate_limiter.py b/test/util/test_rate_limiter.py new file mode 100644 index 0000000000..b56cb0c6fd --- /dev/null +++ b/test/util/test_rate_limiter.py @@ -0,0 +1,93 @@ +import time + +from beets.util.rate_limiter import RateLimiter + +# 10 reqs per 0.1 second +REQS_PER_INTERVAL = 10 +INTERVAL_SEC = 0.1 + +# Expected time to wait to be able to do one more request after being rate limited +WAIT_FOR_ONE_REQ = INTERVAL_SEC / REQS_PER_INTERVAL + + +def run_and_collect_delta_start_times(num_reqs: int) -> list[float]: + """Launch requests through the rate limiter and collect the durations between the + time before the first request and the starting time of each request. + + :param num_reqs: Number of requests to run + :return: A list of delta start times in seconds: non rate-limited ones should be + close to 0 + """ + rate_limiter = RateLimiter(REQS_PER_INTERVAL, INTERVAL_SEC) + + delta_start_times = [] + + start = time.time() + for _ in range(num_reqs): + with rate_limiter: + delta_start_times.append(time.time() - start) + + return delta_start_times + + +def test_all_reqs_in_one_interval(): + delta_start_times = run_and_collect_delta_start_times(REQS_PER_INTERVAL) + + for i in range(10): + assert delta_start_times[i] < WAIT_FOR_ONE_REQ, ( + f"request {i} should not have been rate-limited" + ) + + +def test_more_reqs_in_one_interval(): + delta_start_times = run_and_collect_delta_start_times(2 * REQS_PER_INTERVAL) + + # 20 reqs with rate-limitation of 10 reqs per 0.1s + # -> 10 reqs immediately, then 10*(1 req per 0.1s) + + for i in range(10): + assert delta_start_times[i] < WAIT_FOR_ONE_REQ, ( + f"request {i} should not have been rate-limited" + ) + + for i in range(10, len(delta_start_times)): + # Non rate-limited reqs are at interval 0 + # 1st rate-limited req is at interval 1 + # 2nd rate-limited req is at interval 2 + # etc. + expected_interval = i - 9 + expected_start_time = expected_interval * WAIT_FOR_ONE_REQ + + assert delta_start_times[i] >= expected_start_time, ( + f"request {i} has executed sooner than it should have" + ) + assert delta_start_times[i] < ( + expected_start_time + WAIT_FOR_ONE_REQ + ), f"request {i} has executed much later than it should have" + + +def test_reuse_after_no_requests(): + rate_limiter = RateLimiter(REQS_PER_INTERVAL, INTERVAL_SEC) + + # Use up all requests + start = time.time() + for _ in range(REQS_PER_INTERVAL): + with rate_limiter: + pass + end = time.time() + assert (end - start) < WAIT_FOR_ONE_REQ, ( + "requests should not have been rate-limited" + ) + + # Do no request for half an interval + time.sleep(INTERVAL_SEC / 2) + + # Now, we should be able to do half the REQS_PER_INTERVAL with no rate limitation + start = time.time() + for _ in range(REQS_PER_INTERVAL // 2): + with rate_limiter: + pass + end = time.time() + assert (end - start) < WAIT_FOR_ONE_REQ, ( + "requests should not have been rate-limited" + ) From 775dd8d939a18d6b0fe7fcea0fd594b72803bcc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Sat, 14 Jun 2025 20:45:56 +0000 Subject: [PATCH 11/15] Create a shared MbInterface instance This will allow other plugins to be able to use the `MbInterface` that is configured with the correct `host`, `https`, and rate limiting. All of that previously "kinda" worked because the `musicbrainzngs` library always stored the configuration in a global shared state. --- beetsplug/_mb_interface.py | 412 +++++++++++++++++++++++++++++++ beetsplug/musicbrainz.py | 399 +----------------------------- test/plugins/test_musicbrainz.py | 23 +- 3 files changed, 439 insertions(+), 395 deletions(-) create mode 100644 beetsplug/_mb_interface.py diff --git a/beetsplug/_mb_interface.py b/beetsplug/_mb_interface.py new file mode 100644 index 0000000000..ea64274ee6 --- /dev/null +++ b/beetsplug/_mb_interface.py @@ -0,0 +1,412 @@ +import json +import re +from typing import TYPE_CHECKING, Literal +from urllib.parse import quote + +import requests.exceptions # For mbzero exception handling +from mbzero import mbzerror +from mbzero import mbzrequest as mbzr + +import beets +from beets.util.rate_limiter import RateLimiter + +if TYPE_CHECKING: + from ._typing import JSONDict + + +class MbInterfaceError(Exception): + """Base class for exceptions raised by MbInterface""" + + pass + + +class MbInterfaceBadRequestError(MbInterfaceError): + """Exception raised when the request is ill-formed""" + + pass + + +class MbInterfaceUnauthorizedError(MbInterfaceError): + """Exception raised when the request does not have valid and sufficient + authentication for accessing the resource""" + + pass + + +class MbInterfaceNotFoundError(MbInterfaceError): + """Exception raised when an entity is not found""" + + pass + + +def _convert_mbzero_exception_to_local( + mbz_ex: mbzerror.MbzWebServiceError, +) -> MbInterfaceError: + """Convert the mbzero exception to a MbInterfaceError + + :param mbz_ex: The mbzero exception to convert + :return: The converted exception. Either the error could be converted into a + specific error, or an instance of the base class is returned. + """ + + # Upstream issue to have fine-grained exception handling: + # - https://gitlab.com/mbzero/python-mbzero/-/issues/2 + + # mbz_ex.message is not a str but the exception cause, while mbz_ex.cause is None... + if isinstance(mbz_ex.message, requests.exceptions.HTTPError): + http_error: requests.exceptions.HTTPError = mbz_ex.message + status_code = http_error.response.status_code + + if status_code == 400: + return MbInterfaceBadRequestError(mbz_ex) + elif status_code == 401: + return MbInterfaceUnauthorizedError(mbz_ex) + elif status_code == 404: + return MbInterfaceNotFoundError(mbz_ex) + + # Base exception if no specific one could be found + return MbInterfaceError(mbz_ex) + + +class MbInterface: + """An interface for sending requests using MusicBrainz API""" + + def __init__(self, hostname: str, https: bool, rate_limiter: RateLimiter): + self.hostname = hostname + self.https = https + self.rate_limiter = rate_limiter + self.useragent = "beets/{} (https://beets.io/)".format( + beets.__version__ + ) + + def _lookup( + self, + entity_type: str, + mbid: str, + includes: list[str], + ) -> bytes: + """Send a lookup request to the configured MusicBrainz API to get information + on a single entity + + :param entity_type: The type of entity to look up + :param mbid: The MusicBrainz ID of the entity to look up + :param includes: List of parameters to request more information to be included + about the entity + :return: The response as bytes + :raises MbInterfaceError: if the request did not succeed + """ + with self.rate_limiter: + return self._send( + mbzr.MbzRequestLookup( + self.useragent, entity_type, mbid, includes + ), + ) + + def _browse( + self, + lookup_entity_type: str, + mbid: str, + linked_entities_type: str, + includes: list[str] = [], + limit: int | None = None, + offset: int | None = None, + ) -> bytes: + """Send a browse request to the configured MusicBrainz API to get entities + linked to looked up one + + :param lookup_entity_type: The type of entity to look up + :param mbid: The MusicBrainz ID of the entity to look up + :param linked_entities_type: The type of linked entities to find + :param includes: List of parameters to request more information to be included + about the entity + :param limit: The number of entities that should be returned + :param offset: Offset used for paging through more than one page of results + :return: The response as bytes + :raises MbInterfaceError: if the request did not succeed + """ + with self.rate_limiter: + return self._send( + mbzr.MbzRequestBrowse( + self.useragent, + linked_entities_type, + lookup_entity_type, + mbid, + includes, + ), + limit=limit, + offset=offset, + ) + + def _search( + self, + entity_type: str, + query: str, + limit: int | None = None, + offset: int | None = None, + **fields, + ) -> bytes: + """Send a search request to the configured MusicBrainz API to search entities + based on a query + + :param entity_type: The type of entity to look up + :param query: The query in the Lucene Search syntax + :param limit: The number of entities that should be returned + :param offset: Offset used for paging through more than one page of results + :return: The response as bytes + :raises MbInterfaceError: if the request did not succeed + """ + + # mbzero does not properly handle queries with special characters. + # Quote the query beforehand to avoid this problem. + # See: https://gitlab.com/mbzero/python-mbzero/-/issues/1 + quoted_query = quote(query) + + with self.rate_limiter: + return self._send( + mbzr.MbzRequestSearch( + self.useragent, entity_type, quoted_query + ), + limit=limit, + offset=offset, + ) + + def _send( + self, + mbr: mbzr.MbzRequestLookup + | mbzr.MbzRequestSearch + | mbzr.MbzRequestBrowse, + limit: int | None = None, + offset: int | None = None, + ) -> bytes: + """Send the request + + :param mbr: The request object + :param limit: The number of entities that should be returned + :param offset: Offset used for paging through more than one page of results + :return: The response as bytes + :raises MbInterfaceError: if the request did not succeed + """ + if self.hostname: + scheme = "https" if self.https else "http" + mbr.set_url(f"{scheme}://{self.hostname}/ws/2") + opts = {} + if limit: + opts["limit"] = limit + if offset: + opts["offset"] = offset + try: + return mbr.send(opts=opts) + except mbzerror.MbzWebServiceError as ex: + raise _convert_mbzero_exception_to_local(ex) + + def _make_query(self, fields: dict[str, str] = {}) -> str: + """Make a Lucene Query string from a dict of fields + + :param fields: Dict of field keys and values used to build the query. + Values will be properly escaped. + :return: The built Lucene Query string + """ + # Encode the query terms as a Lucene query string. + lucene_special = r'([+\-&|!(){}\[\]\^"~*?:\\\/])' + query_parts = [] + + for key, value in fields.items(): + # Escape Lucene's special characters. + value = re.sub(lucene_special, r"\\\1", value) + if value: + value = value.lower() # avoid AND / OR + query_parts.append(f"{key}:({value})") + full_query = " ".join(query_parts).strip() + + if not full_query: + raise ValueError("at least one query term is required") + + return full_query + + @staticmethod + def _remove_none_values(data): + """Iterate recursively over a Python object to remove all None values in + dicts + """ + if isinstance(data, dict): + return { + key: MbInterface._remove_none_values(value) + for key, value in data.items() + if value is not None + } + elif isinstance(data, list): + return [MbInterface._remove_none_values(item) for item in data] + else: + return data + + @staticmethod + def _parse_and_clean_json(data: bytes) -> "JSONDict": + """Parse the JSON data and remove all None values in dicts. + This is needed as the MusicBrainz JSON data contains None values instead of + simply not setting them in dictionaries. + This is also different from the their XML data which only contains filled + values. + + :param data: JSON data as bytes + """ + return MbInterface._remove_none_values(json.loads(data)) + + def browse_recordings( + self, + lookup_entity_type: Literal["artist", "collection", "release", "work"], + mbid: str, + includes: list[str] = [], + limit: int | None = None, + offset: int | None = None, + ) -> "JSONDict": + """Browse recordings linked to an entity + + :param lookup_entity_type: The type of entity whose recordings are to be browsed + :param mbid: The MusicBrainz ID of the entity + :param includes: List of parameters to request more information to be included + about the recordings + :param limit: The number of recordings that should be returned + :param offset: Offset used for paging through more than one page of results + :return: The JSON-decoded response as an object + :raises MbInterfaceError: if the request did not succeed + """ + return MbInterface._parse_and_clean_json( + self._browse( + lookup_entity_type, + mbid, + "recording", + includes, + limit=limit, + offset=offset, + ) + ) + + def get_release_by_id( + self, + mbid: str, + includes: list[str] = [], + ) -> "JSONDict": + """Get a release from its ID + + :param mbid: The MusicBrainz ID of the release + :param includes: List of parameters to request more information to be included + about the release + :return: The JSON-decoded response as an object + :raises MbInterfaceError: if the request did not succeed + """ + return MbInterface._parse_and_clean_json( + self._lookup( + "release", + mbid, + includes, + ) + ) + + def get_recording_by_id( + self, + mbid: str, + includes: list[str] = [], + ) -> "JSONDict": + """Get a recording from its ID + + :param mbid: The MusicBrainz ID of the entity + :param includes: List of parameters to request more information to be included + about the recording + :return: The JSON-decoded response as an object + :raises MbInterfaceError: if the request did not succeed + """ + return MbInterface._parse_and_clean_json( + self._lookup( + "recording", + mbid, + includes, + ) + ) + + def search_releases( + self, + limit: int | None = None, + offset: int | None = None, + **fields: str, + ) -> "JSONDict": + """Search for releases using a query + + :param limit: The number of releases that should be returned + :param offset: Offset used for paging through more than one page of results + :param fields: Dict of fields composing the search query + :return: The JSON-decoded response as an object + :raises MbInterfaceError: if the request did not succeed + """ + return MbInterface._parse_and_clean_json( + self._search( + "release", + query=self._make_query(fields), + limit=limit, + offset=offset, + ) + ) + + def search_recordings( + self, + limit: int | None = None, + offset: int | None = None, + **fields: str, + ) -> "JSONDict": + """Search for recordings using a query + + :param limit: The number of recordings that should be returned + :param offset: Offset used for paging through more than one page of results + :param fields: Dict of fields composing the search query + :return: The JSON-decoded response as an object + :raises MbInterfaceError: if the request did not succeed + """ + return MbInterface._parse_and_clean_json( + self._search( + "recording", + query=self._make_query(fields), + limit=limit, + offset=offset, + ) + ) + + +class SharedMbInterface: + """Singleton holding a shared MbInterface. + This can be used to use the same configuration, rate limiting, etc. between + multiple plugins. + """ + + def __new__(cls): + """Create the singleton""" + if not hasattr(cls, "instance"): + cls.instance = super(SharedMbInterface, cls).__new__(cls) + return cls.instance + + def __init__(self): + mb_config = beets.config["musicbrainz"] + mb_config.add( + { + "host": "musicbrainz.org", + "https": False, + "ratelimit": 1, + "ratelimit_interval": 1, + } + ) + + hostname = mb_config["host"].as_str() + https = mb_config["https"].get(bool) + # Force https usage for default MusicBrainz server + if hostname == "musicbrainz.org": + https = True + + self.mb_interface = MbInterface( + hostname, + https, + RateLimiter( + reqs_per_interval=mb_config["ratelimit"].get(int), + interval_sec=mb_config["ratelimit_interval"].as_number(), + ), + ) + + def get(self) -> MbInterface: + return self.mb_interface diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index 1ebadd6a17..17aef1dee0 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -16,25 +16,25 @@ from __future__ import annotations -import json -import re import traceback from collections import Counter from functools import cached_property from itertools import product from typing import TYPE_CHECKING, Any -from urllib.parse import quote, urljoin - -import requests.exceptions # For mbzero exception handling -from mbzero import mbzerror -from mbzero import mbzrequest as mbzr +from urllib.parse import urljoin import beets import beets.autotag.hooks from beets import config, plugins, util from beets.plugins import BeetsPlugin from beets.util.id_extractors import extract_release_id -from beets.util.rate_limiter import RateLimiter + +from ._mb_interface import ( + MbInterface, + MbInterfaceError, + MbInterfaceNotFoundError, + SharedMbInterface, +) if TYPE_CHECKING: from collections.abc import Iterator, Sequence @@ -46,8 +46,7 @@ VARIOUS_ARTISTS_ID = "89ad4ac3-39f7-470e-963a-56509c546377" -BASE_HOSTNAME = "musicbrainz.org" -BASE_URL = f"https://{BASE_HOSTNAME}" +BASE_URL = "https://musicbrainz.org" SKIPPED_TRACKS = ["[data track]"] @@ -60,368 +59,6 @@ "year": "date", } -# Musicbrainz library interface - - -class MbInterfaceError(Exception): - """Base class for exceptions raised by MbInterface""" - - pass - - -class MbInterfaceBadRequestError(MbInterfaceError): - """Exception raised when the request is ill-formed""" - - pass - - -class MbInterfaceUnauthorizedError(MbInterfaceError): - """Exception raised when the request does not have valid and sufficient - authentication for accessing the resource""" - - pass - - -class MbInterfaceNotFoundError(MbInterfaceError): - """Exception raised when an entity is not found""" - - pass - - -def _convert_mbzero_exception_to_local( - mbz_ex: mbzerror.MbzWebServiceError, -) -> MbInterfaceError: - """Convert the mbzero exception to a MbInterfaceError - - :param mbz_ex: The mbzero exception to convert - :return: The converted exception. Either the error could be converted into a - specific error, or an instance of the base class is returned. - """ - - # Upstream issue to have fine-grained exception handling: - # - https://gitlab.com/mbzero/python-mbzero/-/issues/2 - - # mbz_ex.message is not a str but the exception cause, while mbz_ex.cause is None... - if isinstance(mbz_ex.message, requests.exceptions.HTTPError): - http_error: requests.exceptions.HTTPError = mbz_ex.message - status_code = http_error.response.status_code - - if status_code == 400: - return MbInterfaceBadRequestError(mbz_ex) - elif status_code == 401: - return MbInterfaceUnauthorizedError(mbz_ex) - elif status_code == 404: - return MbInterfaceNotFoundError(mbz_ex) - - # Base exception if no specific one could be found - return MbInterfaceError(mbz_ex) - - -class MbInterface: - """An interface for sending requests using MusicBrainz API""" - - BEETS_USERAGENT = "beets/{} (https://beets.io/)".format(beets.__version__) - - def __init__(self, rate_limiter: RateLimiter, useragent=BEETS_USERAGENT): - self.hostname = BASE_HOSTNAME - self.https = True - self.useragent = useragent - self.rate_limiter = rate_limiter - - def set_hostname(self, hostname: str, https: bool): - self.hostname = hostname - self.https = https - - def _lookup( - self, - entity_type: str, - mbid: str, - includes: list[str], - ) -> bytes: - """Send a lookup request to the configured MusicBrainz API to get information - on a single entity - - :param entity_type: The type of entity to look up - :param mbid: The MusicBrainz ID of the entity to look up - :param includes: List of parameters to request more information to be included - about the entity - :return: The response as bytes - :raises MbInterfaceError: if the request did not succeed - """ - with self.rate_limiter: - return self._send( - mbzr.MbzRequestLookup( - self.useragent, entity_type, mbid, includes - ), - ) - - def _browse( - self, - lookup_entity_type: str, - mbid: str, - linked_entities_type: str, - includes: list[str] = [], - limit: int | None = None, - offset: int | None = None, - ) -> bytes: - """Send a browse request to the configured MusicBrainz API to get entities - linked to looked up one - - :param lookup_entity_type: The type of entity to look up - :param mbid: The MusicBrainz ID of the entity to look up - :param linked_entities_type: The type of linked entities to find - :param includes: List of parameters to request more information to be included - about the entity - :param limit: The number of entities that should be returned - :param offset: Offset used for paging through more than one page of results - :return: The response as bytes - :raises MbInterfaceError: if the request did not succeed - """ - with self.rate_limiter: - return self._send( - mbzr.MbzRequestBrowse( - self.useragent, - linked_entities_type, - lookup_entity_type, - mbid, - includes, - ), - limit=limit, - offset=offset, - ) - - def _search( - self, - entity_type: str, - query: str, - limit: int | None = None, - offset: int | None = None, - **fields, - ) -> bytes: - """Send a search request to the configured MusicBrainz API to search entities - based on a query - - :param entity_type: The type of entity to look up - :param query: The query in the Lucene Search syntax - :param limit: The number of entities that should be returned - :param offset: Offset used for paging through more than one page of results - :return: The response as bytes - :raises MbInterfaceError: if the request did not succeed - """ - - # mbzero does not properly handle queries with special characters. - # Quote the query beforehand to avoid this problem. - # See: https://gitlab.com/mbzero/python-mbzero/-/issues/1 - quoted_query = quote(query) - - with self.rate_limiter: - return self._send( - mbzr.MbzRequestSearch( - self.useragent, entity_type, quoted_query - ), - limit=limit, - offset=offset, - ) - - def _send( - self, - mbr: mbzr.MbzRequestLookup - | mbzr.MbzRequestSearch - | mbzr.MbzRequestBrowse, - limit: int | None = None, - offset: int | None = None, - ) -> bytes: - """Send the request - - :param mbr: The request object - :param limit: The number of entities that should be returned - :param offset: Offset used for paging through more than one page of results - :return: The response as bytes - :raises MbInterfaceError: if the request did not succeed - """ - if self.hostname: - scheme = "https" if self.https else "http" - mbr.set_url(f"{scheme}://{self.hostname}/ws/2") - opts = {} - if limit: - opts["limit"] = limit - if offset: - opts["offset"] = offset - try: - return mbr.send(opts=opts) - except mbzerror.MbzWebServiceError as ex: - raise _convert_mbzero_exception_to_local(ex) - - def _make_query(self, fields: dict[str, str] = {}) -> str: - """Make a Lucene Query string from a dict of fields - - :param fields: Dict of field keys and values used to build the query. - Values will be properly escaped. - :return: The built Lucene Query string - """ - # Encode the query terms as a Lucene query string. - lucene_special = r'([+\-&|!(){}\[\]\^"~*?:\\\/])' - query_parts = [] - - for key, value in fields.items(): - # Escape Lucene's special characters. - value = re.sub(lucene_special, r"\\\1", value) - if value: - value = value.lower() # avoid AND / OR - query_parts.append(f"{key}:({value})") - full_query = " ".join(query_parts).strip() - - if not full_query: - raise ValueError("at least one query term is required") - - return full_query - - @staticmethod - def _remove_none_values(data): - """Iterate recursively over a Python object to remove all None values in - dicts - """ - if isinstance(data, dict): - return { - key: MbInterface._remove_none_values(value) - for key, value in data.items() - if value is not None - } - elif isinstance(data, list): - return [MbInterface._remove_none_values(item) for item in data] - else: - return data - - @staticmethod - def _parse_and_clean_json(data: bytes) -> JSONDict: - """Parse the JSON data and remove all None values in dicts. - This is needed as the MusicBrainz JSON data contains None values instead of - simply not setting them in dictionaries. - This is also different from the their XML data which only contains filled - values. - - :param data: JSON data as bytes - """ - return MbInterface._remove_none_values(json.loads(data)) - - def browse_recordings( - self, - lookup_entity_type: Literal["artist", "collection", "release", "work"], - mbid: str, - includes: list[str] = [], - limit: int | None = None, - offset: int | None = None, - ) -> JSONDict: - """Browse recordings linked to an entity - - :param lookup_entity_type: The type of entity whose recordings are to be browsed - :param mbid: The MusicBrainz ID of the entity - :param includes: List of parameters to request more information to be included - about the recordings - :param limit: The number of recordings that should be returned - :param offset: Offset used for paging through more than one page of results - :return: The JSON-decoded response as an object - :raises MbInterfaceError: if the request did not succeed - """ - return MbInterface._parse_and_clean_json( - self._browse( - lookup_entity_type, - mbid, - "recording", - includes, - limit=limit, - offset=offset, - ) - ) - - def get_release_by_id( - self, - mbid: str, - includes: list[str] = [], - ) -> JSONDict: - """Get a release from its ID - - :param mbid: The MusicBrainz ID of the release - :param includes: List of parameters to request more information to be included - about the release - :return: The JSON-decoded response as an object - :raises MbInterfaceError: if the request did not succeed - """ - return MbInterface._parse_and_clean_json( - self._lookup( - "release", - mbid, - includes, - ) - ) - - def get_recording_by_id( - self, - mbid: str, - includes: list[str] = [], - ) -> JSONDict: - """Get a recording from its ID - - :param mbid: The MusicBrainz ID of the entity - :param includes: List of parameters to request more information to be included - about the recording - :return: The JSON-decoded response as an object - :raises MbInterfaceError: if the request did not succeed - """ - return MbInterface._parse_and_clean_json( - self._lookup( - "recording", - mbid, - includes, - ) - ) - - def search_releases( - self, - limit: int | None = None, - offset: int | None = None, - **fields: str, - ) -> JSONDict: - """Search for releases using a query - - :param limit: The number of releases that should be returned - :param offset: Offset used for paging through more than one page of results - :param fields: Dict of fields composing the search query - :return: The JSON-decoded response as an object - :raises MbInterfaceError: if the request did not succeed - """ - return MbInterface._parse_and_clean_json( - self._search( - "release", - query=self._make_query(fields), - limit=limit, - offset=offset, - ) - ) - - def search_recordings( - self, - limit: int | None = None, - offset: int | None = None, - **fields: str, - ) -> JSONDict: - """Search for recordings using a query - - :param limit: The number of recordings that should be returned - :param offset: Offset used for paging through more than one page of results - :param fields: Dict of fields composing the search query - :return: The JSON-decoded response as an object - :raises MbInterfaceError: if the request did not succeed - """ - return MbInterface._parse_and_clean_json( - self._search( - "recording", - query=self._make_query(fields), - limit=limit, - offset=offset, - ) - ) - class MusicBrainzAPIError(util.HumanReadableError): """An error while talking to MusicBrainz. The `query` field is the @@ -735,10 +372,7 @@ def __init__(self): super().__init__() self.config.add( { - "host": "musicbrainz.org", - "https": False, - "ratelimit": 1, - "ratelimit_interval": 1, + # The rest of the config is defined in SharedMbInterface "searchlimit": 5, "genres": False, "external_ids": { @@ -751,18 +385,7 @@ def __init__(self): "extra_tags": [], }, ) - self.mb_interface = MbInterface( - RateLimiter( - reqs_per_interval=self.config["ratelimit"].get(int), - interval_sec=self.config["ratelimit_interval"].as_number(), - ) - ) - hostname = self.config["host"].as_str() - https = self.config["https"].get(bool) - # Only call set_hostname when a custom server is configured. Since - # musicbrainz-ngs connects to musicbrainz.org with HTTPS by default - if hostname != "musicbrainz.org": - self.mb_interface.set_hostname(hostname, https) + self.mb_interface = SharedMbInterface().get() def track_info( self, diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py index 9d3a47f33f..88494caea2 100644 --- a/test/plugins/test_musicbrainz.py +++ b/test/plugins/test_musicbrainz.py @@ -835,7 +835,10 @@ def test_follow_pseudo_releases(self): "country": "COUNTRY", }, ] - with mock.patch("musicbrainz.MbInterface.get_release_by_id") as gp: + + with mock.patch( + "beetsplug._mb_interface.MbInterface.get_release_by_id" + ) as gp: gp.side_effect = side_effect album = self.mb.album_for_id("d2a6f856-b553-40a0-ac54-a321e8e2da02") assert album.country == "COUNTRY" @@ -878,7 +881,9 @@ def test_pseudo_releases_with_empty_links(self): }, ] - with mock.patch("musicbrainz.MbInterface.get_release_by_id") as gp: + with mock.patch( + "beetsplug._mb_interface.MbInterface.get_release_by_id" + ) as gp: gp.side_effect = side_effect album = self.mb.album_for_id("d2a6f856-b553-40a0-ac54-a321e8e2da02") assert album.country is None @@ -920,7 +925,9 @@ def test_pseudo_releases_without_links(self): }, ] - with mock.patch("musicbrainz.MbInterface.get_release_by_id") as gp: + with mock.patch( + "beetsplug._mb_interface.MbInterface.get_release_by_id" + ) as gp: gp.side_effect = side_effect album = self.mb.album_for_id("d2a6f856-b553-40a0-ac54-a321e8e2da02") assert album.country is None @@ -969,7 +976,9 @@ def test_pseudo_releases_with_unsupported_links(self): }, ] - with mock.patch("musicbrainz.MbInterface.get_release_by_id") as gp: + with mock.patch( + "beetsplug._mb_interface.MbInterface.get_release_by_id" + ) as gp: gp.side_effect = side_effect album = self.mb.album_for_id("d2a6f856-b553-40a0-ac54-a321e8e2da02") assert album.country is None @@ -1020,7 +1029,7 @@ def test_get_album_criteria( def test_item_candidates(self, monkeypatch, mb): monkeypatch.setattr( - "musicbrainz.MbInterface.search_recordings", + "beetsplug._mb_interface.MbInterface.search_recordings", lambda *_, **__: {"recordings": [self.RECORDING]}, ) @@ -1031,11 +1040,11 @@ def test_item_candidates(self, monkeypatch, mb): def test_candidates(self, monkeypatch, mb): monkeypatch.setattr( - "musicbrainz.MbInterface.search_releases", + "beetsplug._mb_interface.MbInterface.search_releases", lambda *_, **__: {"releases": [{"id": self.mbid}]}, ) monkeypatch.setattr( - "musicbrainz.MbInterface.get_release_by_id", + "beetsplug._mb_interface.MbInterface.get_release_by_id", lambda *_, **__: { "title": "hi", "id": self.mbid, From 57bad0737f82407d26184741e88fa7c81125dd40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Sun, 15 Jun 2025 07:48:16 +0000 Subject: [PATCH 12/15] Replace usage of `musicbrainzngs` in `listenbrainz` plugin with new `MbInterface` This migration is quite basic. There is one difference compared to before: when searching for recordings, the `track_name` was simply used as a non-field query and is now linked to the `recording` field. This should be more precise, but if this is not wanted, the `search_recordings` can be changed to also take a query instead of only fields. --- beetsplug/listenbrainz.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/beetsplug/listenbrainz.py b/beetsplug/listenbrainz.py index c579645db9..8670857cf9 100644 --- a/beetsplug/listenbrainz.py +++ b/beetsplug/listenbrainz.py @@ -2,13 +2,14 @@ import datetime -import musicbrainzngs import requests from beets import config, ui from beets.plugins import BeetsPlugin from beetsplug.lastimport import process_tracks +from ._mb_interface import SharedMbInterface + class ListenBrainzPlugin(BeetsPlugin): """A Beets plugin for interacting with ListenBrainz.""" @@ -22,6 +23,7 @@ def __init__(self): self.token = self.config["token"].get() self.username = self.config["username"].get() self.AUTH_HEADER = {"Authorization": f"Token {self.token}"} + self.mb_interface = SharedMbInterface().get() config["listenbrainz"]["token"].redact = True def commands(self): @@ -132,10 +134,9 @@ def get_tracks_from_listens(self, listens): def get_mb_recording_id(self, track): """Returns the MusicBrainz recording ID for a track.""" - resp = musicbrainzngs.search_recordings( - query=track["track_metadata"].get("track_name"), + resp = self.mb_interface.search_recordings( + recording=track["track_metadata"].get("track_name"), release=track["track_metadata"].get("release_name"), - strict=True, ) if resp.get("recording-count") == "1": return resp.get("recording-list")[0].get("id") @@ -210,7 +211,7 @@ def get_track_info(self, tracks): track_info = [] for track in tracks: identifier = track.get("identifier") - resp = musicbrainzngs.get_recording_by_id( + resp = self.mb_interface.get_recording_by_id( identifier, includes=["releases", "artist-credits"] ) recording = resp.get("recording") From 08895358d8f329a480deca0d5c248c024b940365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Sun, 15 Jun 2025 07:51:40 +0000 Subject: [PATCH 13/15] Replace usage of `musicbrainzngs` in `missing` plugin with new `MbInterface` This migration required adding a new `browse_release_groups` to the `MbInterface`, but this is trivial. --- beetsplug/_mb_interface.py | 31 +++++++++++++++++++++++++++++++ beetsplug/missing.py | 11 +++++++---- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/beetsplug/_mb_interface.py b/beetsplug/_mb_interface.py index ea64274ee6..14e21e0794 100644 --- a/beetsplug/_mb_interface.py +++ b/beetsplug/_mb_interface.py @@ -281,6 +281,37 @@ def browse_recordings( ) ) + def browse_release_groups( + self, + lookup_entity_type: Literal["artist", "collection", "release"], + mbid: str, + includes: list[str] = [], + limit: int | None = None, + offset: int | None = None, + ) -> "JSONDict": + """Browse release-groups linked to an entity + + :param lookup_entity_type: The type of entity whose release-groups are to be + browsed + :param mbid: The MusicBrainz ID of the entity + :param includes: List of parameters to request more information to be included + about the release-groups + :param limit: The number of release-groups that should be returned + :param offset: Offset used for paging through more than one page of results + :return: The JSON-decoded response as an object + :raises mbzerror.MbzRequestError: if the request did not succeed + """ + return MbInterface._parse_and_clean_json( + self._browse( + lookup_entity_type, + mbid, + "release-group", + includes, + limit=limit, + offset=offset, + ) + ) + def get_release_by_id( self, mbid: str, diff --git a/beetsplug/missing.py b/beetsplug/missing.py index c4bbb83fda..8e5eb54a1b 100644 --- a/beetsplug/missing.py +++ b/beetsplug/missing.py @@ -18,8 +18,7 @@ from collections import defaultdict from collections.abc import Iterator -import musicbrainzngs -from musicbrainzngs.musicbrainz import MusicBrainzError +from mbzero import mbzerror from beets import config, plugins from beets.dbcore import types @@ -27,6 +26,8 @@ from beets.plugins import BeetsPlugin from beets.ui import Subcommand, decargs, print_ +from ._mb_interface import SharedMbInterface + MB_ARTIST_QUERY = r"mb_albumartistid::^\w{8}-\w{4}-\w{4}-\w{4}-\w{12}$" @@ -103,6 +104,8 @@ def __init__(self): } ) + self.mb_interface = SharedMbInterface().get() + self.album_template_fields["missing"] = _missing_count self._command = Subcommand("missing", help=__doc__, aliases=["miss"]) @@ -189,8 +192,8 @@ def _missing_albums(self, lib: Library, query: list[str]) -> None: calculating_total = self.config["total"].get() for (artist, artist_id), album_ids in album_ids_by_artist.items(): try: - resp = musicbrainzngs.browse_release_groups(artist=artist_id) - except MusicBrainzError as err: + resp = self.mb_interface.browse_release_groups(artist_id) + except mbzerror.MbzWebServiceError as err: self._log.info( "Couldn't fetch info for artist '{}' ({}) - '{}'", artist, From 14ef5d009108c2e6205e4c6d33c3c6f96c561d9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Sun, 15 Jun 2025 08:08:15 +0000 Subject: [PATCH 14/15] Replace usage of `musicbrainzngs` in `parentwork` plugin with new `MbInterface` This migration required adding a new `get_work_by_id` to the `MbInterface`, but this is trivial. --- beetsplug/_mb_interface.py | 21 +++++++++++++++++++++ beetsplug/parentwork.py | 30 ++++++++++++++++++------------ test/plugins/test_parentwork.py | 3 ++- 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/beetsplug/_mb_interface.py b/beetsplug/_mb_interface.py index 14e21e0794..aaf3d5cdbf 100644 --- a/beetsplug/_mb_interface.py +++ b/beetsplug/_mb_interface.py @@ -354,6 +354,27 @@ def get_recording_by_id( ) ) + def get_work_by_id( + self, + mbid: str, + includes: list[str] = [], + ) -> "JSONDict": + """Get a work from its ID + + :param mbid: The MusicBrainz ID of the entity + :param includes: List of parameters to request more information to be included + about the work + :return: The JSON-decoded response as an object + :raises mbzerror.MbzRequestError: if the request did not succeed + """ + return MbInterface._parse_and_clean_json( + self._lookup( + "work", + mbid, + includes, + ) + ) + def search_releases( self, limit: int | None = None, diff --git a/beetsplug/parentwork.py b/beetsplug/parentwork.py index 463a455f57..2a95b2dbc5 100644 --- a/beetsplug/parentwork.py +++ b/beetsplug/parentwork.py @@ -16,17 +16,19 @@ and work composition date """ -import musicbrainzngs +from mbzero import mbzerror from beets import ui from beets.plugins import BeetsPlugin +from ._mb_interface import MbInterface, SharedMbInterface -def direct_parent_id(mb_workid, work_date=None): + +def direct_parent_id(mb_interface: MbInterface, mb_workid: str, work_date=None): """Given a Musicbrainz work id, find the id one of the works the work is part of and the first composition date it encounters. """ - work_info = musicbrainzngs.get_work_by_id( + work_info = mb_interface.get_work_by_id( mb_workid, includes=["work-rels", "artist-rels"] ) if "artist-relation-list" in work_info["work"] and work_date is None: @@ -46,25 +48,25 @@ def direct_parent_id(mb_workid, work_date=None): return None, work_date -def work_parent_id(mb_workid): +def work_parent_id(mb_interface: MbInterface, mb_workid: str): """Find the parent work id and composition date of a work given its id.""" work_date = None while True: - new_mb_workid, work_date = direct_parent_id(mb_workid, work_date) + new_mb_workid, work_date = direct_parent_id( + mb_interface, mb_workid, work_date + ) if not new_mb_workid: return mb_workid, work_date mb_workid = new_mb_workid return mb_workid, work_date -def find_parentwork_info(mb_workid): +def find_parentwork_info(mb_interface: MbInterface, mb_workid): """Get the MusicBrainz information dict about a parent work, including the artist relations, and the composition date for a work's parent work. """ - parent_id, work_date = work_parent_id(mb_workid) - work_info = musicbrainzngs.get_work_by_id( - parent_id, includes=["artist-rels"] - ) + parent_id, work_date = work_parent_id(mb_interface, mb_workid) + work_info = mb_interface.get_work_by_id(parent_id, includes=["artist-rels"]) return work_info, work_date @@ -79,6 +81,8 @@ def __init__(self): } ) + self.mb_interface = SharedMbInterface().get() + if self.config["auto"]: self.import_stages = [self.imported] @@ -192,8 +196,10 @@ def find_work(self, item, force, verbose): work_changed = item.parentwork_workid_current != item.mb_workid if force or not hasparent or work_changed: try: - work_info, work_date = find_parentwork_info(item.mb_workid) - except musicbrainzngs.musicbrainz.WebServiceError as e: + work_info, work_date = find_parentwork_info( + self.mb_interface, item.mb_workid + ) + except mbzerror.MbzWebServiceError as e: self._log.debug("error fetching work: {}", e) return parent_info = self.get_info(item, work_info) diff --git a/test/plugins/test_parentwork.py b/test/plugins/test_parentwork.py index 99267f6ffa..35705db091 100644 --- a/test/plugins/test_parentwork.py +++ b/test/plugins/test_parentwork.py @@ -160,7 +160,8 @@ def setUp(self): """Set up configuration""" super().setUp() self.patcher = patch( - "musicbrainzngs.get_work_by_id", side_effect=mock_workid_response + "beetsplug._mb_interface.MbInterface.get_work_by_id", + side_effect=mock_workid_response, ) self.patcher.start() From c1a73234524a02eb017fb2747539fef965f9e11e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Sun, 15 Jun 2025 11:23:26 +0000 Subject: [PATCH 15/15] WIP mbcollection --- beetsplug/_mb_interface.py | 83 ++++++++++++++++++++++++++++++++++++-- beetsplug/mbcollection.py | 31 ++++++++------ 2 files changed, 99 insertions(+), 15 deletions(-) diff --git a/beetsplug/_mb_interface.py b/beetsplug/_mb_interface.py index aaf3d5cdbf..9a3b5af2d4 100644 --- a/beetsplug/_mb_interface.py +++ b/beetsplug/_mb_interface.py @@ -4,10 +4,11 @@ from urllib.parse import quote import requests.exceptions # For mbzero exception handling -from mbzero import mbzerror +from mbzero import mbzauth, mbzerror from mbzero import mbzrequest as mbzr import beets +from beets.ui import UserError from beets.util.rate_limiter import RateLimiter if TYPE_CHECKING: @@ -71,13 +72,20 @@ def _convert_mbzero_exception_to_local( class MbInterface: """An interface for sending requests using MusicBrainz API""" - def __init__(self, hostname: str, https: bool, rate_limiter: RateLimiter): + def __init__( + self, + hostname: str, + https: bool, + rate_limiter: RateLimiter, + auth: mbzauth.MbzCredentials | None = None, + ): self.hostname = hostname self.https = https self.rate_limiter = rate_limiter self.useragent = "beets/{} (https://beets.io/)".format( beets.__version__ ) + self.auth = auth def _lookup( self, @@ -195,7 +203,7 @@ def _send( if offset: opts["offset"] = offset try: - return mbr.send(opts=opts) + return mbr.send(opts=opts, credentials=self.auth) except mbzerror.MbzWebServiceError as ex: raise _convert_mbzero_exception_to_local(ex) @@ -281,6 +289,37 @@ def browse_recordings( ) ) + def browse_release( + self, + lookup_entity_type: Literal["artist", "collection", "release"], + mbid: str, + includes: list[str] = [], + limit: int | None = None, + offset: int | None = None, + ) -> "JSONDict": + """Browse releases linked to an entity + + :param lookup_entity_type: The type of entity whose releases are to be + browsed + :param mbid: The MusicBrainz ID of the entity + :param includes: List of parameters to request more information to be included + about the releases + :param limit: The number of releases that should be returned + :param offset: Offset used for paging through more than one page of results + :return: The JSON-decoded response as an object + :raises mbzerror.MbzRequestError: if the request did not succeed + """ + return MbInterface._parse_and_clean_json( + self._browse( + lookup_entity_type, + mbid, + "release", + includes, + limit=limit, + offset=offset, + ) + ) + def browse_release_groups( self, lookup_entity_type: Literal["artist", "collection", "release"], @@ -375,6 +414,21 @@ def get_work_by_id( ) ) + def get_user_collections(self) -> "JSONDict": + """Get the collections of the authenticated user + + :return: The JSON-decoded response as an object + :raises MbInterfaceError: if the request did not succeed + """ + + assert self.auth is not None, ( + "get_user_collections requires an authenticated user" + ) + + return MbInterface._parse_and_clean_json( + self._lookup("collection", "", includes=[]) + ) + def search_releases( self, limit: int | None = None, @@ -444,6 +498,7 @@ def __init__(self): "ratelimit_interval": 1, } ) + mb_config["pass"].redact = True hostname = mb_config["host"].as_str() https = mb_config["https"].get(bool) @@ -451,6 +506,15 @@ def __init__(self): if hostname == "musicbrainz.org": https = True + # Set the auth if the config is set + if mb_config["user"].exists() and mb_config["pass"].exists(): + auth = mbzauth.MbzCredentials() + auth.auth_set( + mb_config["user"].as_str(), mb_config["pass"].as_str() + ) + else: + auth = None + self.mb_interface = MbInterface( hostname, https, @@ -458,7 +522,20 @@ def __init__(self): reqs_per_interval=mb_config["ratelimit"].get(int), interval_sec=mb_config["ratelimit_interval"].as_number(), ), + auth, ) + def require_auth_for_plugin(self, plugin_name: str) -> "SharedMbInterface": + """Raise an error if the authentication has not been configured + + :param plugin_name: Name of the plugin that requires authentication to work + """ + if self.mb_interface.auth is None: + raise UserError( + f"MusicBrainz authentication is required for plugin {plugin_name}: " + "musicbrainz.user and musicbrainz.pass need to be set in configuration" + ) + return self + def get(self) -> MbInterface: return self.mb_interface diff --git a/beetsplug/mbcollection.py b/beetsplug/mbcollection.py index 1c010bf504..ca981ae3e4 100644 --- a/beetsplug/mbcollection.py +++ b/beetsplug/mbcollection.py @@ -17,10 +17,16 @@ import musicbrainzngs -from beets import config, ui +from beets import ui from beets.plugins import BeetsPlugin from beets.ui import Subcommand +from ._mb_interface import ( + MbInterfaceError, + MbInterfaceUnauthorizedError, + SharedMbInterface, +) + SUBMISSION_CHUNK_SIZE = 200 FETCH_CHUNK_SIZE = 100 UUID_REGEX = r"^[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}$" @@ -30,12 +36,10 @@ def mb_call(func, *args, **kwargs): """Call a MusicBrainz API function and catch exceptions.""" try: return func(*args, **kwargs) - except musicbrainzngs.AuthenticationError: + except MbInterfaceUnauthorizedError: raise ui.UserError("authentication with MusicBrainz failed") - except (musicbrainzngs.ResponseError, musicbrainzngs.NetworkError) as exc: + except MbInterfaceError as exc: raise ui.UserError(f"MusicBrainz API error: {exc}") - except musicbrainzngs.UsageError: - raise ui.UserError("MusicBrainz credentials missing") def submit_albums(collection_id, release_ids): @@ -44,17 +48,14 @@ def submit_albums(collection_id, release_ids): """ for i in range(0, len(release_ids), SUBMISSION_CHUNK_SIZE): chunk = release_ids[i : i + SUBMISSION_CHUNK_SIZE] + # TODO: mbzero does not support PUT requests... + # - https://gitlab.com/mbzero/python-mbzero/-/issues/3 mb_call(musicbrainzngs.add_releases_to_collection, collection_id, chunk) class MusicBrainzCollectionPlugin(BeetsPlugin): def __init__(self): super().__init__() - config["musicbrainz"]["pass"].redact = True - musicbrainzngs.auth( - config["musicbrainz"]["user"].as_str(), - config["musicbrainz"]["pass"].as_str(), - ) self.config.add( { "auto": False, @@ -62,11 +63,14 @@ def __init__(self): "remove": False, } ) + self.mb_interface = ( + SharedMbInterface().require_auth_for_plugin(self.name).get() + ) if self.config["auto"]: self.import_stages = [self.imported] def _get_collection(self): - collections = mb_call(musicbrainzngs.get_collections) + collections = mb_call(self.mb_interface.get_user_collections) if not collections["collection-list"]: raise ui.UserError("no collections exist for user") @@ -90,7 +94,8 @@ def _get_collection(self): def _get_albums_in_collection(self, id): def _fetch(offset): res = mb_call( - musicbrainzngs.get_releases_in_collection, + self.mb_interface.browse_release, + "collection", id, limit=FETCH_CHUNK_SIZE, offset=offset, @@ -124,6 +129,8 @@ def remove_missing(self, collection_id, lib_albums): remove_me = list(set(albums_in_collection) - lib_ids) for i in range(0, len(remove_me), FETCH_CHUNK_SIZE): chunk = remove_me[i : i + FETCH_CHUNK_SIZE] + # TODO: mbzero does not support DELETE requests... + # - https://gitlab.com/mbzero/python-mbzero/-/issues/3 mb_call( musicbrainzngs.remove_releases_from_collection, collection_id,