diff --git a/openml/__init__.py b/openml/__init__.py index 9a457c146..47bc86b4d 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -35,6 +35,7 @@ utils, ) from .__version__ import __version__ +from ._api import _backend from .datasets import OpenMLDataFeature, OpenMLDataset from .evaluations import OpenMLEvaluation from .flows import OpenMLFlow @@ -116,6 +117,7 @@ def populate_cache( "OpenMLTask", "__version__", "_api_calls", + "_backend", "config", "datasets", "evaluations", diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py new file mode 100644 index 000000000..7766016d1 --- /dev/null +++ b/openml/_api/__init__.py @@ -0,0 +1,85 @@ +from .clients import ( + HTTPCache, + HTTPClient, + MinIOClient, +) +from .resources import ( + API_REGISTRY, + DatasetAPI, + DatasetV1API, + DatasetV2API, + EstimationProcedureAPI, + EstimationProcedureV1API, + EstimationProcedureV2API, + EvaluationAPI, + EvaluationMeasureAPI, + EvaluationMeasureV1API, + EvaluationMeasureV2API, + EvaluationV1API, + EvaluationV2API, + FallbackProxy, + FlowAPI, + FlowV1API, + FlowV2API, + ResourceAPI, + ResourceV1API, + ResourceV2API, + RunAPI, + RunV1API, + RunV2API, + SetupAPI, + SetupV1API, + SetupV2API, + StudyAPI, + StudyV1API, + StudyV2API, + TaskAPI, + TaskV1API, + TaskV2API, +) +from .setup import ( + APIBackend, + APIBackendBuilder, + _backend, +) + +__all__ = [ + "API_REGISTRY", + "APIBackend", + "APIBackendBuilder", + "DatasetAPI", + "DatasetV1API", + "DatasetV2API", + "EstimationProcedureAPI", + "EstimationProcedureV1API", + "EstimationProcedureV2API", + "EvaluationAPI", + "EvaluationMeasureAPI", + "EvaluationMeasureV1API", + "EvaluationMeasureV2API", + "EvaluationV1API", + "EvaluationV2API", + "FallbackProxy", + "FlowAPI", + "FlowV1API", + "FlowV2API", + "HTTPCache", + "HTTPClient", + "MinIOClient", + "ResourceAPI", + "ResourceV1API", + "ResourceV2API", + "RunAPI", + "RunV1API", + "RunV2API", + "SetupAPI", + "SetupV1API", + "SetupV2API", + "StudyAPI", + "StudyV1API", + 
"StudyV2API", + "TaskAPI", + "TaskV1API", + "TaskV2API", + "_backend", +] diff --git a/openml/_api/clients/__init__.py b/openml/_api/clients/__init__.py new file mode 100644 index 000000000..42f11fbcf --- /dev/null +++ b/openml/_api/clients/__init__.py @@ -0,0 +1,8 @@ +from .http import HTTPCache, HTTPClient +from .minio import MinIOClient + +__all__ = [ + "HTTPCache", + "HTTPClient", + "MinIOClient", +] diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py new file mode 100644 index 000000000..08db3317b --- /dev/null +++ b/openml/_api/clients/http.py @@ -0,0 +1,811 @@ +from __future__ import annotations + +import hashlib +import json +import logging +import math +import random +import time +import xml +from collections.abc import Callable, Mapping +from pathlib import Path +from typing import Any, cast +from urllib.parse import urlencode, urljoin, urlparse + +import requests +import xmltodict +from requests import Response + +import openml +from openml.enums import APIVersion, RetryPolicy +from openml.exceptions import ( + OpenMLAuthenticationError, + OpenMLHashException, + OpenMLServerError, + OpenMLServerException, + OpenMLServerNoResult, +) + + +class HTTPCache: + """ + Filesystem-based cache for HTTP responses. + + This class stores HTTP responses on disk using a structured directory layout + derived from the request URL and parameters. Each cached response consists of + three files: metadata (``meta.json``), headers (``headers.json``), and the raw + body (``body.bin``). + + Notes + ----- + The cache key is derived from the URL (domain and path components) and query + parameters, excluding the ``api_key`` parameter. + """ + + @property + def path(self) -> Path: + return Path(openml.config.get_cache_directory()) + + def get_key(self, url: str, params: dict[str, Any]) -> str: + """ + Generate a filesystem-safe cache key for a request. 
+ + The key is constructed from the reversed domain components, URL path + segments, and URL-encoded query parameters (excluding ``api_key``). + + Parameters + ---------- + url : str + The full request URL. + params : dict of str to Any + Query parameters associated with the request. + + Returns + ------- + str + A relative path string representing the cache key. + """ + parsed_url = urlparse(url) + netloc_parts = parsed_url.netloc.split(".")[::-1] + path_parts = parsed_url.path.strip("/").split("/") + + filtered_params = {k: v for k, v in params.items() if k != "api_key"} + params_part = [urlencode(filtered_params)] if filtered_params else [] + + return str(Path(*netloc_parts, *path_parts, *params_part)) + + def _key_to_path(self, key: str) -> Path: + """ + Convert a cache key into an absolute filesystem path. + + Parameters + ---------- + key : str + Cache key as returned by :meth:`get_key`. + + Returns + ------- + pathlib.Path + Absolute path corresponding to the cache entry. + """ + return self.path.joinpath(key) + + def load(self, key: str) -> Response: + """ + Load a cached HTTP response from disk. + + Parameters + ---------- + key : str + Cache key identifying the stored response. + + Returns + ------- + requests.Response + Reconstructed response object with status code, headers, body, and metadata. + + Raises + ------ + FileNotFoundError + If the cache entry or required files are missing. + ValueError + If required metadata is missing or malformed. 
+ """ + path = self._key_to_path(key) + + if not path.exists(): + raise FileNotFoundError(f"Cache entry not found: {path}") + + meta_path = path / "meta.json" + headers_path = path / "headers.json" + body_path = path / "body.bin" + + if not (meta_path.exists() and headers_path.exists() and body_path.exists()): + raise FileNotFoundError(f"Incomplete cache at {path}") + + with meta_path.open("r", encoding="utf-8") as f: + meta = json.load(f) + + with headers_path.open("r", encoding="utf-8") as f: + headers = json.load(f) + + body = body_path.read_bytes() + + response = Response() + response.status_code = meta["status_code"] + response.url = meta["url"] + response.reason = meta["reason"] + response.headers = headers + response._content = body + response.encoding = meta["encoding"] + + return response + + def save(self, key: str, response: Response) -> None: + """ + Persist an HTTP response to disk. + + Parameters + ---------- + key : str + Cache key identifying where to store the response. + response : requests.Response + Response object to cache. + + Notes + ----- + The response body is stored as binary data. Headers and metadata + (status code, URL, reason, encoding, elapsed time, request info, and + creation timestamp) are stored as JSON. 
+ """ + path = self._key_to_path(key) + path.mkdir(parents=True, exist_ok=True) + + (path / "body.bin").write_bytes(response.content) + + with (path / "headers.json").open("w", encoding="utf-8") as f: + json.dump(dict(response.headers), f) + + meta = { + "status_code": response.status_code, + "url": response.url, + "reason": response.reason, + "encoding": response.encoding, + "created_at": time.time(), + "request": { + "method": response.request.method if response.request else None, + "url": response.request.url if response.request else None, + "headers": dict(response.request.headers) if response.request else None, + "body": response.request.body if response.request else None, + }, + } + + with (path / "meta.json").open("w", encoding="utf-8") as f: + json.dump(meta, f) + + +class HTTPClient: + """ + HTTP client for interacting with the OpenML API. + + This client supports configurable retry policies, optional filesystem + caching, API key authentication, and response validation including + checksum verification. + + Parameters + ---------- + api_version : APIVersion + Backend API Version. 
+ """ + + def __init__( + self, + *, + api_version: APIVersion, + ) -> None: + self.api_version = api_version + + self.cache = HTTPCache() + + @property + def server(self) -> str: + server = openml.config.servers[self.api_version]["server"] + if server is None: + servers_repr = {k.value: v for k, v in openml.config.servers.items()} + raise ValueError( + f'server found to be None for api_version="{self.api_version}" in {servers_repr}' + ) + return cast("str", server) + + @property + def api_key(self) -> str | None: + return cast("str | None", openml.config.servers[self.api_version]["apikey"]) + + @property + def retries(self) -> int: + return cast("int", openml.config.connection_n_retries) + + @property + def retry_policy(self) -> RetryPolicy: + return RetryPolicy.HUMAN if openml.config.retry_policy == "human" else RetryPolicy.ROBOT + + @property + def retry_func(self) -> Callable: + return self._human_delay if self.retry_policy == RetryPolicy.HUMAN else self._robot_delay + + def _robot_delay(self, n: int) -> float: + """ + Compute delay for automated retry policy. + + Parameters + ---------- + n : int + Current retry attempt number (1-based). + + Returns + ------- + float + Number of seconds to wait before the next retry. + + Notes + ----- + Uses a sigmoid-based growth curve with Gaussian noise to gradually + increase waiting time. + """ + wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60 + variation = random.gauss(0, wait / 10) + return max(1.0, wait + variation) + + def _human_delay(self, n: int) -> float: + """ + Compute delay for human-like retry policy. + + Parameters + ---------- + n : int + Current retry attempt number (1-based). + + Returns + ------- + float + Number of seconds to wait before the next retry. + """ + return max(1.0, n) + + def _parse_exception_response( + self, + response: Response, + ) -> tuple[int | None, str]: + """ + Parse an error response returned by the server. 
+ + Parameters + ---------- + response : requests.Response + HTTP response containing error details in JSON or XML format. + + Returns + ------- + tuple of (int or None, str) + Parsed error code and combined error message. The code may be + ``None`` if unavailable. + """ + content_type = response.headers.get("Content-Type", "").lower() + + if "application/json" in content_type: + server_exception = response.json() + server_error = server_exception["detail"] + code = server_error.get("code") + message = server_error.get("message") + additional_information = server_error.get("additional_information") + else: + server_exception = xmltodict.parse(response.text) + server_error = server_exception["oml:error"] + code = server_error.get("oml:code") + message = server_error.get("oml:message") + additional_information = server_error.get("oml:additional_information") + + if code is not None: + code = int(code) + + if message and additional_information: + full_message = f"{message} - {additional_information}" + else: + full_message = message or additional_information or "" + + return code, full_message + + def _raise_code_specific_error( + self, + code: int, + message: str, + url: str, + files: Mapping[str, Any] | None, + ) -> None: + """ + Raise specialized exceptions based on OpenML error codes. + + Parameters + ---------- + code : int + Server-provided error code. + message : str + Parsed error message. + url : str + Request URL associated with the error. + files : Mapping of str to Any or None + Files sent with the request, if any. + + Raises + ------ + OpenMLServerNoResult + If the error indicates a missing resource. + OpenMLNotAuthorizedError + If authentication is required or invalid. + OpenMLServerException + For other server-side errors (except retryable database errors). 
+ """ + if code in [111, 372, 512, 500, 482, 542, 674]: + # 512 for runs, 372 for datasets, 500 for flows + # 482 for tasks, 542 for evaluations, 674 for setups + # 111 for dataset descriptions + raise OpenMLServerNoResult(code=code, message=message, url=url) + + # 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow) + if code == 163 and files is not None and "description" in files: + # file_elements['description'] is the XML file description of the flow + message = f"\n{files['description']}\n{message}" + + # Propagate all server errors to the calling functions, except + # for 107 which represents a database connection error. + # These are typically caused by high server load, + # which means trying again might resolve the issue. + # DATABASE_CONNECTION_ERRCODE + if code != 107: + raise OpenMLServerException(code=code, message=message, url=url) + + def _validate_response( + self, + method: str, + url: str, + files: Mapping[str, Any] | None, + response: Response, + ) -> Exception | None: + """ + Validate an HTTP response and determine whether to retry. + + Parameters + ---------- + method : str + HTTP method used for the request. + url : str + Full request URL. + files : Mapping of str to Any or None + Files sent with the request, if any. + response : requests.Response + Received HTTP response. + + Returns + ------- + Exception or None + ``None`` if the response is valid. Otherwise, an exception + indicating the error to raise or retry. + + Raises + ------ + OpenMLServerError + For unexpected server errors or malformed responses. + """ + if ( + "Content-Encoding" not in response.headers + or response.headers["Content-Encoding"] != "gzip" + ): + logging.warning(f"Received uncompressed content from OpenML for {url}.") + + if response.status_code == 200: + return None + + if response.status_code == requests.codes.URI_TOO_LONG: + raise OpenMLServerError(f"URI too long! 
({url})") + + exception: Exception | None = None + code: int | None = None + message: str = "" + + try: + code, message = self._parse_exception_response(response) + + except (requests.exceptions.JSONDecodeError, xml.parsers.expat.ExpatError) as e: + if method != "GET": + extra = f"Status code: {response.status_code}\n{response.text}" + raise OpenMLServerError( + f"Unexpected server error when calling {url}. Please contact the " + f"developers!\n{extra}" + ) from e + + exception = e + + except Exception as e: + # If we failed to parse it out, + # then something has gone wrong in the body we have sent back + # from the server and there is little extra information we can capture. + raise OpenMLServerError( + f"Unexpected server error when calling {url}. Please contact the developers!\n" + f"Status code: {response.status_code}\n{response.text}", + ) from e + + if code is not None: + self._raise_code_specific_error( + code=code, + message=message, + url=url, + files=files, + ) + + if exception is None: + exception = OpenMLServerException(code=code, message=message, url=url) + + return exception + + def __request( # noqa: PLR0913 + self, + session: requests.Session, + method: str, + url: str, + params: Mapping[str, Any], + data: Mapping[str, Any], + headers: Mapping[str, str], + files: Mapping[str, Any] | None, + **request_kwargs: Any, + ) -> tuple[Response | None, Exception | None]: + """ + Execute a single HTTP request attempt. + + Parameters + ---------- + session : requests.Session + Active session used to send the request. + method : str + HTTP method (e.g., ``GET``, ``POST``). + url : str + Full request URL. + params : Mapping of str to Any + Query parameters. + data : Mapping of str to Any + Request body data. + headers : Mapping of str to str + HTTP headers. + files : Mapping of str to Any or None + Files to upload. + **request_kwargs : Any + Additional arguments forwarded to ``requests.Session.request``. 
+ + Returns + ------- + tuple of (requests.Response or None, Exception or None) + Response and potential retry exception. + """ + exception: Exception | None = None + response: Response | None = None + + try: + response = session.request( + method=method, + url=url, + params=params, + data=data, + headers=headers, + files=files, + **request_kwargs, + ) + except ( + requests.exceptions.ChunkedEncodingError, + requests.exceptions.ConnectionError, + requests.exceptions.SSLError, + ) as e: + exception = e + + if response is not None: + exception = self._validate_response( + method=method, + url=url, + files=files, + response=response, + ) + + return response, exception + + def _request( # noqa: PLR0913, C901 + self, + method: str, + path: str, + *, + enable_cache: bool = False, + refresh_cache: bool = False, + use_api_key: bool = False, + md5_checksum: str | None = None, + **request_kwargs: Any, + ) -> Response: + """ + Send an HTTP request with retry, caching, and validation support. + + Parameters + ---------- + method : str + HTTP method to use. + path : str + API path relative to the base URL. + enable_cache : bool, optional + Whether to load/store response from cache. + refresh_cache : bool, optional + Only used when `enable_cache=True`. If True, ignore any existing + cached response and overwrite it with a fresh one. + use_api_key : bool, optional + Whether to include the API key in query parameters. + md5_checksum : str or None, optional + Expected MD5 checksum of the response body. + **request_kwargs : Any + Additional arguments passed to the underlying request. + + Returns + ------- + requests.Response + Final validated response. + + Raises + ------ + Exception + Propagates network, validation, or server exceptions after retries. + OpenMLHashException + If checksum verification fails. 
+ """ + url = urljoin(self.server, path) + retries = max(1, self.retries) + + params = request_kwargs.pop("params", {}).copy() + data = request_kwargs.pop("data", {}).copy() + + if use_api_key: + if self.api_key is None: + raise OpenMLAuthenticationError( + message=( + f"The API call {url} requires authentication via an API key. " + "Please configure OpenML-Python to use your API " + "as described in this example: " + "https://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication" + ) + ) + params["api_key"] = self.api_key + + if method.upper() in {"POST", "PUT", "PATCH"}: + data = {**params, **data} + params = {} + + # prepare headers + headers = request_kwargs.pop("headers", {}).copy() + headers.update(openml.config._HEADERS) + + files = request_kwargs.pop("files", None) + + if enable_cache and not refresh_cache: + cache_key = self.cache.get_key(url, params) + try: + return self.cache.load(cache_key) + except FileNotFoundError: + pass # cache miss, continue + except Exception: + raise # propagate unexpected cache errors + + with requests.Session() as session: + for retry_counter in range(1, retries + 1): + response, exception = self.__request( + session=session, + method=method, + url=url, + params=params, + data=data, + headers=headers, + files=files, + **request_kwargs, + ) + + # executed successfully + if exception is None: + break + # tries completed + if retry_counter >= retries: + raise exception + + delay = self.retry_func(retry_counter) + time.sleep(delay) + + # response is guaranteed to be not `None` + # otherwise an exception would have been raised before + response = cast("Response", response) + + if md5_checksum is not None: + self._verify_checksum(response, md5_checksum) + + if enable_cache: + cache_key = self.cache.get_key(url, params) + self.cache.save(cache_key, response) + + return response + + def _verify_checksum(self, response: Response, md5_checksum: str) -> None: + """ + Verify MD5 checksum of a response 
body. + + Parameters + ---------- + response : requests.Response + HTTP response whose content should be verified. + md5_checksum : str + Expected hexadecimal MD5 checksum. + + Raises + ------ + OpenMLHashException + If the computed checksum does not match the expected value. + """ + # ruff sees hashlib.md5 as insecure + actual = hashlib.md5(response.content).hexdigest() # noqa: S324 + if actual != md5_checksum: + raise OpenMLHashException( + f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} " + f"when downloading {response.url}.", + ) + + def get( + self, + path: str, + *, + enable_cache: bool = False, + refresh_cache: bool = False, + use_api_key: bool = False, + md5_checksum: str | None = None, + **request_kwargs: Any, + ) -> Response: + """ + Send a GET request. + + Parameters + ---------- + path : str + API path relative to the base URL. + enable_cache : bool, optional + Whether to use the response cache. + refresh_cache : bool, optional + Whether to ignore existing cached entries. + use_api_key : bool, optional + Whether to include the API key. + md5_checksum : str or None, optional + Expected MD5 checksum for response validation. + **request_kwargs : Any + Additional request arguments. + + Returns + ------- + requests.Response + HTTP response. + """ + return self._request( + method="GET", + path=path, + enable_cache=enable_cache, + refresh_cache=refresh_cache, + use_api_key=use_api_key, + md5_checksum=md5_checksum, + **request_kwargs, + ) + + def post( + self, + path: str, + *, + use_api_key: bool = True, + **request_kwargs: Any, + ) -> Response: + """ + Send a POST request. + + Parameters + ---------- + path : str + API path relative to the base URL. + use_api_key : bool, optional + Whether to include the API key. + **request_kwargs : Any + Additional request arguments. + + Returns + ------- + requests.Response + HTTP response. 
+ """ + return self._request( + method="POST", + path=path, + enable_cache=False, + use_api_key=use_api_key, + **request_kwargs, + ) + + def delete( + self, + path: str, + **request_kwargs: Any, + ) -> Response: + """ + Send a DELETE request. + + Parameters + ---------- + path : str + API path relative to the base URL. + **request_kwargs : Any + Additional request arguments. + + Returns + ------- + requests.Response + HTTP response. + """ + return self._request( + method="DELETE", + path=path, + enable_cache=False, + use_api_key=True, + **request_kwargs, + ) + + def download( + self, + url: str, + handler: Callable[[Response, Path, str], None] | None = None, + encoding: str = "utf-8", + file_name: str = "response.txt", + md5_checksum: str | None = None, + ) -> Path: + """ + Download a resource and store it in the cache directory. + + Parameters + ---------- + url : str + Absolute URL of the resource to download. + handler : callable or None, optional + Custom handler function accepting ``(response, path, encoding)`` + and returning a ``pathlib.Path``. + encoding : str, optional + Text encoding used when writing the response body. + file_name : str, optional + Name of the saved file. + md5_checksum : str or None, optional + Expected MD5 checksum for integrity verification. + + Returns + ------- + pathlib.Path + Path to the downloaded file. + + Raises + ------ + OpenMLHashException + If checksum verification fails. 
+ """ + base = self.cache.path + file_path = base / "downloads" / urlparse(url).path.lstrip("/") / file_name + file_path = file_path.expanduser() + file_path.parent.mkdir(parents=True, exist_ok=True) + if file_path.exists(): + return file_path + + response = self.get(url, md5_checksum=md5_checksum) + + def write_to_file(response: Response, path: Path, encoding: str) -> None: + path.write_text(response.text, encoding) + + handler = handler or write_to_file + handler(response, file_path, encoding) + return file_path diff --git a/openml/_api/clients/minio.py b/openml/_api/clients/minio.py new file mode 100644 index 000000000..920b485e0 --- /dev/null +++ b/openml/_api/clients/minio.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from pathlib import Path + +import openml + + +class MinIOClient: + """ + Lightweight client configuration for interacting with a MinIO-compatible + object storage service. + + This class stores basic configuration such as a base filesystem path and + default HTTP headers. It is intended to be extended with actual request + or storage logic elsewhere. + + Attributes + ---------- + path : pathlib.Path or None + Configured base path for storage operations. + headers : dict of str to str + Default HTTP headers, including a user-agent identifying the + OpenML Python client version. 
+ """ + + @property + def path(self) -> Path: + return Path(openml.config.get_cache_directory()) diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py new file mode 100644 index 000000000..6d957966e --- /dev/null +++ b/openml/_api/resources/__init__.py @@ -0,0 +1,63 @@ +from ._registry import API_REGISTRY +from .base import ( + DatasetAPI, + EstimationProcedureAPI, + EvaluationAPI, + EvaluationMeasureAPI, + FallbackProxy, + FlowAPI, + ResourceAPI, + ResourceV1API, + ResourceV2API, + RunAPI, + SetupAPI, + StudyAPI, + TaskAPI, +) +from .dataset import DatasetV1API, DatasetV2API +from .estimation_procedure import ( + EstimationProcedureV1API, + EstimationProcedureV2API, +) +from .evaluation import EvaluationV1API, EvaluationV2API +from .evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API +from .flow import FlowV1API, FlowV2API +from .run import RunV1API, RunV2API +from .setup import SetupV1API, SetupV2API +from .study import StudyV1API, StudyV2API +from .task import TaskV1API, TaskV2API + +__all__ = [ + "API_REGISTRY", + "DatasetAPI", + "DatasetV1API", + "DatasetV2API", + "EstimationProcedureAPI", + "EstimationProcedureV1API", + "EstimationProcedureV2API", + "EvaluationAPI", + "EvaluationMeasureAPI", + "EvaluationMeasureV1API", + "EvaluationMeasureV2API", + "EvaluationV1API", + "EvaluationV2API", + "FallbackProxy", + "FlowAPI", + "FlowV1API", + "FlowV2API", + "ResourceAPI", + "ResourceV1API", + "ResourceV2API", + "RunAPI", + "RunV1API", + "RunV2API", + "SetupAPI", + "SetupV1API", + "SetupV2API", + "StudyAPI", + "StudyV1API", + "StudyV2API", + "TaskAPI", + "TaskV1API", + "TaskV2API", +] diff --git a/openml/_api/resources/_registry.py b/openml/_api/resources/_registry.py new file mode 100644 index 000000000..66d7ec428 --- /dev/null +++ b/openml/_api/resources/_registry.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from openml.enums import APIVersion, ResourceType + +from 
.dataset import DatasetV1API, DatasetV2API +from .estimation_procedure import ( + EstimationProcedureV1API, + EstimationProcedureV2API, +) +from .evaluation import EvaluationV1API, EvaluationV2API +from .evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API +from .flow import FlowV1API, FlowV2API +from .run import RunV1API, RunV2API +from .setup import SetupV1API, SetupV2API +from .study import StudyV1API, StudyV2API +from .task import TaskV1API, TaskV2API + +if TYPE_CHECKING: + from .base import ResourceAPI + +API_REGISTRY: dict[ + APIVersion, + dict[ResourceType, type[ResourceAPI]], +] = { + APIVersion.V1: { + ResourceType.DATASET: DatasetV1API, + ResourceType.TASK: TaskV1API, + ResourceType.EVALUATION_MEASURE: EvaluationMeasureV1API, + ResourceType.ESTIMATION_PROCEDURE: EstimationProcedureV1API, + ResourceType.EVALUATION: EvaluationV1API, + ResourceType.FLOW: FlowV1API, + ResourceType.STUDY: StudyV1API, + ResourceType.RUN: RunV1API, + ResourceType.SETUP: SetupV1API, + }, + APIVersion.V2: { + ResourceType.DATASET: DatasetV2API, + ResourceType.TASK: TaskV2API, + ResourceType.EVALUATION_MEASURE: EvaluationMeasureV2API, + ResourceType.ESTIMATION_PROCEDURE: EstimationProcedureV2API, + ResourceType.EVALUATION: EvaluationV2API, + ResourceType.FLOW: FlowV2API, + ResourceType.STUDY: StudyV2API, + ResourceType.RUN: RunV2API, + ResourceType.SETUP: SetupV2API, + }, +} diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py new file mode 100644 index 000000000..ed6dc26f7 --- /dev/null +++ b/openml/_api/resources/base/__init__.py @@ -0,0 +1,30 @@ +from .base import ResourceAPI +from .fallback import FallbackProxy +from .resources import ( + DatasetAPI, + EstimationProcedureAPI, + EvaluationAPI, + EvaluationMeasureAPI, + FlowAPI, + RunAPI, + SetupAPI, + StudyAPI, + TaskAPI, +) +from .versions import ResourceV1API, ResourceV2API + +__all__ = [ + "DatasetAPI", + "EstimationProcedureAPI", + "EvaluationAPI", + 
"EvaluationMeasureAPI", + "FallbackProxy", + "FlowAPI", + "ResourceAPI", + "ResourceV1API", + "ResourceV2API", + "RunAPI", + "SetupAPI", + "StudyAPI", + "TaskAPI", +] diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py new file mode 100644 index 000000000..625681e3b --- /dev/null +++ b/openml/_api/resources/base/base.py @@ -0,0 +1,236 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, NoReturn + +from openml.exceptions import ( + OpenMLNotAuthorizedError, + OpenMLNotSupportedError, + OpenMLServerError, + OpenMLServerException, +) + +if TYPE_CHECKING: + from collections.abc import Mapping + from typing import Any + + from openml._api.clients import HTTPClient, MinIOClient + from openml.enums import APIVersion, ResourceType + + +class ResourceAPI(ABC): + """ + Abstract base class for OpenML resource APIs. + + This class defines the common interface for interacting with OpenML + resources (e.g., datasets, flows, runs) across different API versions. + Concrete subclasses must implement the resource-specific operations + such as publishing, deleting, and tagging. + + Parameters + ---------- + http : HTTPClient + Configured HTTP client used for communication with the OpenML API. + minio : MinIOClient + Configured MinIO client used for object storage operations. + + Attributes + ---------- + api_version : APIVersion + API version implemented by the resource. + resource_type : ResourceType + Type of OpenML resource handled by the implementation. + _http : HTTPClient + Internal HTTP client instance. + _minio : MinIOClient or None + Internal MinIO client instance, if provided. + """ + + api_version: APIVersion + resource_type: ResourceType + + def __init__(self, http: HTTPClient, minio: MinIOClient): + self._http = http + self._minio = minio + + @abstractmethod + def delete(self, resource_id: int) -> bool: + """ + Delete a resource by its identifier. 
+ + Parameters + ---------- + resource_id : int + Unique identifier of the resource to delete. + + Returns + ------- + bool + ``True`` if the deletion was successful. + + Notes + ----- + Concrete subclasses must implement this method. + """ + + @abstractmethod + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: + """ + Publish a new resource to the OpenML server. + + Parameters + ---------- + path : str + API endpoint path used for publishing the resource. + files : Mapping of str to Any or None + Files or payload data required for publishing. The structure + depends on the resource type. + + Returns + ------- + int + Identifier of the newly created resource. + + Notes + ----- + Concrete subclasses must implement this method. + """ + + @abstractmethod + def tag(self, resource_id: int, tag: str) -> list[str]: + """ + Add a tag to a resource. + + Parameters + ---------- + resource_id : int + Identifier of the resource to tag. + tag : str + Tag to associate with the resource. + + Returns + ------- + list of str + Updated list of tags assigned to the resource. + + Notes + ----- + Concrete subclasses must implement this method. + """ + + @abstractmethod + def untag(self, resource_id: int, tag: str) -> list[str]: + """ + Remove a tag from a resource. + + Parameters + ---------- + resource_id : int + Identifier of the resource to untag. + tag : str + Tag to remove from the resource. + + Returns + ------- + list of str + Updated list of tags assigned to the resource. + + Notes + ----- + Concrete subclasses must implement this method. + """ + + @abstractmethod + def _get_endpoint_name(self) -> str: + """ + Return the endpoint name for the current resource type. + + Returns + ------- + str + Endpoint segment used in API paths. + + Notes + ----- + Datasets use the special endpoint name ``"data"`` instead of + their enum value. 
+ """ + + def _handle_delete_exception( + self, resource_type: str, exception: OpenMLServerException + ) -> None: + """ + Map V1 deletion error codes to more specific exceptions. + + Parameters + ---------- + resource_type : str + Endpoint name of the resource type. + exception : OpenMLServerException + Original exception raised during deletion. + + Raises + ------ + OpenMLNotAuthorizedError + If the resource cannot be deleted due to ownership or + dependent entities. + OpenMLServerError + If deletion fails for an unknown reason. + OpenMLServerException + If the error code is not specially handled. + """ + # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php + # Most exceptions are descriptive enough to be raised as their standard + # OpenMLServerException, however there are two cases where we add information: + # - a generic "failed" message, we direct them to the right issue board + # - when the user successfully authenticates with the server, + # but user is not allowed to take the requested action, + # in which case we specify a OpenMLNotAuthorizedError. + by_other_user = [323, 353, 393, 453, 594] + has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595] + unknown_reason = [325, 355, 394, 455, 593] + if exception.code in by_other_user: + raise OpenMLNotAuthorizedError( + message=( + f"The {resource_type} can not be deleted because it was not uploaded by you." 
+ ), + ) from exception + if exception.code in has_dependent_entities: + raise OpenMLNotAuthorizedError( + message=( + f"The {resource_type} can not be deleted because " + f"it still has associated entities: {exception.message}" + ), + ) from exception + if exception.code in unknown_reason: + raise OpenMLServerError( + message=( + f"The {resource_type} can not be deleted for unknown reason," + " please open an issue at: https://github.com/openml/openml/issues/new" + ), + ) from exception + raise exception + + def _not_supported(self, *, method: str) -> NoReturn: + """ + Raise an error indicating that a method is not supported. + + Parameters + ---------- + method : str + Name of the unsupported method. + + Raises + ------ + OpenMLNotSupportedError + If the current API version does not support the requested method + for the given resource type. + """ + version = getattr(self.api_version, "value", "unknown") + resource = getattr(self.resource_type, "value", "unknown") + + raise OpenMLNotSupportedError( + f"{self.__class__.__name__}: " + f"{version} API does not support `{method}` " + f"for resource `{resource}`" + ) diff --git a/openml/_api/resources/base/fallback.py b/openml/_api/resources/base/fallback.py new file mode 100644 index 000000000..9b8f64a17 --- /dev/null +++ b/openml/_api/resources/base/fallback.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from collections.abc import Callable +from typing import Any + +from openml.exceptions import OpenMLNotSupportedError + + +class FallbackProxy: + """ + Proxy object that provides transparent fallback across multiple API versions. + + This class delegates attribute access to a sequence of API implementations. + When a callable attribute is invoked and raises ``OpenMLNotSupportedError``, + the proxy automatically attempts the same method on subsequent API instances + until one succeeds. + + Parameters + ---------- + *api_versions : Any + One or more API implementation instances ordered by priority. 
+ The first API is treated as the primary implementation, and + subsequent APIs are used as fallbacks. + + Raises + ------ + ValueError + If no API implementations are provided. + + Notes + ----- + Attribute lookup is performed dynamically via ``__getattr__``. + Only methods that raise ``OpenMLNotSupportedError`` trigger fallback + behavior. Other exceptions are propagated immediately. + """ + + def __init__(self, *api_versions: Any): + if not api_versions: + raise ValueError("At least one API version must be provided") + self._apis = api_versions + + def __getattr__(self, name: str) -> Any: + """ + Dynamically resolve attribute access across API implementations. + + Parameters + ---------- + name : str + Name of the attribute being accessed. + + Returns + ------- + Any + The resolved attribute. If it is callable, a wrapped function + providing fallback behavior is returned. + + Raises + ------ + AttributeError + If none of the API implementations define the attribute. + """ + api, attr = self._find_attr(name) + if callable(attr): + return self._wrap_callable(name, api, attr) + return attr + + def _find_attr(self, name: str) -> tuple[Any, Any]: + """ + Find the first API implementation that defines a given attribute. + + Parameters + ---------- + name : str + Name of the attribute to search for. + + Returns + ------- + tuple of (Any, Any) + The API instance and the corresponding attribute. + + Raises + ------ + AttributeError + If no API implementation defines the attribute. + """ + for api in self._apis: + attr = getattr(api, name, None) + if attr is not None: + return api, attr + raise AttributeError(f"{self.__class__.__name__} has no attribute {name}") + + def _wrap_callable( + self, + name: str, + primary_api: Any, + primary_attr: Callable[..., Any], + ) -> Callable[..., Any]: + """ + Wrap a callable attribute to enable fallback behavior. + + Parameters + ---------- + name : str + Name of the method being wrapped. 
+ primary_api : Any + Primary API instance providing the callable. + primary_attr : Callable[..., Any] + Callable attribute obtained from the primary API. + + Returns + ------- + Callable[..., Any] + Wrapped function that attempts the primary call first and + falls back to other APIs if ``OpenMLNotSupportedError`` is raised. + """ + + def wrapper(*args: Any, **kwargs: Any) -> Any: + try: + return primary_attr(*args, **kwargs) + except OpenMLNotSupportedError: + return self._call_fallbacks(name, primary_api, *args, **kwargs) + + return wrapper + + def _call_fallbacks( + self, + name: str, + skip_api: Any, + *args: Any, + **kwargs: Any, + ) -> Any: + """ + Attempt to call a method on fallback API implementations. + + Parameters + ---------- + name : str + Name of the method to invoke. + skip_api : Any + API instance to skip (typically the primary API that already failed). + *args : Any + Positional arguments passed to the method. + **kwargs : Any + Keyword arguments passed to the method. + + Returns + ------- + Any + Result returned by the first successful fallback invocation. + + Raises + ------ + OpenMLNotSupportedError + If all API implementations either do not define the method + or raise ``OpenMLNotSupportedError``. 
+ """ + for api in self._apis: + if api is skip_api: + continue + attr = getattr(api, name, None) + if callable(attr): + try: + return attr(*args, **kwargs) + except OpenMLNotSupportedError: + continue + raise OpenMLNotSupportedError(f"Could not fallback to any API for method: {name}") diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py new file mode 100644 index 000000000..7d07885dc --- /dev/null +++ b/openml/_api/resources/base/resources.py @@ -0,0 +1,125 @@ +from __future__ import annotations + +from abc import abstractmethod +from collections.abc import Callable +from pathlib import Path +from typing import TYPE_CHECKING + +from openml.enums import ResourceType + +from .base import ResourceAPI + +if TYPE_CHECKING: + import pandas as pd + from requests import Response + from traitlets import Any + + from openml.tasks.task import OpenMLTask, TaskType + + +class DatasetAPI(ResourceAPI): + """Abstract API interface for dataset resources.""" + + resource_type: ResourceType = ResourceType.DATASET + + +class TaskAPI(ResourceAPI): + """Abstract API interface for task resources.""" + + resource_type: ResourceType = ResourceType.TASK + + @abstractmethod + def get( + self, + task_id: int, + ) -> OpenMLTask: + """ + API v1: + GET /task/{task_id} + + API v2: + GET /tasks/{task_id} + """ + ... + + # Task listing (V1 only) + @abstractmethod + def list( + self, + limit: int, + offset: int, + task_type: TaskType | int | None = None, + **kwargs: Any, + ) -> pd.DataFrame: + """ + List tasks with filters. + + API v1: + GET /task/list + + API v2: + Not available. + + Returns + ------- + pandas.DataFrame + """ + ... 
+ + def download( + self, + url: str, + handler: Callable[[Response, Path, str], None] | None = None, + encoding: str = "utf-8", + file_name: str = "response.txt", + md5_checksum: str | None = None, + ) -> Path: + return self._http.download( + url=url, + handler=handler, + encoding=encoding, + file_name=file_name, + md5_checksum=md5_checksum, + ) + + +class EvaluationMeasureAPI(ResourceAPI): + """Abstract API interface for evaluation measure resources.""" + + resource_type: ResourceType = ResourceType.EVALUATION_MEASURE + + +class EstimationProcedureAPI(ResourceAPI): + """Abstract API interface for estimation procedure resources.""" + + resource_type: ResourceType = ResourceType.ESTIMATION_PROCEDURE + + +class EvaluationAPI(ResourceAPI): + """Abstract API interface for evaluation resources.""" + + resource_type: ResourceType = ResourceType.EVALUATION + + +class FlowAPI(ResourceAPI): + """Abstract API interface for flow resources.""" + + resource_type: ResourceType = ResourceType.FLOW + + +class StudyAPI(ResourceAPI): + """Abstract API interface for study resources.""" + + resource_type: ResourceType = ResourceType.STUDY + + +class RunAPI(ResourceAPI): + """Abstract API interface for run resources.""" + + resource_type: ResourceType = ResourceType.RUN + + +class SetupAPI(ResourceAPI): + """Abstract API interface for setup resources.""" + + resource_type: ResourceType = ResourceType.SETUP diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py new file mode 100644 index 000000000..bba59b869 --- /dev/null +++ b/openml/_api/resources/base/versions.py @@ -0,0 +1,261 @@ +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any, cast + +import xmltodict + +from openml.enums import APIVersion, ResourceType +from openml.exceptions import ( + OpenMLServerException, +) + +from .base import ResourceAPI + +_LEGAL_RESOURCES_DELETE = [ + ResourceType.DATASET, + ResourceType.TASK, + ResourceType.FLOW, + 
ResourceType.STUDY, + ResourceType.RUN, + ResourceType.USER, +] + +_LEGAL_RESOURCES_TAG = [ + ResourceType.DATASET, + ResourceType.TASK, + ResourceType.FLOW, + ResourceType.SETUP, + ResourceType.RUN, +] + + +class ResourceV1API(ResourceAPI): + """ + Version 1 implementation of the OpenML resource API. + + This class provides XML-based implementations for publishing, + deleting, tagging, and untagging resources using the V1 API + endpoints. Responses are parsed using ``xmltodict``. + + Notes + ----- + V1 endpoints expect and return XML. Error handling follows the + legacy OpenML server behavior and maps specific error codes to + more descriptive exceptions where appropriate. + """ + + api_version: APIVersion = APIVersion.V1 + + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: + """ + Publish a new resource using the V1 API. + + Parameters + ---------- + path : str + API endpoint path for the upload. + files : Mapping of str to Any or None + Files to upload as part of the request payload. + + Returns + ------- + int + Identifier of the newly created resource. + + Raises + ------ + ValueError + If the server response does not contain a valid resource ID. + OpenMLServerException + If the server returns an error during upload. + """ + response = self._http.post(path, files=files) + parsed_response = xmltodict.parse(response.content) + return self._extract_id_from_upload(parsed_response) + + def delete(self, resource_id: int) -> bool: + """ + Delete a resource using the V1 API. + + Parameters + ---------- + resource_id : int + Identifier of the resource to delete. + + Returns + ------- + bool + ``True`` if the server confirms successful deletion. + + Raises + ------ + ValueError + If the resource type is not supported for deletion. + OpenMLNotAuthorizedError + If the user is not permitted to delete the resource. + OpenMLServerError + If deletion fails for an unknown reason. + OpenMLServerException + For other server-side errors. 
+ """ + if self.resource_type not in _LEGAL_RESOURCES_DELETE: + raise ValueError(f"Can't delete a {self.resource_type.value}") + + endpoint_name = self._get_endpoint_name() + path = f"{endpoint_name}/{resource_id}" + try: + response = self._http.delete(path) + result = xmltodict.parse(response.content) + return f"oml:{endpoint_name}_delete" in result + except OpenMLServerException as e: + self._handle_delete_exception(endpoint_name, e) + raise + + def tag(self, resource_id: int, tag: str) -> list[str]: + """ + Add a tag to a resource using the V1 API. + + Parameters + ---------- + resource_id : int + Identifier of the resource to tag. + tag : str + Tag to associate with the resource. + + Returns + ------- + list of str + Updated list of tags assigned to the resource. + + Raises + ------ + ValueError + If the resource type does not support tagging. + OpenMLServerException + If the server returns an error. + """ + if self.resource_type not in _LEGAL_RESOURCES_TAG: + raise ValueError(f"Can't tag a {self.resource_type.value}") + + endpoint_name = self._get_endpoint_name() + path = f"{endpoint_name}/tag" + data = {f"{endpoint_name}_id": resource_id, "tag": tag} + response = self._http.post(path, data=data) + + parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"}) + result = parsed_response[f"oml:{endpoint_name}_tag"] + tags: list[str] = result.get("oml:tag", []) + + return tags + + def untag(self, resource_id: int, tag: str) -> list[str]: + """ + Remove a tag from a resource using the V1 API. + + Parameters + ---------- + resource_id : int + Identifier of the resource to untag. + tag : str + Tag to remove from the resource. + + Returns + ------- + list of str + Updated list of tags assigned to the resource. + + Raises + ------ + ValueError + If the resource type does not support tagging. + OpenMLServerException + If the server returns an error. 
+ """ + if self.resource_type not in _LEGAL_RESOURCES_TAG: + raise ValueError(f"Can't untag a {self.resource_type.value}") + + endpoint_name = self._get_endpoint_name() + path = f"{endpoint_name}/untag" + data = {f"{endpoint_name}_id": resource_id, "tag": tag} + response = self._http.post(path, data=data) + + parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"}) + result = parsed_response[f"oml:{endpoint_name}_untag"] + tags: list[str] = result.get("oml:tag", []) + + return tags + + def _get_endpoint_name(self) -> str: + if self.resource_type == ResourceType.DATASET: + return "data" + return cast("str", self.resource_type.value) + + def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: + """ + Extract the resource identifier from an XML upload response. + + Parameters + ---------- + parsed : Mapping of str to Any + Parsed XML response as returned by ``xmltodict.parse``. + + Returns + ------- + int + Extracted resource identifier. + + Raises + ------ + ValueError + If the response structure is unexpected or no identifier + can be found. + """ + # reads id from upload response + # actual parsed dict: {"oml:upload_flow": {"@xmlns:oml": "...", "oml:id": "42"}} + + # xmltodict always gives exactly one root key + ((_, root_value),) = parsed.items() + + if not isinstance(root_value, Mapping): + raise ValueError("Unexpected XML structure") + + # Look for oml:id directly in the root value + if "oml:id" in root_value: + id_value = root_value["oml:id"] + if isinstance(id_value, (str, int)): + return int(id_value) + + # Fallback: check all values for numeric/string IDs + for v in root_value.values(): + if isinstance(v, (str, int)): + return int(v) + + raise ValueError("No ID found in upload response") + + +class ResourceV2API(ResourceAPI): + """ + Version 2 implementation of the OpenML resource API. + + This class represents the V2 API for resources. 
Operations such as + publishing, deleting, tagging, and untagging are currently not + supported and will raise ``OpenMLNotSupportedError``. + """ + + api_version: APIVersion = APIVersion.V2 + + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: # noqa: ARG002 + self._not_supported(method="publish") + + def delete(self, resource_id: int) -> bool: # noqa: ARG002 + self._not_supported(method="delete") + + def tag(self, resource_id: int, tag: str) -> list[str]: # noqa: ARG002 + self._not_supported(method="tag") + + def untag(self, resource_id: int, tag: str) -> list[str]: # noqa: ARG002 + self._not_supported(method="untag") + + def _get_endpoint_name(self) -> str: + return cast("str", self.resource_type.value) diff --git a/openml/_api/resources/dataset.py b/openml/_api/resources/dataset.py new file mode 100644 index 000000000..520594df9 --- /dev/null +++ b/openml/_api/resources/dataset.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .base import DatasetAPI, ResourceV1API, ResourceV2API + + +class DatasetV1API(ResourceV1API, DatasetAPI): + """Version 1 API implementation for dataset resources.""" + + +class DatasetV2API(ResourceV2API, DatasetAPI): + """Version 2 API implementation for dataset resources.""" diff --git a/openml/_api/resources/estimation_procedure.py b/openml/_api/resources/estimation_procedure.py new file mode 100644 index 000000000..a45f7af66 --- /dev/null +++ b/openml/_api/resources/estimation_procedure.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .base import EstimationProcedureAPI, ResourceV1API, ResourceV2API + + +class EstimationProcedureV1API(ResourceV1API, EstimationProcedureAPI): + """Version 1 API implementation for estimation procedure resources.""" + + +class EstimationProcedureV2API(ResourceV2API, EstimationProcedureAPI): + """Version 2 API implementation for estimation procedure resources.""" diff --git a/openml/_api/resources/evaluation.py b/openml/_api/resources/evaluation.py new file 
mode 100644 index 000000000..fe7e360a6 --- /dev/null +++ b/openml/_api/resources/evaluation.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .base import EvaluationAPI, ResourceV1API, ResourceV2API + + +class EvaluationV1API(ResourceV1API, EvaluationAPI): + """Version 1 API implementation for evaluation resources.""" + + +class EvaluationV2API(ResourceV2API, EvaluationAPI): + """Version 2 API implementation for evaluation resources.""" diff --git a/openml/_api/resources/evaluation_measure.py b/openml/_api/resources/evaluation_measure.py new file mode 100644 index 000000000..4ed5097f7 --- /dev/null +++ b/openml/_api/resources/evaluation_measure.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .base import EvaluationMeasureAPI, ResourceV1API, ResourceV2API + + +class EvaluationMeasureV1API(ResourceV1API, EvaluationMeasureAPI): + """Version 1 API implementation for evaluation measure resources.""" + + +class EvaluationMeasureV2API(ResourceV2API, EvaluationMeasureAPI): + """Version 2 API implementation for evaluation measure resources.""" diff --git a/openml/_api/resources/flow.py b/openml/_api/resources/flow.py new file mode 100644 index 000000000..1716d89d3 --- /dev/null +++ b/openml/_api/resources/flow.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .base import FlowAPI, ResourceV1API, ResourceV2API + + +class FlowV1API(ResourceV1API, FlowAPI): + """Version 1 API implementation for flow resources.""" + + +class FlowV2API(ResourceV2API, FlowAPI): + """Version 2 API implementation for flow resources.""" diff --git a/openml/_api/resources/run.py b/openml/_api/resources/run.py new file mode 100644 index 000000000..4caccb0b6 --- /dev/null +++ b/openml/_api/resources/run.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .base import ResourceV1API, ResourceV2API, RunAPI + + +class RunV1API(ResourceV1API, RunAPI): + """Version 1 API implementation for run resources.""" + + +class RunV2API(ResourceV2API, RunAPI): + 
"""Version 2 API implementation for run resources.""" diff --git a/openml/_api/resources/setup.py b/openml/_api/resources/setup.py new file mode 100644 index 000000000..2896d3d9f --- /dev/null +++ b/openml/_api/resources/setup.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .base import ResourceV1API, ResourceV2API, SetupAPI + + +class SetupV1API(ResourceV1API, SetupAPI): + """Version 1 API implementation for setup resources.""" + + +class SetupV2API(ResourceV2API, SetupAPI): + """Version 2 API implementation for setup resources.""" diff --git a/openml/_api/resources/study.py b/openml/_api/resources/study.py new file mode 100644 index 000000000..fb073555c --- /dev/null +++ b/openml/_api/resources/study.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .base import ResourceV1API, ResourceV2API, StudyAPI + + +class StudyV1API(ResourceV1API, StudyAPI): + """Version 1 API implementation for study resources.""" + + +class StudyV2API(ResourceV2API, StudyAPI): + """Version 2 API implementation for study resources.""" diff --git a/openml/_api/resources/task.py b/openml/_api/resources/task.py new file mode 100644 index 000000000..5146fdd2d --- /dev/null +++ b/openml/_api/resources/task.py @@ -0,0 +1,401 @@ +from __future__ import annotations + +import builtins +import warnings +from typing import Any + +import pandas as pd +import xmltodict + +from openml.tasks.task import ( + OpenMLClassificationTask, + OpenMLClusteringTask, + OpenMLLearningCurveTask, + OpenMLRegressionTask, + OpenMLTask, + TaskType, +) + +from .base import ResourceV1API, ResourceV2API, TaskAPI + + +class TaskV1API(ResourceV1API, TaskAPI): + def get(self, task_id: int) -> OpenMLTask: + """Download OpenML task for a given task ID. + + Downloads the task representation. + + Parameters + ---------- + task_id : int + The OpenML task id of the task to download. + get_dataset_kwargs : + Args and kwargs can be used pass optional parameters to + :meth:`openml.datasets.get_dataset`. 
+ + Returns + ------- + task: OpenMLTask + """ + if not isinstance(task_id, int): + raise TypeError(f"Task id should be integer, is {type(task_id)}") + + response = self._http.get(f"task/{task_id}", enable_cache=True) + return self._create_task_from_xml(response.text) + + def _create_task_from_xml(self, xml: str) -> OpenMLTask: + """Create a task given a xml string. + + Parameters + ---------- + xml : string + Task xml representation. + + Returns + ------- + OpenMLTask + """ + dic = xmltodict.parse(xml)["oml:task"] + estimation_parameters = {} + inputs = {} + # Due to the unordered structure we obtain, we first have to extract + # the possible keys of oml:input; dic["oml:input"] is a list of + # OrderedDicts + + # Check if there is a list of inputs + if isinstance(dic["oml:input"], list): + for input_ in dic["oml:input"]: + name = input_["@name"] + inputs[name] = input_ + # Single input case + elif isinstance(dic["oml:input"], dict): + name = dic["oml:input"]["@name"] + inputs[name] = dic["oml:input"] + + evaluation_measures = None + if "evaluation_measures" in inputs: + evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][ + "oml:evaluation_measure" + ] + + task_type = TaskType(int(dic["oml:task_type_id"])) + common_kwargs = { + "task_id": dic["oml:task_id"], + "task_type": dic["oml:task_type"], + "task_type_id": task_type, + "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"], + "evaluation_measure": evaluation_measures, + } + # TODO: add OpenMLClusteringTask? 
+ if task_type in ( + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, + ): + # Convert some more parameters + for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][ + "oml:parameter" + ]: + name = parameter["@name"] + text = parameter.get("#text", "") + estimation_parameters[name] = text + + common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:type"] + common_kwargs["estimation_procedure_id"] = int( + inputs["estimation_procedure"]["oml:estimation_procedure"]["oml:id"] + ) + + common_kwargs["estimation_parameters"] = estimation_parameters + common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][ + "oml:target_feature" + ] + common_kwargs["data_splits_url"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:data_splits_url"] + + cls = { + TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, + TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, + TaskType.CLUSTERING: OpenMLClusteringTask, + TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, + }.get(task_type) + if cls is None: + raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.") + return cls(**common_kwargs) # type: ignore + + def list( + self, + limit: int, + offset: int, + task_type: TaskType | int | None = None, + **kwargs: Any, + ) -> pd.DataFrame: + """ + Perform the api call to return a number of tasks having the given filters. + + Parameters + ---------- + Filter task_type is separated from the other filters because + it is used as task_type in the task description, but it is named + type when used as a filter in list tasks call. + limit: int + offset: int + task_type : TaskType, optional + Refers to the type of task. 
+ kwargs: dict, optional + Legal filter operators: tag, task_id (list), data_tag, status, limit, + offset, data_id, data_name, number_instances, number_features, + number_classes, number_missing_values. + + Returns + ------- + dataframe + """ + api_call = self._build_url(limit, offset, task_type, kwargs) + return self._parse_list_xml(api_call=api_call) + + def _build_url( + self, limit: int, offset: int, task_type: TaskType | int | None, kwargs: dict[str, Any] + ) -> str: + api_call = "task/list" + if limit is not None: + api_call += f"/limit/{limit}" + if offset is not None: + api_call += f"/offset/{offset}" + if task_type is not None: + tvalue = task_type.value if isinstance(task_type, TaskType) else task_type + api_call += f"/type/{tvalue}" + if kwargs is not None: + for operator, value in kwargs.items(): + if value is not None: + if operator == "task_id": + value = ",".join([str(int(i)) for i in value]) # noqa: PLW2901 + api_call += f"/{operator}/{value}" + return api_call + + def _parse_list_xml(self, api_call: str) -> pd.DataFrame: # noqa: C901, PLR0912 + """Returns a Pandas DataFrame with information about OpenML tasks. + + Parameters + ---------- + api_call : str + The API call specifying which tasks to return. + + Returns + ------- + A Pandas DataFrame with information about OpenML tasks. + + Raises + ------ + ValueError + If the XML returned by the OpenML API does not contain 'oml:tasks', '@xmlns:oml', + or has an incorrect value for '@xmlns:oml'. + KeyError + If an invalid key is found in the XML for a task. 
+ """ + xml_string = self._http.get(api_call).text + + tasks_dict = xmltodict.parse(xml_string, force_list=("oml:task", "oml:input")) + # Minimalistic check if the XML is useful + if "oml:tasks" not in tasks_dict: + raise ValueError(f'Error in return XML, does not contain "oml:runs": {tasks_dict}') + + if "@xmlns:oml" not in tasks_dict["oml:tasks"]: + raise ValueError( + f'Error in return XML, does not contain "oml:runs"/@xmlns:oml: {tasks_dict}' + ) + + if tasks_dict["oml:tasks"]["@xmlns:oml"] != "http://openml.org/openml": + raise ValueError( + "Error in return XML, value of " + '"oml:runs"/@xmlns:oml is not ' + f'"http://openml.org/openml": {tasks_dict!s}', + ) + + assert isinstance(tasks_dict["oml:tasks"]["oml:task"], list), type(tasks_dict["oml:tasks"]) + + tasks = {} + procs = self._get_estimation_procedure_list() + proc_dict = {x["id"]: x for x in procs} + + for task_ in tasks_dict["oml:tasks"]["oml:task"]: + tid = None + try: + tid = int(task_["oml:task_id"]) + task_type_int = int(task_["oml:task_type_id"]) + try: + task_type_id = TaskType(task_type_int) + except ValueError as e: + warnings.warn( + f"Could not create task type id for {task_type_int} due to error {e}", + RuntimeWarning, + stacklevel=2, + ) + continue + + task = { + "tid": tid, + "ttid": task_type_id, + "did": int(task_["oml:did"]), + "name": task_["oml:name"], + "task_type": task_["oml:task_type"], + "status": task_["oml:status"], + } + + # Other task inputs + for _input in task_.get("oml:input", []): + if _input["@name"] == "estimation_procedure": + task[_input["@name"]] = proc_dict[int(_input["#text"])]["name"] + else: + value = _input.get("#text") + task[_input["@name"]] = value + + # The number of qualities can range from 0 to infinity + for quality in task_.get("oml:quality", []): + if "#text" not in quality: + quality_value = 0.0 + else: + quality["#text"] = float(quality["#text"]) + if abs(int(quality["#text"]) - quality["#text"]) < 0.0000001: + quality["#text"] = 
int(quality["#text"]) + quality_value = quality["#text"] + task[quality["@name"]] = quality_value + tasks[tid] = task + except KeyError as e: + if tid is not None: + warnings.warn( + f"Invalid xml for task {tid}: {e}\nFrom {task_}", + RuntimeWarning, + stacklevel=2, + ) + else: + warnings.warn( + f"Could not find key {e} in {task_}!", RuntimeWarning, stacklevel=2 + ) + + return pd.DataFrame.from_dict(tasks, orient="index") + + def _get_estimation_procedure_list(self) -> builtins.list[dict[str, Any]]: + """Return a list of all estimation procedures which are on OpenML. + + Returns + ------- + procedures : list + A list of all estimation procedures. Every procedure is represented by + a dictionary containing the following information: id, task type id, + name, type, repeats, folds, stratified. + """ + url_suffix = "estimationprocedure/list" + xml_string = self._http.get(url_suffix).text + + procs_dict = xmltodict.parse(xml_string) + # Minimalistic check if the XML is useful + if "oml:estimationprocedures" not in procs_dict: + raise ValueError("Error in return XML, does not contain tag oml:estimationprocedures.") + + if "@xmlns:oml" not in procs_dict["oml:estimationprocedures"]: + raise ValueError( + "Error in return XML, does not contain tag " + "@xmlns:oml as a child of oml:estimationprocedures.", + ) + + if procs_dict["oml:estimationprocedures"]["@xmlns:oml"] != "http://openml.org/openml": + raise ValueError( + "Error in return XML, value of " + "oml:estimationprocedures/@xmlns:oml is not " + "http://openml.org/openml, but {}".format( + str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"]) + ), + ) + + procs: list[dict[str, Any]] = [] + for proc_ in procs_dict["oml:estimationprocedures"]["oml:estimationprocedure"]: + task_type_int = int(proc_["oml:ttid"]) + try: + task_type_id = TaskType(task_type_int) + procs.append( + { + "id": int(proc_["oml:id"]), + "task_type_id": task_type_id, + "name": proc_["oml:name"], + "type": proc_["oml:type"], + }, + ) + except 
ValueError as e: + warnings.warn( + f"Could not create task type id for {task_type_int} due to error {e}", + RuntimeWarning, + stacklevel=2, + ) + + return procs + + +class TaskV2API(ResourceV2API, TaskAPI): + def get(self, task_id: int) -> OpenMLTask: + """Download OpenML task for a given task ID. + + Downloads the task representation. + + Parameters + ---------- + task_id : int + The OpenML task id of the task to download. + + Returns + ------- + task: OpenMLTask + """ + response = self._http.get(f"tasks/{task_id}", enable_cache=True) + return self._create_task_from_json(response.json()) + + def _create_task_from_json(self, task_json: dict) -> OpenMLTask: + task_type_id = TaskType(int(task_json["task_type_id"])) + + inputs = {i["name"]: i for i in task_json.get("input", [])} + + source = inputs["source_data"]["data_set"] + + common_kwargs = { + "task_id": int(task_json["id"]), + "task_type": task_json["task_type"], + "task_type_id": task_type_id, + "data_set_id": int(source["data_set_id"]), + "evaluation_measure": None, + } + + if task_type_id in ( + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, + ): + est = inputs.get("estimation_procedure", {}).get("estimation_procedure") + + if est: + common_kwargs["estimation_procedure_id"] = int(est["id"]) + common_kwargs["estimation_procedure_type"] = est["type"] + common_kwargs["estimation_parameters"] = { + p["name"]: p.get("value") for p in est.get("parameter", []) + } + + common_kwargs["target_name"] = source.get("target_feature") + + cls = { + TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, + TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, + TaskType.CLUSTERING: OpenMLClusteringTask, + TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, + }[task_type_id] + + return cls(**common_kwargs) # type: ignore + + def list( + self, + limit: int, # noqa: ARG002 + offset: int, # noqa: ARG002 + task_type: TaskType | int | None = None, # noqa: ARG002 + **kwargs: 
Any, # noqa: ARG002 + ) -> pd.DataFrame: + raise self._not_supported(method="list") diff --git a/openml/_api/setup/__init__.py b/openml/_api/setup/__init__.py new file mode 100644 index 000000000..80545824f --- /dev/null +++ b/openml/_api/setup/__init__.py @@ -0,0 +1,10 @@ +from .backend import APIBackend +from .builder import APIBackendBuilder + +_backend = APIBackend.get_instance() + +__all__ = [ + "APIBackend", + "APIBackendBuilder", + "_backend", +] diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py new file mode 100644 index 000000000..1604fd074 --- /dev/null +++ b/openml/_api/setup/backend.py @@ -0,0 +1,139 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, ClassVar, cast + +import openml + +from .builder import APIBackendBuilder + +if TYPE_CHECKING: + from openml._api.clients import HTTPClient, MinIOClient + from openml._api.resources import ( + DatasetAPI, + EstimationProcedureAPI, + EvaluationAPI, + EvaluationMeasureAPI, + FlowAPI, + RunAPI, + SetupAPI, + StudyAPI, + TaskAPI, + ) + + +class APIBackend: + """ + Central backend for accessing all OpenML API resource interfaces. + + This class provides a singleton interface to dataset, task, flow, + evaluation, run, setup, study, and other resource APIs. It also + manages configuration through a nested ``Config`` object and + allows dynamic retrieval and updating of configuration values. + + Parameters + ---------- + config : Config, optional + Optional configuration object. If not provided, a default + ``Config`` instance is created. + + Attributes + ---------- + dataset : DatasetAPI + Interface for dataset-related API operations. + task : TaskAPI + Interface for task-related API operations. + evaluation_measure : EvaluationMeasureAPI + Interface for evaluation measure-related API operations. + estimation_procedure : EstimationProcedureAPI + Interface for estimation procedure-related API operations. 
+ evaluation : EvaluationAPI + Interface for evaluation-related API operations. + flow : FlowAPI + Interface for flow-related API operations. + study : StudyAPI + Interface for study-related API operations. + run : RunAPI + Interface for run-related API operations. + setup : SetupAPI + Interface for setup-related API operations. + """ + + _instance: ClassVar[APIBackend | None] = None + _backends: ClassVar[dict[str, APIBackendBuilder]] = {} + + @property + def _backend(self) -> APIBackendBuilder: + api_version = openml.config.api_version + fallback_api_version = openml.config.fallback_api_version + key = f"{api_version}_{fallback_api_version}" + + if key not in self._backends: + _backend = APIBackendBuilder( + api_version=api_version, + fallback_api_version=fallback_api_version, + ) + self._backends[key] = _backend + + return self._backends[key] + + @property + def dataset(self) -> DatasetAPI: + return cast("DatasetAPI", self._backend.dataset) + + @property + def task(self) -> TaskAPI: + return cast("TaskAPI", self._backend.task) + + @property + def evaluation_measure(self) -> EvaluationMeasureAPI: + return cast("EvaluationMeasureAPI", self._backend.evaluation_measure) + + @property + def estimation_procedure(self) -> EstimationProcedureAPI: + return cast("EstimationProcedureAPI", self._backend.estimation_procedure) + + @property + def evaluation(self) -> EvaluationAPI: + return cast("EvaluationAPI", self._backend.evaluation) + + @property + def flow(self) -> FlowAPI: + return cast("FlowAPI", self._backend.flow) + + @property + def study(self) -> StudyAPI: + return cast("StudyAPI", self._backend.study) + + @property + def run(self) -> RunAPI: + return cast("RunAPI", self._backend.run) + + @property + def setup(self) -> SetupAPI: + return cast("SetupAPI", self._backend.setup) + + @property + def http_client(self) -> HTTPClient: + return cast("HTTPClient", self._backend.http_client) + + @property + def fallback_http_client(self) -> HTTPClient | None: + return 
cast("HTTPClient | None", self._backend.fallback_http_client) + + @property + def minio_client(self) -> MinIOClient: + return cast("MinIOClient", self._backend.minio_client) + + @classmethod + def get_instance(cls) -> APIBackend: + """ + Get the singleton instance of the APIBackend. + + Returns + ------- + APIBackend + Singleton instance of the backend. + """ + if cls._instance is None: + cls._instance = cls() + return cls._instance diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py new file mode 100644 index 000000000..76d6e0970 --- /dev/null +++ b/openml/_api/setup/builder.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from openml._api.clients import HTTPClient, MinIOClient +from openml._api.resources import ( + API_REGISTRY, + FallbackProxy, +) +from openml.enums import ResourceType + +if TYPE_CHECKING: + from openml._api.resources import ResourceAPI + from openml.enums import APIVersion + + +class APIBackendBuilder: + """ + Builder for constructing API backend instances with all resource-specific APIs. + + This class organizes resource-specific API objects (datasets, tasks, + flows, evaluations, runs, setups, studies, etc.) and provides a + centralized access point for both the primary API version and an + optional fallback API version. + + The constructor automatically initializes: + + - HTTPClient for the primary API version + - Optional HTTPClient for a fallback API version + - MinIOClient for file storage operations + - Resource-specific API instances, optionally wrapped with fallback proxies + + Parameters + ---------- + api_version : APIVersion + The primary API version to use for all resource APIs and HTTP communication. + fallback_api_version : APIVersion | None, default=None + Optional fallback API version to wrap resource APIs with a FallbackProxy. + + Attributes + ---------- + dataset : ResourceAPI | FallbackProxy + API interface for dataset resources. 
+ task : ResourceAPI | FallbackProxy + API interface for task resources. + evaluation_measure : ResourceAPI | FallbackProxy + API interface for evaluation measure resources. + estimation_procedure : ResourceAPI | FallbackProxy + API interface for estimation procedure resources. + evaluation : ResourceAPI | FallbackProxy + API interface for evaluation resources. + flow : ResourceAPI | FallbackProxy + API interface for flow resources. + study : ResourceAPI | FallbackProxy + API interface for study resources. + run : ResourceAPI | FallbackProxy + API interface for run resources. + setup : ResourceAPI | FallbackProxy + API interface for setup resources. + http_client : HTTPClient + Client for HTTP communication using the primary API version. + fallback_http_client : HTTPClient | None + Client for HTTP communication using the fallback API version, if provided. + minio_client : MinIOClient + Client for file storage operations (MinIO/S3). + """ + + dataset: ResourceAPI | FallbackProxy + task: ResourceAPI | FallbackProxy + evaluation_measure: ResourceAPI | FallbackProxy + estimation_procedure: ResourceAPI | FallbackProxy + evaluation: ResourceAPI | FallbackProxy + flow: ResourceAPI | FallbackProxy + study: ResourceAPI | FallbackProxy + run: ResourceAPI | FallbackProxy + setup: ResourceAPI | FallbackProxy + http_client: HTTPClient + fallback_http_client: HTTPClient | None + minio_client: MinIOClient + + def __init__(self, api_version: APIVersion, fallback_api_version: APIVersion | None = None): + # initialize clients and resource APIs in-place + self._build(api_version, fallback_api_version) + + def _build(self, api_version: APIVersion, fallback_api_version: APIVersion | None) -> None: + """ + Construct an APIBackendBuilder instance from a configuration. + + This method initializes HTTP and MinIO clients, creates resource-specific + API instances for the primary API version, and optionally wraps them + with fallback proxies if a fallback API version is configured. 
+ + Parameters + ---------- + api_version : APIVersion + Primary API version used to build the HTTP client and resource APIs. + fallback_api_version : APIVersion | None + Optional fallback API version; when provided, each resource API is + wrapped in a ``FallbackProxy``. + + Returns + ------- + None + All clients and resource API interfaces are assigned in place. + """ + minio_client = MinIOClient() + primary_http_client = HTTPClient(api_version=api_version) + + self.http_client = primary_http_client + self.minio_client = minio_client + self.fallback_http_client = None + + resource_apis: dict[ResourceType, ResourceAPI | FallbackProxy] = {} + for resource_type, resource_api_cls in API_REGISTRY[api_version].items(): + resource_apis[resource_type] = resource_api_cls(primary_http_client, minio_client) + + if fallback_api_version is not None: + fallback_http_client = HTTPClient(api_version=fallback_api_version) + self.fallback_http_client = fallback_http_client + + fallback_resource_apis: dict[ResourceType, ResourceAPI | FallbackProxy] = {} + for resource_type, resource_api_cls in API_REGISTRY[fallback_api_version].items(): + fallback_resource_apis[resource_type] = resource_api_cls( + fallback_http_client, minio_client + ) + + resource_apis = { + name: FallbackProxy(resource_apis[name], fallback_resource_apis[name]) + for name in resource_apis + } + + self.dataset = resource_apis[ResourceType.DATASET] + self.task = resource_apis[ResourceType.TASK] + self.evaluation_measure = resource_apis[ResourceType.EVALUATION_MEASURE] + self.estimation_procedure = resource_apis[ResourceType.ESTIMATION_PROCEDURE] + self.evaluation = resource_apis[ResourceType.EVALUATION] + self.flow = resource_apis[ResourceType.FLOW] + self.study = resource_apis[ResourceType.STUDY] + self.run = resource_apis[ResourceType.RUN] + self.setup = resource_apis[ResourceType.SETUP] diff --git a/openml/_config.py b/openml/_config.py index a7034b9b4..a38b16b21 100644 --- a/openml/_config.py +++ b/openml/_config.py @@ -12,16 +12,68 @@ import warnings from collections.abc import Iterator from contextlib import
contextmanager +from copy import deepcopy from dataclasses import dataclass, field, fields, replace from io import StringIO from pathlib import Path from typing import Any, ClassVar, Literal, cast from urllib.parse import urlparse +from openml.enums import APIVersion, ServerMode + +from .__version__ import __version__ + logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") +_PROD_SERVERS: dict[APIVersion, dict[str, str | None]] = { + APIVersion.V1: { + "server": "https://www.openml.org/api/v1/xml/", + "apikey": None, + }, + APIVersion.V2: { + "server": None, + "apikey": None, + }, +} + +_TEST_SERVERS: dict[APIVersion, dict[str, str | None]] = { + APIVersion.V1: { + "server": "https://test.openml.org/api/v1/xml/", + "apikey": "normaluser", + }, + APIVersion.V2: { + "server": None, + "apikey": None, + }, +} + +_TEST_SERVERS_LOCAL: dict[APIVersion, dict[str, str | None]] = { + APIVersion.V1: { + "server": "http://localhost:8000/api/v1/xml/", + "apikey": "normaluser", + }, + APIVersion.V2: { + "server": "http://localhost:8082/", + "apikey": "AD000000000000000000000000000000", + }, +} + +_SERVERS_REGISTRY: dict[ServerMode, dict[APIVersion, dict[str, str | None]]] = { + ServerMode.PRODUCTION: _PROD_SERVERS, + ServerMode.TEST: ( + _TEST_SERVERS_LOCAL if os.getenv("OPENML_USE_LOCAL_SERVICES") == "true" else _TEST_SERVERS + ), +} + + +def _get_servers(mode: ServerMode) -> dict[APIVersion, dict[str, str | None]]: + if mode not in ServerMode: + raise ValueError(f'invalid mode="{mode}" allowed modes: {", ".join(list(ServerMode))}') + return deepcopy(_SERVERS_REGISTRY[mode]) + + def _resolve_default_cache_dir() -> Path: user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") if user_defined_cache_dir is not None: @@ -57,19 +109,38 @@ def _resolve_default_cache_dir() -> Path: class OpenMLConfig: """Dataclass storing the OpenML configuration.""" - apikey: str | None = "" - server: str = "https://www.openml.org/api/v1/xml" + servers: dict[APIVersion, 
dict[str, str | None]] = field( + default_factory=lambda: _get_servers(ServerMode.PRODUCTION) + ) + api_version: APIVersion = APIVersion.V1 + fallback_api_version: APIVersion | None = None cachedir: Path = field(default_factory=_resolve_default_cache_dir) avoid_duplicate_runs: bool = False retry_policy: Literal["human", "robot"] = "human" connection_n_retries: int = 5 show_progress: bool = False - def __setattr__(self, name: str, value: Any) -> None: - if name == "apikey" and not isinstance(value, (type(None), str)): - raise TypeError("apikey must be a string or None") + @property + def server(self) -> str: + server = self.servers[self.api_version]["server"] + if server is None: + servers_repr = {k.value: v for k, v in self.servers.items()} + raise ValueError( + f'server found to be None for api_version="{self.api_version}" in {servers_repr}' + ) + return server - super().__setattr__(name, value) + @server.setter + def server(self, value: str | None) -> None: + self.servers[self.api_version]["server"] = value + + @property + def apikey(self) -> str | None: + return self.servers[self.api_version]["apikey"] + + @apikey.setter + def apikey(self, value: str | None) -> None: + self.servers[self.api_version]["apikey"] = value class OpenMLConfigManager: @@ -81,9 +152,8 @@ def __init__(self) -> None: self.OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" self.OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" - self._TEST_SERVER_NORMAL_USER_KEY = "normaluser" self.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR = "OPENML_TEST_SERVER_ADMIN_KEY" - self.TEST_SERVER_URL = "https://test.openml.org" + self._HEADERS: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} self._config: OpenMLConfig = OpenMLConfig() # for legacy test `test_non_writable_home` @@ -116,7 +186,7 @@ def __setattr__(self, name: str, value: Any) -> None: "_examples", "OPENML_CACHE_DIR_ENV_VAR", "OPENML_SKIP_PARQUET_ENV_VAR", - "_TEST_SERVER_NORMAL_USER_KEY", + "_HEADERS", }: return object.__setattr__(self, 
name, value) @@ -127,6 +197,10 @@ def __setattr__(self, name: str, value: Any) -> None: object.__setattr__(self, "_config", replace(self._config, **{name: value})) return None + if name in ["server", "apikey"]: + setattr(self._config, name, value) + return None + object.__setattr__(self, name, value) return None @@ -190,6 +264,48 @@ def get_server_base_url(self) -> str: domain, _ = self._config.server.split("/api", maxsplit=1) return domain.replace("api", "www") + def _get_servers(self, mode: ServerMode) -> dict[APIVersion, dict[str, str | None]]: + return _get_servers(mode) + + def _set_servers(self, mode: ServerMode) -> None: + servers = self._get_servers(mode) + self._config = replace(self._config, servers=servers) + + def get_production_servers(self) -> dict[APIVersion, dict[str, str | None]]: + return self._get_servers(mode=ServerMode.PRODUCTION) + + def get_test_servers(self) -> dict[APIVersion, dict[str, str | None]]: + return self._get_servers(mode=ServerMode.TEST) + + def use_production_servers(self) -> None: + self._set_servers(mode=ServerMode.PRODUCTION) + + def use_test_servers(self) -> None: + self._set_servers(mode=ServerMode.TEST) + + def set_api_version( + self, + api_version: APIVersion, + fallback_api_version: APIVersion | None = None, + ) -> None: + if api_version not in APIVersion: + raise ValueError( + f'invalid api_version="{api_version}" ' + f"allowed versions: {', '.join(list(APIVersion))}" + ) + + if fallback_api_version is not None and fallback_api_version not in APIVersion: + raise ValueError( + f'invalid fallback_api_version="{fallback_api_version}" ' + f"allowed versions: {', '.join(list(APIVersion))}" + ) + + self._config = replace( + self._config, + api_version=api_version, + fallback_api_version=fallback_api_version, + ) + def set_retry_policy( self, value: Literal["human", "robot"], n_retries: int | None = None ) -> None: @@ -317,13 +433,18 @@ def _setup(self, config: dict[str, Any] | None = None) -> None: self._config = replace( 
self._config, - apikey=config["apikey"], - server=config["server"], + servers=config["servers"], + api_version=config["api_version"], + fallback_api_version=config["fallback_api_version"], show_progress=config["show_progress"], avoid_duplicate_runs=config["avoid_duplicate_runs"], retry_policy=config["retry_policy"], connection_n_retries=int(config["connection_n_retries"]), ) + if "server" in config: + self._config.server = config["server"] + if "apikey" in config: + self._config.apikey = config["apikey"] user_defined_cache_dir = os.environ.get(self.OPENML_CACHE_DIR_ENV_VAR) if user_defined_cache_dir is not None: @@ -393,14 +514,12 @@ def overwrite_config_context(self, config: dict[str, Any]) -> Iterator[dict[str, class ConfigurationForExamples: """Allows easy switching to and from a test configuration, used for examples.""" - _last_used_server = None - _last_used_key = None + _last_used_servers = None _start_last_called = False def __init__(self, manager: OpenMLConfigManager): self._manager = manager - self._test_apikey = manager._TEST_SERVER_NORMAL_USER_KEY - self._test_server = f"{manager.TEST_SERVER_URL}/api/v1/xml" + self._test_servers = manager.get_test_servers() def start_using_configuration_for_example(self) -> None: """Sets the configuration to connect to the test server with valid apikey. @@ -408,27 +527,22 @@ def start_using_configuration_for_example(self) -> None: To configuration as was before this call is stored, and can be recovered by using the `stop_use_example_configuration` method. """ - if ( - self._start_last_called - and self._manager._config.server == self._test_server - and self._manager._config.apikey == self._test_apikey - ): + if self._start_last_called and self._manager._config.servers == self._test_servers: # Method is called more than once in a row without modifying the server or apikey. # We don't want to save the current test configuration as a last used configuration. 
return - self._last_used_server = self._manager._config.server - self._last_used_key = self._manager._config.apikey + self._last_used_servers = self._manager._config.servers type(self)._start_last_called = True # Test server key for examples self._manager._config = replace( self._manager._config, - server=self._test_server, - apikey=self._test_apikey, + servers=self._test_servers, ) + test_server = self._test_servers[self._manager._config.api_version]["server"] warnings.warn( - f"Switching to the test server {self._test_server} to not upload results to " + f"Switching to the test server {test_server} to not upload results to " "the live server. Using the test server may result in reduced performance of the " "API!", stacklevel=2, @@ -446,8 +560,7 @@ def stop_using_configuration_for_example(self) -> None: self._manager._config = replace( self._manager._config, - server=cast("str", self._last_used_server), - apikey=cast("str", self._last_used_key), + servers=cast("dict[APIVersion, dict[str, str | None]]", self._last_used_servers), ) type(self)._start_last_called = False diff --git a/openml/cli.py b/openml/cli.py index 838f774d1..1415d0af9 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -8,10 +8,12 @@ from collections.abc import Callable from dataclasses import fields from pathlib import Path +from typing import cast from urllib.parse import urlparse import openml from openml.__version__ import __version__ +from openml.enums import APIVersion def is_hex(string_: str) -> bool: @@ -110,9 +112,9 @@ def check_server(server: str) -> str: def replace_shorthand(server: str) -> str: if server == "test": - return f"{openml.config.TEST_SERVER_URL}/api/v1/xml" + return cast("str", openml.config.get_test_servers()[APIVersion.V1]["server"]) if server == "production_server": - return "https://www.openml.org/api/v1/xml" + return cast("str", openml.config.get_production_servers()[APIVersion.V1]["server"]) return server configure_field( diff --git a/openml/enums.py b/openml/enums.py 
new file mode 100644 index 000000000..8c8048e07 --- /dev/null +++ b/openml/enums.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from enum import Enum + + +class ServerMode(str, Enum): + """Supported modes in server.""" + + PRODUCTION = "production" + TEST = "test" + + +class APIVersion(str, Enum): + """Supported OpenML API versions.""" + + V1 = "v1" + V2 = "v2" + + +class ResourceType(str, Enum): + """Canonical resource types exposed by the OpenML API.""" + + DATASET = "dataset" + TASK = "task" + TASK_TYPE = "task_type" + EVALUATION_MEASURE = "evaluation_measure" + ESTIMATION_PROCEDURE = "estimation_procedure" + EVALUATION = "evaluation" + FLOW = "flow" + STUDY = "study" + RUN = "run" + SETUP = "setup" + USER = "user" + + +class RetryPolicy(str, Enum): + """Retry behavior for failed API requests.""" + + HUMAN = "human" + ROBOT = "robot" diff --git a/openml/exceptions.py b/openml/exceptions.py index 1c1343ff3..e96ebfcb2 100644 --- a/openml/exceptions.py +++ b/openml/exceptions.py @@ -88,3 +88,7 @@ def __init__(self, message: str): class ObjectNotPublishedError(PyOpenMLError): """Indicates an object has not been published yet.""" + + +class OpenMLNotSupportedError(PyOpenMLError): + """Raised when an API operation is not supported for a resource/version.""" diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index 3fbc7adee..e83db9f72 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -1,19 +1,14 @@ # License: BSD 3-Clause from __future__ import annotations -import os -import re import warnings from functools import partial -from typing import Any +from typing import TYPE_CHECKING, Any import pandas as pd -import xmltodict -import openml._api_calls import openml.utils from openml.datasets import get_dataset -from openml.exceptions import OpenMLCacheException from .task import ( OpenMLClassificationTask, @@ -21,109 +16,13 @@ OpenMLLearningCurveTask, OpenMLRegressionTask, OpenMLSupervisedTask, - OpenMLTask, TaskType, ) 
-TASKS_CACHE_DIR_NAME = "tasks" - - -def _get_cached_tasks() -> dict[int, OpenMLTask]: - """Return a dict of all the tasks which are cached locally. - - Returns - ------- - tasks : OrderedDict - A dict of all the cached tasks. Each task is an instance of - OpenMLTask. - """ - task_cache_dir = openml.utils._create_cache_directory(TASKS_CACHE_DIR_NAME) - directory_content = os.listdir(task_cache_dir) # noqa: PTH208 - directory_content.sort() - - # Find all dataset ids for which we have downloaded the dataset - # description - tids = (int(did) for did in directory_content if re.match(r"[0-9]*", did)) - return {tid: _get_cached_task(tid) for tid in tids} - - -def _get_cached_task(tid: int) -> OpenMLTask: - """Return a cached task based on the given id. - - Parameters - ---------- - tid : int - Id of the task. - - Returns - ------- - OpenMLTask - """ - tid_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, tid) - - task_xml_path = tid_cache_dir / "task.xml" - try: - with task_xml_path.open(encoding="utf8") as fh: - return _create_task_from_xml(fh.read()) - except OSError as e: - openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir) - raise OpenMLCacheException(f"Task file for tid {tid} not cached") from e - - -def _get_estimation_procedure_list() -> list[dict[str, Any]]: - """Return a list of all estimation procedures which are on OpenML. - - Returns - ------- - procedures : list - A list of all estimation procedures. Every procedure is represented by - a dictionary containing the following information: id, task type id, - name, type, repeats, folds, stratified. 
- """ - url_suffix = "estimationprocedure/list" - xml_string = openml._api_calls._perform_api_call(url_suffix, "get") - - procs_dict = xmltodict.parse(xml_string) - # Minimalistic check if the XML is useful - if "oml:estimationprocedures" not in procs_dict: - raise ValueError("Error in return XML, does not contain tag oml:estimationprocedures.") - - if "@xmlns:oml" not in procs_dict["oml:estimationprocedures"]: - raise ValueError( - "Error in return XML, does not contain tag " - "@xmlns:oml as a child of oml:estimationprocedures.", - ) - - if procs_dict["oml:estimationprocedures"]["@xmlns:oml"] != "http://openml.org/openml": - raise ValueError( - "Error in return XML, value of " - "oml:estimationprocedures/@xmlns:oml is not " - "http://openml.org/openml, but {}".format( - str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"]) - ), - ) - - procs: list[dict[str, Any]] = [] - for proc_ in procs_dict["oml:estimationprocedures"]["oml:estimationprocedure"]: - task_type_int = int(proc_["oml:ttid"]) - try: - task_type_id = TaskType(task_type_int) - procs.append( - { - "id": int(proc_["oml:id"]), - "task_type_id": task_type_id, - "name": proc_["oml:name"], - "type": proc_["oml:type"], - }, - ) - except ValueError as e: - warnings.warn( - f"Could not create task type id for {task_type_int} due to error {e}", - RuntimeWarning, - stacklevel=2, - ) - - return procs +if TYPE_CHECKING: + from .task import ( + OpenMLTask, + ) def list_tasks( # noqa: PLR0913 @@ -175,7 +74,7 @@ def list_tasks( # noqa: PLR0913 calculated for the associated dataset, some of these are also returned. 
""" listing_call = partial( - _list_tasks, + openml._backend.task.list, task_type=task_type, tag=tag, data_tag=data_tag, @@ -194,151 +93,6 @@ def list_tasks( # noqa: PLR0913 return pd.concat(batches) -def _list_tasks( - limit: int, - offset: int, - task_type: TaskType | int | None = None, - **kwargs: Any, -) -> pd.DataFrame: - """ - Perform the api call to return a number of tasks having the given filters. - - Parameters - ---------- - Filter task_type is separated from the other filters because - it is used as task_type in the task description, but it is named - type when used as a filter in list tasks call. - limit: int - offset: int - task_type : TaskType, optional - Refers to the type of task. - kwargs: dict, optional - Legal filter operators: tag, task_id (list), data_tag, status, limit, - offset, data_id, data_name, number_instances, number_features, - number_classes, number_missing_values. - - Returns - ------- - dataframe - """ - api_call = "task/list" - if limit is not None: - api_call += f"/limit/{limit}" - if offset is not None: - api_call += f"/offset/{offset}" - if task_type is not None: - tvalue = task_type.value if isinstance(task_type, TaskType) else task_type - api_call += f"/type/{tvalue}" - if kwargs is not None: - for operator, value in kwargs.items(): - if value is not None: - if operator == "task_id": - value = ",".join([str(int(i)) for i in value]) # noqa: PLW2901 - api_call += f"/{operator}/{value}" - - return __list_tasks(api_call=api_call) - - -def __list_tasks(api_call: str) -> pd.DataFrame: # noqa: C901, PLR0912 - """Returns a Pandas DataFrame with information about OpenML tasks. - - Parameters - ---------- - api_call : str - The API call specifying which tasks to return. - - Returns - ------- - A Pandas DataFrame with information about OpenML tasks. - - Raises - ------ - ValueError - If the XML returned by the OpenML API does not contain 'oml:tasks', '@xmlns:oml', - or has an incorrect value for '@xmlns:oml'. 
- KeyError - If an invalid key is found in the XML for a task. - """ - xml_string = openml._api_calls._perform_api_call(api_call, "get") - tasks_dict = xmltodict.parse(xml_string, force_list=("oml:task", "oml:input")) - # Minimalistic check if the XML is useful - if "oml:tasks" not in tasks_dict: - raise ValueError(f'Error in return XML, does not contain "oml:runs": {tasks_dict}') - - if "@xmlns:oml" not in tasks_dict["oml:tasks"]: - raise ValueError( - f'Error in return XML, does not contain "oml:runs"/@xmlns:oml: {tasks_dict}' - ) - - if tasks_dict["oml:tasks"]["@xmlns:oml"] != "http://openml.org/openml": - raise ValueError( - "Error in return XML, value of " - '"oml:runs"/@xmlns:oml is not ' - f'"http://openml.org/openml": {tasks_dict!s}', - ) - - assert isinstance(tasks_dict["oml:tasks"]["oml:task"], list), type(tasks_dict["oml:tasks"]) - - tasks = {} - procs = _get_estimation_procedure_list() - proc_dict = {x["id"]: x for x in procs} - - for task_ in tasks_dict["oml:tasks"]["oml:task"]: - tid = None - try: - tid = int(task_["oml:task_id"]) - task_type_int = int(task_["oml:task_type_id"]) - try: - task_type_id = TaskType(task_type_int) - except ValueError as e: - warnings.warn( - f"Could not create task type id for {task_type_int} due to error {e}", - RuntimeWarning, - stacklevel=2, - ) - continue - - task = { - "tid": tid, - "ttid": task_type_id, - "did": int(task_["oml:did"]), - "name": task_["oml:name"], - "task_type": task_["oml:task_type"], - "status": task_["oml:status"], - } - - # Other task inputs - for _input in task_.get("oml:input", []): - if _input["@name"] == "estimation_procedure": - task[_input["@name"]] = proc_dict[int(_input["#text"])]["name"] - else: - value = _input.get("#text") - task[_input["@name"]] = value - - # The number of qualities can range from 0 to infinity - for quality in task_.get("oml:quality", []): - if "#text" not in quality: - quality_value = 0.0 - else: - quality["#text"] = float(quality["#text"]) - if 
abs(int(quality["#text"]) - quality["#text"]) < 0.0000001: - quality["#text"] = int(quality["#text"]) - quality_value = quality["#text"] - task[quality["@name"]] = quality_value - tasks[tid] = task - except KeyError as e: - if tid is not None: - warnings.warn( - f"Invalid xml for task {tid}: {e}\nFrom {task_}", - RuntimeWarning, - stacklevel=2, - ) - else: - warnings.warn(f"Could not find key {e} in {task_}!", RuntimeWarning, stacklevel=2) - - return pd.DataFrame.from_dict(tasks, orient="index") - - def get_tasks( task_ids: list[int], download_data: bool | None = None, @@ -346,7 +100,7 @@ def get_tasks( ) -> list[OpenMLTask]: """Download tasks. - This function iterates :meth:`openml.tasks.get_task`. + This function iterates :meth:`openml.task.get`. Parameters ---------- @@ -412,136 +166,35 @@ def get_task( ------- task: OpenMLTask """ + from openml._api.resources.task import TaskV1API, TaskV2API + if not isinstance(task_id, int): raise TypeError(f"Task id should be integer, is {type(task_id)}") - task_cache_directory = openml.utils._create_cache_directory_for_id( - TASKS_CACHE_DIR_NAME, task_id - ) - task_cache_directory_existed = task_cache_directory.exists() - try: - task = _get_task_description(task_id) - dataset = get_dataset(task.dataset_id, **get_dataset_kwargs) - # List of class labels available in dataset description - # Including class labels as part of task meta data handles - # the case where data download was initially disabled - if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): - assert task.target_name is not None, ( - "Supervised tasks must define a target feature before retrieving class labels." 
- ) - task.class_labels = dataset.retrieve_class_labels(task.target_name) - # Clustering tasks do not have class labels - # and do not offer download_split - if download_splits and isinstance(task, OpenMLSupervisedTask): - task.download_split() - except Exception as e: - if not task_cache_directory_existed: - openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, task_cache_directory) - raise e - - return task - + task = openml._backend.task.get(task_id) + dataset = get_dataset(task.dataset_id, **get_dataset_kwargs) -def _get_task_description(task_id: int) -> OpenMLTask: - try: - return _get_cached_task(task_id) - except OpenMLCacheException: - _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) - xml_file = _cache_dir / "task.xml" - task_xml = openml._api_calls._perform_api_call(f"task/{task_id}", "get") - - with xml_file.open("w", encoding="utf8") as fh: - fh.write(task_xml) - return _create_task_from_xml(task_xml) - - -def _create_task_from_xml(xml: str) -> OpenMLTask: - """Create a task given a xml string. - - Parameters - ---------- - xml : string - Task xml representation. 
+ if ( + isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)) + and task.target_name is not None + ): + task.class_labels = dataset.retrieve_class_labels(task.target_name) - Returns - ------- - OpenMLTask - """ - dic = xmltodict.parse(xml)["oml:task"] - estimation_parameters = {} - inputs = {} - # Due to the unordered structure we obtain, we first have to extract - # the possible keys of oml:input; dic["oml:input"] is a list of - # OrderedDicts - - # Check if there is a list of inputs - if isinstance(dic["oml:input"], list): - for input_ in dic["oml:input"]: - name = input_["@name"] - inputs[name] = input_ - # Single input case - elif isinstance(dic["oml:input"], dict): - name = dic["oml:input"]["@name"] - inputs[name] = dic["oml:input"] - - evaluation_measures = None - if "evaluation_measures" in inputs: - evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][ - "oml:evaluation_measure" - ] - - task_type = TaskType(int(dic["oml:task_type_id"])) - common_kwargs = { - "task_id": dic["oml:task_id"], - "task_type": dic["oml:task_type"], - "task_type_id": task_type, - "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"], - "evaluation_measure": evaluation_measures, - } - # TODO: add OpenMLClusteringTask? 
- if task_type in ( - TaskType.SUPERVISED_CLASSIFICATION, - TaskType.SUPERVISED_REGRESSION, - TaskType.LEARNING_CURVE, + if ( + download_splits + and isinstance(task, OpenMLSupervisedTask) + and isinstance(openml._backend.task, TaskV1API) ): - # Convert some more parameters - for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][ - "oml:parameter" - ]: - name = parameter["@name"] - text = parameter.get("#text", "") - estimation_parameters[name] = text - - common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][ - "oml:estimation_procedure" - ]["oml:type"] - common_kwargs["estimation_procedure_id"] = int( - inputs["estimation_procedure"]["oml:estimation_procedure"]["oml:id"] + task.download_split() + elif download_splits and isinstance(openml._backend.task, TaskV2API): + warnings.warn( + "`download_splits` is not yet supported in the v2 API and will be ignored.", + stacklevel=2, ) - common_kwargs["estimation_parameters"] = estimation_parameters - common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"]["oml:target_feature"] - common_kwargs["data_splits_url"] = inputs["estimation_procedure"][ - "oml:estimation_procedure" - ]["oml:data_splits_url"] - - cls = { - TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, - TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, - TaskType.CLUSTERING: OpenMLClusteringTask, - TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, - }.get(task_type) - if cls is None: - raise NotImplementedError( - f"Task type '{common_kwargs['task_type']}' is not supported. " - f"Supported task types: SUPERVISED_CLASSIFICATION," - f"SUPERVISED_REGRESSION, CLUSTERING, LEARNING_CURVE." - f"Please check the OpenML documentation for available task types." 
- ) - return cls(**common_kwargs) # type: ignore + return task -# TODO(eddiebergman): overload on `task_type` def create_task( task_type: TaskType, dataset_id: int, @@ -629,4 +282,4 @@ def delete_task(task_id: int) -> bool: bool True if the deletion was successful. False otherwise. """ - return openml.utils._delete_entity("task", task_id) + return openml._backend.task.delete(task_id) diff --git a/openml/tasks/task.py b/openml/tasks/task.py index ab3cb3da4..03fc46760 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -241,6 +241,46 @@ def _parse_publish_response(self, xml_response: dict) -> None: """Parse the id from the xml_response and assign it to self.""" self.task_id = int(xml_response["oml:upload_task"]["oml:id"]) + def publish(self) -> OpenMLTask: + """Publish this task to OpenML server. + + Returns + ------- + self : OpenMLTask + """ + file_elements = self._get_file_elements() + if "description" not in file_elements: + file_elements["description"] = self._to_xml() + task_id = openml._backend.task.publish(path="task", files=file_elements) + self.task_id = task_id + return self + + def push_tag(self, tag: str) -> None: + """Annotates this task with a tag on the server. + + Parameters + ---------- + tag : str + Tag to attach to the task. + """ + if self.task_id is None: + raise ValueError("Task does not have an ID. Please publish the task before tagging.") + openml._backend.task.tag(self.task_id, tag) + + def remove_tag(self, tag: str) -> None: + """Removes a tag from this task on the server. + + Parameters + ---------- + tag : str + Tag to remove from the task. + """ + if self.task_id is None: + raise ValueError( + "Task does not have an ID. Please publish the task before untagging." + ) + openml._backend.task.untag(self.task_id, tag) + class OpenMLSupervisedTask(OpenMLTask, ABC): """OpenML Supervised Classification object. 
diff --git a/openml/testing.py b/openml/testing.py index 9f694f9bf..5151a5a62 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -47,9 +47,7 @@ class TestBase(unittest.TestCase): "user": [], } flow_name_tracker: ClassVar[list[str]] = [] - test_server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml" admin_key = os.environ.get(openml.config.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR) - user_key = openml.config._TEST_SERVER_NORMAL_USER_KEY # creating logger for tracking files uploaded to test server logger = logging.getLogger("unit_tests_published_entities") @@ -99,8 +97,6 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: os.chdir(self.workdir) self.cached = True - openml.config.apikey = TestBase.user_key - self.production_server = "https://www.openml.org/api/v1/xml" openml.config.set_root_cache_directory(str(self.workdir)) # Increase the number of retries to avoid spurious server failures @@ -114,8 +110,7 @@ def use_production_server(self) -> None: Please use this sparingly - it is better to use the test server. 
""" - openml.config.server = self.production_server - openml.config.apikey = "" + openml.config.use_production_servers() def tearDown(self) -> None: """Tear down the test""" diff --git a/tests/conftest.py b/tests/conftest.py index 1967f1fad..202da8bf5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -34,6 +34,8 @@ from pathlib import Path import pytest import openml_sklearn +from openml._api import HTTPClient, MinIOClient +from openml.enums import APIVersion import openml from openml.testing import TestBase @@ -97,8 +99,7 @@ def delete_remote_files(tracker, flow_names) -> None: :param tracker: Dict :return: None """ - openml.config.server = TestBase.test_server - openml.config.apikey = TestBase.user_key + openml.config.use_test_servers() # reordering to delete sub flows at the end of flows # sub-flows have shorter names, hence, sorting by descending order of flow name length @@ -250,8 +251,23 @@ def test_files_directory() -> Path: @pytest.fixture(scope="session") -def test_api_key() -> str: - return TestBase.user_key +def test_server_v1() -> str: + return openml.config.get_test_servers()[APIVersion.V1]["server"] + + +@pytest.fixture(scope="session") +def test_apikey_v1() -> str: + return openml.config.get_test_servers()[APIVersion.V1]["apikey"] + + +@pytest.fixture(scope="session") +def test_server_v2() -> str: + return openml.config.get_test_servers()[APIVersion.V2]["server"] + + +@pytest.fixture(scope="session") +def test_apikey_v2() -> str: + return openml.config.get_test_servers()[APIVersion.V2]["apikey"] @pytest.fixture(autouse=True, scope="function") @@ -272,15 +288,14 @@ def as_robot() -> Iterator[None]: @pytest.fixture(autouse=True) def with_server(request): - if os.getenv("OPENML_USE_LOCAL_SERVICES") == "true": - openml.config.TEST_SERVER_URL = "http://localhost:8000" + openml.config.set_api_version(APIVersion.V1) + if "production_server" in request.keywords: - openml.config.server = "https://www.openml.org/api/v1/xml" - openml.config.apikey = 
None + openml.config.use_production_servers() yield return - openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml" - openml.config.apikey = TestBase.user_key + + openml.config.use_test_servers() yield @@ -304,8 +319,8 @@ def with_test_cache(test_files_directory, request): openml.config.set_root_cache_directory(_root_cache_directory) if tmp_cache.exists(): shutil.rmtree(tmp_cache) - + @pytest.fixture def static_cache_dir(): return Path(__file__).parent / "files" @@ -315,4 +330,19 @@ def workdir(tmp_path): original_cwd = Path.cwd() os.chdir(tmp_path) yield tmp_path - os.chdir(original_cwd) \ No newline at end of file + os.chdir(original_cwd) + + +@pytest.fixture +def http_client_v1() -> HTTPClient: + return HTTPClient(api_version=APIVersion.V1) + + +@pytest.fixture +def http_client_v2() -> HTTPClient: + return HTTPClient(api_version=APIVersion.V2) + + +@pytest.fixture +def minio_client() -> MinIOClient: + return MinIOClient() diff --git a/tests/test_api/__init__.py b/tests/test_api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py new file mode 100644 index 000000000..9783777f7 --- /dev/null +++ b/tests/test_api/test_http.py @@ -0,0 +1,259 @@ +from requests import Response, Request, Session +from unittest.mock import patch +import pytest +import os +import hashlib +from pathlib import Path +from urllib.parse import urljoin, urlparse +from openml.enums import APIVersion +from openml.exceptions import OpenMLAuthenticationError +from openml._api import HTTPClient, HTTPCache +import openml + + +@pytest.fixture +def cache(http_client_v1) -> HTTPCache: + return http_client_v1.cache + + +@pytest.fixture +def http_client(http_client_v1) -> HTTPClient: + return http_client_v1 + + +@pytest.fixture +def sample_path() -> str: + return "task/1" + + +@pytest.fixture +def sample_url_v1(sample_path, test_server_v1) -> str: + return urljoin(test_server_v1, sample_path) + + +@pytest.fixture 
+def sample_download_url_v1(test_server_v1) -> str: + server = test_server_v1.split("api/")[0] + endpoint = "data/v1/download/1/anneal.arff" + url = server + endpoint + return url + + +def test_cache(cache, sample_url_v1): + params = {"param1": "value1", "param2": "value2"} + + parsed_url = urlparse(sample_url_v1) + netloc_parts = parsed_url.netloc.split(".")[::-1] + path_parts = parsed_url.path.strip("/").split("/") + params_key = "&".join([f"{k}={v}" for k, v in params.items()]) + + + key = cache.get_key(sample_url_v1, params) + + expected_key = os.path.join( + *netloc_parts, + *path_parts, + params_key, + ) + + assert key == expected_key + + # mock response + req = Request("GET", sample_url_v1).prepare() + response = Response() + response.status_code = 200 + response.url = sample_url_v1 + response.reason = "OK" + response._content = b"test" + response.headers = {"Content-Type": "text/xml"} + response.encoding = "utf-8" + response.request = req + response.elapsed = type("Elapsed", (), {"total_seconds": lambda x: 0.1})() + + cache.save(key, response) + cached = cache.load(key) + + assert cached.status_code == 200 + assert cached.url == sample_url_v1 + assert cached.content == b"test" + assert cached.headers["Content-Type"] == "text/xml" + + +@pytest.mark.test_server() +def test_get(http_client): + response = http_client.get("task/1") + + assert response.status_code == 200 + assert b" TaskV1API: + return TaskV1API(http=http_client_v1, minio=minio_client) + + +@pytest.fixture +def task_v2(http_client_v2, minio_client) -> TaskV2API: + return TaskV2API(http=http_client_v2, minio=minio_client) + + +@pytest.fixture +def task_fallback(task_v1, task_v2) -> FallbackProxy: + return FallbackProxy(task_v2, task_v1) + + +def _get_first_tid(task_api: TaskV1API, task_type: TaskType) -> int: + """Helper to find an existing task ID for a given type using the V1 resource.""" + tasks = task_api.list(limit=1, offset=0, task_type=task_type) + if tasks.empty: + pytest.skip(f"No tasks 
of type {task_type} found on test server.") + return int(tasks.iloc[0]["tid"]) + + +@pytest.mark.uses_test_server() +def test_v1_list_tasks(task_v1): + """Verify V1 list endpoint returns a populated DataFrame.""" + tasks_df = task_v1.list(limit=5, offset=0) + assert isinstance(tasks_df, pd.DataFrame) + assert not tasks_df.empty + assert "tid" in tasks_df.columns + + +@pytest.mark.uses_test_server() +def test_v2_list_tasks(task_v2): + """Verify V2 list endpoint raises NotSupported.""" + with pytest.raises(OpenMLNotSupportedError): + task_v2.list(limit=5, offset=0) + +def test_v1_publish(task_v1): + resource_name = task_v1.resource_type.value + resource_files = {"description": "Resource Description File"} + resource_id = 123 + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + f'\n' + f"\t{resource_id}\n" + f"\n" + ).encode("utf-8") + + published_resource_id = task_v1.publish( + resource_name, + files=resource_files, + ) + + assert resource_id == published_resource_id + + mock_request.assert_called_once_with( + method="POST", + url=openml.config.server + resource_name, + params={}, + data={"api_key": openml.config.apikey}, + headers=openml.config._HEADERS, + files=resource_files, + ) + + +def test_v1_delete(task_v1): + resource_name = task_v1.resource_type.value + resource_id = 123 + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + f'\n' + f" {resource_id}\n" + f"\n" + ).encode("utf-8") + + task_v1.delete(resource_id) + + mock_request.assert_called_once_with( + method="DELETE", + url=( + openml.config.server + + resource_name + + "/" + + str(resource_id) + ), + params={"api_key": openml.config.apikey}, + data={}, + headers=openml.config._HEADERS, + files=None, + ) + + +def test_v1_tag(task_v1): + 
resource_id = 123 + resource_tag = "TAG" + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + f'' + f"{resource_id}" + f"{resource_tag}" + f"" + ).encode("utf-8") + + tags = task_v1.tag(resource_id, resource_tag) + + assert resource_tag in tags + + mock_request.assert_called_once_with( + method="POST", + url=( + openml.config.server + + task_v1.resource_type.value + + "/tag" + ), + params={}, + data={ + "api_key": openml.config.apikey, + "task_id": resource_id, + "tag": resource_tag, + }, + headers=openml.config._HEADERS, + files=None, + ) + + +def test_v1_untag(task_v1): + resource_id = 123 + resource_tag = "TAG" + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + f'' + f"{resource_id}" + f"" + ).encode("utf-8") + + tags = task_v1.untag(resource_id, resource_tag) + + assert resource_tag not in tags + + mock_request.assert_called_once_with( + method="POST", + url=( + openml.config.server + + task_v1.resource_type.value + + "/untag" + ), + params={}, + data={ + "api_key": openml.config.apikey, + "task_id": resource_id, + "tag": resource_tag, + }, + headers=openml.config._HEADERS, + files=None, + ) + + +def test_v2_publish(task_v2): + with pytest.raises(OpenMLNotSupportedError): + task_v2.publish(path=None, files=None) + + +def test_v2_delete(task_v2): + with pytest.raises(OpenMLNotSupportedError): + task_v2.delete(resource_id=None) + + +def test_v2_tag(task_v2): + with pytest.raises(OpenMLNotSupportedError): + task_v2.tag(resource_id=None, tag=None) + + +def test_v2_untag(task_v2): + with pytest.raises(OpenMLNotSupportedError): + task_v2.untag(resource_id=None, tag=None) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py new file mode 100644 index 000000000..d258af35c 
--- /dev/null +++ b/tests/test_api/test_versions.py @@ -0,0 +1,218 @@ +import pytest +from requests import Session, Response +from unittest.mock import patch +from openml._api import FallbackProxy, ResourceAPI, ResourceV1API, ResourceV2API +from openml.enums import ResourceType +from openml.exceptions import OpenMLNotSupportedError +import openml + + +class DummyTaskAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.TASK + + +class DummyTaskV1API(ResourceV1API, DummyTaskAPI): + pass + + +class DummyTaskV2API(ResourceV2API, DummyTaskAPI): + pass + + +@pytest.fixture +def dummy_task_v1(http_client_v1, minio_client) -> DummyTaskV1API: + return DummyTaskV1API(http=http_client_v1, minio=minio_client) + + +@pytest.fixture +def dummy_task_v2(http_client_v2, minio_client) -> DummyTaskV2API: + return DummyTaskV2API(http=http_client_v2, minio=minio_client) + + +@pytest.fixture +def dummy_task_fallback(dummy_task_v1, dummy_task_v2) -> FallbackProxy: + return FallbackProxy(dummy_task_v2, dummy_task_v1) + + +def test_v1_publish(dummy_task_v1, test_server_v1, test_apikey_v1): + resource = dummy_task_v1 + resource_name = resource.resource_type.value + resource_files = {"description": "Resource Description File"} + resource_id = 123 + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + f'\n' + f"\t{resource_id}\n" + f"\n" + ).encode("utf-8") + + published_resource_id = resource.publish( + resource_name, + files=resource_files, + ) + + assert resource_id == published_resource_id + + mock_request.assert_called_once_with( + method="POST", + url=test_server_v1 + resource_name, + params={}, + data={"api_key": test_apikey_v1}, + headers=openml.config._HEADERS, + files=resource_files, + ) + + +def test_v1_delete(dummy_task_v1, test_server_v1, test_apikey_v1): + resource = dummy_task_v1 + resource_name = resource.resource_type.value + 
 resource_id = 123 + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + f'\n' + f" {resource_id}\n" + f"\n" + ).encode("utf-8") + + resource.delete(resource_id) + + mock_request.assert_called_once_with( + method="DELETE", + url=( + test_server_v1 + + resource_name + + "/" + + str(resource_id) + ), + params={"api_key": test_apikey_v1}, + data={}, + headers=openml.config._HEADERS, + files=None, + ) + + +def test_v1_tag(dummy_task_v1, test_server_v1, test_apikey_v1): + resource = dummy_task_v1 + resource_id = 123 + resource_tag = "TAG" + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + f'' + f"{resource_id}" + f"{resource_tag}" + f"" + ).encode("utf-8") + + tags = resource.tag(resource_id, resource_tag) + + assert resource_tag in tags + + mock_request.assert_called_once_with( + method="POST", + url=( + test_server_v1 + + resource.resource_type.value + + "/tag" + ), + params={}, + data={ + "api_key": test_apikey_v1, + "task_id": resource_id, + "tag": resource_tag, + }, + headers=openml.config._HEADERS, + files=None, + ) + + +def test_v1_untag(dummy_task_v1, test_server_v1, test_apikey_v1): + resource = dummy_task_v1 + resource_id = 123 + resource_tag = "TAG" + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = ( + f'' + f"{resource_id}" + f"" + ).encode("utf-8") + + tags = resource.untag(resource_id, resource_tag) + + assert resource_tag not in tags + + mock_request.assert_called_once_with( + method="POST", + url=( + test_server_v1 + + resource.resource_type.value + + "/untag" + ), + params={}, + data={ + "api_key": test_apikey_v1, + "task_id": resource_id, + "tag": resource_tag, + 
}, + headers=openml.config._HEADERS, + files=None, + ) + + +def test_v2_publish(dummy_task_v2): + with pytest.raises(OpenMLNotSupportedError): + dummy_task_v2.publish(path=None, files=None) + + +def test_v2_delete(dummy_task_v2): + with pytest.raises(OpenMLNotSupportedError): + dummy_task_v2.delete(resource_id=None) + + +def test_v2_tag(dummy_task_v2): + with pytest.raises(OpenMLNotSupportedError): + dummy_task_v2.tag(resource_id=None, tag=None) + + +def test_v2_untag(dummy_task_v2): + with pytest.raises(OpenMLNotSupportedError): + dummy_task_v2.untag(resource_id=None, tag=None) + + +def test_fallback_publish(dummy_task_fallback): + with patch.object(ResourceV1API, "publish") as mock_publish: + mock_publish.return_value = None + dummy_task_fallback.publish(path=None, files=None) + mock_publish.assert_called_once_with(path=None, files=None) + + +def test_fallback_delete(dummy_task_fallback): + with patch.object(ResourceV1API, "delete") as mock_delete: + mock_delete.return_value = None + dummy_task_fallback.delete(resource_id=None) + mock_delete.assert_called_once_with(resource_id=None) + + +def test_fallback_tag(dummy_task_fallback): + with patch.object(ResourceV1API, "tag") as mock_tag: + mock_tag.return_value = None + dummy_task_fallback.tag(resource_id=None, tag=None) + mock_tag.assert_called_once_with(resource_id=None, tag=None) + + +def test_fallback_untag(dummy_task_fallback): + with patch.object(ResourceV1API, "untag") as mock_untag: + mock_untag.return_value = None + dummy_task_fallback.untag(resource_id=None, tag=None) + mock_untag.assert_called_once_with(resource_id=None, tag=None) \ No newline at end of file diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 974fb36ef..f885198f1 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -157,7 +157,6 @@ def test_check_datasets_active(self): openml.datasets.check_datasets_active, [79], ) - 
openml.config.server = self.test_server @pytest.mark.test_server() def test_illegal_character_tag(self): @@ -185,7 +184,6 @@ def test__name_to_id_with_deactivated(self): self.use_production_server() # /d/1 was deactivated assert openml.datasets.functions._name_to_id("anneal") == 2 - openml.config.server = self.test_server @pytest.mark.production_server() def test__name_to_id_with_multiple_active(self): @@ -291,7 +289,9 @@ def test_get_dataset_cannot_access_private_data(self): @pytest.mark.skip("Need to find dataset name of private dataset") def test_dataset_by_name_cannot_access_private_data(self): self.use_production_server() - self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE") + self.assertRaises( + OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE" + ) @pytest.mark.test_server() def test_get_dataset_lazy_all_functions(self): @@ -301,7 +301,9 @@ def test_get_dataset_lazy_all_functions(self): def ensure_absence_of_real_data(): assert not os.path.exists( - os.path.join(openml.config.get_cache_directory(), "datasets", "1", "dataset.arff") + os.path.join( + openml.config.get_cache_directory(), "datasets", "1", "dataset.arff" + ) ) tag = "test_lazy_tag_%d" % random.randint(1, 1000000) @@ -406,7 +408,6 @@ def test__download_minio_file_works_with_bucket_subdirectory(self): file_destination ), "_download_minio_file can download from subdirectories" - @mock.patch("openml._api_calls._download_minio_file") @pytest.mark.test_server() def test__get_dataset_parquet_is_cached(self, patch): @@ -526,13 +527,29 @@ def test_deletion_of_cache_dir(self): @pytest.mark.test_server() def test_deletion_of_cache_dir_faulty_download(self, patch): patch.side_effect = Exception("Boom!") - self.assertRaisesRegex(Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1) - datasets_cache_dir = os.path.join(openml.config.get_cache_directory(), "datasets") + self.assertRaisesRegex( + Exception, "Boom!", 
openml.datasets.get_dataset, dataset_id=1 + ) + datasets_cache_dir = os.path.join( + openml.config.get_cache_directory(), "datasets" + ) assert len(os.listdir(datasets_cache_dir)) == 0 + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.test_server() def test_publish_dataset(self): - arff_file_path = self.static_cache_dir / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff" + arff_file_path = ( + self.static_cache_dir + / "org" + / "openml" + / "test" + / "datasets" + / "2" + / "dataset.arff" + ) dataset = OpenMLDataset( "anneal", "test", @@ -563,7 +580,9 @@ def test__retrieve_class_labels(self): # Test workaround for string-typed class labels custom_ds = openml.datasets.get_dataset(2) custom_ds.features[31].data_type = "string" - labels = custom_ds.retrieve_class_labels(target_name=custom_ds.features[31].name) + labels = custom_ds.retrieve_class_labels( + target_name=custom_ds.features[31].name + ) assert labels == ["COIL", "SHEET"] @pytest.mark.test_server() @@ -684,11 +703,16 @@ def test_attributes_arff_from_df_unknown_dtype(self): for arr, dt in zip(data, dtype): df = pd.DataFrame(arr) err_msg = ( - f"The dtype '{dt}' of the column '0' is not currently " "supported by liac-arff" + f"The dtype '{dt}' of the column '0' is not currently " + "supported by liac-arff" ) with pytest.raises(ValueError, match=err_msg): attributes_arff_from_df(df) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.test_server() def test_create_dataset_numpy(self): data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T @@ -721,8 +745,14 @@ def test_create_dataset_numpy(self): assert ( _get_online_dataset_arff(dataset.id) == dataset._dataset ), "Uploaded arff does not match original one" - assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset" + assert ( + 
_get_online_dataset_format(dataset.id) == "arff" + ), "Wrong format for dataset" + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.test_server() def test_create_dataset_list(self): data = [ @@ -776,8 +806,14 @@ def test_create_dataset_list(self): assert ( _get_online_dataset_arff(dataset.id) == dataset._dataset ), "Uploaded ARFF does not match original one" - assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset" + assert ( + _get_online_dataset_format(dataset.id) == "arff" + ), "Wrong format for dataset" + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.test_server() def test_create_dataset_sparse(self): # test the scipy.sparse.coo_matrix @@ -926,6 +962,10 @@ def test_get_online_dataset_format(self): dataset_id ), "The format of the ARFF files is different" + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.test_server() def test_create_dataset_pandas(self): data = [ @@ -993,7 +1033,9 @@ def test_create_dataset_pandas(self): column_names = ["input1", "input2", "y"] df = pd.DataFrame.sparse.from_spmatrix(sparse_data, columns=column_names) # meta-information - description = "Synthetic dataset created from a Pandas DataFrame with Sparse columns" + description = ( + "Synthetic dataset created from a Pandas DataFrame with Sparse columns" + ) dataset = openml.datasets.functions.create_dataset( name=name, description=description, @@ -1018,7 +1060,9 @@ def test_create_dataset_pandas(self): assert ( _get_online_dataset_arff(dataset.id) == dataset._dataset ), "Uploaded ARFF does not match original one" - assert _get_online_dataset_format(dataset.id) == "sparse_arff", "Wrong format for dataset" + assert ( + _get_online_dataset_format(dataset.id) == "sparse_arff" + ), "Wrong format for dataset" # Check that 
we can overwrite the attributes data = [["a"], ["b"], ["c"], ["d"], ["e"]] @@ -1048,7 +1092,9 @@ def test_create_dataset_pandas(self): TestBase._mark_entity_for_removal("data", dataset.id) TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}") downloaded_data = _get_online_dataset_arff(dataset.id) - assert downloaded_data == dataset._dataset, "Uploaded ARFF does not match original one" + assert ( + downloaded_data == dataset._dataset + ), "Uploaded ARFF does not match original one" assert "@ATTRIBUTE rnd_str {a, b, c, d, e, f, g}" in downloaded_data def test_ignore_attributes_dataset(self): @@ -1151,6 +1197,10 @@ def test_ignore_attributes_dataset(self): paper_url=paper_url, ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.test_server() def test_publish_fetch_ignore_attribute(self): """Test to upload and retrieve dataset and check ignore_attributes""" @@ -1270,6 +1320,10 @@ def test_create_dataset_row_id_attribute_error(self): paper_url=paper_url, ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.test_server() def test_create_dataset_row_id_attribute_inference(self): # meta-information @@ -1398,7 +1452,9 @@ def test_get_dataset_cache_format_feather(self): cache_dir = openml.config.get_cache_directory() cache_dir_for_id = os.path.join(cache_dir, "datasets", "128") feather_file = os.path.join(cache_dir_for_id, "dataset.feather") - pickle_file = os.path.join(cache_dir_for_id, "dataset.feather.attributes.pkl.py3") + pickle_file = os.path.join( + cache_dir_for_id, "dataset.feather.attributes.pkl.py3" + ) data = pd.read_feather(feather_file) assert os.path.isfile(feather_file), "Feather file is missing" assert os.path.isfile(pickle_file), "Attributes pickle file is missing" @@ -1438,6 +1494,10 @@ def test_data_edit_non_critical_field(self): edited_dataset = 
openml.datasets.get_dataset(did) assert edited_dataset.description == desc + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.test_server() def test_data_edit_critical_field(self): # Case 2 @@ -1445,7 +1505,9 @@ def test_data_edit_critical_field(self): # for this, we need to first clone a dataset to do changes did = fork_dataset(1) self._wait_for_dataset_being_processed(did) - result = edit_dataset(did, default_target_attribute="shape", ignore_attribute="oil") + result = edit_dataset( + did, default_target_attribute="shape", ignore_attribute="oil" + ) assert did == result n_tries = 10 @@ -1453,7 +1515,9 @@ def test_data_edit_critical_field(self): for i in range(n_tries): edited_dataset = openml.datasets.get_dataset(did) try: - assert edited_dataset.default_target_attribute == "shape", edited_dataset + assert ( + edited_dataset.default_target_attribute == "shape" + ), edited_dataset assert edited_dataset.ignore_attribute == ["oil"], edited_dataset break except AssertionError as e: @@ -1461,9 +1525,11 @@ def test_data_edit_critical_field(self): raise e time.sleep(10) # Delete the cache dir to get the newer version of the dataset - + shutil.rmtree( - os.path.join(openml.config.get_cache_directory(), "datasets", str(did)), + os.path.join( + openml.config.get_cache_directory(), "datasets", str(did) + ), ) @pytest.mark.test_server() @@ -1490,6 +1556,10 @@ def test_data_edit_requires_valid_dataset(self): description="xor operation dataset", ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.test_server() def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self): # Need to own a dataset to be able to edit meta-data @@ -1542,7 +1612,6 @@ def test_data_fork(self): data_id=999999, ) - @pytest.mark.production_server() def test_list_datasets_with_high_size_parameter(self): # Testing on prod since 
concurrent deletion of uploded datasets make the test fail @@ -1552,7 +1621,6 @@ def test_list_datasets_with_high_size_parameter(self): datasets_b = openml.datasets.list_datasets(size=np.inf) # Reverting to test server - openml.config.server = self.test_server assert len(datasets_a) == len(datasets_b) @@ -1629,7 +1697,9 @@ def test_invalid_attribute_validations( (None, None, ["outlook", "windy"]), ], ) -def test_valid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute): +def test_valid_attribute_validations( + default_target_attribute, row_id_attribute, ignore_attribute +): data = [ ["a", "sunny", 85.0, 85.0, "FALSE", "no"], ["b", "sunny", 80.0, 90.0, "TRUE", "no"], @@ -1727,9 +1797,12 @@ def test_delete_dataset(self): @mock.patch.object(requests.Session, "delete") -def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key): +def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): content_file = ( - test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml" + test_files_directory + / "mock_responses" + / "datasets" + / "data_delete_not_owned.xml" ) mock_delete.return_value = create_request_response( status_code=412, @@ -1742,15 +1815,18 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_ke ): openml.datasets.delete_dataset(40_000) - dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000" + dataset_url = test_server_v1 + "data/40000" assert dataset_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") -def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key): +def test_delete_dataset_with_run(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): content_file 
= ( - test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml" + test_files_directory + / "mock_responses" + / "datasets" + / "data_delete_has_tasks.xml" ) mock_delete.return_value = create_request_response( status_code=412, @@ -1763,15 +1839,18 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key ): openml.datasets.delete_dataset(40_000) - dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000" + dataset_url = test_server_v1 + "data/40000" assert dataset_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") -def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key): +def test_delete_dataset_success(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): content_file = ( - test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml" + test_files_directory + / "mock_responses" + / "datasets" + / "data_delete_successful.xml" ) mock_delete.return_value = create_request_response( status_code=200, @@ -1781,15 +1860,18 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key) success = openml.datasets.delete_dataset(40000) assert success - dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000" + dataset_url = test_server_v1 + "data/40000" assert dataset_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") -def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key): +def test_delete_unknown_dataset(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): 
content_file = ( - test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml" + test_files_directory + / "mock_responses" + / "datasets" + / "data_delete_not_exist.xml" ) mock_delete.return_value = create_request_response( status_code=412, @@ -1802,9 +1884,9 @@ def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key) ): openml.datasets.delete_dataset(9_999_999) - dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/9999999" + dataset_url = test_server_v1 + "data/9999999" assert dataset_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") def _assert_datasets_have_id_and_valid_status(datasets: pd.DataFrame): @@ -1959,9 +2041,15 @@ def test_get_dataset_lazy_behavior( with_features=with_features, with_data=with_data, ) - assert dataset.features, "Features should be downloaded on-demand if not during get_dataset" - assert dataset.qualities, "Qualities should be downloaded on-demand if not during get_dataset" - assert dataset.get_data(), "Data should be downloaded on-demand if not during get_dataset" + assert ( + dataset.features + ), "Features should be downloaded on-demand if not during get_dataset" + assert ( + dataset.qualities + ), "Qualities should be downloaded on-demand if not during get_dataset" + assert ( + dataset.get_data() + ), "Data should be downloaded on-demand if not during get_dataset" _assert_datasets_retrieved_successfully( [1], with_qualities=True, with_features=True, with_data=True ) @@ -1980,7 +2068,9 @@ def test__get_dataset_parquet_not_cached(): "oml:parquet_url": "http://data.openml.org/dataset20/dataset_20.pq", "oml:id": "20", } - path = _get_dataset_parquet(description, cache_directory=Path(openml.config.get_cache_directory())) + path = _get_dataset_parquet( + description, 
cache_directory=Path(openml.config.get_cache_directory()) + ) assert isinstance(path, Path), "_get_dataset_parquet returns a path" assert path.is_file(), "_get_dataset_parquet returns path to real file" @@ -1989,21 +2079,24 @@ def test_read_features_from_xml_with_whitespace() -> None: from openml.datasets.dataset import _read_features features_file = ( - Path(__file__).parent.parent / "files" / "misc" / "features_with_whitespaces.xml" + Path(__file__).parent.parent + / "files" + / "misc" + / "features_with_whitespaces.xml" ) dict = _read_features(features_file) assert dict[1].nominal_values == [" - 50000.", " 50000+."] @pytest.mark.test_server() -def test_get_dataset_parquet(requests_mock, test_files_directory): +def test_get_dataset_parquet(requests_mock, test_files_directory, test_server_v1): # Parquet functionality is disabled on the test server # There is no parquet-copy of the test server yet. content_file = ( - test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml" + test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml" ) # While the mocked example is from production, unit tests by default connect to the test server. 
- requests_mock.get(f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/61", text=content_file.read_text()) + requests_mock.get(test_server_v1 + "data/61", text=content_file.read_text()) dataset = openml.datasets.get_dataset(61, download_data=True) assert dataset._parquet_url is not None assert dataset.parquet_file is not None diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index 4e391fd3b..108a05c3f 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -4,6 +4,7 @@ import collections import copy import hashlib +import os import re import os import time @@ -162,12 +163,16 @@ def test_from_xml_to_xml(self): def test_to_xml_from_xml(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) estimator_name = ( - "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator" + "base_estimator" + if Version(sklearn.__version__) < Version("1.4") + else "estimator" ) boosting = sklearn.ensemble.AdaBoostClassifier( **{estimator_name: sklearn.tree.DecisionTreeClassifier()}, ) - model = sklearn.pipeline.Pipeline(steps=(("scaler", scaler), ("boosting", boosting))) + model = sklearn.pipeline.Pipeline( + steps=(("scaler", scaler), ("boosting", boosting)) + ) flow = self.extension.model_to_flow(model) flow.flow_id = -234 # end of setup @@ -180,6 +185,10 @@ def test_to_xml_from_xml(self): openml.flows.functions.assert_flows_equal(new_flow, flow) assert new_flow is not flow + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_publish_flow(self): @@ -204,7 +213,9 @@ def test_publish_flow(self): flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") + TestBase.logger.info( + f"collected from {__file__.split('/')[-1]}: {flow.flow_id}" + ) assert isinstance(flow.flow_id, 
int) @pytest.mark.sklearn() @@ -214,7 +225,9 @@ def test_publish_existing_flow(self, flow_exists_mock): flow = self.extension.model_to_flow(clf) flow_exists_mock.return_value = 1 - with pytest.raises(openml.exceptions.PyOpenMLError, match="OpenMLFlow already exists"): + with pytest.raises( + openml.exceptions.PyOpenMLError, match="OpenMLFlow already exists" + ): flow.publish(raise_error_if_exists=True) TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) @@ -222,6 +235,10 @@ def test_publish_existing_flow(self, flow_exists_mock): f"collected from {__file__.split('/')[-1]}: {flow.flow_id}", ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_publish_flow_with_similar_components(self): @@ -232,7 +249,9 @@ def test_publish_flow_with_similar_components(self): flow, _ = self._add_sentinel_to_flow_name(flow, None) flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") + TestBase.logger.info( + f"collected from {__file__.split('/')[-1]}: {flow.flow_id}" + ) # For a flow where both components are published together, the upload # date should be equal assert flow.upload_date == flow.components["lr"].upload_date, ( @@ -247,7 +266,9 @@ def test_publish_flow_with_similar_components(self): flow1, sentinel = self._add_sentinel_to_flow_name(flow1, None) flow1.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow1.flow_id}") + TestBase.logger.info( + f"collected from {__file__.split('/')[-1]}: {flow1.flow_id}" + ) # In order to assign different upload times to the flows! 
time.sleep(1) @@ -259,20 +280,30 @@ def test_publish_flow_with_similar_components(self): flow2, _ = self._add_sentinel_to_flow_name(flow2, sentinel) flow2.publish() TestBase._mark_entity_for_removal("flow", flow2.flow_id, flow2.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow2.flow_id}") + TestBase.logger.info( + f"collected from {__file__.split('/')[-1]}: {flow2.flow_id}" + ) # If one component was published before the other, the components in # the flow should have different upload dates assert flow2.upload_date != flow2.components["dt"].upload_date - clf3 = sklearn.ensemble.AdaBoostClassifier(sklearn.tree.DecisionTreeClassifier(max_depth=3)) + clf3 = sklearn.ensemble.AdaBoostClassifier( + sklearn.tree.DecisionTreeClassifier(max_depth=3) + ) flow3 = self.extension.model_to_flow(clf3) flow3, _ = self._add_sentinel_to_flow_name(flow3, sentinel) # Child flow has different parameter. Check for storing the flow # correctly on the server should thus not check the child's parameters! 
flow3.publish() TestBase._mark_entity_for_removal("flow", flow3.flow_id, flow3.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}") + TestBase.logger.info( + f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}" + ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_semi_legal_flow(self): @@ -280,7 +311,9 @@ def test_semi_legal_flow(self): # should not throw error as it contains two differentiable forms of # Bagging i.e., Bagging(Bagging(J48)) and Bagging(J48) estimator_name = ( - "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator" + "base_estimator" + if Version(sklearn.__version__) < Version("1.4") + else "estimator" ) semi_legal = sklearn.ensemble.BaggingClassifier( **{ @@ -296,7 +329,9 @@ def test_semi_legal_flow(self): flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") + TestBase.logger.info( + f"collected from {__file__.split('/')[-1]}: {flow.flow_id}" + ) @pytest.mark.sklearn() @mock.patch("openml.flows.functions.get_flow") @@ -383,13 +418,21 @@ def get_sentinel(): flow_id = openml.flows.flow_exists(name, version) assert not flow_id + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_existing_flow_exists(self): # create a flow nb = sklearn.naive_bayes.GaussianNB() - sparse = "sparse" if Version(sklearn.__version__) < Version("1.4") else "sparse_output" + sparse = ( + "sparse" + if Version(sklearn.__version__) < Version("1.4") + else "sparse_output" + ) ohe_params = {sparse: False, "handle_unknown": "ignore"} if Version(sklearn.__version__) >= Version("0.20"): ohe_params["categories"] = "auto" @@ -424,6 +467,10 @@ def 
test_existing_flow_exists(self): ) assert downloaded_flow_id == flow.flow_id + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_sklearn_to_upload_to_flow(self): @@ -444,13 +491,20 @@ def test_sklearn_to_upload_to_flow(self): ) fu = sklearn.pipeline.FeatureUnion(transformer_list=[("pca", pca), ("fs", fs)]) estimator_name = ( - "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator" + "base_estimator" + if Version(sklearn.__version__) < Version("1.4") + else "estimator" ) boosting = sklearn.ensemble.AdaBoostClassifier( **{estimator_name: sklearn.tree.DecisionTreeClassifier()}, ) model = sklearn.pipeline.Pipeline( - steps=[("ohe", ohe), ("scaler", scaler), ("fu", fu), ("boosting", boosting)], + steps=[ + ("ohe", ohe), + ("scaler", scaler), + ("fu", fu), + ("boosting", boosting), + ], ) parameter_grid = { "boosting__n_estimators": [1, 5, 10, 100], @@ -477,7 +531,9 @@ def test_sklearn_to_upload_to_flow(self): flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") + TestBase.logger.info( + f"collected from {__file__.split('/')[-1]}: {flow.flow_id}" + ) assert isinstance(flow.flow_id, int) # Check whether we can load the flow again @@ -560,7 +616,10 @@ def test_extract_tags(self): tags = openml.utils.extract_xml_tags("oml:tag", flow_dict) assert tags == ["study_14"] - flow_xml = "OpenmlWeka\n" "weka" + flow_xml = ( + "OpenmlWeka\n" + "weka" + ) flow_dict = xmltodict.parse(flow_xml) tags = openml.utils.extract_xml_tags("oml:tag", flow_dict["oml:flow"]) assert tags == ["OpenmlWeka", "weka"] diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index 14bb78060..f0709bb45 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ 
-6,7 +6,7 @@ import unittest from collections import OrderedDict from multiprocessing.managers import Value - +import os from openml_sklearn import SklearnExtension from packaging.version import Version from unittest import mock @@ -153,7 +153,9 @@ def test_are_flows_equal(self): openml.flows.functions.assert_flows_equal(flow, flow) new_flow = copy.deepcopy(flow) new_flow.parameters["abc"] = 3.0 - self.assertRaises(ValueError, openml.flows.functions.assert_flows_equal, flow, new_flow) + self.assertRaises( + ValueError, openml.flows.functions.assert_flows_equal, flow, new_flow + ) # Now test for components (subflows) parent_flow = copy.deepcopy(flow) @@ -195,24 +197,28 @@ def test_are_flows_equal_ignore_parameter_values(self): ) openml.flows.functions.assert_flows_equal(flow, flow) - openml.flows.functions.assert_flows_equal(flow, flow, ignore_parameter_values=True) + openml.flows.functions.assert_flows_equal( + flow, flow, ignore_parameter_values=True + ) new_flow = copy.deepcopy(flow) new_flow.parameters["a"] = 7 with pytest.raises(ValueError) as excinfo: openml.flows.functions.assert_flows_equal(flow, new_flow) - assert str(paramaters) in str(excinfo.value) and str(new_flow.parameters) in str( - excinfo.value - ) + assert str(paramaters) in str(excinfo.value) and str( + new_flow.parameters + ) in str(excinfo.value) - openml.flows.functions.assert_flows_equal(flow, new_flow, ignore_parameter_values=True) + openml.flows.functions.assert_flows_equal( + flow, new_flow, ignore_parameter_values=True + ) del new_flow.parameters["a"] with pytest.raises(ValueError) as excinfo: openml.flows.functions.assert_flows_equal(flow, new_flow) - assert str(paramaters) in str(excinfo.value) and str(new_flow.parameters) in str( - excinfo.value - ) + assert str(paramaters) in str(excinfo.value) and str( + new_flow.parameters + ) in str(excinfo.value) self.assertRaisesRegex( ValueError, @@ -246,7 +252,9 @@ def test_are_flows_equal_ignore_if_older(self): upload_date=flow_upload_date, ) 
- assert_flows_equal(flow, flow, ignore_parameter_values_on_older_children=flow_upload_date) + assert_flows_equal( + flow, flow, ignore_parameter_values_on_older_children=flow_upload_date + ) assert_flows_equal(flow, flow, ignore_parameter_values_on_older_children=None) new_flow = copy.deepcopy(flow) new_flow.parameters["a"] = 7 @@ -296,7 +304,9 @@ def test_sklearn_to_flow_list_of_lists(self): self._add_sentinel_to_flow_name(flow) flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") + TestBase.logger.info( + f"collected from {__file__.split('/')[-1]}: {flow.flow_id}" + ) # Test deserialization works server_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True) assert server_flow.parameters["categories"] == "[[0, 1], [0, 1]]" @@ -310,6 +320,10 @@ def test_get_flow1(self): flow = openml.flows.get_flow(1) assert flow.external_version is None + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_get_flow_reinstantiate_model(self): @@ -318,10 +332,14 @@ def test_get_flow_reinstantiate_model(self): flow = extension.model_to_flow(model) flow.publish(raise_error_if_exists=False) TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") + TestBase.logger.info( + f"collected from {__file__.split('/')[-1]}: {flow.flow_id}" + ) downloaded_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True) - assert isinstance(downloaded_flow.model, sklearn.ensemble.RandomForestClassifier) + assert isinstance( + downloaded_flow.model, sklearn.ensemble.RandomForestClassifier + ) @pytest.mark.test_server() def test_get_flow_reinstantiate_model_no_extension(self): @@ -340,7 +358,9 @@ def test_get_flow_reinstantiate_model_no_extension(self): 
reason="Requires scikit-learn!=0.19.1, because target flow is from that version.", ) @pytest.mark.production_server() - def test_get_flow_with_reinstantiate_strict_with_wrong_version_raises_exception(self): + def test_get_flow_with_reinstantiate_strict_with_wrong_version_raises_exception( + self, + ): self.use_production_server() flow = 8175 expected = "Trying to deserialize a model with dependency sklearn==0.19.1 not satisfied." @@ -363,7 +383,9 @@ def test_get_flow_with_reinstantiate_strict_with_wrong_version_raises_exception( @pytest.mark.production_server() def test_get_flow_reinstantiate_flow_not_strict_post_1(self): self.use_production_server() - flow = openml.flows.get_flow(flow_id=19190, reinstantiate=True, strict_version=False) + flow = openml.flows.get_flow( + flow_id=19190, reinstantiate=True, strict_version=False + ) assert flow.flow_id is None assert "sklearn==1.0.0" not in flow.dependencies @@ -377,7 +399,9 @@ def test_get_flow_reinstantiate_flow_not_strict_post_1(self): @pytest.mark.production_server() def test_get_flow_reinstantiate_flow_not_strict_023_and_024(self): self.use_production_server() - flow = openml.flows.get_flow(flow_id=18587, reinstantiate=True, strict_version=False) + flow = openml.flows.get_flow( + flow_id=18587, reinstantiate=True, strict_version=False + ) assert flow.flow_id is None assert "sklearn==0.23.1" not in flow.dependencies @@ -389,10 +413,16 @@ def test_get_flow_reinstantiate_flow_not_strict_023_and_024(self): @pytest.mark.production_server() def test_get_flow_reinstantiate_flow_not_strict_pre_023(self): self.use_production_server() - flow = openml.flows.get_flow(flow_id=8175, reinstantiate=True, strict_version=False) + flow = openml.flows.get_flow( + flow_id=8175, reinstantiate=True, strict_version=False + ) assert flow.flow_id is None assert "sklearn==0.19.1" not in flow.dependencies + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) 
@pytest.mark.sklearn() @pytest.mark.test_server() def test_get_flow_id(self): @@ -402,13 +432,19 @@ def test_get_flow_id(self): list_all = functools.lru_cache()(openml.utils._list_all) with patch("openml.utils._list_all", list_all): clf = sklearn.tree.DecisionTreeClassifier() - flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish() + flow = ( + openml.extensions.get_extension_by_model(clf) + .model_to_flow(clf) + .publish() + ) TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) TestBase.logger.info( f"collected from {__file__.split('/')[-1]}: {flow.flow_id}", ) - assert openml.flows.get_flow_id(model=clf, exact_version=True) == flow.flow_id + assert ( + openml.flows.get_flow_id(model=clf, exact_version=True) == flow.flow_id + ) flow_ids = openml.flows.get_flow_id(model=clf, exact_version=False) assert flow.flow_id in flow_ids assert len(flow_ids) > 0 @@ -424,9 +460,13 @@ def test_get_flow_id(self): exact_version=False, ) assert flow.flow_id in flow_ids_exact_version_True - assert set(flow_ids_exact_version_True).issubset(set(flow_ids_exact_version_False)) + assert set(flow_ids_exact_version_True).issubset( + set(flow_ids_exact_version_False) + ) # instead of the assertion above, the assertion below used to be used. - pytest.skip(reason="Not sure why there should only be one version of this flow.") + pytest.skip( + reason="Not sure why there should only be one version of this flow." 
+ ) assert flow_ids_exact_version_True == flow_ids_exact_version_False @pytest.mark.test_server() @@ -453,7 +493,7 @@ def test_delete_flow(self): @mock.patch.object(requests.Session, "delete") -def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key): +def test_delete_flow_not_owned(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_owned.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -466,13 +506,13 @@ def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key): ): openml.flows.delete_flow(40_000) - flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000" + flow_url = test_server_v1 + "flow/40000" assert flow_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") -def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key): +def test_delete_flow_with_run(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_has_runs.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -485,13 +525,13 @@ def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key): ): openml.flows.delete_flow(40_000) - flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000" + flow_url = test_server_v1 + "flow/40000" assert flow_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") -def test_delete_subflow(mock_delete, test_files_directory, 
test_api_key): +def test_delete_subflow(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_is_subflow.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -504,13 +544,13 @@ def test_delete_subflow(mock_delete, test_files_directory, test_api_key): ): openml.flows.delete_flow(40_000) - flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000" + flow_url = test_server_v1 + "flow/40000" assert flow_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") -def test_delete_flow_success(mock_delete, test_files_directory, test_api_key): +def test_delete_flow_success(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_successful.xml" mock_delete.return_value = create_request_response( status_code=200, @@ -520,14 +560,14 @@ def test_delete_flow_success(mock_delete, test_files_directory, test_api_key): success = openml.flows.delete_flow(33364) assert success - flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/33364" + flow_url = test_server_v1 + "flow/33364" assert flow_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") @pytest.mark.xfail(reason="failures_issue_1544", strict=False) -def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key): +def test_delete_unknown_flow(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "flows" / 
"flow_delete_not_exist.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -540,6 +580,6 @@ def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key): ): openml.flows.delete_flow(9_999_999) - flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/9999999" + flow_url = test_server_v1 + "flow/9999999" assert flow_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py index f2a81be9f..538fbe59f 100644 --- a/tests/test_openml/test_api_calls.py +++ b/tests/test_openml/test_api_calls.py @@ -6,6 +6,7 @@ from unittest import mock import minio +import os import pytest import os @@ -20,6 +21,10 @@ def test_too_long_uri(self): with pytest.raises(openml.exceptions.OpenMLServerError, match="URI too long!"): openml.datasets.list_datasets(data_id=list(range(10000))) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @unittest.mock.patch("time.sleep") @unittest.mock.patch("requests.Session") @pytest.mark.test_server() @@ -33,11 +38,17 @@ def test_retry_on_database_error(self, Session_class_mock, _): "Please wait for N seconds and try again.\n" "" ) - Session_class_mock.return_value.__enter__.return_value.get.return_value = response_mock - with pytest.raises(openml.exceptions.OpenMLServerException, match="/abc returned code 107"): + Session_class_mock.return_value.__enter__.return_value.get.return_value = ( + response_mock + ) + with pytest.raises( + openml.exceptions.OpenMLServerException, match="/abc returned code 107" + ): openml._api_calls._send_request("get", "/abc", {}) - assert Session_class_mock.return_value.__enter__.return_value.get.call_count == 20 + assert ( + 
Session_class_mock.return_value.__enter__.return_value.get.call_count == 20 + ) class FakeObject(NamedTuple): @@ -124,5 +135,9 @@ def test_authentication_endpoints_requiring_api_key_show_relevant_help_link( ) -> None: # We need to temporarily disable the API key to test the error message with openml.config.overwrite_config_context({"apikey": None}): - with pytest.raises(openml.exceptions.OpenMLAuthenticationError, match=API_TOKEN_HELP_LINK): - openml._api_calls._perform_api_call(call=endpoint, request_method=method, data=None) + with pytest.raises( + openml.exceptions.OpenMLAuthenticationError, match=API_TOKEN_HELP_LINK + ): + openml._api_calls._perform_api_call( + call=endpoint, request_method=method, data=None + ) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index f3feca784..941af9f1c 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -9,12 +9,14 @@ from typing import Any, Iterator from pathlib import Path import platform +from urllib.parse import urlparse import pytest import openml import openml.testing from openml.testing import TestBase +from openml.enums import APIVersion, ServerMode @contextmanager @@ -77,22 +79,24 @@ def test_get_config_as_dict(self): """Checks if the current configuration is returned accurately as a dict.""" config = openml.config.get_config_as_dict() _config = {} - _config["apikey"] = TestBase.user_key - _config["server"] = f"{openml.config.TEST_SERVER_URL}/api/v1/xml" + _config["api_version"] = APIVersion.V1 + _config["fallback_api_version"] = None + _config["servers"] = openml.config.get_test_servers() _config["cachedir"] = self.workdir _config["avoid_duplicate_runs"] = False _config["connection_n_retries"] = 20 _config["retry_policy"] = "robot" _config["show_progress"] = False assert isinstance(config, dict) - assert len(config) == 7 + assert len(config) == 8 self.assertDictEqual(config, _config) def test_setup_with_config(self): """Checks if the OpenML 
configuration can be updated using _setup().""" _config = {} - _config["apikey"] = TestBase.user_key - _config["server"] = "https://www.openml.org/api/v1/xml" + _config["api_version"] = APIVersion.V1 + _config["fallback_api_version"] = None + _config["servers"] = openml.config.get_test_servers() _config["cachedir"] = self.workdir _config["avoid_duplicate_runs"] = True _config["retry_policy"] = "human" @@ -109,26 +113,22 @@ class TestConfigurationForExamples(openml.testing.TestBase): @pytest.mark.production_server() def test_switch_to_example_configuration(self): """Verifies the test configuration is loaded properly.""" - # Below is the default test key which would be used anyway, but just for clarity: - openml.config.apikey = "any-api-key" - openml.config.server = self.production_server + openml.config.use_production_servers() openml.config.start_using_configuration_for_example() - assert openml.config.apikey == TestBase.user_key - assert openml.config.server == self.test_server + assert openml.config.servers == openml.config.get_test_servers() @pytest.mark.production_server() def test_switch_from_example_configuration(self): """Verifies the previous configuration is loaded after stopping.""" # Below is the default test key which would be used anyway, but just for clarity: - openml.config.apikey = TestBase.user_key - openml.config.server = self.production_server + openml.config.use_production_servers() openml.config.start_using_configuration_for_example() openml.config.stop_using_configuration_for_example() - assert openml.config.apikey == TestBase.user_key - assert openml.config.server == self.production_server + + assert openml.config.servers == openml.config.get_production_servers() def test_example_configuration_stop_before_start(self): """Verifies an error is raised if `stop_...` is called before `start_...`.""" @@ -145,15 +145,13 @@ def test_example_configuration_stop_before_start(self): @pytest.mark.production_server() def 
test_example_configuration_start_twice(self): """Checks that the original config can be returned to if `start..` is called twice.""" - openml.config.apikey = TestBase.user_key - openml.config.server = self.production_server + openml.config.use_production_servers() openml.config.start_using_configuration_for_example() openml.config.start_using_configuration_for_example() openml.config.stop_using_configuration_for_example() - assert openml.config.apikey == TestBase.user_key - assert openml.config.server == self.production_server + assert openml.config.servers == openml.config.get_production_servers() def test_configuration_file_not_overwritten_on_load(): @@ -190,5 +188,71 @@ def test_openml_cache_dir_env_var(tmp_path: Path) -> None: with safe_environ_patcher("OPENML_CACHE_DIR", str(expected_path)): openml.config._setup() + assert openml.config._root_cache_directory == expected_path assert openml.config.get_cache_directory() == str(expected_path / "org" / "openml" / "www") + + +@pytest.mark.parametrize("mode", list(ServerMode)) +@pytest.mark.parametrize("api_version", [APIVersion.V1, APIVersion.V2]) +def test_get_servers(mode, api_version): + orig_servers = openml.config._get_servers(mode) + + openml.config._set_servers(mode) + openml.config.set_api_version(api_version) + openml.config.server = "temp-server1" + openml.config.apikey = "temp-apikey1" + openml.config._get_servers(mode)["server"] = 'temp-server2' + openml.config._get_servers(mode)["apikey"] = 'temp-server2' + + assert openml.config._get_servers(mode) == orig_servers + + +@pytest.mark.parametrize("mode", list(ServerMode)) +@pytest.mark.parametrize("api_version", [APIVersion.V1, APIVersion.V2]) +def test_set_servers(mode, api_version): + openml.config._set_servers(mode) + openml.config.set_api_version(api_version) + + assert openml.config.servers == openml.config._get_servers(mode) + assert openml.config.api_version == api_version + + openml.config.server = "temp-server" + openml.config.apikey = 
"temp-apikey" + + assert openml.config.server == openml.config.servers[api_version]["server"] + assert openml.config.apikey == openml.config.servers[api_version]["apikey"] + + for version, servers in openml.config.servers.items(): + if version == api_version: + assert servers != openml.config._get_servers(mode)[version] + else: + assert servers == openml.config._get_servers(mode)[version] + + +def test_get_production_servers(): + assert openml.config.get_production_servers() == openml.config._get_servers("production") + + +def test_get_test_servers(): + assert openml.config.get_test_servers() == openml.config._get_servers("test") + + +def test_use_production_servers(): + openml.config.use_production_servers() + servers_1 = openml.config.servers + + openml.config._set_servers("production") + servers_2 = openml.config.servers + + assert servers_1 == servers_2 + + +def test_use_test_servers(): + openml.config.use_test_servers() + servers_1 = openml.config.servers + + openml.config._set_servers("test") + servers_2 = openml.config.servers + + assert servers_1 == servers_2 diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index 22a8bc936..05e8ef1dd 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -48,7 +48,10 @@ def test_tagging(self): def _test_prediction_data_equal(run, run_prime): # Determine which attributes are numeric and which not num_cols = np.array( - [d_type == "NUMERIC" for _, d_type in run._generate_arff_dict()["attributes"]], + [ + d_type == "NUMERIC" + for _, d_type in run._generate_arff_dict()["attributes"] + ], ) # Get run data consistently # (For run from server, .data_content does not exist) @@ -66,7 +69,9 @@ def _test_prediction_data_equal(run, run_prime): def _test_run_obj_equals(self, run, run_prime): for dictionary in ["evaluations", "fold_evaluations", "sample_evaluations"]: if getattr(run, dictionary) is not None: - self.assertDictEqual(getattr(run, dictionary), getattr(run_prime, dictionary)) + 
self.assertDictEqual( + getattr(run, dictionary), getattr(run_prime, dictionary) + ) else: # should be none or empty other = getattr(run_prime, dictionary) @@ -76,7 +81,9 @@ def _test_run_obj_equals(self, run, run_prime): self._test_prediction_data_equal(run, run_prime) # Test trace - run_trace_content = run.trace.trace_to_arff()["data"] if run.trace is not None else None + run_trace_content = ( + run.trace.trace_to_arff()["data"] if run.trace is not None else None + ) if run_prime.trace is not None: run_prime_trace_content = run_prime.trace.trace_to_arff()["data"] @@ -118,6 +125,10 @@ def _check_array(array, type_): else: assert run_prime_trace_content is None + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_to_from_filesystem_vanilla(self): @@ -153,6 +164,10 @@ def test_to_from_filesystem_vanilla(self): f"collected from {__file__.split('/')[-1]}: {run_prime.run_id}", ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.flaky() @pytest.mark.test_server() @@ -189,14 +204,23 @@ def test_to_from_filesystem_search(self): f"collected from {__file__.split('/')[-1]}: {run_prime.run_id}", ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_to_from_filesystem_no_model(self): model = Pipeline( - [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())], + [ + ("imputer", SimpleImputer(strategy="mean")), + ("classifier", DummyClassifier()), + ], ) task = openml.tasks.get_task(119) # diabetes; crossvalidation - run = openml.runs.run_model_on_task(model=model, task=task, add_local_measures=False) + run = openml.runs.run_model_on_task( + model=model, task=task, add_local_measures=False + ) cache_path 
= os.path.join(self.workdir, "runs", str(random.getrandbits(128))) run.to_filesystem(cache_path, store_model=False) @@ -265,7 +289,9 @@ def assert_run_prediction_data(task, run, model): # Check correctness of y_true and y_pred in run for fold_id in range(n_folds): # Get data for fold - _, test_indices = task.get_train_test_split_indices(repeat=0, fold=fold_id, sample=0) + _, test_indices = task.get_train_test_split_indices( + repeat=0, fold=fold_id, sample=0 + ) train_mask = np.full(len(X), True) train_mask[test_indices] = False @@ -279,7 +305,9 @@ def assert_run_prediction_data(task, run, model): y_pred = model.fit(X_train, y_train).predict(X_test) # Get stored data for fold - saved_fold_data = run.predictions[run.predictions["fold"] == fold_id].sort_values( + saved_fold_data = run.predictions[ + run.predictions["fold"] == fold_id + ].sort_values( by="row_id", ) saved_y_pred = saved_fold_data["prediction"].values @@ -295,6 +323,10 @@ def assert_run_prediction_data(task, run, model): assert_method(y_pred, saved_y_pred) assert_method(y_test, saved_y_test) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_publish_with_local_loaded_flow(self): @@ -323,7 +355,9 @@ def test_publish_with_local_loaded_flow(self): # Make sure that the prediction data stored in the run is correct. 
self.assert_run_prediction_data(task, run, clone(model)) - cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) + cache_path = os.path.join( + self.workdir, "runs", str(random.getrandbits(128)) + ) run.to_filesystem(cache_path) # obtain run from filesystem loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) @@ -339,6 +373,10 @@ def test_publish_with_local_loaded_flow(self): assert openml.flows.flow_exists(flow.name, flow.external_version) openml.runs.get_run(loaded_run.run_id) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() @pytest.mark.skip(reason="https://github.com/openml/openml-python/issues/1586") @@ -362,7 +400,9 @@ def test_offline_and_online_run_identical(self): assert not openml.flows.flow_exists(flow.name, flow.external_version) # Load from filesystem - cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) + cache_path = os.path.join( + self.workdir, "runs", str(random.getrandbits(128)) + ) run.to_filesystem(cache_path) loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) @@ -396,5 +436,7 @@ def test_run_setup_string_included_in_xml(self): assert "oml:setup_string" in run_dict assert run_dict["oml:setup_string"] == SETUP_STRING - recreated_run = openml.runs.functions._create_run_from_xml(xml, from_server=False) + recreated_run = openml.runs.functions._create_run_from_xml( + xml, from_server=False + ) assert recreated_run.setup_string == SETUP_STRING diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 8d5a00f9b..3f7cc12e9 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -40,7 +40,8 @@ OpenMLNotAuthorizedError, OpenMLServerException, ) -#from openml.extensions.sklearn import cat, cont + +# from openml.extensions.sklearn import cat, cont from openml.runs.functions import ( 
_run_task_get_arffcontent, delete_run, @@ -132,9 +133,9 @@ def _wait_for_processed_run(self, run_id, max_waiting_time_seconds): time.sleep(10) continue - assert len(run.evaluations) > 0, ( - "Expect not-None evaluations to always contain elements." - ) + assert ( + len(run.evaluations) > 0 + ), "Expect not-None evaluations to always contain elements." return raise RuntimeError( @@ -143,7 +144,10 @@ def _wait_for_processed_run(self, run_id, max_waiting_time_seconds): ) def _assert_predictions_equal(self, predictions, predictions_prime): - assert np.array(predictions_prime["data"]).shape == np.array(predictions["data"]).shape + assert ( + np.array(predictions_prime["data"]).shape + == np.array(predictions["data"]).shape + ) # The original search model does not submit confidence # bounds, so we can not compare the arff line @@ -164,7 +168,9 @@ def _assert_predictions_equal(self, predictions, predictions_prime): else: assert val_1 == val_2 - def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create_task_obj): + def _rerun_model_and_compare_predictions( + self, run_id, model_prime, seed, create_task_obj + ): run = openml.runs.get_run(run_id) # TODO: assert holdout task @@ -251,9 +257,13 @@ def _perform_run( "sklearn.pipeline.Pipeline", ] if Version(sklearn.__version__) < Version("0.22"): - classes_without_random_state.append("sklearn.linear_model.base.LinearRegression") + classes_without_random_state.append( + "sklearn.linear_model.base.LinearRegression" + ) else: - classes_without_random_state.append("sklearn.linear_model._base.LinearRegression") + classes_without_random_state.append( + "sklearn.linear_model._base.LinearRegression" + ) def _remove_random_state(flow): if "random_state" in flow.parameters: @@ -305,9 +315,12 @@ def _remove_random_state(flow): flow_server = self.extension.model_to_flow(clf_server) if flow.class_name not in classes_without_random_state: - error_msg = "Flow class %s (id=%d) does not have a random state parameter" % ( - 
flow.class_name, - flow.flow_id, + error_msg = ( + "Flow class %s (id=%d) does not have a random state parameter" + % ( + flow.class_name, + flow.flow_id, + ) ) assert "random_state" in flow.parameters, error_msg # If the flow is initialized from a model without a random @@ -397,6 +410,10 @@ def _check_sample_evaluations( assert evaluation > 0 assert evaluation < max_time_allowed + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_run_regression_on_classif_task(self): @@ -407,13 +424,18 @@ def test_run_regression_on_classif_task(self): # internally dataframe is loaded and targets are categorical # which LinearRegression() cannot handle with pytest.raises( - AttributeError, match="'LinearRegression' object has no attribute 'classes_'" + AttributeError, + match="'LinearRegression' object has no attribute 'classes_'", ): openml.runs.run_model_on_task( model=clf, task=task, ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_check_erronous_sklearn_flow_fails(self): @@ -479,7 +501,9 @@ def determine_grid_size(param_grid): grid_iterations += determine_grid_size(sub_grid) return grid_iterations else: - raise TypeError("Param Grid should be of type list (GridSearch only) or dict") + raise TypeError( + "Param Grid should be of type list (GridSearch only) or dict" + ) run = self._perform_run( task_id, @@ -627,6 +651,10 @@ def _run_and_upload_regression( sentinel=sentinel, ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_run_and_upload_logistic_regression(self): @@ -634,8 +662,14 @@ def test_run_and_upload_logistic_regression(self): task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"] n_missing_vals = 
self.TEST_SERVER_TASK_SIMPLE["n_missing_vals"] n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] - self._run_and_upload_classification(lr, task_id, n_missing_vals, n_test_obs, "62501") + self._run_and_upload_classification( + lr, task_id, n_missing_vals, n_test_obs, "62501" + ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_run_and_upload_linear_regression(self): @@ -656,7 +690,9 @@ def test_run_and_upload_linear_regression(self): if e.code == 614: # Task already exists # the exception message contains the task_id that was matched in the format # 'Task already exists. - matched id(s): [xxxx]' - task_id = ast.literal_eval(e.message.split("matched id(s):")[-1].strip())[0] + task_id = ast.literal_eval( + e.message.split("matched id(s):")[-1].strip() + )[0] else: raise Exception(repr(e)) # mark to remove the uploaded task @@ -665,8 +701,14 @@ def test_run_and_upload_linear_regression(self): n_missing_vals = self.TEST_SERVER_TASK_REGRESSION["n_missing_vals"] n_test_obs = self.TEST_SERVER_TASK_REGRESSION["n_test_obs"] - self._run_and_upload_regression(lr, task_id, n_missing_vals, n_test_obs, "62501") + self._run_and_upload_regression( + lr, task_id, n_missing_vals, n_test_obs, "62501" + ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_run_and_upload_pipeline_dummy_pipeline(self): @@ -679,8 +721,14 @@ def test_run_and_upload_pipeline_dummy_pipeline(self): task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"] n_missing_vals = self.TEST_SERVER_TASK_SIMPLE["n_missing_vals"] n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] - self._run_and_upload_classification(pipeline1, task_id, n_missing_vals, n_test_obs, "62501") + self._run_and_upload_classification( + pipeline1, task_id, n_missing_vals, n_test_obs, 
"62501" + ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @unittest.skipIf( Version(sklearn.__version__) < Version("0.20"), @@ -706,7 +754,9 @@ def get_ct_cf(nominal_indices, numeric_indices): "nominal", make_pipeline( CustomImputer(strategy="most_frequent"), - sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore"), + sklearn.preprocessing.OneHotEncoder( + handle_unknown="ignore" + ), ), nominal_indices, ), @@ -782,7 +832,9 @@ def test_run_and_upload_knn_pipeline(self, warnings_mock): task_id = self.TEST_SERVER_TASK_MISSING_VALS["task_id"] n_missing_vals = self.TEST_SERVER_TASK_MISSING_VALS["n_missing_vals"] n_test_obs = self.TEST_SERVER_TASK_MISSING_VALS["n_test_obs"] - self._run_and_upload_classification(pipeline2, task_id, n_missing_vals, n_test_obs, "62501") + self._run_and_upload_classification( + pipeline2, task_id, n_missing_vals, n_test_obs, "62501" + ) # The warning raised is: # "The total space of parameters 8 is smaller than n_iter=10. # Running 8 iterations. For exhaustive searches, use GridSearchCV." 
@@ -798,15 +850,24 @@ def test_run_and_upload_knn_pipeline(self, warnings_mock): call_count += 1 assert call_count == 3 + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_run_and_upload_gridsearch(self): estimator_name = ( - "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator" + "base_estimator" + if Version(sklearn.__version__) < Version("1.4") + else "estimator" ) gridsearch = GridSearchCV( BaggingClassifier(**{estimator_name: SVC()}), - {f"{estimator_name}__C": [0.01, 0.1, 10], f"{estimator_name}__gamma": [0.01, 0.1, 10]}, + { + f"{estimator_name}__C": [0.01, 0.1, 10], + f"{estimator_name}__gamma": [0.01, 0.1, 10], + }, cv=3, ) task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"] @@ -821,6 +882,10 @@ def test_run_and_upload_gridsearch(self): ) assert len(run.trace.trace_iterations) == 9 + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_run_and_upload_randomsearch(self): @@ -854,6 +919,10 @@ def test_run_and_upload_randomsearch(self): trace = openml.runs.get_run_trace(run.run_id) assert len(trace.trace_iterations) == 5 + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_run_and_upload_maskedarrays(self): @@ -882,6 +951,10 @@ def test_run_and_upload_maskedarrays(self): ########################################################################## + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_learning_curve_task_1(self): @@ -905,8 +978,14 @@ def test_learning_curve_task_1(self): pipeline1, flow_expected_rsv="62501", ) - 
self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples) + self._check_sample_evaluations( + run.sample_evaluations, num_repeats, num_folds, num_samples + ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_learning_curve_task_2(self): @@ -942,8 +1021,14 @@ def test_learning_curve_task_2(self): pipeline2, flow_expected_rsv="62501", ) - self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples) + self._check_sample_evaluations( + run.sample_evaluations, num_repeats, num_folds, num_samples + ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @unittest.skipIf( Version(sklearn.__version__) < Version("0.21"), @@ -1023,6 +1108,10 @@ def _test_local_evaluations(self, run): assert alt_scores[idx] >= 0 assert alt_scores[idx] <= 1 + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_local_run_swapped_parameter_order_model(self): @@ -1039,6 +1128,10 @@ def test_local_run_swapped_parameter_order_model(self): self._test_local_evaluations(run) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.skip("https://github.com/openml/openml-python/issues/1586") @unittest.skipIf( @@ -1108,6 +1201,10 @@ def test_online_run_metric_score(self): self._test_local_evaluations(run) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @unittest.skipIf( Version(sklearn.__version__) < Version("0.20"), @@ -1141,7 +1238,9 @@ def test_initialize_model_from_run(self): if e.code == 614: # Task already exists 
# the exception message contains the task_id that was matched in the format # 'Task already exists. - matched id(s): [xxxx]' - task_id = ast.literal_eval(e.message.split("matched id(s):")[-1].strip())[0] + task_id = ast.literal_eval( + e.message.split("matched id(s):")[-1].strip() + )[0] else: raise Exception(repr(e)) # mark to remove the uploaded task @@ -1170,6 +1269,10 @@ def test_initialize_model_from_run(self): assert flowS.components["Imputer"].parameters["strategy"] == '"most_frequent"' assert flowS.components["VarianceThreshold"].parameters["threshold"] == "0.05" + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @unittest.skipIf( Version(sklearn.__version__) < Version("0.20"), @@ -1230,6 +1333,10 @@ def test__run_exists(self): run_ids = run_exists(task.task_id, setup_exists) assert run_ids, (run_ids, clf) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_run_with_illegal_flow_id(self): @@ -1243,13 +1350,19 @@ def test_run_with_illegal_flow_id(self): expected_message_regex = ( r"Flow does not exist on the server, but 'flow.flow_id' is not None." ) - with pytest.raises(openml.exceptions.PyOpenMLError, match=expected_message_regex): + with pytest.raises( + openml.exceptions.PyOpenMLError, match=expected_message_regex + ): openml.runs.run_flow_on_task( task=task, flow=flow, avoid_duplicate_runs=True, ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_run_with_illegal_flow_id_after_load(self): @@ -1277,11 +1390,19 @@ def test_run_with_illegal_flow_id_after_load(self): expected_message_regex = ( r"Flow does not exist on the server, but 'flow.flow_id' is not None." 
) - with pytest.raises(openml.exceptions.PyOpenMLError, match=expected_message_regex): + with pytest.raises( + openml.exceptions.PyOpenMLError, match=expected_message_regex + ): loaded_run.publish() TestBase._mark_entity_for_removal("run", loaded_run.run_id) - TestBase.logger.info(f"collected from test_run_functions: {loaded_run.run_id}") + TestBase.logger.info( + f"collected from test_run_functions: {loaded_run.run_id}" + ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_run_with_illegal_flow_id_1(self): @@ -1293,21 +1414,31 @@ def test_run_with_illegal_flow_id_1(self): try: flow_orig.publish() # ensures flow exist on server TestBase._mark_entity_for_removal("flow", flow_orig.flow_id, flow_orig.name) - TestBase.logger.info(f"collected from test_run_functions: {flow_orig.flow_id}") + TestBase.logger.info( + f"collected from test_run_functions: {flow_orig.flow_id}" + ) except openml.exceptions.OpenMLServerException: # flow already exists pass flow_new = self.extension.model_to_flow(clf) flow_new.flow_id = -1 - expected_message_regex = "Local flow_id does not match server flow_id: '-1' vs '[0-9]+'" - with pytest.raises(openml.exceptions.PyOpenMLError, match=expected_message_regex): + expected_message_regex = ( + "Local flow_id does not match server flow_id: '-1' vs '[0-9]+'" + ) + with pytest.raises( + openml.exceptions.PyOpenMLError, match=expected_message_regex + ): openml.runs.run_flow_on_task( task=task, flow=flow_new, avoid_duplicate_runs=True, ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_run_with_illegal_flow_id_1_after_load(self): @@ -1319,7 +1450,9 @@ def test_run_with_illegal_flow_id_1_after_load(self): try: flow_orig.publish() # ensures flow exist on server 
TestBase._mark_entity_for_removal("flow", flow_orig.flow_id, flow_orig.name) - TestBase.logger.info(f"collected from test_run_functions: {flow_orig.flow_id}") + TestBase.logger.info( + f"collected from test_run_functions: {flow_orig.flow_id}" + ) except openml.exceptions.OpenMLServerException: # flow already exists pass @@ -1340,13 +1473,19 @@ def test_run_with_illegal_flow_id_1_after_load(self): run.to_filesystem(cache_path) loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) - expected_message_regex = "Local flow_id does not match server flow_id: '-1' vs '[0-9]+'" + expected_message_regex = ( + "Local flow_id does not match server flow_id: '-1' vs '[0-9]+'" + ) self.assertRaisesRegex( openml.exceptions.PyOpenMLError, expected_message_regex, loaded_run.publish, ) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @unittest.skipIf( Version(sklearn.__version__) < Version("0.20"), @@ -1577,6 +1716,10 @@ def test_get_runs_list_by_tag(self): runs = openml.runs.list_runs(tag="curves", size=2) assert len(runs) >= 1 + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @unittest.skipIf( Version(sklearn.__version__) < Version("0.20"), @@ -1598,7 +1741,10 @@ def test_run_on_dataset_with_missing_labels_dataframe(self): cont_imp = make_pipeline(CustomImputer(), StandardScaler()) ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) model = Pipeline( - steps=[("preprocess", ct), ("estimator", sklearn.tree.DecisionTreeClassifier())], + steps=[ + ("preprocess", ct), + ("estimator", sklearn.tree.DecisionTreeClassifier()), + ], ) # build a sklearn classifier data_content, _, _, _ = _run_task_get_arffcontent( @@ -1614,6 +1760,10 @@ def test_run_on_dataset_with_missing_labels_dataframe(self): # repeat, fold, row_id, 6 confidences, prediction and correct label assert len(row) 
== 12 + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @unittest.skipIf( Version(sklearn.__version__) < Version("0.20"), @@ -1642,7 +1792,10 @@ def test_run_on_dataset_with_missing_labels_array(self): cont_imp = make_pipeline(CustomImputer(), StandardScaler()) ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) model = Pipeline( - steps=[("preprocess", ct), ("estimator", sklearn.tree.DecisionTreeClassifier())], + steps=[ + ("preprocess", ct), + ("estimator", sklearn.tree.DecisionTreeClassifier()), + ], ) # build a sklearn classifier data_content, _, _, _ = _run_task_get_arffcontent( @@ -1668,6 +1821,10 @@ def test_get_uncached_run(self): with pytest.raises(openml.exceptions.OpenMLCacheException): openml.runs.functions._get_cached_run(10) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_run_flow_on_task_downloaded_flow(self): @@ -1696,7 +1853,8 @@ def test_format_prediction_non_supervised(self): clustering = openml.tasks.get_task(126033, download_data=False) ignored_input = [0] * 5 with pytest.raises( - NotImplementedError, match=r"Formatting for is not supported." 
+ NotImplementedError, + match=r"Formatting for is not supported.", ): format_prediction(clustering, *ignored_input) @@ -1707,7 +1865,9 @@ def test_format_prediction_classification_no_probabilities(self): download_data=False, ) ignored_input = [0] * 5 - with pytest.raises(ValueError, match="`proba` is required for classification task"): + with pytest.raises( + ValueError, match="`proba` is required for classification task" + ): format_prediction(classification, *ignored_input, proba=None) @pytest.mark.test_server() @@ -1718,8 +1878,12 @@ def test_format_prediction_classification_incomplete_probabilities(self): ) ignored_input = [0] * 5 incomplete_probabilities = {c: 0.2 for c in classification.class_labels[1:]} - with pytest.raises(ValueError, match="Each class should have a predicted probability"): - format_prediction(classification, *ignored_input, proba=incomplete_probabilities) + with pytest.raises( + ValueError, match="Each class should have a predicted probability" + ): + format_prediction( + classification, *ignored_input, proba=incomplete_probabilities + ) @pytest.mark.test_server() def test_format_prediction_task_without_classlabels_set(self): @@ -1729,16 +1893,24 @@ def test_format_prediction_task_without_classlabels_set(self): ) classification.class_labels = None ignored_input = [0] * 5 - with pytest.raises(ValueError, match="The classification task must have class labels set"): + with pytest.raises( + ValueError, match="The classification task must have class labels set" + ): format_prediction(classification, *ignored_input, proba={}) @pytest.mark.test_server() def test_format_prediction_task_learning_curve_sample_not_set(self): - learning_curve = openml.tasks.get_task(801, download_data=False) # diabetes;crossvalidation + learning_curve = openml.tasks.get_task( + 801, download_data=False + ) # diabetes;crossvalidation probabilities = {c: 0.2 for c in learning_curve.class_labels} ignored_input = [0] * 5 - with pytest.raises(ValueError, match="`sample` 
can not be none for LearningCurveTask"): - format_prediction(learning_curve, *ignored_input, sample=None, proba=probabilities) + with pytest.raises( + ValueError, match="`sample` can not be none for LearningCurveTask" + ): + format_prediction( + learning_curve, *ignored_input, sample=None, proba=probabilities + ) @pytest.mark.test_server() def test_format_prediction_task_regression(self): @@ -1756,7 +1928,9 @@ def test_format_prediction_task_regression(self): if e.code == 614: # Task already exists # the exception message contains the task_id that was matched in the format # 'Task already exists. - matched id(s): [xxxx]' - task_id = ast.literal_eval(e.message.split("matched id(s):")[-1].strip())[0] + task_id = ast.literal_eval( + e.message.split("matched id(s):")[-1].strip() + )[0] else: raise Exception(repr(e)) # mark to remove the uploaded task @@ -1786,12 +1960,16 @@ def test_delete_run(self): task = openml.tasks.get_task(32) # diabetes; crossvalidation run = openml.runs.run_model_on_task( - model=clf, task=task, seed=rs, + model=clf, + task=task, + seed=rs, ) run.publish() with pytest.raises(openml.exceptions.OpenMLRunsExistError): - openml.runs.run_model_on_task(model=clf, task=task, seed=rs, avoid_duplicate_runs=True) + openml.runs.run_model_on_task( + model=clf, task=task, seed=rs, avoid_duplicate_runs=True + ) TestBase._mark_entity_for_removal("run", run.run_id) TestBase.logger.info(f"collected from test_run_functions: {run.run_id}") @@ -1799,7 +1977,9 @@ def test_delete_run(self): _run_id = run.run_id assert delete_run(_run_id) - @pytest.mark.skip(reason="run id is in problematic state on test server due to PR#1454") + @pytest.mark.skip( + reason="run id is in problematic state on test server due to PR#1454" + ) @unittest.skipIf( Version(sklearn.__version__) < Version("0.20"), reason="SimpleImputer doesn't handle mixed type DataFrame as input", @@ -1813,7 +1993,7 @@ def test_initialize_model_from_run_nonstrict(self): @mock.patch.object(requests.Session, 
"delete") -def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key): +def test_delete_run_not_owned(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_owned.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -1826,13 +2006,13 @@ def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key): ): openml.runs.delete_run(40_000) - run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/40000" + run_url = test_server_v1 + "run/40000" assert run_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") -def test_delete_run_success(mock_delete, test_files_directory, test_api_key): +def test_delete_run_success(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_successful.xml" mock_delete.return_value = create_request_response( status_code=200, @@ -1842,13 +2022,13 @@ def test_delete_run_success(mock_delete, test_files_directory, test_api_key): success = openml.runs.delete_run(10591880) assert success - run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/10591880" + run_url = test_server_v1 + "run/10591880" assert run_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") -def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key): +def test_delete_unknown_run(mock_delete, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "runs" / 
"run_delete_not_exist.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -1861,20 +2041,24 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key): ): openml.runs.delete_run(9_999_999) - run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/9999999" + run_url = test_server_v1 + "run/9999999" assert run_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + assert test_apikey_v1 == mock_delete.call_args.kwargs.get("params", {}).get("api_key") +@pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", +) @pytest.mark.sklearn() @unittest.skipIf( Version(sklearn.__version__) < Version("0.21"), reason="couldn't perform local tests successfully w/o bloating RAM", - ) +) @unittest.skipIf( Version(sklearn.__version__) >= Version("1.8"), reason="predictions differ significantly", - ) +) @mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs") @pytest.mark.test_server() def test__run_task_get_arffcontent_2(parallel_mock): @@ -1903,8 +2087,11 @@ def test__run_task_get_arffcontent_2(parallel_mock): ] ) n_jobs = 2 - backend = "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing" + backend = ( + "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing" + ) from openml_sklearn import SklearnExtension + extension = SklearnExtension() with parallel_backend(backend, n_jobs=n_jobs): res = openml.runs.functions._run_task_get_arffcontent( @@ -1948,11 +2135,15 @@ def test__run_task_get_arffcontent_2(parallel_mock): ) +@pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", +) @pytest.mark.sklearn() @unittest.skipIf( Version(sklearn.__version__) < Version("0.21"), reason="couldn't perform local tests successfully w/o bloating RAM", - ) +) 
@mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs") @pytest.mark.parametrize( ("n_jobs", "backend", "call_count"), @@ -1961,18 +2152,28 @@ def test__run_task_get_arffcontent_2(parallel_mock): # spawns multiple processes if n_jobs != 1, which means the mock is not applied. (2, None, 0), (-1, None, 0), - (1, None, 10), # with n_jobs=1 the mock *is* applied, since there is no new subprocess + ( + 1, + None, + 10, + ), # with n_jobs=1 the mock *is* applied, since there is no new subprocess (1, "sequential", 10), (1, "threading", 10), - (-1, "threading", 10), # the threading backend does preserve mocks even with parallelizing - ] + ( + -1, + "threading", + 10, + ), # the threading backend does preserve mocks even with parallelizing + ], ) @pytest.mark.test_server() def test_joblib_backends(parallel_mock, n_jobs, backend, call_count): """Tests evaluation of a run using various joblib backends and n_jobs.""" if backend is None: backend = ( - "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing" + "loky" + if Version(joblib.__version__) > Version("0.11") + else "multiprocessing" ) task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index 30943ea70..12bf3fc88 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -34,6 +34,10 @@ def setUp(self): self.extension = SklearnExtension() super().setUp() + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_nonexisting_setup_exists(self): @@ -45,7 +49,9 @@ def test_nonexisting_setup_exists(self): flow.name = f"TEST{sentinel}{flow.name}" flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") 
+ TestBase.logger.info( + f"collected from {__file__.split('/')[-1]}: {flow.flow_id}" + ) # although the flow exists (created as of previous statement), # we can be sure there are no setups (yet) as it was just created @@ -58,7 +64,9 @@ def _existing_setup_exists(self, classif): flow.name = f"TEST{get_sentinel()}{flow.name}" flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") + TestBase.logger.info( + f"collected from {__file__.split('/')[-1]}: {flow.flow_id}" + ) # although the flow exists, we can be sure there are no # setups (yet) as it hasn't been ran @@ -82,6 +90,10 @@ def _existing_setup_exists(self, classif): setup_id = openml.setups.setup_exists(flow) assert setup_id == run.setup_id + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_existing_setup_exists_1(self): @@ -98,12 +110,20 @@ def side_effect(self): nb = sklearn.naive_bayes.GaussianNB() self._existing_setup_exists(nb) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_exisiting_setup_exists_2(self): # Check a flow with one hyperparameter self._existing_setup_exists(sklearn.naive_bayes.GaussianNB()) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.sklearn() @pytest.mark.test_server() def test_existing_setup_exists_3(self): @@ -161,10 +181,14 @@ def test_list_setups_output_format(self): flow_id = 6794 setups = openml.setups.list_setups(flow=flow_id, size=10) assert isinstance(setups, dict) - assert isinstance(setups[next(iter(setups.keys()))], openml.setups.setup.OpenMLSetup) + assert isinstance( + setups[next(iter(setups.keys()))], 
openml.setups.setup.OpenMLSetup + ) assert len(setups) == 10 - setups = openml.setups.list_setups(flow=flow_id, size=10, output_format="dataframe") + setups = openml.setups.list_setups( + flow=flow_id, size=10, output_format="dataframe" + ) assert isinstance(setups, pd.DataFrame) assert len(setups) == 10 diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index df3c0a3b6..3f2587fd5 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -3,16 +3,17 @@ import os import unittest -from typing import cast from unittest import mock -import pandas as pd import pytest import requests import openml from openml import OpenMLSplit, OpenMLTask -from openml.exceptions import OpenMLCacheException, OpenMLNotAuthorizedError, OpenMLServerException +from openml.exceptions import ( + OpenMLNotAuthorizedError, + OpenMLServerException, +) from openml.tasks import TaskType from openml.testing import TestBase, create_request_response @@ -26,36 +27,6 @@ def setUp(self): def tearDown(self): super().tearDown() - @pytest.mark.test_server() - def test__get_cached_tasks(self): - openml.config.set_root_cache_directory(self.static_cache_dir) - tasks = openml.tasks.functions._get_cached_tasks() - assert isinstance(tasks, dict) - assert len(tasks) == 3 - assert isinstance(next(iter(tasks.values())), OpenMLTask) - - @pytest.mark.test_server() - def test__get_cached_task(self): - openml.config.set_root_cache_directory(self.static_cache_dir) - task = openml.tasks.functions._get_cached_task(1) - assert isinstance(task, OpenMLTask) - - def test__get_cached_task_not_cached(self): - openml.config.set_root_cache_directory(self.static_cache_dir) - self.assertRaisesRegex( - OpenMLCacheException, - "Task file for tid 2 not cached", - openml.tasks.functions._get_cached_task, - 2, - ) - - @pytest.mark.test_server() - def test__get_estimation_procedure_list(self): - estimation_procedures = 
openml.tasks.functions._get_estimation_procedure_list() - assert isinstance(estimation_procedures, list) - assert isinstance(estimation_procedures[0], dict) - assert estimation_procedures[0]["task_type_id"] == TaskType.SUPERVISED_CLASSIFICATION - @pytest.mark.production_server() @pytest.mark.xfail(reason="failures_issue_1544", strict=False) def test_list_clustering_task(self): @@ -138,11 +109,6 @@ def test_list_tasks_per_type_paginate(self): assert j == task["ttid"] self._check_task(task) - @pytest.mark.test_server() - def test__get_task(self): - openml.config.set_root_cache_directory(self.static_cache_dir) - openml.tasks.get_task(1882) - @unittest.skip( "Please await outcome of discussion: https://github.com/openml/OpenML/issues/776", ) @@ -153,20 +119,10 @@ def test__get_task_live(self): # https://github.com/openml/openml-python/issues/378 openml.tasks.get_task(34536) - @pytest.mark.test_server() - def test_get_task(self): - task = openml.tasks.get_task(1, download_data=True) # anneal; crossvalidation - assert isinstance(task, OpenMLTask) - assert os.path.exists( - os.path.join(openml.config.get_cache_directory(), "tasks", "1", "task.xml") - ) - assert not os.path.exists( - os.path.join(openml.config.get_cache_directory(), "tasks", "1", "datasplits.arff") - ) - assert os.path.exists( - os.path.join(openml.config.get_cache_directory(), "datasets", "1", "dataset_1.pq") - ) - + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.test_server() def test_get_task_lazy(self): task = openml.tasks.get_task(2, download_data=False) # anneal; crossvalidation @@ -177,16 +133,22 @@ def test_get_task_lazy(self): assert task.class_labels == ["1", "2", "3", "4", "5", "U"] assert not os.path.exists( - os.path.join(openml.config.get_cache_directory(), "tasks", "2", "datasplits.arff") + os.path.join( + openml.config.get_cache_directory(), "tasks", "2", "datasplits.arff" + ) ) # Since the 
download_data=False is propagated to get_dataset assert not os.path.exists( - os.path.join(openml.config.get_cache_directory(), "datasets", "2", "dataset.arff") + os.path.join( + openml.config.get_cache_directory(), "datasets", "2", "dataset.arff" + ) ) task.download_split() assert os.path.exists( - os.path.join(openml.config.get_cache_directory(), "tasks", "2", "datasplits.arff") + os.path.join( + openml.config.get_cache_directory(), "tasks", "2", "datasplits.arff" + ) ) @mock.patch("openml.tasks.functions.get_dataset") @@ -208,12 +170,6 @@ def assert_and_raise(*args, **kwargs): # Now the file should no longer exist assert not os.path.exists(os.path.join(os.getcwd(), "tasks", "1", "tasks.xml")) - @pytest.mark.test_server() - def test_get_task_with_cache(self): - openml.config.set_root_cache_directory(self.static_cache_dir) - task = openml.tasks.get_task(1) - assert isinstance(task, OpenMLTask) - @pytest.mark.production_server() def test_get_task_different_types(self): self.use_production_server() @@ -224,13 +180,19 @@ def test_get_task_different_types(self): # Issue 538, get_task failing with clustering task. 
openml.tasks.functions.get_task(126033) + @pytest.mark.skipif( + os.getenv("OPENML_USE_LOCAL_SERVICES") == "true", + reason="Pending resolution of #1657", + ) @pytest.mark.test_server() def test_download_split(self): task = openml.tasks.get_task(1) # anneal; crossvalidation split = task.download_split() assert type(split) == OpenMLSplit assert os.path.exists( - os.path.join(openml.config.get_cache_directory(), "tasks", "1", "datasplits.arff") + os.path.join( + openml.config.get_cache_directory(), "tasks", "1", "datasplits.arff" + ) ) def test_deletion_of_cache_dir(self): @@ -244,48 +206,47 @@ def test_deletion_of_cache_dir(self): assert not os.path.exists(tid_cache_dir) -@mock.patch.object(requests.Session, "delete") -def test_delete_task_not_owned(mock_delete, test_files_directory, test_api_key): +@mock.patch.object(requests.Session, "request") +def test_delete_task_not_owned(mock_request, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_not_owned.xml" - mock_delete.return_value = create_request_response( + mock_request.return_value = create_request_response( status_code=412, content_filepath=content_file, ) - with pytest.raises( OpenMLNotAuthorizedError, match="The task can not be deleted because it was not uploaded by you.", ): openml.tasks.delete_task(1) - task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/1" - assert task_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + task_url = test_server_v1 + "task/1" + assert task_url == mock_request.call_args.kwargs.get("url") + assert test_apikey_v1 == mock_request.call_args.kwargs.get("params", {}).get("api_key") -@mock.patch.object(requests.Session, "delete") -def test_delete_task_with_run(mock_delete, test_files_directory, test_api_key): +@mock.patch.object(requests.Session, "request") +def test_delete_task_with_run(mock_request, 
test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_has_runs.xml" - mock_delete.return_value = create_request_response( + mock_request.return_value = create_request_response( status_code=412, content_filepath=content_file, ) with pytest.raises( - OpenMLNotAuthorizedError, - match="The task can not be deleted because it still has associated entities:", + OpenMLServerException, + match="Task does not exist", ): openml.tasks.delete_task(3496) - task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/3496" - assert task_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + task_url = test_server_v1 + "task/3496" + assert task_url == mock_request.call_args.kwargs.get("url") + assert test_apikey_v1 == mock_request.call_args.kwargs.get("params", {}).get("api_key") -@mock.patch.object(requests.Session, "delete") -def test_delete_success(mock_delete, test_files_directory, test_api_key): +@mock.patch.object(requests.Session, "request") +def test_delete_success(mock_request, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_successful.xml" - mock_delete.return_value = create_request_response( + mock_request.return_value = create_request_response( status_code=200, content_filepath=content_file, ) @@ -293,15 +254,15 @@ def test_delete_success(mock_delete, test_files_directory, test_api_key): success = openml.tasks.delete_task(361323) assert success - task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/361323" - assert task_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + task_url = test_server_v1 + "task/361323" + assert task_url == mock_request.call_args.kwargs.get("url") + assert test_apikey_v1 == mock_request.call_args.kwargs.get("params", 
{}).get("api_key") -@mock.patch.object(requests.Session, "delete") -def test_delete_unknown_task(mock_delete, test_files_directory, test_api_key): +@mock.patch.object(requests.Session, "request") +def test_delete_unknown_task(mock_request, test_files_directory, test_server_v1, test_apikey_v1): content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_not_exist.xml" - mock_delete.return_value = create_request_response( + mock_request.return_value = create_request_response( status_code=412, content_filepath=content_file, ) @@ -312,6 +273,6 @@ def test_delete_unknown_task(mock_delete, test_files_directory, test_api_key): ): openml.tasks.delete_task(9_999_999) - task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/9999999" - assert task_url == mock_delete.call_args.args[0] - assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") + task_url = test_server_v1 + "task/9999999" + assert task_url == mock_request.call_args.kwargs.get("url") + assert test_apikey_v1 == mock_request.call_args.kwargs.get("params", {}).get("api_key") diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 75f24ebf0..f711e2cc1 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -44,13 +44,13 @@ def min_number_evaluations_on_test_server() -> int: def _mocked_perform_api_call(call, request_method): - url = openml.config.server + "/" + call + url = openml.config.server + call return openml._api_calls._download_text_file(url) @pytest.mark.test_server() def test_list_all(): - openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) + openml.utils._list_all(listing_call=openml.tasks.functions.list_tasks) @pytest.mark.test_server() @@ -65,7 +65,7 @@ def test_list_all_with_multiple_batches(min_number_tasks_on_test_server): # batches and at the same time do as few batches (roundtrips) as possible. 
batch_size = min_number_tasks_on_test_server - 1 batches = openml.utils._list_all( - listing_call=openml.tasks.functions._list_tasks, + listing_call=openml.tasks.functions.list_tasks, batch_size=batch_size, ) assert len(batches) >= 2