From 83a2f2c62dc993603487e8b19b6e329b7475510e Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 20 Mar 2026 03:27:24 +0000 Subject: [PATCH 1/2] fix: sanitize endpoint path params --- src/cas_parser/_utils/__init__.py | 1 + src/cas_parser/_utils/_path.py | 127 ++++++++++++++++++++++ src/cas_parser/resources/cdsl/fetch.py | 6 +- src/cas_parser/resources/inbound_email.py | 10 +- tests/test_utils/test_path.py | 89 +++++++++++++++ 5 files changed, 225 insertions(+), 8 deletions(-) create mode 100644 src/cas_parser/_utils/_path.py create mode 100644 tests/test_utils/test_path.py diff --git a/src/cas_parser/_utils/__init__.py b/src/cas_parser/_utils/__init__.py index dc64e29..10cb66d 100644 --- a/src/cas_parser/_utils/__init__.py +++ b/src/cas_parser/_utils/__init__.py @@ -1,3 +1,4 @@ +from ._path import path_template as path_template from ._sync import asyncify as asyncify from ._proxy import LazyProxy as LazyProxy from ._utils import ( diff --git a/src/cas_parser/_utils/_path.py b/src/cas_parser/_utils/_path.py new file mode 100644 index 0000000..4d6e1e4 --- /dev/null +++ b/src/cas_parser/_utils/_path.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +import re +from typing import ( + Any, + Mapping, + Callable, +) +from urllib.parse import quote + +# Matches '.' or '..' where each dot is either literal or percent-encoded (%2e / %2E). +_DOT_SEGMENT_RE = re.compile(r"^(?:\.|%2[eE]){1,2}$") + +_PLACEHOLDER_RE = re.compile(r"\{(\w+)\}") + + +def _quote_path_segment_part(value: str) -> str: + """Percent-encode `value` for use in a URI path segment. + + Considers characters not in `pchar` set from RFC 3986 §3.3 to be unsafe. + https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 + """ + # quote() already treats unreserved characters (letters, digits, and -._~) + # as safe, so we only need to add sub-delims, ':', and '@'. + # Notably, unlike the default `safe` for quote(), / is unsafe and must be quoted. + return quote(value, safe="!$&'()*+,;=:@") + + +def _quote_query_part(value: str) -> str: + """Percent-encode `value` for use in a URI query string. + + Considers &, = and characters not in `query` set from RFC 3986 §3.4 to be unsafe. + https://datatracker.ietf.org/doc/html/rfc3986#section-3.4 + """ + return quote(value, safe="!$'()*+,;:@/?") + + +def _quote_fragment_part(value: str) -> str: + """Percent-encode `value` for use in a URI fragment. + + Considers characters not in `fragment` set from RFC 3986 §3.5 to be unsafe. + https://datatracker.ietf.org/doc/html/rfc3986#section-3.5 + """ + return quote(value, safe="!$&'()*+,;=:@/?") + + +def _interpolate( + template: str, + values: Mapping[str, Any], + quoter: Callable[[str], str], +) -> str: + """Replace {name} placeholders in `template`, quoting each value with `quoter`. + + Placeholder names are looked up in `values`. + + Raises: + KeyError: If a placeholder is not found in `values`. + """ + # re.split with a capturing group returns alternating + # [text, name, text, name, ..., text] elements. + parts = _PLACEHOLDER_RE.split(template) + + for i in range(1, len(parts), 2): + name = parts[i] + if name not in values: + raise KeyError(f"a value for placeholder {{{name}}} was not provided") + val = values[name] + if val is None: + parts[i] = "null" + elif isinstance(val, bool): + parts[i] = "true" if val else "false" + else: + parts[i] = quoter(str(values[name])) + + return "".join(parts) + + +def path_template(template: str, /, **kwargs: Any) -> str: + """Interpolate {name} placeholders in `template` from keyword arguments. + + Args: + template: The template string containing {name} placeholders. + **kwargs: Keyword arguments to interpolate into the template. + + Returns: + The template with placeholders interpolated and percent-encoded. + + Safe characters for percent-encoding are dependent on the URI component. + Placeholders in path and fragment portions are percent-encoded where the `segment` + and `fragment` sets from RFC 3986 respectively are considered safe. + Placeholders in the query portion are percent-encoded where the `query` set from + RFC 3986 §3.3 is considered safe except for = and & characters. + + Raises: + KeyError: If a placeholder is not found in `kwargs`. + ValueError: If resulting path contains /./ or /../ segments (including percent-encoded dot-segments). + """ + # Split the template into path, query, and fragment portions. + fragment_template: str | None = None + query_template: str | None = None + + rest = template + if "#" in rest: + rest, fragment_template = rest.split("#", 1) + if "?" in rest: + rest, query_template = rest.split("?", 1) + path_template = rest + + # Interpolate each portion with the appropriate quoting rules. + path_result = _interpolate(path_template, kwargs, _quote_path_segment_part) + + # Reject dot-segments (. and ..) in the final assembled path. The check + # runs after interpolation so that adjacent placeholders or a mix of static + # text and placeholders that together form a dot-segment are caught. + # Also reject percent-encoded dot-segments to protect against incorrectly + # implemented normalization in servers/proxies. + for segment in path_result.split("/"): + if _DOT_SEGMENT_RE.match(segment): + raise ValueError(f"Constructed path {path_result!r} contains dot-segment {segment!r} which is not allowed") + + result = path_result + if query_template is not None: + result += "?" + _interpolate(query_template, kwargs, _quote_query_part) + if fragment_template is not None: + result += "#" + _interpolate(fragment_template, kwargs, _quote_fragment_part) + + return result diff --git a/src/cas_parser/resources/cdsl/fetch.py b/src/cas_parser/resources/cdsl/fetch.py index 191c3a7..6198ad2 100644 --- a/src/cas_parser/resources/cdsl/fetch.py +++ b/src/cas_parser/resources/cdsl/fetch.py @@ -5,7 +5,7 @@ import httpx from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given -from ..._utils import maybe_transform, async_maybe_transform +from ..._utils import path_template, maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -138,7 +138,7 @@ def verify_otp( if not session_id: raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") return self._post( - f"/v4/cdsl/fetch/{session_id}/verify", + path_template("/v4/cdsl/fetch/{session_id}/verify", session_id=session_id), body=maybe_transform( { "otp": otp, @@ -269,7 +269,7 @@ async def verify_otp( if not session_id: raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") return await self._post( - f"/v4/cdsl/fetch/{session_id}/verify", + path_template("/v4/cdsl/fetch/{session_id}/verify", session_id=session_id), body=await async_maybe_transform( { "otp": otp, diff --git a/src/cas_parser/resources/inbound_email.py b/src/cas_parser/resources/inbound_email.py index 3e45f5d..adc1c15 100644 --- a/src/cas_parser/resources/inbound_email.py +++ b/src/cas_parser/resources/inbound_email.py @@ -9,7 +9,7 @@ from ..types import inbound_email_list_params, inbound_email_create_params from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given -from .._utils import maybe_transform, async_maybe_transform +from .._utils import path_template, maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( @@ -185,7 +185,7 @@ def retrieve( if not inbound_email_id: raise ValueError(f"Expected a non-empty value for `inbound_email_id` but received {inbound_email_id!r}") return self._get( - f"/v4/inbound-email/{inbound_email_id}", + path_template("/v4/inbound-email/{inbound_email_id}", inbound_email_id=inbound_email_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -274,7 +274,7 @@ def delete( if not inbound_email_id: raise ValueError(f"Expected a non-empty value for `inbound_email_id` but received {inbound_email_id!r}") return self._delete( - f"/v4/inbound-email/{inbound_email_id}", + path_template("/v4/inbound-email/{inbound_email_id}", inbound_email_id=inbound_email_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -440,7 +440,7 @@ async def retrieve( if not inbound_email_id: raise ValueError(f"Expected a non-empty value for `inbound_email_id` but received {inbound_email_id!r}") return await self._get( - f"/v4/inbound-email/{inbound_email_id}", + path_template("/v4/inbound-email/{inbound_email_id}", inbound_email_id=inbound_email_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -529,7 +529,7 @@ async def delete( if not inbound_email_id: raise ValueError(f"Expected a non-empty value for `inbound_email_id` but received {inbound_email_id!r}") return await self._delete( - f"/v4/inbound-email/{inbound_email_id}", + path_template("/v4/inbound-email/{inbound_email_id}", inbound_email_id=inbound_email_id), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/tests/test_utils/test_path.py b/tests/test_utils/test_path.py new file mode 100644 index 0000000..cf039c7 --- /dev/null +++ b/tests/test_utils/test_path.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +from typing import Any + +import pytest + +from cas_parser._utils._path import path_template + + +@pytest.mark.parametrize( + "template, kwargs, expected", + [ + ("/v1/{id}", dict(id="abc"), "/v1/abc"), + ("/v1/{a}/{b}", dict(a="x", b="y"), "/v1/x/y"), + ("/v1/{a}{b}/path/{c}?val={d}#{e}", dict(a="x", b="y", c="z", d="u", e="v"), "/v1/xy/path/z?val=u#v"), + ("/{w}/{w}", dict(w="echo"), "/echo/echo"), + ("/v1/static", {}, "/v1/static"), + ("", {}, ""), + ("/v1/?q={n}&count=10", dict(n=42), "/v1/?q=42&count=10"), + ("/v1/{v}", dict(v=None), "/v1/null"), + ("/v1/{v}", dict(v=True), "/v1/true"), + ("/v1/{v}", dict(v=False), "/v1/false"), + ("/v1/{v}", dict(v=".hidden"), "/v1/.hidden"), # dot prefix ok + ("/v1/{v}", dict(v="file.txt"), "/v1/file.txt"), # dot in middle ok + ("/v1/{v}", dict(v="..."), "/v1/..."), # triple dot ok + ("/v1/{a}{b}", dict(a=".", b="txt"), "/v1/.txt"), # dot var combining with adjacent to be ok + ("/items?q={v}#{f}", dict(v=".", f=".."), "/items?q=.#.."), # dots in query/fragment are fine + ( + "/v1/{a}?query={b}", + dict(a="../../other/endpoint", b="a&bad=true"), + "/v1/..%2F..%2Fother%2Fendpoint?query=a%26bad%3Dtrue", + ), + ("/v1/{val}", dict(val="a/b/c"), "/v1/a%2Fb%2Fc"), + ("/v1/{val}", dict(val="a/b/c?query=value"), "/v1/a%2Fb%2Fc%3Fquery=value"), + ("/v1/{val}", dict(val="a/b/c?query=value&bad=true"), "/v1/a%2Fb%2Fc%3Fquery=value&bad=true"), + ("/v1/{val}", dict(val="%20"), "/v1/%2520"), # escapes escape sequences in input + # Query: slash and ? are safe, # is not + ("/items?q={v}", dict(v="a/b"), "/items?q=a/b"), + ("/items?q={v}", dict(v="a?b"), "/items?q=a?b"), + ("/items?q={v}", dict(v="a#b"), "/items?q=a%23b"), + ("/items?q={v}", dict(v="a b"), "/items?q=a%20b"), + # Fragment: slash and ? are safe + ("/docs#{v}", dict(v="a/b"), "/docs#a/b"), + ("/docs#{v}", dict(v="a?b"), "/docs#a?b"), + # Path: slash, ? and # are all encoded + ("/v1/{v}", dict(v="a/b"), "/v1/a%2Fb"), + ("/v1/{v}", dict(v="a?b"), "/v1/a%3Fb"), + ("/v1/{v}", dict(v="a#b"), "/v1/a%23b"), + # same var encoded differently by component + ( + "/v1/{v}?q={v}#{v}", + dict(v="a/b?c#d"), + "/v1/a%2Fb%3Fc%23d?q=a/b?c%23d#a/b?c%23d", + ), + ("/v1/{val}", dict(val="x?admin=true"), "/v1/x%3Fadmin=true"), # query injection + ("/v1/{val}", dict(val="x#admin"), "/v1/x%23admin"), # fragment injection + ], +) +def test_interpolation(template: str, kwargs: dict[str, Any], expected: str) -> None: + assert path_template(template, **kwargs) == expected + + +def test_missing_kwarg_raises_key_error() -> None: + with pytest.raises(KeyError, match="org_id"): + path_template("/v1/{org_id}") + + +@pytest.mark.parametrize( + "template, kwargs", + [ + ("{a}/path", dict(a=".")), + ("{a}/path", dict(a="..")), + ("/v1/{a}", dict(a=".")), + ("/v1/{a}", dict(a="..")), + ("/v1/{a}/path", dict(a=".")), + ("/v1/{a}/path", dict(a="..")), + ("/v1/{a}{b}", dict(a=".", b=".")), # adjacent vars → ".." + ("/v1/{a}.", dict(a=".")), # var + static → ".." + ("/v1/{a}{b}", dict(a="", b=".")), # empty + dot → "." + ("/v1/%2e/{x}", dict(x="ok")), # encoded dot in static text + ("/v1/%2e./{x}", dict(x="ok")), # mixed encoded ".." in static + ("/v1/.%2E/{x}", dict(x="ok")), # mixed encoded ".." in static + ("/v1/{v}?q=1", dict(v="..")), + ("/v1/{v}#frag", dict(v="..")), + ], +) +def test_dot_segment_rejected(template: str, kwargs: dict[str, Any]) -> None: + with pytest.raises(ValueError, match="dot-segment"): + path_template(template, **kwargs) From a23503406ec58d941df891a5e70e71998e570e92 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 20 Mar 2026 03:27:39 +0000 Subject: [PATCH 2/2] release: 1.6.4 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 8 ++++++++ pyproject.toml | 2 +- src/cas_parser/_version.py | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 70a9c76..6e80882 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.6.3" + ".": "1.6.4" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b36545..6c5c83e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 1.6.4 (2026-03-20) + +Full Changelog: [v1.6.3...v1.6.4](https://github.com/CASParser/cas-parser-python/compare/v1.6.3...v1.6.4) + +### Bug Fixes + +* sanitize endpoint path params ([83a2f2c](https://github.com/CASParser/cas-parser-python/commit/83a2f2c62dc993603487e8b19b6e329b7475510e)) + ## 1.6.3 (2026-03-17) Full Changelog: [v1.6.2...v1.6.3](https://github.com/CASParser/cas-parser-python/compare/v1.6.2...v1.6.3) diff --git a/pyproject.toml b/pyproject.toml index dcc280e..25a4d97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "cas-parser-python" -version = "1.6.3" +version = "1.6.4" description = "The official Python library for the cas-parser API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/cas_parser/_version.py b/src/cas_parser/_version.py index 43dd9a6..56a45fd 100644 --- a/src/cas_parser/_version.py +++ b/src/cas_parser/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "cas_parser" -__version__ = "1.6.3" # x-release-please-version +__version__ = "1.6.4" # x-release-please-version