diff --git a/src/toon_format/__init__.py b/src/toon_format/__init__.py index f664ec0..2058c81 100644 --- a/src/toon_format/__init__.py +++ b/src/toon_format/__init__.py @@ -23,12 +23,20 @@ from .decoder import ToonDecodeError, decode from .encoder import encode from .types import DecodeOptions, Delimiter, DelimiterKey, EncodeOptions -from .utils import compare_formats, count_tokens, estimate_savings +from .utils import ( + compare_formats, + count_tokens, + encode_json, + estimate_savings, + loads, +) __version__ = "0.9.0-beta.1" __all__ = [ "encode", "decode", + "encode_json", + "loads", "ToonDecodeError", "Delimiter", "DelimiterKey", diff --git a/src/toon_format/utils.py b/src/toon_format/utils.py index 935a074..91f9a10 100644 --- a/src/toon_format/utils.py +++ b/src/toon_format/utils.py @@ -1,37 +1,30 @@ # Copyright (c) 2025 TOON Format Organization # SPDX-License-Identifier: MIT -"""Token analysis utilities for TOON format. +"""Utilities for TOON format. -This module provides utilities for counting tokens and comparing -token efficiency between JSON and TOON formats. 
Useful for: -- Estimating API costs (tokens are the primary cost driver) -- Optimizing prompt sizes for LLM context windows -- Benchmarking TOON's token efficiency +This module provides utilities for: +- Token analysis and efficiency comparison between JSON and TOON formats +- JSON integration and null value handling +- Estimating API costs and optimizing prompt sizes Functions: count_tokens: Count tokens in a text string estimate_savings: Compare JSON vs TOON token counts compare_formats: Generate formatted comparison table + loads: Parse a JSON string into Python objects (thin wrapper around json.loads) + encode_json: Encode a JSON string directly into TOON format Requirements: tiktoken: Install with `uv add tiktoken` or `uv add toon_format[benchmark]` - -Example: - >>> import toon_format - >>> data = {"name": "Alice", "age": 30} - >>> result = toon_format.estimate_savings(data) - >>> print(f"TOON saves {result['savings_percent']:.1f}% tokens") """ import functools import json from typing import Any, Dict -# Import encode from parent package (defined in __init__.py before this module is imported) -# __init__.py defines encode() before importing utils, so this is safe -from . import encode +from .encoder import encode -__all__ = ["count_tokens", "estimate_savings", "compare_formats"] +__all__ = ["count_tokens", "estimate_savings", "compare_formats", "encode_json", "loads"] _TIKTOKEN_MISSING_MSG = ( @@ -40,6 +33,43 @@ ) +def loads(json_string: str) -> Any: + """Parse a JSON string into Python objects. + + This is a thin wrapper around `json.loads()` provided for convenience in a + JSON-to-TOON integration flow; as with `json.loads()`, JSON `null` is + converted to Python `None`. + + Args: + json_string: The JSON string to parse. + + Returns: + Any: Parsed Python data structure. + """ + return json.loads(json_string) + + +def encode_json(json_string: str) -> str: + """Encode a JSON string directly into TOON format. 
+ + Parses the JSON string (JSON `null` becomes Python `None`) and then + encodes the resulting Python object into TOON, passing no encode options. + + Args: + json_string: The JSON string to encode. + + Returns: + str: TOON-formatted string. + + Example: + >>> import toon_format + >>> toon_format.encode_json('{"abc": null}') + 'abc: null' + """ + data = loads(json_string) + return encode(data) + + def _require_tiktoken(): try: import tiktoken # type: ignore[import-not-found] diff --git a/tests/test_json_integration.py b/tests/test_json_integration.py new file mode 100644 index 0000000..cab3c2c --- /dev/null +++ b/tests/test_json_integration.py @@ -0,0 +1,32 @@ +from toon_format import encode_json, loads + + +def test_loads_null_to_none(): + json_str = '{"abc": null, "xyz": 123}' + data = loads(json_str) + assert data["abc"] is None + assert data["xyz"] == 123 + + +def test_encode_json_integration(): + json_str = '{"abc": null, "xyz": null}' + # This should automatically handle null -> None -> TOON null + toon_output = encode_json(json_str) + expected = "abc: null\nxyz: null" + assert toon_output.strip() == expected + + +def test_complex_json_integration(): + json_str = """ + { + "status": "success", + "data": { + "user": null, + "items": [1, null, 3] + } + } + """ + toon_output = encode_json(json_str) + assert "user: null" in toon_output + # Items null: inline "1,null,3" or list "- null" (NOTE: "user: null" also matches) + assert "null" in toon_output