Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion src/toon_format/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,20 @@
from .decoder import ToonDecodeError, decode
from .encoder import encode
from .types import DecodeOptions, Delimiter, DelimiterKey, EncodeOptions
from .utils import compare_formats, count_tokens, estimate_savings
from .utils import (
compare_formats,
count_tokens,
encode_json,
estimate_savings,
loads,
)

__version__ = "0.9.0-beta.1"
__all__ = [
"encode",
"decode",
"encode_json",
"loads",
"ToonDecodeError",
"Delimiter",
"DelimiterKey",
Expand Down
62 changes: 46 additions & 16 deletions src/toon_format/utils.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,30 @@
# Copyright (c) 2025 TOON Format Organization
# SPDX-License-Identifier: MIT
"""Token analysis utilities for TOON format.
"""Utilities for TOON format.

This module provides utilities for counting tokens and comparing
token efficiency between JSON and TOON formats. Useful for:
- Estimating API costs (tokens are the primary cost driver)
- Optimizing prompt sizes for LLM context windows
- Benchmarking TOON's token efficiency
This module provides utilities for:
- Token analysis and efficiency comparison between JSON and TOON formats
- JSON integration and null value handling
- Estimating API costs and optimizing prompt sizes

Functions:
count_tokens: Count tokens in a text string
estimate_savings: Compare JSON vs TOON token counts
compare_formats: Generate formatted comparison table
loads: Parse JSON string into Python objects (alias for json.loads)
encode_json: Encode a JSON string directly into TOON format

Requirements:
tiktoken: Install with `uv add tiktoken` or `uv add toon_format[benchmark]`

Example:
>>> import toon_format
>>> data = {"name": "Alice", "age": 30}
>>> result = toon_format.estimate_savings(data)
>>> print(f"TOON saves {result['savings_percent']:.1f}% tokens")
"""

import functools
import json
from typing import Any, Dict

# Import encode from parent package (defined in __init__.py before this module is imported)
# __init__.py defines encode() before importing utils, so this is safe
from . import encode
from .encoder import encode

__all__ = ["count_tokens", "estimate_savings", "compare_formats"]
__all__ = ["count_tokens", "estimate_savings", "compare_formats", "encode_json", "loads"]


_TIKTOKEN_MISSING_MSG = (
Expand All @@ -40,6 +33,43 @@
)


def loads(json_string: str) -> Any:
    """Parse a JSON string into Python objects.

    This is an alias for `json.loads()` provided for convenience and to ensure
    a TOON-friendly integration flow where JSON 'null' is correctly converted
    to Python 'None'.

    Args:
        json_string: The JSON string to parse.

    Returns:
        Any: Parsed Python data structure.

    Raises:
        json.JSONDecodeError: If `json_string` is not valid JSON.
    """
    # Deliberately a plain pass-through: standard JSON decoding already maps
    # null -> None, which is what the TOON encoder expects.
    return json.loads(json_string)


def encode_json(json_string: str) -> str:
    """Convert JSON text straight into its TOON representation.

    The input is first parsed with `loads` (so JSON 'null' arrives as Python
    'None'), and the parsed object is then handed to `encode`.

    Args:
        json_string: The JSON document to convert.

    Returns:
        str: The TOON-formatted text produced by `encode`.

    Example:
        >>> import toon_format
        >>> toon_format.encode_json('{"abc": null}')
        'abc: null'
    """
    return encode(loads(json_string))


def _require_tiktoken():
try:
import tiktoken # type: ignore[import-not-found]
Expand Down
32 changes: 32 additions & 0 deletions tests/test_json_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from toon_format import encode_json, loads


def test_loads_null_to_none():
    """JSON null must come back as None while other values are kept."""
    parsed = loads('{"abc": null, "xyz": 123}')
    assert parsed["abc"] is None
    assert parsed["xyz"] == 123


def test_encode_json_integration():
    """Top-level null members are rendered as TOON null."""
    # encode_json handles the whole null -> None -> TOON null chain itself.
    rendered = encode_json('{"abc": null, "xyz": null}').strip()
    assert rendered == "abc: null\nxyz: null"


def test_complex_json_integration():
    """Nulls nested in an object member and in an array both reach the output."""
    json_str = """
    {
        "status": "success",
        "data": {
            "user": null,
            "items": [1, null, 3]
        }
    }
    """
    toon_output = encode_json(json_str)
    assert "user: null" in toon_output
    # A bare `"null" in toon_output` is already satisfied by "user: null" and
    # would never notice a dropped array element. Counting occurrences checks
    # the array's null too, whether it is rendered inline ("1,null,3") or as a
    # list item ("- null").
    assert toon_output.count("null") == 2
Loading