Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions src/aignostics/application/_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,14 +679,13 @@ def application_runs( # noqa: C901, PLR0912, PLR0913, PLR0914, PLR0915, PLR0917
# Add to dict if not already present from note search
if run.run_id not in note_runs_dict:
tag_runs_dict[run.run_id] = run
if limit is not None and len(note_runs_dict) + len(tag_runs_dict) >= limit:
if limit is not None and len(tag_runs_dict) >= limit:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excellent fix! The independent limit for tag search is now correctly implemented.

What changed: Changed from len(note_runs_dict) + len(tag_runs_dict) >= limit to just len(tag_runs_dict) >= limit.

Why this matters: Previously, if the note search filled the entire limit quota, the tag search had zero budget. Now each search gets its own independent N-slot budget, ensuring both searches can contribute results up to the limit.

Example: With limit=3:

  • Old behavior: note search finds 3 runs → tag search has 0 budget → only note results returned
  • New behavior: note search finds 3 runs, tag search finds 3 unique runs → 6 total, then sorted and sliced to top 3 newest

The deduplication logic (line 680 check) ensures duplicates don't consume the tag search quota. 👍

break

# Union of results from both searches
# Union of results from both searches, sorted newest-first
runs = list(note_runs_dict.values()) + list(tag_runs_dict.values())

# Apply limit after union
if limit is not None and len(runs) > limit:
runs.sort(key=lambda r: r.submitted_at, reverse=True)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Critical sorting fix! This addresses the "unsorted merge" bug described in the PR.

What changed: Added .sort(key=lambda r: r.submitted_at, reverse=True) before applying the limit.

Why this matters: Previously, the union concatenated runs in arbitrary order (note results first, then tag results), and [:limit] didn't guarantee the newest runs. Now the merged list is sorted by submission time descending before slicing.

Example:

  • Old: [note-day1, note-day2, tag-day10][:2][note-day1, note-day2] (oldest wins! ❌)
  • New: Sort → [tag-day10, note-day2, note-day1][:2][tag-day10, note-day2] (newest wins! ✅)

The comprehensive test coverage in test_application_runs_query_sorts_results_by_submitted_at and test_application_runs_query_respects_limit validates this behavior. Excellent work!

if limit is not None:
runs = runs[:limit]

return runs
Expand Down
23 changes: 23 additions & 0 deletions tests/aignostics/application/cli_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from aignostics.application import Service as ApplicationService
from aignostics.cli import cli
from aignostics.platform import LIST_APPLICATION_RUNS_MAX_PAGE_SIZE
from aignostics.utils import Health, sanitize_path
from tests.conftest import normalize_output, print_directory_structure
from tests.constants_test import (
Expand Down Expand Up @@ -516,6 +517,8 @@ def test_cli_run_submit_and_describe_and_cancel_and_download_and_delete( # noqa
"list",
"--query",
"note_of_this_complex_test",
"--limit",
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good performance optimization! Adding --limit to CLI test list operations prevents executing heavy queries repeatedly.

Why this matters: The test was listing runs without limits dozens of times per operation, which could cause:

  1. Slow test execution (especially in E2E against real API)
  2. Unnecessary load on the staging API
  3. Timeouts in CI if many runs exist

Using LIST_APPLICATION_RUNS_MAX_PAGE_SIZE as the limit is appropriate since:

  • It's the max page size constant from the platform module
  • Provides predictable test behavior
  • Reduces API load without affecting test coverage

Applied consistently across all 10+ list operations in this test. 👍

str(LIST_APPLICATION_RUNS_MAX_PAGE_SIZE),
],
)
assert list_result.exit_code == 0
Expand All @@ -531,6 +534,8 @@ def test_cli_run_submit_and_describe_and_cancel_and_download_and_delete( # noqa
"list",
"--query",
"test_cli_run_submit_and_describe_and_cancel_and_download_and_delete",
"--limit",
str(LIST_APPLICATION_RUNS_MAX_PAGE_SIZE),
],
)
assert list_result.exit_code == 0
Expand All @@ -546,6 +551,8 @@ def test_cli_run_submit_and_describe_and_cancel_and_download_and_delete( # noqa
"list",
"--query",
"another_tag",
"--limit",
str(LIST_APPLICATION_RUNS_MAX_PAGE_SIZE),
],
)
assert list_result.exit_code == 0
Expand All @@ -561,6 +568,8 @@ def test_cli_run_submit_and_describe_and_cancel_and_download_and_delete( # noqa
"list",
"--note-regex",
"note_of_this_complex_test",
"--limit",
str(LIST_APPLICATION_RUNS_MAX_PAGE_SIZE),
],
)
assert list_result.exit_code == 0
Expand All @@ -576,6 +585,8 @@ def test_cli_run_submit_and_describe_and_cancel_and_download_and_delete( # noqa
"list",
"--note-regex",
"other_note",
"--limit",
str(LIST_APPLICATION_RUNS_MAX_PAGE_SIZE),
],
)
assert list_result.exit_code == 0
Expand All @@ -591,6 +602,8 @@ def test_cli_run_submit_and_describe_and_cancel_and_download_and_delete( # noqa
"list",
"--tags",
"test_cli_run_submit_and_describe_and_cancel_and_download_and_delete",
"--limit",
str(LIST_APPLICATION_RUNS_MAX_PAGE_SIZE),
],
)
assert list_result.exit_code == 0
Expand All @@ -606,6 +619,8 @@ def test_cli_run_submit_and_describe_and_cancel_and_download_and_delete( # noqa
"list",
"--tags",
"other-tag",
"--limit",
str(LIST_APPLICATION_RUNS_MAX_PAGE_SIZE),
],
)
assert list_result.exit_code == 0
Expand All @@ -621,6 +636,8 @@ def test_cli_run_submit_and_describe_and_cancel_and_download_and_delete( # noqa
"list",
"--tags",
"cli-test,test_cli_run_submit_and_describe_and_cancel_and_download_and_delete",
"--limit",
str(LIST_APPLICATION_RUNS_MAX_PAGE_SIZE),
],
)
assert list_result.exit_code == 0
Expand All @@ -636,6 +653,8 @@ def test_cli_run_submit_and_describe_and_cancel_and_download_and_delete( # noqa
"list",
"--tags",
"cli-test,test_cli_run_submit_and_describe_and_cancel_and_download_and_delete,further-tag",
"--limit",
str(LIST_APPLICATION_RUNS_MAX_PAGE_SIZE),
],
)
assert list_result.exit_code == 0
Expand All @@ -651,6 +670,8 @@ def test_cli_run_submit_and_describe_and_cancel_and_download_and_delete( # noqa
"list",
"--tags",
"cli-test,test_cli_run_submit_and_describe_and_cancel_and_download_and_delete,further-tag,non-existing-tag",
"--limit",
str(LIST_APPLICATION_RUNS_MAX_PAGE_SIZE),
],
)
assert list_result.exit_code == 0
Expand All @@ -668,6 +689,8 @@ def test_cli_run_submit_and_describe_and_cancel_and_download_and_delete( # noqa
"note_of_this_complex_test",
"--tags",
"cli-test,test_cli_run_submit_and_describe_and_cancel_and_download_and_delete,further-tag",
"--limit",
str(LIST_APPLICATION_RUNS_MAX_PAGE_SIZE),
],
)
assert list_result.exit_code == 0
Expand Down
175 changes: 163 additions & 12 deletions tests/aignostics/application/service_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,17 +224,22 @@ def test_application_runs_query_with_tags_raises() -> None:
def test_application_runs_query_searches_note_and_tags(mock_get_client: MagicMock) -> None:
"""Test that query parameter searches both note and tags with union semantics."""
# Create mock runs
base_time = datetime(2024, 1, 1, tzinfo=UTC)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excellent test enhancement! All existing tests have been updated with submitted_at timestamps.

Why this matters: The fix adds sorting by submitted_at, so tests MUST have realistic timestamps to:

  1. Verify sort order is correct
  2. Prevent flaky tests from undefined ordering
  3. Document expected behavior clearly

Best practice observed:

  • base_time = datetime(2024, 1, 1, tzinfo=UTC) establishes a reference
  • Each run gets base_time + timedelta(days=N) for deterministic ordering
  • Newer tests use explicit day offsets to test sort logic

Note: All 5 new/updated tests properly mock submitted_at. Great attention to detail! 🎯


run_from_note = MagicMock(spec=RunData)
run_from_note.run_id = "run-note-123"
run_from_note.output = RunOutput.FULL
run_from_note.submitted_at = base_time + timedelta(days=1)

run_from_tag = MagicMock(spec=RunData)
run_from_tag.run_id = "run-tag-456"
run_from_tag.output = RunOutput.FULL
run_from_tag.submitted_at = base_time + timedelta(days=2)

run_from_both = MagicMock(spec=RunData)
run_from_both.run_id = "run-both-789"
run_from_both.output = RunOutput.FULL
run_from_both.submitted_at = base_time + timedelta(days=3)

# Mock the platform client to return different runs for note and tag searches
mock_client = MagicMock()
Expand Down Expand Up @@ -285,6 +290,7 @@ def test_application_runs_query_deduplicates_results(mock_get_client: MagicMock)
run_from_both = MagicMock(spec=RunData)
run_from_both.run_id = "run-both-123"
run_from_both.output = RunOutput.FULL
run_from_both.submitted_at = datetime(2024, 1, 1, tzinfo=UTC)

# Mock the platform client to return the same run from both searches
mock_client = MagicMock()
Expand All @@ -310,33 +316,178 @@ def test_application_runs_query_deduplicates_results(mock_get_client: MagicMock)
@pytest.mark.unit
@patch("aignostics.application._service.Service._get_platform_client")
def test_application_runs_query_respects_limit(mock_get_client: MagicMock) -> None:
"""Test that query parameter respects the limit parameter."""
# Create mock runs
runs = []
for i in range(10):
"""Test that query parameter respects the limit parameter and returns the newest runs."""
base_time = datetime(2024, 1, 10, tzinfo=UTC)

# Note runs are older (days 0..4), tag runs are newer (days 5..9)
note_runs = []
for i in range(5):
run = MagicMock(spec=RunData)
run.run_id = f"run-{i}"
run.run_id = f"run-note-{i}"
run.output = RunOutput.FULL
runs.append(run)
run.submitted_at = base_time + timedelta(days=i)
note_runs.append(run)

tag_runs = []
for i in range(5):
run = MagicMock(spec=RunData)
run.run_id = f"run-tag-{i}"
run.output = RunOutput.FULL
run.submitted_at = base_time + timedelta(days=5 + i)
tag_runs.append(run)

# Mock the platform client to return many runs
mock_client = MagicMock()
mock_runs = MagicMock()

# Note search returns 5 runs, tag search returns 5 runs
# API returns newest-first; reverse the lists to simulate that behaviour
mock_runs.list_data.side_effect = [
iter(runs[:5]), # Note search results
iter(runs[5:]), # Tag search results
iter(reversed(note_runs)),
iter(reversed(tag_runs)),
]
mock_client.runs = mock_runs
mock_get_client.return_value = mock_client

service = ApplicationService()
results = service.application_runs(query="test", limit=3)

# With limit=3 each search stops after 3 items (newest-first).
# Note search: note-4(day4), note-3(day3), note-2(day2) → stops.
# Tag search: tag-4(day9), tag-3(day8), tag-2(day7) → stops.
# After merge+sort+slice: tag-4(9), tag-3(8), tag-2(7) are the 3 newest.
assert len(results) == 3
result_ids = {r.run_id for r in results}
assert result_ids == {"run-tag-2", "run-tag-3", "run-tag-4"}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comprehensive new test coverage! Four new unit tests added:

  1. test_application_runs_query_respects_limit (lines 318-356): Verifies limit returns newest N runs across both searches
  2. test_application_runs_query_tag_search_has_independent_limit (lines 359-402): Confirms tag search has its own budget (the key fix!)
  3. test_application_runs_query_sorts_results_by_submitted_at (lines 405-440): Validates newest-first ordering without limit
  4. test_application_runs_query_deduplicates_with_independent_budget (lines 443-485): Ensures duplicates don't consume tag quota

Test quality observations:

  • ✅ All properly marked with @pytest.mark.unit
  • ✅ Detailed docstrings explaining the scenario
  • ✅ Realistic mock data with timestamps
  • ✅ Uses MagicMock(spec=RunData) for type safety
  • mock_runs.list_data.side_effect simulates API pagination correctly
  • ✅ Assertions validate both result count and content

Coverage impact: These tests add 163 lines covering critical edge cases. Total test count increased from 20 → 24 tests (+20%). 📈

The test names and docstrings clearly communicate intent. Excellent TDD approach!



@pytest.mark.unit
@patch("aignostics.application._service.Service._get_platform_client")
def test_application_runs_query_tag_search_has_independent_limit(mock_get_client: MagicMock) -> None:
"""Tag search gets its own N-slot budget; a full note search does not starve the tag search."""
base_time = datetime(2024, 1, 1, tzinfo=UTC)

# Note search fills its quota of N=3
note_runs = []
for i in range(3):
run = MagicMock(spec=RunData)
run.run_id = f"run-note-{i}"
run.output = RunOutput.FULL
run.submitted_at = base_time + timedelta(days=i)
note_runs.append(run)

# Tag search has 3 unique (non-overlapping) newer runs
tag_runs = []
for i in range(3):
run = MagicMock(spec=RunData)
run.run_id = f"run-tag-{i}"
run.output = RunOutput.FULL
run.submitted_at = base_time + timedelta(days=10 + i)
tag_runs.append(run)

mock_client = MagicMock()
mock_runs = MagicMock()
# API returns newest-first; reverse the lists to simulate that behaviour
mock_runs.list_data.side_effect = [
iter(reversed(note_runs)),
iter(reversed(tag_runs)),
]
mock_client.runs = mock_runs
mock_get_client.return_value = mock_client

service = ApplicationService()
results = service.application_runs(query="test", limit=3)

# Verify we only got 3 runs despite having 10 total
# Note search (budget=3): fetches note-2(day2), note-1(day1), note-0(day0) → stops.
# Tag search (independent budget=3): fetches tag-2(day12), tag-1(day11), tag-0(day10) → stops.
# After merge+sort+slice: tag-2, tag-1, tag-0 are the 3 newest.
assert len(results) == 3
result_ids = {r.run_id for r in results}
assert result_ids == {"run-tag-0", "run-tag-1", "run-tag-2"}


@pytest.mark.unit
@patch("aignostics.application._service.Service._get_platform_client")
def test_application_runs_query_sorts_results_by_submitted_at(mock_get_client: MagicMock) -> None:
"""No-limit case: mixed note+tag results are returned newest-first regardless of which search found them."""
base_time = datetime(2024, 3, 1, tzinfo=UTC)

run_note = MagicMock(spec=RunData)
run_note.run_id = "note-recent"
run_note.output = RunOutput.FULL
run_note.submitted_at = base_time + timedelta(days=5)

run_tag_a = MagicMock(spec=RunData)
run_tag_a.run_id = "tag-middle"
run_tag_a.output = RunOutput.FULL
run_tag_a.submitted_at = base_time + timedelta(days=3)

run_tag_b = MagicMock(spec=RunData)
run_tag_b.run_id = "tag-oldest"
run_tag_b.output = RunOutput.FULL
run_tag_b.submitted_at = base_time + timedelta(days=1)

mock_client = MagicMock()
mock_runs = MagicMock()
mock_runs.list_data.side_effect = [
iter([run_note]),
iter([run_tag_a, run_tag_b]),
]
mock_client.runs = mock_runs
mock_get_client.return_value = mock_client

service = ApplicationService()
results = service.application_runs(query="test")

assert len(results) == 3
assert results[0].run_id == "note-recent"
assert results[1].run_id == "tag-middle"
assert results[2].run_id == "tag-oldest"


@pytest.mark.unit
@patch("aignostics.application._service.Service._get_platform_client")
def test_application_runs_query_deduplicates_with_independent_budget(mock_get_client: MagicMock) -> None:
"""A run matching both note and tag appears exactly once.

The duplicate does not consume the tag search's quota — the search continues to find
the next unique tag-only run.
"""
base_time = datetime(2024, 4, 1, tzinfo=UTC)

run_both = MagicMock(spec=RunData)
run_both.run_id = "run-both"
run_both.output = RunOutput.FULL
run_both.submitted_at = base_time + timedelta(days=2)

run_tag_only = MagicMock(spec=RunData)
run_tag_only.run_id = "run-tag-only"
run_tag_only.output = RunOutput.FULL
run_tag_only.submitted_at = base_time + timedelta(days=1)

mock_client = MagicMock()
mock_runs = MagicMock()
# Tag search sees run_both first (dup, skipped) then run_tag_only (unique)
mock_runs.list_data.side_effect = [
iter([run_both]),
iter([run_both, run_tag_only]),
]
mock_client.runs = mock_runs
mock_get_client.return_value = mock_client

service = ApplicationService()
# limit=1 for tag_runs_dict; the duplicate doesn't consume the slot, so run_tag_only is found
results = service.application_runs(query="test", limit=1)

# After merge (run_both + run_tag_only), sort, slice to 1 → newest wins (run_both, day 2)
assert len(results) == 1
assert results[0].run_id == "run-both"

# With limit=2: both should appear, confirming tag search found run_tag_only
mock_runs.list_data.side_effect = [
iter([run_both]),
iter([run_both, run_tag_only]),
]
results_limit2 = ApplicationService().application_runs(query="test", limit=2)
result_ids = {r.run_id for r in results_limit2}
assert result_ids == {"run-both", "run-tag-only"}


@pytest.mark.unit
Expand Down
Loading