From e919fb2396b6aad59f4ba53052895be8b26917b8 Mon Sep 17 00:00:00 2001 From: Andreas Albert Date: Tue, 24 Mar 2026 17:22:31 +0100 Subject: [PATCH 1/2] fix: Allow overriding column definitions in schema inheritance The duplicate alias check from #291 also rejected intentional column overrides in subclasses. Before merging the child namespace, walk the parent MRO to remove columns that share the same attribute name so the child definition takes precedence, while still detecting genuine alias conflicts. Co-Authored-By: Claude Opus 4.6 --- dataframely/_base_schema.py | 20 +++++++++++++++++++- tests/schema/test_base.py | 8 ++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/dataframely/_base_schema.py b/dataframely/_base_schema.py index b875e6bf..bcf9aee2 100644 --- a/dataframely/_base_schema.py +++ b/dataframely/_base_schema.py @@ -119,7 +119,25 @@ def __new__( result = Metadata() for base in bases: result.update(mcs._get_metadata_recursively(base)) - result.update(mcs._get_metadata(namespace)) + # Before merging the child namespace, remove any parent columns that the + # child explicitly overrides (same attribute name). This allows subclasses to + # redefine inherited columns while still detecting genuine alias conflicts. + namespace_metadata = mcs._get_metadata(namespace) + for attr, value in namespace.items(): + if not isinstance(value, Column): + continue + # Walk parent MRO to find if this attribute was a Column in a parent class. + for base in bases: + for parent_cls in base.__mro__: + parent_col = parent_cls.__dict__.get(attr) + if parent_col is not None and isinstance(parent_col, Column): + parent_key = parent_col.alias or attr + result.columns.pop(parent_key, None) + break + else: + continue + break + result.update(namespace_metadata) namespace[_COLUMN_ATTR] = result.columns cls = super().__new__(mcs, name, bases, namespace, *args, **kwargs) diff --git a/tests/schema/test_base.py b/tests/schema/test_base.py index 6eb20849..0477630a 100644 --- a/tests/schema/test_base.py +++ b/tests/schema/test_base.py @@ -141,3 +141,11 @@ def test_user_error_polars_datatype_type() -> None: class MySchemaWithPolarsDataTypeType(dy.Schema): a = dy.Int32(nullable=False) b = pl.String # User error: Used pl.String instead of dy.String() + + +def test_override() -> None: + class FirstSchema(dy.Schema): + x = dy.Int64() + + class SecondSchema(FirstSchema): + x = dy.Int64(nullable=True) From 13f740399d90fa557e666b95b7c6a8e93e97f536 Mon Sep 17 00:00:00 2001 From: Andreas Albert Date: Tue, 24 Mar 2026 17:53:21 +0100 Subject: [PATCH 2/2] fix: Address review feedback - Scan all bases/MROs instead of breaking after the first match, so multiple-inheritance overrides remove all inherited columns - Add assertions to test_override to verify the child definition actually takes precedence Co-Authored-By: Claude Opus 4.6 --- dataframely/_base_schema.py | 14 +++++++------- tests/schema/test_base.py | 11 +++++++++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/dataframely/_base_schema.py b/dataframely/_base_schema.py index bcf9aee2..068943c5 100644 --- a/dataframely/_base_schema.py +++ b/dataframely/_base_schema.py @@ -126,17 +126,17 @@ def __new__( for attr, value in namespace.items(): if not isinstance(value, Column): continue - # Walk parent MRO to find if this attribute was a Column in a parent class. + # Walk all parent MROs to find if this attribute was a Column in any + # parent class. In multiple-inheritance scenarios, the same attribute + # name may appear in more than one base with different aliases. + keys_to_remove: set[str] = set() for base in bases: for parent_cls in base.__mro__: parent_col = parent_cls.__dict__.get(attr) if parent_col is not None and isinstance(parent_col, Column): - parent_key = parent_col.alias or attr - result.columns.pop(parent_key, None) - break - else: - continue - break + keys_to_remove.add(parent_col.alias or attr) + for parent_key in keys_to_remove: + result.columns.pop(parent_key, None) result.update(namespace_metadata) namespace[_COLUMN_ATTR] = result.columns cls = super().__new__(mcs, name, bases, namespace, *args, **kwargs) diff --git a/tests/schema/test_base.py b/tests/schema/test_base.py index 0477630a..eb86ca38 100644 --- a/tests/schema/test_base.py +++ b/tests/schema/test_base.py @@ -149,3 +149,14 @@ class FirstSchema(dy.Schema): class SecondSchema(FirstSchema): x = dy.Int64(nullable=True) + + first_columns = FirstSchema.columns() + second_columns = SecondSchema.columns() + + assert set(first_columns) == {"x"} + assert set(second_columns) == {"x"} + + assert first_columns["x"].nullable is False + assert second_columns["x"].nullable is True + + assert type(second_columns["x"]) is type(first_columns["x"])