From 4bebc2c5520a6880fea7adf216230fadb391b7cd Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Thu, 5 Mar 2026 14:58:18 -0500 Subject: [PATCH 1/4] Add docstring examples for Common utility functions Add example usage to docstrings for Common utility functions to improve documentation. Co-Authored-By: Claude Opus 4.6 --- python/datafusion/functions.py | 179 +++++++++++++++++++++++++++++++-- 1 file changed, 169 insertions(+), 10 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index fd116254b..bfa3325b8 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -295,7 +295,16 @@ def isnan(expr: Expr) -> Expr: - """Returns true if a given number is +NaN or -NaN otherwise returns false.""" + """Returns true if a given number is +NaN or -NaN otherwise returns false. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.0, np.nan]}) + >>> result = df.select(dfn.functions.isnan(dfn.col("a")).alias("isnan")) + >>> result.collect_column("isnan")[1].as_py() + True + """ return Expr(f.isnan(expr.expr)) @@ -303,22 +312,61 @@ def nullif(expr1: Expr, expr2: Expr) -> Expr: """Returns NULL if expr1 equals expr2; otherwise it returns expr1. This can be used to perform the inverse operation of the COALESCE expression. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2], "b": [1, 3]}) + >>> result = df.select( + ... dfn.functions.nullif(dfn.col("a"), dfn.col("b")).alias("nullif")) + >>> result.collect_column("nullif").to_pylist() + [None, 2] """ return Expr(f.nullif(expr1.expr, expr2.expr)) def encode(expr: Expr, encoding: Expr) -> Expr: - """Encode the ``input``, using the ``encoding``. encoding can be base64 or hex.""" + """Encode the ``input``, using the ``encoding``. encoding can be base64 or hex. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": ["hello"]}) + >>> result = df.select( + ... dfn.functions.encode(dfn.col("a"), dfn.lit("base64")).alias("enc")) + >>> result.collect_column("enc")[0].as_py() + 'aGVsbG8' + """ return Expr(f.encode(expr.expr, encoding.expr)) def decode(expr: Expr, encoding: Expr) -> Expr: - """Decode the ``input``, using the ``encoding``. encoding can be base64 or hex.""" + """Decode the ``input``, using the ``encoding``. encoding can be base64 or hex. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": ["aGVsbG8="]}) + >>> result = df.select( + ... dfn.functions.decode(dfn.col("a"), dfn.lit("base64")).alias("dec")) + >>> result.collect_column("dec")[0].as_py() + b'hello' + """ return Expr(f.decode(expr.expr, encoding.expr)) def array_to_string(expr: Expr, delimiter: Expr) -> Expr: - """Converts each element to its text representation.""" + """Converts each element to its text representation. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select( + ... dfn.functions.array_to_string(dfn.col("a"), dfn.lit(",")).alias("s")) + >>> result.collect_column("s")[0].as_py() + '1,2,3' + """ return Expr(f.array_to_string(expr.expr, delimiter.expr.cast(pa.string()))) @@ -326,6 +374,15 @@ def array_join(expr: Expr, delimiter: Expr) -> Expr: """Converts each element to its text representation. This is an alias for :py:func:`array_to_string`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select( + ... dfn.functions.array_join(dfn.col("a"), dfn.lit("-")).alias("s")) + >>> result.collect_column("s")[0].as_py() + '1-2-3' """ return array_to_string(expr, delimiter) @@ -334,6 +391,15 @@ def list_to_string(expr: Expr, delimiter: Expr) -> Expr: """Converts each element to its text representation. This is an alias for :py:func:`array_to_string`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[4, 5, 6]]}) + >>> result = df.select( + ... dfn.functions.list_to_string(dfn.col("a"), dfn.lit(",")).alias("s")) + >>> result.collect_column("s")[0].as_py() + '4,5,6' """ return array_to_string(expr, delimiter) @@ -342,12 +408,31 @@ def list_join(expr: Expr, delimiter: Expr) -> Expr: """Converts each element to its text representation. This is an alias for :py:func:`array_to_string`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[7, 8, 9]]}) + >>> result = df.select( + ... dfn.functions.list_join(dfn.col("a"), dfn.lit("|")).alias("s")) + >>> result.collect_column("s")[0].as_py() + '7|8|9' """ return array_to_string(expr, delimiter) def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr: - """Returns whether the argument is contained within the list ``values``.""" + """Returns whether the argument is contained within the list ``values``. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.select( + ... dfn.functions.in_list(dfn.col("a"), [dfn.lit(1), dfn.lit(3)]).alias("in")) + >>> result.collect_column("in").to_pylist() + [True, False, True] + """ values = [v.expr for v in values] return Expr(f.in_list(arg.expr, values, negated)) @@ -357,6 +442,15 @@ def digest(value: Expr, method: Expr) -> Expr: Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, blake2b, and blake3. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": ["hello"]}) + >>> result = df.select( + ... dfn.functions.digest(dfn.col("a"), dfn.lit("md5")).alias("d")) + >>> len(result.collect_column("d")[0].as_py()) > 0 + True """ return Expr(f.digest(value.expr, method.expr)) @@ -365,6 +459,14 @@ def concat(*args: Expr) -> Expr: """Concatenates the text representations of all the arguments. NULL arguments are ignored. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": ["hello"], "b": [" world"]}) + >>> result = df.select(dfn.functions.concat(dfn.col("a"), dfn.col("b")).alias("c")) + >>> result.collect_column("c")[0].as_py() + 'hello world' """ args = [arg.expr for arg in args] return Expr(f.concat(args)) @@ -374,32 +476,61 @@ def concat_ws(separator: str, *args: Expr) -> Expr: """Concatenates the list ``args`` with the separator. ``NULL`` arguments are ignored. ``separator`` should not be ``NULL``. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": ["hello"], "b": ["world"]}) + >>> result = df.select( + ... dfn.functions.concat_ws("-", dfn.col("a"), dfn.col("b")).alias("c")) + >>> result.collect_column("c")[0].as_py() + 'hello-world' """ args = [arg.expr for arg in args] return Expr(f.concat_ws(separator, args)) def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> SortExpr: - """Creates a new sort expression.""" + """Creates a new sort expression. + + Examples: + --------- + >>> sort_expr = dfn.functions.order_by(dfn.col("a"), ascending=False) + >>> sort_expr.ascending() + False + """ return SortExpr(expr, ascending=ascending, nulls_first=nulls_first) def alias(expr: Expr, name: str, metadata: dict[str, str] | None = None) -> Expr: """Creates an alias expression with an optional metadata dictionary. - Args: + Parameters: + ----------- expr: The expression to alias name: The alias name metadata: Optional metadata to attach to the column - Returns: - An expression with the given alias + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2]}) + >>> df.select(dfn.functions.alias(dfn.col("a"), "b")).collect_column("b")[0].as_py() + 1 """ return Expr(f.alias(expr.expr, name, metadata)) def col(name: str) -> Expr: - """Creates a column reference expression.""" + """Creates a column reference expression. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> df.select(dfn.functions.col("a")).collect_column("a")[0].as_py() + 1 + """ return Expr(f.col(name)) @@ -413,6 +544,14 @@ def count_star(filter: Expr | None = None) -> Expr: Args: filter: If provided, only count rows for which the filter is True + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.aggregate([], [dfn.functions.count_star().alias("cnt")]) + >>> result.collect_column("cnt")[0].as_py() + 3 """ return count(Expr.literal(1), filter=filter) @@ -423,6 +562,16 @@ def case(expr: Expr) -> CaseBuilder: Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for detailed usage. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.select( + ... dfn.functions.case(dfn.col("a")).when(dfn.lit(1), + ... dfn.lit("one")).otherwise(dfn.lit("other")).alias("c")) + >>> result.collect_column("c")[0].as_py() + 'one' """ return CaseBuilder(f.case(expr.expr)) @@ -433,6 +582,16 @@ def when(when: Expr, then: Expr) -> CaseBuilder: Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for detailed usage. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.select( + ... dfn.functions.when(dfn.col("a") > dfn.lit(2), + ... dfn.lit("big")).otherwise(dfn.lit("small")).alias("c")) + >>> result.collect_column("c")[2].as_py() + 'big' """ return CaseBuilder(f.when(when.expr, then.expr)) From 43e25296c3353abab44a68116b156a0895b89622 Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Sat, 14 Mar 2026 10:23:52 -0400 Subject: [PATCH 2/4] Don't add examples for aliases --- python/datafusion/functions.py | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index bfa3325b8..1691d14f3 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -373,16 +373,8 @@ def array_to_string(expr: Expr, delimiter: Expr) -> Expr: def array_join(expr: Expr, delimiter: Expr) -> Expr: """Converts each element to its text representation. - This is an alias for :py:func:`array_to_string`. - - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) - >>> result = df.select( - ... dfn.functions.array_join(dfn.col("a"), dfn.lit("-")).alias("s")) - >>> result.collect_column("s")[0].as_py() - '1-2-3' + See Also: + This is an alias for :py:func:`array_to_string`. """ return array_to_string(expr, delimiter) @@ -390,16 +382,8 @@ def array_join(expr: Expr, delimiter: Expr) -> Expr: def list_to_string(expr: Expr, delimiter: Expr) -> Expr: """Converts each element to its text representation. - This is an alias for :py:func:`array_to_string`. - - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [[4, 5, 6]]}) - >>> result = df.select( - ... dfn.functions.list_to_string(dfn.col("a"), dfn.lit(",")).alias("s")) - >>> result.collect_column("s")[0].as_py() - '4,5,6' + See Also: + This is an alias for :py:func:`array_to_string`. """ return array_to_string(expr, delimiter) @@ -409,14 +393,8 @@ def list_join(expr: Expr, delimiter: Expr) -> Expr: This is an alias for :py:func:`array_to_string`. - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [[7, 8, 9]]}) - >>> result = df.select( - ... dfn.functions.list_join(dfn.col("a"), dfn.lit("|")).alias("s")) - >>> result.collect_column("s")[0].as_py() - '7|8|9' + See Also: + This is an alias for :py:func:`array_to_string`. """ return array_to_string(expr, delimiter) From c56cd23dcf0a748b047c73013db00abb9b0755ca Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Sat, 14 Mar 2026 10:24:40 -0400 Subject: [PATCH 3/4] Parameters back to args --- python/datafusion/functions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 1691d14f3..2df84b57f 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -483,8 +483,7 @@ def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> So def alias(expr: Expr, name: str, metadata: dict[str, str] | None = None) -> Expr: """Creates an alias expression with an optional metadata dictionary. - Parameters: - ----------- + Args: expr: The expression to alias name: The alias name metadata: Optional metadata to attach to the column From eb5c6eec5f75e205306357da50052b97cd88d56d Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Sat, 14 Mar 2026 10:29:32 -0400 Subject: [PATCH 4/4] Examples to google doc style --- python/datafusion/functions.py | 390 ++++++++++++++++----------------- 1 file changed, 183 insertions(+), 207 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 2df84b57f..bb6afdd7f 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -298,12 +298,11 @@ def isnan(expr: Expr) -> Expr: """Returns true if a given number is +NaN or -NaN otherwise returns false. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.0, np.nan]}) - >>> result = df.select(dfn.functions.isnan(dfn.col("a")).alias("isnan")) - >>> result.collect_column("isnan")[1].as_py() - True + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.0, np.nan]}) + >>> result = df.select(dfn.functions.isnan(dfn.col("a")).alias("isnan")) + >>> result.collect_column("isnan")[1].as_py() + True """ return Expr(f.isnan(expr.expr)) @@ -314,13 +313,12 @@ def nullif(expr1: Expr, expr2: Expr) -> Expr: This can be used to perform the inverse operation of the COALESCE expression. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1, 2], "b": [1, 3]}) - >>> result = df.select( - ... dfn.functions.nullif(dfn.col("a"), dfn.col("b")).alias("nullif")) - >>> result.collect_column("nullif").to_pylist() - [None, 2] + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2], "b": [1, 3]}) + >>> result = df.select( + ... dfn.functions.nullif(dfn.col("a"), dfn.col("b")).alias("nullif")) + >>> result.collect_column("nullif").to_pylist() + [None, 2] """ return Expr(f.nullif(expr1.expr, expr2.expr)) @@ -329,13 +327,12 @@ def encode(expr: Expr, encoding: Expr) -> Expr: """Encode the ``input``, using the ``encoding``. encoding can be base64 or hex. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": ["hello"]}) - >>> result = df.select( - ... dfn.functions.encode(dfn.col("a"), dfn.lit("base64")).alias("enc")) - >>> result.collect_column("enc")[0].as_py() - 'aGVsbG8' + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": ["hello"]}) + >>> result = df.select( + ... dfn.functions.encode(dfn.col("a"), dfn.lit("base64")).alias("enc")) + >>> result.collect_column("enc")[0].as_py() + 'aGVsbG8' """ return Expr(f.encode(expr.expr, encoding.expr)) @@ -344,13 +341,12 @@ def decode(expr: Expr, encoding: Expr) -> Expr: """Decode the ``input``, using the ``encoding``. encoding can be base64 or hex. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": ["aGVsbG8="]}) - >>> result = df.select( - ... dfn.functions.decode(dfn.col("a"), dfn.lit("base64")).alias("dec")) - >>> result.collect_column("dec")[0].as_py() - b'hello' + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": ["aGVsbG8="]}) + >>> result = df.select( + ... dfn.functions.decode(dfn.col("a"), dfn.lit("base64")).alias("dec")) + >>> result.collect_column("dec")[0].as_py() + b'hello' """ return Expr(f.decode(expr.expr, encoding.expr)) @@ -359,13 +355,12 @@ def array_to_string(expr: Expr, delimiter: Expr) -> Expr: """Converts each element to its text representation. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) - >>> result = df.select( - ... dfn.functions.array_to_string(dfn.col("a"), dfn.lit(",")).alias("s")) - >>> result.collect_column("s")[0].as_py() - '1,2,3' + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [[1, 2, 3]]}) + >>> result = df.select( + ... dfn.functions.array_to_string(dfn.col("a"), dfn.lit(",")).alias("s")) + >>> result.collect_column("s")[0].as_py() + '1,2,3' """ return Expr(f.array_to_string(expr.expr, delimiter.expr.cast(pa.string()))) @@ -403,13 +398,15 @@ def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr: """Returns whether the argument is contained within the list ``values``. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1, 2, 3]}) - >>> result = df.select( - ... dfn.functions.in_list(dfn.col("a"), [dfn.lit(1), dfn.lit(3)]).alias("in")) - >>> result.collect_column("in").to_pylist() - [True, False, True] + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.select( + ... dfn.functions.in_list( + ... dfn.col("a"), [dfn.lit(1), dfn.lit(3)] + ... ).alias("in") + ... ) + >>> result.collect_column("in").to_pylist() + [True, False, True] """ values = [v.expr for v in values] return Expr(f.in_list(arg.expr, values, negated)) @@ -422,13 +419,12 @@ def digest(value: Expr, method: Expr) -> Expr: blake2b, and blake3. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": ["hello"]}) - >>> result = df.select( - ... dfn.functions.digest(dfn.col("a"), dfn.lit("md5")).alias("d")) - >>> len(result.collect_column("d")[0].as_py()) > 0 - True + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": ["hello"]}) + >>> result = df.select( + ... dfn.functions.digest(dfn.col("a"), dfn.lit("md5")).alias("d")) + >>> len(result.collect_column("d")[0].as_py()) > 0 + True """ return Expr(f.digest(value.expr, method.expr)) @@ -439,12 +435,13 @@ def concat(*args: Expr) -> Expr: NULL arguments are ignored. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": ["hello"], "b": [" world"]}) - >>> result = df.select(dfn.functions.concat(dfn.col("a"), dfn.col("b")).alias("c")) - >>> result.collect_column("c")[0].as_py() - 'hello world' + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": ["hello"], "b": [" world"]}) + >>> result = df.select( + ... dfn.functions.concat(dfn.col("a"), dfn.col("b")).alias("c") + ... ) + >>> result.collect_column("c")[0].as_py() + 'hello world' """ args = [arg.expr for arg in args] return Expr(f.concat(args)) @@ -456,13 +453,12 @@ def concat_ws(separator: str, *args: Expr) -> Expr: ``NULL`` arguments are ignored. ``separator`` should not be ``NULL``. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": ["hello"], "b": ["world"]}) - >>> result = df.select( - ... dfn.functions.concat_ws("-", dfn.col("a"), dfn.col("b")).alias("c")) - >>> result.collect_column("c")[0].as_py() - 'hello-world' + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": ["hello"], "b": ["world"]}) + >>> result = df.select( + ... dfn.functions.concat_ws("-", dfn.col("a"), dfn.col("b")).alias("c")) + >>> result.collect_column("c")[0].as_py() + 'hello-world' """ args = [arg.expr for arg in args] return Expr(f.concat_ws(separator, args)) @@ -472,10 +468,9 @@ def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> So """Creates a new sort expression. Examples: - --------- - >>> sort_expr = dfn.functions.order_by(dfn.col("a"), ascending=False) - >>> sort_expr.ascending() - False + >>> sort_expr = dfn.functions.order_by(dfn.col("a"), ascending=False) + >>> sort_expr.ascending() + False """ return SortExpr(expr, ascending=ascending, nulls_first=nulls_first) @@ -489,11 +484,12 @@ def alias(expr: Expr, name: str, metadata: dict[str, str] | None = None) -> Expr metadata: Optional metadata to attach to the column Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1, 2]}) - >>> df.select(dfn.functions.alias(dfn.col("a"), "b")).collect_column("b")[0].as_py() - 1 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2]}) + >>> df.select( + ... dfn.functions.alias(dfn.col("a"), "b") + ... ).collect_column("b")[0].as_py() + 1 """ return Expr(f.alias(expr.expr, name, metadata)) @@ -502,11 +498,10 @@ def col(name: str) -> Expr: """Creates a column reference expression. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1, 2, 3]}) - >>> df.select(dfn.functions.col("a")).collect_column("a")[0].as_py() - 1 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> df.select(dfn.functions.col("a")).collect_column("a")[0].as_py() + 1 """ return Expr(f.col(name)) @@ -523,12 +518,11 @@ def count_star(filter: Expr | None = None) -> Expr: filter: If provided, only count rows for which the filter is True Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1, 2, 3]}) - >>> result = df.aggregate([], [dfn.functions.count_star().alias("cnt")]) - >>> result.collect_column("cnt")[0].as_py() - 3 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.aggregate([], [dfn.functions.count_star().alias("cnt")]) + >>> result.collect_column("cnt")[0].as_py() + 3 """ return count(Expr.literal(1), filter=filter) @@ -541,14 +535,13 @@ def case(expr: Expr) -> CaseBuilder: detailed usage. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1, 2, 3]}) - >>> result = df.select( - ... dfn.functions.case(dfn.col("a")).when(dfn.lit(1), - ... dfn.lit("one")).otherwise(dfn.lit("other")).alias("c")) - >>> result.collect_column("c")[0].as_py() - 'one' + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.select( + ... dfn.functions.case(dfn.col("a")).when(dfn.lit(1), + ... dfn.lit("one")).otherwise(dfn.lit("other")).alias("c")) + >>> result.collect_column("c")[0].as_py() + 'one' """ return CaseBuilder(f.case(expr.expr)) @@ -561,14 +554,13 @@ def when(when: Expr, then: Expr) -> CaseBuilder: detailed usage. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1, 2, 3]}) - >>> result = df.select( - ... dfn.functions.when(dfn.col("a") > dfn.lit(2), - ... dfn.lit("big")).otherwise(dfn.lit("small")).alias("c")) - >>> result.collect_column("c")[2].as_py() - 'big' + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.select( + ... dfn.functions.when(dfn.col("a") > dfn.lit(2), + ... dfn.lit("big")).otherwise(dfn.lit("small")).alias("c")) + >>> result.collect_column("c")[2].as_py() + 'big' """ return CaseBuilder(f.when(when.expr, then.expr)) @@ -632,12 +624,11 @@ def acos(arg: Expr) -> Expr: """Returns the arc cosine or inverse cosine of a number. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.0]}) - >>> result = df.select(dfn.functions.acos(dfn.col("a")).alias("acos")) - >>> result.collect_column("acos")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.0]}) + >>> result = df.select(dfn.functions.acos(dfn.col("a")).alias("acos")) + >>> result.collect_column("acos")[0].as_py() + 0.0 """ return Expr(f.acos(arg.expr)) @@ -646,12 +637,11 @@ def acosh(arg: Expr) -> Expr: """Returns inverse hyperbolic cosine. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.0]}) - >>> result = df.select(dfn.functions.acosh(dfn.col("a")).alias("acosh")) - >>> result.collect_column("acosh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.0]}) + >>> result = df.select(dfn.functions.acosh(dfn.col("a")).alias("acosh")) + >>> result.collect_column("acosh")[0].as_py() + 0.0 """ return Expr(f.acosh(arg.expr)) @@ -665,12 +655,11 @@ def asin(arg: Expr) -> Expr: """Returns the arc sine or inverse sine of a number. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.asin(dfn.col("a")).alias("asin")) - >>> result.collect_column("asin")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.asin(dfn.col("a")).alias("asin")) + >>> result.collect_column("asin")[0].as_py() + 0.0 """ return Expr(f.asin(arg.expr)) @@ -679,12 +668,11 @@ def asinh(arg: Expr) -> Expr: """Returns inverse hyperbolic sine. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.asinh(dfn.col("a")).alias("asinh")) - >>> result.collect_column("asinh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.asinh(dfn.col("a")).alias("asinh")) + >>> result.collect_column("asinh")[0].as_py() + 0.0 """ return Expr(f.asinh(arg.expr)) @@ -693,12 +681,11 @@ def atan(arg: Expr) -> Expr: """Returns inverse tangent of a number. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.atan(dfn.col("a")).alias("atan")) - >>> result.collect_column("atan")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.atan(dfn.col("a")).alias("atan")) + >>> result.collect_column("atan")[0].as_py() + 0.0 """ return Expr(f.atan(arg.expr)) @@ -707,12 +694,11 @@ def atanh(arg: Expr) -> Expr: """Returns inverse hyperbolic tangent. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.atanh(dfn.col("a")).alias("atanh")) - >>> result.collect_column("atanh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.atanh(dfn.col("a")).alias("atanh")) + >>> result.collect_column("atanh")[0].as_py() + 0.0 """ return Expr(f.atanh(arg.expr)) @@ -721,13 +707,12 @@ def atan2(y: Expr, x: Expr) -> Expr: """Returns inverse tangent of a division given in the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"y": [0.0], "x": [1.0]}) - >>> result = df.select( - ... dfn.functions.atan2(dfn.col("y"), dfn.col("x")).alias("atan2")) - >>> result.collect_column("atan2")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"y": [0.0], "x": [1.0]}) + >>> result = df.select( + ... dfn.functions.atan2(dfn.col("y"), dfn.col("x")).alias("atan2")) + >>> result.collect_column("atan2")[0].as_py() + 0.0 """ return Expr(f.atan2(y.expr, x.expr)) @@ -782,12 +767,11 @@ def cos(arg: Expr) -> Expr: """Returns the cosine of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0,-1,1]}) - >>> cos_df = df.select(dfn.functions.cos(dfn.col("a")).alias("cos")) - >>> cos_df.collect_column("cos")[0].as_py() - 1.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0,-1,1]}) + >>> cos_df = df.select(dfn.functions.cos(dfn.col("a")).alias("cos")) + >>> cos_df.collect_column("cos")[0].as_py() + 1.0 """ return Expr(f.cos(arg.expr)) @@ -796,12 +780,11 @@ def cosh(arg: Expr) -> Expr: """Returns the hyperbolic cosine of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0,-1,1]}) - >>> cosh_df = df.select(dfn.functions.cosh(dfn.col("a")).alias("cosh")) - >>> cosh_df.collect_column("cosh")[0].as_py() - 1.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0,-1,1]}) + >>> cosh_df = df.select(dfn.functions.cosh(dfn.col("a")).alias("cosh")) + >>> cosh_df.collect_column("cosh")[0].as_py() + 1.0 """ return Expr(f.cosh(arg.expr)) @@ -810,18 +793,17 @@ def cot(arg: Expr) -> Expr: """Returns the cotangent of the argument. Examples: - --------- - >>> from math import pi - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [pi / 4]}) - >>> import builtins - >>> result = df.select( - ... dfn.functions.cot(dfn.col("a")).alias("cot") - ... ) - >>> builtins.round( - ... result.collect_column("cot")[0].as_py(), 1 - ... ) - 1.0 + >>> from math import pi + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [pi / 4]}) + >>> import builtins + >>> result = df.select( + ... dfn.functions.cot(dfn.col("a")).alias("cot") + ... ) + >>> builtins.round( + ... result.collect_column("cot")[0].as_py(), 1 + ... ) + 1.0 """ return Expr(f.cot(arg.expr)) @@ -830,13 +812,12 @@ def degrees(arg: Expr) -> Expr: """Converts the argument from radians to degrees. Examples: - --------- - >>> from math import pi - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0,pi,2*pi]}) - >>> deg_df = df.select(dfn.functions.degrees(dfn.col("a")).alias("deg")) - >>> deg_df.collect_column("deg")[2].as_py() - 360.0 + >>> from math import pi + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0,pi,2*pi]}) + >>> deg_df = df.select(dfn.functions.degrees(dfn.col("a")).alias("deg")) + >>> deg_df.collect_column("deg")[2].as_py() + 360.0 """ return Expr(f.degrees(arg.expr)) @@ -1018,18 +999,17 @@ def radians(arg: Expr) -> Expr: """Converts the argument from degrees to radians. Examples: - --------- - >>> from math import pi - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [180.0]}) - >>> import builtins - >>> result = df.select( - ... dfn.functions.radians(dfn.col("a")).alias("rad") - ... ) - >>> builtins.round( - ... result.collect_column("rad")[0].as_py(), 6 - ... ) - 3.141593 + >>> from math import pi + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [180.0]}) + >>> import builtins + >>> result = df.select( + ... dfn.functions.radians(dfn.col("a")).alias("rad") + ... ) + >>> builtins.round( + ... result.collect_column("rad")[0].as_py(), 6 + ... ) + 3.141593 """ return Expr(f.radians(arg.expr)) @@ -1194,12 +1174,11 @@ def sin(arg: Expr) -> Expr: """Returns the sine of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.sin(dfn.col("a")).alias("sin")) - >>> result.collect_column("sin")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.sin(dfn.col("a")).alias("sin")) + >>> result.collect_column("sin")[0].as_py() + 0.0 """ return Expr(f.sin(arg.expr)) @@ -1208,12 +1187,11 @@ def sinh(arg: Expr) -> Expr: """Returns the hyperbolic sine of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.sinh(dfn.col("a")).alias("sinh")) - >>> result.collect_column("sinh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.sinh(dfn.col("a")).alias("sinh")) + >>> result.collect_column("sinh")[0].as_py() + 0.0 """ return Expr(f.sinh(arg.expr)) @@ -1265,12 +1243,11 @@ def tan(arg: Expr) -> Expr: """Returns the tangent of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.tan(dfn.col("a")).alias("tan")) - >>> result.collect_column("tan")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.tan(dfn.col("a")).alias("tan")) + >>> result.collect_column("tan")[0].as_py() + 0.0 """ return Expr(f.tan(arg.expr)) @@ -1279,12 +1256,11 @@ def tanh(arg: Expr) -> Expr: """Returns the hyperbolic tangent of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.tanh(dfn.col("a")).alias("tanh")) - >>> result.collect_column("tanh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.tanh(dfn.col("a")).alias("tanh")) + >>> result.collect_column("tanh")[0].as_py() + 0.0 """ return Expr(f.tanh(arg.expr))