From 548cddb4bdd08e7a7f72fb70abbc5827d8cb93f8 Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Thu, 5 Mar 2026 14:58:21 -0500 Subject: [PATCH 1/5] Add docstring examples for Aggregate window functions Add example usage to docstrings for Aggregate window functions to improve documentation. Co-Authored-By: Claude Opus 4.6 --- python/datafusion/functions.py | 104 +++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index fd116254b..ffb89e685 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -2516,6 +2516,14 @@ def first_value( For example:: df.aggregate([], first_value(col("a"), order_by="ts")) + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 20, 30]}) + >>> result = df.aggregate([], [dfn.functions.first_value(dfn.col("a")).alias("v")]) + >>> result.collect_column("v")[0].as_py() + 10 """ order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None @@ -2553,6 +2561,14 @@ def last_value( For example:: df.aggregate([], last_value(col("a"), order_by="ts")) + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 20, 30]}) + >>> result = df.aggregate([], [dfn.functions.last_value(dfn.col("a")).alias("v")]) + >>> result.collect_column("v")[0].as_py() + 30 """ order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None @@ -2592,6 +2608,14 @@ def nth_value( For example:: df.aggregate([], nth_value(col("a"), 2, order_by="ts")) + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 20, 30]}) + >>> result = df.aggregate([], [dfn.functions.nth_value(dfn.col("a"), 2).alias("v")]) + >>> result.collect_column("v")[0].as_py() + 20 """ order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None @@ -2732,6 +2756,16 @@ def lead( For example:: lead(col("b"), order_by="ts") + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.lead(dfn.col("a"), shift_offset=1, + ... default_value=0, order_by="a").alias("lead")) + >>> result.sort(dfn.col("a")).collect_column("lead").to_pylist() + [2, 3, 0] """ if not isinstance(default_value, pa.Scalar) and default_value is not None: default_value = pa.scalar(default_value) @@ -2787,6 +2821,16 @@ def lag( For example:: lag(col("b"), order_by="ts") + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.lag(dfn.col("a"), shift_offset=1, + ... default_value=0, order_by="a").alias("lag")) + >>> result.sort(dfn.col("a")).collect_column("lag").to_pylist() + [0, 1, 2] """ if not isinstance(default_value, pa.Scalar): default_value = pa.scalar(default_value) @@ -2832,6 +2876,15 @@ def row_number( For example:: row_number(order_by="points") + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 20, 30]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.row_number(order_by="a").alias("rn")) + >>> result.sort(dfn.col("a")).collect_column("rn").to_pylist() + [1, 2, 3] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) @@ -2876,6 +2929,14 @@ def rank( For example:: rank(order_by="points") + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 10, 20]}) + >>> result = df.select(dfn.col("a"), dfn.functions.rank(order_by="a").alias("rnk")) + >>> result.sort(dfn.col("a")).collect_column("rnk").to_pylist() + [1, 1, 3] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) @@ -2915,6 +2976,15 @@ def dense_rank( For example:: dense_rank(order_by="points") + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 10, 20]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.dense_rank(order_by="a").alias("dr")) + >>> result.sort(dfn.col("a")).collect_column("dr").to_pylist() + [1, 1, 2] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) @@ -2955,6 +3025,15 @@ def percent_rank( For example:: percent_rank(order_by="points") + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 20, 30]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.percent_rank(order_by="a").alias("pr")) + >>> result.sort(dfn.col("a")).collect_column("pr").to_pylist() + [0.0, 0.5, 1.0] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) @@ -2995,6 +3074,22 @@ def cume_dist( For example:: cume_dist(order_by="points") + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 10, 20]}) + >>> import builtins + >>> result = df.select( + ... dfn.col("a"), + ... dfn.functions.cume_dist( + ... order_by="a" + ... ).alias("cd") + ... ) + >>> [builtins.round(x, 4) for x in + ... result.sort(dfn.col("a") + ... ).collect_column("cd").to_pylist()] + [0.6667, 0.6667, 1.0] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) @@ -3039,6 +3134,15 @@ def ntile( For example:: ntile(3, order_by="points") + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 20, 30, 40]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.ntile(2, order_by="a").alias("nt")) + >>> result.sort(dfn.col("a")).collect_column("nt").to_pylist() + [1, 1, 2, 2] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) From abe668d0911d832823bf2a83e8598c4a0d91ab44 Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Sat, 14 Mar 2026 10:08:30 -0400 Subject: [PATCH 2/5] Remove for example for example docstring --- python/datafusion/functions.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index ffb89e685..e0fbaf4f9 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -2513,10 +2513,6 @@ def first_value( column names or expressions. null_treatment: Assign whether to respect or ignore null values. - For example:: - - df.aggregate([], first_value(col("a"), order_by="ts")) - Examples: --------- >>> ctx = dfn.SessionContext() @@ -2558,10 +2554,6 @@ def last_value( column names or expressions. null_treatment: Assign whether to respect or ignore null values. - For example:: - - df.aggregate([], last_value(col("a"), order_by="ts")) - Examples: --------- >>> ctx = dfn.SessionContext() From 66f1a0443ce1f3a307b96153abb4eb3b5588b233 Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Sat, 14 Mar 2026 10:10:14 -0400 Subject: [PATCH 3/5] Actually remove all for example calls in favor of docstrings --- python/datafusion/functions.py | 43 ---------------------------------- 1 file changed, 43 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index e0fbaf4f9..7e95a8f55 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -2009,10 +2009,6 @@ def array_agg( distinct: If True, a single entry for each distinct value will be in the result filter: If provided, only compute against rows for which the filter is True order_by: Order the resultant array values. Accepts column names or expressions. - - For example:: - - df.aggregate([], array_agg(col("a"), order_by="b")) """ order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None @@ -2597,10 +2593,6 @@ def nth_value( column names or expressions. null_treatment: Assign whether to respect or ignore null values. - For example:: - - df.aggregate([], nth_value(col("a"), 2, order_by="ts")) - Examples: --------- >>> ctx = dfn.SessionContext() @@ -2745,10 +2737,6 @@ def lead( order_by: Set ordering within the window frame. Accepts column names or expressions. - For example:: - - lead(col("b"), order_by="ts") - Examples: --------- >>> ctx = dfn.SessionContext() @@ -2810,10 +2798,6 @@ def lag( order_by: Set ordering within the window frame. Accepts column names or expressions. - For example:: - - lag(col("b"), order_by="ts") - Examples: --------- >>> ctx = dfn.SessionContext() @@ -2865,10 +2849,6 @@ def row_number( order_by: Set ordering within the window frame. Accepts column names or expressions. - For example:: - - row_number(order_by="points") - Examples: --------- >>> ctx = dfn.SessionContext() @@ -2918,10 +2898,6 @@ def rank( order_by: Set ordering within the window frame. Accepts column names or expressions. - For example:: - - rank(order_by="points") - Examples: --------- >>> ctx = dfn.SessionContext() @@ -2965,10 +2941,6 @@ def dense_rank( order_by: Set ordering within the window frame. Accepts column names or expressions. - For example:: - - dense_rank(order_by="points") - Examples: --------- >>> ctx = dfn.SessionContext() @@ -3014,9 +2986,6 @@ def percent_rank( order_by: Set ordering within the window frame. Accepts column names or expressions. - For example:: - - percent_rank(order_by="points") Examples: --------- @@ -3063,10 +3032,6 @@ def cume_dist( order_by: Set ordering within the window frame. Accepts column names or expressions. - For example:: - - cume_dist(order_by="points") - Examples: --------- >>> ctx = dfn.SessionContext() @@ -3123,10 +3088,6 @@ def ntile( order_by: Set ordering within the window frame. Accepts column names or expressions. - For example:: - - ntile(3, order_by="points") - Examples: --------- >>> ctx = dfn.SessionContext() @@ -3169,10 +3130,6 @@ def string_agg( filter: If provided, only compute against rows for which the filter is True order_by: Set the ordering of the expression to evaluate. Accepts column names or expressions. - - For example:: - - df.aggregate([], string_agg(col("a"), ",", order_by="b")) """ order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None From 401baa71066e60002ffe9858092ec8203a2d14e8 Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Sat, 14 Mar 2026 10:15:01 -0400 Subject: [PATCH 4/5] Remove builtins --- python/datafusion/functions.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 7e95a8f55..15f3015f0 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -678,14 +678,11 @@ def cot(arg: Expr) -> Expr: >>> from math import pi >>> ctx = dfn.SessionContext() >>> df = ctx.from_pydict({"a": [pi / 4]}) - >>> import builtins >>> result = df.select( ... dfn.functions.cot(dfn.col("a")).alias("cot") ... ) - >>> builtins.round( - ... result.collect_column("cot")[0].as_py(), 1 - ... ) - 1.0 + >>> result.collect_column("cot")[0].as_py() + 1.0... """ return Expr(f.cot(arg.expr)) @@ -886,14 +883,11 @@ def radians(arg: Expr) -> Expr: >>> from math import pi >>> ctx = dfn.SessionContext() >>> df = ctx.from_pydict({"a": [180.0]}) - >>> import builtins >>> result = df.select( ... dfn.functions.radians(dfn.col("a")).alias("rad") ... ) - >>> builtins.round( - ... result.collect_column("rad")[0].as_py(), 6 - ... ) - 3.141593 + >>> result.collect_column("rad")[0].as_py() + 3.14159... """ return Expr(f.radians(arg.expr)) @@ -3035,18 +3029,15 @@ def cume_dist( Examples: --------- >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [10, 10, 20]}) - >>> import builtins + >>> df = ctx.from_pydict({"a": [1., 2., 2., 3.]}) >>> result = df.select( ... dfn.col("a"), ... dfn.functions.cume_dist( ... order_by="a" ... ).alias("cd") ... ) - >>> [builtins.round(x, 4) for x in - ... result.sort(dfn.col("a") - ... ).collect_column("cd").to_pylist()] - [0.6667, 0.6667, 1.0] + >>> result.collect_column("cd").to_pylist() + [0.25..., 0.75..., 0.75..., 1.0...] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) From b4642146342c894fe8ba9db271aa2e004590f699 Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Sat, 14 Mar 2026 10:19:38 -0400 Subject: [PATCH 5/5] Make google docstyle --- python/datafusion/functions.py | 347 ++++++++++++++++----------------- 1 file changed, 164 insertions(+), 183 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 15f3015f0..24cfe4013 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -496,12 +496,11 @@ def acos(arg: Expr) -> Expr: """Returns the arc cosine or inverse cosine of a number. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.0]}) - >>> result = df.select(dfn.functions.acos(dfn.col("a")).alias("acos")) - >>> result.collect_column("acos")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.0]}) + >>> result = df.select(dfn.functions.acos(dfn.col("a")).alias("acos")) + >>> result.collect_column("acos")[0].as_py() + 0.0 """ return Expr(f.acos(arg.expr)) @@ -510,12 +509,11 @@ def acosh(arg: Expr) -> Expr: """Returns inverse hyperbolic cosine. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.0]}) - >>> result = df.select(dfn.functions.acosh(dfn.col("a")).alias("acosh")) - >>> result.collect_column("acosh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.0]}) + >>> result = df.select(dfn.functions.acosh(dfn.col("a")).alias("acosh")) + >>> result.collect_column("acosh")[0].as_py() + 0.0 """ return Expr(f.acosh(arg.expr)) @@ -529,12 +527,11 @@ def asin(arg: Expr) -> Expr: """Returns the arc sine or inverse sine of a number. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.asin(dfn.col("a")).alias("asin")) - >>> result.collect_column("asin")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.asin(dfn.col("a")).alias("asin")) + >>> result.collect_column("asin")[0].as_py() + 0.0 """ return Expr(f.asin(arg.expr)) @@ -543,12 +540,11 @@ def asinh(arg: Expr) -> Expr: """Returns inverse hyperbolic sine. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.asinh(dfn.col("a")).alias("asinh")) - >>> result.collect_column("asinh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.asinh(dfn.col("a")).alias("asinh")) + >>> result.collect_column("asinh")[0].as_py() + 0.0 """ return Expr(f.asinh(arg.expr)) @@ -557,12 +553,11 @@ def atan(arg: Expr) -> Expr: """Returns inverse tangent of a number. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.atan(dfn.col("a")).alias("atan")) - >>> result.collect_column("atan")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.atan(dfn.col("a")).alias("atan")) + >>> result.collect_column("atan")[0].as_py() + 0.0 """ return Expr(f.atan(arg.expr)) @@ -571,12 +566,11 @@ def atanh(arg: Expr) -> Expr: """Returns inverse hyperbolic tangent. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.atanh(dfn.col("a")).alias("atanh")) - >>> result.collect_column("atanh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.atanh(dfn.col("a")).alias("atanh")) + >>> result.collect_column("atanh")[0].as_py() + 0.0 """ return Expr(f.atanh(arg.expr)) @@ -585,13 +579,12 @@ def atan2(y: Expr, x: Expr) -> Expr: """Returns inverse tangent of a division given in the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"y": [0.0], "x": [1.0]}) - >>> result = df.select( - ... dfn.functions.atan2(dfn.col("y"), dfn.col("x")).alias("atan2")) - >>> result.collect_column("atan2")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"y": [0.0], "x": [1.0]}) + >>> result = df.select( + ... dfn.functions.atan2(dfn.col("y"), dfn.col("x")).alias("atan2")) + >>> result.collect_column("atan2")[0].as_py() + 0.0 """ return Expr(f.atan2(y.expr, x.expr)) @@ -646,12 +639,11 @@ def cos(arg: Expr) -> Expr: """Returns the cosine of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0,-1,1]}) - >>> cos_df = df.select(dfn.functions.cos(dfn.col("a")).alias("cos")) - >>> cos_df.collect_column("cos")[0].as_py() - 1.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0,-1,1]}) + >>> cos_df = df.select(dfn.functions.cos(dfn.col("a")).alias("cos")) + >>> cos_df.collect_column("cos")[0].as_py() + 1.0 """ return Expr(f.cos(arg.expr)) @@ -660,12 +652,11 @@ def cosh(arg: Expr) -> Expr: """Returns the hyperbolic cosine of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0,-1,1]}) - >>> cosh_df = df.select(dfn.functions.cosh(dfn.col("a")).alias("cosh")) - >>> cosh_df.collect_column("cosh")[0].as_py() - 1.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0,-1,1]}) + >>> cosh_df = df.select(dfn.functions.cosh(dfn.col("a")).alias("cosh")) + >>> cosh_df.collect_column("cosh")[0].as_py() + 1.0 """ return Expr(f.cosh(arg.expr)) @@ -674,15 +665,14 @@ def cot(arg: Expr) -> Expr: """Returns the cotangent of the argument. Examples: - --------- - >>> from math import pi - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [pi / 4]}) - >>> result = df.select( - ... dfn.functions.cot(dfn.col("a")).alias("cot") - ... ) - >>> result.collect_column("cot")[0].as_py() - 1.0... + >>> from math import pi + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [pi / 4]}) + >>> result = df.select( + ... dfn.functions.cot(dfn.col("a")).alias("cot") + ... ) + >>> result.collect_column("cot")[0].as_py() + 1.0... """ return Expr(f.cot(arg.expr)) @@ -691,13 +681,12 @@ def degrees(arg: Expr) -> Expr: """Converts the argument from radians to degrees. Examples: - --------- - >>> from math import pi - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0,pi,2*pi]}) - >>> deg_df = df.select(dfn.functions.degrees(dfn.col("a")).alias("deg")) - >>> deg_df.collect_column("deg")[2].as_py() - 360.0 + >>> from math import pi + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0,pi,2*pi]}) + >>> deg_df = df.select(dfn.functions.degrees(dfn.col("a")).alias("deg")) + >>> deg_df.collect_column("deg")[2].as_py() + 360.0 """ return Expr(f.degrees(arg.expr)) @@ -879,15 +868,14 @@ def radians(arg: Expr) -> Expr: """Converts the argument from degrees to radians. Examples: - --------- - >>> from math import pi - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [180.0]}) - >>> result = df.select( - ... dfn.functions.radians(dfn.col("a")).alias("rad") - ... ) - >>> result.collect_column("rad")[0].as_py() - 3.14159... + >>> from math import pi + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [180.0]}) + >>> result = df.select( + ... dfn.functions.radians(dfn.col("a")).alias("rad") + ... ) + >>> result.collect_column("rad")[0].as_py() + 3.14159... """ return Expr(f.radians(arg.expr)) @@ -1052,12 +1040,11 @@ def sin(arg: Expr) -> Expr: """Returns the sine of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.sin(dfn.col("a")).alias("sin")) - >>> result.collect_column("sin")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.sin(dfn.col("a")).alias("sin")) + >>> result.collect_column("sin")[0].as_py() + 0.0 """ return Expr(f.sin(arg.expr)) @@ -1066,12 +1053,11 @@ def sinh(arg: Expr) -> Expr: """Returns the hyperbolic sine of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.sinh(dfn.col("a")).alias("sinh")) - >>> result.collect_column("sinh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.sinh(dfn.col("a")).alias("sinh")) + >>> result.collect_column("sinh")[0].as_py() + 0.0 """ return Expr(f.sinh(arg.expr)) @@ -1123,12 +1109,11 @@ def tan(arg: Expr) -> Expr: """Returns the tangent of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.tan(dfn.col("a")).alias("tan")) - >>> result.collect_column("tan")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.tan(dfn.col("a")).alias("tan")) + >>> result.collect_column("tan")[0].as_py() + 0.0 """ return Expr(f.tan(arg.expr)) @@ -1137,12 +1122,11 @@ def tanh(arg: Expr) -> Expr: """Returns the hyperbolic tangent of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.tanh(dfn.col("a")).alias("tanh")) - >>> result.collect_column("tanh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.tanh(dfn.col("a")).alias("tanh")) + >>> result.collect_column("tanh")[0].as_py() + 0.0 """ return Expr(f.tanh(arg.expr)) @@ -2504,12 +2488,13 @@ def first_value( null_treatment: Assign whether to respect or ignore null values. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [10, 20, 30]}) - >>> result = df.aggregate([], [dfn.functions.first_value(dfn.col("a")).alias("v")]) - >>> result.collect_column("v")[0].as_py() - 10 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 20, 30]}) + >>> result = df.aggregate( + ... [], [dfn.functions.first_value(dfn.col("a")).alias("v")] + ... ) + >>> result.collect_column("v")[0].as_py() + 10 """ order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None @@ -2545,12 +2530,13 @@ def last_value( null_treatment: Assign whether to respect or ignore null values. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [10, 20, 30]}) - >>> result = df.aggregate([], [dfn.functions.last_value(dfn.col("a")).alias("v")]) - >>> result.collect_column("v")[0].as_py() - 30 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 20, 30]}) + >>> result = df.aggregate( + ... [], [dfn.functions.last_value(dfn.col("a")).alias("v")] + ... ) + >>> result.collect_column("v")[0].as_py() + 30 """ order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None @@ -2588,12 +2574,13 @@ def nth_value( null_treatment: Assign whether to respect or ignore null values. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [10, 20, 30]}) - >>> result = df.aggregate([], [dfn.functions.nth_value(dfn.col("a"), 2).alias("v")]) - >>> result.collect_column("v")[0].as_py() - 20 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 20, 30]}) + >>> result = df.aggregate( + ... [], [dfn.functions.nth_value(dfn.col("a"), 2).alias("v")] + ... ) + >>> result.collect_column("v")[0].as_py() + 20 """ order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None @@ -2732,14 +2719,13 @@ def lead( column names or expressions. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1, 2, 3]}) - >>> result = df.select( - ... dfn.col("a"), dfn.functions.lead(dfn.col("a"), shift_offset=1, - ... default_value=0, order_by="a").alias("lead")) - >>> result.sort(dfn.col("a")).collect_column("lead").to_pylist() - [2, 3, 0] + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.lead(dfn.col("a"), shift_offset=1, + ... default_value=0, order_by="a").alias("lead")) + >>> result.sort(dfn.col("a")).collect_column("lead").to_pylist() + [2, 3, 0] """ if not isinstance(default_value, pa.Scalar) and default_value is not None: default_value = pa.scalar(default_value) @@ -2793,14 +2779,13 @@ def lag( column names or expressions. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1, 2, 3]}) - >>> result = df.select( - ... dfn.col("a"), dfn.functions.lag(dfn.col("a"), shift_offset=1, - ... default_value=0, order_by="a").alias("lag")) - >>> result.sort(dfn.col("a")).collect_column("lag").to_pylist() - [0, 1, 2] + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1, 2, 3]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.lag(dfn.col("a"), shift_offset=1, + ... default_value=0, order_by="a").alias("lag")) + >>> result.sort(dfn.col("a")).collect_column("lag").to_pylist() + [0, 1, 2] """ if not isinstance(default_value, pa.Scalar): default_value = pa.scalar(default_value) @@ -2844,13 +2829,12 @@ def row_number( column names or expressions. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [10, 20, 30]}) - >>> result = df.select( - ... dfn.col("a"), dfn.functions.row_number(order_by="a").alias("rn")) - >>> result.sort(dfn.col("a")).collect_column("rn").to_pylist() - [1, 2, 3] + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 20, 30]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.row_number(order_by="a").alias("rn")) + >>> result.sort(dfn.col("a")).collect_column("rn").to_pylist() + [1, 2, 3] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) @@ -2893,12 +2877,13 @@ def rank( column names or expressions. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [10, 10, 20]}) - >>> result = df.select(dfn.col("a"), dfn.functions.rank(order_by="a").alias("rnk")) - >>> result.sort(dfn.col("a")).collect_column("rnk").to_pylist() - [1, 1, 3] + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 10, 20]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.rank(order_by="a").alias("rnk") + ... ) + >>> result.sort(dfn.col("a")).collect_column("rnk").to_pylist() + [1, 1, 3] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) @@ -2936,13 +2921,12 @@ def dense_rank( column names or expressions. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [10, 10, 20]}) - >>> result = df.select( - ... dfn.col("a"), dfn.functions.dense_rank(order_by="a").alias("dr")) - >>> result.sort(dfn.col("a")).collect_column("dr").to_pylist() - [1, 1, 2] + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 10, 20]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.dense_rank(order_by="a").alias("dr")) + >>> result.sort(dfn.col("a")).collect_column("dr").to_pylist() + [1, 1, 2] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) @@ -2982,13 +2966,12 @@ def percent_rank( Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [10, 20, 30]}) - >>> result = df.select( - ... dfn.col("a"), dfn.functions.percent_rank(order_by="a").alias("pr")) - >>> result.sort(dfn.col("a")).collect_column("pr").to_pylist() - [0.0, 0.5, 1.0] + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 20, 30]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.percent_rank(order_by="a").alias("pr")) + >>> result.sort(dfn.col("a")).collect_column("pr").to_pylist() + [0.0, 0.5, 1.0] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) @@ -3027,17 +3010,16 @@ def cume_dist( column names or expressions. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1., 2., 2., 3.]}) - >>> result = df.select( - ... dfn.col("a"), - ... dfn.functions.cume_dist( - ... order_by="a" - ... ).alias("cd") - ... ) - >>> result.collect_column("cd").to_pylist() - [0.25..., 0.75..., 0.75..., 1.0...] + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1., 2., 2., 3.]}) + >>> result = df.select( + ... dfn.col("a"), + ... dfn.functions.cume_dist( + ... order_by="a" + ... ).alias("cd") + ... ) + >>> result.collect_column("cd").to_pylist() + [0.25..., 0.75..., 0.75..., 1.0...] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) @@ -3080,13 +3062,12 @@ def ntile( column names or expressions. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [10, 20, 30, 40]}) - >>> result = df.select( - ... dfn.col("a"), dfn.functions.ntile(2, order_by="a").alias("nt")) - >>> result.sort(dfn.col("a")).collect_column("nt").to_pylist() - [1, 1, 2, 2] + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [10, 20, 30, 40]}) + >>> result = df.select( + ... dfn.col("a"), dfn.functions.ntile(2, order_by="a").alias("nt")) + >>> result.sort(dfn.col("a")).collect_column("nt").to_pylist() + [1, 1, 2, 2] """ partition_by_raw = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by)