From 0d0c276be9b4cf48650703b386c4c9293e6d3f58 Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Thu, 5 Mar 2026 14:58:19 -0500 Subject: [PATCH 1/4] Add docstring examples for Scalar math functions Add example usage to docstrings for Scalar math functions to improve documentation. Co-Authored-By: Claude Opus 4.6 --- python/datafusion/functions.py | 249 ++++++++++++++++++++++++++++++--- 1 file changed, 226 insertions(+), 23 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index fd116254b..9852316bb 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -484,10 +484,13 @@ def window( def abs(arg: Expr) -> Expr: """Return the absolute value of a given number. - Returns: - -------- - Expr - A new expression representing the absolute value of the input expression. + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [-1, 0, 1]}) + >>> result = df.select(dfn.functions.abs(dfn.col("a")).alias("abs")) + >>> result.collect_column("abs")[0].as_py() + 1 """ return Expr(f.abs(arg.expr)) @@ -607,12 +610,30 @@ def btrim(arg: Expr) -> Expr: def cbrt(arg: Expr) -> Expr: - """Returns the cube root of a number.""" + """Returns the cube root of a number. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [27]}) + >>> cbrt_df = df.select(dfn.functions.cbrt(dfn.col("a")).alias("cbrt")) + >>> cbrt_df.collect_column("cbrt")[0].as_py() + 3.0 + """ return Expr(f.cbrt(arg.expr)) def ceil(arg: Expr) -> Expr: - """Returns the nearest integer greater than or equal to argument.""" + """Returns the nearest integer greater than or equal to argument. 
+ + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.9]}) + >>> floor_df = df.select(dfn.functions.ceil(dfn.col("a")).alias("ceil")) + >>> floor_df.collect_column("ceil")[0].as_py() + 2.0 + """ return Expr(f.ceil(arg.expr)) @@ -711,12 +732,32 @@ def ends_with(arg: Expr, suffix: Expr) -> Expr: def exp(arg: Expr) -> Expr: - """Returns the exponential of the argument.""" + """Returns the exponential of the argument. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.exp(dfn.col("a")).alias("exp")) + >>> result.collect_column("exp")[0].as_py() + 1.0 + """ return Expr(f.exp(arg.expr)) def factorial(arg: Expr) -> Expr: - """Returns the factorial of the argument.""" + """Returns the factorial of the argument. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [3]}) + >>> result = df.select( + ... dfn.functions.factorial(dfn.col("a")).alias("factorial") + ... ) + >>> result.collect_column("factorial")[0].as_py() + 6 + """ return Expr(f.factorial(arg.expr)) @@ -732,12 +773,30 @@ def find_in_set(string: Expr, string_list: Expr) -> Expr: def floor(arg: Expr) -> Expr: - """Returns the nearest integer less than or equal to the argument.""" + """Returns the nearest integer less than or equal to the argument. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.9]}) + >>> floor_df = df.select(dfn.functions.floor(dfn.col("a")).alias("floor")) + >>> floor_df.collect_column("floor")[0].as_py() + 1.0 + """ return Expr(f.floor(arg.expr)) def gcd(x: Expr, y: Expr) -> Expr: - """Returns the greatest common divisor.""" + """Returns the greatest common divisor. 
+ + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [12], "b": [8]}) + >>> result = df.select(dfn.functions.gcd(dfn.col("a"), dfn.col("b")).alias("gcd")) + >>> result.collect_column("gcd")[0].as_py() + 4 + """ return Expr(f.gcd(x.expr, y.expr)) @@ -759,12 +818,30 @@ def instr(string: Expr, substring: Expr) -> Expr: def iszero(arg: Expr) -> Expr: - """Returns true if a given number is +0.0 or -0.0 otherwise returns false.""" + """Returns true if a given number is +0.0 or -0.0 otherwise returns false. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0, 1.0]}) + >>> result = df.select(dfn.functions.iszero(dfn.col("a")).alias("iz")) + >>> result.collect_column("iz")[0].as_py() + True + """ return Expr(f.iszero(arg.expr)) def lcm(x: Expr, y: Expr) -> Expr: - """Returns the least common multiple.""" + """Returns the least common multiple. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [4], "b": [6]}) + >>> result = df.select(dfn.functions.lcm(dfn.col("a"), dfn.col("b")).alias("lcm")) + >>> result.collect_column("lcm")[0].as_py() + 12 + """ return Expr(f.lcm(x.expr, y.expr)) @@ -779,22 +856,58 @@ def levenshtein(string1: Expr, string2: Expr) -> Expr: def ln(arg: Expr) -> Expr: - """Returns the natural logarithm (base e) of the argument.""" + """Returns the natural logarithm (base e) of the argument. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.0]}) + >>> result = df.select(dfn.functions.ln(dfn.col("a")).alias("ln")) + >>> result.collect_column("ln")[0].as_py() + 0.0 + """ return Expr(f.ln(arg.expr)) def log(base: Expr, num: Expr) -> Expr: - """Returns the logarithm of a number for a particular ``base``.""" + """Returns the logarithm of a number for a particular ``base``. 
+ + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [100.0]}) + >>> result = df.select(dfn.functions.log(dfn.lit(10.0), dfn.col("a")).alias("log")) + >>> result.collect_column("log")[0].as_py() + 2.0 + """ return Expr(f.log(base.expr, num.expr)) def log10(arg: Expr) -> Expr: - """Base 10 logarithm of the argument.""" + """Base 10 logarithm of the argument. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [100.0]}) + >>> result = df.select(dfn.functions.log10(dfn.col("a")).alias("log10")) + >>> result.collect_column("log10")[0].as_py() + 2.0 + """ return Expr(f.log10(arg.expr)) def log2(arg: Expr) -> Expr: - """Base 2 logarithm of the argument.""" + """Base 2 logarithm of the argument. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [8.0]}) + >>> result = df.select(dfn.functions.log2(dfn.col("a")).alias("log2")) + >>> result.collect_column("log2")[0].as_py() + 3.0 + """ return Expr(f.log2(arg.expr)) @@ -825,7 +938,19 @@ def md5(arg: Expr) -> Expr: def nanvl(x: Expr, y: Expr) -> Expr: - """Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``.""" + """Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [np.nan, 1.0], "b": [0.0, 0.0]}) + >>> nanvl_df = df.select( + ... dfn.functions.nanvl(dfn.col("a"), dfn.col("b")).alias("nanvl")) + >>> nanvl_df.collect_column("nanvl")[0].as_py() + 0.0 + >>> nanvl_df.collect_column("nanvl")[1].as_py() + 1.0 + """ return Expr(f.nanvl(x.expr, y.expr)) @@ -853,7 +978,21 @@ def overlay( def pi() -> Expr: - """Returns an approximate value of π.""" + """Returns an approximate value of π. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1]}) + >>> import builtins + >>> result = df.select( + ... dfn.functions.pi().alias("pi") + ... ) + >>> builtins.round( + ... 
result.collect_column("pi")[0].as_py(), 5 + ... ) + 3.14159 + """ return Expr(f.pi()) @@ -866,7 +1005,16 @@ def position(string: Expr, substring: Expr) -> Expr: def power(base: Expr, exponent: Expr) -> Expr: - """Returns ``base`` raised to the power of ``exponent``.""" + """Returns ``base`` raised to the power of ``exponent``. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [2.0]}) + >>> result = df.select(dfn.functions.power(dfn.col("a"), dfn.lit(3.0)).alias("pow")) + >>> result.collect_column("pow")[0].as_py() + 8.0 + """ return Expr(f.power(base.expr, exponent.expr)) @@ -874,6 +1022,14 @@ def pow(base: Expr, exponent: Expr) -> Expr: """Returns ``base`` raised to the power of ``exponent``. This is an alias of :py:func:`power`. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [3.0]}) + >>> result = df.select(dfn.functions.pow(dfn.col("a"), dfn.lit(2.0)).alias("pow")) + >>> result.collect_column("pow")[0].as_py() + 9.0 """ return power(base, exponent) @@ -1008,6 +1164,14 @@ def round(value: Expr, decimal_places: Expr | None = None) -> Expr: If the optional ``decimal_places`` is specified, round to the nearest number of decimal places. You can specify a negative number of decimal places. For example ``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.567]}) + >>> result = df.select(dfn.functions.round(dfn.col("a"), dfn.lit(2)).alias("r")) + >>> result.collect_column("r")[0].as_py() + 1.57 """ if decimal_places is None: decimal_places = Expr.literal(0) @@ -1050,7 +1214,16 @@ def sha512(arg: Expr) -> Expr: def signum(arg: Expr) -> Expr: - """Returns the sign of the argument (-1, 0, +1).""" + """Returns the sign of the argument (-1, 0, +1). 
+ + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [-5.0, 0.0, 5.0]}) + >>> result = df.select(dfn.functions.signum(dfn.col("a")).alias("s")) + >>> result.collect_column("s").to_pylist() + [-1.0, 0.0, 1.0] + """ return Expr(f.signum(arg.expr)) @@ -1092,7 +1265,16 @@ def split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr: def sqrt(arg: Expr) -> Expr: - """Returns the square root of the argument.""" + """Returns the square root of the argument. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [9.0]}) + >>> result = df.select(dfn.functions.sqrt(dfn.col("a")).alias("sqrt")) + >>> result.collect_column("sqrt")[0].as_py() + 3.0 + """ return Expr(f.sqrt(arg.expr)) @@ -1331,7 +1513,16 @@ def trim(arg: Expr) -> Expr: def trunc(num: Expr, precision: Expr | None = None) -> Expr: - """Truncate the number toward zero with optional precision.""" + """Truncate the number toward zero with optional precision. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.567]}) + >>> result = df.select(dfn.functions.trunc(dfn.col("a")).alias("t")) + >>> result.collect_column("t")[0].as_py() + 1.0 + """ if precision is not None: return Expr(f.trunc(num.expr, precision.expr)) return Expr(f.trunc(num.expr)) @@ -1408,7 +1599,19 @@ def arrow_cast(expr: Expr, data_type: Expr) -> Expr: def random() -> Expr: - """Returns a random value in the range ``0.0 <= x < 1.0``.""" + """Returns a random value in the range ``0.0 <= x < 1.0``. + + Examples: + --------- + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1]}) + >>> result = df.select( + ... dfn.functions.random().alias("r") + ... 
) + >>> val = result.collect_column("r")[0].as_py() + >>> 0.0 <= val < 1.0 + True + """ return Expr(f.random()) From 63faeca8e3700f311b29aa22afba9f742fffa613 Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Sat, 14 Mar 2026 11:04:24 -0400 Subject: [PATCH 2/4] Fix copy paste error on name --- python/datafusion/functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 9852316bb..2ff9d66a7 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -630,8 +630,8 @@ def ceil(arg: Expr) -> Expr: --------- >>> ctx = dfn.SessionContext() >>> df = ctx.from_pydict({"a": [1.9]}) - >>> floor_df = df.select(dfn.functions.ceil(dfn.col("a")).alias("ceil")) - >>> floor_df.collect_column("ceil")[0].as_py() + >>> ceil_df = df.select(dfn.functions.ceil(dfn.col("a")).alias("ceil")) + >>> ceil_df.collect_column("ceil")[0].as_py() 2.0 """ return Expr(f.ceil(arg.expr)) From 43700905cfede8069edc44b46b14de9dbbc37a11 Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Sat, 14 Mar 2026 11:05:49 -0400 Subject: [PATCH 3/4] Remove example from alias --- python/datafusion/functions.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 2ff9d66a7..566020182 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -1022,14 +1022,6 @@ def pow(base: Expr, exponent: Expr) -> Expr: """Returns ``base`` raised to the power of ``exponent``. This is an alias of :py:func:`power`. 
- - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [3.0]}) - >>> result = df.select(dfn.functions.pow(dfn.col("a"), dfn.lit(2.0)).alias("pow")) - >>> result.collect_column("pow")[0].as_py() - 9.0 """ return power(base, exponent) From f70c777a5ce8d7c569a37d86cfe39f31a0c6b80c Mon Sep 17 00:00:00 2001 From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com> Date: Sat, 14 Mar 2026 11:10:39 -0400 Subject: [PATCH 4/4] Examples google docstyle --- python/datafusion/functions.py | 469 ++++++++++++++++----------------- 1 file changed, 220 insertions(+), 249 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 566020182..fc4f94df7 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -485,12 +485,11 @@ def abs(arg: Expr) -> Expr: """Return the absolute value of a given number. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [-1, 0, 1]}) - >>> result = df.select(dfn.functions.abs(dfn.col("a")).alias("abs")) - >>> result.collect_column("abs")[0].as_py() - 1 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [-1, 0, 1]}) + >>> result = df.select(dfn.functions.abs(dfn.col("a")).alias("abs")) + >>> result.collect_column("abs")[0].as_py() + 1 """ return Expr(f.abs(arg.expr)) @@ -499,12 +498,11 @@ def acos(arg: Expr) -> Expr: """Returns the arc cosine or inverse cosine of a number. 
Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.0]}) - >>> result = df.select(dfn.functions.acos(dfn.col("a")).alias("acos")) - >>> result.collect_column("acos")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.0]}) + >>> result = df.select(dfn.functions.acos(dfn.col("a")).alias("acos")) + >>> result.collect_column("acos")[0].as_py() + 0.0 """ return Expr(f.acos(arg.expr)) @@ -513,12 +511,11 @@ def acosh(arg: Expr) -> Expr: """Returns inverse hyperbolic cosine. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.0]}) - >>> result = df.select(dfn.functions.acosh(dfn.col("a")).alias("acosh")) - >>> result.collect_column("acosh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.0]}) + >>> result = df.select(dfn.functions.acosh(dfn.col("a")).alias("acosh")) + >>> result.collect_column("acosh")[0].as_py() + 0.0 """ return Expr(f.acosh(arg.expr)) @@ -532,12 +529,11 @@ def asin(arg: Expr) -> Expr: """Returns the arc sine or inverse sine of a number. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.asin(dfn.col("a")).alias("asin")) - >>> result.collect_column("asin")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.asin(dfn.col("a")).alias("asin")) + >>> result.collect_column("asin")[0].as_py() + 0.0 """ return Expr(f.asin(arg.expr)) @@ -546,12 +542,11 @@ def asinh(arg: Expr) -> Expr: """Returns inverse hyperbolic sine. 
Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.asinh(dfn.col("a")).alias("asinh")) - >>> result.collect_column("asinh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.asinh(dfn.col("a")).alias("asinh")) + >>> result.collect_column("asinh")[0].as_py() + 0.0 """ return Expr(f.asinh(arg.expr)) @@ -560,12 +555,11 @@ def atan(arg: Expr) -> Expr: """Returns inverse tangent of a number. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.atan(dfn.col("a")).alias("atan")) - >>> result.collect_column("atan")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.atan(dfn.col("a")).alias("atan")) + >>> result.collect_column("atan")[0].as_py() + 0.0 """ return Expr(f.atan(arg.expr)) @@ -574,12 +568,11 @@ def atanh(arg: Expr) -> Expr: """Returns inverse hyperbolic tangent. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.atanh(dfn.col("a")).alias("atanh")) - >>> result.collect_column("atanh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.atanh(dfn.col("a")).alias("atanh")) + >>> result.collect_column("atanh")[0].as_py() + 0.0 """ return Expr(f.atanh(arg.expr)) @@ -588,13 +581,12 @@ def atan2(y: Expr, x: Expr) -> Expr: """Returns inverse tangent of a division given in the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"y": [0.0], "x": [1.0]}) - >>> result = df.select( - ... 
dfn.functions.atan2(dfn.col("y"), dfn.col("x")).alias("atan2")) - >>> result.collect_column("atan2")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"y": [0.0], "x": [1.0]}) + >>> result = df.select( + ... dfn.functions.atan2(dfn.col("y"), dfn.col("x")).alias("atan2")) + >>> result.collect_column("atan2")[0].as_py() + 0.0 """ return Expr(f.atan2(y.expr, x.expr)) @@ -613,12 +605,11 @@ def cbrt(arg: Expr) -> Expr: """Returns the cube root of a number. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [27]}) - >>> cbrt_df = df.select(dfn.functions.cbrt(dfn.col("a")).alias("cbrt")) - >>> cbrt_df.collect_column("cbrt")[0].as_py() - 3.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [27]}) + >>> cbrt_df = df.select(dfn.functions.cbrt(dfn.col("a")).alias("cbrt")) + >>> cbrt_df.collect_column("cbrt")[0].as_py() + 3.0 """ return Expr(f.cbrt(arg.expr)) @@ -627,12 +618,11 @@ def ceil(arg: Expr) -> Expr: """Returns the nearest integer greater than or equal to argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.9]}) - >>> ceil_df = df.select(dfn.functions.ceil(dfn.col("a")).alias("ceil")) - >>> ceil_df.collect_column("ceil")[0].as_py() - 2.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.9]}) + >>> ceil_df = df.select(dfn.functions.ceil(dfn.col("a")).alias("ceil")) + >>> ceil_df.collect_column("ceil")[0].as_py() + 2.0 """ return Expr(f.ceil(arg.expr)) @@ -667,12 +657,11 @@ def cos(arg: Expr) -> Expr: """Returns the cosine of the argument. 
Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0,-1,1]}) - >>> cos_df = df.select(dfn.functions.cos(dfn.col("a")).alias("cos")) - >>> cos_df.collect_column("cos")[0].as_py() - 1.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0,-1,1]}) + >>> cos_df = df.select(dfn.functions.cos(dfn.col("a")).alias("cos")) + >>> cos_df.collect_column("cos")[0].as_py() + 1.0 """ return Expr(f.cos(arg.expr)) @@ -681,12 +670,11 @@ def cosh(arg: Expr) -> Expr: """Returns the hyperbolic cosine of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0,-1,1]}) - >>> cosh_df = df.select(dfn.functions.cosh(dfn.col("a")).alias("cosh")) - >>> cosh_df.collect_column("cosh")[0].as_py() - 1.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0,-1,1]}) + >>> cosh_df = df.select(dfn.functions.cosh(dfn.col("a")).alias("cosh")) + >>> cosh_df.collect_column("cosh")[0].as_py() + 1.0 """ return Expr(f.cosh(arg.expr)) @@ -695,18 +683,17 @@ def cot(arg: Expr) -> Expr: """Returns the cotangent of the argument. Examples: - --------- - >>> from math import pi - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [pi / 4]}) - >>> import builtins - >>> result = df.select( - ... dfn.functions.cot(dfn.col("a")).alias("cot") - ... ) - >>> builtins.round( - ... result.collect_column("cot")[0].as_py(), 1 - ... ) - 1.0 + >>> from math import pi + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [pi / 4]}) + >>> import builtins + >>> result = df.select( + ... dfn.functions.cot(dfn.col("a")).alias("cot") + ... ) + >>> builtins.round( + ... result.collect_column("cot")[0].as_py(), 1 + ... ) + 1.0 """ return Expr(f.cot(arg.expr)) @@ -715,13 +702,12 @@ def degrees(arg: Expr) -> Expr: """Converts the argument from radians to degrees. 
Examples: - --------- - >>> from math import pi - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0,pi,2*pi]}) - >>> deg_df = df.select(dfn.functions.degrees(dfn.col("a")).alias("deg")) - >>> deg_df.collect_column("deg")[2].as_py() - 360.0 + >>> from math import pi + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0,pi,2*pi]}) + >>> deg_df = df.select(dfn.functions.degrees(dfn.col("a")).alias("deg")) + >>> deg_df.collect_column("deg")[2].as_py() + 360.0 """ return Expr(f.degrees(arg.expr)) @@ -735,12 +721,11 @@ def exp(arg: Expr) -> Expr: """Returns the exponential of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.exp(dfn.col("a")).alias("exp")) - >>> result.collect_column("exp")[0].as_py() - 1.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.exp(dfn.col("a")).alias("exp")) + >>> result.collect_column("exp")[0].as_py() + 1.0 """ return Expr(f.exp(arg.expr)) @@ -749,14 +734,13 @@ def factorial(arg: Expr) -> Expr: """Returns the factorial of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [3]}) - >>> result = df.select( - ... dfn.functions.factorial(dfn.col("a")).alias("factorial") - ... ) - >>> result.collect_column("factorial")[0].as_py() - 6 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [3]}) + >>> result = df.select( + ... dfn.functions.factorial(dfn.col("a")).alias("factorial") + ... ) + >>> result.collect_column("factorial")[0].as_py() + 6 """ return Expr(f.factorial(arg.expr)) @@ -776,12 +760,11 @@ def floor(arg: Expr) -> Expr: """Returns the nearest integer less than or equal to the argument. 
Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.9]}) - >>> floor_df = df.select(dfn.functions.floor(dfn.col("a")).alias("floor")) - >>> floor_df.collect_column("floor")[0].as_py() - 1.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.9]}) + >>> floor_df = df.select(dfn.functions.floor(dfn.col("a")).alias("floor")) + >>> floor_df.collect_column("floor")[0].as_py() + 1.0 """ return Expr(f.floor(arg.expr)) @@ -790,12 +773,13 @@ def gcd(x: Expr, y: Expr) -> Expr: """Returns the greatest common divisor. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [12], "b": [8]}) - >>> result = df.select(dfn.functions.gcd(dfn.col("a"), dfn.col("b")).alias("gcd")) - >>> result.collect_column("gcd")[0].as_py() - 4 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [12], "b": [8]}) + >>> result = df.select( + ... dfn.functions.gcd(dfn.col("a"), dfn.col("b")).alias("gcd") + ... ) + >>> result.collect_column("gcd")[0].as_py() + 4 """ return Expr(f.gcd(x.expr, y.expr)) @@ -821,12 +805,11 @@ def iszero(arg: Expr) -> Expr: """Returns true if a given number is +0.0 or -0.0 otherwise returns false. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0, 1.0]}) - >>> result = df.select(dfn.functions.iszero(dfn.col("a")).alias("iz")) - >>> result.collect_column("iz")[0].as_py() - True + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0, 1.0]}) + >>> result = df.select(dfn.functions.iszero(dfn.col("a")).alias("iz")) + >>> result.collect_column("iz")[0].as_py() + True """ return Expr(f.iszero(arg.expr)) @@ -835,12 +818,13 @@ def lcm(x: Expr, y: Expr) -> Expr: """Returns the least common multiple. 
Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [4], "b": [6]}) - >>> result = df.select(dfn.functions.lcm(dfn.col("a"), dfn.col("b")).alias("lcm")) - >>> result.collect_column("lcm")[0].as_py() - 12 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [4], "b": [6]}) + >>> result = df.select( + ... dfn.functions.lcm(dfn.col("a"), dfn.col("b")).alias("lcm") + ... ) + >>> result.collect_column("lcm")[0].as_py() + 12 """ return Expr(f.lcm(x.expr, y.expr)) @@ -859,12 +843,11 @@ def ln(arg: Expr) -> Expr: """Returns the natural logarithm (base e) of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.0]}) - >>> result = df.select(dfn.functions.ln(dfn.col("a")).alias("ln")) - >>> result.collect_column("ln")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.0]}) + >>> result = df.select(dfn.functions.ln(dfn.col("a")).alias("ln")) + >>> result.collect_column("ln")[0].as_py() + 0.0 """ return Expr(f.ln(arg.expr)) @@ -873,12 +856,13 @@ def log(base: Expr, num: Expr) -> Expr: """Returns the logarithm of a number for a particular ``base``. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [100.0]}) - >>> result = df.select(dfn.functions.log(dfn.lit(10.0), dfn.col("a")).alias("log")) - >>> result.collect_column("log")[0].as_py() - 2.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [100.0]}) + >>> result = df.select( + ... dfn.functions.log(dfn.lit(10.0), dfn.col("a")).alias("log") + ... ) + >>> result.collect_column("log")[0].as_py() + 2.0 """ return Expr(f.log(base.expr, num.expr)) @@ -887,12 +871,11 @@ def log10(arg: Expr) -> Expr: """Base 10 logarithm of the argument. 
Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [100.0]}) - >>> result = df.select(dfn.functions.log10(dfn.col("a")).alias("log10")) - >>> result.collect_column("log10")[0].as_py() - 2.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [100.0]}) + >>> result = df.select(dfn.functions.log10(dfn.col("a")).alias("log10")) + >>> result.collect_column("log10")[0].as_py() + 2.0 """ return Expr(f.log10(arg.expr)) @@ -901,12 +884,11 @@ def log2(arg: Expr) -> Expr: """Base 2 logarithm of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [8.0]}) - >>> result = df.select(dfn.functions.log2(dfn.col("a")).alias("log2")) - >>> result.collect_column("log2")[0].as_py() - 3.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [8.0]}) + >>> result = df.select(dfn.functions.log2(dfn.col("a")).alias("log2")) + >>> result.collect_column("log2")[0].as_py() + 3.0 """ return Expr(f.log2(arg.expr)) @@ -941,15 +923,14 @@ def nanvl(x: Expr, y: Expr) -> Expr: """Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [np.nan, 1.0], "b": [0.0, 0.0]}) - >>> nanvl_df = df.select( - ... dfn.functions.nanvl(dfn.col("a"), dfn.col("b")).alias("nanvl")) - >>> nanvl_df.collect_column("nanvl")[0].as_py() - 0.0 - >>> nanvl_df.collect_column("nanvl")[1].as_py() - 1.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [np.nan, 1.0], "b": [0.0, 0.0]}) + >>> nanvl_df = df.select( + ... dfn.functions.nanvl(dfn.col("a"), dfn.col("b")).alias("nanvl")) + >>> nanvl_df.collect_column("nanvl")[0].as_py() + 0.0 + >>> nanvl_df.collect_column("nanvl")[1].as_py() + 1.0 """ return Expr(f.nanvl(x.expr, y.expr)) @@ -981,17 +962,16 @@ def pi() -> Expr: """Returns an approximate value of π. 
Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1]}) - >>> import builtins - >>> result = df.select( - ... dfn.functions.pi().alias("pi") - ... ) - >>> builtins.round( - ... result.collect_column("pi")[0].as_py(), 5 - ... ) - 3.14159 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1]}) + >>> import builtins + >>> result = df.select( + ... dfn.functions.pi().alias("pi") + ... ) + >>> builtins.round( + ... result.collect_column("pi")[0].as_py(), 5 + ... ) + 3.14159 """ return Expr(f.pi()) @@ -1008,12 +988,13 @@ def power(base: Expr, exponent: Expr) -> Expr: """Returns ``base`` raised to the power of ``exponent``. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [2.0]}) - >>> result = df.select(dfn.functions.power(dfn.col("a"), dfn.lit(3.0)).alias("pow")) - >>> result.collect_column("pow")[0].as_py() - 8.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [2.0]}) + >>> result = df.select( + ... dfn.functions.power(dfn.col("a"), dfn.lit(3.0)).alias("pow") + ... ) + >>> result.collect_column("pow")[0].as_py() + 8.0 """ return Expr(f.power(base.expr, exponent.expr)) @@ -1030,18 +1011,17 @@ def radians(arg: Expr) -> Expr: """Converts the argument from degrees to radians. Examples: - --------- - >>> from math import pi - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [180.0]}) - >>> import builtins - >>> result = df.select( - ... dfn.functions.radians(dfn.col("a")).alias("rad") - ... ) - >>> builtins.round( - ... result.collect_column("rad")[0].as_py(), 6 - ... ) - 3.141593 + >>> from math import pi + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [180.0]}) + >>> import builtins + >>> result = df.select( + ... dfn.functions.radians(dfn.col("a")).alias("rad") + ... ) + >>> builtins.round( + ... result.collect_column("rad")[0].as_py(), 6 + ... 
) + 3.141593 """ return Expr(f.radians(arg.expr)) @@ -1158,12 +1138,11 @@ def round(value: Expr, decimal_places: Expr | None = None) -> Expr: ``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.567]}) - >>> result = df.select(dfn.functions.round(dfn.col("a"), dfn.lit(2)).alias("r")) - >>> result.collect_column("r")[0].as_py() - 1.57 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.567]}) + >>> result = df.select(dfn.functions.round(dfn.col("a"), dfn.lit(2)).alias("r")) + >>> result.collect_column("r")[0].as_py() + 1.57 """ if decimal_places is None: decimal_places = Expr.literal(0) @@ -1209,12 +1188,11 @@ def signum(arg: Expr) -> Expr: """Returns the sign of the argument (-1, 0, +1). Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [-5.0, 0.0, 5.0]}) - >>> result = df.select(dfn.functions.signum(dfn.col("a")).alias("s")) - >>> result.collect_column("s").to_pylist() - [-1.0, 0.0, 1.0] + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [-5.0, 0.0, 5.0]}) + >>> result = df.select(dfn.functions.signum(dfn.col("a")).alias("s")) + >>> result.collect_column("s").to_pylist() + [-1.0, 0.0, 1.0] """ return Expr(f.signum(arg.expr)) @@ -1223,12 +1201,11 @@ def sin(arg: Expr) -> Expr: """Returns the sine of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.sin(dfn.col("a")).alias("sin")) - >>> result.collect_column("sin")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.sin(dfn.col("a")).alias("sin")) + >>> result.collect_column("sin")[0].as_py() + 0.0 """ return Expr(f.sin(arg.expr)) @@ -1237,12 +1214,11 @@ def sinh(arg: Expr) -> Expr: """Returns the hyperbolic sine of the argument. 
Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.sinh(dfn.col("a")).alias("sinh")) - >>> result.collect_column("sinh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.sinh(dfn.col("a")).alias("sinh")) + >>> result.collect_column("sinh")[0].as_py() + 0.0 """ return Expr(f.sinh(arg.expr)) @@ -1260,12 +1236,11 @@ def sqrt(arg: Expr) -> Expr: """Returns the square root of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [9.0]}) - >>> result = df.select(dfn.functions.sqrt(dfn.col("a")).alias("sqrt")) - >>> result.collect_column("sqrt")[0].as_py() - 3.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [9.0]}) + >>> result = df.select(dfn.functions.sqrt(dfn.col("a")).alias("sqrt")) + >>> result.collect_column("sqrt")[0].as_py() + 3.0 """ return Expr(f.sqrt(arg.expr)) @@ -1303,12 +1278,11 @@ def tan(arg: Expr) -> Expr: """Returns the tangent of the argument. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.tan(dfn.col("a")).alias("tan")) - >>> result.collect_column("tan")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.tan(dfn.col("a")).alias("tan")) + >>> result.collect_column("tan")[0].as_py() + 0.0 """ return Expr(f.tan(arg.expr)) @@ -1317,12 +1291,11 @@ def tanh(arg: Expr) -> Expr: """Returns the hyperbolic tangent of the argument. 
Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.tanh(dfn.col("a")).alias("tanh")) - >>> result.collect_column("tanh")[0].as_py() - 0.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [0.0]}) + >>> result = df.select(dfn.functions.tanh(dfn.col("a")).alias("tanh")) + >>> result.collect_column("tanh")[0].as_py() + 0.0 """ return Expr(f.tanh(arg.expr)) @@ -1508,12 +1481,11 @@ def trunc(num: Expr, precision: Expr | None = None) -> Expr: """Truncate the number toward zero with optional precision. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.567]}) - >>> result = df.select(dfn.functions.trunc(dfn.col("a")).alias("t")) - >>> result.collect_column("t")[0].as_py() - 1.0 + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1.567]}) + >>> result = df.select(dfn.functions.trunc(dfn.col("a")).alias("t")) + >>> result.collect_column("t")[0].as_py() + 1.0 """ if precision is not None: return Expr(f.trunc(num.expr, precision.expr)) @@ -1594,15 +1566,14 @@ def random() -> Expr: """Returns a random value in the range ``0.0 <= x < 1.0``. Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1]}) - >>> result = df.select( - ... dfn.functions.random().alias("r") - ... ) - >>> val = result.collect_column("r")[0].as_py() - >>> 0.0 <= val < 1.0 - True + >>> ctx = dfn.SessionContext() + >>> df = ctx.from_pydict({"a": [1]}) + >>> result = df.select( + ... dfn.functions.random().alias("r") + ... ) + >>> val = result.collect_column("r")[0].as_py() + >>> 0.0 <= val < 1.0 + True """ return Expr(f.random())