-
Notifications
You must be signed in to change notification settings - Fork 151
Add docstring examples for Scalar regex, crypto, struct and other #1422
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -637,7 +637,17 @@ def chr(arg: Expr) -> Expr: | |
|
|
||
|
|
||
| def coalesce(*args: Expr) -> Expr: | ||
| """Returns the value of the first expr in ``args`` which is not NULL.""" | ||
| """Returns the value of the first expr in ``args`` which is not NULL. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": [None, 1], "b": [2, 3]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.coalesce(dfn.col("a"), dfn.col("b")).alias("c")) | ||
| >>> result.collect_column("c")[0].as_py() | ||
| 2 | ||
| """ | ||
| args = [arg.expr for arg in args] | ||
| return Expr(f.coalesce(*args)) | ||
|
|
||
|
|
@@ -820,7 +830,16 @@ def ltrim(arg: Expr) -> Expr: | |
|
|
||
|
|
||
| def md5(arg: Expr) -> Expr: | ||
| """Computes an MD5 128-bit checksum for a string expression.""" | ||
| """Computes an MD5 128-bit checksum for a string expression. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": ["hello"]}) | ||
| >>> result = df.select(dfn.functions.md5(dfn.col("a")).alias("md5")) | ||
| >>> result.collect_column("md5")[0].as_py() | ||
| '5d41402abc4b2a76b9719d911017c592' | ||
| """ | ||
| return Expr(f.md5(arg.expr)) | ||
|
|
||
|
|
||
|
|
@@ -830,7 +849,18 @@ def nanvl(x: Expr, y: Expr) -> Expr: | |
|
|
||
|
|
||
| def nvl(x: Expr, y: Expr) -> Expr: | ||
| """Returns ``x`` if ``x`` is not ``NULL``. Otherwise returns ``y``.""" | ||
| """Returns ``x`` if ``x`` is not ``NULL``. Otherwise returns ``y``. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": [None, 1], "b": [0, 0]}) | ||
| >>> nvl_df = df.select(dfn.functions.nvl(dfn.col("a"), dfn.col("b")).alias("nvl")) | ||
| >>> nvl_df.collect_column("nvl")[0].as_py() | ||
| 0 | ||
| >>> nvl_df.collect_column("nvl")[1].as_py() | ||
| 1 | ||
| """ | ||
| return Expr(f.nvl(x.expr, y.expr)) | ||
|
|
||
|
|
||
|
|
@@ -899,21 +929,45 @@ def radians(arg: Expr) -> Expr: | |
|
|
||
|
|
||
| def regexp_like(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: | ||
| """Find if any regular expression (regex) matches exist. | ||
| r"""Find if any regular expression (regex) matches exist. | ||
|
|
||
| Tests a string using a regular expression returning true if at least one match, | ||
| false otherwise. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": ["hello123"]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.regexp_like( | ||
| ... dfn.col("a"), dfn.lit("\\d+") | ||
| ... ).alias("m") | ||
| ... ) | ||
| >>> result.collect_column("m")[0].as_py() | ||
| True | ||
| """ | ||
| if flags is not None: | ||
| flags = flags.expr | ||
| return Expr(f.regexp_like(string.expr, regex.expr, flags)) | ||
|
|
||
|
|
||
| def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: | ||
| """Perform regular expression (regex) matching. | ||
| r"""Perform regular expression (regex) matching. | ||
|
|
||
| Returns an array with each element containing the leftmost-first match of the | ||
| corresponding index in ``regex`` to string in ``string``. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": ["hello 42 world"]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.regexp_match( | ||
| ... dfn.col("a"), dfn.lit("(\\d+)") | ||
| ... ).alias("m") | ||
| ... ) | ||
| >>> result.collect_column("m")[0].as_py() | ||
| ['42'] | ||
| """ | ||
| if flags is not None: | ||
| flags = flags.expr | ||
|
|
@@ -923,13 +977,26 @@ def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: | |
| def regexp_replace( | ||
| string: Expr, pattern: Expr, replacement: Expr, flags: Expr | None = None | ||
| ) -> Expr: | ||
| """Replaces substring(s) matching a PCRE-like regular expression. | ||
| r"""Replaces substring(s) matching a PCRE-like regular expression. | ||
|
|
||
| The full list of supported features and syntax can be found at | ||
| <https://docs.rs/regex/latest/regex/#syntax> | ||
|
|
||
| Supported flags with the addition of 'g' can be found at | ||
| <https://docs.rs/regex/latest/regex/#grouping-and-flags> | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": ["hello 42"]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.regexp_replace( | ||
| ... dfn.col("a"), dfn.lit("\\d+"), | ||
| ... dfn.lit("XX") | ||
| ... ).alias("r") | ||
| ... ) | ||
| >>> result.collect_column("r")[0].as_py() | ||
| 'hello XX' | ||
| """ | ||
| if flags is not None: | ||
| flags = flags.expr | ||
|
|
@@ -943,6 +1010,15 @@ def regexp_count( | |
|
|
||
| Optional start position (the first position is 1) to search for the regular | ||
| expression. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": ["abcabc"]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.regexp_count(dfn.col("a"), dfn.lit("abc")).alias("c")) | ||
| >>> result.collect_column("c")[0].as_py() | ||
| 2 | ||
| """ | ||
| if flags is not None: | ||
| flags = flags.expr | ||
|
|
@@ -958,12 +1034,24 @@ def regexp_instr( | |
| flags: Expr | None = None, | ||
| sub_expr: Expr | None = None, | ||
| ) -> Expr: | ||
| """Returns the position of a regular expression match in a string. | ||
| r"""Returns the position of a regular expression match in a string. | ||
|
|
||
| Searches ``values`` for the ``n``-th occurrence of ``regex``, starting at position | ||
| ``start`` (the first position is 1). Returns the starting or ending position based | ||
| on ``end_position``. Use ``flags`` to control regex behavior and ``sub_expr`` to | ||
| return the position of a specific capture group instead of the entire match. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": ["hello 42 world"]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.regexp_instr( | ||
| ... dfn.col("a"), dfn.lit("\\d+") | ||
| ... ).alias("pos") | ||
| ... ) | ||
| >>> result.collect_column("pos")[0].as_py() | ||
| 7 | ||
| """ | ||
| start = start.expr if start is not None else None | ||
| n = n.expr if n is not None else None | ||
|
|
@@ -1030,22 +1118,66 @@ def rtrim(arg: Expr) -> Expr: | |
|
|
||
|
|
||
| def sha224(arg: Expr) -> Expr: | ||
| """Computes the SHA-224 hash of a binary string.""" | ||
| """Computes the SHA-224 hash of a binary string. | ||
|
Comment on lines
1120
to
+1121
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These four examples (`sha224`, `sha256`, `sha384`, `sha512`) […] That would reduce doc drift and make the examples more informative. |
||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": ["hello"]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.sha224(dfn.col("a")).alias("h") | ||
| ... ) | ||
| >>> len(result.collect_column("h")[0].as_py()) > 0 | ||
| True | ||
| """ | ||
| return Expr(f.sha224(arg.expr)) | ||
|
|
||
|
|
||
| def sha256(arg: Expr) -> Expr: | ||
| """Computes the SHA-256 hash of a binary string.""" | ||
| """Computes the SHA-256 hash of a binary string. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": ["hello"]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.sha256(dfn.col("a")).alias("h") | ||
| ... ) | ||
| >>> len(result.collect_column("h")[0].as_py()) > 0 | ||
| True | ||
| """ | ||
| return Expr(f.sha256(arg.expr)) | ||
|
|
||
|
|
||
| def sha384(arg: Expr) -> Expr: | ||
| """Computes the SHA-384 hash of a binary string.""" | ||
| """Computes the SHA-384 hash of a binary string. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": ["hello"]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.sha384(dfn.col("a")).alias("h") | ||
| ... ) | ||
| >>> len(result.collect_column("h")[0].as_py()) > 0 | ||
| True | ||
| """ | ||
| return Expr(f.sha384(arg.expr)) | ||
|
|
||
|
|
||
| def sha512(arg: Expr) -> Expr: | ||
| """Computes the SHA-512 hash of a binary string.""" | ||
| """Computes the SHA-512 hash of a binary string. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": ["hello"]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.sha512(dfn.col("a")).alias("h") | ||
| ... ) | ||
| >>> len(result.collect_column("h")[0].as_py()) > 0 | ||
| True | ||
| """ | ||
| return Expr(f.sha512(arg.expr)) | ||
|
|
||
|
|
||
|
|
@@ -1370,18 +1502,55 @@ def range(start: Expr, stop: Expr, step: Expr) -> Expr: | |
|
|
||
|
|
||
| def uuid() -> Expr: | ||
| """Returns uuid v4 as a string value.""" | ||
| """Returns uuid v4 as a string value. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": [1]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.uuid().alias("u") | ||
| ... ) | ||
| >>> len(result.collect_column("u")[0].as_py()) == 36 | ||
| True | ||
| """ | ||
| return Expr(f.uuid()) | ||
|
|
||
|
|
||
| def struct(*args: Expr) -> Expr: | ||
| """Returns a struct with the given arguments.""" | ||
| """Returns a struct with the given arguments. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": [1], "b": [2]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.struct( | ||
| ... dfn.col("a"), dfn.col("b") | ||
| ... ).alias("s") | ||
| ... ) | ||
| >>> result.collect_column("s")[0].as_py() == {"c0": 1, "c1": 2} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could this example call out why the keys are `c0` and `c1`? |
||
| True | ||
| """ | ||
| args = [arg.expr for arg in args] | ||
| return Expr(f.struct(*args)) | ||
|
|
||
|
|
||
| def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr: | ||
| """Returns a struct with the given names and arguments pairs.""" | ||
| """Returns a struct with the given names and arguments pairs. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": [1]}) | ||
| >>> result = df.select( | ||
| ... dfn.functions.named_struct( | ||
| ... [("x", dfn.lit(10)), ("y", dfn.lit(20))] | ||
| ... ).alias("s") | ||
| ... ) | ||
| >>> result.collect_column("s")[0].as_py() == {"x": 10, "y": 20} | ||
| True | ||
| """ | ||
| name_pair_exprs = [ | ||
| [Expr.literal(pa.scalar(pair[0], type=pa.string())), pair[1]] | ||
| for pair in name_pairs | ||
|
|
@@ -1398,12 +1567,31 @@ def from_unixtime(arg: Expr) -> Expr: | |
|
|
||
|
|
||
| def arrow_typeof(arg: Expr) -> Expr: | ||
| """Returns the Arrow type of the expression.""" | ||
| """Returns the Arrow type of the expression. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": [1]}) | ||
| >>> result = df.select(dfn.functions.arrow_typeof(dfn.col("a")).alias("t")) | ||
| >>> result.collect_column("t")[0].as_py() | ||
| 'Int64' | ||
| """ | ||
| return Expr(f.arrow_typeof(arg.expr)) | ||
|
|
||
|
|
||
| def arrow_cast(expr: Expr, data_type: Expr) -> Expr: | ||
| """Casts an expression to a specified data type.""" | ||
| """Casts an expression to a specified data type. | ||
|
|
||
| Examples: | ||
| --------- | ||
| >>> ctx = dfn.SessionContext() | ||
| >>> df = ctx.from_pydict({"a": [1]}) | ||
| >>> data_type = dfn.string_literal("Float64") | ||
| >>> result = df.select(dfn.functions.arrow_cast(dfn.col("a"), data_type).alias("c")) | ||
| >>> result.collect_column("c")[0].as_py() | ||
| 1.0 | ||
| """ | ||
| return Expr(f.arrow_cast(expr.expr, data_type.expr)) | ||
|
|
||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`end_position` is not in the function signature