From 674575dd2524ecd92bce490711f96d0a76f3af38 Mon Sep 17 00:00:00 2001
From: George Robertson <50412379+georgeRobertson@users.noreply.github.com>
Date: Sun, 22 Feb 2026 21:44:22 +0000
Subject: [PATCH 1/5] docs: add Zensical autodocs and update docs
---
.github/workflows/ci_docs_publish.yml | 49 +++
.github/workflows/ci_linting.yml | 4 +-
.github/workflows/ci_testing.yml | 4 +-
docs/README.md | 304 --------------
docs/advanced_guidance/index.md | 21 +
docs/advanced_guidance/json_schemas.md | 35 ++
.../components/base_entity.schema.json | 0
.../contact_error_details.schema.json | 0
.../contract/components/entity.schema.json | 0
.../contract/components/field.schema.json | 0
.../components/field_error_detail.schema.json | 0
.../field_error_type.schema copy.json | 0
.../components/field_error_type.schema.json | 0
.../field_specification.schema.json | 0
.../components/readable_entity.schema.json | 0
.../contract/components/type_name.schema.json | 0
.../validation_function.schema.json | 0
.../contract/contract.schema.json | 0
.../json_schemas/dataset.schema.json | 0
.../json_schemas/rule_store.schema.json | 0
.../components/business_filter.schema.json | 0
.../components/business_rule.schema.json | 0
.../components/concrete_filter.schema.json | 0
.../components/core_filter.schema.json | 0
.../components/filter.schema.json | 0
.../multiple_expressions.schema.json | 0
.../components/rule.schema.json | 0
.../transformations.schema.json | 0
docs/advanced_guidance/new_backend.md | 2 +
.../package_documentation/auditing.md | 2 +
.../package_documentation/index.md | 15 +
.../package_documentation/pipeline.md | 18 +
.../package_documentation/refdata_loaders.md | 18 +
docs/assets/images/favicon.ico | Bin 0 -> 15086 bytes
docs/assets/images/favicon.svg | 4 +
docs/assets/images/nhsuk-icon-180.png | Bin 0 -> 1079 bytes
docs/assets/images/nhsuk-icon-192.png | Bin 0 -> 1164 bytes
docs/assets/images/nhsuk-icon-512.png | Bin 0 -> 3308 bytes
docs/assets/images/nhsuk-icon-mask.svg | 3 +
docs/assets/images/nhsuk-opengraph-image.png | Bin 0 -> 4585 bytes
docs/assets/stylesheets/extra.css | 57 +++
docs/detailed_guidance/business_rules.md | 363 -----------------
docs/detailed_guidance/data_contract.md | 315 ---------------
docs/detailed_guidance/domain_types.md | 27 --
docs/detailed_guidance/feedback_messages.md | 1 -
docs/detailed_guidance/file_transformation.md | 1 -
docs/index.md | 39 ++
docs/json_schemas/README.md | 30 --
docs/user_guidance/auditing.md | 2 +
docs/user_guidance/business_rules.md | 2 +
docs/user_guidance/data_contract.md | 2 +
docs/user_guidance/error_reports.md | 2 +
docs/user_guidance/feedback_messages.md | 2 +
docs/user_guidance/file_transformation.md | 2 +
docs/user_guidance/getting_started.md | 117 ++++++
docs/user_guidance/implementations/duckdb.md | 175 ++++++++
.../implementations/mixing_implementations.md | 30 ++
.../platform_specific/databricks.md | 10 +
.../platform_specific/palantir_foundry.md | 2 +
docs/user_guidance/implementations/spark.md | 165 ++++++++
docs/user_guidance/install.md | 91 +++++
includes/jargon_and_acronyms.md | 3 +
overrides/.icons/nhseng.svg | 4 +
poetry.lock | 379 ++++++++++++++++--
pyproject.toml | 9 +
zensical.toml | 194 +++++++++
66 files changed, 1415 insertions(+), 1088 deletions(-)
create mode 100644 .github/workflows/ci_docs_publish.yml
delete mode 100644 docs/README.md
create mode 100644 docs/advanced_guidance/index.md
create mode 100644 docs/advanced_guidance/json_schemas.md
rename docs/{ => advanced_guidance}/json_schemas/contract/components/base_entity.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/contract/components/contact_error_details.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/contract/components/entity.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/contract/components/field.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/contract/components/field_error_detail.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/contract/components/field_error_type.schema copy.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/contract/components/field_error_type.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/contract/components/field_specification.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/contract/components/readable_entity.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/contract/components/type_name.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/contract/components/validation_function.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/contract/contract.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/dataset.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/rule_store.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/transformations/components/business_filter.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/transformations/components/business_rule.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/transformations/components/concrete_filter.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/transformations/components/core_filter.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/transformations/components/filter.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/transformations/components/multiple_expressions.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/transformations/components/rule.schema.json (100%)
rename docs/{ => advanced_guidance}/json_schemas/transformations/transformations.schema.json (100%)
create mode 100644 docs/advanced_guidance/new_backend.md
create mode 100644 docs/advanced_guidance/package_documentation/auditing.md
create mode 100644 docs/advanced_guidance/package_documentation/index.md
create mode 100644 docs/advanced_guidance/package_documentation/pipeline.md
create mode 100644 docs/advanced_guidance/package_documentation/refdata_loaders.md
create mode 100644 docs/assets/images/favicon.ico
create mode 100644 docs/assets/images/favicon.svg
create mode 100644 docs/assets/images/nhsuk-icon-180.png
create mode 100644 docs/assets/images/nhsuk-icon-192.png
create mode 100644 docs/assets/images/nhsuk-icon-512.png
create mode 100644 docs/assets/images/nhsuk-icon-mask.svg
create mode 100644 docs/assets/images/nhsuk-opengraph-image.png
create mode 100644 docs/assets/stylesheets/extra.css
delete mode 100644 docs/detailed_guidance/business_rules.md
delete mode 100644 docs/detailed_guidance/data_contract.md
delete mode 100644 docs/detailed_guidance/domain_types.md
delete mode 100644 docs/detailed_guidance/feedback_messages.md
delete mode 100644 docs/detailed_guidance/file_transformation.md
create mode 100644 docs/index.md
delete mode 100644 docs/json_schemas/README.md
create mode 100644 docs/user_guidance/auditing.md
create mode 100644 docs/user_guidance/business_rules.md
create mode 100644 docs/user_guidance/data_contract.md
create mode 100644 docs/user_guidance/error_reports.md
create mode 100644 docs/user_guidance/feedback_messages.md
create mode 100644 docs/user_guidance/file_transformation.md
create mode 100644 docs/user_guidance/getting_started.md
create mode 100644 docs/user_guidance/implementations/duckdb.md
create mode 100644 docs/user_guidance/implementations/mixing_implementations.md
create mode 100644 docs/user_guidance/implementations/platform_specific/databricks.md
create mode 100644 docs/user_guidance/implementations/platform_specific/palantir_foundry.md
create mode 100644 docs/user_guidance/implementations/spark.md
create mode 100644 docs/user_guidance/install.md
create mode 100644 includes/jargon_and_acronyms.md
create mode 100644 overrides/.icons/nhseng.svg
create mode 100644 zensical.toml
diff --git a/.github/workflows/ci_docs_publish.yml b/.github/workflows/ci_docs_publish.yml
new file mode 100644
index 0000000..09435f2
--- /dev/null
+++ b/.github/workflows/ci_docs_publish.yml
@@ -0,0 +1,49 @@
+name: Publish Documentation
+
+on:
+ push:
+ branches: main
+
+permissions:
+ contents: read
+ pages: write
+ id-token: write
+
+jobs:
+ deploy:
+ environment:
+ name: github-pages
+ url: ${{ steps.deployment.outputs.page_url }}
+ runs-on: ubuntu-24.04
+ steps:
+ - uses: actions/configure-pages@v5
+
+ - uses: actions/checkout@v5
+
+ - name: Install extra dependencies for a python install
+ run: |
+ sudo apt-get update
+ sudo apt -y install --no-install-recommends liblzma-dev libbz2-dev libreadline-dev
+
+ - name: Install asdf cli
+ uses: asdf-vm/actions/setup@b7bcd026f18772e44fe1026d729e1611cc435d47
+
+ - name: Install software through asdf
+ uses: asdf-vm/actions/install@b7bcd026f18772e44fe1026d729e1611cc435d47
+
+ - name: reshim asdf
+ run: asdf reshim
+
+ - name: ensure poetry using desired python version
+ run: poetry env use $(asdf which python)
+
+ - name: install docs requirements
+ run: |
+ poetry install --sync --no-interaction --with docs
+
+ - run: poetry run zensical build --clean
+ - uses: actions/upload-pages-artifact@v4
+ with:
+ path: site
+ - uses: actions/deploy-pages@v4
+ id: deployment
diff --git a/.github/workflows/ci_linting.yml b/.github/workflows/ci_linting.yml
index 1be9758..fc9d2bf 100644
--- a/.github/workflows/ci_linting.yml
+++ b/.github/workflows/ci_linting.yml
@@ -17,10 +17,10 @@ jobs:
sudo apt -y install --no-install-recommends liblzma-dev libbz2-dev libreadline-dev
- name: Install asdf cli
- uses: asdf-vm/actions/setup@v4
+ uses: asdf-vm/actions/setup@b7bcd026f18772e44fe1026d729e1611cc435d47
- name: Install software through asdf
- uses: asdf-vm/actions/install@v4
+ uses: asdf-vm/actions/install@b7bcd026f18772e44fe1026d729e1611cc435d47
- name: reshim asdf
run: asdf reshim
diff --git a/.github/workflows/ci_testing.yml b/.github/workflows/ci_testing.yml
index 232ffb7..ac25451 100644
--- a/.github/workflows/ci_testing.yml
+++ b/.github/workflows/ci_testing.yml
@@ -20,10 +20,10 @@ jobs:
sudo apt -y install --no-install-recommends liblzma-dev libbz2-dev libreadline-dev libxml2-utils
- name: Install asdf cli
- uses: asdf-vm/actions/setup@v4
+ uses: asdf-vm/actions/setup@b7bcd026f18772e44fe1026d729e1611cc435d47
- name: Install software through asdf
- uses: asdf-vm/actions/install@v4
+ uses: asdf-vm/actions/install@b7bcd026f18772e44fe1026d729e1611cc435d47
- name: reshim asdf
run: asdf reshim
diff --git a/docs/README.md b/docs/README.md
deleted file mode 100644
index fc0de4a..0000000
--- a/docs/README.md
+++ /dev/null
@@ -1,304 +0,0 @@
-The Data Validation Engine (DVE) is a configuration driven data validation library.
-
-There are 3 core steps within the DVE:
-
-1. [File transformation](./detailed_guidance/file_transformation.md) - Parsing files from their submitted format into a common format.
-2. [Data contract](./detailed_guidance/data_contract.md) - Validating the types that have been submitted and casting them.
-3. [Business rules](./detailed_guidance/business_rules.md) - Performing more complex validations such as comparisons between fields and tables.
-
-with a 4th step being important but more variable depending on platform and users:
-
-4. [Error reports](./detailed_guidance/feedback_messages.md) - Compiles the errors generated from the previous stages and presents them within an Excel report. However, this could be reconfigured to meet the needs of your users.
-
-Each of these steps produce a list of [Feedback message](details/Feedback%20message.md) objects which can be reported back to the user for them to fix any issues.
-
-DVE configuration can be instantiated from a json (dischema) file which might be structured like this:
-
-```json
-{
- "contract": {
- "cache_originals": true,
- "error_details": null,
- "types": {},
- "schemas": {},
- "datasets": {
- "CWTHeader": {
- "fields": {
- "version": {
- "description": null,
- "is_array": false,
- "callable": "constr",
- "constraints": {
- "regex": "\\d{1,2}\\.\\d{1,2}"
- }
- },
- "periodStartDate": {
- "description": null,
- "is_array": false,
- "callable": "conformatteddate",
- "constraints": {
- "date_format": "%Y-%m-%d"
- }
- }
- },
- "mandatory_fields": [
- "version",
- "periodStartDate"
- ],
- "reporting_fields": [],
- "key_field": null,
- "reader_config": {
- ".xml": {
- "reader": "XMLStreamReader",
- "kwargs": {
- "record_tag": "Header",
- "n_records_to_read": 1
- },
- "field_names": null
- }
- },
- "aliases": {}
- }
- }
- },
- "transformations": {
- "rule_stores": [],
- "reference_data": {},
- "parameters": {},
- "rules": [],
- "filters": [
- {
- "name": "version is at least 1.0",
- "entity": "CWTHeader",
- "expression": "version >= '1.0'",
- "failure_type": "submission",
- "failure_message": "version is not at least 1.0",
- "error_code": "CWT000101",
- "reporting_field": "version",
- "category": "Bad value"
- }
- ],
- "post_filter_rules": [],
- "complex_rules": []
- }
-}
-```
-"Contract" is where [Data Contract](./detailed_guidance/data_contract.md) and [File Transformation](./detailed_guidance/file_transformation.md) (in the reader configs) are configured, and (due to legacy naming) transformations are where [Business rules](./detailed_guidance/business_rules.md) are configured.
-
-## Quick start
-In the code example shared above we have a json file named `cwt_example.dischema.json` and an xml file with the following structure:
-
-```xml
-
-
h}VB
zRACO>^D}h{;C-^R(aucLqdcay#%zw{NkIF9f##{EEG@_5xrT6pK>GW;!oEIBXL3nt
z$8BK)-s@zV#TmE!=6Q7tjQRz`UYL-drYGW{QIDch{zO|0c!f>vYFeaE-+}h~0rDM%
z?
zwwqa2c6YmeglQ=Q7tNfqC+G6d*8UVmLMyz)dl2Wn7T0>5`yH|T@g%)FKdxe62urSQ
z_$7QKJ78w?(P|N{H&EVl)|f)rI~C7$O1&sfKIMn0A6>}XUSgrw4d6p=myF26XKEBm
z_pWvmxAiO}*zzMwu_mWnE#zqSzupCB7}>p1Ms57HfW3sdX8*$77cdnaiI~?VqnWoPTeei$eaOw-r(k?{1W^
zxu_tD@^;%o7$Q6~F$LWRx;ia>sBe*>zolQ?vS6^$I_8RDf^j=dR`|jfzwY~=dgll5
zk)QoEgLVgV*Wfb_rE`^hE`oV5#h*%yUd>$Fi+?_^zff4MmvjOwWzF0c{6vt$X!)Z4
zOP7@#b`rs#NiW>z458Z2$ZMDTl?A7JMFd!`CP4L!)q-Z+&dt8${FHHbX)riD3^siB
z=q_Si{wXQxHP|i(msBn_$)QrQV`%o!H_)Ct!l1tHP`xvW?kNyNLM_xf8IS^17qaSV47?Mr_DNFggn=vp5ia*$$2#Ipr)cCqTIHkj@Z
zJtU+uV_~EPEWbFde7OTC@;6JtO6(put)1it!SUT=wbWx}dMGnL_vW4%WkZy#86RlI
zdKsB95OU?4@-~O>*%k>)poL9Vm(JP76eCya*R!SRu@U~MA?&&(aL8668>&*ga5thM
zUD>;_qams4=#}q6pzRk3*my;|lTSy!>*GEr|9G~OEg@xR>f3|{T28FMr<$H?9j`V_
zMF$X9wBE{1y=A3Vo(S6=*r^gjey43bsF(=d$_|G8Wj$7n2ClQMgk>*Q{WDnvkJ9Fj
zG51>=p=z@M7tBsD$6{IJri)H5^41cfSn3*g<`)$N_ZFO5!-l*bSn!Yl_xl@yr#JCi
z4oxSGF=D(VASxQ*2I&PPcmS!>IV6L!bNtUg|89EL{&?Afl58grqVom@6hFz~)OrKM
zICFpk|CRi&I&KD{bGE&wdq8xn-uxDmNZ9#jwJW)_*DI1UL;PNb)5KC4lNp|VcQc>5
zC~ZOl{JC*|_i1dzW8Wc#%hNLE?@BqPrg@6uLp5r)+vvgpdKI3Jg<6lxt6PTqP3P#F
zPF<5WGfFM5`44wY3OpjStW0cFZPo@_dZnDR@g7^?*j&
zA}|$^K`_ZGcN~d%7qQSi$4nme%WdFDSiZEprcRn1HqBS?ia_c7ZL@Kj%JD*Z^w=pR
za-=}J^0G~%wJ4R(O#{+rjPbv_)f)v>Fdwr_8PlacW+&J3sv~(~=}>%D$Fjs2t^;Pl
z(VWpcAszYsTlF(vDgxxI>D(I(cRk}Sm|?DBjE$DZMAft1y9n&}PAFmpV~VeYK}!N&
z
diff --git a/docs/user_guidance/implementations/duckdb.md b/docs/user_guidance/implementations/duckdb.md
new file mode 100644
index 0000000..584f1d0
--- /dev/null
+++ b/docs/user_guidance/implementations/duckdb.md
@@ -0,0 +1,175 @@
+!!! quote
+ DuckDB is a high-performance analytical database system. It is designed to be fast, reliable, portable, and easy to use. DuckDB provides a rich SQL dialect with support far beyond basic SQL. DuckDB supports arbitrary and nested correlated subqueries, window functions, collations, complex types (arrays, structs, maps), and several extensions designed to make SQL easier to use.
+
+ DuckDB is available as a standalone CLI application and has clients for Python, R, Java, Wasm, etc., with deep integrations with packages such as pandas and dplyr.
+
+You can read more about DuckDB with the following links:
+
+- [Official Documentation :material-file-document-arrow-right:](https://duckdb.org/docs/stable/)
+- [GitHub :material-github:](https://github.com/duckdb/duckdb)
+
+## Setting up a DuckDB Connection
+
+To be able to use DuckDB with the DVE you first need to create a DuckDB connection object. You can simply do this with the following code:
+
+=== "Persist Database on memory"
+ ```py
+ import duckdb as ddb
+
+ db_path = ":memory:"
+ db_con = ddb.connect(db_path)
+ ```
+
+=== "Persist Database on disk"
+ ```py
+ import duckdb as ddb
+
+ db_path = "path/to/my_database.duckdb"
+ db_con = ddb.connect(db_path)
+ ```
+
+!!! note
+    You will need to close the db_con object with `db_con.close()`. Alternatively, you could build a custom [context manager](https://docs.python.org/3/library/contextlib.html) object to open and close the connection without needing to explicitly close the connection.
+
+
+Now you have the DuckDB connection object setup, you are ready to setup the required DVE objects.
+
+## Generating SubmissionInfo objects
+
+Before we utilise the DVE, we need to generate an iterable object containing `SubmissionInfo` objects. These objects effectively contain the necessary metadata for the DVE to work with a given submission. Here is an example function used to generate SubmissionInfo objects from a given path:
+
+```py
+import glob
+from datetime import date, datetime
+from pathlib import Path
+from typing import Optional
+from uuid import uuid4
+
+from dve.core_engine.models import SubmissionInfo
+
+
+def generate_sub_infos_from_submissions_path(
+ submission_path: Path,
+ dataset_id: Optional[str] = "example",
+ submitting_org: Optional[str] = None,
+ submission_method: Optional[str] = "local_test",
+ reporting_period_start_date: Optional[date | datetime] = None,
+ reporting_period_end_date: Optional[date | datetime] = None,
+) -> list[SubmissionInfo]:
+ sub_infos: list[SubmissionInfo] = []
+ for f in glob.glob(str(submission_path) + "/*.*"):
+ file_path = Path(f)
+ file_stats = file_path.stat()
+ metadata = {
+ "dataset_id": dataset_id,
+ "file_name": file_path.stem,
+ "file_extension": file_path.suffix,
+ "submission_method": submission_method,
+ "file_size": file_stats.st_size,
+ "datetime_received": datetime.now(),
+ }
+ if submitting_org:
+ metadata["submitting_org"] = submitting_org
+ if reporting_period_start_date:
+ metadata["reporting_period_start"] = str(reporting_period_start_date)
+ if reporting_period_end_date:
+ metadata["reporting_period_end"] = str(reporting_period_end_date)
+
+ sub_infos.append(SubmissionInfo(submission_id=uuid4().hex, **metadata))
+ return sub_infos
+
+
+submissions = generate_sub_infos_from_submissions_path(Path("path", "to", "my", "submissions"))
+```
+
+!!! note
+    If you have a large number of submissions, it may be worth converting the above into a [generator](https://docs.python.org/3/reference/expressions.html#generator-expressions). Using the example above, you can do this by simply removing the sub_infos object and yielding the SubmissionInfo object per file returned from the glob iterator.
+
+## DuckDB Audit Table Setup
+
+The first object you must setup is an "Audit Manager Object". This can be done with the following code:
+
+```py
+from dve.core_engine.backends.implementations.duckdb.auditing import DDBAuditingManager
+
+audit_manager = DDBAuditingManager(db_path, connection=db_con) # type: ignore
+```
+
+The "Audit Manager" object within the DVE is used to keep track of the status of your submission. A submission for instance could fail during the File Transformation section, so it's important that we have something to keep track of the submission. The Audit Manager object has a number of methods that can be used to read/write information to tables being stored within the duckdb connection setup in the previous step.
+
+You can learn more about the Auditing Objects [here](../auditing.md).
+
+Once you have setup your "Audit Manager" object, we can move onto setting up the DuckDB reference data loader (if required) and then setting up the DuckDB DVE Pipeline object.
+
+## DuckDB Reference Data Setup (Optional)
+If your business rules are reliant on utilising reference data, you will need to write the following code to ensure that reference data can be loaded during the application of those rules:
+
+```py
+from dve.core_engine.backends.implementations.duckdb.reference_data import DuckDBRefDataLoader
+
+DuckDBRefDataLoader.connection = db_con
+DuckDBRefDataLoader.dataset_config_uri = Path("path", "to", "my", "rules").as_posix()
+```
+
+The connection passed into the `DuckDBRefDataLoader` object will then be able to use various DuckDB readers to load data from an existing table on the connection OR load data from reference data persisted in either `parquet` or `pyarrow` format.
+
+If you want to learn more about the reference data loaders, you can view the advanced user guidance [here](../../advanced_guidance/package_documentation/refdata_loaders.md).
+
+Now we can move onto setting up the DuckDB DVE Pipeline object.
+
+## DuckDB Pipeline Setup
+
+To setup a DuckDB Pipeline, you can use the following example below:
+
+=== "Without Rules"
+
+ ```py
+
+ from dve.pipeline.duckdb_pipeline import DDBDVEPipeline
+
+
+ dve_pipeline = DDBDVEPipeline(
+ processed_files_path=Path("location_to_store", "dve_outputs").as_posix(),
+ audit_tables=audit_manager,
+ connection=db_con,
+ submitted_files_path=Path("submissions", "path").as_posix(),
+ reference_data_loader=DuckDBRefDataLoader,
+ )
+ ```
+
+=== "With Rules"
+
+ ```py
+ from dve.pipeline.duckdb_pipeline import DDBDVEPipeline
+
+
+ dve_pipeline = DDBDVEPipeline(
+ processed_files_path=Path("location_to_store", "dve_outputs").as_posix(),
+ audit_tables=audit_manager,
+ connection=db_con,
+ rules_path=Path("to", "my", "rules").as_posix(),
+ submitted_files_path=Path("submissions", "path").as_posix(),
+ reference_data_loader=DuckDBRefDataLoader,
+ )
+ ```
+
+!!! note
+ If using remote resources, then you will want to use `as_uri` for your paths.
+
+ E.g.
+ ```py
+ Path("remote", "path").as_uri()
+ ```
+
+Once your Pipeline object is defined, you can simply run the `cluster_pipeline_run` method. E.g.
+
+```py
+error_reports = dve_pipeline.cluster_pipeline_run()
+```
+
+
+## Further documentation
+For further details on the objects referenced above, you can use the following links to read more about the objects:
+
+- [Pipeline Docs](../../advanced_guidance/package_documentation/pipeline.md)
+- [Reference Data Docs](../../advanced_guidance/package_documentation/refdata_loaders.md)
diff --git a/docs/user_guidance/implementations/mixing_implementations.md b/docs/user_guidance/implementations/mixing_implementations.md
new file mode 100644
index 0000000..dc75aeb
--- /dev/null
+++ b/docs/user_guidance/implementations/mixing_implementations.md
@@ -0,0 +1,30 @@
+
+## Mixing backend implementations
+
+The examples shown above are using the Spark Backend. DVE also has a DuckDB backend found at [core_engine.backends.implementations.duckdb](https://github.com/NHSDigital/data-validation-engine/tree/main/src/dve/core_engine/backends/implementations/duckdb). In order to mix the two you will need to convert from one type of entity to the other. For example from a spark `Dataframe` to DuckDB `relation`. The easiest way to do this is to use the `write_parquet` method from one backend and use `read_parquet` from another backend.
+
+Currently the configuration isn't backend agnostic for applying business rules. So if you want to swap between spark and duckdb, the business rules need to be written using only features that are common to both backends. For example, a regex check in spark would be something along the lines of...
+```sql
+nhsnumber rlike '^\d{10}$'
+```
+...but in duckdb it would be...
+```sql
+regexp_matches(nhsnumber, '^\d{10}$')
+```
+Failures in parsing the expressions lead to failure messages such as
+```python
+FeedbackMessage(
+ entity=None,
+ record=None,
+ failure_type='integrity',
+ is_informational=False,
+ error_type=None,
+ error_location=None,
+ error_message="Unexpected error (AnalysisException: Undefined function: 'regexp_matches'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 5) in transformations (rule: root; step: 0; id: None)",
+ error_code=None,
+ reporting_field=None,
+ reporting_field_name=None,
+ value=None,
+ category=None
+)
+```
\ No newline at end of file
diff --git a/docs/user_guidance/implementations/platform_specific/databricks.md b/docs/user_guidance/implementations/platform_specific/databricks.md
new file mode 100644
index 0000000..e478c0d
--- /dev/null
+++ b/docs/user_guidance/implementations/platform_specific/databricks.md
@@ -0,0 +1,10 @@
+## Installation
+
+Firstly, please ensure that you've read the guidance on our [installation section](../../install.md).
+
+You can follow these guides to help you install the Data Validation Engine onto a Databricks Cluster:
+
+- [AWS](https://docs.databricks.com/aws/en/libraries/)
+- [GCP](https://docs.databricks.com/gcp/en/libraries/)
+- [Microsoft Azure](https://learn.microsoft.com/en-us/azure/databricks/libraries/)
+
diff --git a/docs/user_guidance/implementations/platform_specific/palantir_foundry.md b/docs/user_guidance/implementations/platform_specific/palantir_foundry.md
new file mode 100644
index 0000000..1c63d00
--- /dev/null
+++ b/docs/user_guidance/implementations/platform_specific/palantir_foundry.md
@@ -0,0 +1,2 @@
+!!! note
+ This section has not yet been written. Coming soon.
diff --git a/docs/user_guidance/implementations/spark.md b/docs/user_guidance/implementations/spark.md
new file mode 100644
index 0000000..75e1f5e
--- /dev/null
+++ b/docs/user_guidance/implementations/spark.md
@@ -0,0 +1,165 @@
+!!! quote
+ Apache Spark™ is a multi-language engine for executing data engineering, data science, and machine learning on single-node machines or clusters.
+
+You can read more about Spark here with the following links:
+
+- [Official Documentation :material-file-document-arrow-right:](https://spark.apache.org/)
+- [GitHub :material-github:](https://github.com/apache/spark)
+
+
+## Setting up a Spark Session
+
+For a basic Spark Session setup, you can use the following snippet of code:
+```py
+spark = SparkSession.builder.appName("SimpleApp").getOrCreate()
+```
+
+You can learn more about setting up a Spark Session [here](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.SparkSession.html).
+
+!!! warning
+
+ If you need to load XML data and the version of spark you're running is <4.0.0, you'll need the `spark-xml` extension. You can read more about it [here](https://github.com/databricks/spark-xml).
+
+
+## Generating SubmissionInfo Objects
+
+Before we utilise the DVE, we need to generate an iterable object containing `SubmissionInfo` objects. These objects effectively contain the necessary metadata for the DVE to work with a given submission. Here is an example function used to generate SubmissionInfo objects from a given path:
+
+```py
+import glob
+from datetime import date, datetime
+from pathlib import Path
+from typing import Optional
+from uuid import uuid4
+
+from dve.core_engine.models import SubmissionInfo
+
+
+def generate_sub_infos_from_submissions_path(
+ submission_path: Path,
+ dataset_id: Optional[str] = "example",
+ submitting_org: Optional[str] = None,
+ submission_method: Optional[str] = "local_test",
+ reporting_period_start_date: Optional[date | datetime] = None,
+ reporting_period_end_date: Optional[date | datetime] = None,
+) -> list[SubmissionInfo]:
+ sub_infos: list[SubmissionInfo] = []
+ for f in glob.glob(str(submission_path) + "/*.*"):
+ file_path = Path(f)
+ file_stats = file_path.stat()
+ metadata = {
+ "dataset_id": dataset_id,
+ "file_name": file_path.stem,
+ "file_extension": file_path.suffix,
+ "submission_method": submission_method,
+ "file_size": file_stats.st_size,
+ "datetime_received": datetime.now(),
+ }
+ if submitting_org:
+ metadata["submitting_org"] = submitting_org
+ if reporting_period_start_date:
+ metadata["reporting_period_start"] = str(reporting_period_start_date)
+ if reporting_period_end_date:
+ metadata["reporting_period_end"] = str(reporting_period_end_date)
+
+ sub_infos.append(SubmissionInfo(submission_id=uuid4().hex, **metadata))
+ return sub_infos
+
+
+submissions = generate_sub_infos_from_submissions_path(Path("path", "to", "my", "submissions"))
+```
+
+!!! note
+    If you have a large number of submissions, it may be worth converting the above into a [generator](https://docs.python.org/3/reference/expressions.html#generator-expressions). Using the example above, you can do this by simply removing the sub_infos object and yielding the SubmissionInfo object per file returned from the glob iterator.
+
+## Spark Audit Table Setup
+
+The first object you must setup is an "Audit Manager Object". This can be done with the following code:
+
+```py
+from dve.core_engine.backends.implementations.spark.auditing import SparkAuditingManager
+
+db_name = "test_dve"
+spark.sql(f"CREATE DATABASE {db_name};")
+
+audit_manager = SparkAuditingManager(db_name, spark)
+```
+
+!!! note
+
+ `spark` session is optional for the `SparkAuditingManager`. If not provided a spark session will be generated.
+
+The "Audit Manager" object within the DVE is used to keep track of the status of your submission. A submission for instance could fail during the File Transformation section, so it's important that we have something to keep track of the submission. The Audit Manager object has a number of methods that can be used to read/write information to tables being stored within the database created in the previous step.
+
+You can learn more about the Auditing Objects [here](../auditing.md).
+
+Once you have setup your "Audit Manager" object, we can move onto setting up the Spark reference data loader (if required) and then setting up the Spark DVE Pipeline object.
+
+## Spark Reference Data Setup (Optional)
+If your business rules are reliant on utilising reference data, you will need to write the following code to ensure that reference data can be loaded during the application of those rules:
+
+```py
+from pathlib import Path
+
+from dve.core_engine.backends.implementations.spark.reference_data import SparkRefDataLoader
+
+SparkRefDataLoader.spark = spark
+SparkRefDataLoader.dataset_config_uri = Path("path", "to", "my", "rules").as_posix()
+```
+
+## Spark Pipeline Setup
+
+To setup a Spark Pipeline, you can use the following example below:
+
+=== "Without Rules"
+
+ ```py
+
+ from dve.pipeline.spark_pipeline import SparkDVEPipeline
+
+
+ dve_pipeline = SparkDVEPipeline(
+ processed_files_path=Path("location_to_store", "dve_outputs").as_posix(),
+ audit_tables=audit_manager,
+ submitted_files_path=Path("submissions", "path").as_posix(),
+ reference_data_loader=SparkRefDataLoader,
+ spark=spark,
+ )
+ ```
+
+=== "With Rules"
+
+ ```py
+ from dve.pipeline.spark_pipeline import SparkDVEPipeline
+
+
+ dve_pipeline = SparkDVEPipeline(
+ processed_files_path=Path("location_to_store", "dve_outputs").as_posix(),
+ audit_tables=audit_manager,
+ rules_path=Path("to", "my", "rules").as_posix(),
+ submitted_files_path=Path("submissions", "path").as_posix(),
+ reference_data_loader=SparkRefDataLoader,
+ spark=spark,
+ )
+ ```
+
+!!! note
+ If using remote resources, then you will want to use `as_uri` for your paths.
+
+ E.g.
+ ```py
+ Path("remote", "path").as_uri()
+ ```
+
+Once your Pipeline object is defined, you can simply run the `cluster_pipeline_run` method. E.g.
+
+```py
+error_reports = dve_pipeline.cluster_pipeline_run()
+```
+
+## Further documentation
+
+For further details on the objects referenced above, you can use the following links to read more about the objects:
+
+- [Pipeline Docs](../../advanced_guidance/package_documentation/pipeline.md)
+- [Reference Data Docs](../../advanced_guidance/package_documentation/refdata_loaders.md)
diff --git a/docs/user_guidance/install.md b/docs/user_guidance/install.md
new file mode 100644
index 0000000..34e1f3a
--- /dev/null
+++ b/docs/user_guidance/install.md
@@ -0,0 +1,91 @@
+---
+title: Installing the Data Validation Engine
+tags:
+ - Introduction
+ - Installation
+---
+
+!!! warning
+    **DVE is currently an unstable package. Expect breaking changes between minor versions**. We intend to follow semantic versioning of `major.minor.patch` more strictly after a 1.0 release. Until then, we recommend that you pin your install to the latest version available and keep an eye on [future releases](https://github.com/NHSDigital/data-validation-engine/releases) that will have changelogs provided with each release.
+
+ **Please note that we only support Python runtimes of 3.10 and 3.11.** In the future we will look to add support for Python versions greater than 3.11, but it's not an immediate priority.
+
+    If working on Python 3.7, the `0.1` release supports this (and only this) version of Python. However, we have not been updating that version with any bugfixes, performance improvements etc. There are also a number of vulnerable dependencies on the `0.1` release due to [Python 3.7 being deprecated](https://devguide.python.org/versions/) and a number of packages dropping support. **If you choose to install `0.1`, you accept the risks of doing so and additional support will not be provided.**
+
+You can install the DVE package through python package managers such as [pip](https://pypi.org/project/pip/), [pipx](https://github.com/pypa/pipx), [uv](https://docs.astral.sh/uv/) and [poetry](https://python-poetry.org/). See examples below for installing the DVE:
+
+=== "pip"
+
+ ```sh
+ pip install git+https://github.com/NHSDigital/data-validation-engine.git@vMaj.Min.Pat
+ ```
+
+=== "pipx"
+
+ ```sh
+ pipx install git+https://github.com/NHSDigital/data-validation-engine.git@vMaj.Min.Pat
+ ```
+
+=== "uv"
+
+ Add to your existing `uv` project...
+ ```sh
+ uv add git+https://github.com/NHSDigital/data-validation-engine.git@vMaj.Min.Pat
+ ```
+
+ ...or you can add via your `pyproject.toml`...
+
+ ```toml
+ dependencies = [
+ nhs-dve @ https://github.com/NHSDigital/data-validation-engine.git@vMaj.Min.Pat
+ ]
+ ```
+
+ ```sh
+ uv lock
+ ```
+
+ ```sh
+ uv sync
+ ```
+
+=== "poetry"
+
+ Add to your existing `poetry` project...
+ ```sh
+ poetry add git+https://github.com/NHSDigital/data-validation-engine.git@vMaj.Min.Pat
+ ```
+
+ ...or you can add via your `pyproject.toml`...
+
+ ```toml
+ [tool.poetry.dependencies]
+ nhs-dve = { git = "https://github.com/NHSDigital/data-validation-engine.git", tag = "vMaj.Min.Pat" }
+ ```
+
+ ```sh
+ poetry lock
+ ```
+
+ ```sh
+ poetry install
+ ```
+
+!!! note
+ Replace `Maj.Min.Pat` with the version of the DVE you want. We recommend the latest release if you're just starting with the DVE.
+
+!!! info
+    We are working on getting the DVE available via PyPI and Conda. We will update this page with the relevant instructions once this has been successfully set up.
+
+Python dependencies are listed in `pyproject.toml` [(here)](https://github.com/NHSDigital/data-validation-engine/blob/main/pyproject.toml). Many of the dependencies are locked to quite restrictive versions due to the complexity of this package. Core packages such as Pydantic, Pyspark and DuckDB are unlikely to receive flexible version constraints as changes in those packages could cause the DVE to malfunction. For less important dependencies, we have tried to make the constraints more flexible. Therefore, we would advise you to install the DVE into a separate environment rather than trying to integrate it into an existing Python environment.
+
+Once you have installed the DVE you are almost ready to use it. To be able to run the DVE, you will need to choose one of the supported pipeline runners (see the backend implementations here - [DuckDB](implementations/duckdb.md) *or* [Spark](implementations/spark.md)) and you will need to create your own dischema document to configure how the DVE should validate incoming data. You can read more about this on the [Getting Started](getting_started.md) page.
+
+
+## DVE Version Compatibility Matrix
+
+| DVE Version | Python Version | DuckDB Version | Spark Version |
+| ------------ | -------------- | -------------- | ------------- |
+| >=0.6 | >=3.10,<3.12 | 1.1.* | 3.4.* |
+| >=0.2,<0.6 | >=3.10,<3.12 | 1.1.0 | 3.4.4 |
+| 0.1 | >=3.7.2,<3.8 | 1.1.0 | 3.2.1 |
diff --git a/includes/jargon_and_acronyms.md b/includes/jargon_and_acronyms.md
new file mode 100644
index 0000000..4962306
--- /dev/null
+++ b/includes/jargon_and_acronyms.md
@@ -0,0 +1,3 @@
+*[DVE]: Data Validation Engine
+*[dischema]: Data ingest schema
+*[stringified]: all fields casted to string
diff --git a/overrides/.icons/nhseng.svg b/overrides/.icons/nhseng.svg
new file mode 100644
index 0000000..cd21739
--- /dev/null
+++ b/overrides/.icons/nhseng.svg
@@ -0,0 +1,4 @@
+
diff --git a/poetry.lock b/poetry.lock
index 7b1987a..7074536 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -779,14 +779,14 @@ files = [
[[package]]
name = "click"
-version = "8.3.1"
+version = "8.2.1"
description = "Composable command line interface toolkit"
optional = false
python-versions = ">=3.10"
-groups = ["dev", "lint"]
+groups = ["dev", "docs", "lint"]
files = [
- {file = "click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6"},
- {file = "click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a"},
+ {file = "click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b"},
+ {file = "click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202"},
]
[package.dependencies]
@@ -798,12 +798,12 @@ version = "0.4.6"
description = "Cross-platform colored terminal text."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
-groups = ["dev", "lint", "test"]
+groups = ["dev", "docs", "lint", "test"]
files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
-markers = {lint = "platform_system == \"Windows\" or sys_platform == \"win32\""}
+markers = {docs = "platform_system == \"Windows\"", lint = "platform_system == \"Windows\" or sys_platform == \"win32\""}
[[package]]
name = "commitizen"
@@ -1024,14 +1024,14 @@ files = [
[[package]]
name = "cucumber-tag-expressions"
-version = "9.0.0"
+version = "9.1.0"
description = "Provides a tag-expression parser and evaluation logic for cucumber/behave"
optional = false
python-versions = ">=3.10"
groups = ["dev", "test"]
files = [
- {file = "cucumber_tag_expressions-9.0.0-py3-none-any.whl", hash = "sha256:36f3eacf49ad24feeb60218db4c51ab114853b3f022f4f3ad790c32b7597faee"},
- {file = "cucumber_tag_expressions-9.0.0.tar.gz", hash = "sha256:731302c12bd602309596b35e733c1021b517d4948329803c23ca026e26ef4e99"},
+ {file = "cucumber_tag_expressions-9.1.0-py3-none-any.whl", hash = "sha256:cca145d677a942c1877e5a2cf13da8c6ec99260988877c817efd284d8455bb56"},
+ {file = "cucumber_tag_expressions-9.1.0.tar.gz", hash = "sha256:d960383d5885300ebcbcb14e41657946fde2a59d5c0f485eb291bc6a0e228acc"},
]
[[package]]
@@ -1046,6 +1046,21 @@ files = [
{file = "decli-0.6.3.tar.gz", hash = "sha256:87f9d39361adf7f16b9ca6e3b614badf7519da13092f2db3c80ca223c53c7656"},
]
+[[package]]
+name = "deepmerge"
+version = "2.0"
+description = "A toolset for deeply merging Python dictionaries."
+optional = false
+python-versions = ">=3.8"
+groups = ["docs"]
+files = [
+ {file = "deepmerge-2.0-py3-none-any.whl", hash = "sha256:6de9ce507115cff0bed95ff0ce9ecc31088ef50cbdf09bc90a09349a318b3d00"},
+ {file = "deepmerge-2.0.tar.gz", hash = "sha256:5c3d86081fbebd04dd5de03626a0607b809a98fb6ccba5770b62466fe940ff20"},
+]
+
+[package.extras]
+dev = ["black", "build", "mypy", "pytest", "pyupgrade", "twine", "validate-pyproject[all]"]
+
[[package]]
name = "delta-spark"
version = "2.4.0"
@@ -1218,16 +1233,48 @@ python-dateutil = ">=2.4"
[[package]]
name = "filelock"
-version = "3.21.2"
+version = "3.24.3"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.10"
groups = ["dev"]
files = [
- {file = "filelock-3.21.2-py3-none-any.whl", hash = "sha256:d6cd4dbef3e1bb63bc16500fc5aa100f16e405bbff3fb4231711851be50c1560"},
- {file = "filelock-3.21.2.tar.gz", hash = "sha256:cfd218cfccf8b947fce7837da312ec3359d10ef2a47c8602edd59e0bacffb708"},
+ {file = "filelock-3.24.3-py3-none-any.whl", hash = "sha256:426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d"},
+ {file = "filelock-3.24.3.tar.gz", hash = "sha256:011a5644dc937c22699943ebbfc46e969cdde3e171470a6e40b9533e5a72affa"},
]
+[[package]]
+name = "ghp-import"
+version = "2.1.0"
+description = "Copy your docs directly to the gh-pages branch."
+optional = false
+python-versions = "*"
+groups = ["docs"]
+files = [
+ {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"},
+ {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"},
+]
+
+[package.dependencies]
+python-dateutil = ">=2.8.1"
+
+[package.extras]
+dev = ["flake8", "markdown", "twine", "wheel"]
+
+[[package]]
+name = "griffelib"
+version = "2.0.0"
+description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API."
+optional = false
+python-versions = ">=3.10"
+groups = ["docs"]
+files = [
+ {file = "griffelib-2.0.0-py3-none-any.whl", hash = "sha256:01284878c966508b6d6f1dbff9b6fa607bc062d8261c5c7253cb285b06422a7f"},
+]
+
+[package.extras]
+pypi = ["pip (>=24.0)", "platformdirs (>=4.2)", "wheel (>=0.42)"]
+
[[package]]
name = "identify"
version = "2.6.16"
@@ -1318,7 +1365,7 @@ version = "3.1.6"
description = "A very fast and expressive template engine."
optional = false
python-versions = ">=3.7"
-groups = ["main", "dev", "test"]
+groups = ["main", "dev", "docs", "test"]
files = [
{file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"},
{file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"},
@@ -1505,13 +1552,29 @@ html5 = ["html5lib"]
htmlsoup = ["BeautifulSoup4"]
source = ["Cython (==0.29.37)"]
+[[package]]
+name = "markdown"
+version = "3.10.2"
+description = "Python implementation of John Gruber's Markdown."
+optional = false
+python-versions = ">=3.10"
+groups = ["docs"]
+files = [
+ {file = "markdown-3.10.2-py3-none-any.whl", hash = "sha256:e91464b71ae3ee7afd3017d9f358ef0baf158fd9a298db92f1d4761133824c36"},
+ {file = "markdown-3.10.2.tar.gz", hash = "sha256:994d51325d25ad8aa7ce4ebaec003febcce822c3f8c911e3b17c52f7f589f950"},
+]
+
+[package.extras]
+docs = ["mdx_gh_links (>=0.2)", "mkdocs (>=1.6)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python] (>=0.28.3)"]
+testing = ["coverage", "pyyaml"]
+
[[package]]
name = "markupsafe"
version = "3.0.3"
description = "Safely add untrusted strings to HTML/XML markup."
optional = false
python-versions = ">=3.9"
-groups = ["main", "dev", "test"]
+groups = ["main", "dev", "docs", "test"]
files = [
{file = "markupsafe-3.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559"},
{file = "markupsafe-3.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419"},
@@ -1616,6 +1679,127 @@ files = [
{file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
]
+[[package]]
+name = "mergedeep"
+version = "1.3.4"
+description = "A deep merge function for 🐍."
+optional = false
+python-versions = ">=3.6"
+groups = ["docs"]
+files = [
+ {file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"},
+ {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"},
+]
+
+[[package]]
+name = "mkdocs"
+version = "1.6.1"
+description = "Project documentation with Markdown."
+optional = false
+python-versions = ">=3.8"
+groups = ["docs"]
+files = [
+ {file = "mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e"},
+ {file = "mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2"},
+]
+
+[package.dependencies]
+click = ">=7.0"
+colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""}
+ghp-import = ">=1.0"
+jinja2 = ">=2.11.1"
+markdown = ">=3.3.6"
+markupsafe = ">=2.0.1"
+mergedeep = ">=1.3.4"
+mkdocs-get-deps = ">=0.2.0"
+packaging = ">=20.5"
+pathspec = ">=0.11.1"
+pyyaml = ">=5.1"
+pyyaml-env-tag = ">=0.1"
+watchdog = ">=2.0"
+
+[package.extras]
+i18n = ["babel (>=2.9.0)"]
+min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4) ; platform_system == \"Windows\"", "ghp-import (==1.0)", "importlib-metadata (==4.4) ; python_version < \"3.10\"", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"]
+
+[[package]]
+name = "mkdocs-autorefs"
+version = "1.4.4"
+description = "Automatically link across pages in MkDocs."
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "mkdocs_autorefs-1.4.4-py3-none-any.whl", hash = "sha256:834ef5408d827071ad1bc69e0f39704fa34c7fc05bc8e1c72b227dfdc5c76089"},
+ {file = "mkdocs_autorefs-1.4.4.tar.gz", hash = "sha256:d54a284f27a7346b9c38f1f852177940c222da508e66edc816a0fa55fc6da197"},
+]
+
+[package.dependencies]
+Markdown = ">=3.3"
+markupsafe = ">=2.0.1"
+mkdocs = ">=1.1"
+
+[[package]]
+name = "mkdocs-get-deps"
+version = "0.2.0"
+description = "MkDocs extension that lists all dependencies according to a mkdocs.yml file"
+optional = false
+python-versions = ">=3.8"
+groups = ["docs"]
+files = [
+ {file = "mkdocs_get_deps-0.2.0-py3-none-any.whl", hash = "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134"},
+ {file = "mkdocs_get_deps-0.2.0.tar.gz", hash = "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c"},
+]
+
+[package.dependencies]
+mergedeep = ">=1.3.4"
+platformdirs = ">=2.2.0"
+pyyaml = ">=5.1"
+
+[[package]]
+name = "mkdocstrings"
+version = "1.0.3"
+description = "Automatic documentation from sources, for MkDocs."
+optional = false
+python-versions = ">=3.10"
+groups = ["docs"]
+files = [
+ {file = "mkdocstrings-1.0.3-py3-none-any.whl", hash = "sha256:0d66d18430c2201dc7fe85134277382baaa15e6b30979f3f3bdbabd6dbdb6046"},
+ {file = "mkdocstrings-1.0.3.tar.gz", hash = "sha256:ab670f55040722b49bb45865b2e93b824450fb4aef638b00d7acb493a9020434"},
+]
+
+[package.dependencies]
+Jinja2 = ">=3.1"
+Markdown = ">=3.6"
+MarkupSafe = ">=1.1"
+mkdocs = ">=1.6"
+mkdocs-autorefs = ">=1.4"
+mkdocstrings-python = {version = ">=1.16.2", optional = true, markers = "extra == \"python\""}
+pymdown-extensions = ">=6.3"
+
+[package.extras]
+crystal = ["mkdocstrings-crystal (>=0.3.4)"]
+python = ["mkdocstrings-python (>=1.16.2)"]
+python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"]
+
+[[package]]
+name = "mkdocstrings-python"
+version = "2.0.3"
+description = "A Python handler for mkdocstrings."
+optional = false
+python-versions = ">=3.10"
+groups = ["docs"]
+files = [
+ {file = "mkdocstrings_python-2.0.3-py3-none-any.whl", hash = "sha256:0b83513478bdfd803ff05aa43e9b1fca9dd22bcd9471f09ca6257f009bc5ee12"},
+ {file = "mkdocstrings_python-2.0.3.tar.gz", hash = "sha256:c518632751cc869439b31c9d3177678ad2bfa5c21b79b863956ad68fc92c13b8"},
+]
+
+[package.dependencies]
+griffelib = ">=2.0"
+mkdocs-autorefs = ">=1.4"
+mkdocstrings = ">=0.30"
+typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""}
+
[[package]]
name = "moto"
version = "4.0.13"
@@ -1994,7 +2178,7 @@ version = "26.0"
description = "Core utilities for Python packages"
optional = false
python-versions = ">=3.8"
-groups = ["dev", "lint", "test"]
+groups = ["dev", "docs", "lint", "test"]
files = [
{file = "packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529"},
{file = "packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4"},
@@ -2113,14 +2297,14 @@ files = [
[[package]]
name = "parse"
-version = "1.21.0"
+version = "1.21.1"
description = "parse() is the opposite of format()"
optional = false
python-versions = "*"
groups = ["dev", "test"]
files = [
- {file = "parse-1.21.0-py2.py3-none-any.whl", hash = "sha256:6d81f7bae0ab25fd72818375c4a9c71c8705256bfc42e8725be609cf8b904aed"},
- {file = "parse-1.21.0.tar.gz", hash = "sha256:937725d51330ffec9c7a26fdb5623baa135d8ba8ed78817ea9523538844e3ce4"},
+ {file = "parse-1.21.1-py2.py3-none-any.whl", hash = "sha256:55339ca698019815df3b8e8b550e5933933527e623b0cdf1ca2f404da35ffb47"},
+ {file = "parse-1.21.1.tar.gz", hash = "sha256:825e1a88e9d9fb481b8d2ca709c6195558b6eaa97c559ad3a9a20aa2d12815a3"},
]
[[package]]
@@ -2150,7 +2334,7 @@ version = "1.0.4"
description = "Utility library for gitignore style pattern matching of file paths."
optional = false
python-versions = ">=3.9"
-groups = ["dev", "lint"]
+groups = ["dev", "docs", "lint"]
files = [
{file = "pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723"},
{file = "pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645"},
@@ -2164,14 +2348,14 @@ tests = ["pytest (>=9)", "typing-extensions (>=4.15)"]
[[package]]
name = "platformdirs"
-version = "4.7.0"
+version = "4.9.2"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
optional = false
python-versions = ">=3.10"
-groups = ["dev", "lint"]
+groups = ["dev", "docs", "lint"]
files = [
- {file = "platformdirs-4.7.0-py3-none-any.whl", hash = "sha256:1ed8db354e344c5bb6039cd727f096af975194b508e37177719d562b2b540ee6"},
- {file = "platformdirs-4.7.0.tar.gz", hash = "sha256:fd1a5f8599c85d49b9ac7d6e450bc2f1aaf4a23f1fe86d09952fe20ad365cf36"},
+ {file = "platformdirs-4.9.2-py3-none-any.whl", hash = "sha256:9170634f126f8efdae22fb58ae8a0eaa86f38365bc57897a6c4f781d1f5875bd"},
+ {file = "platformdirs-4.9.2.tar.gz", hash = "sha256:9a33809944b9db043ad67ca0db94b14bf452cc6aeaac46a88ea55b26e2e9d291"},
]
[[package]]
@@ -2400,7 +2584,7 @@ version = "2.19.2"
description = "Pygments is a syntax highlighting package written in Python."
optional = false
python-versions = ">=3.8"
-groups = ["dev", "test"]
+groups = ["dev", "docs", "test"]
files = [
{file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"},
{file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"},
@@ -2438,6 +2622,25 @@ tomlkit = ">=0.10.1"
spelling = ["pyenchant (>=3.2,<4.0)"]
testutils = ["gitpython (>3)"]
+[[package]]
+name = "pymdown-extensions"
+version = "10.21"
+description = "Extension pack for Python Markdown."
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "pymdown_extensions-10.21-py3-none-any.whl", hash = "sha256:91b879f9f864d49794c2d9534372b10150e6141096c3908a455e45ca72ad9d3f"},
+ {file = "pymdown_extensions-10.21.tar.gz", hash = "sha256:39f4a020f40773f6b2ff31d2cd2546c2c04d0a6498c31d9c688d2be07e1767d5"},
+]
+
+[package.dependencies]
+markdown = ">=3.6"
+pyyaml = "*"
+
+[package.extras]
+extra = ["pygments (>=2.19.1)"]
+
[[package]]
name = "pyspark"
version = "3.4.4"
@@ -2504,7 +2707,7 @@ version = "2.9.0.post0"
description = "Extensions to the standard Python datetime module"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
-groups = ["main", "dev", "test"]
+groups = ["main", "dev", "docs", "test"]
files = [
{file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
{file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
@@ -2531,7 +2734,7 @@ version = "6.0.3"
description = "YAML parser and emitter for Python"
optional = false
python-versions = ">=3.8"
-groups = ["dev", "test"]
+groups = ["dev", "docs", "test"]
files = [
{file = "pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b"},
{file = "pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956"},
@@ -2601,6 +2804,21 @@ files = [
{file = "pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f"},
]
+[[package]]
+name = "pyyaml-env-tag"
+version = "1.1"
+description = "A custom YAML tag for referencing environment variables in YAML files."
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "pyyaml_env_tag-1.1-py3-none-any.whl", hash = "sha256:17109e1a528561e32f026364712fee1264bc2ea6715120891174ed1b980d2e04"},
+ {file = "pyyaml_env_tag-1.1.tar.gz", hash = "sha256:2eb38b75a2d21ee0475d6d97ec19c63287a7e140231e4214969d0eac923cd7ff"},
+]
+
+[package.dependencies]
+pyyaml = "*"
+
[[package]]
name = "questionary"
version = "2.1.1"
@@ -2640,14 +2858,14 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "responses"
-version = "0.25.8"
+version = "0.26.0"
description = "A utility library for mocking out the `requests` Python library."
optional = false
python-versions = ">=3.8"
groups = ["dev", "test"]
files = [
- {file = "responses-0.25.8-py3-none-any.whl", hash = "sha256:0c710af92def29c8352ceadff0c3fe340ace27cf5af1bbe46fb71275bcd2831c"},
- {file = "responses-0.25.8.tar.gz", hash = "sha256:9374d047a575c8f781b94454db5cab590b6029505f488d12899ddb10a4af1cf4"},
+ {file = "responses-0.26.0-py3-none-any.whl", hash = "sha256:03ec4409088cd5c66b71ecbbbd27fe2c58ddfad801c66203457b3e6a04868c37"},
+ {file = "responses-0.26.0.tar.gz", hash = "sha256:c7f6923e6343ef3682816ba421c006626777893cb0d5e1434f674b649bac9eb4"},
]
[package.dependencies]
@@ -2682,7 +2900,7 @@ version = "1.17.0"
description = "Python 2 and 3 compatibility utilities"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
-groups = ["main", "dev", "test"]
+groups = ["main", "dev", "docs", "test"]
files = [
{file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"},
{file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
@@ -2709,7 +2927,7 @@ version = "2.4.0"
description = "A lil' TOML parser"
optional = false
python-versions = ">=3.8"
-groups = ["dev", "lint", "test"]
+groups = ["dev", "docs", "lint", "test"]
markers = "python_version == \"3.10\""
files = [
{file = "tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867"},
@@ -2890,12 +3108,12 @@ version = "4.15.0"
description = "Backported and Experimental Type Hints for Python 3.9+"
optional = false
python-versions = ">=3.9"
-groups = ["main", "dev", "lint", "test"]
+groups = ["main", "dev", "docs", "lint", "test"]
files = [
{file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"},
{file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"},
]
-markers = {test = "python_version == \"3.10\""}
+markers = {docs = "python_version == \"3.10\"", test = "python_version == \"3.10\""}
[[package]]
name = "tzdata"
@@ -2929,25 +3147,68 @@ zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""]
[[package]]
name = "virtualenv"
-version = "20.36.1"
+version = "20.38.0"
description = "Virtual Python Environment builder"
optional = false
python-versions = ">=3.8"
groups = ["dev"]
files = [
- {file = "virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f"},
- {file = "virtualenv-20.36.1.tar.gz", hash = "sha256:8befb5c81842c641f8ee658481e42641c68b5eab3521d8e092d18320902466ba"},
+ {file = "virtualenv-20.38.0-py3-none-any.whl", hash = "sha256:d6e78e5889de3a4742df2d3d44e779366325a90cf356f15621fddace82431794"},
+ {file = "virtualenv-20.38.0.tar.gz", hash = "sha256:94f39b1abaea5185bf7ea5a46702b56f1d0c9aa2f41a6c2b8b0af4ddc74c10a7"},
]
[package.dependencies]
distlib = ">=0.3.7,<1"
-filelock = {version = ">=3.20.1,<4", markers = "python_version >= \"3.10\""}
+filelock = {version = ">=3.24.2,<4", markers = "python_version >= \"3.10\""}
platformdirs = ">=3.9.1,<5"
typing-extensions = {version = ">=4.13.2", markers = "python_version < \"3.11\""}
[package.extras]
-docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
-test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""]
+docs = ["furo (>=2023.7.26)", "pre-commit-uv (>=4.1.4)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinx-autodoc-typehints (>=3.6.2)", "sphinx-copybutton (>=0.5.2)", "sphinx-inline-tabs (>=2025.12.21.14)", "sphinxcontrib-mermaid (>=2)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
+test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "pytest-xdist (>=3.5)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""]
+
+[[package]]
+name = "watchdog"
+version = "6.0.0"
+description = "Filesystem events monitoring"
+optional = false
+python-versions = ">=3.9"
+groups = ["docs"]
+files = [
+ {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26"},
+ {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112"},
+ {file = "watchdog-6.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c897ac1b55c5a1461e16dae288d22bb2e412ba9807df8397a635d88f671d36c3"},
+ {file = "watchdog-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6eb11feb5a0d452ee41f824e271ca311a09e250441c262ca2fd7ebcf2461a06c"},
+ {file = "watchdog-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ef810fbf7b781a5a593894e4f439773830bdecb885e6880d957d5b9382a960d2"},
+ {file = "watchdog-6.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afd0fe1b2270917c5e23c2a65ce50c2a4abb63daafb0d419fde368e272a76b7c"},
+ {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948"},
+ {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860"},
+ {file = "watchdog-6.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0"},
+ {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c"},
+ {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134"},
+ {file = "watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b"},
+ {file = "watchdog-6.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e6f0e77c9417e7cd62af82529b10563db3423625c5fce018430b249bf977f9e8"},
+ {file = "watchdog-6.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:90c8e78f3b94014f7aaae121e6b909674df5b46ec24d6bebc45c44c56729af2a"},
+ {file = "watchdog-6.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e7631a77ffb1f7d2eefa4445ebbee491c720a5661ddf6df3498ebecae5ed375c"},
+ {file = "watchdog-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c7ac31a19f4545dd92fc25d200694098f42c9a8e391bc00bdd362c5736dbf881"},
+ {file = "watchdog-6.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9513f27a1a582d9808cf21a07dae516f0fab1cf2d7683a742c498b93eedabb11"},
+ {file = "watchdog-6.0.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7a0e56874cfbc4b9b05c60c8a1926fedf56324bb08cfbc188969777940aef3aa"},
+ {file = "watchdog-6.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e6439e374fc012255b4ec786ae3c4bc838cd7309a540e5fe0952d03687d8804e"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c"},
+ {file = "watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2"},
+ {file = "watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a"},
+ {file = "watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680"},
+ {file = "watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f"},
+ {file = "watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282"},
+]
+
+[package.extras]
+watchmedo = ["PyYAML (>=3.10)"]
[[package]]
name = "wcwidth"
@@ -3084,19 +3345,51 @@ files = [
[[package]]
name = "xmltodict"
-version = "1.0.2"
+version = "1.0.4"
description = "Makes working with XML feel like you are working with JSON"
optional = false
python-versions = ">=3.9"
groups = ["dev", "test"]
files = [
- {file = "xmltodict-1.0.2-py3-none-any.whl", hash = "sha256:62d0fddb0dcbc9f642745d8bbf4d81fd17d6dfaec5a15b5c1876300aad92af0d"},
- {file = "xmltodict-1.0.2.tar.gz", hash = "sha256:54306780b7c2175a3967cad1db92f218207e5bc1aba697d887807c0fb68b7649"},
+ {file = "xmltodict-1.0.4-py3-none-any.whl", hash = "sha256:a4a00d300b0e1c59fc2bfccb53d7b2e88c32f200df138a0dd2229f842497026a"},
+ {file = "xmltodict-1.0.4.tar.gz", hash = "sha256:6d94c9f834dd9e44514162799d344d815a3a4faec913717a9ecbfa5be1bb8e61"},
]
[package.extras]
test = ["pytest", "pytest-cov"]
+[[package]]
+name = "zensical"
+version = "0.0.23"
+description = "A modern static site generator built by the creators of Material for MkDocs"
+optional = false
+python-versions = ">=3.10"
+groups = ["docs"]
+files = [
+ {file = "zensical-0.0.23-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:35d6d3eb803fe73a67187a1a25443408bd02a8dd50e151f4a4bafd40de3f0928"},
+ {file = "zensical-0.0.23-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:5973267460a190f348f24d445ff0c01e8ed334fd075947687b305e68257f6b18"},
+ {file = "zensical-0.0.23-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:953adf1f0b346a6c65fc6e05e6cc1c38a6440fec29c50c76fb29700cc1927006"},
+ {file = "zensical-0.0.23-cp310-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:49c1cbd6131dafa056be828e081759184f9b8dd24b99bf38d1e77c8c31b0c720"},
+ {file = "zensical-0.0.23-cp310-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f5b7fe22c5d33b2b91899c5df7631ad4ce9cccfabac2560cc92ba73eafe2d297"},
+ {file = "zensical-0.0.23-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a3679d6bf6374f503afb74d9f6061da5de83c25922f618042b63a30b16f0389"},
+ {file = "zensical-0.0.23-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:54d981e21a19c3dcec6e7fa77c4421db47389dfdff20d29fea70df8e1be4062e"},
+ {file = "zensical-0.0.23-cp310-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:afde7865cc3c79c99f6df4a911d638fb2c3b472a1b81367d47163f8e3c36f910"},
+ {file = "zensical-0.0.23-cp310-abi3-musllinux_1_2_i686.whl", hash = "sha256:c484674d7b0a3e6d39db83914db932249bccdef2efaf8a5669671c66c16f584d"},
+ {file = "zensical-0.0.23-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:927d12fe2851f355fb3206809e04641d6651bdd2ff4afe9c205721aa3a32aa82"},
+ {file = "zensical-0.0.23-cp310-abi3-win32.whl", hash = "sha256:ffb79db4244324e9cc063d16adff25a40b145153e5e76d75e0012ba3c05af25d"},
+ {file = "zensical-0.0.23-cp310-abi3-win_amd64.whl", hash = "sha256:a8cfe240dca75231e8e525985366d010d09ee73aec0937930e88f7230694ce01"},
+ {file = "zensical-0.0.23.tar.gz", hash = "sha256:5c4fc3aaf075df99d8cf41b9f2566e4d588180d9a89493014d3607dfe50ac4bc"},
+]
+
+[package.dependencies]
+click = ">=8.1.8"
+deepmerge = ">=2.0"
+markdown = ">=3.7"
+pygments = ">=2.16"
+pymdown-extensions = ">=10.15"
+pyyaml = ">=6.0.2"
+tomli = {version = ">=2.0", markers = "python_full_version < \"3.11.0\""}
+
[[package]]
name = "zipp"
version = "3.23.0"
@@ -3120,4 +3413,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.12"
-content-hash = "08ea1eedf25a896fdc21f03d04f4403d47d655fc90eb5eb310ff7cde7e3b7a6d"
+content-hash = "7d4c014f794bf1e5125e697c4eab04f07961e7d77ae680377a6ddc984ba4d33b"
diff --git a/pyproject.toml b/pyproject.toml
index 6036c9e..0b2116f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -78,6 +78,15 @@ types-setuptools = "68.2.0.0"
types-urllib3 = "1.26.25.14"
types-xmltodict = "0.13.0.3"
+[tool.poetry.group.docs]
+optional = true
+
+[tool.poetry.group.docs.dependencies]
+click = "8.2.1"
+mkdocs = "^1.6.1"
+mkdocstrings = { version = "^1.0.3", extras = ["python"] }
+zensical = "~=0.0.23"
+
[tool.ruff]
line-length = 100
diff --git a/zensical.toml b/zensical.toml
new file mode 100644
index 0000000..f7a3731
--- /dev/null
+++ b/zensical.toml
@@ -0,0 +1,194 @@
+[project]
+site_name = "Data Validation Engine"
+site_description = "Documentation for using the Data Validation Engine (DVE)."
+site_author = "NHS England"
+site_url = "https://nhsdigital.github.io/data-validation-engine/"
+copyright = """
+
+"""
+nav = [
+ {"User Guidance" = [
+ "index.md",
+ {"Installation" = "user_guidance/install.md"},
+ {"Getting Started" = "user_guidance/getting_started.md"},
+ {"Auditing" = "user_guidance/auditing.md"},
+ {"Creating a Dischema" = [
+ {"File Transformation" = "user_guidance/file_transformation.md"},
+ {"Data Contract" = "user_guidance/data_contract.md"},
+ {"Business Rules" = "user_guidance/business_rules.md"},
+ {"Feedback Messages" = "user_guidance/feedback_messages.md"},
+ ]},
+ {"Backend Implementations" = [
+ {"DuckDB" = "user_guidance/implementations/duckdb.md"},
+ {"Spark" = "user_guidance/implementations/spark.md"},
+ {"Platform Specific Implementations" = [
+ {"Databricks" = "user_guidance/implementations/platform_specific/databricks.md"},
+ {"Palantir Foundry" = "user_guidance/implementations/platform_specific/palantir_foundry.md"},
+ ]},
+ ]},
+ ]},
+ {"Advanced User Guidance" = [
+ "advanced_guidance/index.md",
+ {"DVE Package Documentation" = [
+ "advanced_guidance/package_documentation/index.md",
+ {"Pipeline" = "advanced_guidance/package_documentation/pipeline.md"},
+ {"Refdata Loaders" = "advanced_guidance/package_documentation/refdata_loaders.md"},
+ ]},
+ {"DVE Developer Guidance" = [
+ {"Implementing a new backend" = "advanced_guidance/new_backend.md"},
+ {"Dischema Language Server" = "advanced_guidance/json_schemas.md"},
+ ]},
+ ]}
+]
+extra_css = ["assets/stylesheets/extra.css"]
+# extra_javascript = ["assets/javascript/extra.js"]
+repo_url = "https://github.com/NHSDigital/data-validation-engine"
+repo_name = "Data Validation Engine"
+
+# ----------------------------------------------------------------------------
+# Section for configuring theme options
+# ----------------------------------------------------------------------------
+[project.theme]
+variant = "classic"
+custom_dir = "overrides"
+logo = "assets/images/favicon.svg"
+favicon = "assets/images/favicon.ico"
+language = "en"
+features = [
+ "content.action.edit",
+ "content.code.annotate",
+ "content.code.copy",
+ "content.code.select",
+ # "content.footnote.tooltips",
+ "content.tabs.link",
+ # "content.tooltips",
+ # "header.autohide",
+ # "navigation.expand",
+ "navigation.footer",
+ "navigation.indexes",
+ "navigation.instant",
+ "navigation.instant.prefetch",
+ "navigation.instant.preview",
+ "navigation.instant.progress",
+ "navigation.path",
+ #"navigation.prune",
+ "navigation.sections",
+ "navigation.tabs",
+ #"navigation.tabs.sticky",
+ "navigation.top",
+ # "navigation.tracking",
+ "search.highlight",
+ "toc.follow",
+ "toc.integrate",
+]
+
+# ----------------------------------------------------------------------------
+# In the "palette" subsection you can configure options for the color scheme.
+# You can configure different color # schemes, e.g., to turn on dark mode,
+# that the user can switch between. Each color scheme can be further
+# customized.
+#
+# Read more:
+# - https://zensical.org/docs/setup/colors/
+# ----------------------------------------------------------------------------
+[[project.theme.palette]]
+media = "(prefers-color-scheme)"
+toggle.icon = "material/brightness-auto"
+toggle.name = "Switch to light mode"
+
+[[project.theme.palette]]
+media = "(prefers-color-scheme: light)"
+scheme = "default"
+toggle.icon = "material/brightness-7"
+toggle.name = "Switch to dark mode"
+
+[[project.theme.palette]]
+media = "(prefers-color-scheme: dark)"
+scheme = "slate"
+toggle.icon = "material/brightness-4"
+toggle.name = "Switch to system preference"
+
+# ----------------------------------------------------------------------------
+# In the "font" subsection you can configure the fonts used. By default, fonts
+# are loaded from Google Fonts, giving you a wide range of choices from a set
+# of suitably licensed fonts. There are options for a normal text font and for
+# a monospaced font used in code blocks.
+# ----------------------------------------------------------------------------
+[project.theme.font]
+text = "Inter"
+code = "Jetbrains Mono"
+
+# ----------------------------------------------------------------------------
+# The "extra" section contains miscellaneous settings.
+# ----------------------------------------------------------------------------
+
+[project.extra.consent]
+title = "Cookie consent"
+description = """
+ We use cookies to recognize your repeated visits and preferences, as well
+ as to measure the effectiveness of our documentation and whether users
+ find what they're searching for. With your consent, you're helping us to
+ make our documentation better.
+"""
+
+[[project.extra.social]]
+icon = "nhseng"
+link = "https://www.england.nhs.uk/"
+name = "NHS England Website"
+
+[[project.extra.social]]
+icon = "fontawesome/brands/github"
+link = "https://github.com/NHSDigital"
+name = "NHS Digital GitHub"
+
+# ----------------------------------------------------------------------------
+# Markdown Extensions
+# ----------------------------------------------------------------------------
+
+[project.markdown_extensions.abbr]
+[project.markdown_extensions.admonition]
+[project.markdown_extensions.attr_list]
+[project.markdown_extensions.md_in_html]
+[project.markdown_extensions.pymdownx.details]
+
+[project.markdown_extensions.pymdownx.emoji]
+emoji_index = "zensical.extensions.emoji.twemoji"
+emoji_generator = "zensical.extensions.emoji.to_svg"
+options.custom_icons = ["overrides/.icons"]
+
+[project.markdown_extensions.pymdownx.highlight]
+[project.markdown_extensions.pymdownx.inlinehilite]
+
+[project.markdown_extensions.pymdownx.snippets]
+auto_append = ["includes/jargon_and_acronyms.md"]
+
+[project.markdown_extensions.pymdownx.superfences]
+
+[project.markdown_extensions.pymdownx.tabbed]
+alternate_style = true
+
+[project.markdown_extensions.pymdownx.tabbed.slugify]
+object = "pymdownx.slugs.slugify"
+kwds = { case = "lower" }
+
+[project.markdown_extensions.toc]
+permalink = true
+
+[project.markdown_extensions.zensical.extensions.preview]
+
+# ----------------------------------------------------------------------------
+# Plugins
+# ----------------------------------------------------------------------------
+
+[project.plugins.mkdocstrings.handlers.python]
+paths = ["src/dve"]
+inventories = ["https://docs.python.org/3/objects.inv"]
From a02c3bd4e2747857887305284a01ffe0cae8fbcb Mon Sep 17 00:00:00 2001
From: "george.robertson1" <50412379+georgeRobertson@users.noreply.github.com>
Date: Wed, 4 Mar 2026 11:16:53 +0000
Subject: [PATCH 2/5] docs: further wip docs
---
.../package_documentation/readers.md | 87 +++++++++
docs/index.md | 6 +-
docs/user_guidance/auditing.md | 36 +++-
docs/user_guidance/file_transformation.md | 168 +++++++++++++++++-
docs/user_guidance/getting_started.md | 16 +-
docs/user_guidance/install.md | 10 +-
.../implementations/duckdb/readers/csv.py | 1 +
zensical.toml | 1 +
8 files changed, 306 insertions(+), 19 deletions(-)
create mode 100644 docs/advanced_guidance/package_documentation/readers.md
diff --git a/docs/advanced_guidance/package_documentation/readers.md b/docs/advanced_guidance/package_documentation/readers.md
new file mode 100644
index 0000000..6a944d3
--- /dev/null
+++ b/docs/advanced_guidance/package_documentation/readers.md
@@ -0,0 +1,87 @@
+## CSV
+
+=== "Base"
+
+ ::: src.dve.core_engine.backends.readers.csv.CSVFileReader
+ options:
+ heading_level: 3
+ merge_init_into_class: true
+ members: false
+
+=== "DuckDB"
+
+ ::: src.dve.core_engine.backends.implementations.duckdb.readers.csv.DuckDBCSVReader
+ options:
+ heading_level: 3
+ members:
+ - __init__
+
+ ::: src.dve.core_engine.backends.implementations.duckdb.readers.csv.PolarsToDuckDBCSVReader
+ options:
+ heading_level: 3
+ members:
+ - __init__
+
+ ::: src.dve.core_engine.backends.implementations.duckdb.readers.csv.DuckDBCSVRepeatingHeaderReader
+ options:
+ heading_level: 3
+ members:
+ - __init__
+
+=== "Spark"
+
+ ::: src.dve.core_engine.backends.implementations.spark.readers.csv.SparkCSVReader
+ options:
+ heading_level: 3
+ members:
+ - __init__
+
+## JSON
+
+=== "DuckDB"
+
+ ::: src.dve.core_engine.backends.implementations.duckdb.readers.json.DuckDBJSONReader
+ options:
+ heading_level: 3
+ members:
+ - __init__
+
+=== "Spark"
+
+ ::: src.dve.core_engine.backends.implementations.spark.readers.json.SparkJSONReader
+ options:
+ heading_level: 3
+ members:
+ - __init__
+
+## XML
+
+=== "Base"
+
+ ::: src.dve.core_engine.backends.readers.xml.BasicXMLFileReader
+ options:
+ heading_level: 3
+ merge_init_into_class: true
+ members: false
+
+=== "DuckDB"
+
+ ::: src.dve.core_engine.backends.implementations.duckdb.readers.xml.DuckDBXMLStreamReader
+ options:
+ heading_level: 3
+ members:
+ - __init__
+
+=== "Spark"
+
+ ::: src.dve.core_engine.backends.implementations.spark.readers.xml.SparkXMLStreamReader
+ options:
+ heading_level: 3
+ members:
+ - __init__
+
+ ::: src.dve.core_engine.backends.implementations.spark.readers.xml.SparkXMLReader
+ options:
+ heading_level: 3
+ members:
+ - __init__
diff --git a/docs/index.md b/docs/index.md
index c4c60f5..dea5827 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -11,7 +11,7 @@ tags:
# Data Validation Engine
-The Data Validation Engine (DVE) is a configuration driven data validation library written in [Python](https://www.python.org/), [Pydantic](https://docs.pydantic.dev/latest/) and a SQL backend currently consisting of [DuckDB](https://duckdb.org/) or [Spark](https://spark.apache.org/sql/). The configuration to run validations against a dataset are defined and written in a json document, which we will be referring to as the "dischema". The rules written within the dischema are designed to be run against all incoming data in a given submision - as this allows the DVE to capture all possible issues with the data without the submitter having resubmit the same data repeatedly which is burdensome and time consuming for the submitter and receiver of the data. Additionally, the rules can be configured to have the following behaviour:
+The Data Validation Engine (DVE) is a configuration driven data validation library written in [Python](https://www.python.org/), [Pydantic](https://docs.pydantic.dev/latest/) and a SQL backend currently consisting of [DuckDB](https://duckdb.org/) or [Spark](https://spark.apache.org/sql/). The configuration to run validations against a dataset is defined and written in a JSON document, which we will be referring to as the "dischema". The rules written within the dischema are designed to be run against all incoming data in a given submission - as this allows the DVE to capture all possible issues with the data without the submitter having to resubmit the same data repeatedly, which is burdensome and time consuming for both the submitter and the receiver of the data. Additionally, the rules can be configured to have the following behaviour:
- **File Rejection** - The entire submission will be rejected if the given rule triggers one or more times.
- **Row Rejection** - The row that triggered the rule will be rejected. Rows that pass the validation will be flowed through into a validated entity.
@@ -30,9 +30,9 @@ The DVE has 3 core components:
3. [Business rules](user_guidance/business_rules.md) - Performs simple and complex validations such as comparisons between fields, entities and/or lookups against reference data.
-For each component listed above, a [feedback message](user_guidance/feedback_messages.md) is generated whenever a rule is violated. These [feedback messages](user_guidance/feedback_messages.md) can be interegated directly into your system given you can consume `jsonl` files. Alternatively, we offer a fourth component called the [Error Reports](user_guidance/error_reports.md). This component will load the [feedback messages](user_guidance/feedback_messages.md) into an `.xlsx` (Excel) file which could be sent back to the submitter of the data. The excel file is compatiable with services that offer spreadsheet reading such as [Microsoft Excel](https://www.microsoft.com/en/microsoft-365/excel), [Google Docs](https://docs.google.com/), [Libre Office Calc](https://www.libreoffice.org/discover/calc/) etc.
+For each component listed above, a [feedback message](user_guidance/feedback_messages.md) is generated whenever a rule is violated. These [feedback messages](user_guidance/feedback_messages.md) can be integrated directly into your system given you can consume `JSONL` files. Alternatively, we offer a fourth component called the [Error Reports](user_guidance/error_reports.md). This component will load the [feedback messages](user_guidance/feedback_messages.md) into an `.xlsx` (Excel) file which could be sent back to the submitter of the data. The excel file is compatible with services that offer spreadsheet reading such as [Microsoft Excel](https://www.microsoft.com/en/microsoft-365/excel), [Google Docs](https://docs.google.com/), [Libre Office Calc](https://www.libreoffice.org/discover/calc/) etc.
-To be able to run the DVE out of the box, you can have look at the Backend Implementations sections with [DuckDB](user_guidance/implementations/duckdb.md) or [Spark](user_guidance/implementations/spark.md). If you to need a write a custom backend implementation, you may want to look at the [Advanced User Guidance](advanced_guidance/backends.md) section.
+To be able to run the DVE out of the box, you will need to choose and install one of the supported Backend Implementations such as [DuckDB](user_guidance/implementations/duckdb.md) or [Spark](user_guidance/implementations/spark.md). If you need to write a custom backend implementation, you may want to look at the [Advanced User Guidance](advanced_guidance/backends.md) section.
Feel free to use the Table of Contents on the left hand side of the page to navigate to sections of interest or to use the "Next" and "Previous" buttons at the bottom of each page if you want to read through each page in sequential order.
diff --git a/docs/user_guidance/auditing.md b/docs/user_guidance/auditing.md
index 1c63d00..2ae9b54 100644
--- a/docs/user_guidance/auditing.md
+++ b/docs/user_guidance/auditing.md
@@ -1,2 +1,34 @@
-!!! note
- This section has not yet been written. Coming soon.
+---
+tags:
+ - Auditing
+---
+
+The Auditing objects within the DVE are used to help control and store information about a given submission and the processing stage it is currently at. They are also used to store statistics about the submission, such as the number of validations it has triggered. This means that users who are not interested in using the Error Reports stage can source information directly from the audit tables instead.
+
+## Audit Tables
+Currently, these are the audit tables that can be accessed within the DVE:
+
+| Table Name | Purpose |
+| --------------------- | ------- |
+| processing_status | Contains information about the submission and what the current processing status is. |
+| submission_info | Contains information about the submitted file. |
+| submission_statistics | Contains validation statistics for each submission. |
+
+## Audit Objects
+
+You can use the following methods to help you interact with the tables above, or you can query the tables directly via SQL.
+
+
+
+::: src.dve.core_engine.backends.base.auditing.BaseAuditingManager
+ options:
+ heading_level: 3
+ members:
+ - get_submission_info
+ - get_submission_statistics
+ - get_submission_status
+ - get_all_file_transformation_submissions
+ - get_all_data_contract_submissions
+ - get_all_business_rule_submissions
+ - get_all_error_report_submissions
+ - get_current_processing_info
diff --git a/docs/user_guidance/file_transformation.md b/docs/user_guidance/file_transformation.md
index 1c63d00..89013ef 100644
--- a/docs/user_guidance/file_transformation.md
+++ b/docs/user_guidance/file_transformation.md
@@ -1,2 +1,166 @@
-!!! note
- This section has not yet been written. Coming soon.
+---
+title: File Transformation
+tags:
+ - Contract
+ - Data Contract
+ - File Transformation
+ - Readers
+---
+
+The File Transformation stage within the DVE is used to convert submitted files to stringified parquet format. This is critical as the rest of the stages within the DVE are reliant on the data being in parquet format. [Parquet was chosen as it is a very efficient column-oriented format](https://www.databricks.com/glossary/what-is-parquet). When specifying which formats you are expecting, you will define it in your dischema like this:
+
+=== "DuckDB"
+
+ ```json
+ {
+ "contract": {
+ "datasets": {
+ "
z9g4MlYCte~Bnmz_NA?f}x~0}$QwyhSyA>JFL6bnWfMPY>4{=3iOdZ<(Nlc!{F7CT?
z2#%yuYbSB}eX%KtS{IMeN?e&vU#2X_oJ
*HExqu
zKwH^i4ojUHuM-FlEmOOpx1fz^pdVTPn^c
J@9kZdWX%bcgu_;8n=Umh?M9~-
zO2Cr7t5TU*P9Dc}W#q~`R&B=G>{JbfE2Rf{C8ecjx5jM&+Uq^FWi;bnm{YB-MG!
z&~To6t9{U4CVe$oDTmeg8iYVbOXDnhY~*q8pAk;G$oDL>)Bw3E;M-HIVQ=U4)$e1H
zozw|Ymm)(p0j*zOXwx#-e$O`q(0N{d*w>g YEI{;>@H9*__y~C13R+RD1gtcjeywJMlnnyyXrzZnq$u+U$DW&
zUprxF+!OV1!(%V;`LRG|C^R|7M&%6~ud
L7w-H5RU{RuHC{*Jy$n;3y$U|BDsKbB(Mo9w(AB-
zV4xxS5(yg;C;5|iF7yfvd!%8tC?RM_H3cPLZX3NdXXK)3JH23&{S|G0Yc_^p<4D>U
z-dWfwuXiLbE86TO!n5S~Y41dTy_?1I#fz*u33n-7c!v`KTfdbm|88#Myw+&p>c;(N
zO1WuRHd})a`VaY2gIJO%#lR%J(Ef!~)u12*k;fZ|Mb7ZK#U78Gx7tVaj3E|ll{;o8
zyc38@W5^|Sxn6IW?+mj2X86!z8&^$xo@}VxE1!1b?DXVwfj@<-MAtRfP|e~vc}_ue
zZ?8E9c0K+$PBh!|JsgFde(Eq_zlf#z5!8;cX+8NTXZgLmC}9tEa`7Q~rwtAuC4sZvq^#bAwo$Up1H)Ss=?(8PVN1
z>zP?7nEbXrbk)>!R1P(2B|MzJe610yRAl*3>&4fqa^8m#!>^j?_?V_3*hAQahO(r3
z_>#D+l|85Eaf473u=n%g@wgcTQ**qe5g5t%h)JG&c8Rw1LL3@2jbbmy?MS6Xgd)LA
zU_DjWaBI<+Q+A{yy+@KMY=oFJSsO7FtOO}~