diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
deleted file mode 100644
index 241085f59..000000000
--- a/.github/workflows/docs.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-name: docs
-
-on:
- push:
- branches:
- - main
-
-jobs:
- test:
- runs-on: ubuntu-latest
-
- strategy:
- matrix:
- python-version: [3.8]
- fail-fast: false
-
- steps:
- - uses: actions/checkout@v3
-
- - name: set up python
- uses: actions/setup-python@v4
- with:
- cache: "pip"
- cache-dependency-path: "pyproject.toml"
- python-version: ${{ matrix.python-version }}
-
- - name: install invoke
- run: pip install invoke
-
- - name: install dependencies
- run: inv install
-
- - name: docs
- run: inv docs-build
-
- - name: Pushes to another repository
- uses: cpina/github-action-push-to-another-repository@main
- env:
- API_TOKEN_GITHUB: ${{ secrets.GH_TOKEN }}
- with:
- source-directory: "docs/autodocs/_build/markdown"
- target-directory: "api/python-sdk/autodocs"
- destination-github-username: "rungalileo"
- destination-repository-name: "docs"
- user-email: team@rungalileo.io
diff --git a/.github/workflows/publish-docs.yaml b/.github/workflows/publish-docs.yaml
new file mode 100644
index 000000000..f70a63004
--- /dev/null
+++ b/.github/workflows/publish-docs.yaml
@@ -0,0 +1,62 @@
+name: Publish Docs
+
+on:
+  workflow_dispatch:
+  workflow_run:
+    workflows: ["publish"]
+    types:
+      - completed
+
+# Publishing uses a dedicated SSH deploy key, so the default GITHUB_TOKEN
+# only needs read access to check out this repository.
+permissions:
+  contents: read
+
+jobs:
+  publish-docs:
+    # `workflow_run` fires for failed and cancelled runs too; publish only
+    # after a successful "publish" run. Manual dispatches always run.
+    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        # NOTE(review): no `python-version` is set, so the interpreter is whatever
+        # setup-python resolves by default; consider pinning one explicitly.
+        uses: actions/setup-python@v5
+        with:
+          cache: "pip"
+          cache-dependency-path: "pyproject.toml"
+
+      - name: Install Dependencies
+        # Install all dependencies so that the docs can use the function and type signatures.
+        run: |
+          pipx install invoke
+          invoke install
+
+      - name: Build Docs
+        run: invoke docs-build
+
+      - name: Add GitHub Pages Config
+        # `.nojekyll` stops GitHub Pages from running Jekyll over the generated
+        # HTML; `CNAME` sets the custom domain the site is served from.
+        run: |
+          touch docs/build/html/.nojekyll
+          echo "dataquality.docs.rungalileo.io" > docs/build/html/CNAME
+
+      - name: Pushes to another repository
+        # NOTE(review): third-party action tracked at `@main` while holding a deploy
+        # key; consider pinning it to a release tag or commit SHA.
+        uses: cpina/github-action-push-to-another-repository@main
+        env:
+          SSH_DEPLOY_KEY: ${{ secrets.SSH_DATAQUALITY_DOCS_DEPLOY_KEY }}
+        with:
+          source-directory: "docs/build/html"
+          target-directory: "docs/"
+          destination-github-username: "rungalileo"
+          destination-repository-name: "dataquality-docs"
+          user-name: galileo-automation
+          user-email: ci@rungalileo.io
diff --git a/.gitignore b/.gitignore
index cc6940c6a..9701632bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,8 +76,8 @@ instance/
.scrapy
# Sphinx documentation
-docs/_build/html
-docs/_build/doctrees
+docs/build/html
+docs/build/md
# PyBuilder
.pybuilder/
@@ -188,4 +188,4 @@ large_run.ipynb
*.cache
.python-version
-local_notebooks
\ No newline at end of file
+local_notebooks
diff --git a/docs/autodocs/Makefile b/docs/autodocs/Makefile
deleted file mode 100644
index d4bb2cbb9..000000000
--- a/docs/autodocs/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line, and also
-# from the environment for the first two.
-SPHINXOPTS ?=
-SPHINXBUILD ?= sphinx-build
-SOURCEDIR = .
-BUILDDIR = _build
-
-# Put it first so that "make" without argument is like "make help".
-help:
- @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/autodocs/Readme.md b/docs/autodocs/Readme.md
deleted file mode 100644
index 21fd4a9f1..000000000
--- a/docs/autodocs/Readme.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Sphinx automatic docs
-
-Modify the `docs/autodocs/api/dataquality.rst` file to add a new documentation to the reference
\ No newline at end of file
diff --git a/docs/autodocs/_build/doctrees/api/dataquality.doctree b/docs/autodocs/_build/doctrees/api/dataquality.doctree
deleted file mode 100644
index 840f80eb2..000000000
Binary files a/docs/autodocs/_build/doctrees/api/dataquality.doctree and /dev/null differ
diff --git a/docs/autodocs/_build/doctrees/environment.pickle b/docs/autodocs/_build/doctrees/environment.pickle
deleted file mode 100644
index d10869d9a..000000000
Binary files a/docs/autodocs/_build/doctrees/environment.pickle and /dev/null differ
diff --git a/docs/autodocs/_build/doctrees/index.doctree b/docs/autodocs/_build/doctrees/index.doctree
deleted file mode 100644
index aa40e0578..000000000
Binary files a/docs/autodocs/_build/doctrees/index.doctree and /dev/null differ
diff --git a/docs/autodocs/_build/markdown/api/dataquality.md b/docs/autodocs/_build/markdown/api/dataquality.md
deleted file mode 100644
index d5ba6a9f5..000000000
--- a/docs/autodocs/_build/markdown/api/dataquality.md
+++ /dev/null
@@ -1,1274 +0,0 @@
-# dataquality
-
-dataquality
-
-
-### login()
-Log into your Galileo environment.
-
-The function will prompt your for an Authorization Token (api key) that you can
-access from the console.
-
-To skip the prompt for automated workflows, you can set GALILEO_USERNAME
-(your email) and GALILEO_PASSWORD if you signed up with an email and password
-
-
-* **Return type**
-
- `None`
-
-
-
-### init(task_type, project_name=None, run_name=None, is_public=True, overwrite_local=True)
-Start a run
-
-Initialize a new run and new project, initialize a new run in an existing project,
-or reinitialize an existing run in an existing project.
-
-Before creating the project, check:
-- The user is valid, login if not
-- The DQ client version is compatible with API version
-
-Optionally provide project and run names to create a new project/run or restart
-existing ones.
-
-
-* **Return type**
-
- `None`
-
-
-
-* **Parameters**
-
- **task_type** (`str`) -- The task type for modeling. This must be one of the valid
-
-
-dataquality.schemas.task_type.TaskType options
-:type project_name: `Optional`[`str`]
-:param project_name: The project name. If not passed in, a random one will be
-generated. If provided, and the project does not exist, it will be created. If it
-does exist, it will be set.
-:type run_name: `Optional`[`str`]
-:param run_name: The run name. If not passed in, a random one will be
-generated. If provided, and the project does not exist, it will be created. If it
-does exist, it will be set.
-:type is_public: `bool`
-:param is_public: Boolean value that sets the project's visibility. Default True.
-:type overwrite_local: `bool`
-:param overwrite_local: If True, the current project/run log directory will be
-cleared during this function. If logging over many sessions with checkpoints, you
-may want to set this to False. Default True
-
-
-### log_model_outputs(\*, embs, ids, split=None, epoch=None, logits=None, probs=None, inference_name=None, exclude_embs=False)
-Logs model outputs for model during training/test/validation.
-
-
-* **Parameters**
-
-
- * **embs** (`Union`[`List`, `ndarray`, `None`]) -- The embeddings per output sample
-
-
- * **ids** (`Union`[`List`, `ndarray`]) -- The ids for each sample. Must match input ids of logged samples
-
-
- * **split** (`Optional`[`Split`]) -- The current split. Must be set either here or via dq.set_split
-
-
- * **epoch** (`Optional`[`int`]) -- The current epoch. Must be set either here or via dq.set_epoch
-
-
- * **logits** (`Union`[`List`, `ndarray`, `None`]) -- The logits for each sample
-
-
- * **probs** (`Union`[`List`, `ndarray`, `None`]) -- Deprecated, use logits. If passed in, a softmax will NOT be applied
-
-
- * **inference_name** (`Optional`[`str`]) -- Inference name indicator for this inference split.
- If logging for an inference split, this is required.
-
-
- * **exclude_embs** (`bool`) -- Optional flag to exclude embeddings from logging. If True and
- embs is set to None, this will generate random embs for each sample.
-
-
-
-* **Return type**
-
- `None`
-
-
-The expected argument shapes come from the task_type being used
-See dq.docs() for more task specific details on parameter shape
-
-
-### finish(last_epoch=None, wait=True, create_data_embs=False)
-Finishes the current run and invokes a job
-
-
-* **Parameters**
-
-
- * **last_epoch** (`Optional`[`int`]) -- If set, only epochs up to this value will be uploaded/processed
- This is inclusive, so setting last_epoch to 5 would upload epochs 0,1,2,3,4,5
-
-
- * **wait** (`bool`) -- If true, after uploading the data, this will wait for the
- run to be processed by the Galileo server. If false, you can manually wait
- for the run by calling dq.wait_for_run() Default True
-
-
- * **create_data_embs** (`bool`) -- If True, an off-the-shelf transformer will run on the raw
- text input to generate data-level embeddings. These will be available in the
- data view tab of the Galileo console. You can also access these embeddings
- via dq.metrics.get_data_embeddings()
-
-
-
-* **Return type**
-
- `str`
-
-
-
-### set_labels_for_run(labels)
-Creates the mapping of the labels for the model to their respective indexes.
-:rtype: `None`
-
-
-* **Parameters**
-
- **labels** (`Union`[`List`[`List`[`str`]], `List`[`str`]]) -- An ordered list of labels (ie ['dog','cat','fish']
-
-
-If this is a multi-label type, then labels are a list of lists where each inner
-list indicates the label for the given task
-
-This order MUST match the order of probabilities that the model outputs.
-
-In the multi-label case, the outer order (order of the tasks) must match the
-task-order of the task-probabilities logged as well.
-
-
-### set_tasks_for_run(tasks, binary=True)
-Sets the task names for the run (multi-label case only).
-
-This order MUST match the order of the labels list provided in log_input_data
-and the order of the probability vectors provided in log_model_outputs.
-
-This also must match the order of the labels logged in set_labels_for_run (meaning
-that the first list of labels must be the labels of the first task passed in here)
-
-
-* **Return type**
-
- `None`
-
-
-
-* **Parameters**
-
-
- * **tasks** (`List`[`str`]) -- The list of tasks for your run
-
-
- * **binary** (`bool`) -- Whether this is a binary multi label run. If true, tasks will also
-
-
-be set as your labels, and you should NOT call dq.set_labels_for_run it will be
-handled for you. Default True
-
-
-### set_epoch(epoch)
-Set the current epoch.
-
-When set, logging model outputs will use this if not logged explicitly
-
-
-* **Return type**
-
- `None`
-
-
-
-### set_split(split, inference_name=None)
-Set the current split.
-
-When set, logging data inputs/model outputs will use this if not logged explicitly
-When setting split to inference, inference_name must be included
-
-
-* **Return type**
-
- `None`
-
-
-
-### log_data_sample(\*, text, id, \*\*kwargs)
-Log a single input example to disk
-
-Fields are expected singular elements. Field names are in the singular of
-log_input_samples (texts -> text)
-The expected arguments come from the task_type being used: See dq.docs() for details
-
-
-* **Parameters**
-
-
- * **text** (`str`) -- List[str] the input samples to your model
-
-
- * **id** (`int`) -- List[int | str] the ids per sample
-
-
- * **split** -- Optional[str] the split for this data. Can also be set via
- dq.set_split
-
-
- * **kwargs** (`Any`) -- See dq.docs() for details on other task specific parameters
-
-
-
-* **Return type**
-
- `None`
-
-
-
-### log_dataset(dataset, \*, batch_size=100000, text='text', id='id', split=None, meta=None, \*\*kwargs)
-Log an iterable or other dataset to disk. Useful for logging memory mapped files
-
-Dataset provided must be an iterable that can be traversed row by row, and for each
-row, the fields can be indexed into either via string keys or int indexes. Pandas
-and Vaex dataframes are also allowed, as well as HuggingFace Datasets
-
-valid examples:
-
- d = [
-
- {"my_text": "sample1", "my_labels": "A", "my_id": 1, "sample_quality": 5.3},
- {"my_text": "sample2", "my_labels": "A", "my_id": 2, "sample_quality": 9.1},
- {"my_text": "sample3", "my_labels": "B", "my_id": 3, "sample_quality": 2.7},
-
- ]
- dq.log_dataset(
-
- > d, text="my_text", id="my_id", label="my_labels", meta=["sample_quality"]
-
- )
-
- Logging a pandas dataframe, df:
-
- text label id sample_quality
-
- 0 sample1 A 1 5.3
- 1 sample2 A 2 9.1
- 2 sample3 B 3 2.7
- # We don't need to set text id or label because it matches the default
- dq.log_dataset(d, meta=["sample_quality"])
-
- Logging and iterable of tuples:
- d = [
-
- > ("sample1", "A", "ID1"),
- > ("sample2", "A", "ID2"),
- > ("sample3", "B", "ID3"),
-
- ]
- dq.log_dataset(d, text=0, id=2, label=1)
-
-Invalid example:
-
- d = {
-
- "my_text": ["sample1", "sample2", "sample3"],
- "my_labels": ["A", "A", "B"],
- "my_id": [1, 2, 3],
- "sample_quality": [5.3, 9.1, 2.7]
-
- }
-
-In the invalid case, use dq.log_data_samples:
-
- meta = {"sample_quality": d["sample_quality"]}
- dq.log_data_samples(
-
- > texts=d["my_text"], labels=d["my_labels"], ids=d["my_ids"], meta=meta
-
- )
-
-Keyword arguments are specific to the task type. See dq.docs() for details
-
-
-* **Parameters**
-
-
- * **dataset** (`TypeVar`(`DataSet`, bound= `Union`[`Iterable`, `DataFrame`, `DataFrame`])) -- The iterable or dataframe to log
-
-
- * **text** (`Union`[`str`, `int`]) -- str | int The column, key, or int index for text data. Default "text"
-
-
- * **id** (`Union`[`str`, `int`]) -- str | int The column, key, or int index for id data. Default "id"
-
-
- * **split** (`Optional`[`Split`]) -- Optional[str] the split for this data. Can also be set via
- dq.set_split
-
-
- * **meta** (`Optional`[`List`[`Union`[`str`, `int`]]]) -- List[str | int] Additional keys/columns to your input data to be
- logged as metadata. Consider a pandas dataframe, this would be the list of
-
-
- * **kwargs** (`Any`) -- See help(dq.get_data_logger().log_dataset) for more details here
-
-
-
-* **Batch_size**
-
- The number of data samples to log at a time. Useful when logging a
- memory mapped dataset. A larger batch_size will result in faster logging at the
- expense of more memory usage. Default 100,000
-
-
-
-* **Return type**
-
- `None`
- columns corresponding to each metadata field to log
-
-
-or dq.docs() for more general task details
-
-
-### auto(hf_data=None, hf_inference_names=None, train_data=None, val_data=None, test_data=None, inference_data=None, max_padding_length=200, hf_model='distilbert-base-uncased', labels=None, project_name=None, run_name=None, wait=True, create_data_embs=False)
-Automatically gets insights on a text classification or NER dataset
-
-Given either a pandas dataframe, file_path, or huggingface dataset path, this
-function will load the data, train a huggingface transformer model, and
-provide Galileo insights via a link to the Galileo Console
-
-One of hf_data, train_data should be provided. If neither of those are, a
-demo dataset will be loaded by Galileo for training.
-
-
-* **Parameters**
-
-
- * **hf_data** (`Union`[`DatasetDict`, `str`, `None`]) -- Union[DatasetDict, str] Use this param if you have huggingface
- data in the hub or in memory. Otherwise see train_data, val_data,
- and test_data. If provided, train_data, val_data, and test_data are ignored.
-
-
- * **hf_inference_names** (`Optional`[`List`[`str`]]) -- Use this param alongside hf_data if you have splits
- you'd like to consider as inference. A list of key names in hf_data
- to be run as inference runs after training. Any keys set must exist in hf_data
-
-
- * **train_data** (`Union`[`DataFrame`, `Dataset`, `str`, `None`]) -- Optional training data to use. Can be one of
- \* Pandas dataframe
- \* Huggingface dataset
- \* Path to a local file
- \* Huggingface dataset hub path
-
-
- * **val_data** (`Union`[`DataFrame`, `Dataset`, `str`, `None`]) -- Optional validation data to use. The validation data is what is
- used for the evaluation dataset in huggingface, and what is used for early
- stopping. If not provided, but test_data is, that will be used as the evaluation
- set. If neither val nor test are available, the train data will be randomly
- split 80/20 for use as evaluation data.
- Can be one of
- \* Pandas dataframe
- \* Huggingface dataset
- \* Path to a local file
- \* Huggingface dataset hub path
-
-
- * **test_data** (`Union`[`DataFrame`, `Dataset`, `str`, `None`]) -- Optional test data to use. The test data, if provided with val,
- will be used after training is complete, as the held-out set. If no validation
- data is provided, this will instead be used as the evaluation set.
- Can be one of
- \* Pandas dataframe
- \* Huggingface dataset
- \* Path to a local file
- \* Huggingface dataset hub path
-
-
- * **inference_data** (`Optional`[`Dict`[`str`, `Union`[`DataFrame`, `Dataset`, `str`]]]) -- User this param to include inference data alongside the
- train_data param. If you are passing data via the hf_data parameter, you
- should use the hf_inference_names param. Optional inference datasets to run
- with after training completes. The structure is a dictionary with the
- key being the inference name and the value one of
- \* Pandas dataframe
- \* Huggingface dataset
- \* Path to a local file
- \* Huggingface dataset hub path
-
-
- * **max_padding_length** (`int`) -- The max length for padding the input text
- during tokenization. Default 200
-
-
- * **hf_model** (`str`) -- The pretrained AutoModel from huggingface that will be used to
- tokenize and train on the provided data. Default distilbert-base-uncased
-
-
- * **labels** (`Optional`[`List`[`str`]]) -- Optional list of labels for this dataset. If not provided, they
- will attempt to be extracted from the data
-
-
- * **project_name** (`Optional`[`str`]) -- Optional project name. If not set, a random name will
- be generated
-
-
- * **run_name** (`Optional`[`str`]) -- Optional run name for this data. If not set, a random name will
- be generated
-
-
- * **wait** (`bool`) -- Whether to wait for Galileo to complete processing your run.
- Default True
-
-
- * **create_data_embs** (`bool`) -- Whether to create data embeddings for this run. If True,
- Sentence-Transformers will be used to generate data embeddings for this dataset
- and uploaded with this run. You can access these embeddings via
- dq.metrics.get_data_embeddings in the emb column or
- dq.metrics.get_dataframe(..., include_data_embs=True) in the data_emb col
- Only available for TC currently. NER coming soon. Default False.
-
-
-
-* **Return type**
-
- `None`
-
-
-For text classification datasets, the only required columns are text and label
-
-For NER, the required format is the huggingface standard format of tokens and
-tags (or ner_tags).
-See example: [https://huggingface.co/datasets/rungalileo/mit_movies](https://huggingface.co/datasets/rungalileo/mit_movies)
-
-> MIT Movies dataset in huggingface format
-
-```python
-tokens ner_tags
-[what, is, a, good, action, movie, that, is, r... [0, 0, 0, 0, 7, 0, ...
-[show, me, political, drama, movies, with, jef... [0, 0, 7, 8, 0, 0, ...
-[what, are, some, good, 1980, s, g, rated, mys... [0, 0, 0, 0, 5, 6, ...
-[list, a, crime, film, which, director, was, d... [0, 0, 7, 0, 0, 0, ...
-[is, there, a, thriller, movie, starring, al, ... [0, 0, 0, 7, 0, 0, ...
-... ... ...
-```
-
-To see auto insights on a random, pre-selected dataset, simply run
-
-```python
-import dataquality as dq
-
-dq.auto()
-```
-
-An example using auto with a hosted huggingface text classification dataset
-
-```python
-import dataquality as dq
-
-dq.auto(hf_data="rungalileo/trec6")
-```
-
-Similarly, for NER
-
-```python
-import dataquality as dq
-
-dq.auto(hf_data="conll2003")
-```
-
-An example using auto with sklearn data as pandas dataframes
-
-```python
-import dataquality as dq
-import pandas as pd
-from sklearn.datasets import fetch_20newsgroups
-
-# Load the newsgroups dataset from sklearn
-newsgroups_train = fetch_20newsgroups(subset='train')
-newsgroups_test = fetch_20newsgroups(subset='test')
-# Convert to pandas dataframes
-df_train = pd.DataFrame(
- {"text": newsgroups_train.data, "label": newsgroups_train.target}
-)
-df_test = pd.DataFrame(
- {"text": newsgroups_test.data, "label": newsgroups_test.target}
-)
-
-dq.auto(
- train_data=df_train,
- test_data=df_test,
- labels=newsgroups_train.target_names,
- project_name="newsgroups_work",
- run_name="run_1_raw_data"
-)
-```
-
-An example of using auto with a local CSV file with text and label columns
-
-```python
-import dataquality as dq
-
-dq.auto(
- train_data="train.csv",
- test_data="test.csv",
- project_name="data_from_local",
- run_name="run_1_raw_data"
-)
-```
-
-# dataquality.integrations.torch
-
-
-### watch(model, dataloaders=[], classifier_layer=None, embedding_dim=None, logits_dim=None, embedding_fn=None, logits_fn=None, last_hidden_state_layer=None, unpatch_on_start=False)
-wraps a PyTorch model and optionally dataloaders to log the
-embeddings and logits to [Galileo]([https://www.rungalileo.io/](https://www.rungalileo.io/)).
-
-```python
-dq.log_dataset(train_dataset, split="train")
-train_dataloader = torch.utils.data.DataLoader()
-model = TextClassificationModel(num_labels=len(train_dataset.list_of_labels))
-watch(model, [train_dataloader, test_dataloader])
-for epoch in range(NUM_EPOCHS):
- dq.set_epoch_and_split(epoch,"training")
- train()
- dq.set_split("validation")
- validate()
-dq.finish()
-```
-
-
-* **Parameters**
-
-
- * **model** (`Module`) -- Pytorch Model to be wrapped
-
-
- * **dataloaders** (`Optional`[`List`[`DataLoader`]]) -- List of dataloaders to be wrapped
-
-
- * **classifier_layer** (`Union`[`Module`, `str`, `None`]) -- Layer to hook into (usually 'classifier' or 'fc').
- Inputs are the embeddings and outputs are the logits.
-
-
- * **embedding_dim** (`Union`[`str`, `int`, `slice`, `Tensor`, `List`, `Tuple`, `None`]) -- Dimension of the embeddings for example "[:, 0]"
- to remove the cls token
-
-
- * **logits_dim** (`Union`[`str`, `int`, `slice`, `Tensor`, `List`, `Tuple`, `None`]) -- Dimension to extract the logits for example in NER
- "[:,1:,:]"
-
-
- * **logits_dim** -- Dimension of the logits
- from layer input and logits from layer output. If the layer is not found,
- the last_hidden_state_layer will be used
-
-
- * **embedding_fn** (`Optional`[`Callable`]) -- Function to process embeddings from the model
-
-
- * **logits_fn** (`Optional`[`Callable`]) -- Function to process logits from the model f.e.
- lambda x: x[0]
-
-
- * **last_hidden_state_layer** (`Union`[`Module`, `str`, `None`]) -- Layer to extract the embeddings from
-
-
- * **unpatch_on_start** (`bool`) -- Force unpatching of dataloaders
- instead of global patching
-
-
- * **model** -- Pytorch Model to be wrapped
-
-
- * **dataloaders** -- List of dataloaders to be wrapped
-
-
- * **last_hidden_state_layer** -- Layer to extract the embeddings from
-
-
- * **embedding_dim** -- Dimension of the embeddings for example "[:, 0]"
-
-
-
-* **Return type**
-
- `None`
-
-
-to remove the cls token
-:param logits_dim: Dimension to extract the logits for example in NER
-
-> "[:,1:,:]"
-
-
-### unwatch(model=None, force=True)
-Unwatches the model. Run after the run is finished.
-:type force: `bool`
-:param force: Force unwatch even if the model is not watched
-
-
-* **Return type**
-
- `None`
-
-
-# dataquality.integrations.transformers_trainer
-
-
-### watch(trainer, last_hidden_state_layer=None, embedding_dim=None, logits_dim=None, classifier_layer=None, embedding_fn=None, logits_fn=None)
-used to *hook* into to the **trainer**
-to log to [Galileo]([https://www.rungalileo.io/](https://www.rungalileo.io/))
-
-
-* **Parameters**
-
- **trainer** (`Trainer`) -- Trainer object
-
-
-
-* **Return type**
-
- `None`
-
-
-
-### unwatch(trainer)
-unwatch is used to remove the callback from the trainer
-:type trainer: `Trainer`
-:param trainer: Trainer object
-
-
-* **Return type**
-
- `None`
-
-
-# dataquality.integrations.keras
-
-
-### _class_ DataQualityCallback()
-Bases: `Callback`
-
-
-#### on_epoch_begin(epoch, logs)
-Called at the start of an epoch.
-
-Subclasses should override for any actions to run. This function should only
-be called during TRAIN mode.
-
-
-* **Parameters**
-
-
- * **epoch** (`int`) -- Integer, index of epoch.
-
-
- * **logs** (`Dict`) -- Dict. Currently no data is passed to this argument for this method
- but that may change in the future.
-
-
-
-* **Return type**
-
- `None`
-
-
-
-#### on_test_batch_begin(batch, logs=None)
-Called at the beginning of a batch in evaluate methods.
-
-Also called at the beginning of a validation batch in the fit
-methods, if validation data is provided.
-
-Subclasses should override for any actions to run.
-
-Note that if the steps_per_execution argument to compile in
-tf.keras.Model is set to N, this method will only be called every N
-batches.
-
-
-* **Parameters**
-
-
- * **batch** (`Any`) -- Integer, index of batch within the current epoch.
-
-
- * **logs** (`Optional`[`Dict`]) -- Dict. Currently no data is passed to this argument for this method
- but that may change in the future.
-
-
-
-* **Return type**
-
- `None`
-
-
-
-#### on_test_batch_end(batch, logs=None)
-Called at the end of a batch in evaluate methods.
-
-Also called at the end of a validation batch in the fit
-methods, if validation data is provided.
-
-Subclasses should override for any actions to run.
-
-Note that if the steps_per_execution argument to compile in
-tf.keras.Model is set to N, this method will only be called every N
-batches.
-
-
-* **Parameters**
-
-
- * **batch** (`Any`) -- Integer, index of batch within the current epoch.
-
-
- * **logs** (`Optional`[`Dict`]) -- Dict. Aggregated metric results up until this batch.
-
-
-
-* **Return type**
-
- `None`
-
-
-
-#### on_train_batch_begin(batch, logs=None)
-Called at the beginning of a training batch in fit methods.
-
-Subclasses should override for any actions to run.
-
-Note that if the steps_per_execution argument to compile in
-tf.keras.Model is set to N, this method will only be called every N
-batches.
-
-
-* **Parameters**
-
-
- * **batch** (`Any`) -- Integer, index of batch within the current epoch.
-
-
- * **logs** (`Optional`[`Dict`]) -- Dict. Currently no data is passed to this argument for this method
- but that may change in the future.
-
-
-
-* **Return type**
-
- `None`
-
-
-
-#### on_train_batch_end(batch, logs=None)
-Called at the end of a training batch in fit methods.
-
-Subclasses should override for any actions to run.
-
-Note that if the steps_per_execution argument to compile in
-tf.keras.Model is set to N, this method will only be called every N
-batches.
-
-
-* **Parameters**
-
-
- * **batch** (`Any`) -- Integer, index of batch within the current epoch.
-
-
- * **logs** (`Optional`[`Dict`]) -- Dict. Aggregated metric results up until this batch.
-
-
-
-* **Return type**
-
- `None`
-
-
-
-### _class_ DataQualityLoggingLayer(what_to_log)
-Bases: `Layer`
-
-
-#### call(inputs)
-This is where the layer's logic lives.
-
-The call() method may not create state (except in its first invocation,
-wrapping the creation of variables or other resources in tf.init_scope()).
-It is recommended to create state in __init__(), or the build() method
-that is called automatically before call() executes the first time.
-
-
-* **Parameters**
-
-
- * **inputs** (`Tensor`) -- Input tensor, or dict/list/tuple of input tensors.
- The first positional inputs argument is subject to special rules:
- - inputs must be explicitly passed. A layer cannot have zero
-
- > arguments, and inputs cannot be provided via the default value
- > of a keyword argument.
-
-
- * NumPy array or Python scalar values in inputs get cast as tensors.
-
-
- * Keras mask metadata is only collected from inputs.
-
-
- * Layers are built (build(input_shape) method)
- using shape info from inputs only.
-
-
- * input_spec compatibility is only checked against inputs.
-
-
- * Mixed precision input casting is only applied to inputs.
- If a layer has tensor arguments in \*args or \*\*kwargs, their
- casting behavior in mixed precision should be handled manually.
-
-
- * The SavedModel input specification is generated using inputs only.
-
-
- * Integration with various ecosystem packages like TFMOT, TFLite,
- TF.js, etc is only supported for inputs and not for tensors in
- positional and keyword arguments.
-
-
-
- * **\*args** -- Additional positional arguments. May contain tensors, although
- this is not recommended, for the reasons above.
-
-
- * **\*\*kwargs** -- Additional keyword arguments. May contain tensors, although
- this is not recommended, for the reasons above.
- The following optional keyword arguments are reserved:
- - training: Boolean scalar tensor of Python boolean indicating
-
- > whether the call is meant for training or inference.
-
-
- * mask: Boolean input mask. If the layer's call() method takes a
- mask argument, its default value will be set to the mask generated
- for inputs by the previous layer (if input did come from a layer
- that generated a corresponding mask, i.e. if it came from a Keras
- layer with masking support).
-
-
-
-
-* **Return type**
-
- `Tensor`
-
-
-
-* **Returns**
-
- A tensor or list/tuple of tensors.
-
-
-
-### add_ids_to_numpy_arr(orig_arr, ids)
-Deprecated, use add_sample_ids
-
-
-* **Return type**
-
- `ndarray`
-
-
-
-### add_sample_ids(orig_arr, ids)
-Add sample IDs to the training/test data before training begins
-
-This is necessary to call before training a Keras model with the
-Galileo DataQualityCallback
-
-
-* **Return type**
-
- `ndarray`
-
-
-
-* **Parameters**
-
-
- * **orig_arr** (`ndarray`) -- The numpy array to be passed into model.train
-
-
- * **ids** (`Union`[`List`[`int`], `ndarray`]) -- The ids for each sample to append. These are the same IDs that are
-
-
-logged for the input data. They must match 1-1
-
-# dataquality.integrations.experimental.keras
-
-
-### watch(model, layer=None, seed=42)
-Watch a model and log the inputs and outputs of a layer.
-:type model: `Layer`
-:param model: The model to watch
-:type layer: `Optional`[`Any`]
-:param layer: The layer to watch, if None the classifier layer is used
-:type seed: `int`
-:param seed: The seed to use for the model
-
-
-* **Return type**
-
- `None`
-
-
-
-### unwatch(model)
-Unpatches the model. Run after the run is finished
-:type model: `Layer`
-:param model: The model to unpatch
-
-
-* **Return type**
-
- `None`
-
-
-# dataquality.integrations.spacy
-
-
-### watch(nlp)
-Stores the nlp object before calling watch on the ner component within it
-
-We need access to the nlp object so that during training we can capture the
-model's predictions over the raw text by running nlp("user's text") and looking
-at the results
-
-
-* **Parameters**
-
- **nlp** (`Language`) -- The spacy nlp Language component.
-
-
-
-* **Return type**
-
- `None`
-
-
-
-### unwatch(nlp)
-Returns spacy nlp Language component to its original unpatched state.
-
-Unfortunately, spacy does not make this easy, so we replicate spacy's add_pipe
-for logic for using internal spacy methods to add a component object to a specific
-position.
-
-
-* **Return type**
-
- `None`
-
-
-# dataquality.integrations.hf
-
-
-### infer_schema(label_list)
-Infers the schema via the exhaustive list of labels
-
-
-* **Return type**
-
- `TaggingSchema`
-
-
-
-### tokenize_and_log_dataset(dd, tokenizer, label_names=None, meta=None)
-This function tokenizes a huggingface DatasetDict and aligns the labels to BPE
-
-After tokenization, this function will also log the dataset(s) present in the
-DatasetDict
-
-
-* **Parameters**
-
-
- * **dd** (`DatasetDict`) -- DatasetDict from huggingface to log
-
-
- * **tokenizer** (`PreTrainedTokenizerBase`) -- The pretrained tokenizer from huggingface
-
-
- * **label_names** (`Optional`[`List`[`str`]]) -- Optional list of labels for the dataset. These can typically
- be extracted automatically (if the dataset came from hf datasets hub or was
- exported via Galileo dataquality). If they cannot be extracted, an error will
- be raised requesting label names
-
-
- * **meta** (`Optional`[`List`[`str`]]) -- Optional metadata columns to be logged. The columns must be present
- in at least one of the splits of the dataset.
-
-
-
-* **Return type**
-
- `DatasetDict`
-
-
-
-### _class_ TextDataset(hf_dataset)
-Bases: `Dataset`
-
-An abstracted Huggingface Text dataset for users to import and use
-
-Get back a DataLoader via the get_dataloader function
-
-
-### get_dataloader(dataset, \*\*kwargs)
-Create a DataLoader for a particular split given a huggingface Dataset
-
-The DataLoader will be a loader of a TextDataset. The __getitem__ for that dataset
-will return:
-
->
-> * id - the Galileo ID of the sample
-
-
-> * input_ids - the standard huggingface input_ids
-
-
-> * attention_mask - the standard huggingface attention_mask
-
-
-> * labels - output labels adjusted with tokenized NER data
-
-
-* **Parameters**
-
-
- * **dataset** (`Dataset`) -- The huggingface dataset to convert to a DataLoader
-
-
- * **kwargs** (`Any`) -- Any additional keyword arguments to be passed into the DataLoader
- Things like batch_size or shuffle
-
-
-
-* **Return type**
-
- `DataLoader`
-
-
-# dataquality
-
-dataquality
-
-
-### _class_ AggregateFunction(value)
-Bases: `str`, `Enum`
-
-An enumeration.
-
-
-### _class_ Operator(value)
-Bases: `str`, `Enum`
-
-An enumeration.
-
-
-### _class_ Condition(\*\*data)
-Bases: `BaseModel`
-
-Class for building custom conditions for data quality checks
-
-After building a condition, call evaluate to determine the truthiness
-of the condition against a given DataFrame.
-
-With a bit of thought, complex and custom conditions can be built. To gain an
-intuition for what can be accomplished, consider the following examples:
-
-
-1. Is the average confidence less than 0.3?
-
- ```python
- >>> c = Condition(
- ... agg=AggregateFunction.avg,
- ... metric="confidence",
- ... operator=Operator.lt,
- ... threshold=0.3,
- ... )
- >>> c.evaluate(df)
- ```
-
-
-2. Is the max DEP greater or equal to 0.45?
-
- ```python
- >>> c = Condition(
- ... agg=AggregateFunction.max,
- ... metric="data_error_potential",
- ... operator=Operator.gte,
- ... threshold=0.45,
- ... )
- >>> c.evaluate(df)
- ```
-
-By adding filters, you can further narrow down the scope of the condition.
-If the aggregate function is "pct", you don't need to specify a metric,
-
-> as the filters will determine the percentage of data.
-
-For example:
-
-
-1. Alert if over 80% of the dataset has confidence under 0.1
-
- ```python
- >>> c = Condition(
- ... operator=Operator.gt,
- ... threshold=0.8,
- ... agg=AggregateFunction.pct,
- ... filters=[
- ... ConditionFilter(
- ... metric="confidence", operator=Operator.lt, value=0.1
- ... ),
- ... ],
- ... )
- >>> c.evaluate(df)
- ```
-
-
-2. Alert if at least 20% of the dataset has drifted (Inference DataFrames only)
-
- ```python
- >>> c = Condition(
- ... operator=Operator.gte,
- ... threshold=0.2,
- ... agg=AggregateFunction.pct,
- ... filters=[
- ... ConditionFilter(
- ... metric="is_drifted", operator=Operator.eq, value=True
- ... ),
- ... ],
- ... )
- >>> c.evaluate(df)
- ```
-
-
-3. Alert 5% or more of the dataset contains PII
-
- ```python
- >>> c = Condition(
- ... operator=Operator.gte,
- ... threshold=0.05,
- ... agg=AggregateFunction.pct,
- ... filters=[
- ... ConditionFilter(
- ... metric="galileo_pii", operator=Operator.neq, value="None"
- ... ),
- ... ],
- ... )
- >>> c.evaluate(df)
- ```
-
-Complex conditions can be built when the filter has a different metric
-than the metric used in the condition. For example:
-
-
-1. Alert if the min confidence of drifted data is less than 0.15
-
- ```python
- >>> c = Condition(
- ... agg=AggregateFunction.min,
- ... metric="confidence",
- ... operator=Operator.lt,
- ... threshold=0.15,
- ... filters=[
- ... ConditionFilter(
- ... metric="is_drifted", operator=Operator.eq, value=True
- ... )
- ... ],
- ... )
- >>> c.evaluate(df)
- ```
-
-
-2. Alert if over 50% of high DEP (>=0.7) data contains PII
-
- ```python
- >>> c = Condition(
- ... operator=Operator.gt,
- ... threshold=0.5,
- ... agg=AggregateFunction.pct,
- ... filters=[
- ... ConditionFilter(
- ... metric="data_error_potential", operator=Operator.gte, value=0.7
- ... ),
- ... ConditionFilter(
- ... metric="galileo_pii", operator=Operator.neq, value="None"
- ... ),
- ... ],
- ... )
- >>> c.evaluate(df)
- ```
-
-You can also call conditions directly, which will assert its truth against a df
-1. Assert that average confidence less than 0.3
->>> c = Condition(
-... agg=AggregateFunction.avg,
-... metric="confidence",
-... operator=Operator.lt,
-... threshold=0.3,
-... )
->>> c(df) # Will raise an AssertionError if False
-
-
-* **Parameters**
-
-
- * **metric** -- The DF column for evaluating the condition
-
-
- * **agg** -- An aggregate function to apply to the metric
-
-
- * **operator** -- The operator to use for comparing the agg to the threshold
- (e.g. "gt", "lt", "eq", "neq")
-
-
- * **threshold** -- Threshold value for evaluating the condition
-
-
- * **filter** -- Optional filter to apply to the DataFrame before evaluating the
- condition
-
-
-
-### _class_ ConditionFilter(\*\*data)
-Bases: `BaseModel`
-
-Filter a dataframe based on the column value
-
-Note that the column used for filtering is the same as the metric used
-in the condition.
-
-
-* **Parameters**
-
-
- * **operator** -- The operator to use for filtering (e.g. "gt", "lt", "eq", "neq")
- See Operator
-
-
- * **value** -- The value to compare against
diff --git a/docs/autodocs/_build/markdown/index.md b/docs/autodocs/_build/markdown/index.md
deleted file mode 100644
index 8adb680ec..000000000
--- a/docs/autodocs/_build/markdown/index.md
+++ /dev/null
@@ -1,118 +0,0 @@
-# Python API
-
-
-* [dataquality](api/dataquality.md)
-
-
- * [`login()`](api/dataquality.md#dataquality.login)
-
-
- * [`init()`](api/dataquality.md#dataquality.init)
-
-
- * [`log_model_outputs()`](api/dataquality.md#dataquality.log_model_outputs)
-
-
- * [`finish()`](api/dataquality.md#dataquality.finish)
-
-
- * [`set_labels_for_run()`](api/dataquality.md#dataquality.set_labels_for_run)
-
-
- * [`set_tasks_for_run()`](api/dataquality.md#dataquality.set_tasks_for_run)
-
-
- * [`set_epoch()`](api/dataquality.md#dataquality.set_epoch)
-
-
- * [`set_split()`](api/dataquality.md#dataquality.set_split)
-
-
- * [`log_data_sample()`](api/dataquality.md#dataquality.log_data_sample)
-
-
- * [`log_dataset()`](api/dataquality.md#dataquality.log_dataset)
-
-
- * [`auto()`](api/dataquality.md#dataquality.auto)
-
-
-* [dataquality.integrations.torch](api/dataquality.md#dataquality-integrations-torch)
-
-
- * [`watch()`](api/dataquality.md#dataquality.integrations.torch.watch)
-
-
- * [`unwatch()`](api/dataquality.md#dataquality.integrations.torch.unwatch)
-
-
-* [dataquality.integrations.transformers_trainer](api/dataquality.md#dataquality-integrations-transformers-trainer)
-
-
- * [`watch()`](api/dataquality.md#dataquality.integrations.transformers_trainer.watch)
-
-
- * [`unwatch()`](api/dataquality.md#dataquality.integrations.transformers_trainer.unwatch)
-
-
-* [dataquality.integrations.keras](api/dataquality.md#dataquality-integrations-keras)
-
-
- * [`DataQualityCallback`](api/dataquality.md#dataquality.integrations.keras.DataQualityCallback)
-
-
- * [`DataQualityLoggingLayer`](api/dataquality.md#dataquality.integrations.keras.DataQualityLoggingLayer)
-
-
- * [`add_ids_to_numpy_arr()`](api/dataquality.md#dataquality.integrations.keras.add_ids_to_numpy_arr)
-
-
- * [`add_sample_ids()`](api/dataquality.md#dataquality.integrations.keras.add_sample_ids)
-
-
-* [dataquality.integrations.experimental.keras](api/dataquality.md#dataquality-integrations-experimental-keras)
-
-
- * [`watch()`](api/dataquality.md#dataquality.integrations.experimental.keras.watch)
-
-
- * [`unwatch()`](api/dataquality.md#dataquality.integrations.experimental.keras.unwatch)
-
-
-* [dataquality.integrations.spacy](api/dataquality.md#dataquality-integrations-spacy)
-
-
- * [`watch()`](api/dataquality.md#dataquality.integrations.spacy.watch)
-
-
- * [`unwatch()`](api/dataquality.md#dataquality.integrations.spacy.unwatch)
-
-
-* [dataquality.integrations.hf](api/dataquality.md#dataquality-integrations-hf)
-
-
- * [`infer_schema()`](api/dataquality.md#dataquality.integrations.hf.infer_schema)
-
-
- * [`tokenize_and_log_dataset()`](api/dataquality.md#dataquality.integrations.hf.tokenize_and_log_dataset)
-
-
- * [`TextDataset`](api/dataquality.md#dataquality.integrations.hf.TextDataset)
-
-
- * [`get_dataloader()`](api/dataquality.md#dataquality.integrations.hf.get_dataloader)
-
-
-* [dataquality](api/dataquality.md#id1)
-
-
- * [`AggregateFunction`](api/dataquality.md#dataquality.AggregateFunction)
-
-
- * [`Operator`](api/dataquality.md#dataquality.Operator)
-
-
- * [`Condition`](api/dataquality.md#dataquality.Condition)
-
-
- * [`ConditionFilter`](api/dataquality.md#dataquality.ConditionFilter)
diff --git a/docs/autodocs/api/dataquality.rst b/docs/autodocs/api/dataquality.rst
deleted file mode 100644
index 20cec5bc3..000000000
--- a/docs/autodocs/api/dataquality.rst
+++ /dev/null
@@ -1,84 +0,0 @@
-dataquality
------------
-.. automodule:: dataquality
- :members: login, init, set_labels_for_run, set_tasks_for_run, log_dataset, set_split, set_epoch, log_model_outputs, metrics, log_data_sample,log_image_dataset, auto, logout, finish
- :show-inheritance:
- :member-order: bysource
-
-dataquality.integrations.torch
------------
-.. automodule:: dataquality.integrations.torch
- :members: watch, unwatch
- :show-inheritance:
- :member-order: bysource
-
-dataquality.integrations.transformers_trainer
------------
-.. automodule:: dataquality.integrations.transformers_trainer
- :members: watch, unwatch
- :show-inheritance:
- :member-order: bysource
-
-dataquality.integrations.lightning
------------
-.. automodule:: dataquality.integrations.lightning
- :members: LightningDQCallback
- :show-inheritance:
- :member-order: bysource
-
-
-dataquality.integrations.legacy.keras
------------
-.. automodule:: dataquality.integrations.legacy.keras
- :members:
- :show-inheritance:
-
-dataquality.integrations.keras
------------
-.. automodule:: dataquality.integrations.keras
- :members: watch, unwatch
- :show-inheritance:
- :member-order: bysource
-
-dataquality.integrations.spacy
------------
-.. automodule:: dataquality.integrations.spacy
- :members: watch, unwatch
- :show-inheritance:
- :member-order: bysource
-
-
-dataquality.integrations.hf
------------
-.. automodule:: dataquality.integrations.hf
- :members:
- :show-inheritance:
- :member-order: bysource
-
-dataquality.integrations.fastai
------------
-.. automodule:: dataquality.integrations.fastai
- :members: FastAiDQCallback
- :show-inheritance:
- :member-order: bysource
-
-dataquality.integrations.setfit
------------
-.. automodule:: dataquality.integrations.setfit
- :members: watch, unwatch, auto
- :show-inheritance:
- :member-order: bysource
-
-dataquality.integrations.jsl
------------
-.. automodule:: dataquality.integrations.jsl
- :members: JSLProject
- :show-inheritance:
- :member-order: bysource
-
-dataquality
------------
-.. automodule:: dataquality
- :members: AggregateFunction, Operator, Condition, ConditionFilter
- :show-inheritance:
- :member-order: bysource
\ No newline at end of file
diff --git a/docs/autodocs/conf.py b/docs/autodocs/conf.py
deleted file mode 100644
index 69c6a0968..000000000
--- a/docs/autodocs/conf.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# Configuration file for the Sphinx documentation builder.
-#
-# This file only contains a selection of the most common options. For a full
-# list see the documentation:
-# https://www.sphinx-doc.org/en/master/usage/configuration.html
-
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-import os
-import sys
-
-sys.path.insert(0, os.path.abspath(".."))
-
-# -- Project information -----------------------------------------------------
-
-project = "Dataquality"
-
-copyright = "2022 rungalileo.io"
-author = "Galileo Team"
-
-# -- General configuration ---------------------------------------------------
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
- "sphinx.ext.autodoc",
- "sphinx.ext.napoleon",
- "sphinx.ext.autosummary",
- "sphinx_autodoc_typehints",
- "sphinx.ext.autosectionlabel",
- "sphinx.ext.intersphinx",
- "myst_parser",
- "sphinx_markdown_builder",
-]
-
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ["_templates"]
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ["_build", "build/*", "Thumbs.db", ".DS_Store"]
-
-
-# -- Options for HTML output -------------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-#
-html_theme = "furo"
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ["_static"]
-
-html_theme_options = {
- "sidebar_hide_name": True,
- "navigation_with_keys": True,
- "light_logo": "logo-240x80.png",
- "dark_logo": "logo-240x80-white.png",
- # 'github_user': 'dataquality',
- # 'github_repo': 'dataquality',
- # 'fixed_sidebar': True,
-}
-
-html_favicon = "_static/logo.png"
-
-# The master toctree document.
-master_doc = "index"
-
-add_module_names = False
-
-pygments_style = None
-
-numfig = True
-
-smartquotes = False
diff --git a/docs/autodocs/index.rst b/docs/autodocs/index.rst
deleted file mode 100644
index 1fbf992d4..000000000
--- a/docs/autodocs/index.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-==========
-Python API
-==========
-
-.. toctree::
- :maxdepth: 2
-
- api/dataquality.rst
\ No newline at end of file
diff --git a/docs/autodocs/make.bat b/docs/autodocs/make.bat
deleted file mode 100644
index 8084272b4..000000000
--- a/docs/autodocs/make.bat
+++ /dev/null
@@ -1,35 +0,0 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
- set SPHINXBUILD=sphinx-build
-)
-set SOURCEDIR=.
-set BUILDDIR=_build
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
- echo.
- echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
- echo.installed, then set the SPHINXBUILD environment variable to point
- echo.to the full path of the 'sphinx-build' executable. Alternatively you
- echo.may add the Sphinx directory to PATH.
- echo.
- echo.If you don't have Sphinx installed, grab it from
- echo.https://www.sphinx-doc.org/
- exit /b 1
-)
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-
-:end
-popd
diff --git a/docs/autodocs/requirements.txt b/docs/autodocs/requirements.txt
deleted file mode 100644
index c14d513c7..000000000
--- a/docs/autodocs/requirements.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# requirements for readthedocs generation
-# TODO COMMON requirements
-sphinx==5.3.0
-sphinx-autobuild
-sphinx-autodoc-typehints
-furo
-myst-parser
-sphinx-markdown-builder
\ No newline at end of file
diff --git a/docs/source/_static/logo.png b/docs/source/_static/logo.png
new file mode 100644
index 000000000..aeabe61e2
Binary files /dev/null and b/docs/source/_static/logo.png differ
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 000000000..625f7d53a
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,69 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+from datetime import datetime
+
+# The package must be importable when Sphinx runs; its version labels the docs.
+from dataquality import __version__
+
+project = "dataquality"
+copyright = f"{datetime.now().year}, Galileo Technologies Inc."
+
+author = "Galileo Technologies Inc."
+release = __version__
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+    "sphinx.ext.napoleon",
+    "sphinx.ext.autodoc",
+    "myst_parser",
+    "sphinx.ext.autosummary",
+    "sphinx_autodoc_typehints",
+    "sphinx.ext.autosectionlabel",
+    "sphinx.ext.intersphinx",
+    "sphinx_markdown_builder",
+    "sphinxcontrib.autodoc_pydantic",
+]
+
+templates_path = ["_templates"]
+
+add_module_names = False
+autoclass_content = "both"
+# `autodoc_default_flags` was deprecated in Sphinx 1.8 and dropped in later
+# releases, where it is silently ignored; `autodoc_default_options` replaces it.
+# A value of True is equivalent to giving the bare flag on the directive.
+autodoc_default_options = {
+    "show-inheritance": True,
+    "members": True,
+    "undoc-members": True,
+}
+autodoc_member_order = "bysource"
+
+# autosummary
+autosummary_generate = True
+
+# autosectionlabel
+autosectionlabel_prefix_document = True
+
+# autodoc_pydantic
+autodoc_pydantic_model_show_json = False
+autodoc_pydantic_model_show_config_summary = False
+autodoc_pydantic_model_show_validator_summary = False
+autodoc_pydantic_model_show_validator_members = False
+
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = "furo"
+html_static_path = ["_static"]
+
+# Theme options
+html_logo = "_static/logo.png"
diff --git a/docs/source/dataquality.clients.rst b/docs/source/dataquality.clients.rst
new file mode 100644
index 000000000..ffd5365ab
--- /dev/null
+++ b/docs/source/dataquality.clients.rst
@@ -0,0 +1,29 @@
+dataquality.clients package
+===========================
+
+Submodules
+----------
+
+dataquality.clients.api module
+------------------------------
+
+.. automodule:: dataquality.clients.api
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.clients.objectstore module
+--------------------------------------
+
+.. automodule:: dataquality.clients.objectstore
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.clients
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.core.rst b/docs/source/dataquality.core.rst
new file mode 100644
index 000000000..cb9fd75e0
--- /dev/null
+++ b/docs/source/dataquality.core.rst
@@ -0,0 +1,53 @@
+dataquality.core package
+========================
+
+Submodules
+----------
+
+dataquality.core.auth module
+----------------------------
+
+.. automodule:: dataquality.core.auth
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.core.finish module
+------------------------------
+
+.. automodule:: dataquality.core.finish
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.core.init module
+----------------------------
+
+.. automodule:: dataquality.core.init
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.core.log module
+---------------------------
+
+.. automodule:: dataquality.core.log
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.core.report module
+------------------------------
+
+.. automodule:: dataquality.core.report
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.core
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.dq_auto.rst b/docs/source/dataquality.dq_auto.rst
new file mode 100644
index 000000000..2d8c35119
--- /dev/null
+++ b/docs/source/dataquality.dq_auto.rst
@@ -0,0 +1,77 @@
+dataquality.dq\_auto package
+============================
+
+Submodules
+----------
+
+dataquality.dq\_auto.auto module
+--------------------------------
+
+.. automodule:: dataquality.dq_auto.auto
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.dq\_auto.base\_data\_manager module
+-----------------------------------------------
+
+.. automodule:: dataquality.dq_auto.base_data_manager
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.dq\_auto.ner module
+-------------------------------
+
+.. automodule:: dataquality.dq_auto.ner
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.dq\_auto.ner\_trainer module
+----------------------------------------
+
+.. automodule:: dataquality.dq_auto.ner_trainer
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.dq\_auto.notebook module
+------------------------------------
+
+.. automodule:: dataquality.dq_auto.notebook
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.dq\_auto.schema module
+----------------------------------
+
+.. automodule:: dataquality.dq_auto.schema
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.dq\_auto.tc\_trainer module
+---------------------------------------
+
+.. automodule:: dataquality.dq_auto.tc_trainer
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.dq\_auto.text\_classification module
+------------------------------------------------
+
+.. automodule:: dataquality.dq_auto.text_classification
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.dq_auto
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.dq_start.rst b/docs/source/dataquality.dq_start.rst
new file mode 100644
index 000000000..3170eb0f9
--- /dev/null
+++ b/docs/source/dataquality.dq_start.rst
@@ -0,0 +1,10 @@
+dataquality.dq\_start package
+=============================
+
+Module contents
+---------------
+
+.. automodule:: dataquality.dq_start
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.integrations.rst b/docs/source/dataquality.integrations.rst
new file mode 100644
index 000000000..8b6860045
--- /dev/null
+++ b/docs/source/dataquality.integrations.rst
@@ -0,0 +1,101 @@
+dataquality.integrations package
+================================
+
+Subpackages
+-----------
+
+.. toctree::
+ :maxdepth: 4
+
+ dataquality.integrations.seq2seq
+
+Submodules
+----------
+
+dataquality.integrations.fastai module
+--------------------------------------
+
+.. automodule:: dataquality.integrations.fastai
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.hf module
+----------------------------------
+
+.. automodule:: dataquality.integrations.hf
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.jsl module
+-----------------------------------
+
+.. automodule:: dataquality.integrations.jsl
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.keras module
+-------------------------------------
+
+.. automodule:: dataquality.integrations.keras
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.lightning module
+-----------------------------------------
+
+.. automodule:: dataquality.integrations.lightning
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.setfit module
+--------------------------------------
+
+.. automodule:: dataquality.integrations.setfit
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.torch module
+-------------------------------------
+
+.. automodule:: dataquality.integrations.torch
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.torch\_semantic\_segmentation module
+-------------------------------------------------------------
+
+.. automodule:: dataquality.integrations.torch_semantic_segmentation
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.transformers\_trainer module
+-----------------------------------------------------
+
+.. automodule:: dataquality.integrations.transformers_trainer
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.ultralytics module
+-------------------------------------------
+
+.. automodule:: dataquality.integrations.ultralytics
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.integrations
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.integrations.seq2seq.formatters.rst b/docs/source/dataquality.integrations.seq2seq.formatters.rst
new file mode 100644
index 000000000..e3d369bb9
--- /dev/null
+++ b/docs/source/dataquality.integrations.seq2seq.formatters.rst
@@ -0,0 +1,37 @@
+dataquality.integrations.seq2seq.formatters package
+===================================================
+
+Submodules
+----------
+
+dataquality.integrations.seq2seq.formatters.alpaca module
+---------------------------------------------------------
+
+.. automodule:: dataquality.integrations.seq2seq.formatters.alpaca
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.seq2seq.formatters.base module
+-------------------------------------------------------
+
+.. automodule:: dataquality.integrations.seq2seq.formatters.base
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.seq2seq.formatters.chat module
+-------------------------------------------------------
+
+.. automodule:: dataquality.integrations.seq2seq.formatters.chat
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.integrations.seq2seq.formatters
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.integrations.seq2seq.rst b/docs/source/dataquality.integrations.seq2seq.rst
new file mode 100644
index 000000000..e745bc96f
--- /dev/null
+++ b/docs/source/dataquality.integrations.seq2seq.rst
@@ -0,0 +1,53 @@
+dataquality.integrations.seq2seq package
+========================================
+
+Subpackages
+-----------
+
+.. toctree::
+ :maxdepth: 4
+
+ dataquality.integrations.seq2seq.formatters
+
+Submodules
+----------
+
+dataquality.integrations.seq2seq.auto module
+--------------------------------------------
+
+.. automodule:: dataquality.integrations.seq2seq.auto
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.seq2seq.core module
+--------------------------------------------
+
+.. automodule:: dataquality.integrations.seq2seq.core
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.seq2seq.s2s\_trainer module
+----------------------------------------------------
+
+.. automodule:: dataquality.integrations.seq2seq.s2s_trainer
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.integrations.seq2seq.schema module
+----------------------------------------------
+
+.. automodule:: dataquality.integrations.seq2seq.schema
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.integrations.seq2seq
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.loggers.data_logger.rst b/docs/source/dataquality.loggers.data_logger.rst
new file mode 100644
index 000000000..e4c3bd092
--- /dev/null
+++ b/docs/source/dataquality.loggers.data_logger.rst
@@ -0,0 +1,85 @@
+dataquality.loggers.data\_logger package
+========================================
+
+Subpackages
+-----------
+
+.. toctree::
+ :maxdepth: 4
+
+ dataquality.loggers.data_logger.seq2seq
+
+Submodules
+----------
+
+dataquality.loggers.data\_logger.base\_data\_logger module
+----------------------------------------------------------
+
+.. automodule:: dataquality.loggers.data_logger.base_data_logger
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.data\_logger.image\_classification module
+-------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.data_logger.image_classification
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.data\_logger.object\_detection module
+---------------------------------------------------------
+
+.. automodule:: dataquality.loggers.data_logger.object_detection
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.data\_logger.semantic\_segmentation module
+--------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.data_logger.semantic_segmentation
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.data\_logger.tabular\_classification module
+---------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.data_logger.tabular_classification
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.data\_logger.text\_classification module
+------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.data_logger.text_classification
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.data\_logger.text\_multi\_label module
+----------------------------------------------------------
+
+.. automodule:: dataquality.loggers.data_logger.text_multi_label
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.data\_logger.text\_ner module
+-------------------------------------------------
+
+.. automodule:: dataquality.loggers.data_logger.text_ner
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.loggers.data_logger
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.loggers.data_logger.seq2seq.rst b/docs/source/dataquality.loggers.data_logger.seq2seq.rst
new file mode 100644
index 000000000..b908b65f6
--- /dev/null
+++ b/docs/source/dataquality.loggers.data_logger.seq2seq.rst
@@ -0,0 +1,45 @@
+dataquality.loggers.data\_logger.seq2seq package
+================================================
+
+Submodules
+----------
+
+dataquality.loggers.data\_logger.seq2seq.chat module
+----------------------------------------------------
+
+.. automodule:: dataquality.loggers.data_logger.seq2seq.chat
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.data\_logger.seq2seq.completion module
+----------------------------------------------------------
+
+.. automodule:: dataquality.loggers.data_logger.seq2seq.completion
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.data\_logger.seq2seq.formatters module
+----------------------------------------------------------
+
+.. automodule:: dataquality.loggers.data_logger.seq2seq.formatters
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.data\_logger.seq2seq.seq2seq\_base module
+-------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.data_logger.seq2seq.seq2seq_base
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.loggers.data_logger.seq2seq
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.loggers.logger_config.rst b/docs/source/dataquality.loggers.logger_config.rst
new file mode 100644
index 000000000..ee2c2b463
--- /dev/null
+++ b/docs/source/dataquality.loggers.logger_config.rst
@@ -0,0 +1,85 @@
+dataquality.loggers.logger\_config package
+==========================================
+
+Subpackages
+-----------
+
+.. toctree::
+ :maxdepth: 4
+
+ dataquality.loggers.logger_config.seq2seq
+
+Submodules
+----------
+
+dataquality.loggers.logger\_config.base\_logger\_config module
+--------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.logger_config.base_logger_config
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.logger\_config.image\_classification module
+---------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.logger_config.image_classification
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.logger\_config.object\_detection module
+-----------------------------------------------------------
+
+.. automodule:: dataquality.loggers.logger_config.object_detection
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.logger\_config.semantic\_segmentation module
+----------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.logger_config.semantic_segmentation
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.logger\_config.tabular\_classification module
+-----------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.logger_config.tabular_classification
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.logger\_config.text\_classification module
+--------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.logger_config.text_classification
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.logger\_config.text\_multi\_label module
+------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.logger_config.text_multi_label
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.logger\_config.text\_ner module
+---------------------------------------------------
+
+.. automodule:: dataquality.loggers.logger_config.text_ner
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.loggers.logger_config
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.loggers.logger_config.seq2seq.rst b/docs/source/dataquality.loggers.logger_config.seq2seq.rst
new file mode 100644
index 000000000..d6f4b785e
--- /dev/null
+++ b/docs/source/dataquality.loggers.logger_config.seq2seq.rst
@@ -0,0 +1,37 @@
+dataquality.loggers.logger\_config.seq2seq package
+==================================================
+
+Submodules
+----------
+
+dataquality.loggers.logger\_config.seq2seq.chat module
+------------------------------------------------------
+
+.. automodule:: dataquality.loggers.logger_config.seq2seq.chat
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.logger\_config.seq2seq.completion module
+------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.logger_config.seq2seq.completion
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.logger\_config.seq2seq.seq2seq\_base module
+---------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.logger_config.seq2seq.seq2seq_base
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.loggers.logger_config.seq2seq
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.loggers.model_logger.rst b/docs/source/dataquality.loggers.model_logger.rst
new file mode 100644
index 000000000..133909cf8
--- /dev/null
+++ b/docs/source/dataquality.loggers.model_logger.rst
@@ -0,0 +1,85 @@
+dataquality.loggers.model\_logger package
+=========================================
+
+Subpackages
+-----------
+
+.. toctree::
+ :maxdepth: 4
+
+ dataquality.loggers.model_logger.seq2seq
+
+Submodules
+----------
+
+dataquality.loggers.model\_logger.base\_model\_logger module
+------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.model_logger.base_model_logger
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.model\_logger.image\_classification module
+--------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.model_logger.image_classification
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.model\_logger.object\_detection module
+----------------------------------------------------------
+
+.. automodule:: dataquality.loggers.model_logger.object_detection
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.model\_logger.semantic\_segmentation module
+---------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.model_logger.semantic_segmentation
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.model\_logger.tabular\_classification module
+----------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.model_logger.tabular_classification
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.model\_logger.text\_classification module
+-------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.model_logger.text_classification
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.model\_logger.text\_multi\_label module
+-----------------------------------------------------------
+
+.. automodule:: dataquality.loggers.model_logger.text_multi_label
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.model\_logger.text\_ner module
+--------------------------------------------------
+
+.. automodule:: dataquality.loggers.model_logger.text_ner
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.loggers.model_logger
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.loggers.model_logger.seq2seq.rst b/docs/source/dataquality.loggers.model_logger.seq2seq.rst
new file mode 100644
index 000000000..ee691f2f0
--- /dev/null
+++ b/docs/source/dataquality.loggers.model_logger.seq2seq.rst
@@ -0,0 +1,45 @@
+dataquality.loggers.model\_logger.seq2seq package
+=================================================
+
+Submodules
+----------
+
+dataquality.loggers.model\_logger.seq2seq.chat module
+-----------------------------------------------------
+
+.. automodule:: dataquality.loggers.model_logger.seq2seq.chat
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.model\_logger.seq2seq.completion module
+-----------------------------------------------------------
+
+.. automodule:: dataquality.loggers.model_logger.seq2seq.completion
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.model\_logger.seq2seq.formatters module
+-----------------------------------------------------------
+
+.. automodule:: dataquality.loggers.model_logger.seq2seq.formatters
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.loggers.model\_logger.seq2seq.seq2seq\_base module
+--------------------------------------------------------------
+
+.. automodule:: dataquality.loggers.model_logger.seq2seq.seq2seq_base
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.loggers.model_logger.seq2seq
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.loggers.rst b/docs/source/dataquality.loggers.rst
new file mode 100644
index 000000000..518d3d9e0
--- /dev/null
+++ b/docs/source/dataquality.loggers.rst
@@ -0,0 +1,31 @@
+dataquality.loggers package
+===========================
+
+Subpackages
+-----------
+
+.. toctree::
+ :maxdepth: 4
+
+ dataquality.loggers.data_logger
+ dataquality.loggers.logger_config
+ dataquality.loggers.model_logger
+
+Submodules
+----------
+
+dataquality.loggers.base\_logger module
+---------------------------------------
+
+.. automodule:: dataquality.loggers.base_logger
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.loggers
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.rst b/docs/source/dataquality.rst
new file mode 100644
index 000000000..67cc426ba
--- /dev/null
+++ b/docs/source/dataquality.rst
@@ -0,0 +1,68 @@
+dataquality package
+===================
+
+Subpackages
+-----------
+
+.. toctree::
+ :maxdepth: 4
+
+ dataquality.clients
+ dataquality.core
+ dataquality.dq_auto
+ dataquality.dq_start
+ dataquality.integrations
+ dataquality.loggers
+ dataquality.schemas
+ dataquality.utils
+
+Submodules
+----------
+
+dataquality.analytics module
+----------------------------
+
+.. automodule:: dataquality.analytics
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.dqyolo module
+-------------------------
+
+.. automodule:: dataquality.dqyolo
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.exceptions module
+-----------------------------
+
+.. automodule:: dataquality.exceptions
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.internal module
+---------------------------
+
+.. automodule:: dataquality.internal
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.metrics module
+--------------------------
+
+.. automodule:: dataquality.metrics
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.schemas.rst b/docs/source/dataquality.schemas.rst
new file mode 100644
index 000000000..cbcd974a1
--- /dev/null
+++ b/docs/source/dataquality.schemas.rst
@@ -0,0 +1,149 @@
+dataquality.schemas package
+===========================
+
+Submodules
+----------
+
+dataquality.schemas.condition module
+------------------------------------
+
+.. automodule:: dataquality.schemas.condition
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.cv module
+-----------------------------
+
+.. automodule:: dataquality.schemas.cv
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.dataframe module
+------------------------------------
+
+.. automodule:: dataquality.schemas.dataframe
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.edit module
+-------------------------------
+
+.. automodule:: dataquality.schemas.edit
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.hf module
+-----------------------------
+
+.. automodule:: dataquality.schemas.hf
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.job module
+------------------------------
+
+.. automodule:: dataquality.schemas.job
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.metrics module
+----------------------------------
+
+.. automodule:: dataquality.schemas.metrics
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.model module
+--------------------------------
+
+.. automodule:: dataquality.schemas.model
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.ner module
+------------------------------
+
+.. automodule:: dataquality.schemas.ner
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.report module
+---------------------------------
+
+.. automodule:: dataquality.schemas.report
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.request\_type module
+----------------------------------------
+
+.. automodule:: dataquality.schemas.request_type
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.route module
+--------------------------------
+
+.. automodule:: dataquality.schemas.route
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.semantic\_segmentation module
+-------------------------------------------------
+
+.. automodule:: dataquality.schemas.semantic_segmentation
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.seq2seq module
+----------------------------------
+
+.. automodule:: dataquality.schemas.seq2seq
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.split module
+--------------------------------
+
+.. automodule:: dataquality.schemas.split
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.task\_type module
+-------------------------------------
+
+.. automodule:: dataquality.schemas.task_type
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.schemas.torch module
+--------------------------------
+
+.. automodule:: dataquality.schemas.torch
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.schemas
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.utils.rst b/docs/source/dataquality.utils.rst
new file mode 100644
index 000000000..12aaf1d3b
--- /dev/null
+++ b/docs/source/dataquality.utils.rst
@@ -0,0 +1,294 @@
+dataquality.utils package
+=========================
+
+Subpackages
+-----------
+
+.. toctree::
+ :maxdepth: 4
+
+ dataquality.utils.semantic_segmentation
+ dataquality.utils.seq2seq
+
+Submodules
+----------
+
+dataquality.utils.arrow module
+------------------------------
+
+.. automodule:: dataquality.utils.arrow
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.auth module
+-----------------------------
+
+.. automodule:: dataquality.utils.auth
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.auto module
+-----------------------------
+
+.. automodule:: dataquality.utils.auto
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.auto\_trainer module
+--------------------------------------
+
+.. automodule:: dataquality.utils.auto_trainer
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.cuda module
+-----------------------------
+
+.. automodule:: dataquality.utils.cuda
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.cv module
+---------------------------
+
+.. automodule:: dataquality.utils.cv
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.cv\_smart\_features module
+--------------------------------------------
+
+.. automodule:: dataquality.utils.cv_smart_features
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.dq\_logger module
+-----------------------------------
+
+.. automodule:: dataquality.utils.dq_logger
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.dqyolo module
+-------------------------------
+
+.. automodule:: dataquality.utils.dqyolo
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.emb module
+----------------------------
+
+.. automodule:: dataquality.utils.emb
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.file module
+-----------------------------
+
+.. automodule:: dataquality.utils.file
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.hdf5\_store module
+------------------------------------
+
+.. automodule:: dataquality.utils.hdf5_store
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.helpers module
+--------------------------------
+
+.. automodule:: dataquality.utils.helpers
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.hf\_images module
+-----------------------------------
+
+.. automodule:: dataquality.utils.hf_images
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.hf\_tokenizer module
+--------------------------------------
+
+.. automodule:: dataquality.utils.hf_tokenizer
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.imports module
+--------------------------------
+
+.. automodule:: dataquality.utils.imports
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.jsl module
+----------------------------
+
+.. automodule:: dataquality.utils.jsl
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.keras module
+------------------------------
+
+.. automodule:: dataquality.utils.keras
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.ml module
+---------------------------
+
+.. automodule:: dataquality.utils.ml
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.name module
+-----------------------------
+
+.. automodule:: dataquality.utils.name
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.od module
+---------------------------
+
+.. automodule:: dataquality.utils.od
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.patcher module
+--------------------------------
+
+.. automodule:: dataquality.utils.patcher
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.profiler module
+---------------------------------
+
+.. automodule:: dataquality.utils.profiler
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.setfit module
+-------------------------------
+
+.. automodule:: dataquality.utils.setfit
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.task\_helpers module
+--------------------------------------
+
+.. automodule:: dataquality.utils.task_helpers
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.tf module
+---------------------------
+
+.. automodule:: dataquality.utils.tf
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.thread\_pool module
+-------------------------------------
+
+.. automodule:: dataquality.utils.thread_pool
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.torch module
+------------------------------
+
+.. automodule:: dataquality.utils.torch
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.transformers module
+-------------------------------------
+
+.. automodule:: dataquality.utils.transformers
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.ultralytics module
+------------------------------------
+
+.. automodule:: dataquality.utils.ultralytics
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.upload module
+-------------------------------
+
+.. automodule:: dataquality.utils.upload
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.upload\_model module
+--------------------------------------
+
+.. automodule:: dataquality.utils.upload_model
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.vaex module
+-----------------------------
+
+.. automodule:: dataquality.utils.vaex
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.version module
+--------------------------------
+
+.. automodule:: dataquality.utils.version
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.utils
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.utils.semantic_segmentation.rst b/docs/source/dataquality.utils.semantic_segmentation.rst
new file mode 100644
index 000000000..c99c5a0a2
--- /dev/null
+++ b/docs/source/dataquality.utils.semantic_segmentation.rst
@@ -0,0 +1,61 @@
+dataquality.utils.semantic\_segmentation package
+================================================
+
+Submodules
+----------
+
+dataquality.utils.semantic\_segmentation.constants module
+---------------------------------------------------------
+
+.. automodule:: dataquality.utils.semantic_segmentation.constants
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.semantic\_segmentation.errors module
+------------------------------------------------------
+
+.. automodule:: dataquality.utils.semantic_segmentation.errors
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.semantic\_segmentation.lm module
+--------------------------------------------------
+
+.. automodule:: dataquality.utils.semantic_segmentation.lm
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.semantic\_segmentation.metrics module
+-------------------------------------------------------
+
+.. automodule:: dataquality.utils.semantic_segmentation.metrics
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.semantic\_segmentation.polygons module
+--------------------------------------------------------
+
+.. automodule:: dataquality.utils.semantic_segmentation.polygons
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.semantic\_segmentation.utils module
+-----------------------------------------------------
+
+.. automodule:: dataquality.utils.semantic_segmentation.utils
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.utils.semantic_segmentation
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/dataquality.utils.seq2seq.rst b/docs/source/dataquality.utils.seq2seq.rst
new file mode 100644
index 000000000..f5cdc1ddb
--- /dev/null
+++ b/docs/source/dataquality.utils.seq2seq.rst
@@ -0,0 +1,53 @@
+dataquality.utils.seq2seq package
+=================================
+
+Submodules
+----------
+
+dataquality.utils.seq2seq.data\_error\_potential module
+-------------------------------------------------------
+
+.. automodule:: dataquality.utils.seq2seq.data_error_potential
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.seq2seq.decoder\_only module
+----------------------------------------------
+
+.. automodule:: dataquality.utils.seq2seq.decoder_only
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.seq2seq.generation module
+-------------------------------------------
+
+.. automodule:: dataquality.utils.seq2seq.generation
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.seq2seq.logprobs module
+-----------------------------------------
+
+.. automodule:: dataquality.utils.seq2seq.logprobs
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+dataquality.utils.seq2seq.offsets module
+----------------------------------------
+
+.. automodule:: dataquality.utils.seq2seq.offsets
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: dataquality.utils.seq2seq
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 000000000..c8eabe337
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,20 @@
+################
+📊 DataQuality
+################
+
+`dataquality` is the Python library to interact with `Galileo's NLP and CV Studios <https://www.rungalileo.io/>`_.
+
+Here you'll find an overview and API documentation. The package is available to download on `PyPI <https://pypi.org/project/dataquality/>`_.
+
+Table of Contents
+-----------------
+
+.. toctree::
+ :maxdepth: 2
+
+ dataquality
+ dataquality.integrations
+ Galileo Docs <https://docs.rungalileo.io/>
+
+- :ref:`genindex`
+- :ref:`search`
diff --git a/docs/source/modules.rst b/docs/source/modules.rst
new file mode 100644
index 000000000..96a5d78c8
--- /dev/null
+++ b/docs/source/modules.rst
@@ -0,0 +1,7 @@
+dataquality
+===========
+
+.. toctree::
+ :maxdepth: 4
+
+ dataquality
diff --git a/pyproject.toml b/pyproject.toml
index 045c0211c..633731ac0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,7 +65,8 @@ doc = [
"myst-parser",
"sphinx-markdown-builder",
"sphinx-autobuild",
- "sphinx-markdown-builder"
+ "sphinx-markdown-builder",
+ "autodoc-pydantic"
]
test = [
"ultralytics>=8.0.209",
diff --git a/tasks.py b/tasks.py
index 8a3d6968b..b895f4dd1 100644
--- a/tasks.py
+++ b/tasks.py
@@ -146,12 +146,9 @@ def docs_build(ctx: Context) -> None:
Build the docs.
"""
- with ctx.cd("docs/autodocs"):
- ctx.run(
- "make markdown",
- pty=True,
- echo=True,
- )
+ ctx.run("sphinx-apidoc -f -o docs/source/ dataquality/", pty=True, echo=True)
+ ctx.run("sphinx-build -M markdown docs/source docs/build/md", echo=True)
+ ctx.run("sphinx-build -b html docs/source/ docs/build/html", echo=True)
@unique