diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
deleted file mode 100644
index 241085f59..000000000
--- a/.github/workflows/docs.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-name: docs
-
-on:
-  push:
-    branches:
-      - main
-
-jobs:
-  test:
-    runs-on: ubuntu-latest
-
-    strategy:
-      matrix:
-        python-version: [3.8]
-      fail-fast: false
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: set up python
-        uses: actions/setup-python@v4
-        with:
-          cache: "pip"
-          cache-dependency-path: "pyproject.toml"
-          python-version: ${{ matrix.python-version }}
-
-      - name: install invoke
-        run: pip install invoke
-
-      - name: install dependencies
-        run: inv install
-
-      - name: docs
-        run: inv docs-build
-
-      - name: Pushes to another repository
-        uses: cpina/github-action-push-to-another-repository@main
-        env:
-          API_TOKEN_GITHUB: ${{ secrets.GH_TOKEN }}
-        with:
-          source-directory: "docs/autodocs/_build/markdown"
-          target-directory: "api/python-sdk/autodocs"
-          destination-github-username: "rungalileo"
-          destination-repository-name: "docs"
-          user-email: team@rungalileo.io
diff --git a/.github/workflows/publish-docs.yaml b/.github/workflows/publish-docs.yaml
new file mode 100644
index 000000000..f70a63004
--- /dev/null
+++ b/.github/workflows/publish-docs.yaml
@@ -0,0 +1,59 @@
+# Builds the HTML docs with `invoke docs-build` and pushes docs/build/html to
+# the rungalileo/dataquality-docs repository (CNAME: dataquality.docs.rungalileo.io).
+name: Publish Docs
+
+on:
+  workflow_dispatch:
+  workflow_run:
+    workflows: ["publish"]
+    types:
+      - completed
+
+jobs:
+  publish-docs:
+    runs-on: ubuntu-latest
+    # workflow_run with "types: completed" fires for every conclusion
+    # (success, failure, cancelled). Only publish for a manual dispatch or
+    # when the triggering "publish" run actually succeeded.
+    if: >-
+      github.event_name == 'workflow_dispatch' ||
+      github.event.workflow_run.conclusion == 'success'
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          cache: "pip"
+          cache-dependency-path: "pyproject.toml"
+
+      - name: Install Dependencies
+        # Install all dependencies so that the docs can use the function and type signatures.
+        run: |
+          pipx install invoke
+          invoke install
+
+      - name: Build Docs
+        run: invoke docs-build
+
+      - name: Add GitHub Pages Config
+        run: |
+          touch docs/build/html/.nojekyll
+          echo "dataquality.docs.rungalileo.io" > docs/build/html/CNAME
+
+      - name: Pushes to another repository
+        # NOTE(review): this third-party action is referenced by a moving
+        # branch (@main) while it receives the SSH deploy key; consider
+        # pinning it to a release tag or a full commit SHA.
+        uses: cpina/github-action-push-to-another-repository@main
+        env:
+          SSH_DEPLOY_KEY: ${{ secrets.SSH_DATAQUALITY_DOCS_DEPLOY_KEY }}
+        with:
+          source-directory: "docs/build/html"
+          target-directory: "docs/"
+          destination-github-username: "rungalileo"
+          destination-repository-name: "dataquality-docs"
+          user-name: galileo-automation
+          user-email: ci@rungalileo.io
diff --git a/.gitignore b/.gitignore
index cc6940c6a..9701632bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,8 +76,8 @@ instance/
 .scrapy
 
 # Sphinx documentation
-docs/_build/html
-docs/_build/doctrees
+docs/build/html
+docs/build/md
 
 # PyBuilder
 .pybuilder/
@@ -188,4 +188,4 @@ large_run.ipynb
 *.cache
 
 .python-version
-local_notebooks
\ No newline at end of file
+local_notebooks
diff --git a/docs/autodocs/Makefile b/docs/autodocs/Makefile
deleted file mode 100644
index d4bb2cbb9..000000000
--- a/docs/autodocs/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line, and also
-# from the environment for the first two.
-SPHINXOPTS    ?=
-SPHINXBUILD   ?= sphinx-build
-SOURCEDIR     = .
-BUILDDIR      = _build
-
-# Put it first so that "make" without argument is like "make help".
-help:
-	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/autodocs/Readme.md b/docs/autodocs/Readme.md deleted file mode 100644 index 21fd4a9f1..000000000 --- a/docs/autodocs/Readme.md +++ /dev/null @@ -1,3 +0,0 @@ -# Sphinx automatic docs - -Modify the `docs/autodocs/api/dataquality.rst` file to add a new documentation to the reference \ No newline at end of file diff --git a/docs/autodocs/_build/doctrees/api/dataquality.doctree b/docs/autodocs/_build/doctrees/api/dataquality.doctree deleted file mode 100644 index 840f80eb2..000000000 Binary files a/docs/autodocs/_build/doctrees/api/dataquality.doctree and /dev/null differ diff --git a/docs/autodocs/_build/doctrees/environment.pickle b/docs/autodocs/_build/doctrees/environment.pickle deleted file mode 100644 index d10869d9a..000000000 Binary files a/docs/autodocs/_build/doctrees/environment.pickle and /dev/null differ diff --git a/docs/autodocs/_build/doctrees/index.doctree b/docs/autodocs/_build/doctrees/index.doctree deleted file mode 100644 index aa40e0578..000000000 Binary files a/docs/autodocs/_build/doctrees/index.doctree and /dev/null differ diff --git a/docs/autodocs/_build/markdown/api/dataquality.md b/docs/autodocs/_build/markdown/api/dataquality.md deleted file mode 100644 index d5ba6a9f5..000000000 --- a/docs/autodocs/_build/markdown/api/dataquality.md +++ /dev/null @@ -1,1274 +0,0 @@ -# dataquality - -dataquality - - -### login() -Log into your Galileo environment. - -The function will prompt your for an Authorization Token (api key) that you can -access from the console. 
- -To skip the prompt for automated workflows, you can set GALILEO_USERNAME -(your email) and GALILEO_PASSWORD if you signed up with an email and password - - -* **Return type** - - `None` - - - -### init(task_type, project_name=None, run_name=None, is_public=True, overwrite_local=True) -Start a run - -Initialize a new run and new project, initialize a new run in an existing project, -or reinitialize an existing run in an existing project. - -Before creating the project, check: -- The user is valid, login if not -- The DQ client version is compatible with API version - -Optionally provide project and run names to create a new project/run or restart -existing ones. - - -* **Return type** - - `None` - - - -* **Parameters** - - **task_type** (`str`) -- The task type for modeling. This must be one of the valid - - -dataquality.schemas.task_type.TaskType options -:type project_name: `Optional`[`str`] -:param project_name: The project name. If not passed in, a random one will be -generated. If provided, and the project does not exist, it will be created. If it -does exist, it will be set. -:type run_name: `Optional`[`str`] -:param run_name: The run name. If not passed in, a random one will be -generated. If provided, and the project does not exist, it will be created. If it -does exist, it will be set. -:type is_public: `bool` -:param is_public: Boolean value that sets the project's visibility. Default True. -:type overwrite_local: `bool` -:param overwrite_local: If True, the current project/run log directory will be -cleared during this function. If logging over many sessions with checkpoints, you -may want to set this to False. Default True - - -### log_model_outputs(\*, embs, ids, split=None, epoch=None, logits=None, probs=None, inference_name=None, exclude_embs=False) -Logs model outputs for model during training/test/validation. 
- - -* **Parameters** - - - * **embs** (`Union`[`List`, `ndarray`, `None`]) -- The embeddings per output sample - - - * **ids** (`Union`[`List`, `ndarray`]) -- The ids for each sample. Must match input ids of logged samples - - - * **split** (`Optional`[`Split`]) -- The current split. Must be set either here or via dq.set_split - - - * **epoch** (`Optional`[`int`]) -- The current epoch. Must be set either here or via dq.set_epoch - - - * **logits** (`Union`[`List`, `ndarray`, `None`]) -- The logits for each sample - - - * **probs** (`Union`[`List`, `ndarray`, `None`]) -- Deprecated, use logits. If passed in, a softmax will NOT be applied - - - * **inference_name** (`Optional`[`str`]) -- Inference name indicator for this inference split. - If logging for an inference split, this is required. - - - * **exclude_embs** (`bool`) -- Optional flag to exclude embeddings from logging. If True and - embs is set to None, this will generate random embs for each sample. - - - -* **Return type** - - `None` - - -The expected argument shapes come from the task_type being used -See dq.docs() for more task specific details on parameter shape - - -### finish(last_epoch=None, wait=True, create_data_embs=False) -Finishes the current run and invokes a job - - -* **Parameters** - - - * **last_epoch** (`Optional`[`int`]) -- If set, only epochs up to this value will be uploaded/processed - This is inclusive, so setting last_epoch to 5 would upload epochs 0,1,2,3,4,5 - - - * **wait** (`bool`) -- If true, after uploading the data, this will wait for the - run to be processed by the Galileo server. If false, you can manually wait - for the run by calling dq.wait_for_run() Default True - - - * **create_data_embs** (`bool`) -- If True, an off-the-shelf transformer will run on the raw - text input to generate data-level embeddings. These will be available in the - data view tab of the Galileo console. 
You can also access these embeddings - via dq.metrics.get_data_embeddings() - - - -* **Return type** - - `str` - - - -### set_labels_for_run(labels) -Creates the mapping of the labels for the model to their respective indexes. -:rtype: `None` - - -* **Parameters** - - **labels** (`Union`[`List`[`List`[`str`]], `List`[`str`]]) -- An ordered list of labels (ie ['dog','cat','fish'] - - -If this is a multi-label type, then labels are a list of lists where each inner -list indicates the label for the given task - -This order MUST match the order of probabilities that the model outputs. - -In the multi-label case, the outer order (order of the tasks) must match the -task-order of the task-probabilities logged as well. - - -### set_tasks_for_run(tasks, binary=True) -Sets the task names for the run (multi-label case only). - -This order MUST match the order of the labels list provided in log_input_data -and the order of the probability vectors provided in log_model_outputs. - -This also must match the order of the labels logged in set_labels_for_run (meaning -that the first list of labels must be the labels of the first task passed in here) - - -* **Return type** - - `None` - - - -* **Parameters** - - - * **tasks** (`List`[`str`]) -- The list of tasks for your run - - - * **binary** (`bool`) -- Whether this is a binary multi label run. If true, tasks will also - - -be set as your labels, and you should NOT call dq.set_labels_for_run it will be -handled for you. Default True - - -### set_epoch(epoch) -Set the current epoch. - -When set, logging model outputs will use this if not logged explicitly - - -* **Return type** - - `None` - - - -### set_split(split, inference_name=None) -Set the current split. 
- -When set, logging data inputs/model outputs will use this if not logged explicitly -When setting split to inference, inference_name must be included - - -* **Return type** - - `None` - - - -### log_data_sample(\*, text, id, \*\*kwargs) -Log a single input example to disk - -Fields are expected singular elements. Field names are in the singular of -log_input_samples (texts -> text) -The expected arguments come from the task_type being used: See dq.docs() for details - - -* **Parameters** - - - * **text** (`str`) -- List[str] the input samples to your model - - - * **id** (`int`) -- List[int | str] the ids per sample - - - * **split** -- Optional[str] the split for this data. Can also be set via - dq.set_split - - - * **kwargs** (`Any`) -- See dq.docs() for details on other task specific parameters - - - -* **Return type** - - `None` - - - -### log_dataset(dataset, \*, batch_size=100000, text='text', id='id', split=None, meta=None, \*\*kwargs) -Log an iterable or other dataset to disk. Useful for logging memory mapped files - -Dataset provided must be an iterable that can be traversed row by row, and for each -row, the fields can be indexed into either via string keys or int indexes. 
Pandas -and Vaex dataframes are also allowed, as well as HuggingFace Datasets - -valid examples: - - d = [ - - {"my_text": "sample1", "my_labels": "A", "my_id": 1, "sample_quality": 5.3}, - {"my_text": "sample2", "my_labels": "A", "my_id": 2, "sample_quality": 9.1}, - {"my_text": "sample3", "my_labels": "B", "my_id": 3, "sample_quality": 2.7}, - - ] - dq.log_dataset( - - > d, text="my_text", id="my_id", label="my_labels", meta=["sample_quality"] - - ) - - Logging a pandas dataframe, df: - - text label id sample_quality - - 0 sample1 A 1 5.3 - 1 sample2 A 2 9.1 - 2 sample3 B 3 2.7 - # We don't need to set text id or label because it matches the default - dq.log_dataset(d, meta=["sample_quality"]) - - Logging and iterable of tuples: - d = [ - - > ("sample1", "A", "ID1"), - > ("sample2", "A", "ID2"), - > ("sample3", "B", "ID3"), - - ] - dq.log_dataset(d, text=0, id=2, label=1) - -Invalid example: - - d = { - - "my_text": ["sample1", "sample2", "sample3"], - "my_labels": ["A", "A", "B"], - "my_id": [1, 2, 3], - "sample_quality": [5.3, 9.1, 2.7] - - } - -In the invalid case, use dq.log_data_samples: - - meta = {"sample_quality": d["sample_quality"]} - dq.log_data_samples( - - > texts=d["my_text"], labels=d["my_labels"], ids=d["my_ids"], meta=meta - - ) - -Keyword arguments are specific to the task type. See dq.docs() for details - - -* **Parameters** - - - * **dataset** (`TypeVar`(`DataSet`, bound= `Union`[`Iterable`, `DataFrame`, `DataFrame`])) -- The iterable or dataframe to log - - - * **text** (`Union`[`str`, `int`]) -- str | int The column, key, or int index for text data. Default "text" - - - * **id** (`Union`[`str`, `int`]) -- str | int The column, key, or int index for id data. Default "id" - - - * **split** (`Optional`[`Split`]) -- Optional[str] the split for this data. Can also be set via - dq.set_split - - - * **meta** (`Optional`[`List`[`Union`[`str`, `int`]]]) -- List[str | int] Additional keys/columns to your input data to be - logged as metadata. 
Consider a pandas dataframe, this would be the list of - - - * **kwargs** (`Any`) -- See help(dq.get_data_logger().log_dataset) for more details here - - - -* **Batch_size** - - The number of data samples to log at a time. Useful when logging a - memory mapped dataset. A larger batch_size will result in faster logging at the - expense of more memory usage. Default 100,000 - - - -* **Return type** - - `None` - columns corresponding to each metadata field to log - - -or dq.docs() for more general task details - - -### auto(hf_data=None, hf_inference_names=None, train_data=None, val_data=None, test_data=None, inference_data=None, max_padding_length=200, hf_model='distilbert-base-uncased', labels=None, project_name=None, run_name=None, wait=True, create_data_embs=False) -Automatically gets insights on a text classification or NER dataset - -Given either a pandas dataframe, file_path, or huggingface dataset path, this -function will load the data, train a huggingface transformer model, and -provide Galileo insights via a link to the Galileo Console - -One of hf_data, train_data should be provided. If neither of those are, a -demo dataset will be loaded by Galileo for training. - - -* **Parameters** - - - * **hf_data** (`Union`[`DatasetDict`, `str`, `None`]) -- Union[DatasetDict, str] Use this param if you have huggingface - data in the hub or in memory. Otherwise see train_data, val_data, - and test_data. If provided, train_data, val_data, and test_data are ignored. - - - * **hf_inference_names** (`Optional`[`List`[`str`]]) -- Use this param alongside hf_data if you have splits - you'd like to consider as inference. A list of key names in hf_data - to be run as inference runs after training. Any keys set must exist in hf_data - - - * **train_data** (`Union`[`DataFrame`, `Dataset`, `str`, `None`]) -- Optional training data to use. 
Can be one of - \* Pandas dataframe - \* Huggingface dataset - \* Path to a local file - \* Huggingface dataset hub path - - - * **val_data** (`Union`[`DataFrame`, `Dataset`, `str`, `None`]) -- Optional validation data to use. The validation data is what is - used for the evaluation dataset in huggingface, and what is used for early - stopping. If not provided, but test_data is, that will be used as the evaluation - set. If neither val nor test are available, the train data will be randomly - split 80/20 for use as evaluation data. - Can be one of - \* Pandas dataframe - \* Huggingface dataset - \* Path to a local file - \* Huggingface dataset hub path - - - * **test_data** (`Union`[`DataFrame`, `Dataset`, `str`, `None`]) -- Optional test data to use. The test data, if provided with val, - will be used after training is complete, as the held-out set. If no validation - data is provided, this will instead be used as the evaluation set. - Can be one of - \* Pandas dataframe - \* Huggingface dataset - \* Path to a local file - \* Huggingface dataset hub path - - - * **inference_data** (`Optional`[`Dict`[`str`, `Union`[`DataFrame`, `Dataset`, `str`]]]) -- User this param to include inference data alongside the - train_data param. If you are passing data via the hf_data parameter, you - should use the hf_inference_names param. Optional inference datasets to run - with after training completes. The structure is a dictionary with the - key being the inference name and the value one of - \* Pandas dataframe - \* Huggingface dataset - \* Path to a local file - \* Huggingface dataset hub path - - - * **max_padding_length** (`int`) -- The max length for padding the input text - during tokenization. Default 200 - - - * **hf_model** (`str`) -- The pretrained AutoModel from huggingface that will be used to - tokenize and train on the provided data. Default distilbert-base-uncased - - - * **labels** (`Optional`[`List`[`str`]]) -- Optional list of labels for this dataset. 
If not provided, they - will attempt to be extracted from the data - - - * **project_name** (`Optional`[`str`]) -- Optional project name. If not set, a random name will - be generated - - - * **run_name** (`Optional`[`str`]) -- Optional run name for this data. If not set, a random name will - be generated - - - * **wait** (`bool`) -- Whether to wait for Galileo to complete processing your run. - Default True - - - * **create_data_embs** (`bool`) -- Whether to create data embeddings for this run. If True, - Sentence-Transformers will be used to generate data embeddings for this dataset - and uploaded with this run. You can access these embeddings via - dq.metrics.get_data_embeddings in the emb column or - dq.metrics.get_dataframe(..., include_data_embs=True) in the data_emb col - Only available for TC currently. NER coming soon. Default False. - - - -* **Return type** - - `None` - - -For text classification datasets, the only required columns are text and label - -For NER, the required format is the huggingface standard format of tokens and -tags (or ner_tags). -See example: [https://huggingface.co/datasets/rungalileo/mit_movies](https://huggingface.co/datasets/rungalileo/mit_movies) - -> MIT Movies dataset in huggingface format - -```python -tokens ner_tags -[what, is, a, good, action, movie, that, is, r... [0, 0, 0, 0, 7, 0, ... -[show, me, political, drama, movies, with, jef... [0, 0, 7, 8, 0, 0, ... -[what, are, some, good, 1980, s, g, rated, mys... [0, 0, 0, 0, 5, 6, ... -[list, a, crime, film, which, director, was, d... [0, 0, 7, 0, 0, 0, ... -[is, there, a, thriller, movie, starring, al, ... [0, 0, 0, 7, 0, 0, ... -... ... ... 
-``` - -To see auto insights on a random, pre-selected dataset, simply run - -```python -import dataquality as dq - -dq.auto() -``` - -An example using auto with a hosted huggingface text classification dataset - -```python -import dataquality as dq - -dq.auto(hf_data="rungalileo/trec6") -``` - -Similarly, for NER - -```python -import dataquality as dq - -dq.auto(hf_data="conll2003") -``` - -An example using auto with sklearn data as pandas dataframes - -```python -import dataquality as dq -import pandas as pd -from sklearn.datasets import fetch_20newsgroups - -# Load the newsgroups dataset from sklearn -newsgroups_train = fetch_20newsgroups(subset='train') -newsgroups_test = fetch_20newsgroups(subset='test') -# Convert to pandas dataframes -df_train = pd.DataFrame( - {"text": newsgroups_train.data, "label": newsgroups_train.target} -) -df_test = pd.DataFrame( - {"text": newsgroups_test.data, "label": newsgroups_test.target} -) - -dq.auto( - train_data=df_train, - test_data=df_test, - labels=newsgroups_train.target_names, - project_name="newsgroups_work", - run_name="run_1_raw_data" -) -``` - -An example of using auto with a local CSV file with text and label columns - -```python -import dataquality as dq - -dq.auto( - train_data="train.csv", - test_data="test.csv", - project_name="data_from_local", - run_name="run_1_raw_data" -) -``` - -# dataquality.integrations.torch - - -### watch(model, dataloaders=[], classifier_layer=None, embedding_dim=None, logits_dim=None, embedding_fn=None, logits_fn=None, last_hidden_state_layer=None, unpatch_on_start=False) -wraps a PyTorch model and optionally dataloaders to log the -embeddings and logits to [Galileo]([https://www.rungalileo.io/](https://www.rungalileo.io/)). 
- -```python -dq.log_dataset(train_dataset, split="train") -train_dataloader = torch.utils.data.DataLoader() -model = TextClassificationModel(num_labels=len(train_dataset.list_of_labels)) -watch(model, [train_dataloader, test_dataloader]) -for epoch in range(NUM_EPOCHS): - dq.set_epoch_and_split(epoch,"training") - train() - dq.set_split("validation") - validate() -dq.finish() -``` - - -* **Parameters** - - - * **model** (`Module`) -- Pytorch Model to be wrapped - - - * **dataloaders** (`Optional`[`List`[`DataLoader`]]) -- List of dataloaders to be wrapped - - - * **classifier_layer** (`Union`[`Module`, `str`, `None`]) -- Layer to hook into (usually 'classifier' or 'fc'). - Inputs are the embeddings and outputs are the logits. - - - * **embedding_dim** (`Union`[`str`, `int`, `slice`, `Tensor`, `List`, `Tuple`, `None`]) -- Dimension of the embeddings for example "[:, 0]" - to remove the cls token - - - * **logits_dim** (`Union`[`str`, `int`, `slice`, `Tensor`, `List`, `Tuple`, `None`]) -- Dimension to extract the logits for example in NER - "[:,1:,:]" - - - * **logits_dim** -- Dimension of the logits - from layer input and logits from layer output. If the layer is not found, - the last_hidden_state_layer will be used - - - * **embedding_fn** (`Optional`[`Callable`]) -- Function to process embeddings from the model - - - * **logits_fn** (`Optional`[`Callable`]) -- Function to process logits from the model f.e. 
- lambda x: x[0] - - - * **last_hidden_state_layer** (`Union`[`Module`, `str`, `None`]) -- Layer to extract the embeddings from - - - * **unpatch_on_start** (`bool`) -- Force unpatching of dataloaders - instead of global patching - - - * **model** -- Pytorch Model to be wrapped - - - * **dataloaders** -- List of dataloaders to be wrapped - - - * **last_hidden_state_layer** -- Layer to extract the embeddings from - - - * **embedding_dim** -- Dimension of the embeddings for example "[:, 0]" - - - -* **Return type** - - `None` - - -to remove the cls token -:param logits_dim: Dimension to extract the logits for example in NER - -> "[:,1:,:]" - - -### unwatch(model=None, force=True) -Unwatches the model. Run after the run is finished. -:type force: `bool` -:param force: Force unwatch even if the model is not watched - - -* **Return type** - - `None` - - -# dataquality.integrations.transformers_trainer - - -### watch(trainer, last_hidden_state_layer=None, embedding_dim=None, logits_dim=None, classifier_layer=None, embedding_fn=None, logits_fn=None) -used to *hook* into to the **trainer** -to log to [Galileo]([https://www.rungalileo.io/](https://www.rungalileo.io/)) - - -* **Parameters** - - **trainer** (`Trainer`) -- Trainer object - - - -* **Return type** - - `None` - - - -### unwatch(trainer) -unwatch is used to remove the callback from the trainer -:type trainer: `Trainer` -:param trainer: Trainer object - - -* **Return type** - - `None` - - -# dataquality.integrations.keras - - -### _class_ DataQualityCallback() -Bases: `Callback` - - -#### on_epoch_begin(epoch, logs) -Called at the start of an epoch. - -Subclasses should override for any actions to run. This function should only -be called during TRAIN mode. - - -* **Parameters** - - - * **epoch** (`int`) -- Integer, index of epoch. - - - * **logs** (`Dict`) -- Dict. Currently no data is passed to this argument for this method - but that may change in the future. 
- - - -* **Return type** - - `None` - - - -#### on_test_batch_begin(batch, logs=None) -Called at the beginning of a batch in evaluate methods. - -Also called at the beginning of a validation batch in the fit -methods, if validation data is provided. - -Subclasses should override for any actions to run. - -Note that if the steps_per_execution argument to compile in -tf.keras.Model is set to N, this method will only be called every N -batches. - - -* **Parameters** - - - * **batch** (`Any`) -- Integer, index of batch within the current epoch. - - - * **logs** (`Optional`[`Dict`]) -- Dict. Currently no data is passed to this argument for this method - but that may change in the future. - - - -* **Return type** - - `None` - - - -#### on_test_batch_end(batch, logs=None) -Called at the end of a batch in evaluate methods. - -Also called at the end of a validation batch in the fit -methods, if validation data is provided. - -Subclasses should override for any actions to run. - -Note that if the steps_per_execution argument to compile in -tf.keras.Model is set to N, this method will only be called every N -batches. - - -* **Parameters** - - - * **batch** (`Any`) -- Integer, index of batch within the current epoch. - - - * **logs** (`Optional`[`Dict`]) -- Dict. Aggregated metric results up until this batch. - - - -* **Return type** - - `None` - - - -#### on_train_batch_begin(batch, logs=None) -Called at the beginning of a training batch in fit methods. - -Subclasses should override for any actions to run. - -Note that if the steps_per_execution argument to compile in -tf.keras.Model is set to N, this method will only be called every N -batches. - - -* **Parameters** - - - * **batch** (`Any`) -- Integer, index of batch within the current epoch. - - - * **logs** (`Optional`[`Dict`]) -- Dict. Currently no data is passed to this argument for this method - but that may change in the future. 
- - - -* **Return type** - - `None` - - - -#### on_train_batch_end(batch, logs=None) -Called at the end of a training batch in fit methods. - -Subclasses should override for any actions to run. - -Note that if the steps_per_execution argument to compile in -tf.keras.Model is set to N, this method will only be called every N -batches. - - -* **Parameters** - - - * **batch** (`Any`) -- Integer, index of batch within the current epoch. - - - * **logs** (`Optional`[`Dict`]) -- Dict. Aggregated metric results up until this batch. - - - -* **Return type** - - `None` - - - -### _class_ DataQualityLoggingLayer(what_to_log) -Bases: `Layer` - - -#### call(inputs) -This is where the layer's logic lives. - -The call() method may not create state (except in its first invocation, -wrapping the creation of variables or other resources in tf.init_scope()). -It is recommended to create state in __init__(), or the build() method -that is called automatically before call() executes the first time. - - -* **Parameters** - - - * **inputs** (`Tensor`) -- Input tensor, or dict/list/tuple of input tensors. - The first positional inputs argument is subject to special rules: - - inputs must be explicitly passed. A layer cannot have zero - - > arguments, and inputs cannot be provided via the default value - > of a keyword argument. - - - * NumPy array or Python scalar values in inputs get cast as tensors. - - - * Keras mask metadata is only collected from inputs. - - - * Layers are built (build(input_shape) method) - using shape info from inputs only. - - - * input_spec compatibility is only checked against inputs. - - - * Mixed precision input casting is only applied to inputs. - If a layer has tensor arguments in \*args or \*\*kwargs, their - casting behavior in mixed precision should be handled manually. - - - * The SavedModel input specification is generated using inputs only. 
- - - * Integration with various ecosystem packages like TFMOT, TFLite, - TF.js, etc is only supported for inputs and not for tensors in - positional and keyword arguments. - - - - * **\*args** -- Additional positional arguments. May contain tensors, although - this is not recommended, for the reasons above. - - - * **\*\*kwargs** -- Additional keyword arguments. May contain tensors, although - this is not recommended, for the reasons above. - The following optional keyword arguments are reserved: - - training: Boolean scalar tensor of Python boolean indicating - - > whether the call is meant for training or inference. - - - * mask: Boolean input mask. If the layer's call() method takes a - mask argument, its default value will be set to the mask generated - for inputs by the previous layer (if input did come from a layer - that generated a corresponding mask, i.e. if it came from a Keras - layer with masking support). - - - - -* **Return type** - - `Tensor` - - - -* **Returns** - - A tensor or list/tuple of tensors. - - - -### add_ids_to_numpy_arr(orig_arr, ids) -Deprecated, use add_sample_ids - - -* **Return type** - - `ndarray` - - - -### add_sample_ids(orig_arr, ids) -Add sample IDs to the training/test data before training begins - -This is necessary to call before training a Keras model with the -Galileo DataQualityCallback - - -* **Return type** - - `ndarray` - - - -* **Parameters** - - - * **orig_arr** (`ndarray`) -- The numpy array to be passed into model.train - - - * **ids** (`Union`[`List`[`int`], `ndarray`]) -- The ids for each sample to append. These are the same IDs that are - - -logged for the input data. They must match 1-1 - -# dataquality.integrations.experimental.keras - - -### watch(model, layer=None, seed=42) -Watch a model and log the inputs and outputs of a layer. 
-:type model: `Layer` -:param model: The model to watch -:type layer: `Optional`[`Any`] -:param layer: The layer to watch, if None the classifier layer is used -:type seed: `int` -:param seed: The seed to use for the model - - -* **Return type** - - `None` - - - -### unwatch(model) -Unpatches the model. Run after the run is finished -:type model: `Layer` -:param model: The model to unpatch - - -* **Return type** - - `None` - - -# dataquality.integrations.spacy - - -### watch(nlp) -Stores the nlp object before calling watch on the ner component within it - -We need access to the nlp object so that during training we can capture the -model's predictions over the raw text by running nlp("user's text") and looking -at the results - - -* **Parameters** - - **nlp** (`Language`) -- The spacy nlp Language component. - - - -* **Return type** - - `None` - - - -### unwatch(nlp) -Returns spacy nlp Language component to its original unpatched state. - -Unfortunately, spacy does not make this easy, so we replicate spacy's add_pipe -for logic for using internal spacy methods to add a component object to a specific -position. - - -* **Return type** - - `None` - - -# dataquality.integrations.hf - - -### infer_schema(label_list) -Infers the schema via the exhaustive list of labels - - -* **Return type** - - `TaggingSchema` - - - -### tokenize_and_log_dataset(dd, tokenizer, label_names=None, meta=None) -This function tokenizes a huggingface DatasetDict and aligns the labels to BPE - -After tokenization, this function will also log the dataset(s) present in the -DatasetDict - - -* **Parameters** - - - * **dd** (`DatasetDict`) -- DatasetDict from huggingface to log - - - * **tokenizer** (`PreTrainedTokenizerBase`) -- The pretrained tokenizer from huggingface - - - * **label_names** (`Optional`[`List`[`str`]]) -- Optional list of labels for the dataset. These can typically - be extracted automatically (if the dataset came from hf datasets hub or was - exported via Galileo dataquality). 
If they cannot be extracted, an error will - be raised requesting label names - - - * **meta** (`Optional`[`List`[`str`]]) -- Optional metadata columns to be logged. The columns must be present - in at least one of the splits of the dataset. - - - -* **Return type** - - `DatasetDict` - - - -### _class_ TextDataset(hf_dataset) -Bases: `Dataset` - -An abstracted Huggingface Text dataset for users to import and use - -Get back a DataLoader via the get_dataloader function - - -### get_dataloader(dataset, \*\*kwargs) -Create a DataLoader for a particular split given a huggingface Dataset - -The DataLoader will be a loader of a TextDataset. The __getitem__ for that dataset -will return: - -> -> * id - the Galileo ID of the sample - - -> * input_ids - the standard huggingface input_ids - - -> * attention_mask - the standard huggingface attention_mask - - -> * labels - output labels adjusted with tokenized NER data - - -* **Parameters** - - - * **dataset** (`Dataset`) -- The huggingface dataset to convert to a DataLoader - - - * **kwargs** (`Any`) -- Any additional keyword arguments to be passed into the DataLoader - Things like batch_size or shuffle - - - -* **Return type** - - `DataLoader` - - -# dataquality - -dataquality - - -### _class_ AggregateFunction(value) -Bases: `str`, `Enum` - -An enumeration. - - -### _class_ Operator(value) -Bases: `str`, `Enum` - -An enumeration. - - -### _class_ Condition(\*\*data) -Bases: `BaseModel` - -Class for building custom conditions for data quality checks - -After building a condition, call evaluate to determine the truthiness -of the condition against a given DataFrame. - -With a bit of thought, complex and custom conditions can be built. To gain an -intuition for what can be accomplished, consider the following examples: - - -1. Is the average confidence less than 0.3? - - ```python - >>> c = Condition( - ... agg=AggregateFunction.avg, - ... metric="confidence", - ... operator=Operator.lt, - ... threshold=0.3, - ... 
) - >>> c.evaluate(df) - ``` - - -2. Is the max DEP greater or equal to 0.45? - - ```python - >>> c = Condition( - ... agg=AggregateFunction.max, - ... metric="data_error_potential", - ... operator=Operator.gte, - ... threshold=0.45, - ... ) - >>> c.evaluate(df) - ``` - -By adding filters, you can further narrow down the scope of the condition. -If the aggregate function is "pct", you don't need to specify a metric, - -> as the filters will determine the percentage of data. - -For example: - - -1. Alert if over 80% of the dataset has confidence under 0.1 - - ```python - >>> c = Condition( - ... operator=Operator.gt, - ... threshold=0.8, - ... agg=AggregateFunction.pct, - ... filters=[ - ... ConditionFilter( - ... metric="confidence", operator=Operator.lt, value=0.1 - ... ), - ... ], - ... ) - >>> c.evaluate(df) - ``` - - -2. Alert if at least 20% of the dataset has drifted (Inference DataFrames only) - - ```python - >>> c = Condition( - ... operator=Operator.gte, - ... threshold=0.2, - ... agg=AggregateFunction.pct, - ... filters=[ - ... ConditionFilter( - ... metric="is_drifted", operator=Operator.eq, value=True - ... ), - ... ], - ... ) - >>> c.evaluate(df) - ``` - - -3. Alert 5% or more of the dataset contains PII - - ```python - >>> c = Condition( - ... operator=Operator.gte, - ... threshold=0.05, - ... agg=AggregateFunction.pct, - ... filters=[ - ... ConditionFilter( - ... metric="galileo_pii", operator=Operator.neq, value="None" - ... ), - ... ], - ... ) - >>> c.evaluate(df) - ``` - -Complex conditions can be built when the filter has a different metric -than the metric used in the condition. For example: - - -1. Alert if the min confidence of drifted data is less than 0.15 - - ```python - >>> c = Condition( - ... agg=AggregateFunction.min, - ... metric="confidence", - ... operator=Operator.lt, - ... threshold=0.15, - ... filters=[ - ... ConditionFilter( - ... metric="is_drifted", operator=Operator.eq, value=True - ... ) - ... ], - ... 
) - >>> c.evaluate(df) - ``` - - -2. Alert if over 50% of high DEP (>=0.7) data contains PII - - ```python - >>> c = Condition( - ... operator=Operator.gt, - ... threshold=0.5, - ... agg=AggregateFunction.pct, - ... filters=[ - ... ConditionFilter( - ... metric="data_error_potential", operator=Operator.gte, value=0.7 - ... ), - ... ConditionFilter( - ... metric="galileo_pii", operator=Operator.neq, value="None" - ... ), - ... ], - ... ) - >>> c.evaluate(df) - ``` - -You can also call conditions directly, which will assert its truth against a df -1. Assert that average confidence less than 0.3 ->>> c = Condition( -... agg=AggregateFunction.avg, -... metric="confidence", -... operator=Operator.lt, -... threshold=0.3, -... ) ->>> c(df) # Will raise an AssertionError if False - - -* **Parameters** - - - * **metric** -- The DF column for evaluating the condition - - - * **agg** -- An aggregate function to apply to the metric - - - * **operator** -- The operator to use for comparing the agg to the threshold - (e.g. "gt", "lt", "eq", "neq") - - - * **threshold** -- Threshold value for evaluating the condition - - - * **filter** -- Optional filter to apply to the DataFrame before evaluating the - condition - - - -### _class_ ConditionFilter(\*\*data) -Bases: `BaseModel` - -Filter a dataframe based on the column value - -Note that the column used for filtering is the same as the metric used -in the condition. - - -* **Parameters** - - - * **operator** -- The operator to use for filtering (e.g. 
"gt", "lt", "eq", "neq") - See Operator - - - * **value** -- The value to compare against diff --git a/docs/autodocs/_build/markdown/index.md b/docs/autodocs/_build/markdown/index.md deleted file mode 100644 index 8adb680ec..000000000 --- a/docs/autodocs/_build/markdown/index.md +++ /dev/null @@ -1,118 +0,0 @@ -# Python API - - -* [dataquality](api/dataquality.md) - - - * [`login()`](api/dataquality.md#dataquality.login) - - - * [`init()`](api/dataquality.md#dataquality.init) - - - * [`log_model_outputs()`](api/dataquality.md#dataquality.log_model_outputs) - - - * [`finish()`](api/dataquality.md#dataquality.finish) - - - * [`set_labels_for_run()`](api/dataquality.md#dataquality.set_labels_for_run) - - - * [`set_tasks_for_run()`](api/dataquality.md#dataquality.set_tasks_for_run) - - - * [`set_epoch()`](api/dataquality.md#dataquality.set_epoch) - - - * [`set_split()`](api/dataquality.md#dataquality.set_split) - - - * [`log_data_sample()`](api/dataquality.md#dataquality.log_data_sample) - - - * [`log_dataset()`](api/dataquality.md#dataquality.log_dataset) - - - * [`auto()`](api/dataquality.md#dataquality.auto) - - -* [dataquality.integrations.torch](api/dataquality.md#dataquality-integrations-torch) - - - * [`watch()`](api/dataquality.md#dataquality.integrations.torch.watch) - - - * [`unwatch()`](api/dataquality.md#dataquality.integrations.torch.unwatch) - - -* [dataquality.integrations.transformers_trainer](api/dataquality.md#dataquality-integrations-transformers-trainer) - - - * [`watch()`](api/dataquality.md#dataquality.integrations.transformers_trainer.watch) - - - * [`unwatch()`](api/dataquality.md#dataquality.integrations.transformers_trainer.unwatch) - - -* [dataquality.integrations.keras](api/dataquality.md#dataquality-integrations-keras) - - - * [`DataQualityCallback`](api/dataquality.md#dataquality.integrations.keras.DataQualityCallback) - - - * [`DataQualityLoggingLayer`](api/dataquality.md#dataquality.integrations.keras.DataQualityLoggingLayer) - - - * 
[`add_ids_to_numpy_arr()`](api/dataquality.md#dataquality.integrations.keras.add_ids_to_numpy_arr) - - - * [`add_sample_ids()`](api/dataquality.md#dataquality.integrations.keras.add_sample_ids) - - -* [dataquality.integrations.experimental.keras](api/dataquality.md#dataquality-integrations-experimental-keras) - - - * [`watch()`](api/dataquality.md#dataquality.integrations.experimental.keras.watch) - - - * [`unwatch()`](api/dataquality.md#dataquality.integrations.experimental.keras.unwatch) - - -* [dataquality.integrations.spacy](api/dataquality.md#dataquality-integrations-spacy) - - - * [`watch()`](api/dataquality.md#dataquality.integrations.spacy.watch) - - - * [`unwatch()`](api/dataquality.md#dataquality.integrations.spacy.unwatch) - - -* [dataquality.integrations.hf](api/dataquality.md#dataquality-integrations-hf) - - - * [`infer_schema()`](api/dataquality.md#dataquality.integrations.hf.infer_schema) - - - * [`tokenize_and_log_dataset()`](api/dataquality.md#dataquality.integrations.hf.tokenize_and_log_dataset) - - - * [`TextDataset`](api/dataquality.md#dataquality.integrations.hf.TextDataset) - - - * [`get_dataloader()`](api/dataquality.md#dataquality.integrations.hf.get_dataloader) - - -* [dataquality](api/dataquality.md#id1) - - - * [`AggregateFunction`](api/dataquality.md#dataquality.AggregateFunction) - - - * [`Operator`](api/dataquality.md#dataquality.Operator) - - - * [`Condition`](api/dataquality.md#dataquality.Condition) - - - * [`ConditionFilter`](api/dataquality.md#dataquality.ConditionFilter) diff --git a/docs/autodocs/api/dataquality.rst b/docs/autodocs/api/dataquality.rst deleted file mode 100644 index 20cec5bc3..000000000 --- a/docs/autodocs/api/dataquality.rst +++ /dev/null @@ -1,84 +0,0 @@ -dataquality ------------ -.. 
automodule:: dataquality - :members: login, init, set_labels_for_run, set_tasks_for_run, log_dataset, set_split, set_epoch, log_model_outputs, metrics, log_data_sample,log_image_dataset, auto, logout, finish - :show-inheritance: - :member-order: bysource - -dataquality.integrations.torch ------------ -.. automodule:: dataquality.integrations.torch - :members: watch, unwatch - :show-inheritance: - :member-order: bysource - -dataquality.integrations.transformers_trainer ------------ -.. automodule:: dataquality.integrations.transformers_trainer - :members: watch, unwatch - :show-inheritance: - :member-order: bysource - -dataquality.integrations.lightning ------------ -.. automodule:: dataquality.integrations.lightning - :members: LightningDQCallback - :show-inheritance: - :member-order: bysource - - -dataquality.integrations.legacy.keras ------------ -.. automodule:: dataquality.integrations.legacy.keras - :members: - :show-inheritance: - -dataquality.integrations.keras ------------ -.. automodule:: dataquality.integrations.keras - :members: watch, unwatch - :show-inheritance: - :member-order: bysource - -dataquality.integrations.spacy ------------ -.. automodule:: dataquality.integrations.spacy - :members: watch, unwatch - :show-inheritance: - :member-order: bysource - - -dataquality.integrations.hf ------------ -.. automodule:: dataquality.integrations.hf - :members: - :show-inheritance: - :member-order: bysource - -dataquality.integrations.fastai ------------ -.. automodule:: dataquality.integrations.fastai - :members: FastAiDQCallback - :show-inheritance: - :member-order: bysource - -dataquality.integrations.setfit ------------ -.. automodule:: dataquality.integrations.setfit - :members: watch, unwatch, auto - :show-inheritance: - :member-order: bysource - -dataquality.integrations.jsl ------------ -.. automodule:: dataquality.integrations.jsl - :members: JSLProject - :show-inheritance: - :member-order: bysource - -dataquality ------------ -.. 
automodule:: dataquality - :members: AggregateFunction, Operator, Condition, ConditionFilter - :show-inheritance: - :member-order: bysource \ No newline at end of file diff --git a/docs/autodocs/conf.py b/docs/autodocs/conf.py deleted file mode 100644 index 69c6a0968..000000000 --- a/docs/autodocs/conf.py +++ /dev/null @@ -1,84 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys - -sys.path.insert(0, os.path.abspath("..")) - -# -- Project information ----------------------------------------------------- - -project = "Dataquality" - -copyright = "2022 rungalileo.io" -author = "Galileo Team" - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.napoleon", - "sphinx.ext.autosummary", - "sphinx_autodoc_typehints", - "sphinx.ext.autosectionlabel", - "sphinx.ext.intersphinx", - "myst_parser", - "sphinx_markdown_builder", -] - - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. 
-exclude_patterns = ["_build", "build/*", "Thumbs.db", ".DS_Store"] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "furo" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] - -html_theme_options = { - "sidebar_hide_name": True, - "navigation_with_keys": True, - "light_logo": "logo-240x80.png", - "dark_logo": "logo-240x80-white.png", - # 'github_user': 'dataquality', - # 'github_repo': 'dataquality', - # 'fixed_sidebar': True, -} - -html_favicon = "_static/logo.png" - -# The master toctree document. -master_doc = "index" - -add_module_names = False - -pygments_style = None - -numfig = True - -smartquotes = False diff --git a/docs/autodocs/index.rst b/docs/autodocs/index.rst deleted file mode 100644 index 1fbf992d4..000000000 --- a/docs/autodocs/index.rst +++ /dev/null @@ -1,8 +0,0 @@ -========== -Python API -========== - -.. toctree:: - :maxdepth: 2 - - api/dataquality.rst \ No newline at end of file diff --git a/docs/autodocs/make.bat b/docs/autodocs/make.bat deleted file mode 100644 index 8084272b4..000000000 --- a/docs/autodocs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=. -set BUILDDIR=_build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. 
- echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/autodocs/requirements.txt b/docs/autodocs/requirements.txt deleted file mode 100644 index c14d513c7..000000000 --- a/docs/autodocs/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -# requirements for readthedocs generation -# TODO COMMON requirements -sphinx==5.3.0 -sphinx-autobuild -sphinx-autodoc-typehints -furo -myst-parser -sphinx-markdown-builder \ No newline at end of file diff --git a/docs/source/_static/logo.png b/docs/source/_static/logo.png new file mode 100644 index 000000000..aeabe61e2 Binary files /dev/null and b/docs/source/_static/logo.png differ diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 000000000..625f7d53a --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,62 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +from datetime import datetime + + +from dataquality import __version__ + +project = "dataquality" +copyright = f"{datetime.now().year}, Galileo Technologies Inc." + +author = "Galileo Technologies Inc." 
+release = __version__ + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "sphinx.ext.napoleon", + "sphinx.ext.autodoc", + "myst_parser", + "sphinx.ext.autosummary", + "sphinx_autodoc_typehints", + "sphinx.ext.autosectionlabel", + "sphinx.ext.intersphinx", + "sphinx_markdown_builder", + "sphinxcontrib.autodoc_pydantic", +] + +templates_path = ["_templates"] + +add_module_names = False +autoclass_content = "both" +autodoc_default_flags = ["show-inheritance", "members", "undoc-members"] +autodoc_member_order = "bysource" + +# autosummary +autosummary_generate = True + +# autosectionlabel +autosectionlabel_prefix_document = True + +# autodoc_pydantic +autodoc_pydantic_model_show_json = False +autodoc_pydantic_model_show_config_summary = False +autodoc_pydantic_model_show_validator_summary = False +autodoc_pydantic_model_show_validator_members = False + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "furo" +html_static_path = ["_static"] + +# Theme options +html_logo = "_static/logo.png" diff --git a/docs/source/dataquality.clients.rst b/docs/source/dataquality.clients.rst new file mode 100644 index 000000000..ffd5365ab --- /dev/null +++ b/docs/source/dataquality.clients.rst @@ -0,0 +1,29 @@ +dataquality.clients package +=========================== + +Submodules +---------- + +dataquality.clients.api module +------------------------------ + +.. automodule:: dataquality.clients.api + :members: + :undoc-members: + :show-inheritance: + +dataquality.clients.objectstore module +-------------------------------------- + +.. automodule:: dataquality.clients.objectstore + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: dataquality.clients + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.core.rst b/docs/source/dataquality.core.rst new file mode 100644 index 000000000..cb9fd75e0 --- /dev/null +++ b/docs/source/dataquality.core.rst @@ -0,0 +1,53 @@ +dataquality.core package +======================== + +Submodules +---------- + +dataquality.core.auth module +---------------------------- + +.. automodule:: dataquality.core.auth + :members: + :undoc-members: + :show-inheritance: + +dataquality.core.finish module +------------------------------ + +.. automodule:: dataquality.core.finish + :members: + :undoc-members: + :show-inheritance: + +dataquality.core.init module +---------------------------- + +.. automodule:: dataquality.core.init + :members: + :undoc-members: + :show-inheritance: + +dataquality.core.log module +--------------------------- + +.. automodule:: dataquality.core.log + :members: + :undoc-members: + :show-inheritance: + +dataquality.core.report module +------------------------------ + +.. automodule:: dataquality.core.report + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: dataquality.core + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.dq_auto.rst b/docs/source/dataquality.dq_auto.rst new file mode 100644 index 000000000..2d8c35119 --- /dev/null +++ b/docs/source/dataquality.dq_auto.rst @@ -0,0 +1,77 @@ +dataquality.dq\_auto package +============================ + +Submodules +---------- + +dataquality.dq\_auto.auto module +-------------------------------- + +.. automodule:: dataquality.dq_auto.auto + :members: + :undoc-members: + :show-inheritance: + +dataquality.dq\_auto.base\_data\_manager module +----------------------------------------------- + +.. 
automodule:: dataquality.dq_auto.base_data_manager + :members: + :undoc-members: + :show-inheritance: + +dataquality.dq\_auto.ner module +------------------------------- + +.. automodule:: dataquality.dq_auto.ner + :members: + :undoc-members: + :show-inheritance: + +dataquality.dq\_auto.ner\_trainer module +---------------------------------------- + +.. automodule:: dataquality.dq_auto.ner_trainer + :members: + :undoc-members: + :show-inheritance: + +dataquality.dq\_auto.notebook module +------------------------------------ + +.. automodule:: dataquality.dq_auto.notebook + :members: + :undoc-members: + :show-inheritance: + +dataquality.dq\_auto.schema module +---------------------------------- + +.. automodule:: dataquality.dq_auto.schema + :members: + :undoc-members: + :show-inheritance: + +dataquality.dq\_auto.tc\_trainer module +--------------------------------------- + +.. automodule:: dataquality.dq_auto.tc_trainer + :members: + :undoc-members: + :show-inheritance: + +dataquality.dq\_auto.text\_classification module +------------------------------------------------ + +.. automodule:: dataquality.dq_auto.text_classification + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: dataquality.dq_auto + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.dq_start.rst b/docs/source/dataquality.dq_start.rst new file mode 100644 index 000000000..3170eb0f9 --- /dev/null +++ b/docs/source/dataquality.dq_start.rst @@ -0,0 +1,10 @@ +dataquality.dq\_start package +============================= + +Module contents +--------------- + +.. 
automodule:: dataquality.dq_start + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.integrations.rst b/docs/source/dataquality.integrations.rst new file mode 100644 index 000000000..8b6860045 --- /dev/null +++ b/docs/source/dataquality.integrations.rst @@ -0,0 +1,101 @@ +dataquality.integrations package +================================ + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + dataquality.integrations.seq2seq + +Submodules +---------- + +dataquality.integrations.fastai module +-------------------------------------- + +.. automodule:: dataquality.integrations.fastai + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.hf module +---------------------------------- + +.. automodule:: dataquality.integrations.hf + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.jsl module +----------------------------------- + +.. automodule:: dataquality.integrations.jsl + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.keras module +------------------------------------- + +.. automodule:: dataquality.integrations.keras + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.lightning module +----------------------------------------- + +.. automodule:: dataquality.integrations.lightning + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.setfit module +-------------------------------------- + +.. automodule:: dataquality.integrations.setfit + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.torch module +------------------------------------- + +.. automodule:: dataquality.integrations.torch + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.torch\_semantic\_segmentation module +------------------------------------------------------------- + +.. 
automodule:: dataquality.integrations.torch_semantic_segmentation + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.transformers\_trainer module +----------------------------------------------------- + +.. automodule:: dataquality.integrations.transformers_trainer + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.ultralytics module +------------------------------------------- + +.. automodule:: dataquality.integrations.ultralytics + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: dataquality.integrations + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.integrations.seq2seq.formatters.rst b/docs/source/dataquality.integrations.seq2seq.formatters.rst new file mode 100644 index 000000000..e3d369bb9 --- /dev/null +++ b/docs/source/dataquality.integrations.seq2seq.formatters.rst @@ -0,0 +1,37 @@ +dataquality.integrations.seq2seq.formatters package +=================================================== + +Submodules +---------- + +dataquality.integrations.seq2seq.formatters.alpaca module +--------------------------------------------------------- + +.. automodule:: dataquality.integrations.seq2seq.formatters.alpaca + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.seq2seq.formatters.base module +------------------------------------------------------- + +.. automodule:: dataquality.integrations.seq2seq.formatters.base + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.seq2seq.formatters.chat module +------------------------------------------------------- + +.. automodule:: dataquality.integrations.seq2seq.formatters.chat + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: dataquality.integrations.seq2seq.formatters + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.integrations.seq2seq.rst b/docs/source/dataquality.integrations.seq2seq.rst new file mode 100644 index 000000000..e745bc96f --- /dev/null +++ b/docs/source/dataquality.integrations.seq2seq.rst @@ -0,0 +1,53 @@ +dataquality.integrations.seq2seq package +======================================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + dataquality.integrations.seq2seq.formatters + +Submodules +---------- + +dataquality.integrations.seq2seq.auto module +-------------------------------------------- + +.. automodule:: dataquality.integrations.seq2seq.auto + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.seq2seq.core module +-------------------------------------------- + +.. automodule:: dataquality.integrations.seq2seq.core + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.seq2seq.s2s\_trainer module +---------------------------------------------------- + +.. automodule:: dataquality.integrations.seq2seq.s2s_trainer + :members: + :undoc-members: + :show-inheritance: + +dataquality.integrations.seq2seq.schema module +---------------------------------------------- + +.. automodule:: dataquality.integrations.seq2seq.schema + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: dataquality.integrations.seq2seq + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.loggers.data_logger.rst b/docs/source/dataquality.loggers.data_logger.rst new file mode 100644 index 000000000..e4c3bd092 --- /dev/null +++ b/docs/source/dataquality.loggers.data_logger.rst @@ -0,0 +1,85 @@ +dataquality.loggers.data\_logger package +======================================== + +Subpackages +----------- + +.. 
toctree:: + :maxdepth: 4 + + dataquality.loggers.data_logger.seq2seq + +Submodules +---------- + +dataquality.loggers.data\_logger.base\_data\_logger module +---------------------------------------------------------- + +.. automodule:: dataquality.loggers.data_logger.base_data_logger + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.data\_logger.image\_classification module +------------------------------------------------------------- + +.. automodule:: dataquality.loggers.data_logger.image_classification + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.data\_logger.object\_detection module +--------------------------------------------------------- + +.. automodule:: dataquality.loggers.data_logger.object_detection + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.data\_logger.semantic\_segmentation module +-------------------------------------------------------------- + +.. automodule:: dataquality.loggers.data_logger.semantic_segmentation + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.data\_logger.tabular\_classification module +--------------------------------------------------------------- + +.. automodule:: dataquality.loggers.data_logger.tabular_classification + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.data\_logger.text\_classification module +------------------------------------------------------------ + +.. automodule:: dataquality.loggers.data_logger.text_classification + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.data\_logger.text\_multi\_label module +---------------------------------------------------------- + +.. automodule:: dataquality.loggers.data_logger.text_multi_label + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.data\_logger.text\_ner module +------------------------------------------------- + +.. 
automodule:: dataquality.loggers.data_logger.text_ner + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: dataquality.loggers.data_logger + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.loggers.data_logger.seq2seq.rst b/docs/source/dataquality.loggers.data_logger.seq2seq.rst new file mode 100644 index 000000000..b908b65f6 --- /dev/null +++ b/docs/source/dataquality.loggers.data_logger.seq2seq.rst @@ -0,0 +1,45 @@ +dataquality.loggers.data\_logger.seq2seq package +================================================ + +Submodules +---------- + +dataquality.loggers.data\_logger.seq2seq.chat module +---------------------------------------------------- + +.. automodule:: dataquality.loggers.data_logger.seq2seq.chat + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.data\_logger.seq2seq.completion module +---------------------------------------------------------- + +.. automodule:: dataquality.loggers.data_logger.seq2seq.completion + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.data\_logger.seq2seq.formatters module +---------------------------------------------------------- + +.. automodule:: dataquality.loggers.data_logger.seq2seq.formatters + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.data\_logger.seq2seq.seq2seq\_base module +------------------------------------------------------------- + +.. automodule:: dataquality.loggers.data_logger.seq2seq.seq2seq_base + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: dataquality.loggers.data_logger.seq2seq + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.loggers.logger_config.rst b/docs/source/dataquality.loggers.logger_config.rst new file mode 100644 index 000000000..ee2c2b463 --- /dev/null +++ b/docs/source/dataquality.loggers.logger_config.rst @@ -0,0 +1,85 @@ +dataquality.loggers.logger\_config package +========================================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + dataquality.loggers.logger_config.seq2seq + +Submodules +---------- + +dataquality.loggers.logger\_config.base\_logger\_config module +-------------------------------------------------------------- + +.. automodule:: dataquality.loggers.logger_config.base_logger_config + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.logger\_config.image\_classification module +--------------------------------------------------------------- + +.. automodule:: dataquality.loggers.logger_config.image_classification + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.logger\_config.object\_detection module +----------------------------------------------------------- + +.. automodule:: dataquality.loggers.logger_config.object_detection + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.logger\_config.semantic\_segmentation module +---------------------------------------------------------------- + +.. automodule:: dataquality.loggers.logger_config.semantic_segmentation + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.logger\_config.tabular\_classification module +----------------------------------------------------------------- + +.. automodule:: dataquality.loggers.logger_config.tabular_classification + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.logger\_config.text\_classification module +-------------------------------------------------------------- + +.. 
automodule:: dataquality.loggers.logger_config.text_classification + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.logger\_config.text\_multi\_label module +------------------------------------------------------------ + +.. automodule:: dataquality.loggers.logger_config.text_multi_label + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.logger\_config.text\_ner module +--------------------------------------------------- + +.. automodule:: dataquality.loggers.logger_config.text_ner + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: dataquality.loggers.logger_config + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.loggers.logger_config.seq2seq.rst b/docs/source/dataquality.loggers.logger_config.seq2seq.rst new file mode 100644 index 000000000..d6f4b785e --- /dev/null +++ b/docs/source/dataquality.loggers.logger_config.seq2seq.rst @@ -0,0 +1,37 @@ +dataquality.loggers.logger\_config.seq2seq package +================================================== + +Submodules +---------- + +dataquality.loggers.logger\_config.seq2seq.chat module +------------------------------------------------------ + +.. automodule:: dataquality.loggers.logger_config.seq2seq.chat + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.logger\_config.seq2seq.completion module +------------------------------------------------------------ + +.. automodule:: dataquality.loggers.logger_config.seq2seq.completion + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.logger\_config.seq2seq.seq2seq\_base module +--------------------------------------------------------------- + +.. automodule:: dataquality.loggers.logger_config.seq2seq.seq2seq_base + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: dataquality.loggers.logger_config.seq2seq + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.loggers.model_logger.rst b/docs/source/dataquality.loggers.model_logger.rst new file mode 100644 index 000000000..133909cf8 --- /dev/null +++ b/docs/source/dataquality.loggers.model_logger.rst @@ -0,0 +1,85 @@ +dataquality.loggers.model\_logger package +========================================= + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + dataquality.loggers.model_logger.seq2seq + +Submodules +---------- + +dataquality.loggers.model\_logger.base\_model\_logger module +------------------------------------------------------------ + +.. automodule:: dataquality.loggers.model_logger.base_model_logger + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.model\_logger.image\_classification module +-------------------------------------------------------------- + +.. automodule:: dataquality.loggers.model_logger.image_classification + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.model\_logger.object\_detection module +---------------------------------------------------------- + +.. automodule:: dataquality.loggers.model_logger.object_detection + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.model\_logger.semantic\_segmentation module +--------------------------------------------------------------- + +.. automodule:: dataquality.loggers.model_logger.semantic_segmentation + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.model\_logger.tabular\_classification module +---------------------------------------------------------------- + +.. automodule:: dataquality.loggers.model_logger.tabular_classification + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.model\_logger.text\_classification module +------------------------------------------------------------- + +.. 
automodule:: dataquality.loggers.model_logger.text_classification + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.model\_logger.text\_multi\_label module +----------------------------------------------------------- + +.. automodule:: dataquality.loggers.model_logger.text_multi_label + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.model\_logger.text\_ner module +-------------------------------------------------- + +.. automodule:: dataquality.loggers.model_logger.text_ner + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: dataquality.loggers.model_logger + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.loggers.model_logger.seq2seq.rst b/docs/source/dataquality.loggers.model_logger.seq2seq.rst new file mode 100644 index 000000000..ee691f2f0 --- /dev/null +++ b/docs/source/dataquality.loggers.model_logger.seq2seq.rst @@ -0,0 +1,45 @@ +dataquality.loggers.model\_logger.seq2seq package +================================================= + +Submodules +---------- + +dataquality.loggers.model\_logger.seq2seq.chat module +----------------------------------------------------- + +.. automodule:: dataquality.loggers.model_logger.seq2seq.chat + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.model\_logger.seq2seq.completion module +----------------------------------------------------------- + +.. automodule:: dataquality.loggers.model_logger.seq2seq.completion + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.model\_logger.seq2seq.formatters module +----------------------------------------------------------- + +.. automodule:: dataquality.loggers.model_logger.seq2seq.formatters + :members: + :undoc-members: + :show-inheritance: + +dataquality.loggers.model\_logger.seq2seq.seq2seq\_base module +-------------------------------------------------------------- + +.. 
automodule:: dataquality.loggers.model_logger.seq2seq.seq2seq_base + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: dataquality.loggers.model_logger.seq2seq + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.loggers.rst b/docs/source/dataquality.loggers.rst new file mode 100644 index 000000000..518d3d9e0 --- /dev/null +++ b/docs/source/dataquality.loggers.rst @@ -0,0 +1,31 @@ +dataquality.loggers package +=========================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + dataquality.loggers.data_logger + dataquality.loggers.logger_config + dataquality.loggers.model_logger + +Submodules +---------- + +dataquality.loggers.base\_logger module +--------------------------------------- + +.. automodule:: dataquality.loggers.base_logger + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: dataquality.loggers + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.rst b/docs/source/dataquality.rst new file mode 100644 index 000000000..67cc426ba --- /dev/null +++ b/docs/source/dataquality.rst @@ -0,0 +1,68 @@ +dataquality package +=================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + dataquality.clients + dataquality.core + dataquality.dq_auto + dataquality.dq_start + dataquality.integrations + dataquality.loggers + dataquality.schemas + dataquality.utils + +Submodules +---------- + +dataquality.analytics module +---------------------------- + +.. automodule:: dataquality.analytics + :members: + :undoc-members: + :show-inheritance: + +dataquality.dqyolo module +------------------------- + +.. automodule:: dataquality.dqyolo + :members: + :undoc-members: + :show-inheritance: + +dataquality.exceptions module +----------------------------- + +.. 
automodule:: dataquality.exceptions + :members: + :undoc-members: + :show-inheritance: + +dataquality.internal module +--------------------------- + +.. automodule:: dataquality.internal + :members: + :undoc-members: + :show-inheritance: + +dataquality.metrics module +-------------------------- + +.. automodule:: dataquality.metrics + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: dataquality + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.schemas.rst b/docs/source/dataquality.schemas.rst new file mode 100644 index 000000000..cbcd974a1 --- /dev/null +++ b/docs/source/dataquality.schemas.rst @@ -0,0 +1,149 @@ +dataquality.schemas package +=========================== + +Submodules +---------- + +dataquality.schemas.condition module +------------------------------------ + +.. automodule:: dataquality.schemas.condition + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.cv module +----------------------------- + +.. automodule:: dataquality.schemas.cv + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.dataframe module +------------------------------------ + +.. automodule:: dataquality.schemas.dataframe + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.edit module +------------------------------- + +.. automodule:: dataquality.schemas.edit + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.hf module +----------------------------- + +.. automodule:: dataquality.schemas.hf + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.job module +------------------------------ + +.. automodule:: dataquality.schemas.job + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.metrics module +---------------------------------- + +.. 
automodule:: dataquality.schemas.metrics + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.model module +-------------------------------- + +.. automodule:: dataquality.schemas.model + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.ner module +------------------------------ + +.. automodule:: dataquality.schemas.ner + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.report module +--------------------------------- + +.. automodule:: dataquality.schemas.report + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.request\_type module +---------------------------------------- + +.. automodule:: dataquality.schemas.request_type + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.route module +-------------------------------- + +.. automodule:: dataquality.schemas.route + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.semantic\_segmentation module +------------------------------------------------- + +.. automodule:: dataquality.schemas.semantic_segmentation + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.seq2seq module +---------------------------------- + +.. automodule:: dataquality.schemas.seq2seq + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.split module +-------------------------------- + +.. automodule:: dataquality.schemas.split + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.task\_type module +------------------------------------- + +.. automodule:: dataquality.schemas.task_type + :members: + :undoc-members: + :show-inheritance: + +dataquality.schemas.torch module +-------------------------------- + +.. automodule:: dataquality.schemas.torch + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: dataquality.schemas + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.utils.rst b/docs/source/dataquality.utils.rst new file mode 100644 index 000000000..12aaf1d3b --- /dev/null +++ b/docs/source/dataquality.utils.rst @@ -0,0 +1,294 @@ +dataquality.utils package +========================= + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + dataquality.utils.semantic_segmentation + dataquality.utils.seq2seq + +Submodules +---------- + +dataquality.utils.arrow module +------------------------------ + +.. automodule:: dataquality.utils.arrow + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.auth module +----------------------------- + +.. automodule:: dataquality.utils.auth + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.auto module +----------------------------- + +.. automodule:: dataquality.utils.auto + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.auto\_trainer module +-------------------------------------- + +.. automodule:: dataquality.utils.auto_trainer + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.cuda module +----------------------------- + +.. automodule:: dataquality.utils.cuda + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.cv module +--------------------------- + +.. automodule:: dataquality.utils.cv + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.cv\_smart\_features module +-------------------------------------------- + +.. automodule:: dataquality.utils.cv_smart_features + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.dq\_logger module +----------------------------------- + +.. automodule:: dataquality.utils.dq_logger + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.dqyolo module +------------------------------- + +.. 
automodule:: dataquality.utils.dqyolo + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.emb module +---------------------------- + +.. automodule:: dataquality.utils.emb + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.file module +----------------------------- + +.. automodule:: dataquality.utils.file + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.hdf5\_store module +------------------------------------ + +.. automodule:: dataquality.utils.hdf5_store + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.helpers module +-------------------------------- + +.. automodule:: dataquality.utils.helpers + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.hf\_images module +----------------------------------- + +.. automodule:: dataquality.utils.hf_images + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.hf\_tokenizer module +-------------------------------------- + +.. automodule:: dataquality.utils.hf_tokenizer + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.imports module +-------------------------------- + +.. automodule:: dataquality.utils.imports + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.jsl module +---------------------------- + +.. automodule:: dataquality.utils.jsl + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.keras module +------------------------------ + +.. automodule:: dataquality.utils.keras + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.ml module +--------------------------- + +.. automodule:: dataquality.utils.ml + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.name module +----------------------------- + +.. automodule:: dataquality.utils.name + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.od module +--------------------------- + +.. 
automodule:: dataquality.utils.od + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.patcher module +-------------------------------- + +.. automodule:: dataquality.utils.patcher + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.profiler module +--------------------------------- + +.. automodule:: dataquality.utils.profiler + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.setfit module +------------------------------- + +.. automodule:: dataquality.utils.setfit + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.task\_helpers module +-------------------------------------- + +.. automodule:: dataquality.utils.task_helpers + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.tf module +--------------------------- + +.. automodule:: dataquality.utils.tf + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.thread\_pool module +------------------------------------- + +.. automodule:: dataquality.utils.thread_pool + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.torch module +------------------------------ + +.. automodule:: dataquality.utils.torch + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.transformers module +------------------------------------- + +.. automodule:: dataquality.utils.transformers + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.ultralytics module +------------------------------------ + +.. automodule:: dataquality.utils.ultralytics + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.upload module +------------------------------- + +.. automodule:: dataquality.utils.upload + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.upload\_model module +-------------------------------------- + +.. 
automodule:: dataquality.utils.upload_model + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.vaex module +----------------------------- + +.. automodule:: dataquality.utils.vaex + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.version module +-------------------------------- + +.. automodule:: dataquality.utils.version + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: dataquality.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.utils.semantic_segmentation.rst b/docs/source/dataquality.utils.semantic_segmentation.rst new file mode 100644 index 000000000..c99c5a0a2 --- /dev/null +++ b/docs/source/dataquality.utils.semantic_segmentation.rst @@ -0,0 +1,61 @@ +dataquality.utils.semantic\_segmentation package +================================================ + +Submodules +---------- + +dataquality.utils.semantic\_segmentation.constants module +--------------------------------------------------------- + +.. automodule:: dataquality.utils.semantic_segmentation.constants + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.semantic\_segmentation.errors module +------------------------------------------------------ + +.. automodule:: dataquality.utils.semantic_segmentation.errors + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.semantic\_segmentation.lm module +-------------------------------------------------- + +.. automodule:: dataquality.utils.semantic_segmentation.lm + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.semantic\_segmentation.metrics module +------------------------------------------------------- + +.. automodule:: dataquality.utils.semantic_segmentation.metrics + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.semantic\_segmentation.polygons module +-------------------------------------------------------- + +.. 
automodule:: dataquality.utils.semantic_segmentation.polygons + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.semantic\_segmentation.utils module +----------------------------------------------------- + +.. automodule:: dataquality.utils.semantic_segmentation.utils + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: dataquality.utils.semantic_segmentation + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dataquality.utils.seq2seq.rst b/docs/source/dataquality.utils.seq2seq.rst new file mode 100644 index 000000000..f5cdc1ddb --- /dev/null +++ b/docs/source/dataquality.utils.seq2seq.rst @@ -0,0 +1,53 @@ +dataquality.utils.seq2seq package +================================= + +Submodules +---------- + +dataquality.utils.seq2seq.data\_error\_potential module +------------------------------------------------------- + +.. automodule:: dataquality.utils.seq2seq.data_error_potential + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.seq2seq.decoder\_only module +---------------------------------------------- + +.. automodule:: dataquality.utils.seq2seq.decoder_only + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.seq2seq.generation module +------------------------------------------- + +.. automodule:: dataquality.utils.seq2seq.generation + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.seq2seq.logprobs module +----------------------------------------- + +.. automodule:: dataquality.utils.seq2seq.logprobs + :members: + :undoc-members: + :show-inheritance: + +dataquality.utils.seq2seq.offsets module +---------------------------------------- + +.. automodule:: dataquality.utils.seq2seq.offsets + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: dataquality.utils.seq2seq + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 000000000..c8eabe337 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,20 @@ +################ +📊 DataQuality +################ + +`dataquality` is the Python library to interact with `Galileo's NLP and CV Studios <https://www.rungalileo.io/>`_. + +Here you'll find an overview and API documentation. The package is available to download on `PyPI <https://pypi.org/project/dataquality/>`_. + +Table of Contents +----------------- + +.. toctree:: + :maxdepth: 2 + + dataquality + dataquality.integrations + Galileo Docs <https://docs.rungalileo.io/> + +- :ref:`genindex` +- :ref:`search` diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 000000000..96a5d78c8 --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +dataquality +=========== + +.. toctree:: + :maxdepth: 4 + + dataquality diff --git a/pyproject.toml b/pyproject.toml index 045c0211c..633731ac0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,8 @@ doc = [ "myst-parser", "sphinx-markdown-builder", "sphinx-autobuild", - "sphinx-markdown-builder" + "sphinx-markdown-builder", + "autodoc-pydantic" ] test = [ "ultralytics>=8.0.209", diff --git a/tasks.py b/tasks.py index 8a3d6968b..b895f4dd1 100644 --- a/tasks.py +++ b/tasks.py @@ -146,12 +146,9 @@ def docs_build(ctx: Context) -> None: Build the docs. """ - with ctx.cd("docs/autodocs"): - ctx.run( - "make markdown", - pty=True, - echo=True, - ) + ctx.run("sphinx-apidoc -f -o docs/source/ dataquality/", pty=True, echo=True) + ctx.run("sphinx-build -M markdown docs/source docs/build/md", echo=True) + ctx.run("sphinx-build -b html docs/source/ docs/build/html", echo=True) @unique