Merge branch 'master' into improve_iceberg_connector
skrydal authored Jan 10, 2025
2 parents 66d53f4 + a6cd995 commit eb14f5c
Showing 19 changed files with 876 additions and 702 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/build-and-test.yml
@@ -118,10 +118,12 @@ jobs:
run: |
echo "BACKEND_FILES=`find ./build/coverage-reports/ -type f | grep -E '(metadata-models|entity-registry|datahuyb-graphql-core|metadata-io|metadata-jobs|metadata-utils|metadata-service|medata-dao-impl|metadata-operation|li-utils|metadata-integration|metadata-events|metadata-auth|ingestion-scheduler|notifications|datahub-upgrade)' | xargs | sed 's/ /,/g'`" >> $GITHUB_ENV
echo "FRONTEND_FILES=`find ./build/coverage-reports/ -type f | grep -E '(datahub-frontend|datahub-web-react).*\.(xml|json)$' | xargs | sed 's/ /,/g'`" >> $GITHUB_ENV
- name: Generate tz artifact name
run: echo "NAME_TZ=$(echo ${{ matrix.timezone }} | tr '/' '-')" >> $GITHUB_ENV
- uses: actions/upload-artifact@v4
if: always()
with:
name: Test Results (build) - ${{ matrix.command}}-${{ matrix.timezone }}
name: Test Results (build) - ${{ matrix.command}}-${{ env.NAME_TZ }}
path: |
**/build/reports/tests/test/**
**/build/test-results/test/**
1 change: 1 addition & 0 deletions build.gradle
@@ -379,6 +379,7 @@ configure(subprojects.findAll {! it.name.startsWith('spark-lineage')}) {

resolutionStrategy.force externalDependency.antlr4Runtime
resolutionStrategy.force externalDependency.antlr4
resolutionStrategy.force 'org.apache.mina:mina-core:2.2.4'
}
}

24 changes: 20 additions & 4 deletions docs-website/README.md
@@ -130,7 +130,6 @@ The purpose of this section is to provide developers & technical users with conc

This section aims to provide plain-language feature overviews for technical and non-technical readers alike.


## Docs Generation Features

**Includes all markdown files**
@@ -145,16 +144,33 @@ You can suppress this check by adding the path to the file in a comment in `side

Use an "inline" directive to include code snippets from other files. The `show_path_as_comment` option will include the path to the file as a comment at the top of the snippet.

```python
{{ inline /metadata-ingestion/examples/library/data_quality_mcpw_rest.py show_path_as_comment }}
```

**Command Output**

Use the `{{ command-output cmd }}` directive to run subprocesses and inject the outputs into the final markdown.

{{ command-output python -c 'print("Hello world")' }}
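For example, the directive above would be replaced in the rendered page by the command's trimmed stdout:

```
Hello world
```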

This also works for multi-line scripts.

{{ command-output
source metadata-ingestion/venv/bin/activate
python -m <something>
}}

Regardless of the location of the markdown file, the commands will be executed with the working directory set to the repo root.

Only the stdout of the subprocess will be injected into the markdown. The stderr, if any, will be included as an HTML comment.

## Docs site generation process

This process is orchestrated by a combination of Gradle and Yarn tasks. The main entrypoint is the `docs-website:yarnGenerate` task, which eventually runs `yarn run generate`.

Steps:

1. Generate the combined GraphQL schema using the `docs-website:generateGraphQLSchema` Gradle task. This generates `./graphql/combined.graphql`.
2. Generate docs for ingestion sources using the `:metadata-ingestion:docGen` Gradle task.
3. Generate docs for our metadata model using the `:metadata-ingestion:modelDocGen` Gradle task.
37 changes: 37 additions & 0 deletions docs-website/generateDocsDir.ts
@@ -439,6 +439,42 @@ function markdown_process_inline_directives(
contents.content = new_content;
}

function markdown_process_command_output(
contents: matter.GrayMatterFile<string>,
filepath: string
): void {
const new_content = contents.content.replace(
/^{{\s*command-output\s*([\s\S]*?)\s*}}$/gm,
(_, command: string) => {
try {
// Change to repo root directory before executing command
const repoRoot = path.resolve(__dirname, "..");

console.log(`Executing command: ${command}`);

// Execute the command and capture output
const output = execSync(command, {
cwd: repoRoot,
encoding: "utf8",
stdio: ["pipe", "pipe", "pipe"],
});

// Return the command output
return output.trim();
} catch (error: any) {
// If there's an error, include it as a comment
const errorMessage = error.stderr
? error.stderr.toString()
: error.message;
return `${
error.stdout ? error.stdout.toString().trim() : ""
}\n<!-- Error: ${errorMessage.trim()} -->`;
}
}
);
contents.content = new_content;
}
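
For illustration, here is a rough Python equivalent of the directive expansion implemented above (a sketch only; the real implementation is the TypeScript function above, and the helper name `expand_command_output` is made up):

```python
import re
import subprocess

# Matches {{ command-output <cmd> }} directives, including multi-line commands.
DIRECTIVE = re.compile(r"^\{\{\s*command-output\s*([\s\S]*?)\s*\}\}$", re.MULTILINE)


def expand_command_output(markdown: str, repo_root: str) -> str:
    def run(match: "re.Match[str]") -> str:
        proc = subprocess.run(
            match.group(1),
            shell=True,
            cwd=repo_root,  # commands always run from the repo root
            capture_output=True,
            text=True,
        )
        if proc.returncode != 0:
            # Keep any partial stdout and embed stderr as an HTML comment,
            # mirroring the error handling above.
            return f"{proc.stdout.strip()}\n<!-- Error: {proc.stderr.strip()} -->"
        return proc.stdout.strip()

    return DIRECTIVE.sub(run, markdown)
```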

function markdown_sanitize_and_linkify(content: string): string {
// MDX escaping
content = content.replace(/</g, "&lt;");
@@ -602,6 +638,7 @@ function copy_python_wheels(): void {
markdown_rewrite_urls(contents, filepath);
markdown_enable_specials(contents, filepath);
markdown_process_inline_directives(contents, filepath);
markdown_process_command_output(contents, filepath);
//copy_platform_logos();
// console.log(contents);

@@ -0,0 +1,16 @@
### Configuration Notes

1. Grant your user access to the Report Server, as described in Microsoft's [Grant user access to a Report Server](https://docs.microsoft.com/en-us/sql/reporting-services/security/grant-user-access-to-a-report-server?view=sql-server-ver16) doc.
2. Use that user's credentials in the YAML recipe file.

### Concept mapping

| Power BI Report Server | DataHub     |
| ---------------------- | ----------- |
| `Paginated Report` | `Dashboard` |
| `Power BI Report` | `Dashboard` |
| `Mobile Report` | `Dashboard` |
| `Linked Report` | `Dashboard` |
| `Dataset, Datasource` | `N/A` |
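
For reference, a minimal ingestion run wired up through DataHub's Python API might look like the following sketch (the `powerbi-report-server` source type is real, but the config field names used here are assumptions; consult the source's configuration docs for the authoritative schema):

```python
# A hypothetical minimal recipe for the powerbi-report-server source;
# the config keys below are assumptions, not the authoritative schema.
from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {
            "type": "powerbi-report-server",
            "config": {
                # Credentials of the user granted Report Server access above.
                "username": "myuser",
                "password": "mypassword",
            },
        },
        "sink": {
            "type": "datahub-rest",
            "config": {"server": "http://localhost:8080"},
        },
    }
)
pipeline.run()
pipeline.raise_from_status()
```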


38 changes: 25 additions & 13 deletions metadata-ingestion/scripts/avro_codegen.py
@@ -346,7 +346,7 @@ def write_urn_classes(key_aspects: List[dict], urn_dir: Path) -> None:
code = """
# This file contains classes corresponding to entity URNs.
from typing import ClassVar, List, Optional, Type, TYPE_CHECKING
from typing import ClassVar, List, Optional, Type, TYPE_CHECKING, Union
import functools
from deprecated.sphinx import deprecated as _sphinx_deprecated
@@ -547,10 +547,31 @@ def generate_urn_class(entity_type: str, key_aspect: dict) -> str:
    assert fields[0]["type"] == ["null", "string"]
    fields[0]["type"] = "string"

    field_urn_type_classes = {}
    for field in fields:
        # Figure out if urn types are valid for each field.
        field_urn_type_class = None
        if field_name(field) == "platform":
            field_urn_type_class = "DataPlatformUrn"
        elif field.get("Urn"):
            if len(field.get("entityTypes", [])) == 1:
                field_entity_type = field["entityTypes"][0]
                field_urn_type_class = f"{capitalize_entity_name(field_entity_type)}Urn"
            else:
                field_urn_type_class = "Urn"

        field_urn_type_classes[field_name(field)] = field_urn_type_class

    _init_arg_parts: List[str] = []
    for field in fields:
        field_urn_type_class = field_urn_type_classes[field_name(field)]

        default = '"PROD"' if field_name(field) == "env" else None
        _arg_part = f"{field_name(field)}: {field_type(field)}"

        type_hint = field_type(field)
        if field_urn_type_class:
            type_hint = f'Union["{field_urn_type_class}", str]'
        _arg_part = f"{field_name(field)}: {type_hint}"
        if default:
            _arg_part += f" = {default}"
        _init_arg_parts.append(_arg_part)
@@ -579,16 +600,7 @@ def generate_urn_class(entity_type: str, key_aspect: dict) -> str:
        init_validation += f'if not {field_name(field)}:\n raise InvalidUrnError("{class_name} {field_name(field)} cannot be empty")\n'

        # Generalized mechanism for validating embedded urns.
        field_urn_type_class = None
        if field_name(field) == "platform":
            field_urn_type_class = "DataPlatformUrn"
        elif field.get("Urn"):
            if len(field.get("entityTypes", [])) == 1:
                field_entity_type = field["entityTypes"][0]
                field_urn_type_class = f"{capitalize_entity_name(field_entity_type)}Urn"
            else:
                field_urn_type_class = "Urn"

        field_urn_type_class = field_urn_type_classes[field_name(field)]
        if field_urn_type_class:
            init_validation += f"{field_name(field)} = str({field_name(field)})\n"
            init_validation += (
@@ -608,7 +620,7 @@ def generate_urn_class(entity_type: str, key_aspect: dict) -> str:
init_coercion += " platform_name = DataPlatformUrn.from_string(platform_name).platform_name\n"

        if field_name(field) == "platform":
            init_coercion += "platform = DataPlatformUrn(platform).urn()\n"
            init_coercion += "platform = platform.urn() if isinstance(platform, DataPlatformUrn) else DataPlatformUrn(platform).urn()\n"
        elif field_urn_type_class is None:
            # For all non-urns, run the value through the UrnEncoder.
            init_coercion += (
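
To see what the new `Union` type hints buy, here is a small sketch of how the regenerated URN classes can now be constructed (assuming the generated `DatasetUrn`/`DataPlatformUrn` classes from `datahub.metadata.urns`):

```python
from datahub.metadata.urns import DataPlatformUrn, DatasetUrn

# With the Union["DataPlatformUrn", str] hints, `platform` can be passed
# either as a plain string or as an existing DataPlatformUrn; the new
# init_coercion line normalizes both to the same urn string.
urn_from_str = DatasetUrn(platform="snowflake", name="db.schema.table", env="PROD")
urn_from_urn = DatasetUrn(platform=DataPlatformUrn("snowflake"), name="db.schema.table")
assert str(urn_from_str) == str(urn_from_urn)
```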
