diff --git a/README.md b/README.md
index 757c0fb50364e..3588d9941913b 100644
--- a/README.md
+++ b/README.md
@@ -130,6 +130,7 @@ Here are some of the major database solutions that are supported:
+
**A more comprehensive list of supported databases** along with the configuration instructions can be found [here](https://superset.apache.org/docs/databases/installing-database-drivers).
diff --git a/docs/docs/databases/doris.mdx b/docs/docs/databases/doris.mdx
new file mode 100644
index 0000000000000..62c16afeb3e1a
--- /dev/null
+++ b/docs/docs/databases/doris.mdx
@@ -0,0 +1,26 @@
+---
+title: Apache Doris
+hide_title: true
+sidebar_position: 5
+version: 1
+---
+
+## Doris
+
+The [pydoris](https://pypi.org/project/pydoris/) library is the recommended way to connect to Apache Doris through SQLAlchemy.
+
+You'll need the following setting values to form the connection string:
+
+- **User**: User Name
+- **Password**: Password
+- **Host**: Doris FE Host
+- **Port**: Doris FE port
+- **Catalog**: Catalog Name
+- **Database**: Database Name
+
+
+Here's what the connection string looks like:
+
+```
+doris://<User>:<Password>@<Host>:<Port>/<Catalog>.<Database>
+```
diff --git a/docs/docs/databases/installing-database-drivers.mdx b/docs/docs/databases/installing-database-drivers.mdx
index f698b7ab8ee2a..f11b4ec5eb722 100644
--- a/docs/docs/databases/installing-database-drivers.mdx
+++ b/docs/docs/databases/installing-database-drivers.mdx
@@ -25,6 +25,7 @@ Some of the recommended packages are shown below. Please refer to [setup.py](htt
| Database | PyPI package | Connection String |
| --------------------------------------------------------- | ---------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
| [Amazon Athena](/docs/databases/athena) | `pip install pyathena[pandas]` , `pip install PyAthenaJDBC` | `awsathena+rest://{aws_access_key_id}:{aws_secret_access_key}@athena.{region_name}.amazonaws.com/{schema_name}?s3_staging_dir={s3_staging_dir}&... ` |
+| [Apache Doris](/docs/databases/doris) | `pip install pydoris` | `doris://<User>:<Password>@<Host>:<Port>/<Catalog>.<Database>` |
| [Amazon DynamoDB](/docs/databases/dynamodb) | `pip install pydynamodb` | `dynamodb://{access_key_id}:{secret_access_key}@dynamodb.{region_name}.amazonaws.com?connector=superset` |
| [Amazon Redshift](/docs/databases/redshift) | `pip install sqlalchemy-redshift` | ` redshift+psycopg2://:@:5439/` |
| [Apache Drill](/docs/databases/drill) | `pip install sqlalchemy-drill` | `drill+sadrill:// For JDBC drill+jdbc://` |
diff --git a/docs/src/resources/data.js b/docs/src/resources/data.js
index a07be552673ef..42cf835a495b4 100644
--- a/docs/src/resources/data.js
+++ b/docs/src/resources/data.js
@@ -117,4 +117,9 @@ export const Databases = [
href: 'https://www.microsoft.com/en-us/sql-server',
imgName: 'msql.png',
},
+ {
+ title: 'Apache Doris',
+ href: 'https://doris.apache.org/',
+ imgName: 'doris.png',
+ },
];
diff --git a/docs/static/img/databases/doris.png b/docs/static/img/databases/doris.png
new file mode 100644
index 0000000000000..4d88f2a36cf72
Binary files /dev/null and b/docs/static/img/databases/doris.png differ
diff --git a/setup.py b/setup.py
index e4d437b4d1077..29df567e04a36 100644
--- a/setup.py
+++ b/setup.py
@@ -205,6 +205,7 @@ def get_git_sha() -> str:
"vertica": ["sqlalchemy-vertica-python>=0.5.9, < 0.6"],
"netezza": ["nzalchemy>=11.0.2"],
"starrocks": ["starrocks>=1.0.0"],
+ "doris": ["pydoris>=1.0.0, <2.0.0"],
},
python_requires="~=3.9",
author="Apache Software Foundation",
diff --git a/superset-frontend/src/assets/images/doris.png b/superset-frontend/src/assets/images/doris.png
new file mode 100644
index 0000000000000..4d88f2a36cf72
Binary files /dev/null and b/superset-frontend/src/assets/images/doris.png differ
diff --git a/superset/db_engine_specs/doris.py b/superset/db_engine_specs/doris.py
new file mode 100644
index 0000000000000..e502f5bda2be7
--- /dev/null
+++ b/superset/db_engine_specs/doris.py
@@ -0,0 +1,278 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import logging
+import re
+from re import Pattern
+from typing import Any, Optional
+from urllib import parse
+
+from flask_babel import gettext as __
+from sqlalchemy import Float, Integer, Numeric, String, TEXT, types
+from sqlalchemy.engine.url import URL
+from sqlalchemy.sql.type_api import TypeEngine
+
+from superset.db_engine_specs.mysql import MySQLEngineSpec
+from superset.errors import SupersetErrorType
+from superset.utils.core import GenericDataType
+
+# Error-message patterns; group names feed the %(name)s custom_errors templates
+CONNECTION_ACCESS_DENIED_REGEX = re.compile(
+    "Access denied for user '(?P<username>.*?)'"
+)
+CONNECTION_INVALID_HOSTNAME_REGEX = re.compile(
+    "Unknown Doris server host '(?P<hostname>.*?)'"
+)
+CONNECTION_UNKNOWN_DATABASE_REGEX = re.compile("Unknown database '(?P<database>.*?)'")
+CONNECTION_HOST_DOWN_REGEX = re.compile(
+    "Can't connect to Doris server on '(?P<hostname>.*?)'"
+)
+SYNTAX_ERROR_REGEX = re.compile(
+    "check the manual that corresponds to your MySQL server "
+    "version for the right syntax to use near '(?P<server_error>.*)"
+)
+
+logger = logging.getLogger(__name__)
+
+
+class TINYINT(Integer):
+ __visit_name__ = "TINYINT"
+
+
+class LARGEINT(Integer):
+ __visit_name__ = "LARGEINT"
+
+
+class DOUBLE(Float):
+ __visit_name__ = "DOUBLE"
+
+
+class HLL(Numeric):
+ __visit_name__ = "HLL"
+
+
+class BITMAP(Numeric):
+ __visit_name__ = "BITMAP"
+
+
+class QuantileState(Numeric):
+ __visit_name__ = "QUANTILE_STATE"
+
+
+class AggState(Numeric):
+ __visit_name__ = "AGG_STATE"
+
+
+class ARRAY(TypeEngine):
+ __visit_name__ = "ARRAY"
+
+ @property
+ def python_type(self) -> Optional[type[list[Any]]]:
+ return list
+
+
+class MAP(TypeEngine):
+ __visit_name__ = "MAP"
+
+ @property
+ def python_type(self) -> Optional[type[dict[Any, Any]]]:
+ return dict
+
+
+class STRUCT(TypeEngine):
+ __visit_name__ = "STRUCT"
+
+ @property
+ def python_type(self) -> Optional[type[Any]]:
+ return None
+
+
+class DorisEngineSpec(MySQLEngineSpec):
+ engine = "pydoris"
+ engine_aliases = {"doris"}
+ engine_name = "Apache Doris"
+ max_column_name_length = 64
+ default_driver = "pydoris"
+ sqlalchemy_uri_placeholder = (
+ "doris://user:password@host:port/catalog.db[?key=value&key=value...]"
+ )
+ encryption_parameters = {"ssl": "0"}
+ supports_dynamic_schema = True
+
+ column_type_mappings = ( # type: ignore
+ (
+ re.compile(r"^tinyint", re.IGNORECASE),
+ TINYINT(),
+ GenericDataType.NUMERIC,
+ ),
+ (
+ re.compile(r"^largeint", re.IGNORECASE),
+ LARGEINT(),
+ GenericDataType.NUMERIC,
+ ),
+ (
+ re.compile(r"^decimal.*", re.IGNORECASE),
+ types.DECIMAL(),
+ GenericDataType.NUMERIC,
+ ),
+ (
+ re.compile(r"^double", re.IGNORECASE),
+ DOUBLE(),
+ GenericDataType.NUMERIC,
+ ),
+ (
+ re.compile(r"^varchar(\((\d+)\))*$", re.IGNORECASE),
+ types.VARCHAR(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^char(\((\d+)\))*$", re.IGNORECASE),
+ types.CHAR(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^json.*", re.IGNORECASE),
+ types.JSON(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^binary.*", re.IGNORECASE),
+ types.BINARY(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^quantile_state", re.IGNORECASE),
+ QuantileState(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^agg_state.*", re.IGNORECASE),
+ AggState(),
+ GenericDataType.STRING,
+ ),
+ (re.compile(r"^hll", re.IGNORECASE), HLL(), GenericDataType.STRING),
+ (
+ re.compile(r"^bitmap", re.IGNORECASE),
+ BITMAP(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^array.*", re.IGNORECASE),
+ ARRAY(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^map.*", re.IGNORECASE),
+ MAP(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^struct.*", re.IGNORECASE),
+ STRUCT(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^datetime.*", re.IGNORECASE),
+ types.DATETIME(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^date.*", re.IGNORECASE),
+ types.DATE(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^text.*", re.IGNORECASE),
+ TEXT(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^string.*", re.IGNORECASE),
+ String(),
+ GenericDataType.STRING,
+ ),
+ )
+
+ custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
+ CONNECTION_ACCESS_DENIED_REGEX: (
+ __('Either the username "%(username)s" or the password is incorrect.'),
+ SupersetErrorType.CONNECTION_ACCESS_DENIED_ERROR,
+ {"invalid": ["username", "password"]},
+ ),
+ CONNECTION_INVALID_HOSTNAME_REGEX: (
+ __('Unknown Doris server host "%(hostname)s".'),
+ SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR,
+ {"invalid": ["host"]},
+ ),
+ CONNECTION_HOST_DOWN_REGEX: (
+ __('The host "%(hostname)s" might be down and can\'t be reached.'),
+ SupersetErrorType.CONNECTION_HOST_DOWN_ERROR,
+ {"invalid": ["host", "port"]},
+ ),
+ CONNECTION_UNKNOWN_DATABASE_REGEX: (
+ __('Unable to connect to database "%(database)s".'),
+ SupersetErrorType.CONNECTION_UNKNOWN_DATABASE_ERROR,
+ {"invalid": ["database"]},
+ ),
+ SYNTAX_ERROR_REGEX: (
+ __(
+ 'Please check your query for syntax errors near "%(server_error)s". '
+ "Then, try running your query again."
+ ),
+ SupersetErrorType.SYNTAX_ERROR,
+ {},
+ ),
+ }
+
+    @classmethod
+    def adjust_engine_params(
+        cls,
+        uri: URL,
+        connect_args: dict[str, Any],
+        catalog: Optional[str] = None,
+        schema: Optional[str] = None,
+    ) -> tuple[URL, dict[str, Any]]:
+        """Rewrite the URI database as ``catalog.schema`` when a schema is given."""
+        current = uri.database
+        if not (schema and current):
+            return uri, connect_args
+
+        # Keep the catalog from "catalog.db"; a bare database gets "internal".
+        prefix = current.split(".")[0] if "." in current else "internal"
+        target = prefix + "." + parse.quote(schema, safe="")
+        uri = uri.set(database=target)
+        return uri, connect_args
+
+    @classmethod
+    def get_schema_from_engine_params(
+        cls,
+        sqlalchemy_uri: URL,
+        connect_args: dict[str, Any],
+    ) -> Optional[str]:
+        """
+        Return the schema configured in the URI, or ``None`` if there is none.
+
+        For doris the SQLAlchemy URI looks like this:
+
+            doris://localhost:9030/catalog.database
+
+        A URI with no database part, or with only a catalog, has no schema.
+        """
+        # ``URL.database`` is None when the URI names no database at all.
+        database = (sqlalchemy_uri.database or "").strip("/")
+        if "." not in database:
+            return None
+        return parse.unquote(database.split(".")[1])
diff --git a/tests/unit_tests/db_engine_specs/test_doris.py b/tests/unit_tests/db_engine_specs/test_doris.py
new file mode 100644
index 0000000000000..d7444f8d2d62e
--- /dev/null
+++ b/tests/unit_tests/db_engine_specs/test_doris.py
@@ -0,0 +1,147 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, Optional
+
+import pytest
+from sqlalchemy import JSON, types
+from sqlalchemy.engine.url import make_url
+
+from superset.db_engine_specs.doris import (
+ AggState,
+ ARRAY,
+ BITMAP,
+ DOUBLE,
+ HLL,
+ LARGEINT,
+ MAP,
+ QuantileState,
+ STRUCT,
+ TINYINT,
+)
+from superset.utils.core import GenericDataType
+from tests.unit_tests.db_engine_specs.utils import assert_column_spec
+
+
+@pytest.mark.parametrize(
+ "native_type,sqla_type,attrs,generic_type,is_dttm",
+ [
+ # Numeric
+ ("tinyint", TINYINT, None, GenericDataType.NUMERIC, False),
+ ("largeint", LARGEINT, None, GenericDataType.NUMERIC, False),
+ ("decimal(38,18)", types.DECIMAL, None, GenericDataType.NUMERIC, False),
+ ("decimalv3(38,18)", types.DECIMAL, None, GenericDataType.NUMERIC, False),
+ ("double", DOUBLE, None, GenericDataType.NUMERIC, False),
+ # String
+ ("char(10)", types.CHAR, None, GenericDataType.STRING, False),
+ ("varchar(65533)", types.VARCHAR, None, GenericDataType.STRING, False),
+ ("binary", types.BINARY, None, GenericDataType.STRING, False),
+ ("text", types.TEXT, None, GenericDataType.STRING, False),
+ ("string", types.String, None, GenericDataType.STRING, False),
+ # Date
+ ("datetimev2", types.DateTime, None, GenericDataType.STRING, False),
+ ("datev2", types.Date, None, GenericDataType.STRING, False),
+ # Complex type
+ ("array", ARRAY, None, GenericDataType.STRING, False),
+ ("map", MAP, None, GenericDataType.STRING, False),
+ ("struct", STRUCT, None, GenericDataType.STRING, False),
+ ("json", JSON, None, GenericDataType.STRING, False),
+ ("jsonb", JSON, None, GenericDataType.STRING, False),
+ ("bitmap", BITMAP, None, GenericDataType.STRING, False),
+ ("hll", HLL, None, GenericDataType.STRING, False),
+ ("quantile_state", QuantileState, None, GenericDataType.STRING, False),
+ ("agg_state", AggState, None, GenericDataType.STRING, False),
+ ],
+)
+def test_get_column_spec(
+ native_type: str,
+ sqla_type: type[types.TypeEngine],
+ attrs: Optional[dict[str, Any]],
+ generic_type: GenericDataType,
+ is_dttm: bool,
+) -> None:
+ from superset.db_engine_specs.doris import DorisEngineSpec as spec
+
+ assert_column_spec(spec, native_type, sqla_type, attrs, generic_type, is_dttm)
+
+
+@pytest.mark.parametrize(
+ "sqlalchemy_uri,connect_args,return_schema,return_connect_args",
+ [
+ (
+ "doris://user:password@host/db1",
+ {"param1": "some_value"},
+ "db1",
+ {"param1": "some_value"},
+ ),
+ (
+ "pydoris://user:password@host/db1",
+ {"param1": "some_value"},
+ "db1",
+ {"param1": "some_value"},
+ ),
+ (
+ "doris://user:password@host/catalog1.db1",
+ {"param1": "some_value"},
+ "catalog1.db1",
+ {"param1": "some_value"},
+ ),
+ (
+ "pydoris://user:password@host/catalog1.db1",
+ {"param1": "some_value"},
+ "catalog1.db1",
+ {"param1": "some_value"},
+ ),
+ ],
+)
+def test_adjust_engine_params(
+ sqlalchemy_uri: str,
+ connect_args: dict[str, Any],
+ return_schema: str,
+ return_connect_args: dict[str, Any],
+) -> None:
+ from superset.db_engine_specs.doris import DorisEngineSpec
+
+ url = make_url(sqlalchemy_uri)
+ returned_url, returned_connect_args = DorisEngineSpec.adjust_engine_params(
+ url, connect_args
+ )
+ assert returned_url.database == return_schema
+ assert returned_connect_args == return_connect_args
+
+
+def test_get_schema_from_engine_params() -> None:
+ """
+ Test the ``get_schema_from_engine_params`` method.
+ """
+ from superset.db_engine_specs.doris import DorisEngineSpec
+
+ assert (
+ DorisEngineSpec.get_schema_from_engine_params(
+ make_url("doris://localhost:9030/hive.test"),
+ {},
+ )
+ == "test"
+ )
+
+ assert (
+ DorisEngineSpec.get_schema_from_engine_params(
+ make_url("doris://localhost:9030/hive"),
+ {},
+ )
+ is None
+ )