Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a DataGemma UI #4913

Merged
merged 6 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build/web_server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

FROM nikolaik/python-nodejs:python3.11-nodejs18-slim as base

RUN apt-get update && apt-get -y upgrade
RUN apt-get update && apt-get -y upgrade && apt-get install -y git

ARG ENV
ENV ENV=${ENV}
Expand Down
40 changes: 40 additions & 0 deletions server/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,26 @@
DEFAULT_NL_ROOT = "http://127.0.0.1:6060"


# Helper method to get an api key first from the environment, then from GCP secrets.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: use docstrings for comments like go/pystyle#function-docs

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

# TODO: use this method everywhere else that is applicable in this file
def _get_api_key(env_keys=None, secret_project='', secret_path=''):
  """Gets an API key from the environment, falling back to GCP secrets.

  Args:
    env_keys: Iterable of environment variable names to check, in order. The
      first variable that is set to a non-empty value wins. None (the default)
      is treated the same as an empty list.
    secret_project: GCP project to read the secret from. The secret lookup is
      skipped unless both this and secret_path are non-empty.
    secret_path: Id of the secret to read; its 'latest' version is accessed.

  Returns:
    The key as a string. Keys read from secrets are decoded as UTF-8 and have
    newline characters removed. Returns an empty string if no key is found.
  """
  # Try to get the key from the environment. A None default (rather than a
  # shared mutable list) is normalized to an empty iterable here.
  for env_key in env_keys or ():
    value = os.environ.get(env_key)
    if value:
      return value

  # Try to get the key from secrets
  if secret_project and secret_path:
    secret_client = secretmanager.SecretManagerServiceClient()
    secret_name = secret_client.secret_version_path(secret_project, secret_path,
                                                    'latest')
    secret_response = secret_client.access_secret_version(name=secret_name)
    return secret_response.payload.data.decode('UTF-8').replace('\n', '')

  # If key is not found, return an empty string
  return ''


def register_routes_base_dc(app):
# apply the blueprints for all apps
from server.routes.dev import html as dev_html
Expand Down Expand Up @@ -132,6 +152,23 @@ def register_routes_sustainability(app):
)


def register_routes_datagemma(app, cfg):
  """Registers the DataGemma blueprints and stores its API keys on the app.

  Args:
    app: The Flask app to register the blueprints and config values on.
    cfg: App config object; its SECRET_PROJECT is used for the secrets lookup.
  """
  # Install blueprints for the DataGemma page (API first, then HTML)
  from server.routes.dev_datagemma import api as dev_datagemma_api
  app.register_blueprint(dev_datagemma_api.bp)
  from server.routes.dev_datagemma import html as dev_datagemma_html
  app.register_blueprint(dev_datagemma_html.bp)

  # Each key is read from the environment variable of the same name as the
  # config entry, falling back to the named secret: first the Gemini api key,
  # then the DC NL api key.
  key_sources = (
      ('GEMINI_API_KEY', 'gemini-api-key'),
      ('DC_NL_API_KEY', 'dc-nl-api-key'),
  )
  for config_name, secret_id in key_sources:
    app.config[config_name] = _get_api_key([config_name], cfg.SECRET_PROJECT,
                                           secret_id)


def register_routes_common(app):
# apply blueprints for main app
from server.routes import static
Expand Down Expand Up @@ -282,6 +319,9 @@ def create_app(nl_root=DEFAULT_NL_ROOT):
if cfg.SHOW_SUSTAINABILITY:
register_routes_sustainability(app)

if cfg.ENABLE_DATAGEMMA:
register_routes_datagemma(app, cfg)

# Load topic page config
topic_page_configs = libutil.get_topic_page_config()
app.config['TOPIC_PAGE_CONFIG'] = topic_page_configs
Expand Down
3 changes: 3 additions & 0 deletions server/app_env/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,6 @@ class Config:
# Whether to enable BigQuery for instance. This is primarily used for
# accessing the observation browser pages.
ENABLE_BQ = False
# Whether to enable the DataGemma UI for this instance. This UI should only be
# enabled for internal instances.
ENABLE_DATAGEMMA = False
1 change: 1 addition & 0 deletions server/app_env/autopush.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ class Config(_base.Config):
HIDE_DEBUG = False
USE_MEMCACHE = False
ENABLE_BQ = True
ENABLE_DATAGEMMA = True
1 change: 1 addition & 0 deletions server/app_env/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class Config(_base.Config):
SCHEME = 'http'
USE_MEMCACHE = False
ENABLE_BQ = True
ENABLE_DATAGEMMA = True


class DCConfig(Config):
Expand Down
1 change: 1 addition & 0 deletions server/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,4 @@ typing-extensions==4.10.0
webdriver-manager==4.0.0
Werkzeug==3.0.6
wheel==0.38.1
git+https://github.com/datacommonsorg/llm-tools.git
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should publish this to PyPI down the road so that we can avoid the git dependency install.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added TODO

69 changes: 69 additions & 0 deletions server/routes/dev_datagemma/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Endpoints for DataGemma page"""

from data_gemma import DataCommons
from data_gemma import GoogleAIStudio
from data_gemma import RAGFlow
from data_gemma import RIGFlow
from data_gemma import VertexAI
import flask
from flask import current_app
from flask import request

# Define blueprint; all routes in this module are served under
# /api/dev/datagemma
bp = flask.Blueprint('dev_datagemma_api',
                     __name__,
                     url_prefix='/api/dev/datagemma')

# Accepted values for the `mode` query parameter of the /query endpoint
_RIG_MODE = 'rig'
_RAG_MODE = 'rag'

# TODO: consider moving these specifications to a config somewhere
# Vertex AI client passed as the `llm` of the RIG flow
_VERTEX_AI_RIG = VertexAI(project_id='datcom-website-dev',
                          location='us-central1',
                          prediction_endpoint_id='4999251772590522368')
# Vertex AI client passed as the `llm_question` of the RAG flow
_VERTEX_AI_RAG = VertexAI(project_id='datcom-website-dev',
                          location='us-central1',
                          prediction_endpoint_id='3459865124959944704')


def _get_datagemma_answer(query, mode):
  """Runs a query through the DataGemma flow selected by mode.

  Args:
    query: The query to pass to the flow.
    mode: _RIG_MODE to use RIGFlow or _RAG_MODE to use RAGFlow.

  Returns:
    The value of answer() on the flow result, or an empty string if the mode
    is not recognized or the flow result is falsy.
  """
  data_fetcher = DataCommons(api_key=current_app.config['DC_NL_API_KEY'])

  if mode == _RIG_MODE:
    flow = RIGFlow(llm=_VERTEX_AI_RIG, data_fetcher=data_fetcher)
  elif mode == _RAG_MODE:
    # RAG uses the Vertex AI endpoint for the question step and Gemini for
    # the answer step.
    answer_llm = GoogleAIStudio(
        model='gemini-1.5-pro', api_keys=[current_app.config['GEMINI_API_KEY']])
    flow = RAGFlow(llm_question=_VERTEX_AI_RAG,
                   llm_answer=answer_llm,
                   data_fetcher=data_fetcher)
  else:
    return ''

  flow_result = flow.query(query=query)
  return flow_result.answer() if flow_result else ''


@bp.route('/query')
def datagemma_query():
  """Answers a DataGemma query given `query` and `mode` URL parameters.

  Returns:
    A (body, status) tuple: the answer with status 200, or an error message
    with status 400 when `query` is missing or `mode` is not a supported
    value.
  """
  params = request.args
  query = params.get('query')
  mode = params.get('mode')

  if not query:
    return "error: must provide a query field", 400
  # A missing or empty mode is never one of the supported values, so a single
  # membership test covers both cases.
  if mode not in (_RIG_MODE, _RAG_MODE):
    return f'error: must provide a mode field with values {_RIG_MODE} or {_RAG_MODE}', 400

  return _get_datagemma_answer(query, mode), 200
24 changes: 24 additions & 0 deletions server/routes/dev_datagemma/html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""DataGemma page routes"""

import flask

# Define blueprint; routes in this module are served under /dev/datagemma
bp = flask.Blueprint("dev-datagemma", __name__, url_prefix='/dev/datagemma')


@bp.route('/')
def dev_datagemma():
  """Renders the DataGemma page template."""
  template_name = 'dev/datagemma.html'
  return flask.render_template(template_name)
35 changes: 35 additions & 0 deletions server/templates/dev/datagemma.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{#
Copyright 2025 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
#}
{%- extends BASE_HTML -%}

{% set main_id = 'dev-datagemma' %}
{% set page_id = 'page-dev-datagemma' %}
{% set title = 'Datagemma' %}
{% set is_hide_header_search_bar = 'true' %}

{% block head %}
  {# Page stylesheet plus the Material Icons font stylesheet. Attribute values
     are quoted so that generated URLs are always parsed as a single value. #}
  <link rel="stylesheet" href="{{ url_for('static', filename='css/datagemma.min.css') }}">
  <link rel="stylesheet" href="https://fonts.googleapis.com/icon?family=Material+Icons">
{% endblock %}

{% block content %}
  <div id="datagemma"></div>
{% endblock %}

{% block footer %}
  {# The t argument appends the GAE version to the script URL as a query
     parameter (presumably for cache busting - confirm). The "=" in that query
     string is not permitted in an unquoted attribute value, hence the quotes. #}
  <script src="{{ url_for('static', filename='datagemma.js', t=config['GAE_VERSION']) }}"></script>
{% endblock %}

98 changes: 98 additions & 0 deletions static/css/datagemma.scss
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

@import "base";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For new frontend code, can you look into using "emotion" instead of scss? Examples in the new homepage: https://github.com/datacommonsorg/website/blob/master/static/js/apps/homepage/components/home_hero.tsx#L48

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


// Full-height column layout for the DataGemma page.
#page-dev-datagemma {
  height: 100%;

  body {
    height: 100%;
  }

  // Stack the children of #main vertically across the full page height.
  #main {
    display: flex;
    flex-direction: column;
    height: 100%;
  }

  // The <main> element grows to take the remaining vertical space.
  main {
    width: 100%;
    flex-grow: 1;
  }
}

// Horizontal padding around the #datagemma container.
#datagemma {
  padding: 0 24px;
}

// Remove the bottom margin on labels.
label {
  margin-bottom: 0;
}

// Page heading.
.title {
  margin-bottom: 24px;
  color: #212529;
  font-style: normal;
  font-weight: 400;
  font-size: 28px;
  line-height: 36px;
}

// Input area: children are stacked vertically and left-aligned.
.inputs {
  display: flex;
  flex-direction: column;
  gap: 8px;
  align-items: flex-start;

  // Full-width row with vertically centered children.
  .query-input {
    display: flex;
    gap: 8px;
    align-items: center;
    width: 100%;
  }

  // Column of mode options, offset from the left edge.
  .mode-input {
    display: flex;
    flex-direction: column;
    gap: 8px;
    width: fit-content;
    margin-left: 24px;

    .mode-option {
      display: flex;
      align-items: center;
    }
  }
}

// Answer area shown below the inputs.
.answer {
  margin-top: 24px;

  p {
    margin-bottom: 0.3rem;
    margin-top: 1rem;
  }

  // Draw borders around tables and their cells.
  table,
  th,
  td {
    border: 1px solid;
  }

  .footnotes {
    margin-top: 1rem;
  }
}

Loading