Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a DataGemma UI #4913

Merged
merged 6 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build/web_server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

FROM nikolaik/python-nodejs:python3.11-nodejs18-slim as base

RUN apt-get update && apt-get -y upgrade
RUN apt-get update && apt-get -y upgrade && apt-get install -y git

ARG ENV
ENV ENV=${ENV}
Expand Down
50 changes: 50 additions & 0 deletions server/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,36 @@
DEFAULT_NL_ROOT = "http://127.0.0.1:6060"


def _get_api_key(env_keys=(), gcp_project='', gcp_path=''):
  """Gets an api key first from the environment, then from GCP secrets.

  Args:
    env_keys: An iterable of environment variable names to try, in order. The
      first one that is set to a non-empty value wins. None is treated as
      empty.
    gcp_project: The GCP project to use to get the api key from GCP secrets
    gcp_path: The identifier of the secret to read; it is passed as the secret
      argument when building the secret version path

  Returns:
    The API key if one is found, otherwise an empty string.

  TODO: use this function everywhere else in this file
  """
  # Try to get the key from the environment. Unset and empty variables are
  # both skipped so that a blank value falls through to the next source.
  for env_key in env_keys or ():
    env_value = os.environ.get(env_key)
    if env_value:
      return env_value

  # Try to get the key from secrets. Both the project and the secret id are
  # needed to build the secret path, so skip the lookup if either is missing.
  if gcp_project and gcp_path:
    secret_client = secretmanager.SecretManagerServiceClient()
    secret_name = secret_client.secret_version_path(gcp_project, gcp_path,
                                                    'latest')
    secret_response = secret_client.access_secret_version(name=secret_name)
    # Drop any newlines stored alongside the secret value.
    return secret_response.payload.data.decode('UTF-8').replace('\n', '')

  # If key is not found, return an empty string
  return ''


def register_routes_base_dc(app):
# apply the blueprints for all apps
from server.routes.dev import html as dev_html
Expand Down Expand Up @@ -132,6 +162,23 @@ def register_routes_sustainability(app):
)


def register_routes_datagemma(app, cfg):
  """Registers the DataGemma blueprints and loads the api keys they use.

  Args:
    app: The app to register the blueprints and config values on
    cfg: The instance config; its SECRET_PROJECT is the GCP project used when
      falling back to GCP secrets for an api key
  """
  # Blueprints for the DataGemma api and page
  from server.routes.dev_datagemma import api as datagemma_api
  app.register_blueprint(datagemma_api.bp)
  from server.routes.dev_datagemma import html as datagemma_html
  app.register_blueprint(datagemma_html.bp)

  # Each app config entry shares its name with the environment variable that is
  # tried first; the second item is the secret to fall back to.
  key_sources = (
      ('GEMINI_API_KEY', 'gemini-api-key'),
      ('DC_NL_API_KEY', 'dc-nl-api-key'),
  )
  for config_name, secret_id in key_sources:
    app.config[config_name] = _get_api_key([config_name], cfg.SECRET_PROJECT,
                                           secret_id)


def register_routes_common(app):
# apply blueprints for main app
from server.routes import static
Expand Down Expand Up @@ -282,6 +329,9 @@ def create_app(nl_root=DEFAULT_NL_ROOT):
if cfg.SHOW_SUSTAINABILITY:
register_routes_sustainability(app)

if cfg.ENABLE_DATAGEMMA:
register_routes_datagemma(app, cfg)

# Load topic page config
topic_page_configs = libutil.get_topic_page_config()
app.config['TOPIC_PAGE_CONFIG'] = topic_page_configs
Expand Down
3 changes: 3 additions & 0 deletions server/app_env/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,6 @@ class Config:
# Whether to enable BigQuery for instance. This is primarily used for
# accessing the observation browser pages.
ENABLE_BQ = False
# Whether to enable the DataGemma UI for this instance. This UI should only be
# enabled for internal instances.
ENABLE_DATAGEMMA = False
1 change: 1 addition & 0 deletions server/app_env/autopush.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ class Config(_base.Config):
HIDE_DEBUG = False
USE_MEMCACHE = False
ENABLE_BQ = True
ENABLE_DATAGEMMA = True
1 change: 1 addition & 0 deletions server/app_env/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class Config(_base.Config):
SCHEME = 'http'
USE_MEMCACHE = False
ENABLE_BQ = True
ENABLE_DATAGEMMA = True


class DCConfig(Config):
Expand Down
2 changes: 2 additions & 0 deletions server/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,5 @@ typing-extensions==4.10.0
webdriver-manager==4.0.0
Werkzeug==3.0.6
wheel==0.38.1
# TODO: publish the library to PyPI so we don't have to install from git
git+https://github.com/datacommonsorg/llm-tools.git
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should publish this to pypi down the road to avoid having to do the git dependency install

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added TODO

82 changes: 82 additions & 0 deletions server/routes/dev_datagemma/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Endpoints for DataGemma page"""

import json

from data_gemma import DataCommons
from data_gemma import GoogleAIStudio
from data_gemma import RAGFlow
from data_gemma import RIGFlow
from data_gemma import VertexAI
import flask
from flask import current_app
from flask import request
from flask import Response

# Define blueprint. Routes registered on it are served under
# /api/dev/datagemma.
bp = flask.Blueprint('dev_datagemma_api',
                     __name__,
                     url_prefix='/api/dev/datagemma')

# Accepted values for the `mode` query parameter of the /query endpoint.
_RIG_MODE = 'rig'
_RAG_MODE = 'rag'

# Vertex AI clients used by the flows in this module: one for the RIG flow and
# one for the question step of the RAG flow. They are created once, when this
# module is imported.
# TODO: consider moving these specifications to a config somewhere
_VERTEX_AI_RIG = VertexAI(project_id='datcom-website-dev',
                          location='us-central1',
                          prediction_endpoint_id='4999251772590522368')
_VERTEX_AI_RAG = VertexAI(project_id='datcom-website-dev',
                          location='us-central1',
                          prediction_endpoint_id='3459865124959944704')


def _get_datagemma_result(query, mode):
  """Runs the datagemma flow selected by `mode` on a query.

  Args:
    query: Query to run the datagemma flow on
    mode: Which flow to run, either _RIG_MODE or _RAG_MODE

  Returns:
    The result of running the flow, or None if mode matches neither flow. The
    result is a FlowResponse as defined here:
    https://github.com/datacommonsorg/llm-tools/blob/main/data_gemma/base.py#L116
  """
  # Both flows fetch their data through the same Data Commons NL service.
  data_fetcher = DataCommons(api_key=current_app.config['DC_NL_API_KEY'])

  if mode == _RIG_MODE:
    rig_flow = RIGFlow(llm=_VERTEX_AI_RIG, data_fetcher=data_fetcher)
    return rig_flow.query(query=query)

  if mode == _RAG_MODE:
    # The RAG flow uses a separate Gemini model to produce the answer.
    answer_llm = GoogleAIStudio(
        model='gemini-1.5-pro', api_keys=[current_app.config['GEMINI_API_KEY']])
    rag_flow = RAGFlow(llm_question=_VERTEX_AI_RAG,
                       llm_answer=answer_llm,
                       data_fetcher=data_fetcher)
    return rag_flow.query(query=query)

  return None


@bp.route('/query')
def datagemma_query():
  """Runs a DataGemma flow on a query and returns the result as JSON.

  Query parameters:
    query: The query to run; required
    mode: The flow to run, one of _RIG_MODE or _RAG_MODE; required

  Returns:
    A JSON response with 'answer' and 'debug' fields, or a 400 error message
    if either parameter is missing or the mode is not recognized.
  """
  params = request.args
  query = params.get('query')
  mode = params.get('mode')

  if not query:
    return 'error: must provide a query field', 400
  # A missing or empty mode is never one of the valid modes, so the membership
  # test alone covers both cases.
  if mode not in (_RIG_MODE, _RAG_MODE):
    return f'error: must provide a mode field with values {_RIG_MODE} or {_RAG_MODE}', 400

  flow_response = _get_datagemma_result(query, mode)
  if flow_response:
    payload = {
        'answer': flow_response.answer(),
        'debug': flow_response.debug()
    }
  else:
    # No result from the flow: respond with empty fields.
    payload = {'answer': '', 'debug': ''}
  return Response(json.dumps(payload), 200, mimetype='application/json')
24 changes: 24 additions & 0 deletions server/routes/dev_datagemma/html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""DataGemma page routes"""

import flask

# Define blueprint. Routes registered on it are served under /dev/datagemma.
bp = flask.Blueprint("dev-datagemma", __name__, url_prefix='/dev/datagemma')


@bp.route('/')
def dev_datagemma():
  """Renders the DataGemma page template."""
  template_name = 'dev/datagemma.html'
  return flask.render_template(template_name)
34 changes: 34 additions & 0 deletions server/templates/dev/datagemma.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{#
Copyright 2025 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
#}
{%- extends BASE_HTML -%}

{# Page-level variables; presumably read by the base template (confirm). #}
{% set main_id = 'dev-datagemma' %}
{% set page_id = 'page-dev-datagemma' %}
{% set title = 'Datagemma' %}
{% set is_hide_header_search_bar = 'true' %}

{% block head %}
  <link rel="stylesheet" href="https://fonts.googleapis.com/icon?family=Material+Icons">
{% endblock %}

{% block content %}
  {# Empty container; presumably datagemma.js renders into it (confirm). #}
  <div id="datagemma"></div>
{% endblock %}

{% block footer %}
  {# The src value is quoted because the generated URL carries a query string,
     and "=" is not allowed in an unquoted attribute value. #}
  <script src="{{ url_for('static', filename='datagemma.js', t=config['GAE_VERSION']) }}"></script>
{% endblock %}

Loading