From efd01b89c353e672117f04ae2877f63cbdd74d81 Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Mon, 21 Oct 2024 21:37:57 -0700 Subject: [PATCH 1/7] Added #121 --- utils/db_utils.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index c3ffc91..a44b4c8 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -176,7 +176,24 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): # Add primary modes from the sensed, inferred and ble summaries. Note that we do this # **before** filtering the `all_trip_columns` because the # *_section_summary columns are not currently valid - get_max_mode_from_summary = lambda md: max(md["distance"], key=md["distance"].get) if len(md["distance"]) > 0 else "INVALID" + # Check if 'md' is not a dictionary or does not contain the key 'distance' + # or if 'md["distance"]' is not a dictionary. + # If any of these conditions are true, return "INVALID". + get_max_mode_from_summary = lambda md: ( + "INVALID" + if not isinstance(md, dict) + or "distance" not in md + or not isinstance(md["distance"], dict) + # If 'md' is a dictionary and 'distance' is a valid key pointing to a dictionary: + else ( + # Get the maximum value from 'md["distance"]' using the values of 'md["distance"].get' as the key for 'max'. + # This operation only happens if the length of 'md["distance"]' is greater than 0. + # Otherwise, return "INVALID". + max(md["distance"], key=md["distance"].get) + if len(md["distance"]) > 0 + else "INVALID" + ) + ) df["data.primary_sensed_mode"] = df.cleaned_section_summary.apply(get_max_mode_from_summary) df["data.primary_predicted_mode"] = df.inferred_section_summary.apply(get_max_mode_from_summary) if 'ble_sensed_summary' in df.columns: From d979a732fc95eb4dfa7dde40bc5320ac4a10ca05 Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Sun, 27 Oct 2024 10:03:44 -0700 Subject: [PATCH 2/7] Added add_user --- pages/data.py | 276 +++++++++++++++++++++++++++------------------- utils/db_utils.py | 148 +++++++++++-------------- 2 files changed, 225 insertions(+), 199 deletions(-) diff --git a/pages/data.py b/pages/data.py index 4f35e7a..30091a0 100644 --- a/pages/data.py +++ b/pages/data.py @@ -21,7 +21,7 @@ intro = """## Data""" layout = html.Div( - [ + [ dcc.Markdown(intro), dcc.Tabs(id="tabs-datatable", value='tab-uuids-datatable', children=[ dcc.Tab(label='UUIDs', value='tab-uuids-datatable'), @@ -30,6 +30,27 @@ dcc.Tab(label='Trajectories', value='tab-trajectories-datatable'), ]), html.Div(id='tabs-content'), + dcc.Store(id='selected-tab', data='tab-uuids-datatable'), # Store to hold selected tab + dcc.Interval(id='interval-load-more', interval=20000, n_intervals=0), # default loading at 10s, can be lowered or hightened based on perf (usual process local is 3s) + dcc.Store(id='store-uuids', data=[]), # Store to hold the original UUIDs data + dcc.Store(id='store-loaded-uuids', data={'data': [], 'loaded': False}), # Store to track loaded data + # RadioItems for key list switch, wrapped in a div that can hide/show + html.Div( + id='keylist-switch-container', + children=[ + html.Label("Select Key List:"), + dcc.RadioItems( + id='keylist-switch', + options=[ + {'label': 'Analysis/Recreated Location', 'value': 'analysis/recreated_location'}, + {'label': 'Background/Location', 'value': 'background/location'} + ], + value='analysis/recreated_location', # Default value + labelStyle={'display': 'inline-block', 'margin-right': '10px'} + ), + ], + style={'display': 'none'} # Initially hidden, will show only for the "Trajectories" tab + ), ] ) @@ -102,6 +123,8 @@ def update_store_trajectories(start_date: str, end_date: str, tz: str, excluded_ @callback( Output('tabs-content', 'children'), + Output('store-loaded-uuids', 'data'), + Output('interval-load-more', 'disabled'), # Disable interval when all data is loaded Input('tabs-datatable', 'value'), Input('store-uuids', 'data'), Input('store-excluded-uuids', 'data'), @@ -111,123 +134,144 @@ def update_store_trajectories(start_date: str, end_date: str, tz: str, excluded_ Input('date-picker', 'start_date'), Input('date-picker', 'end_date'), Input('date-picker-timezone', 'value'), + Input('interval-load-more', 'n_intervals'), # Interval to trigger the loading of more data + Input('keylist-switch', 'value'), # Add keylist-switch to trigger data refresh on change + State('store-loaded-uuids', 'data'), # Use State to track already loaded data + State('store-loaded-uuids', 'loaded') # Keep track if we have finished loading all data ) -def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_demographics, store_trajectories, start_date, end_date, timezone): - with ect.Timer() as total_timer: - data, columns, has_perm = None, [], False - - # Handle UUIDs tab - if tab == 'tab-uuids-datatable': - with ect.Timer() as stage1_timer: - data = store_uuids["data"] - data = db_utils.add_user_stats(data) - columns = perm_utils.get_uuids_columns() - has_perm = perm_utils.has_permission('data_uuids') - esdsq.store_dashboard_time( - "admin/data/render_content/handle_uuids_tab", - stage1_timer - ) - - # Handle Trips tab - elif tab == 'tab-trips-datatable': - with ect.Timer() as stage2_timer: - data = store_trips["data"] - columns = perm_utils.get_allowed_trip_columns() - columns.update( - col['label'] for col in perm_utils.get_allowed_named_trip_columns() - ) - columns.update(store_trips["userinputcols"]) - has_perm = perm_utils.has_permission('data_trips') - df = pd.DataFrame(data) - if df.empty or not has_perm: - return None - - logging.debug(f"Final list of retained cols {columns=}") - logging.debug(f"Before dropping, {df.columns=}") - df = df.drop(columns=[col for col in df.columns if col not in columns]) - logging.debug(f"After dropping, {df.columns=}") - df = clean_location_data(df) - - trips_table = populate_datatable(df, 'trips-table') - # Return an HTML Div containing a button (button-clicked) and the populated datatable - return html.Div([ - html.Button( - 'Display columns with raw units', - id='button-clicked', # identifier for the button - n_clicks=0, # initialize number of clicks to 0 - style={'marginLeft': '5px'} - ), - trips_table, # populated trips table component - ]) - esdsq.store_dashboard_time( - "admin/data/render_content/handle_trips_tab", - stage2_timer +def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_demographics, store_trajectories, + start_date, end_date, timezone, n_intervals, key_list, loaded_uuids_store, all_data_loaded): + initial_batch_size = 10 # Define the batch size for loading UUIDs + + # Update selected tab + selected_tab = tab + logging.debug(f"Selected tab: {selected_tab}") + # Handle the UUIDs tab without fullscreen loading spinner + if tab == 'tab-uuids-datatable': + # Ensure store_uuids contains the key 'data' which is a list of dictionaries + if not isinstance(store_uuids, dict) or 'data' not in store_uuids: + logging.error(f"Expected store_uuids to be a dict with a 'data' key, but got {type(store_uuids)}") + return html.Div([html.P("Data structure error.")]), loaded_uuids_store, True + + # Extract the list of UUIDs from the dict + uuids_list = store_uuids['data'] + + # Ensure uuids_list is a list for slicing + if not isinstance(uuids_list, list): + logging.error(f"Expected store_uuids['data'] to be a list but got {type(uuids_list)}") + return html.Div([html.P("Data structure error.")]), loaded_uuids_store, True + + # Retrieve already loaded data from the store + loaded_data = loaded_uuids_store.get('data', []) + total_loaded = len(loaded_data) + + # Handle lazy loading + if not loaded_uuids_store.get('loaded', False): + total_to_load = total_loaded + initial_batch_size + total_to_load = min(total_to_load, len(uuids_list)) # Avoid loading more than available + + logging.debug(f"Loading next batch of UUIDs: {total_loaded} to {total_to_load}") + + # Slice the list of UUIDs from the dict + new_data = uuids_list[total_loaded:total_to_load] + + if new_data: + # Process and append the new data to the loaded store + processed_data = db_utils.add_user_stats(new_data, initial_batch_size) + loaded_data.extend(processed_data) + + # Update the store with the new data + loaded_uuids_store['data'] = loaded_data + loaded_uuids_store['loaded'] = len(loaded_data) >= len(uuids_list) # Mark all data as loaded if done + + logging.debug(f"New batch loaded. Total loaded: {len(loaded_data)}") + + # Prepare the data to be displayed + columns = perm_utils.get_uuids_columns() # Get the relevant columns + df = pd.DataFrame(loaded_data) + + if df.empty or not perm_utils.has_permission('data_uuids'): + logging.debug("No data or permission issues.") + return html.Div([html.P("No data available or you don't have permission.")]), loaded_uuids_store, True + + df = df.drop(columns=[col for col in df.columns if col not in columns]) + + logging.debug("Returning appended data to update the UI.") + content = html.Div([ + populate_datatable(df), + html.P( + f"Showing {len(loaded_data)} of {len(uuids_list)} UUIDs." + + (f" Loading 10 more..." if not loaded_uuids_store.get('loaded', False) else ""), + style={'margin': '15px 5px'} ) - - # Handle Demographics tab - elif tab == 'tab-demographics-datatable': - with ect.Timer() as stage3_timer: - data = store_demographics["data"] - has_perm = perm_utils.has_permission('data_demographics') - # If only one survey is available, process it without creating a subtab - if len(data) == 1: - # Here data is a dictionary - data = list(data.values())[0] - columns = list(data[0].keys()) - # For multiple surveys, create subtabs for unique surveys - elif len(data) > 1: - # Returns subtab only if has_perm is True - if not has_perm: - return None - return html.Div([ - dcc.Tabs(id='subtabs-demographics', value=list(data.keys())[0], children=[ - dcc.Tab(label=key, value=key) for key in data - ]), - html.Div(id='subtabs-demographics-content') - ]) - esdsq.store_dashboard_time( - "admin/data/render_content/handle_demographics_tab", - stage3_timer - ) - - # Handle Trajectories tab - elif tab == 'tab-trajectories-datatable': - # Currently store_trajectories data is loaded only when the respective tab is selected - # Here we query for trajectory data once "Trajectories" tab is selected - with ect.Timer() as stage4_timer: - (start_date, end_date) = iso_to_date_only(start_date, end_date) - if store_trajectories == {}: - store_trajectories = update_store_trajectories(start_date, end_date, timezone, store_excluded_uuids) - data = store_trajectories["data"] - if data: - columns = list(data[0].keys()) - columns = perm_utils.get_trajectories_columns(columns) - has_perm = perm_utils.has_permission('data_trajectories') - esdsq.store_dashboard_time( - "admin/data/render_content/handle_trajectories_tab", - stage4_timer - ) - - # Prepare final DataFrame and return datatable - with ect.Timer() as stage5_timer: - df = pd.DataFrame(data) - if df.empty or not has_perm: - return None - - df = df.drop(columns=[col for col in df.columns if col not in columns]) - - result = populate_datatable(df) - esdsq.store_dashboard_time( - "admin/data/render_content/prepare_final_dataframe_and_return", - stage5_timer - ) - - esdsq.store_dashboard_time( - "admin/data/render_content/total_time", - total_timer - ) - - return result + ]) + return content, loaded_uuids_store, False if not loaded_uuids_store['loaded'] else True + + # Handle other tabs normally + elif tab == 'tab-trips-datatable': + data = store_trips["data"] + columns = perm_utils.get_allowed_trip_columns() + columns.update(col['label'] for col in perm_utils.get_allowed_named_trip_columns()) + columns.update(store_trips["userinputcols"]) + has_perm = perm_utils.has_permission('data_trips') + + df = pd.DataFrame(data) + if df.empty or not has_perm: + return None, loaded_uuids_store, True + + df = df.drop(columns=[col for col in df.columns if col not in columns]) + df = clean_location_data(df) + + trips_table = populate_datatable(df, 'trips-table') + logging.debug(f"Returning 3 values: {trips_table}, {loaded_uuids_store}, True") + return html.Div([ + html.Button('Display columns with raw units', id='button-clicked', n_clicks=0, style={'marginLeft': '5px'}), + trips_table + ]), loaded_uuids_store, True + + elif tab == 'tab-demographics-datatable': + data = store_demographics["data"] + has_perm = perm_utils.has_permission('data_demographics') + + if len(data) == 1: + data = list(data.values())[0] + columns = list(data[0].keys()) + elif len(data) > 1: + if not has_perm: + return None, loaded_uuids_store, True + return html.Div([ + dcc.Tabs(id='subtabs-demographics', value=list(data.keys())[0], children=[ + dcc.Tab(label=key, value=key) for key in data + ]), + html.Div(id='subtabs-demographics-content') + ]), loaded_uuids_store, True + + elif tab == 'tab-trajectories-datatable': + (start_date, end_date) = iso_to_date_only(start_date, end_date) + + # Fetch new data based on the selected key_list from the keylist-switch + if store_trajectories == {} or key_list: # Ensure data is refreshed when key_list changes + store_trajectories = update_store_trajectories(start_date, end_date, timezone, store_excluded_uuids, key_list) + + data = store_trajectories.get("data", []) + if data: + columns = list(data[0].keys()) + columns = perm_utils.get_trajectories_columns(columns) + has_perm = perm_utils.has_permission('data_trajectories') + + df = pd.DataFrame(data) + if df.empty or not has_perm: + # If no permission or data, disable interval and return empty content + return None, loaded_uuids_store, True + + # Filter the columns based on permissions + df = df.drop(columns=[col for col in df.columns if col not in columns]) + + # Return the populated DataTable + return populate_datatable(df), loaded_uuids_store, True + + # Default case: if no data is loaded or the tab is not handled + return None, loaded_uuids_store, True # Handle subtabs for demographic table when there are multiple surveys @callback( diff --git a/utils/db_utils.py b/utils/db_utils.py index a44b4c8..f02ee29 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -18,6 +18,7 @@ from utils import constants from utils import permissions as perm_utils from utils.datetime_utils import iso_range_to_ts_range +from concurrent.futures import ThreadPoolExecutor, as_completed def df_to_filtered_records(df, col_to_filter=None, vals_to_exclude: list[str] = []): """ @@ -424,100 +425,81 @@ def query_trajectories(start_date: str, end_date: str, tz: str): -def add_user_stats(user_data): - with ect.Timer() as total_timer: - - for user in user_data: - user_uuid = UUID(user['user_id']) +def add_user_stats(user_data, batch_size=5): + time_format = 'YYYY-MM-DD HH:mm:ss' + def process_user(user): + user_uuid = UUID(user['user_id']) + + # Fetch aggregated data for all users once and cache it + ts_aggregate = esta.TimeSeries.get_aggregate_time_series() - # Stage 1: Count total trips - with ect.Timer() as stage1_timer: - total_trips = esta.TimeSeries.get_aggregate_time_series().find_entries_count( - key_list=["analysis/confirmed_trip"], - extra_query_list=[{'user_id': user_uuid}] - ) - user['total_trips'] = total_trips - esdsq.store_dashboard_time( - "admin/db_utils/add_user_stats/count_total_trips", - stage1_timer + # Fetch data for the user, cached for repeated queries + profile_data = edb.get_profile_db().find_one({'user_id': user_uuid}) + + total_trips = ts_aggregate.find_entries_count( + key_list=["analysis/confirmed_trip"], + extra_query_list=[{'user_id': user_uuid}] + ) + labeled_trips = ts_aggregate.find_entries_count( + key_list=["analysis/confirmed_trip"], + extra_query_list=[{'user_id': user_uuid}, {'data.user_input': {'$ne': {}}}] + ) + + user['total_trips'] = total_trips + user['labeled_trips'] = labeled_trips + + if profile_data: + user['platform'] = profile_data.get('curr_platform') + user['manufacturer'] = profile_data.get('manufacturer') + user['app_version'] = profile_data.get('client_app_version') + user['os_version'] = profile_data.get('client_os_version') + user['phone_lang'] = profile_data.get('phone_lang') + + if total_trips > 0: + ts = esta.TimeSeries.get_time_series(user_uuid) + first_trip_ts = ts.get_first_value_for_field( + key='analysis/confirmed_trip', + field='data.end_ts', + sort_order=pymongo.ASCENDING ) + if first_trip_ts != -1: + user['first_trip'] = arrow.get(first_trip_ts).format(time_format) - # Stage 2: Count labeled trips - with ect.Timer() as stage2_timer: - labeled_trips = esta.TimeSeries.get_aggregate_time_series().find_entries_count( - key_list=["analysis/confirmed_trip"], - extra_query_list=[{'user_id': user_uuid}, {'data.user_input': {'$ne': {}}}] - ) - user['labeled_trips'] = labeled_trips - esdsq.store_dashboard_time( - "admin/db_utils/add_user_stats/count_labeled_trips", - stage2_timer + last_trip_ts = ts.get_first_value_for_field( + key='analysis/confirmed_trip', + field='data.end_ts', + sort_order=pymongo.DESCENDING ) + if last_trip_ts != -1: + user['last_trip'] = arrow.get(last_trip_ts).format(time_format) - # Stage 3: Retrieve user profile data - with ect.Timer() as stage3_timer: - profile_data = edb.get_profile_db().find_one({'user_id': user_uuid}) - user['platform'] = profile_data.get('curr_platform') - user['manufacturer'] = profile_data.get('manufacturer') - user['app_version'] = profile_data.get('client_app_version') - user['os_version'] = profile_data.get('client_os_version') - user['phone_lang'] = profile_data.get('phone_lang') - esdsq.store_dashboard_time( - "admin/db_utils/add_user_stats/retrieve_user_profile_data", - stage3_timer + last_call_ts = ts.get_first_value_for_field( + key='stats/server_api_time', + field='data.ts', + sort_order=pymongo.DESCENDING ) + if last_call_ts != -1: + user['last_call'] = arrow.get(last_call_ts).format(time_format) + + return user - if total_trips > 0: - # Stage 4: Get first trip timestamp - with ect.Timer() as stage4_timer: - time_format = 'YYYY-MM-DD HH:mm:ss' - ts = esta.TimeSeries.get_time_series(user_uuid) - start_ts = ts.get_first_value_for_field( - key='analysis/confirmed_trip', - field='data.end_ts', - sort_order=pymongo.ASCENDING - ) - if start_ts != -1: - user['first_trip'] = arrow.get(start_ts).format(time_format) - esdsq.store_dashboard_time( - "admin/db_utils/add_user_stats/get_first_trip_timestamp", - stage4_timer - ) + def batch_process(users_batch): + with ThreadPoolExecutor() as executor: # Adjust max_workers based on CPU cores + futures = [executor.submit(process_user, user) for user in users_batch] + processed_batch = [future.result() for future in as_completed(futures)] + return processed_batch - # Stage 5: Get last trip timestamp - with ect.Timer() as stage5_timer: - end_ts = ts.get_first_value_for_field( - key='analysis/confirmed_trip', - field='data.end_ts', - sort_order=pymongo.DESCENDING - ) - if end_ts != -1: - user['last_trip'] = arrow.get(end_ts).format(time_format) - esdsq.store_dashboard_time( - "admin/db_utils/add_user_stats/get_last_trip_timestamp", - stage5_timer - ) + total_users = len(user_data) + processed_data = [] - # Stage 6: Get last server call timestamp - with ect.Timer() as stage6_timer: - last_call = ts.get_first_value_for_field( - key='stats/server_api_time', - field='data.ts', - sort_order=pymongo.DESCENDING - ) - if last_call != -1: - user['last_call'] = arrow.get(last_call).format(time_format) - esdsq.store_dashboard_time( - "admin/db_utils/add_user_stats/get_last_server_call_timestamp", - stage6_timer - ) + for i in range(0, total_users, batch_size): + batch = user_data[i:i + batch_size] + processed_batch = batch_process(batch) + processed_data.extend(processed_batch) - esdsq.store_dashboard_time( - "admin/db_utils/add_user_stats/total_time", - total_timer - ) + logging.debug(f'Processed {len(processed_data)} users out of {total_users}') - return user_data + return processed_data def query_segments_crossing_endpoints(poly_region_start, poly_region_end, start_date: str, end_date: str, tz: str, excluded_uuids: list[str]): with ect.Timer() as total_timer: From 280a6dc930fcdad6a6119a18875b895d4e0e661b Mon Sep 17 00:00:00 2001 From: Robin Date: Fri, 18 Oct 2024 12:19:14 -0700 Subject: [PATCH 3/7] Update pages/data.py Co-authored-by: Jack Greenlee --- pages/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pages/data.py b/pages/data.py index 30091a0..a77174e 100644 --- a/pages/data.py +++ b/pages/data.py @@ -198,7 +198,7 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de logging.debug("Returning appended data to update the UI.") content = html.Div([ - populate_datatable(df), + populate_datatable(df, table_id='uuid-table', page_current=current_page), html.P( f"Showing {len(loaded_data)} of {len(uuids_list)} UUIDs." + (f" Loading 10 more..." if not loaded_uuids_store.get('loaded', False) else ""), From 5478b208dde94c98197139e9e434ffabd5e6a709 Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Thu, 10 Oct 2024 10:50:35 -0700 Subject: [PATCH 4/7] Cherried Table Hold Cherried UI Cherried UI --- app_sidebar_collapsible.py | 14 +++ pages/data.py | 174 ++++++++++++++++++++++--------------- 2 files changed, 116 insertions(+), 72 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index da74ade..71b51b7 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -210,9 +210,13 @@ def make_controls(): 'flex-direction': 'column'} ) + page_content = dcc.Loading( + id='global-loading', type='default', fullscreen=True, + overlay_style={"visibility": "visible", "filter": "blur(2px)"}, + style={"background-color": "transparent"}, children=html.Div(dash.page_container, style={ "margin-left": "5rem", "margin-right": "2rem", @@ -221,6 +225,16 @@ def make_controls(): ) +@app.callback( + Output('global-loading', 'display'), + Input('interval-load-more', 'disabled'), +) +def hide_spinner_while_loading_batch(interval_disabled): + if interval_disabled: + return 'auto' + return 'hide' + + def make_home_page(): return [ sidebar, html.Div([make_controls(), page_content]) diff --git a/pages/data.py b/pages/data.py index a77174e..bd5d027 100644 --- a/pages/data.py +++ b/pages/data.py @@ -8,6 +8,7 @@ import logging import pandas as pd from dash.exceptions import PreventUpdate +import time from utils import constants from utils import permissions as perm_utils @@ -23,17 +24,22 @@ layout = html.Div( [ dcc.Markdown(intro), - dcc.Tabs(id="tabs-datatable", value='tab-uuids-datatable', children=[ - dcc.Tab(label='UUIDs', value='tab-uuids-datatable'), - dcc.Tab(label='Trips', value='tab-trips-datatable'), - dcc.Tab(label='Demographics', value='tab-demographics-datatable'), - dcc.Tab(label='Trajectories', value='tab-trajectories-datatable'), - ]), + dcc.Tabs( + id="tabs-datatable", + value='tab-uuids-datatable', + children=[ + dcc.Tab(label='UUIDs', value='tab-uuids-datatable'), + dcc.Tab(label='Trips', value='tab-trips-datatable'), + dcc.Tab(label='Demographics', value='tab-demographics-datatable'), + dcc.Tab(label='Trajectories', value='tab-trajectories-datatable'), + ] + ), html.Div(id='tabs-content'), dcc.Store(id='selected-tab', data='tab-uuids-datatable'), # Store to hold selected tab - dcc.Interval(id='interval-load-more', interval=20000, n_intervals=0), # default loading at 10s, can be lowered or hightened based on perf (usual process local is 3s) + dcc.Interval(id='interval-load-more', interval=24000, n_intervals=0), # Interval for loading more data dcc.Store(id='store-uuids', data=[]), # Store to hold the original UUIDs data dcc.Store(id='store-loaded-uuids', data={'data': [], 'loaded': False}), # Store to track loaded data + dcc.Store(id='uuids-page-current', data=0), # Store to track current page for UUIDs DataTable # RadioItems for key list switch, wrapped in a div that can hide/show html.Div( id='keylist-switch-container', @@ -121,6 +127,27 @@ def update_store_trajectories(start_date: str, end_date: str, tz: str, excluded_ return store +@callback( + Output('keylist-switch-container', 'style'), + Input('tabs-datatable', 'value'), +) +def show_keylist_switch(tab): + if tab == 'tab-trajectories-datatable': + return {'display': 'block'} + return {'display': 'none'} # Hide the keylist-switch on all other tabs + + +@callback( + Output('uuids-page-current', 'data'), + Input('uuid-table', 'page_current'), + State('tabs-datatable', 'value') +) +def update_uuids_page_current(page_current, selected_tab): + if selected_tab == 'tab-uuids-datatable': + return page_current + raise PreventUpdate + + @callback( Output('tabs-content', 'children'), Output('store-loaded-uuids', 'data'), @@ -136,24 +163,31 @@ def update_store_trajectories(start_date: str, end_date: str, tz: str, excluded_ Input('date-picker-timezone', 'value'), Input('interval-load-more', 'n_intervals'), # Interval to trigger the loading of more data Input('keylist-switch', 'value'), # Add keylist-switch to trigger data refresh on change + Input('uuids-page-current', 'data'), # Current page number for UUIDs DataTable State('store-loaded-uuids', 'data'), # Use State to track already loaded data State('store-loaded-uuids', 'loaded') # Keep track if we have finished loading all data ) -def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_demographics, store_trajectories, - start_date, end_date, timezone, n_intervals, key_list, loaded_uuids_store, all_data_loaded): +def render_content( + tab, store_uuids, store_excluded_uuids, store_trips, store_demographics, store_trajectories, + start_date, end_date, timezone, n_intervals, key_list, current_page, + loaded_uuids_store, all_data_loaded +): initial_batch_size = 10 # Define the batch size for loading UUIDs # Update selected tab selected_tab = tab - logging.debug(f"Selected tab: {selected_tab}") + logging.debug(f"Callback - {selected_tab} Stage 1: Selected tab updated.") + # Handle the UUIDs tab without fullscreen loading spinner if tab == 'tab-uuids-datatable': + start_time = time.time() + logging.debug(f"Callback - {selected_tab} Stage 2: Handling UUIDs tab.") + # Ensure store_uuids contains the key 'data' which is a list of dictionaries if not isinstance(store_uuids, dict) or 'data' not in store_uuids: logging.error(f"Expected store_uuids to be a dict with a 'data' key, but got {type(store_uuids)}") return html.Div([html.P("Data structure error.")]), loaded_uuids_store, True - # Extract the list of UUIDs from the dict uuids_list = store_uuids['data'] # Ensure uuids_list is a list for slicing @@ -161,7 +195,6 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de logging.error(f"Expected store_uuids['data'] to be a list but got {type(uuids_list)}") return html.Div([html.P("Data structure error.")]), loaded_uuids_store, True - # Retrieve already loaded data from the store loaded_data = loaded_uuids_store.get('data', []) total_loaded = len(loaded_data) @@ -170,45 +203,51 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de total_to_load = total_loaded + initial_batch_size total_to_load = min(total_to_load, len(uuids_list)) # Avoid loading more than available - logging.debug(f"Loading next batch of UUIDs: {total_loaded} to {total_to_load}") + logging.debug(f"Callback - {selected_tab} Stage 3: Loading next batch of UUIDs from {total_loaded} to {total_to_load}.") - # Slice the list of UUIDs from the dict new_data = uuids_list[total_loaded:total_to_load] if new_data: # Process and append the new data to the loaded store processed_data = db_utils.add_user_stats(new_data, initial_batch_size) loaded_data.extend(processed_data) - + # Update the store with the new data - loaded_uuids_store['data'] = loaded_data - loaded_uuids_store['loaded'] = len(loaded_data) >= len(uuids_list) # Mark all data as loaded if done + loaded_uuids_store['data'] = loaded_data # Mark all data as loaded if done + loaded_uuids_store['loaded'] = len(loaded_data) >= len(uuids_list) - logging.debug(f"New batch loaded. Total loaded: {len(loaded_data)}") + logging.debug(f"Callback - {selected_tab} Stage 4: New batch loaded. Total loaded: {len(loaded_data)}.") # Prepare the data to be displayed columns = perm_utils.get_uuids_columns() # Get the relevant columns df = pd.DataFrame(loaded_data) if df.empty or not perm_utils.has_permission('data_uuids'): - logging.debug("No data or permission issues.") + logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") return html.Div([html.P("No data available or you don't have permission.")]), loaded_uuids_store, True df = df.drop(columns=[col for col in df.columns if col not in columns]) - logging.debug("Returning appended data to update the UI.") + logging.debug(f"Callback - {selected_tab} Stage 5: Returning appended data to update the UI.") content = html.Div([ - populate_datatable(df, table_id='uuid-table', page_current=current_page), + populate_datatable(df, table_id='uuid-table', page_current=current_page), # Pass current_page html.P( f"Showing {len(loaded_data)} of {len(uuids_list)} UUIDs." + (f" Loading 10 more..." if not loaded_uuids_store.get('loaded', False) else ""), style={'margin': '15px 5px'} ) ]) + + elapsed_time = time.time() - start_time + logging.info(f"Callback - {selected_tab} Stage 6: Total Time for UUIDs Tab: {elapsed_time:.2f} seconds") + return content, loaded_uuids_store, False if not loaded_uuids_store['loaded'] else True # Handle other tabs normally elif tab == 'tab-trips-datatable': + start_time = time.time() + logging.debug(f"Callback - {selected_tab} Stage 2: Handling Trips tab.") + data = store_trips["data"] columns = perm_utils.get_allowed_trip_columns() columns.update(col['label'] for col in perm_utils.get_allowed_named_trip_columns()) @@ -217,19 +256,25 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de df = pd.DataFrame(data) if df.empty or not has_perm: + logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") return None, loaded_uuids_store, True df = df.drop(columns=[col for col in df.columns if col not in columns]) df = clean_location_data(df) - trips_table = populate_datatable(df, 'trips-table') - logging.debug(f"Returning 3 values: {trips_table}, {loaded_uuids_store}, True") + trips_table = populate_datatable(df) + elapsed_time = time.time() - start_time + logging.info(f"Callback - {selected_tab} Stage 3: Total Time for Trips Tab: {elapsed_time:.2f} seconds") + return html.Div([ html.Button('Display columns with raw units', id='button-clicked', n_clicks=0, style={'marginLeft': '5px'}), trips_table ]), loaded_uuids_store, True elif tab == 'tab-demographics-datatable': + start_time = time.time() + logging.debug(f"Callback - {selected_tab} Stage 2: Handling Demographics tab.") + data = store_demographics["data"] has_perm = perm_utils.has_permission('data_demographics') @@ -246,11 +291,16 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de html.Div(id='subtabs-demographics-content') ]), loaded_uuids_store, True + elapsed_time = time.time() - start_time + logging.info(f"Callback - {selected_tab} Stage 3: Total Time for Demographics Tab: {elapsed_time:.2f} seconds") + elif tab == 'tab-trajectories-datatable': - (start_date, end_date) = iso_to_date_only(start_date, end_date) + start_time = time.time() + logging.debug(f"Callback - {selected_tab} Stage 2: Handling Trajectories tab.") + (start_date, end_date) = iso_to_date_only(start_date, end_date) # Fetch new data based on the selected key_list from the keylist-switch - if store_trajectories == {} or key_list: # Ensure data is refreshed when key_list changes + if store_trajectories == {} or key_list: store_trajectories = update_store_trajectories(start_date, end_date, timezone, store_excluded_uuids, key_list) data = store_trajectories.get("data", []) @@ -261,16 +311,18 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de df = pd.DataFrame(data) if df.empty or not has_perm: - # If no permission or data, disable interval and return empty content + logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") return None, loaded_uuids_store, True - # Filter the columns based on permissions df = df.drop(columns=[col for col in df.columns if col not in columns]) - # Return the populated DataTable + elapsed_time = time.time() - start_time + logging.info(f"Callback - {selected_tab} Stage 3: Total Time for Trajectories Tab: {elapsed_time:.2f} seconds") + return populate_datatable(df), loaded_uuids_store, True # Default case: if no data is loaded or the tab is not handled + logging.debug(f"Callback - {selected_tab} Error Stage: No data loaded or unhandled tab.") return None, loaded_uuids_store, True # Handle subtabs for demographic table when there are multiple surveys @@ -369,50 +421,28 @@ def update_dropdowns_trips(n_clicks, button_label): return hidden_col, button_label - -def populate_datatable(df, table_id=''): - with ect.Timer() as total_timer: - - # Stage 1: Check if df is a DataFrame and raise PreventUpdate if not - with ect.Timer() as stage1_timer: - if not isinstance(df, pd.DataFrame): - raise PreventUpdate - esdsq.store_dashboard_time( - "admin/data/populate_datatable/check_dataframe_type", - stage1_timer - ) - - # Stage 2: Create the DataTable from the DataFrame - with ect.Timer() as stage2_timer: - result = dash_table.DataTable( - id=table_id, - # columns=[{"name": i, "id": i} for i in df.columns], - data=df.to_dict('records'), - export_format="csv", - filter_options={"case": "sensitive"}, - # filter_action="native", - sort_action="native", # give user capability to sort columns - sort_mode="single", # sort across 'multi' or 'single' columns - page_current=0, # page number that user is on - page_size=50, # number of rows visible per page - style_cell={ - 'textAlign': 'left', - # 'minWidth': '100px', - # 'width': '100px', - # 'maxWidth': '100px', - }, - style_table={'overflowX': 'auto'}, - css=[{"selector": ".show-hide", "rule": "display:none"}] - ) - esdsq.store_dashboard_time( - "admin/data/populate_datatable/create_datatable", - stage2_timer - ) - - # Store the total time for the entire function - esdsq.store_dashboard_time( - "admin/data/populate_datatable/total_time", - total_timer +def populate_datatable(df, table_id='', page_current=0): + if not isinstance(df, pd.DataFrame): + raise PreventUpdate + return dash_table.DataTable( + id=table_id, + # columns=[{"name": i, "id": i} for i in df.columns], + data=df.to_dict('records'), + export_format="csv", + filter_options={"case": "sensitive"}, + # filter_action="native", + sort_action="native", # give user capability to sort columns + sort_mode="single", # sort across 'multi' or 'single' columns + page_current=page_current, # set to current page + page_size=50, # number of rows visible per page + style_cell={ + 'textAlign': 'left', + # 'minWidth': '100px', + # 'width': '100px', + # 'maxWidth': '100px', + }, + style_table={'overflowX': 'auto'}, + css=[{"selector":".show-hide", "rule":"display:none"}] ) return result From fc8b29a790c6ef66831d3a405c026736d3f7c18a Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Sun, 27 Oct 2024 10:56:00 -0700 Subject: [PATCH 5/7] Reverted Data Page to have Trajectories selector from UUE Fix --- pages/data.py | 4 ++-- requirements.txt | 4 ++-- utils/db_utils.py | 17 +++++++++++------ 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/pages/data.py b/pages/data.py index bd5d027..c98791e 100644 --- a/pages/data.py +++ b/pages/data.py @@ -89,12 +89,12 @@ def clean_location_data(df): return df -def update_store_trajectories(start_date: str, end_date: str, tz: str, excluded_uuids): +def update_store_trajectories(start_date: str, end_date: str, tz: str, excluded_uuids, key_list): with ect.Timer() as total_timer: # Stage 1: Query trajectories with ect.Timer() as stage1_timer: - df = query_trajectories(start_date, end_date, tz) + df = query_trajectories(start_date, end_date, tz, key_list) esdsq.store_dashboard_time( "admin/data/update_store_trajectories/query_trajectories", stage1_timer diff --git a/requirements.txt b/requirements.txt index 6f0d5e4..6182fe8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ --extra-index-url https://plotly.nrel.gov/Docs/packages # dash is required to call `build:py` -dash==2.15.0 +dash==2.18.0 gunicorn==20.1.0 -plotly==5.14.1 +plotly==5.24.1 dash-bootstrap-components==1.4.1 dash_extensions==0.1.13 #dashboard_setup/nrel_dash_components-0.0.1.tar.gz # for docker-compose diff --git a/utils/db_utils.py b/utils/db_utils.py index f02ee29..2e522c1 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -355,9 +355,9 @@ def query_demographics(): return dataframes -def query_trajectories(start_date: str, end_date: str, tz: str): +def query_trajectories(start_date: str, end_date: str, tz: str, key_list): with ect.Timer() as total_timer: - + key_list = [key_list] if isinstance(key_list, str) else key_list # Stage 1: Convert date range to timestamps with ect.Timer() as stage1_timer: (start_ts, end_ts) = iso_range_to_ts_range(start_date, end_date, tz) @@ -370,7 +370,7 @@ def query_trajectories(start_date: str, end_date: str, tz: str): with ect.Timer() as stage2_timer: ts = esta.TimeSeries.get_aggregate_time_series() entries = ts.find_entries( - key_list=["analysis/recreated_location"], + key_list=key_list, time_query=estt.TimeQuery("data.ts", start_ts, end_ts), ) esdsq.store_dashboard_time( @@ -408,9 +408,14 @@ def query_trajectories(start_date: str, end_date: str, tz: str): # Stage 5: Add human-readable mode string with ect.Timer() as stage5_timer: - df['data.mode_str'] = df['data.mode'].apply( - lambda x: ecwm.MotionTypes(x).name if x in set(enum.value for enum in ecwm.MotionTypes) else 'UNKNOWN' - ) + if 'background/location' in key_list: + if 'data.mode' in df.columns: + # Set the values in data.mode to blank ('') + df['data.mode'] = '' + else: + df['data.mode_str'] = df['data.mode'].apply( + lambda x: ecwm.MotionTypes(x).name if x in set(enum.value for enum in ecwm.MotionTypes) else 'UNKNOWN' + ) esdsq.store_dashboard_time( "admin/db_utils/query_trajectories/add_mode_string", stage5_timer From d241d375d833f93cd6de706a6bd8f5fbb32aab86 Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Sun, 27 Oct 2024 11:14:04 -0700 Subject: [PATCH 6/7] Readded timings --- pages/data.py | 359 +++++++++++++++++++++++++--------------------- utils/db_utils.py | 170 +++++++++++++--------- 2 files changed, 301 insertions(+), 228 deletions(-) diff --git a/pages/data.py b/pages/data.py index c98791e..be8bf23 100644 --- a/pages/data.py +++ b/pages/data.py @@ -172,157 +172,172 @@ def render_content( start_date, end_date, timezone, n_intervals, key_list, current_page, loaded_uuids_store, all_data_loaded ): - initial_batch_size = 10 # Define the batch size for loading UUIDs - - # Update selected tab - selected_tab = tab - logging.debug(f"Callback - {selected_tab} Stage 1: Selected tab updated.") - - # Handle the UUIDs tab without fullscreen loading spinner - if tab == 'tab-uuids-datatable': - start_time = time.time() - logging.debug(f"Callback - {selected_tab} Stage 2: Handling UUIDs tab.") - - # Ensure store_uuids contains the key 'data' which is a list of dictionaries - if not isinstance(store_uuids, dict) or 'data' not in store_uuids: - logging.error(f"Expected store_uuids to be a dict with a 'data' key, but got {type(store_uuids)}") - return html.Div([html.P("Data structure error.")]), loaded_uuids_store, True - - uuids_list = store_uuids['data'] - - # Ensure uuids_list is a list for slicing - if not isinstance(uuids_list, list): - logging.error(f"Expected store_uuids['data'] to be a list but got {type(uuids_list)}") - return html.Div([html.P("Data structure error.")]), loaded_uuids_store, True - - loaded_data = loaded_uuids_store.get('data', []) - total_loaded = len(loaded_data) - - # Handle lazy loading - if not loaded_uuids_store.get('loaded', False): - total_to_load = total_loaded + initial_batch_size - total_to_load = min(total_to_load, len(uuids_list)) # Avoid loading more than available - - logging.debug(f"Callback - {selected_tab} Stage 3: Loading next batch of UUIDs from {total_loaded} to {total_to_load}.") - - new_data = uuids_list[total_loaded:total_to_load] - - if new_data: - # Process and append the new data to the loaded store - processed_data = db_utils.add_user_stats(new_data, initial_batch_size) - loaded_data.extend(processed_data) - - # Update the store with the new data - loaded_uuids_store['data'] = loaded_data # Mark all data as loaded if done - loaded_uuids_store['loaded'] = len(loaded_data) >= len(uuids_list) - - logging.debug(f"Callback - {selected_tab} Stage 4: New batch loaded. Total loaded: {len(loaded_data)}.") - - # Prepare the data to be displayed - columns = perm_utils.get_uuids_columns() # Get the relevant columns - df = pd.DataFrame(loaded_data) - - if df.empty or not perm_utils.has_permission('data_uuids'): - logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") - return html.Div([html.P("No data available or you don't have permission.")]), loaded_uuids_store, True - - df = df.drop(columns=[col for col in df.columns if col not in columns]) - - logging.debug(f"Callback - {selected_tab} Stage 5: Returning appended data to update the UI.") - content = html.Div([ - populate_datatable(df, table_id='uuid-table', page_current=current_page), # Pass current_page - html.P( - f"Showing {len(loaded_data)} of {len(uuids_list)} UUIDs." + - (f" Loading 10 more..." if not loaded_uuids_store.get('loaded', False) else ""), - style={'margin': '15px 5px'} + with ect.Timer() as total_timer: + initial_batch_size = 10 # Define the batch size for loading UUIDs + + # Stage 1: Update selected tab + selected_tab = tab + logging.debug(f"Callback - {selected_tab} Stage 1: Selected tab updated.") + + # Handle the UUIDs tab without fullscreen loading spinner + if tab == 'tab-uuids-datatable': + with ect.Timer() as handle_uuids_timer: + logging.debug(f"Callback - {selected_tab} Stage 2: Handling UUIDs tab.") + + # Ensure store_uuids contains the key 'data' which is a list of dictionaries + if not isinstance(store_uuids, dict) or 'data' not in store_uuids: + logging.error(f"Expected store_uuids to be a dict with a 'data' key, but got {type(store_uuids)}") + return html.Div([html.P("Data structure error.")]), loaded_uuids_store, True + + uuids_list = store_uuids['data'] + + # Ensure uuids_list is a list for slicing + if not isinstance(uuids_list, list): + logging.error(f"Expected store_uuids['data'] to be a list but got {type(uuids_list)}") + return html.Div([html.P("Data structure error.")]), loaded_uuids_store, True + + loaded_data = loaded_uuids_store.get('data', []) + total_loaded = len(loaded_data) + + # Handle lazy loading + if not loaded_uuids_store.get('loaded', False): + total_to_load = total_loaded + initial_batch_size + total_to_load = min(total_to_load, len(uuids_list)) # Avoid loading more than available + + logging.debug(f"Callback - {selected_tab} Stage 3: Loading next batch of UUIDs from {total_loaded} to {total_to_load}.") + + new_data = uuids_list[total_loaded:total_to_load] + + if new_data: + # Process and append the new data to the loaded store + processed_data = db_utils.add_user_stats(new_data, initial_batch_size) + loaded_data.extend(processed_data) + + # Update the store with the new data + loaded_uuids_store['data'] = loaded_data # Mark all data as loaded if done + loaded_uuids_store['loaded'] = len(loaded_data) >= len(uuids_list) + + logging.debug(f"Callback - {selected_tab} Stage 4: New batch loaded. Total loaded: {len(loaded_data)}.") + + # Prepare the data to be displayed + columns = perm_utils.get_uuids_columns() # Get the relevant columns + df = pd.DataFrame(loaded_data) + + if df.empty or not perm_utils.has_permission('data_uuids'): + logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") + return html.Div([html.P("No data available or you don't have permission.")]), loaded_uuids_store, True + + df = df.drop(columns=[col for col in df.columns if col not in columns]) + + logging.debug(f"Callback - {selected_tab} Stage 5: Returning appended data to update the UI.") + content = html.Div([ + populate_datatable(df, table_id='uuid-table', page_current=current_page), # Pass current_page + html.P( + f"Showing {len(loaded_data)} of {len(uuids_list)} UUIDs." + + (f" Loading 10 more..." if not loaded_uuids_store.get('loaded', False) else ""), + style={'margin': '15px 5px'} + ) + ]) + + return content, loaded_uuids_store, False if not loaded_uuids_store['loaded'] else True + + esdsq.store_dashboard_time( + "admin/data/render_content/handle_uuids_tab", + handle_uuids_timer ) - ]) - elapsed_time = time.time() - start_time - logging.info(f"Callback - {selected_tab} Stage 6: Total Time for UUIDs Tab: {elapsed_time:.2f} seconds") + # Handle Trips tab + elif tab == 'tab-trips-datatable': + with ect.Timer() as handle_trips_timer: + logging.debug(f"Callback - {selected_tab} Stage 2: Handling Trips tab.") - return content, loaded_uuids_store, False if not loaded_uuids_store['loaded'] else True + data = store_trips["data"] + columns = perm_utils.get_allowed_trip_columns() + columns.update(col['label'] for col in perm_utils.get_allowed_named_trip_columns()) + columns.update(store_trips["userinputcols"]) + has_perm = perm_utils.has_permission('data_trips') - # Handle other tabs normally - elif tab == 'tab-trips-datatable': - start_time = time.time() - logging.debug(f"Callback - {selected_tab} Stage 2: Handling Trips tab.") + df = pd.DataFrame(data) + if df.empty or not has_perm: + logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") + return None, loaded_uuids_store, True - data = store_trips["data"] - columns = perm_utils.get_allowed_trip_columns() - columns.update(col['label'] for col in perm_utils.get_allowed_named_trip_columns()) - columns.update(store_trips["userinputcols"]) - has_perm = perm_utils.has_permission('data_trips') + df = df.drop(columns=[col for col in df.columns if col not in columns]) + df = clean_location_data(df) - df = pd.DataFrame(data) - if df.empty or not has_perm: - logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") - return None, loaded_uuids_store, True + trips_table = populate_datatable(df) - df = df.drop(columns=[col for col in df.columns if col not in columns]) - df = clean_location_data(df) - - trips_table = populate_datatable(df) - elapsed_time = time.time() - start_time - logging.info(f"Callback - {selected_tab} Stage 3: Total Time for Trips Tab: {elapsed_time:.2f} seconds") - - return html.Div([ - html.Button('Display columns with raw units', id='button-clicked', n_clicks=0, style={'marginLeft': '5px'}), - trips_table - ]), loaded_uuids_store, True - - elif tab == 'tab-demographics-datatable': - start_time = time.time() - logging.debug(f"Callback - {selected_tab} Stage 2: Handling Demographics tab.") - - data = store_demographics["data"] - has_perm = perm_utils.has_permission('data_demographics') - - if len(data) == 1: - data = list(data.values())[0] - columns = list(data[0].keys()) - elif len(data) > 1: - if not has_perm: - return None, loaded_uuids_store, True - return html.Div([ - dcc.Tabs(id='subtabs-demographics', value=list(data.keys())[0], children=[ - dcc.Tab(label=key, value=key) for key in data - ]), - html.Div(id='subtabs-demographics-content') - ]), loaded_uuids_store, True - - elapsed_time = time.time() - start_time - logging.info(f"Callback - {selected_tab} Stage 3: Total Time for Demographics Tab: {elapsed_time:.2f} seconds") - - elif tab == 'tab-trajectories-datatable': - start_time = time.time() - logging.debug(f"Callback - {selected_tab} Stage 2: Handling Trajectories tab.") - - (start_date, end_date) = iso_to_date_only(start_date, end_date) - # Fetch new data based on the selected key_list from the keylist-switch - if store_trajectories == {} or key_list: - store_trajectories = update_store_trajectories(start_date, end_date, timezone, store_excluded_uuids, key_list) - - data = store_trajectories.get("data", []) - if data: - columns = list(data[0].keys()) - columns = perm_utils.get_trajectories_columns(columns) - has_perm = perm_utils.has_permission('data_trajectories') - - df = pd.DataFrame(data) - if df.empty or not has_perm: - logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") - return None, loaded_uuids_store, True + return html.Div([ + html.Button('Display columns with raw units', id='button-clicked', n_clicks=0, style={'marginLeft': '5px'}), + trips_table + ]), loaded_uuids_store, True + + esdsq.store_dashboard_time( + "admin/data/render_content/handle_trips_tab", + handle_trips_timer + ) + + # Handle Demographics tab + elif tab == 'tab-demographics-datatable': + with ect.Timer() as handle_demographics_timer: + data = store_demographics["data"] + has_perm = perm_utils.has_permission('data_demographics') + + if len(data) == 1: + data = list(data.values())[0] + columns = list(data[0].keys()) + elif len(data) > 1: + if not has_perm: + return None, loaded_uuids_store, True + return html.Div([ + dcc.Tabs(id='subtabs-demographics', value=list(data.keys())[0], children=[ + dcc.Tab(label=key, value=key) for key in data + ]), + html.Div(id='subtabs-demographics-content') + ]), loaded_uuids_store, True + + esdsq.store_dashboard_time( + "admin/data/render_content/handle_demographics_tab", + handle_demographics_timer + ) + + # Handle Trajectories tab + elif tab == 'tab-trajectories-datatable': + with ect.Timer() as handle_trajectories_timer: + (start_date, end_date) = iso_to_date_only(start_date, end_date) + # Fetch new data based on the selected key_list from the keylist-switch + if store_trajectories == {} or key_list: + store_trajectories = update_store_trajectories(start_date, end_date, timezone, store_excluded_uuids, key_list) + + data = store_trajectories.get("data", []) + if data: + columns = list(data[0].keys()) + columns = perm_utils.get_trajectories_columns(columns) + has_perm = perm_utils.has_permission('data_trajectories') + + df = pd.DataFrame(data) + if df.empty or not has_perm: + logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") + return None, loaded_uuids_store, True - df = df.drop(columns=[col for col in df.columns if col not in columns]) + df = df.drop(columns=[col for col in df.columns if col not in columns]) - elapsed_time = time.time() - start_time - logging.info(f"Callback - {selected_tab} Stage 3: Total Time for Trajectories Tab: {elapsed_time:.2f} seconds") + return populate_datatable(df), loaded_uuids_store, True - return populate_datatable(df), loaded_uuids_store, True + esdsq.store_dashboard_time( + "admin/data/render_content/handle_trajectories_tab", + handle_trajectories_timer + ) + + # Handle unhandled tabs or errors + else: + logging.debug(f"Callback - {selected_tab} Error Stage: No data loaded or unhandled tab.") + return None, loaded_uuids_store, True - # Default case: if no data is loaded or the tab is not handled - logging.debug(f"Callback - {selected_tab} Error Stage: No data loaded or unhandled tab.") + esdsq.store_dashboard_time( + "admin/data/render_content/total_time", + total_timer + ) return None, loaded_uuids_store, True # Handle subtabs for demographic table when there are multiple surveys @@ -422,27 +437,47 @@ def update_dropdowns_trips(n_clicks, button_label): def populate_datatable(df, table_id='', page_current=0): - if not isinstance(df, pd.DataFrame): - raise PreventUpdate - return dash_table.DataTable( - id=table_id, - # columns=[{"name": i, "id": i} for i in df.columns], - data=df.to_dict('records'), - export_format="csv", - filter_options={"case": "sensitive"}, - # filter_action="native", - sort_action="native", # give user capability to sort columns - sort_mode="single", # sort across 'multi' or 'single' columns - page_current=page_current, # set to current page - page_size=50, # number of rows visible per page - style_cell={ - 'textAlign': 'left', - # 'minWidth': '100px', - # 'width': '100px', - # 'maxWidth': '100px', - }, - style_table={'overflowX': 'auto'}, - css=[{"selector":".show-hide", "rule":"display:none"}] + with ect.Timer() as total_timer: + # Stage 1: Check if df is a DataFrame and raise PreventUpdate if not + with ect.Timer() as stage1_timer: + if not isinstance(df, pd.DataFrame): + raise PreventUpdate + esdsq.store_dashboard_time( + "admin/data/populate_datatable/check_dataframe_type", + stage1_timer + ) + + # Stage 2: Create DataTable + with ect.Timer() as stage2_timer: + table = dash_table.DataTable( + id=table_id, + # columns=[{"name": i, "id": i} for i in df.columns], + data=df.to_dict('records'), + export_format="csv", + filter_options={"case": "sensitive"}, + # filter_action="native", + sort_action="native", # give user capability to sort columns + sort_mode="single", # sort across 'multi' or 'single' columns + page_current=page_current, # set to current page + page_size=50, # number of rows visible per page + style_cell={ + 'textAlign': 'left', + # 'minWidth': '100px', + # 'width': '100px', + # 'maxWidth': '100px', + }, + style_table={'overflowX': 'auto'}, + css=[{"selector":".show-hide", "rule":"display:none"}] + ) + esdsq.store_dashboard_time( + "admin/db_utils/populate_datatable/create_datatable", + stage2_timer + ) + + esdsq.store_dashboard_time( + "admin/db_utils/populate_datatable/total_time", + total_timer ) + return table + - return result diff --git a/utils/db_utils.py b/utils/db_utils.py index 2e522c1..00e7d51 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -432,80 +432,118 @@ def query_trajectories(start_date: str, end_date: str, tz: str, key_list): def add_user_stats(user_data, batch_size=5): time_format = 'YYYY-MM-DD HH:mm:ss' - def process_user(user): - user_uuid = UUID(user['user_id']) + with ect.Timer() as total_timer: + # Stage 1: Define process_user + with ect.Timer() as stage1_timer: + def process_user(user): + with ect.Timer() as process_user_timer: + user_uuid = UUID(user['user_id']) + + # Fetch aggregated data for all users once and cache it + ts_aggregate = esta.TimeSeries.get_aggregate_time_series() - # Fetch aggregated data for all users once and cache it - ts_aggregate = esta.TimeSeries.get_aggregate_time_series() - - # Fetch data for the user, cached for repeated queries - profile_data = edb.get_profile_db().find_one({'user_id': user_uuid}) + # Fetch data for the user, cached for repeated queries + profile_data = edb.get_profile_db().find_one({'user_id': user_uuid}) + + total_trips = ts_aggregate.find_entries_count( + key_list=["analysis/confirmed_trip"], + extra_query_list=[{'user_id': user_uuid}] + ) + labeled_trips = ts_aggregate.find_entries_count( + key_list=["analysis/confirmed_trip"], + extra_query_list=[{'user_id': user_uuid}, {'data.user_input': {'$ne': {}}}] + ) + + user['total_trips'] = total_trips + user['labeled_trips'] = labeled_trips + + if profile_data: + user['platform'] = profile_data.get('curr_platform') + user['manufacturer'] = profile_data.get('manufacturer') + user['app_version'] = profile_data.get('client_app_version') + user['os_version'] = profile_data.get('client_os_version') + user['phone_lang'] = profile_data.get('phone_lang') + + if total_trips > 0: + ts = esta.TimeSeries.get_time_series(user_uuid) + first_trip_ts = ts.get_first_value_for_field( + key='analysis/confirmed_trip', + field='data.end_ts', + sort_order=pymongo.ASCENDING + ) + if first_trip_ts != -1: + user['first_trip'] = arrow.get(first_trip_ts).format(time_format) - total_trips = ts_aggregate.find_entries_count( - key_list=["analysis/confirmed_trip"], - extra_query_list=[{'user_id': user_uuid}] + last_trip_ts = ts.get_first_value_for_field( + key='analysis/confirmed_trip', + field='data.end_ts', + sort_order=pymongo.DESCENDING + ) + if last_trip_ts != -1: + user['last_trip'] = arrow.get(last_trip_ts).format(time_format) + + last_call_ts = ts.get_first_value_for_field( + key='stats/server_api_time', + field='data.ts', + sort_order=pymongo.DESCENDING + ) + if last_call_ts != -1: + user['last_call'] = arrow.get(last_call_ts).format(time_format) + + esdsq.store_dashboard_time( + "admin/db_utils/add_user_stats/process_user", + process_user_timer + ) + return user + esdsq.store_dashboard_time( + "admin/db_utils/add_user_stats/define_process_user", + stage1_timer ) - labeled_trips = ts_aggregate.find_entries_count( - key_list=["analysis/confirmed_trip"], - extra_query_list=[{'user_id': user_uuid}, {'data.user_input': {'$ne': {}}}] + + # Stage 2: Define batch_process + with ect.Timer() as stage2_timer: + def batch_process(users_batch): + with ect.Timer() as batch_process_timer: + with ThreadPoolExecutor() as executor: # Adjust max_workers based on CPU cores + futures = [executor.submit(process_user, user) for user in users_batch] + processed_batch = [future.result() for future in as_completed(futures)] + esdsq.store_dashboard_time( + "admin/db_utils/add_user_stats/batch_process", + batch_process_timer + ) + return processed_batch + esdsq.store_dashboard_time( + "admin/db_utils/add_user_stats/define_batch_process", + stage2_timer ) - user['total_trips'] = total_trips - user['labeled_trips'] = labeled_trips - - if profile_data: - user['platform'] = profile_data.get('curr_platform') - user['manufacturer'] = profile_data.get('manufacturer') - user['app_version'] = profile_data.get('client_app_version') - user['os_version'] = profile_data.get('client_os_version') - user['phone_lang'] = profile_data.get('phone_lang') - - if total_trips > 0: - ts = esta.TimeSeries.get_time_series(user_uuid) - first_trip_ts = ts.get_first_value_for_field( - key='analysis/confirmed_trip', - field='data.end_ts', - sort_order=pymongo.ASCENDING - ) - if first_trip_ts != -1: - user['first_trip'] = arrow.get(first_trip_ts).format(time_format) - - last_trip_ts = ts.get_first_value_for_field( - key='analysis/confirmed_trip', - field='data.end_ts', - sort_order=pymongo.DESCENDING - ) - if last_trip_ts != -1: - user['last_trip'] = arrow.get(last_trip_ts).format(time_format) - - last_call_ts = ts.get_first_value_for_field( - key='stats/server_api_time', - field='data.ts', - sort_order=pymongo.DESCENDING - ) - if last_call_ts != -1: - user['last_call'] = arrow.get(last_call_ts).format(time_format) + # Stage 3: Process batches + with ect.Timer() as stage3_timer: + total_users = len(user_data) + processed_data = [] - return user - - def batch_process(users_batch): - with ThreadPoolExecutor() as executor: # Adjust max_workers based on CPU cores - futures = [executor.submit(process_user, user) for user in users_batch] - processed_batch = [future.result() for future in as_completed(futures)] - return processed_batch - - total_users = len(user_data) - processed_data = [] - - for i in range(0, total_users, batch_size): - batch = user_data[i:i + batch_size] - processed_batch = batch_process(batch) - processed_data.extend(processed_batch) - - logging.debug(f'Processed {len(processed_data)} users out of {total_users}') - + for i in range(0, total_users, batch_size): + with ect.Timer() as stage_loop_timer: + batch = user_data[i:i + batch_size] + processed_batch = batch_process(batch) + processed_data.extend(processed_batch) + + logging.debug(f'Processed {len(processed_data)} users out of {total_users}') + esdsq.store_dashboard_time( + "admin/db_utils/add_user_stats/processing_loop_stage", + stage_loop_timer + ) + esdsq.store_dashboard_time( + "admin/db_utils/add_user_stats/process_batches", + stage3_timer + ) + esdsq.store_dashboard_time( + "admin/db_utils/add_user_stats/total_time", + total_timer + ) return processed_data + def query_segments_crossing_endpoints(poly_region_start, poly_region_end, start_date: str, end_date: str, tz: str, excluded_uuids: list[str]): with ect.Timer() as total_timer: From ae1e29035210fd9996c6b9468bdd0dd8c10de0f5 Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Sun, 27 Oct 2024 11:33:07 -0700 Subject: [PATCH 7/7] Fixed minor bug where uuids loaded infinitely due to misuse of interval disabling, fixed by modifying with statement positioning and return ordering. Revert whitespace changes Revert whitespace changes Bug Fix featuring the uuids misuse from prior commits Caught bug in the form of trips_data table being improperly named and not existing, fixed by renaming trips_data table. Fixed issues brought up and unnecessary changes Unformatted render Reverted more unnecessary changes unfix typo Reverted to old name Whitespace changes revert Whitespace changes revert Whitespace changes revert Whitespace changes revert --- app_sidebar_collapsible.py | 3 +- pages/data.py | 255 +++++++++++++++++++++---------------- utils/db_utils.py | 150 ++++++++++------------ 3 files changed, 215 insertions(+), 193 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index 71b51b7..5c77e5d 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -210,12 +210,11 @@ def make_controls(): 'flex-direction': 'column'} ) - page_content = dcc.Loading( id='global-loading', type='default', fullscreen=True, - overlay_style={"visibility": "visible", "filter": "blur(2px)"}, + overlay_style={"visibility": "visible", "filter": "opacity(0.5)"}, style={"background-color": "transparent"}, children=html.Div(dash.page_container, style={ "margin-left": "5rem", diff --git a/pages/data.py b/pages/data.py index be8bf23..ca45aec 100644 --- a/pages/data.py +++ b/pages/data.py @@ -8,7 +8,6 @@ import logging import pandas as pd from dash.exceptions import PreventUpdate -import time from utils import constants from utils import permissions as perm_utils @@ -24,16 +23,12 @@ layout = html.Div( [ dcc.Markdown(intro), - dcc.Tabs( - id="tabs-datatable", - value='tab-uuids-datatable', - children=[ - dcc.Tab(label='UUIDs', value='tab-uuids-datatable'), - dcc.Tab(label='Trips', value='tab-trips-datatable'), - dcc.Tab(label='Demographics', value='tab-demographics-datatable'), - dcc.Tab(label='Trajectories', value='tab-trajectories-datatable'), - ] - ), + dcc.Tabs(id="tabs-datatable", value='tab-uuids-datatable', children=[ + dcc.Tab(label='UUIDs', value='tab-uuids-datatable'), + dcc.Tab(label='Trips', value='tab-trips-datatable'), + dcc.Tab(label='Demographics', value='tab-demographics-datatable'), + dcc.Tab(label='Trajectories', value='tab-trajectories-datatable'), + ]), html.Div(id='tabs-content'), dcc.Store(id='selected-tab', data='tab-uuids-datatable'), # Store to hold selected tab dcc.Interval(id='interval-load-more', interval=24000, n_intervals=0), # Interval for loading more data @@ -167,11 +162,7 @@ def update_uuids_page_current(page_current, selected_tab): State('store-loaded-uuids', 'data'), # Use State to track already loaded data State('store-loaded-uuids', 'loaded') # Keep track if we have finished loading all data ) -def render_content( - tab, store_uuids, store_excluded_uuids, store_trips, store_demographics, store_trajectories, - start_date, end_date, timezone, n_intervals, key_list, current_page, - loaded_uuids_store, all_data_loaded -): +def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_demographics, store_trajectories, start_date, end_date, timezone, n_intervals, key_list, current_page, loaded_uuids_store, all_data_loaded): with ect.Timer() as total_timer: initial_batch_size = 10 # Define the batch size for loading UUIDs @@ -179,6 +170,11 @@ def render_content( selected_tab = tab logging.debug(f"Callback - {selected_tab} Stage 1: Selected tab updated.") + # Initialize return variables + content = None + updated_loaded_uuids_store = loaded_uuids_store.copy() if loaded_uuids_store else {'data': [], 'loaded': False} + interval_disabled = True + # Handle the UUIDs tab without fullscreen loading spinner if tab == 'tab-uuids-datatable': with ect.Timer() as handle_uuids_timer: @@ -187,60 +183,65 @@ def render_content( # Ensure store_uuids contains the key 'data' which is a list of dictionaries if not isinstance(store_uuids, dict) or 'data' not in store_uuids: logging.error(f"Expected store_uuids to be a dict with a 'data' key, but got {type(store_uuids)}") - return html.Div([html.P("Data structure error.")]), loaded_uuids_store, True - - uuids_list = store_uuids['data'] - - # Ensure uuids_list is a list for slicing - if not isinstance(uuids_list, list): - logging.error(f"Expected store_uuids['data'] to be a list but got {type(uuids_list)}") - return html.Div([html.P("Data structure error.")]), loaded_uuids_store, True - - loaded_data = loaded_uuids_store.get('data', []) - total_loaded = len(loaded_data) - - # Handle lazy loading - if not loaded_uuids_store.get('loaded', False): - total_to_load = total_loaded + initial_batch_size - total_to_load = min(total_to_load, len(uuids_list)) # Avoid loading more than available - - logging.debug(f"Callback - {selected_tab} Stage 3: Loading next batch of UUIDs from {total_loaded} to {total_to_load}.") - - new_data = uuids_list[total_loaded:total_to_load] - - if new_data: - # Process and append the new data to the loaded store - processed_data = db_utils.add_user_stats(new_data, initial_batch_size) - loaded_data.extend(processed_data) - - # Update the store with the new data - loaded_uuids_store['data'] = loaded_data # Mark all data as loaded if done - loaded_uuids_store['loaded'] = len(loaded_data) >= len(uuids_list) - - logging.debug(f"Callback - {selected_tab} Stage 4: New batch loaded. Total loaded: {len(loaded_data)}.") - - # Prepare the data to be displayed - columns = perm_utils.get_uuids_columns() # Get the relevant columns - df = pd.DataFrame(loaded_data) - - if df.empty or not perm_utils.has_permission('data_uuids'): - logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") - return html.Div([html.P("No data available or you don't have permission.")]), loaded_uuids_store, True - - df = df.drop(columns=[col for col in df.columns if col not in columns]) - - logging.debug(f"Callback - {selected_tab} Stage 5: Returning appended data to update the UI.") - content = html.Div([ - populate_datatable(df, table_id='uuid-table', page_current=current_page), # Pass current_page - html.P( - f"Showing {len(loaded_data)} of {len(uuids_list)} UUIDs." + - (f" Loading 10 more..." if not loaded_uuids_store.get('loaded', False) else ""), - style={'margin': '15px 5px'} - ) - ]) - - return content, loaded_uuids_store, False if not loaded_uuids_store['loaded'] else True - + content = html.Div([html.P("Data structure error.")]) + interval_disabled = True + else: + uuids_list = store_uuids['data'] + + # Ensure uuids_list is a list for slicing + if not isinstance(uuids_list, list): + logging.error(f"Expected store_uuids['data'] to be a list but got {type(uuids_list)}") + content = html.Div([html.P("Data structure error.")]) + interval_disabled = True + else: + loaded_data = updated_loaded_uuids_store.get('data', []) + total_loaded = len(loaded_data) + + # Handle lazy loading + if not updated_loaded_uuids_store.get('loaded', False): + total_to_load = total_loaded + initial_batch_size + total_to_load = min(total_to_load, len(uuids_list)) # Avoid loading more than available + + logging.debug(f"Callback - {selected_tab} Stage 3: Loading next batch of UUIDs from {total_loaded} to {total_to_load}.") + + new_data = uuids_list[total_loaded:total_to_load] + + if new_data: + # Process and append the new data to the loaded store + processed_data = db_utils.add_user_stats(new_data, initial_batch_size) + loaded_data.extend(processed_data) + + # Update the store with the new data by creating a new dict + updated_loaded_uuids_store = { + 'data': loaded_data, + 'loaded': len(loaded_data) >= len(uuids_list) + } + + logging.debug(f"Callback - {selected_tab} Stage 4: New batch loaded. Total loaded: {len(loaded_data)}.") + + # Prepare the data to be displayed + columns = perm_utils.get_uuids_columns() # Get the relevant columns + df = pd.DataFrame(updated_loaded_uuids_store['data']) + + if df.empty or not perm_utils.has_permission('data_uuids'): + logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") + content = html.Div([html.P("No data available or you don't have permission.")]) + interval_disabled = True + else: + df = df.drop(columns=[col for col in df.columns if col not in columns]) + + logging.debug(f"Callback - {selected_tab} Stage 5: Returning appended data to update the UI.") + content = html.Div([ + populate_datatable(df, table_id='uuid-table', page_current=current_page), # Pass current_page + html.P( + f"Showing {len(updated_loaded_uuids_store['data'])} of {len(uuids_list)} UUIDs." + + (f" Loading {initial_batch_size} more..." if not updated_loaded_uuids_store.get('loaded', False) else ""), + style={'margin': '15px 5px'} + ) + ]) + interval_disabled = updated_loaded_uuids_store.get('loaded', False) + + # Store timing after handling UUIDs tab esdsq.store_dashboard_time( "admin/data/render_content/handle_uuids_tab", handle_uuids_timer @@ -251,27 +252,30 @@ def render_content( with ect.Timer() as handle_trips_timer: logging.debug(f"Callback - {selected_tab} Stage 2: Handling Trips tab.") - data = store_trips["data"] + data = store_trips.get("data", []) columns = perm_utils.get_allowed_trip_columns() columns.update(col['label'] for col in perm_utils.get_allowed_named_trip_columns()) - columns.update(store_trips["userinputcols"]) + columns.update(store_trips.get("userinputcols", [])) has_perm = perm_utils.has_permission('data_trips') df = pd.DataFrame(data) if df.empty or not has_perm: logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") - return None, loaded_uuids_store, True + content = None + interval_disabled = True + else: + df = df.drop(columns=[col for col in df.columns if col not in columns]) + df = clean_location_data(df) - df = df.drop(columns=[col for col in df.columns if col not in columns]) - df = clean_location_data(df) + trips_table = populate_datatable(df, table_id='trips-datatable') - trips_table = populate_datatable(df) - - return html.Div([ - html.Button('Display columns with raw units', id='button-clicked', n_clicks=0, style={'marginLeft': '5px'}), - trips_table - ]), loaded_uuids_store, True + content = html.Div([ + html.Button('Display columns with raw units', id='button-clicked', n_clicks=0, style={'marginLeft': '5px'}), + trips_table + ]) + interval_disabled = True + # Store timing after handling Trips tab esdsq.store_dashboard_time( "admin/data/render_content/handle_trips_tab", handle_trips_timer @@ -280,22 +284,37 @@ def render_content( # Handle Demographics tab elif tab == 'tab-demographics-datatable': with ect.Timer() as handle_demographics_timer: - data = store_demographics["data"] + data = store_demographics.get("data", {}) has_perm = perm_utils.has_permission('data_demographics') if len(data) == 1: + # Here data is a dictionary data = list(data.values())[0] - columns = list(data[0].keys()) + columns = list(data[0].keys()) if data else [] + df = pd.DataFrame(data) + if df.empty: + content = None + interval_disabled = True + else: + content = populate_datatable(df) + interval_disabled = True elif len(data) > 1: if not has_perm: - return None, loaded_uuids_store, True - return html.Div([ - dcc.Tabs(id='subtabs-demographics', value=list(data.keys())[0], children=[ - dcc.Tab(label=key, value=key) for key in data - ]), - html.Div(id='subtabs-demographics-content') - ]), loaded_uuids_store, True - + content = None + interval_disabled = True + else: + content = html.Div([ + dcc.Tabs(id='subtabs-demographics', value=list(data.keys())[0], children=[ + dcc.Tab(label=key, value=key) for key in data + ]), + html.Div(id='subtabs-demographics-content') + ]) + interval_disabled = True + else: + content = None + interval_disabled = True + + # Store timing after handling Demographics tab esdsq.store_dashboard_time( "admin/data/render_content/handle_demographics_tab", handle_demographics_timer @@ -303,27 +322,35 @@ def render_content( # Handle Trajectories tab elif tab == 'tab-trajectories-datatable': + # Currently store_trajectories data is loaded only when the respective tab is selected + # Here we query for trajectory data once "Trajectories" tab is selected with ect.Timer() as handle_trajectories_timer: (start_date, end_date) = iso_to_date_only(start_date, end_date) - # Fetch new data based on the selected key_list from the keylist-switch - if store_trajectories == {} or key_list: + if store_trajectories == {}: store_trajectories = update_store_trajectories(start_date, end_date, timezone, store_excluded_uuids, key_list) - - data = store_trajectories.get("data", []) + data = store_trajectories["data"] if data: columns = list(data[0].keys()) columns = perm_utils.get_trajectories_columns(columns) has_perm = perm_utils.has_permission('data_trajectories') - df = pd.DataFrame(data) - if df.empty or not has_perm: - logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") - return None, loaded_uuids_store, True + df = pd.DataFrame(data) + if df.empty or not has_perm: + logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.") + content = None + interval_disabled = True + else: + df = df.drop(columns=[col for col in df.columns if col not in columns]) - df = df.drop(columns=[col for col in df.columns if col not in columns]) + datatable = populate_datatable(df) - return populate_datatable(df), loaded_uuids_store, True + content = datatable + interval_disabled = True + else: + content = None + interval_disabled = True + # Store timing after handling Trajectories tab esdsq.store_dashboard_time( "admin/data/render_content/handle_trajectories_tab", handle_trajectories_timer @@ -332,13 +359,17 @@ def render_content( # Handle unhandled tabs or errors else: logging.debug(f"Callback - {selected_tab} Error Stage: No data loaded or unhandled tab.") - return None, loaded_uuids_store, True + content = None + interval_disabled = True + # Store total timing after all stages esdsq.store_dashboard_time( "admin/data/render_content/total_time", total_timer ) - return None, loaded_uuids_store, True + + return content, updated_loaded_uuids_store, interval_disabled + # Handle subtabs for demographic table when there are multiple surveys @callback( @@ -404,7 +435,7 @@ def update_sub_tab(tab, store_demographics): return result @callback( - Output('trips-table', 'hidden_columns'), # Output hidden columns in the trips-table + Output('trips-datatable', 'hidden_columns'), # Output hidden columns in the trips-table Output('button-clicked', 'children'), # Updates button label Input('button-clicked', 'n_clicks'), # Number of clicks on the button State('button-clicked', 'children') # State representing the current label of button @@ -436,8 +467,10 @@ def update_dropdowns_trips(n_clicks, button_label): return hidden_col, button_label + def populate_datatable(df, table_id='', page_current=0): with ect.Timer() as total_timer: + # Stage 1: Check if df is a DataFrame and raise PreventUpdate if not with ect.Timer() as stage1_timer: if not isinstance(df, pd.DataFrame): @@ -446,10 +479,10 @@ def populate_datatable(df, table_id='', page_current=0): "admin/data/populate_datatable/check_dataframe_type", stage1_timer ) - - # Stage 2: Create DataTable + + # Stage 2: Create the DataTable from the DataFrame with ect.Timer() as stage2_timer: - table = dash_table.DataTable( + result = dash_table.DataTable( id=table_id, # columns=[{"name": i, "id": i} for i in df.columns], data=df.to_dict('records'), @@ -467,17 +500,17 @@ def populate_datatable(df, table_id='', page_current=0): # 'maxWidth': '100px', }, style_table={'overflowX': 'auto'}, - css=[{"selector":".show-hide", "rule":"display:none"}] + css=[{"selector": ".show-hide", "rule": "display:none"}] ) esdsq.store_dashboard_time( - "admin/db_utils/populate_datatable/create_datatable", + "admin/data/populate_datatable/create_datatable", stage2_timer ) esdsq.store_dashboard_time( - "admin/db_utils/populate_datatable/total_time", + "admin/data/populate_datatable/total_time", total_timer ) - return table + return result diff --git a/utils/db_utils.py b/utils/db_utils.py index 00e7d51..387b45c 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -434,88 +434,78 @@ def add_user_stats(user_data, batch_size=5): time_format = 'YYYY-MM-DD HH:mm:ss' with ect.Timer() as total_timer: # Stage 1: Define process_user - with ect.Timer() as stage1_timer: - def process_user(user): - with ect.Timer() as process_user_timer: - user_uuid = UUID(user['user_id']) - - # Fetch aggregated data for all users once and cache it - ts_aggregate = esta.TimeSeries.get_aggregate_time_series() - - # Fetch data for the user, cached for repeated queries - profile_data = edb.get_profile_db().find_one({'user_id': user_uuid}) - - total_trips = ts_aggregate.find_entries_count( - key_list=["analysis/confirmed_trip"], - extra_query_list=[{'user_id': user_uuid}] - ) - labeled_trips = ts_aggregate.find_entries_count( - key_list=["analysis/confirmed_trip"], - extra_query_list=[{'user_id': user_uuid}, {'data.user_input': {'$ne': {}}}] - ) - - user['total_trips'] = total_trips - user['labeled_trips'] = labeled_trips - - if profile_data: - user['platform'] = profile_data.get('curr_platform') - user['manufacturer'] = profile_data.get('manufacturer') - user['app_version'] = profile_data.get('client_app_version') - user['os_version'] = profile_data.get('client_os_version') - user['phone_lang'] = profile_data.get('phone_lang') - - if total_trips > 0: - ts = esta.TimeSeries.get_time_series(user_uuid) - first_trip_ts = ts.get_first_value_for_field( - key='analysis/confirmed_trip', - field='data.end_ts', - sort_order=pymongo.ASCENDING - ) - if first_trip_ts != -1: - user['first_trip'] = arrow.get(first_trip_ts).format(time_format) - - last_trip_ts = ts.get_first_value_for_field( - key='analysis/confirmed_trip', - field='data.end_ts', - sort_order=pymongo.DESCENDING - ) - if last_trip_ts != -1: - user['last_trip'] = arrow.get(last_trip_ts).format(time_format) - - last_call_ts = ts.get_first_value_for_field( - key='stats/server_api_time', - field='data.ts', - sort_order=pymongo.DESCENDING - ) - if last_call_ts != -1: - user['last_call'] = arrow.get(last_call_ts).format(time_format) - - esdsq.store_dashboard_time( - "admin/db_utils/add_user_stats/process_user", - process_user_timer + def process_user(user): + with ect.Timer() as process_user_timer: + user_uuid = UUID(user['user_id']) + + # Fetch aggregated data for all users once and cache it + ts_aggregate = esta.TimeSeries.get_aggregate_time_series() + + # Fetch data for the user, cached for repeated queries + profile_data = edb.get_profile_db().find_one({'user_id': user_uuid}) + + total_trips = ts_aggregate.find_entries_count( + key_list=["analysis/confirmed_trip"], + extra_query_list=[{'user_id': user_uuid}] ) - return user - esdsq.store_dashboard_time( - "admin/db_utils/add_user_stats/define_process_user", - stage1_timer - ) - - # Stage 2: Define batch_process - with ect.Timer() as stage2_timer: - def batch_process(users_batch): - with ect.Timer() as batch_process_timer: - with ThreadPoolExecutor() as executor: # Adjust max_workers based on CPU cores - futures = [executor.submit(process_user, user) for user in users_batch] - processed_batch = [future.result() for future in as_completed(futures)] - esdsq.store_dashboard_time( - "admin/db_utils/add_user_stats/batch_process", - batch_process_timer + labeled_trips = ts_aggregate.find_entries_count( + key_list=["analysis/confirmed_trip"], + extra_query_list=[{'user_id': user_uuid}, {'data.user_input': {'$ne': {}}}] ) - return processed_batch - esdsq.store_dashboard_time( - "admin/db_utils/add_user_stats/define_batch_process", - stage2_timer - ) + + user['total_trips'] = total_trips + user['labeled_trips'] = labeled_trips + + if profile_data: + user['platform'] = profile_data.get('curr_platform') + user['manufacturer'] = profile_data.get('manufacturer') + user['app_version'] = profile_data.get('client_app_version') + user['os_version'] = profile_data.get('client_os_version') + user['phone_lang'] = profile_data.get('phone_lang') + + if total_trips > 0: + ts = esta.TimeSeries.get_time_series(user_uuid) + first_trip_ts = ts.get_first_value_for_field( + key='analysis/confirmed_trip', + field='data.end_ts', + sort_order=pymongo.ASCENDING + ) + if first_trip_ts != -1: + user['first_trip'] = arrow.get(first_trip_ts).format(time_format) + + last_trip_ts = ts.get_first_value_for_field( + key='analysis/confirmed_trip', + field='data.end_ts', + sort_order=pymongo.DESCENDING + ) + if last_trip_ts != -1: + user['last_trip'] = arrow.get(last_trip_ts).format(time_format) + + last_call_ts = ts.get_first_value_for_field( + key='stats/server_api_time', + field='data.ts', + sort_order=pymongo.DESCENDING + ) + if last_call_ts != -1: + user['last_call'] = arrow.get(last_call_ts).format(time_format) + + esdsq.store_dashboard_time( + "admin/db_utils/add_user_stats/process_user", + process_user_timer + ) + return user + + def batch_process(users_batch): + with ect.Timer() as batch_process_timer: + with ThreadPoolExecutor() as executor: # Adjust max_workers based on CPU cores + futures = [executor.submit(process_user, user) for user in users_batch] + processed_batch = [future.result() for future in as_completed(futures)] + esdsq.store_dashboard_time( + "admin/db_utils/add_user_stats/get_last_trip_timestamp", + batch_process_timer + ) + return processed_batch + # Stage 3: Process batches with ect.Timer() as stage3_timer: