Skip to content

Commit

Permalink
Redesign the Copulogram class, using matplotlib subplots instead of s…
Browse files Browse the repository at this point in the history
…eaborn PairGrid
  • Loading branch information
efekhari27 committed Aug 15, 2023
1 parent 4845db3 commit 9387d63
Show file tree
Hide file tree
Showing 25 changed files with 504 additions and 318 deletions.
391 changes: 198 additions & 193 deletions copulogram/Copulogram.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion copulogram/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
"Copulogram",
"NonParametricModel"
]
__version__ = "0.0.3"
__version__ = "0.0.4"
288 changes: 288 additions & 0 deletions copulogram/deprecated_Copulogram.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Copyright (C) EDF 2023
@author: Elias Fekhari
"""

#%%
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import rc
import matplotlib.pyplot as plt

# Interactive imports
from itertools import product
from bokeh.layouts import gridplot
from bokeh.models import (BasicTicker, Circle, ColumnDataSource,
DataRange1d, Grid, LassoSelectTool, LinearAxis,
Plot, ResetTool)
from bokeh.transform import factor_cmap, linear_cmap


class Copulogram:
    """
    Draws a plot for multivariate distributions.
    The lower triangle is a matrixplot of the data (without transformation),
    while the upper triangle is a matrixplot of the ranked data.

    Parameters
    ----------
    data : pd.DataFrame
        Input dataset to be plotted. Must be a pandas DataFrame object.
        Every row containing a missing value is removed beforehand.
    latex : boolean
        When True, matplotlib is configured (globally, through ``rc``) to
        render text with LaTeX using the Times font family.

    Example
    --------
    >>> copulogram = Copulogram(data)  # doctest: +SKIP
    >>> copulogram.draw(hue='species', kde_on_marginals=False)  # doctest: +SKIP
    """
    def __init__(
        self,
        data,
        latex=False
    ):
        self.data = data.dropna()
        # The sample size must be the one of the cleaned dataset: the ranks
        # are computed on ``self.data``, so dividing them by the raw row
        # count would shrink the normalized ranks whenever rows were dropped.
        self.N = self.data.shape[0]
        if latex:
            rc('font', **{'family': 'Times'})
            rc('text', usetex=True)
            # NOTE(review): the font sizes below are assumed to belong to the
            # LaTeX setup (original indentation was lost) -- confirm.
            rc('font', size=18)  # Set the default text font size
            rc('axes', titlesize=20)  # Set the axes title font size
            rc('axes', labelsize=18)  # Set the axes labels font size
            rc('xtick', labelsize=18)  # Set the font size for x tick labels
            rc('ytick', labelsize=18)  # Set the font size for y tick labels
            rc('legend', fontsize=18)  # Set the legend font size

    def _ranks_rescaled(self, frame):
        """Return the ranks of *frame* divided by N, mapped onto each column's [min, max]."""
        lows = frame.min().values
        highs = frame.max().values
        return (frame.rank() / self.N) * (highs - lows) + lows

    def draw(self,
             color='C0',
             alpha=1.,
             hue=None,
             hue_palette=None,
             kde_on_marginals=True,
             quantile_contour_levels=None,
             save_file=None,
             marker='o',
             subplot_size=2.5
             ):
        """
        Draws the static copulogram plot.

        Parameters
        ----------
        color : string
            The matplotlib color on every element of the graph as long as "hue" is None.
        alpha : float
            The alpha blending value, between 0 (transparent) and 1 (opaque).
        hue : string
            Grouping variable that will produce points with different colors.
            Can be either categorical or numeric, although color mapping will behave differently in latter case.
        hue_palette : string
            Method for choosing the colors to use when mapping the hue semantic.
            By default "tab10" for categorical mapping, and "viridis" for continuous mapping.
        kde_on_marginals : boolean
            Defines the type of plot on the diagonal. Histogram when
            the variable is set to False, kernel density estimation otherwise.
        quantile_contour_levels : 1-d list of floats
            When the variable takes a value, the contours of the quantiles
            defined by the variable are plotted instead of the scatter plots.
        save_file : string
            When this variable is not None, the figure is saved to this path.
        marker : string
            Defines the scatterplots markers according to Matplotlib formalism.
        subplot_size : float
            Defines the size of each subplot in inches.

        Returns
        -------
        copulogram : seaborn.PairGrid
            The seaborn grid containing the plot.
        """
        df = self.data.copy(deep=True)
        df_numeric = df._get_numeric_data()
        # The hue variable is used for coloring only, never as a plotted axis.
        plotted_cols = [col for col in df_numeric.columns if col != hue]
        with_hue = hue is not None

        if with_hue:
            hue_is_categorical = df[hue].dtype == 'O'
            if hue_palette is None:
                hue_palette = 'tab10' if hue_is_categorical else 'viridis'
            copulogram = sns.PairGrid(df[plotted_cols + [hue]], hue=hue,
                                      palette=hue_palette, height=subplot_size)
            diag_kwargs = {'color': ".3"}
            if not hue_is_categorical:
                # A continuous hue is not split on the marginals.
                diag_kwargs['hue'] = None
            scatter_func = sns.scatterplot
            scatter_kwargs = {'alpha': alpha, 'marker': marker}
            contour_kwargs = {'levels': quantile_contour_levels}
        else:
            copulogram = sns.PairGrid(df_numeric, height=subplot_size)
            diag_kwargs = {'color': color}
            scatter_func = plt.scatter
            scatter_kwargs = {'color': color, 'alpha': alpha, 'marker': marker}
            contour_kwargs = {'levels': quantile_contour_levels, 'color': color}

        # Diagonal: marginal distributions.
        if kde_on_marginals:
            copulogram.map_diag(sns.kdeplot, **diag_kwargs)
        else:
            copulogram.map_diag(sns.histplot, bins=20, **diag_kwargs)

        # Off-diagonal: scatter plots, or density contours when levels are given.
        if quantile_contour_levels is None:
            pair_func, pair_kwargs = scatter_func, scatter_kwargs
        else:
            pair_func, pair_kwargs = sns.kdeplot, contour_kwargs

        # Lower triangle: raw data.
        copulogram.map_lower(pair_func, **pair_kwargs)

        # Upper triangle: ranked data. The grid's data is swapped for the
        # normalized ranks rescaled to each variable's range, so that both
        # triangles can share the same axes.
        ranked = self._ranks_rescaled(df_numeric[plotted_cols])
        if with_hue:
            ranked[hue] = df[hue]
        copulogram.data = ranked
        copulogram.map_upper(pair_func, **pair_kwargs)

        if with_hue:
            copulogram.add_legend(title=hue)

        if save_file is not None:
            plt.savefig(save_file, dpi=300, bbox_inches='tight')
        return copulogram

    def draw_interactive(self,
                         color='navy',
                         alpha=1.,
                         hue=None,
                         hue_palette=None,
                         marker='o',
                         subplot_size=5
                         ):
        """
        Draws the interactive (Bokeh) copulogram plot.

        The lower triangle shows the raw data, the upper triangle shows the
        ranks of the data, and the diagonal glyphs are hidden.

        Parameters
        ----------
        color : string
            The color of every point as long as "hue" is None.
        alpha : float
            The fill alpha blending value, between 0 (transparent) and 1 (opaque).
        hue : string
            Variable used to color the points. An object-typed column is
            mapped with a categorical color map, any other with a linear one.
        hue_palette : string
            Bokeh palette name. By default "Category10_3" for categorical
            mapping, and "Spectral6" for continuous mapping.
        marker : string
            Currently unused: points are always drawn as circles.
        subplot_size : float
            Minimal border (``min_border``) applied to every subplot.

        Returns
        -------
        grid_plot : Bokeh grid layout
            The object returned by ``bokeh.layouts.gridplot``, to be
            displayed with ``bokeh.io.show``.
        """
        df = self.data.copy(deep=True)
        df_numeric = df._get_numeric_data()
        rdf = df_numeric.rank()

        # The hue variable is used for coloring only, never as a plotted axis.
        plotted_cols = [col for col in df_numeric.columns if col != hue]
        dim = len(plotted_cols)

        if hue is not None:
            # Both sources carry the untransformed hue values for the color mapping.
            df_numeric[hue] = df[hue]
            rdf[hue] = df[hue]
        source = ColumnDataSource(data=df_numeric)
        rsource = ColumnDataSource(data=rdf)

        # The color specification does not depend on the panel: build it once.
        if hue is None:
            scatter_color = color
        elif df[hue].dtype == 'O':
            if hue_palette is None:
                hue_palette = "Category10_3"
            scatter_color = factor_cmap(hue, hue_palette, sorted(df[hue].unique()))
        else:
            if hue_palette is None:
                hue_palette = "Spectral6"
            scatter_color = linear_cmap(hue, hue_palette, low=df[hue].min(), high=df[hue].max())

        plot_list = []
        for i, (y, x) in enumerate(product(plotted_cols, plotted_cols)):
            row, col = divmod(i, dim)
            circle = Circle(x=x, y=y, fill_alpha=alpha, size=5, line_color=None,
                            fill_color=scatter_color)

            # Lower triangle and diagonal use the raw data, upper triangle the ranks.
            if col <= row:
                frame, glyph_source = df, source
            else:
                frame, glyph_source = rdf, rsource

            # Define one empty plot
            p = Plot(x_range=DataRange1d(bounds=(frame[x].min(), frame[x].max())),
                     y_range=DataRange1d(bounds=(frame[y].min(), frame[y].max())),
                     background_fill_color="#fafafa",
                     border_fill_color="white", width=200, height=200,
                     min_border=subplot_size)
            r = p.add_glyph(glyph_source, circle)
            # Delete diagonal plot
            if col == row:
                r.visible = False
                p.grid.grid_line_color = None
            p.x_range.renderers.append(r)
            p.y_range.renderers.append(r)

            # First column ticks
            if col == 0:
                p.min_border_left = p.min_border + 4
                p.width += 40
                yaxis = LinearAxis(axis_label=y)
                yaxis.major_label_orientation = "vertical"
                p.add_layout(yaxis, "left")
                yticker = yaxis.ticker
            else:
                yticker = BasicTicker()
            p.add_layout(Grid(dimension=1, ticker=yticker))

            # Last row ticks
            if row == dim - 1:
                p.min_border_bottom = p.min_border + 40
                p.height += 40
                xaxis = LinearAxis(axis_label=x)
                p.add_layout(xaxis, "below")
                xticker = xaxis.ticker
            else:
                xticker = BasicTicker()
            p.add_layout(Grid(dimension=0, ticker=xticker))
            p.add_tools(LassoSelectTool(), ResetTool())
            plot_list.append(p)

        grid_plot = gridplot(plot_list, ncols=dim)
        return grid_plot

##TODO:
# Add docstrings
# Remove the misleading yticks from the top left plot? Ideally we should add the index ticks on the top left of the plot
# Add color bar on the interactive method using : https://docs.bokeh.org/en/latest/docs/examples/basic/data/color_mappers.html


#%%
if __name__ == "__main__":
    # Demo: interactive copulogram of the wind/waves dataset, colored by a
    # synthetic output variable.
    # Alternative demo dataset: data = sns.load_dataset('iris')
    import pandas as pd

    # Only the first 1000 rows are kept.
    data = pd.read_csv("../examples/data/wind_waves_ANEMOC_1H.csv", index_col=0).iloc[:1000]

    # Angles are converted from degrees to radians before being combined.
    deg_to_rad = np.pi / 180
    wind_term = data["U_hub (m/s)"] ** 3 * (deg_to_rad * data["θ_wind (deg)"])
    wave_term = (data["Hs (m)"] ** 2 * data["Tp (s)"]) / (deg_to_rad * data["θ_wave_new (deg)"])
    data['output'] = np.log10(wind_term + wave_term)

    copulogram = Copulogram(data)
    copulogram.draw_interactive(hue="output")
# %%
Binary file modified examples/figures/iris1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified examples/figures/iris2.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified examples/figures/iris_contours.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified examples/figures/ishigami.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified examples/figures/wind_waves.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified examples/figures/wind_waves_contours.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified examples/figures/wind_waves_simulated_100.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed examples/figures/wind_waves_simulated_10000.jpg
Binary file not shown.
Binary file removed examples/figures/wind_waves_simulated_20.jpg
Binary file not shown.
Binary file removed examples/figures/wind_waves_simulated_200.jpg
Binary file not shown.
Binary file removed examples/figures/wind_waves_simulated_40.jpg
Binary file not shown.
Binary file removed examples/figures/wind_waves_simulated_60.jpg
Binary file not shown.
Binary file removed examples/figures/wind_waves_simulated_8.jpg
Binary file not shown.
Binary file removed examples/figures/wind_waves_simulated_80.jpg
Binary file not shown.
Binary file added examples/figures/wind_waves_threshold.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified examples/figures/wind_waves_woutput.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
61 changes: 0 additions & 61 deletions examples/iris_dataset.html

This file was deleted.

5 changes: 1 addition & 4 deletions examples/iris_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,9 @@

import seaborn as sns
import copulogram as cp
from bokeh.io import show

data = sns.load_dataset('iris')
copulogram = cp.Copulogram(data)
copulogram.draw(save_file="figures/iris1.jpg")
copulogram.draw(alpha=0.8, hue='species', kde_on_marginals=False, save_file="figures/iris2.jpg")
copulogram.draw(hue='species', quantile_contour_levels=[0.2, 0.4, 0.6, 0.8], save_file="figures/iris_contours.jpg")
interactive_plot = copulogram.draw_interactive(hue="species")
show(interactive_plot)
copulogram.draw(hue='species', quantile_contour_levels=[0.2, 0.4, 0.6, 0.8], save_file="figures/iris_contours.jpg")
2 changes: 1 addition & 1 deletion examples/ishigami.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
data = pd.DataFrame(np.array(X[:plotting_size]), columns=list(X.getDescription()))
data['Y'] = np.array(Y[:plotting_size])
copulogram = cp.Copulogram(data)
copulogram.draw(color='C7', marker='.', alpha=0.5, save_file="figures/ishigami.jpg")
copulogram.draw(color='C7', marker='.', alpha=0.5, save_file="figures/ishigami.jpg")
52 changes: 0 additions & 52 deletions examples/wind_waves_dataset.html

This file was deleted.

19 changes: 14 additions & 5 deletions examples/wind_waves_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,18 @@
output = data["$U$"] ** 3 * ((np.pi / 180) * data["$\\theta_{wind}$"]) + (data["$H_s$"] ** 2 * data["$T_p$"]) / ((np.pi / 180) * data["$\\theta_{wave}$"])
data['output'] = np.log10(output)
data.columns = ["$\\theta_{wind}$", "$U$", "$\\theta_{wave}$", "$H_s$", "$T_p$", "output"]
# Draw static copulogram on data

# Draw copulogram on data
copulogram = cp.Copulogram(data, latex=True)
copulogram.draw(hue="output", hue_colorbar="plasma", alpha=alpha, marker="o", kde_on_marginals=False, save_file="figures/wind_waves_woutput.jpg")

# Draw threshold event
import seaborn as sns
from matplotlib.colors import to_rgba

threshold = 4.
data['is_failed'] = "False"
data.loc[data[data['output'] > threshold].index, 'is_failed'] = "True"
color_palette = sns.color_palette([to_rgba('C0', 0.2), to_rgba('C1', 0.9)], as_cmap=True)
copulogram = cp.Copulogram(data, latex=True)
copulogram.draw(color="C7", hue="output", alpha=alpha, kde_on_marginals=False, save_file="figures/wind_waves_woutput.jpg", marker="o")
# Draw interactive copulogram on data
#interactive_plot = copulogram.draw_interactive(color="C7", hue="output", alpha=0.8)
#show(interactive_plot)
copulogram.draw(hue='is_failed', hue_colorbar=color_palette, save_file="figures/wind_waves_threshold.jpg")
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
"seaborn",
"matplotlib",
"openturns>=1.20",
"bokeh>=3.2",
#"bokeh>=3.2",
],
include_package_data=True,
classifiers=[
Expand Down

1 comment on commit 9387d63

@efekhari27
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that the interactive development using Bokeh was dropped during this commit.

Please sign in to comment.