-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpys_sql.py
48 lines (41 loc) · 2.12 KB
/
pys_sql.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Sourced from https://github.com/LucaCanali/Miscellaneous/blob/master/Pyspark_SQL_Magic_Jupyter/IPython_Pyspark_SQL_Magic.ipynb
# IPython magic functions to use with Pyspark and Spark SQL
# The following code is intended as examples of shorcuts to simplify the use of SQL in pyspark
# The defined functions are:
#
# %sql <statement> - return a Spark DataFrame for lazy evaluation of the SQL
# %sql_show <statement> - run the SQL statement and show max_show_lines (50) lines
# %sql_display <statement> - run the SQL statement and display the results using a HTML table
# - this is implemented passing via Pandas and displays up to max_show_lines (50)
# %sql_explain <statement> - display the execution plan of the SQL statement
#
# Use: %<magic> for line magic or %%<magic> for cell magic.
#
# Author: Luca.Canali@cern.ch
# September 2016
#
from IPython.core.magic import register_line_cell_magic
# Configuration parameters
max_show_lines = 50 # Limit on the number of lines to show with %sql_show and %sql_display
detailed_explain = True # Set to False if you want to see only the physical plan when running explain
@register_line_cell_magic
def sql(line, cell=None):
"Return a Spark DataFrame for lazy evaluation of the sql. Use: %sql or %%sql"
val = cell if cell is not None else line
return spark.sql(val)
@register_line_cell_magic
def sql_show(line, cell=None):
"Execute sql and show the first max_show_lines lines. Use: %sql_show or %%sql_show"
val = cell if cell is not None else line
return spark.sql(val).show(max_show_lines)
@register_line_cell_magic
def sql_display(line, cell=None):
"""Execute sql and convert results to Pandas DataFrame for pretty display or further processing.
Use: %sql_display or %%sql_display"""
val = cell if cell is not None else line
return spark.sql(val).limit(max_show_lines).toPandas()
@register_line_cell_magic
def sql_explain(line, cell=None):
"Display the execution plan of the sql. Use: %sql_explain or %%sql_explain"
val = cell if cell is not None else line
return spark.sql(val).explain(detailed_explain)