Python的Bokeh包是一个用于数据可视化的强大工具。它提供了一种简单而灵活的方式来创建各种交互式图形,并且能够在Web浏览器中进行展示。本文将详细介绍Bokeh包的使用。
基本介绍
Bokeh是一个开源的Python库,用于创建交互式的数据可视化图形。它可以直接在Web浏览器中显示,支持丰富的图形类型,包括折线图、散点图、柱状图、热力图等。Bokeh的设计理念是将数据和图形分离。
Bokeh项目在GitHub上已获得约18k星标,是Python中最受欢迎的绘图库之一。
实现步骤
安装
首先,我们需要安装Bokeh包。可以使用pip来进行安装。
pip install bokeh
引入必要的库和模块
在创建Bokeh图形之前,我们需要引入必要的库和模块。可以使用import语句来引入Bokeh库和其他需要使用的模块。
创建基本图形
Bokeh提供了多种类型的图形可供选择。可以通过调用相应的函数来创建图形,并设置相应的属性。例如,可以使用figure()函数创建一个新的绘图对象,然后使用line()函数在图形上绘制折线图。
设置图形属性
在创建图形之后,可以通过设置图形属性来自定义图形的外观。例如,可以设置图形的标题、坐标轴标签、图例等。
添加工具栏和交互功能
Bokeh提供了丰富的工具和交互功能,可以让用户与图形进行互动。可以通过调用相应的函数来添加工具栏和交互功能,例如缩放、平移、选取等。
显示图形
最后,需要将创建好的图形对象显示出来。可以调用show()函数来显示图形,并在浏览器中查看;如果是在Jupyter Notebook中运行,可以先调用output_notebook(),让图形直接内嵌显示在Notebook里。
完整代码样例
# 导入所需的库和模块
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
# Sample data: five (x, y) points for the line.
x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 7, 3]

# Create the figure; the options are gathered in one place for readability.
figure_options = dict(
    title="折线图示例",
    x_axis_label='X轴',
    y_axis_label='Y轴',
    width=800,
    height=400,
)
p = figure(**figure_options)

# Draw the data as one line with a legend entry.
p.line(x, y, legend_label='数据', line_width=2)

# Typography: a larger title and the same label size on both axes.
p.title.text_font_size = "20pt"
for axis in (p.xaxis, p.yaxis):
    axis.axis_label_text_font_size = "14pt"

# Toolbar: leave no drag tool active when the plot first appears.
p.toolbar.active_drag = None

# Route output to the notebook, then render the figure.
output_notebook()
show(p)
更多案例举例
多种配色散点图
from bokeh.plotting import figure, show
from bokeh.sampledata.penguins import data
from bokeh.transform import factor_cmap, factor_mark
# Distinct species names, sorted so marker/colour assignment is deterministic.
SPECIES = sorted(data.species.unique())
MARKERS = ['hex', 'circle_x', 'triangle']

p = figure(title="Penguin size", background_fill_color="#fafafa")
p.xaxis.axis_label = 'Flipper Length (mm)'
p.yaxis.axis_label = 'Body Mass (g)'

# Marker shape and colour are both keyed on the "species" column.
species_marker = factor_mark('species', MARKERS, SPECIES)
species_color = factor_cmap('species', 'Category10_3', SPECIES)

p.scatter(
    "flipper_length_mm",
    "body_mass_g",
    source=data,
    legend_group="species",
    fill_alpha=0.4,
    size=12,
    marker=species_marker,
    color=species_color,
)

# Legend placement and heading.
p.legend.location = "top_left"
p.legend.title = "Species"

show(p)
条形图
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.palettes import MediumContrast3
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ['2015', '2016', '2017']

data = {
    'fruits': fruits,
    '2015': [2, 1, 4, 3, 2, 4],
    '2016': [5, 3, 3, 2, 4, 6],
    '2017': [3, 2, 4, 4, 5, 3],
}

# Nested categorical coordinates, fruit-major:
# [("Apples", "2015"), ("Apples", "2016"), ("Apples", "2017"), ("Pears", "2015"), ...]
x = [
    (fruit, year)
    for fruit in fruits
    for year in years
]

# Flatten the per-year columns row by row so counts line up with x.
per_fruit_rows = zip(*(data[year] for year in years))
counts = tuple(value for row in per_fruit_rows for value in row)

source = ColumnDataSource(data=dict(x=x, counts=counts))

p = figure(
    x_range=FactorRange(*x),
    height=350,
    title="Fruit Counts by Year",
    toolbar_location=None,
    tools="",
    output_backend="svg",
)

# Colour each bar by the year part (second level) of its (fruit, year) factor.
year_colors = factor_cmap('x', palette=MediumContrast3, factors=years, start=1, end=2)
p.vbar(
    x='x',
    top='counts',
    width=0.9,
    source=source,
    line_color="white",
    fill_color=year_colors,
)

# Axis and grid cosmetics.
p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

show(p)
分类热力图
from math import pi
import pandas as pd
from bokeh.models import BasicTicker, PrintfTickFormatter
from bokeh.plotting import figure, show
from bokeh.sampledata.unemployment1948 import data
from bokeh.transform import linear_cmap
# Years are converted to strings because they are used below as categorical
# x-axis factors.
data['Year'] = data['Year'].astype(str)
data = data.set_index('Year').drop(columns='Annual')
data.columns.name = 'Month'

years = list(data.index)
months = list(data.columns)[::-1]

# Reshape to long form: one row per (Year, Month) with its rate.
df = pd.DataFrame(data.stack(), columns=['rate']).reset_index()

# This is the colormap from the original NYTimes plot.
colors = [
    "#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce",
    "#ddb7b1", "#cc7878", "#933b41", "#550b1d",
]

TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"

p = figure(
    title=f"US Unemployment ({years[0]} - {years[-1]})",
    x_range=years,
    y_range=months,
    x_axis_location="above",
    width=900,
    height=400,
    tools=TOOLS,
    toolbar_location='below',
    tooltips=[('date', '@Month @Year'), ('rate', '@rate%')],
)

# Strip grid lines, axis lines and ticks so only the coloured cells remain.
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "7px"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3

# Map each cell's rate linearly onto the palette across the observed range.
rate_mapper = linear_cmap("rate", colors, low=df.rate.min(), high=df.rate.max())
r = p.rect(
    x="Year",
    y="Month",
    width=1,
    height=1,
    source=df,
    fill_color=rate_mapper,
    line_color=None,
)

# Colour bar built from the rect renderer, with one tick per palette colour.
color_bar = r.construct_color_bar(
    major_label_text_font_size="7px",
    ticker=BasicTicker(desired_num_ticks=len(colors)),
    formatter=PrintfTickFormatter(format="%d%%"),
    label_standoff=6,
    border_line_color=None,
    padding=5,
)
p.add_layout(color_bar, 'right')

show(p)
地图
这里的地图底图来自CartoDB、Stamen、OpenStreetMap、Esri等在线瓦片服务,在部分网络环境下可能需要开启科学上网才能正常显示。
import numpy as np
from bokeh.layouts import layout
from bokeh.models.widgets import Div
from bokeh.plotting import figure, show
# helper function for coordinate conversion between lat/lon in decimal degrees to web mercator
def lnglat_to_meters(longitude: float, latitude: float) -> tuple[float, float]:
    """Project a (longitude, latitude) pair into Web Mercator coordinates.

    Args:
        longitude: Longitude in decimal degrees.
        latitude: Latitude in decimal degrees.

    Returns:
        ``(easting, northing)``: meters East of Greenwich and meters North of
        the Equator.
    """
    # pi * radius, i.e. the easting that corresponds to longitude 180.
    # NOTE(review): 6378137 is presumably the WGS84 equatorial radius in
    # meters -- confirm.
    origin_shift = np.pi * 6378137
    easting = longitude * origin_shift / 180.0
    northing = np.log(np.tan((90 + latitude) * np.pi / 360.0)) * origin_shift / np.pi
    return (easting, northing)
# Heading shown above the grid of maps.
description = Div(text="""tile_demo.py
- Bokeh tile provider examples. Linked Pan and Zoom on all maps!""")

# Lady Bird Lake, Austin Texas
lat = 30.268801
lon = -97.763347

# Map centre in Web Mercator meters.
EN = lnglat_to_meters(lon, lat)
dE = 1000  # (m) Easting plus-and-minus from map center
dN = 1000  # (m) Northing plus-and-minus from map center

x_range = (EN[0]-dE, EN[0]+dE)  # (m) Easting x_lo, x_hi
y_range = (EN[1]-dN, EN[1]+dN)  # (m) Northing y_lo, y_hi

# NOTE(review): these names are resolved by add_tile() at runtime; whether
# every provider is still available depends on the installed
# Bokeh/xyzservices versions -- confirm.
providers = [
    "CartoDB Positron",
    "CartoDB Positron retina",
    "Stamen Terrain",
    "Stamen Terrain retina",
    "Stamen Toner",
    "Stamen Toner Background",
    "Stamen Toner Labels",
    "OpenStreetMap Mapnik",
    "Esri World Imagery",
]

# One small mercator figure per tile provider, all showing the same extent.
plots = []
for vendor_name in providers:
    plot = figure(
        x_range=x_range, y_range=y_range,
        x_axis_type="mercator", y_axis_type="mercator",
        height=250, width=300,
        title=vendor_name,
        toolbar_location=None, active_scroll="wheel_zoom",
    )
    plot.add_tile(vendor_name)
    plots.append(plot)

# Arrange as a heading row followed by a 3x3 grid. The result is bound to a
# new name so the imported `layout` function is not shadowed.
grid = layout([
    [description],
    plots[0:3],
    plots[3:6],
    plots[6:9],
])

show(grid)
元素周期表
from bokeh.plotting import figure, show
from bokeh.sampledata.periodic_table import elements
from bokeh.transform import dodge, factor_cmap
periods = ["I", "II", "III", "IV", "V", "VI", "VII"]
groups = list(map(str, range(1, 19)))

# Work on a copy so the sample data itself is not modified.
df = elements.copy()
df["atomic mass"] = df["atomic mass"].astype(str)
df["group"] = df["group"].astype(str)
# Replace the numeric period with its Roman-numeral label.
df["period"] = [periods[number - 1] for number in df.period]

# Drop rows whose group is "-" as well as Lr and Lu (the figure title says
# the LA and AC series are omitted).
keep = (df.group != "-") & (df.symbol != "Lr") & (df.symbol != "Lu")
df = df[keep]

# Fill colour per element category (the "metal" column).
cmap = {
    "alkali metal": "#a6cee3",
    "alkaline earth metal": "#1f78b4",
    "metal": "#d93b43",
    "halogen": "#999d9a",
    "metalloid": "#e08d49",
    "noble gas": "#eaeaea",
    "nonmetal": "#f1d4Af",
    "transition metal": "#599d7A",
}

TOOLTIPS = [
    ("Name", "@name"),
    ("Atomic number", "@{atomic number}"),
    ("Atomic mass", "@{atomic mass}"),
    ("Type", "@metal"),
    ("CPK color", "$color[hex, swatch]:CPK"),
    ("Electronic configuration", "@{electronic configuration}"),
]

p = figure(
    title="Periodic Table (omitting LA and AC Series)",
    width=1000,
    height=450,
    x_range=groups,
    y_range=list(reversed(periods)),
    tools="hover",
    toolbar_location=None,
    tooltips=TOOLTIPS,
)

# One box per element, coloured by category.
category_colors = factor_cmap(
    'metal',
    palette=list(cmap.values()),
    factors=list(cmap.keys()),
)
r = p.rect(
    "group", "period", 0.95, 0.95,
    source=df,
    fill_alpha=0.6,
    legend_field="metal",
    color=category_colors,
)

# Text shared settings; every label starts at the same x offset in the box.
text_props = {"source": df, "text_align": "left", "text_baseline": "middle"}
label_x = dodge("group", -0.4, range=p.x_range)

# Symbol (centre), atomic number (above), name and atomic mass (below).
p.text(x=label_x, y="period", text="symbol", text_font_style="bold", **text_props)
p.text(
    x=label_x,
    y=dodge("period", 0.3, range=p.y_range),
    text="atomic number",
    text_font_size="11px",
    **text_props,
)
p.text(
    x=label_x,
    y=dodge("period", -0.35, range=p.y_range),
    text="name",
    text_font_size="7px",
    **text_props,
)
p.text(
    x=label_x,
    y=dodge("period", -0.2, range=p.y_range),
    text="atomic mass",
    text_font_size="7px",
    **text_props,
)

# Placeholder labels in group 3 of periods VI and VII.
p.text(x=["3", "3"], y=["VI", "VII"], text=["LA", "AC"], text_align="center", text_baseline="middle")

# Remove the frame, grid, axis lines and ticks.
p.outline_line_color = None
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_standoff = 0

p.legend.orientation = "horizontal"
p.legend.location = "top_center"
p.hover.renderers = [r]  # only hover element boxes

show(p)
饼图
from math import pi
from bokeh.io import show
from bokeh.models import (AnnularWedge, ColumnDataSource,
Legend, LegendItem, Plot, Range1d)
from bokeh.sampledata.browsers import browsers_nov_2013 as df
# Fixed square data range; the donut is centred on the origin.
xdr = Range1d(start=-2, end=2)
ydr = Range1d(start=-2, end=2)

plot = Plot(x_range=xdr, y_range=ydr)
plot.title.text = "Web browser market share (November 2013)"
plot.toolbar_location = None

# Fill colour per browser name.
colors = {
    "Chrome": "seagreen",
    "Firefox": "tomato",
    "Safari": "orchid",
    "Opera": "firebrick",
    "IE": "skyblue",
    "Other": "lightgray",
}

# Total share per browser; everything below 1% is folded into "Other".
aggregated = df.groupby("Browser").sum(numeric_only=True)
selected = aggregated[aggregated.Share >= 1].copy()
selected.loc["Other"] = aggregated[aggregated.Share < 1].sum()

# Row order of the data source: one wedge per entry, in this order.
browsers = selected.index.tolist()

# Cumulative end angle of each wedge (share in percent -> radians).
angles = selected.Share.map(lambda x: 2*pi*(x/100)).cumsum().tolist()

browsers_source = ColumnDataSource(dict(
    start=[0] + angles[:-1],
    end=angles,
    colors=[colors[browser] for browser in browsers],
))

glyph = AnnularWedge(x=0, y=0, inner_radius=0.9, outer_radius=1.8,
                     start_angle="start", end_angle="end",
                     line_color="white", line_width=3, fill_color="colors")
r = plot.add_glyph(browsers_source, glyph)

# LegendItem.index selects a row of the data source, so labels must be
# enumerated in the source's row order (`browsers`). Enumerating the `colors`
# dict instead can attach a label to the wrong wedge, because the dict order
# need not match the grouped index order.
legend = Legend(location="center")
for i, name in enumerate(browsers):
    legend.items.append(LegendItem(label=name, renderers=[r], index=i))
plot.add_layout(legend, "center")

show(plot)
饼图与柱状图合成
from numpy import arange, array, cos, log, pi, sin, sqrt
from bokeh.models import ColumnDataSource, Legend, LegendItem
from bokeh.plotting import figure, show
from bokeh.sampledata.antibiotics import data as df
DRUGS = ("penicillin", "streptomycin", "neomycin")
COLORS = ("#0d3362", "#c64737", "#000000")

# Background wedge colour per Gram stain result.
GRAM = {
    "negative": "#e69584",
    "positive": "#aeaeb8",
}

# The circle is divided into one slot per row plus one extra slot.
slot_count = len(df) + 1
big_angle = 2 * pi / slot_count

# Start angle of each row's wedge, stepping by one slot per row index.
first_angle = pi/2 - 3*big_angle/2
angles = first_angle - array(df.index) * big_angle

df["start"] = angles
df["end"] = angles + big_angle
df["colors"] = [GRAM[stain] for stain in df.gram]

source = ColumnDataSource(df)
# Burtin's unusual inverted radial sqrt-log scale
micmin = sqrt(log(.001*1E4))
micmax = sqrt(log(1000*1E4))


def scale(mic):
    """Map a MIC value onto the inverted sqrt-log radial scale.

    The mapping is decreasing: ``mic = 0.001`` lands on ``micmax`` and
    ``mic = 1000`` lands on ``micmin``. Works element-wise on NumPy arrays
    as well as on scalars.

    Args:
        mic: MIC value(s); ``mic * 1E4`` must be at least 1 for the square
            root of the logarithm to be real.

    Returns:
        The radius (or array of radii) on the inverted scale.
    """
    return - sqrt(log(mic * 1E4)) + (micmin + micmax)
# Square, axis-less canvas for the radial chart.
p = figure(
    width=800, height=800, title=None, tools="", toolbar_location=None,
    x_axis_type=None, y_axis_type=None, match_aspect=True,
    min_border=0, outline_line_color="black", background_fill_color="#f0e1d2",
)

# large wedges for bacteria
br = p.annular_wedge(0, 0, micmax, micmin, "start", "end", fill_color="colors", line_color="#f0e1d2", source=source)

# circular axes and labels
radii = scale(10.0 ** arange(-3, 4))
p.circle(0, 0, radius=radii, fill_color=None, line_color="#f0e1d2")
p.text(
    0, radii, ["0.001", "0.01", "0.1", "1", "10", "100", "1000"],
    text_font_size="12px", anchor="center",
)

# small wedges for drugs: each big wedge is split into seven slices, and each
# drug occupies one slice, with an empty slice between neighbouring drugs
small_angle = big_angle / 7
for i, drug in enumerate(DRUGS):
    start = angles+(5-2*i)*small_angle
    end = angles+(6-2*i)*small_angle
    p.annular_wedge(
        0, 0, micmin, scale(df[drug]), start, end,
        color=COLORS[i], line_color=None, legend_label=drug,
    )

# bacteria labels, placed just outside the outermost ring at each wedge centre
r = radii[0] * 1.1
xr = r * cos(angles + big_angle/2)
yr = r * sin(angles + big_angle/2)
p.text(
    xr, yr, ["\n".join(x.split()) for x in df.bacteria],
    text_font_size="13px", anchor="center",
)

# Legend for the drug wedges, drawn in the middle of the chart.
p.legend.location = "center"
p.legend.background_fill_alpha = 0
p.legend.glyph_width = 45
p.legend.glyph_height = 20

p.x_range.range_padding = 0.2
p.y_range.range_padding = 0.2
p.grid.grid_line_color = None

# Second legend for the Gram-stain background colours.
# NOTE(review): `index` picks the source row whose colour is shown; rows 10
# and 0 are assumed to be Gram-positive and Gram-negative -- confirm against
# the data.
legend = Legend(items=[
    LegendItem(label="Gram-positive", renderers=[br], index=10),
    LegendItem(label="Gram-negative", renderers=[br], index=0),
], location="bottom", orientation="horizontal", background_fill_alpha=0)
p.add_layout(legend, 'center')

show(p)
箱型图
import pandas as pd
from bokeh.models import ColumnDataSource, Whisker
from bokeh.plotting import figure, show
from bokeh.sampledata.autompg2 import autompg2
from bokeh.transform import factor_cmap
# Keep only the vehicle class and highway MPG; "class" is renamed to "kind".
df = autompg2[["class", "hwy"]].rename(columns={"class": "kind"})
kinds = df.kind.unique()

# Quartiles of highway MPG per kind, one column per quantile.
qs = (
    df.groupby("kind").hwy.quantile([0.25, 0.5, 0.75])
    .unstack()
    .reset_index()
)
qs.columns = ["kind", "q1", "q2", "q3"]

# Attach each row's per-kind quartiles.
df = df.merge(qs, on="kind", how="left")

# IQR outlier bounds: 1.5 * IQR beyond the first and third quartiles.
iqr = df.q3 - df.q1
whisker_reach = 1.5*iqr
df["upper"] = df.q3 + whisker_reach
df["lower"] = df.q1 - whisker_reach

source = ColumnDataSource(df)

p = figure(
    x_range=kinds,
    tools="",
    toolbar_location=None,
    title="Highway MPG distribution by vehicle class",
    background_fill_color="#eaefef",
    y_axis_label="MPG",
)

# Whiskers spanning the outlier bounds.
whisker = Whisker(base="kind", upper="upper", lower="lower", source=source)
whisker.upper_head.size = 20
whisker.lower_head.size = 20
p.add_layout(whisker)

# Quantile boxes: the upper half (q2..q3) first, then the lower half (q1..q2).
cmap = factor_cmap("kind", "TolRainbow7", kinds)
for top, bottom in (("q2", "q3"), ("q1", "q2")):
    p.vbar(x="kind", width=0.7, top=top, bottom=bottom,
           source=source, color=cmap, line_color="black")

# Points that fall outside the [lower, upper] bounds.
is_inside = df.hwy.between(df.lower, df.upper)
outliers = df[~is_inside]
p.scatter("kind", "hwy", source=outliers, size=6, color="black", alpha=0.3)

p.xgrid.grid_line_color = None
p.axis.major_label_text_font_size = "14px"
p.axis.axis_label_text_font_size = "12px"

show(p)
本文详细介绍了Python的Bokeh包的使用方法。通过引入库和模块,创建基本图形,并设置相关属性,加上工具栏和交互功能,最终显示交互式数据可视化图形。
Bokeh提供了丰富的图形类型和属性设置,感兴趣的读者可以到官网查阅文档。
Python及AI交流学习群开放:感兴趣的朋友可扫码入群。入群后都是朋友,互相学习、共同进步(禁止广告投放)。