# make figures better:
font = {'weight':'normal','size':20}
matplotlib.rc('font', **font)
matplotlib.rc('figure', figsize=(9.0, 6.0))
matplotlib.rc('xtick.major', pad=10) # xticks too close to border!
import warnings
warnings.filterwarnings('ignore')
import random, math
import numpy as np
import scipy, scipy.stats
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
nicered = "#E6072A"
niceblu = "#424FA4"
nicegrn = "#6DC048"
James Bagrow, james.bagrow@uvm.edu, http://bagrow.com
grading criteria for midterm project
log-transforming data
Improving figures through iteration
Maps (from last time)
Working with colors
A lot of you have projects that can benefit from drawing maps.
Let's give a brief overview of how to do some mapping in python.
Drawing maps is surprisingly hard!
You need to take your map data (collections of latitude,longitude pairs) and project them from the sphere (actually the earth's shape is called a "geoid") onto the 2D plane.
There is a companion package to matplotlib called Basemap
Here's an example:
from mpl_toolkits.basemap import Basemap
# set up orthographic map projection with
# perspective of satellite looking down at 50N, 100W.
# use low resolution coastlines.
map = Basemap(projection='ortho',lat_0=40,lon_0=-100,resolution='l')
# draw coastlines, country boundaries, fill continents.
map.drawcoastlines(linewidth=0.25 )
map.drawcountries( linewidth=0.25 )
map.fillcontinents(color='coral',lake_color='aqua')
# draw the edge of the map projection region (the projection limb)
map.drawmapboundary(fill_color='aqua')
# draw lat/lon grid lines every 30 degrees.
map.drawmeridians(np.arange(0,360,30))
map.drawparallels(np.arange(-90,90,30))
# make up some data on a regular lat/lon grid.
nlats = 73; nlons = 145; delta = 2.*np.pi/(nlons-1)
lats = (0.5*np.pi-delta*np.indices((nlats,nlons))[0,:,:])
lons = (delta*np.indices((nlats,nlons))[1,:,:])
wave = 0.75*(np.sin(2.*lats)**8*np.cos(4.*lons))
mean = 0.5*np.cos(2.*lats)*((np.sin(2.*lats))**2 + 2.)
# compute native map projection coordinates of lat/lon grid.
x, y = map(lons*180./np.pi, lats*180./np.pi)
# contour data over the map.
cs = map.contour(x,y,wave+mean,15,linewidths=1.5)
plt.show()
The core of Basemap is the line:
map = Basemap(projection='ortho',lat_0=40,lon_0=-100,resolution='l')
This defined the map
object which lets us translate from latitudes and longitudes to cartesian (xy) coordinates.
This translation means putting a 3D sphere onto a 2D plane. This is called projection and there are many ways to do it.
All map projects must introduce some degree of distortion, especially when they try to show the entire globe. For example, the "mercator" projection used to be very common:
Mercator introduces crazy distortion at the north and south poles. For example, Greenland looks to be the same size as Africa. In fact, Africa is 14 times bigger.
A different projection, the Winkel Tripel, is better:
This is the current choice for the National Geographic Society.
Basemap incorporates the math for many projections. The previous example used the "orthographic" projection. This is very accurate because it only shows the visible portion of the globe.
Here's another example showing US cities and their populations.
This time we're going to draw only the US using yet another type of project calle the Lambert conformal conic projection (LCC). This project looks strange when showing the entire earth, but is relatively accurate over a small range, such as the continnental US.
The call to Basemap()
requires different input variables depending on the type of projection being used. For example, the LCC projection requires (lat,lon) pairs defining the lower left (ll) and upper right of the "figure".
#plt.close('all')
# Data of city location (logitude,latitude) and population
pop={'New York':8244910,'Los Angeles':3819702,
'Chicago':2707120,'Houston':2145146,
'Philadelphia':1536471,'Pheonix':1469471,
'San Antonio':1359758,'San Diego':1326179,
'Dallas':1223229,'San Jose':967487,
'Jacksonville':827908,'Indianapolis':827908,
'Austin':820611,'San Francisco':812826,
'Columbus':797434} # dictionary of the populations of each city
lat={'New York':40.6643,'Los Angeles':34.0194,
'Chicago':41.8376,'Houston':29.7805,
'Philadelphia':40.0094,'Pheonix':33.5722,
'San Antonio':29.4724,'San Diego':32.8153,
'Dallas':32.7942,'San Jose':37.2969,
'Jacksonville':30.3370,'Indianapolis':39.7767,
'Austin':30.3072,'San Francisco':37.7750,
'Columbus':39.9848} # dictionary of the latitudes of each city
lon={'New York':73.9385,'Los Angeles':118.4108,
'Chicago':87.6818,'Houston':95.3863,
'Philadelphia':75.1333,'Pheonix':112.0880,
'San Antonio':98.5251,'San Diego':117.1350,
'Dallas':96.7655,'San Jose':121.8193,
'Jacksonville':81.6613,'Indianapolis':86.1459,
'Austin':97.7560,'San Francisco':122.4183,
'Columbus':82.9850} # dictionary of the longitudes of each city
# build the map:
m = Basemap(llcrnrlon=-119,llcrnrlat=22, # define map corners
urcrnrlon=-64, urcrnrlat=49,
projection='lcc', # lambert conformal conic project
lat_1=33,lat_2=45,lon_0=-95,resolution='c')
# draw the US:
m.drawcoastlines()
m.drawstates()
m.drawcountries()
# draw the cities as translucent circles:
max_size=2000
for city in lon.keys():
x, y = m(-lon[city],lat[city])
m.scatter(x,y,max_size*pop[city]/pop['New York'],
marker='o',color=niceblu, alpha=0.5)
plt.show()
The shortest path between two points on a globe is not a line but a segment of a circle known as a great circle.
# create new figure, axes instances.
fig=plt.figure()
ax=fig.add_axes([0.1,0.1,0.8,0.8])
# setup mercator map projection.
m = Basemap(llcrnrlon=-100.,llcrnrlat=00.,
urcrnrlon=10., urcrnrlat=60.,
resolution='l',projection='merc'
)
# nylat, nylon are lat/lon of New York
nylat = 40.78; nylon = -73.98
# lonlat, lonlon are lat/lon of London.
lonlat = 51.53; lonlon = 0.08
# draw great circle route between NY and London
m.drawgreatcircle( nylon, nylat,
lonlon,lonlat,
linewidth=5,color=nicegrn, solid_capstyle="round" )
# draw great circle route between Miami and Madrid
m.drawgreatcircle( -80.28, 25.82,
-3.7025600, 40.4165000,
linewidth=5,color=nicered, solid_capstyle="round" )
m.drawcoastlines()
m.drawcountries()
#m.drawparallels(np.arange(10,90,20), labels=[1,1,0,1])
#m.drawmeridians(np.arange(-180,180,30),labels=[1,1,0,1])
m.drawmapboundary(fill_color="#F0F8FF")
m.fillcontinents(color="#DDDDDD",lake_color="#F0F8FF")
#plt.gca().axis("off")
plt.show()
These arcs can be used to visualize travel or communication data, for example.
Here's another big example called a choroplenth. It's a map of the population density of the US by state. The population density is represented by the color of the state's polygon.
from matplotlib.collections import LineCollection
from matplotlib.colors import rgb2hex
# Lambert Conformal map of lower 48 states.
m = Basemap(llcrnrlon=-119,llcrnrlat=22,
urcrnrlon=-64, urcrnrlat=49,
projection='lcc', lat_1=33,lat_2=45,lon_0=-95)
# laod state boundaries.
# data from U.S Census Bureau
# http://www.census.gov/geo/www/cob/st1990.html
shp_info = m.readshapefile('gz_2010_us_040_00_500k','states',drawbounds=False)
# This loads three files:
# gz_2010_us_040_00_500k.dbf
# gz_2010_us_040_00_500k.shp
# gz_2010_us_040_00_500k.shx
# population density by state from
# http://en.wikipedia.org/wiki/List_of_U.S._states_by_population_density
popdensity = {
'New Jersey': 438.00,
'Rhode Island': 387.35,
'Massachusetts': 312.68,
'Connecticut': 271.40,
'Maryland': 209.23,
'New York': 155.18,
'Delaware': 154.87,
'Florida': 114.43,
'Ohio': 107.05,
'Pennsylvania': 105.80,
'Illinois': 86.27,
'California': 83.85,
'Hawaii': 72.83,
'Virginia': 69.03,
'Michigan': 67.55,
'Indiana': 65.46,
'North Carolina': 63.80,
'Georgia': 54.59,
'Tennessee': 53.29,
'New Hampshire': 53.20,
'South Carolina': 51.45,
'Louisiana': 39.61,
'Kentucky': 39.28,
'Wisconsin': 38.13,
'Washington': 34.20,
'Alabama': 33.84,
'Missouri': 31.36,
'Texas': 30.75,
'West Virginia': 29.00,
'Vermont': 25.41,
'Minnesota': 23.86,
'Mississippi': 23.42,
'Iowa': 20.22,
'Arkansas': 19.82,
'Oklahoma': 19.40,
'Arizona': 17.43,
'Colorado': 16.01,
'Maine': 15.95,
'Oregon': 13.76,
'Kansas': 12.69,
'Utah': 10.50,
'Nebraska': 8.60,
'Nevada': 7.03,
'Idaho': 6.04,
'New Mexico': 5.79,
'South Dakota': 3.84,
'North Dakota': 3.59,
'Montana': 2.39,
'Wyoming': 1.96,
'Alaska': 0.42}
# choose a color for each state based on population density.
colors={}
statenames=[]
cmap = plt.cm.Blues_r # use 'hot' colormap
vmin = 0; vmax = 450 # set range.
sm = plt.cm.ScalarMappable(cmap=cmap,
norm=plt.normalize(vmin=vmin, vmax=vmax))
for shapedict in m.states_info:
statename = shapedict['NAME']
try:
pop = popdensity[statename]
except KeyError:
pop = 0.0
# calling colormap with value between 0 and 1 returns
# rgba value. Invert color range (hot colors are high
# population), take sqrt root to spread out colors more.
colors[statename] = cmap((pop-vmin)/(vmax-vmin))[:3]
statenames.append(statename)
# cycle through state names, color each one.
for nshape,seg in enumerate(m.states):
xx,yy = zip(*seg)
if statenames[nshape] != 'District of Columbia' and \
statenames[nshape] != "Puerto Rico":
color = rgb2hex(colors[statenames[nshape]])
plt.fill(xx,yy,color,edgecolor=color)
# draw meridians and parallels.
m.drawparallels(np.arange(25,65,20), labels=[0,0,0,0],
zorder=-1,color="w")
m.drawmeridians(np.arange(-120,-40,20),labels=[0,0,0,0],
zorder=-1,color="w")
# set up colorbar:
mm = plt.cm.ScalarMappable(cmap=cmap)
mm.set_array([vmin,vmax])
plt.colorbar(mm, label="Population per km$^2$",
ticks=[0,100,200,300,400],
orientation="horizontal", fraction=0.05,
)
#plt.title('Filling State Polygons by Population Density')
plt.gca().axis("off")
plt.show()
This example read what is called a shapefile, a standard way to store map data (which is surprisingly nontrivial data).
The three files being read in the previous example (by m.readshapefile
) are:
gz_2010_us_040_00_500k.dbf
gz_2010_us_040_00_500k.shp
gz_2010_us_040_00_500k.shx
You almost always want to find shapefiles online. I found these shape files from the US census: http://www2.census.gov/geo/tiger/GENZ2010/
In a sense, drawing is similar to plotting: If you want to draw a shape you need to manage the (x,y) coordinates of all the points that define that shape.
Lines, rectangles, polygons, circles, curves are define with xy coordinates on the image.
You are in charge of defining how these image coordinates relative to whatever drawing or plotting library you are using.
Matplotlib adds things like x- and y-axes automatically.
fig = plt.figure()
ax = fig.gca()
# create a rectangle instance
rect = matplotlib.patches.Rectangle( (0.2,0.5), width=0.1, height=1.2)
# now we add the Rectangle to the Axes
ax.add_patch(rect)
# now let's add a crazy polygon:
vertices = [
(0.5,0.5),
(0.6,0.9),
(0.7,0.2),
(0.55,0.3)
]
poly = matplotlib.patches.Polygon( vertices )
ax.add_patch(poly)
plt.show()
Oftentimes you need to work out the XY-coordinates to figure out what you need to code up.
Example Suppose we want to compute a histogram but we don't have a histogram plotting function.
Here are the histogram data:
# histogram our data with numpy
data = np.random.randn(1000)
Y, X = np.histogram(data, 100)
# len(Y) is 100, len(X) is 101
Important: the X
list is longer than the Y
list because it defines the bin edges ( You need three edges to define two bins, etc.)
We can write a loop and draw a bunch of rectangles, computing the x- and y-coordinates of each bin from the lists X
and Y
.
Instead, as an exercise, let's draw all the bins together as a single polygon. We need the x and y coordinates of all the corners of the top of histogram. Let's turn to paper (the whiteboard) to work this out, then we code it up.
[whiteboard]
OK, now that we see how the lists of vertices should be made, let's code it up:
Xext = []
for xi in X:
Xext.append(xi)
Xext.append(xi)
Yext = [0,]
for yi in Y:
Yext.append(yi)
Yext.append(yi)
Yext.append(0)
As a quick debugging step, let's plot these vertices as a scatter, then we'll draw the polygon:
plt.plot(Xext,Yext, '.-')
plt.show()
Looks good.
import matplotlib
from matplotlib.patches import Circle, Wedge, Polygon
from matplotlib.collections import PatchCollection
import matplotlib.pyplot as plt
my_poly = Polygon(zip(Xext,Yext), facecolor=niceblu)
fig = plt.figure()
ax = fig.gca() # gca = get current figure
ax.add_patch(my_poly)
#ax.set_xlim( min(Xext), max(Xext) )
#ax.set_ylim( min(Yext), max(Yext) )
# recompute the ax.dataLim
ax.relim()
# update ax.viewLim using the new dataLim
ax.autoscale_view()
plt.show()
OK, that wasn't very useful because we already have a plotting function that saves us all this work:
plt.hist(data, 100, fc=niceblu,ec="none") # fc = facecolor, ec = edgecolor
plt.show()
The point of this example was the whiteboard step, where we scribbled together the form the list of vertices should take. That kind of process is very common.
As another example, inspired by part of LEC14, let's work out how to draw a top-down cartoon of a basketball half-court.
We begin by:
import matplotlib
from matplotlib.lines import Line2D
from matplotlib.patches import Circle, Wedge, Polygon, Rectangle
from matplotlib.collections import PatchCollection
import matplotlib.pyplot as plt
%config InlineBackend.close_figures = False
W = 1.0
H = 0.94
plt.close()
fig = plt.figure(figsize=(9*1.5,6*1.5) )
fig.clear()
ax = fig.gca() # gca = get current figure
ax.set_ylim(0,0.94)
ax.set_aspect('equal')
verts = [ (0.5*W - 0.16*W, 0.000*H),
(0.5*W - 0.16*W, 0.404*H),
(0.5*W + 0.16*W, 0.404*H),
(0.5*W + 0.16*W, 0.000*H) ]
p = Polygon( verts, fc='none' )
ax.add_patch(p)
plt.draw()
(The frame defining the figure will serve as the sidelines and midcourt line.)
Now let's draw the center circle and the free throw circle:
# draw circles:
r = 0.12*W
verts_free = []
verts_cent = [] # same y-coord
for a in np.linspace(0.0,2*np.pi):
xi = r*np.cos(a)
yi = r*np.sin(a)
verts_free.append( (xi+W/2, yi+0.404*H) )
verts_cent.append( (xi+W/2, yi+1.000*H) )
p = Polygon( verts_free, fc='none' )
ax.add_patch(p)
p = Polygon( verts_cent, fc='none' )
ax.add_patch(p)
plt.show()
(Note: I'm being a sloppy with these hardwired numbers yi+0.404*H
, etc. It's better to define variables for these.)
Speaking of which, you may be asking where the numbers are coming from. I eyeballed a picture of a basketball court, but if you want to be accurate you should look up the dimensions of the court and then convert from physical dimensions to the image coordinates.
# draw basket and backboard:
c = Circle( (W/2, 0.10*H), radius=0.75/50, fc='none' )
# rect needs lower left corner xy and width,height:
bb = Rectangle( (W/2-0.06*W,0.06*H), width=0.12,height=0.0075,
fc='none')
ax.add_patch(c)
ax.add_patch(bb)
plt.show()
# draw left and right lane lines
p = Polygon( [(W/2-0.12*W,0*H), (W/2-0.12*W,0.404*H) ] , fc='none' )
x = W/2-0.12*W
y = 0.404*H
L = Line2D( (x,x),(0,y), linewidth=1.0, c='black' )
ax.add_line(L)
x = W/2+0.12*W
L = Line2D( (x,x),(0,y), linewidth=1.0, c='black' )
ax.add_line(L)
plt.show()
Those circles were easy, the three-point line is a little trickier because it's a circle that gets chopped off on the wings.
# draw the semi-circle for 3-point line:
r = 0.46*W
Xs,Ys = [],[]
for a in np.linspace( np.pi/8, 7*np.pi/8 ):
xi = r*np.cos(a)
yi = r*np.sin(a)
Xs.append(xi+W/2 )
Ys.append(yi+0.10*H)
L = Line2D( Xs,Ys, linewidth=1.0, c='black' )
ax.add_line(L)
plt.show()
# draw left/right edges of 3-point boundary:
# for this we need to get the xy coordinates of the ends of the semi-circle
# These are (Xs[0],Ys[0]) and (Xs[-1],Ys[-1])
x = Xs[0]
L = Line2D( (x,x),(0,Ys[0]), linewidth=1.0, c='black' )
ax.add_line(L)
x = Xs[-1]
L = Line2D( (x,x),(0,Ys[-1]), linewidth=1.0, c='black' )
ax.add_line(L)
plt.show()
Remove the x and y labels to hide the fact that our drawing is a figure:
ax.set_xticks([])
ax.set_yticks([])
plt.show()
And there we go!
Now the next step could be to wrap all of that up into a draw_court
function. Or maybe we can save that image and use it as a background for our basketball visualization.
plt.close()
%config InlineBackend.close_figures = False
You may have seen commands like this:
plt.plot(X,Y, '-', color="red")
and thought, "Oh ok, a red line.". But what about this:
plt.plot(X,Y, 'o-', color='#FF00AA', markerfacecolor="#00FF00")
Those strings, which you've likely seen before if you do any web design, are hexadecimal numbers representing RGB (red green blue) colors. This is known as a hex triplet. The leading "#" is a standard convention for denoting a triple.
A hex number is base-16. It ranges from 0 to 9 and then from A to F. Base-16 is convenient on the computer when working with bytes and it lets you represent a number between 0 and 256 with two digits, where as base-10 could only represent numbers between 0 and 99 with two digits.
The six-digits in a hex triple let us define the color channels for red, green, and blue:
#RRGGBB
So the color pure red, sometimes denoted RGB(1.0,0,0)
is #FF0000
. The first FF
the largest value possible, while the other two channels are 00
since there is no green or blue in the color.
Since hex triples are base-16, people often write the color scale going from 0 to 255 instead of 0 to 1, so pure red would then be RGB(256,0,0)
with the same hex representation.
This can sometimes cause problems. If a function wants the color channels to be in [0,1] and you pass a channel value of 128 (which represents 50%), the function may round that down to 1, the largest value it assumes can exist.
The way to represent color on a computer is non-trivial and there are lots of different color models beside RGB.
RGB is convenient because media that transmit light (such as TVs) use red, green, and blue pixels. Let's see how a modern computer display actually works, it's cool!
from IPython.display import YouTubeVideo
YouTubeVideo("jiejNAUwcQ8", width=600, height=600*0.8235)
So the computer is just an array of red, green, and blue pixels in close proximity. Colored light gets mixed. This is called additive mixing:
This is different from what paint and pigment does, which is called subtractive mixing:
Notice how the circles are "cyan", "magenta", and "yellow" and their intersections are red green and blue? This is why high-end graphic design doesn't use RGB colors but instead uses CMYK: it more accurately models the ink in a printing press.
Our brains will automatically mix light additively:
There are only three colors in that image!
Additive mixing is easy, it's just addition. Let's mix two colors. All we do is sum up the three color channels elementwise and then round down any numbers that are too big:
c1 = (1.0, 0.0, 0.0) # pure red in RGB
c2 = (0.0, 0.0, 1.0) # pure blue
cS = ( c1[0]+c2[0],
c1[1]+c2[1],
c1[2]+c2[2] )
# round down:
cS = ( min([cS[0],1]),
min([cS[1],1]),
min([cS[2],1]) )
print cS # should be 100% red and 100% blue
Now to convert that tuple to a hex triple string is a little weird. Here's a function:
def rgb_to_rgb256(rgb):
"""Map [0,1] rgb to [0,255] rgb."""
r,g,b = rgb
return ( int(255*r), int(255*g), int(255*b) )
def rgb256_to_hex(rgb):
"""Make hex triple from rgb"""
return '#%02X%02X%02X' % rgb
print cS
hex_triple = rgb256_to_hex( rgb_to_rgb256(cS) )
print hex_triple
Let's see what we've got:
h1 = rgb256_to_hex( rgb_to_rgb256(c1) )
h2 = rgb256_to_hex( rgb_to_rgb256(c2) )
circle1=plt.Circle((0.25,0.25),.25, color=h1)
circle3=plt.Circle((0.75,0.75),.25, color=h2)
circle2=plt.Circle((.5,.5),.25, color=hex_triple)
plt.clf()
fig = plt.gcf()
fig.gca().add_artist(circle1)
fig.gca().add_artist(circle2)
fig.gca().add_artist(circle3)
fig.gca().set_aspect('equal')
plt.show()
There is another convenient way to blend RGB colors mathematically, just take the averages of the rgb channels:
color = (255,0,0) # red
h = rgb256_to_hex(color)
print 0, h, color
plt.close('all')
fig = plt.figure(1)
fig.clear()
ax = fig.gca()
circ = Circle((0/10.0,0.25),0.1, color=h, ec='black')
ax.add_patch(circ)
circ = Circle((0/10.0,0.75),0.1, color=h, ec='none')
ax.add_patch(circ)
for i in range(7):
r,g,b = color
r = (r + 0)/2.0 # average with white, rgb(255,255,255)
g = (g + 0)/2.0
b = (b + 255)/2.0
color = (int(r),int(g),int(b))
h = rgb256_to_hex(color)
print i+1, h, color
# draw a circle
circ = Circle( ( (i+1)/7.0, 0.25), 0.1,
color=h, ec="black", lw=1.0)
ax.add_patch(circ)
circ = Circle( ( (i+1)/7.0, 0.75), 0.1,
color=h, ec="none", lw=1.0)
ax.add_patch(circ)
ax.set_aspect('equal')
plt.show()
This repeated averaging takes us from the first color towards the second. Although you see the scale appears to be nonlinear. This is because each step through the loop mixes 50% blue with 50% of the current color; there is far more blue than red.
If you want to uniformly change from red to blue, you just need to linearly interpolate the colors using a weighted average, for example:
r1,g1,b1 = (255,0,0)
r2,g2,b2 = (0,0,255)
plt.close('all')
fig = plt.figure(1)
fig.clear()
ax = fig.gca()
for a in np.arange(0,1.0001,0.1):
# linear interpolation:
r = (1-a)*r1 + a*r2
g = (1-a)*g1 + a*g2
b = (1-a)*b1 + a*b2
color = (int(r),int(g),int(b))
h = rgb256_to_hex(color)
print a, h, color
circ = Circle( ( a, 0.25), 0.1,
color=h, ec="black", lw=1.0)
ax.add_patch(circ)
circ = Circle( ( a, 0.75), 0.1,
color=h, ec="none", lw=1.0)
ax.add_patch(circ)
ax.set_aspect('equal')
plt.show()
Fancier interpolations are possible, which leads to the idea of colormaps.