GIS with and ¶
Part II: Working with Rasters¶
Set-up our environment as before¶
Let's import the packages we will use and set the paths for outputs.
# Let's import pandas and some other basic packages we will use
from __future__ import division
import pandas as pd
import numpy as np
import os, sys
# GIS packages
import geopandas as gpd
from geopandas.tools import overlay
from shapely.geometry import Polygon, Point
import georasters as gr
# Alias for Geopandas
gp = gpd
# Plotting
import matplotlib as mpl
import seaborn as sns
# Setup seaborn
sns.set()
# Mapping
import geoplot as gplt
import geoplot.crs as gcrs
import mapclassify as mc
import textwrap
%pylab --no-import-all
%matplotlib inline
Using matplotlib backend: <object object at 0x126ca9a40> %pylab is deprecated, use %matplotlib inline and import the required libraries. Populating the interactive namespace from numpy and matplotlib
# Functions for plotting
def center_wrap(text, cwidth=32, **kw):
'''Center Text (to be used in legend)'''
lines = text
#lines = textwrap.wrap(text, **kw)
return "\n".join(line.center(cwidth) for line in lines)
def MyChoropleth(mydf, myfile='', myvar='',
mylegend='',
k=5,
extent=[-180, -90, 180, 90],
bbox_to_anchor=(0.2, 0.5),
edgecolor='white', facecolor='lightgray',
scheme='FisherJenks', bins=None, pct=None,
legend_labels=None,
save=True,
percent=False,
cmap='Reds',
**kwargs):
# Chloropleth
# Color scheme
if scheme=='EqualInterval':
scheme = mc.EqualInterval(mydf[myvar], k=k)
elif scheme=='Quantiles':
scheme = mc.Quantiles(mydf[myvar], k=k)
elif scheme=='BoxPlot':
scheme = mc.BoxPlot(mydf[myvar], k=k)
elif scheme=='FisherJenks':
scheme = mc.FisherJenks(mydf[myvar], k=k)
elif scheme=='FisherJenksSampled':
scheme = mc.FisherJenksSampled(mydf[myvar], k=k)
elif scheme=='HeadTailBreaks':
scheme = mc.HeadTailBreaks(mydf[myvar], k=k)
elif scheme=='JenksCaspall':
scheme = mc.JenksCaspall(mydf[myvar], k=k)
elif scheme=='JenksCaspallForced':
scheme = mc.JenksCaspallForced(mydf[myvar], k=k)
elif scheme=='JenksCaspallSampled':
scheme = mc.JenksCaspallSampled(mydf[myvar], k=k)
elif scheme=='KClassifiers':
scheme = mc.KClassifiers(mydf[myvar], k=k)
elif scheme=='Percentiles':
scheme = mc.Percentiles(mydf[myvar], pct=pct)
elif scheme=='UserDefined':
scheme = mc.UserDefined(mydf[myvar], bins=bins)
if legend_labels is None:
# Format legend
upper_bounds = scheme.bins
# get and format all bounds
bounds = []
for index, upper_bound in enumerate(upper_bounds):
if index == 0:
lower_bound = mydf[myvar].min()
else:
lower_bound = upper_bounds[index-1]
# format the numerical legend here
if percent:
bound = f'{lower_bound:.0%} - {upper_bound:.0%}'
else:
bound = f'{float(lower_bound):,.0f} - {float(upper_bound):,.0f}'
bounds.append(bound)
legend_labels = bounds
#Plot
ax = gplt.choropleth(
mydf, hue=myvar, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
edgecolor='white', linewidth=1,
cmap=cmap, legend=True,
scheme=scheme,
legend_kwargs={'bbox_to_anchor': bbox_to_anchor,
'frameon': True,
'title':mylegend,
},
legend_labels = legend_labels,
figsize=(24, 16),
rasterized=True,
)
gplt.polyplot(
countries, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
edgecolor=edgecolor, facecolor=facecolor,
ax=ax,
rasterized=True,
extent=extent,
)
if save:
plt.savefig(pathgraphs + myfile + '_' + myvar +'.pdf', dpi=300, bbox_inches='tight')
plt.savefig(pathgraphs + myfile + '_' + myvar +'.png', dpi=300, bbox_inches='tight')
pass
# Paths
pathout = './data/'
if not os.path.exists(pathout):
os.mkdir(pathout)
pathgraphs = './graphs/'
if not os.path.exists(pathgraphs):
os.mkdir(pathgraphs)
Initial Example -- Caloric Suitability Index¶
Let's download a raster with interesting data so we can visualize and analyze it.
Caloric Suitability Index CSI provides estimates for the potential calories that can be produced in any location using various crops.
For rasters we can use georasters
or rasterio
or various other tools.
georasters
is simple to use and has many functions that are useful to (social) scientists.- It tries to do for rasters what
geopandas
does for geometries. - Although mostly limited to what I have needed, it is expanding slowly to incorporate other uses.
Next we will use it to download a raster in GeoTiff
format from the Caloric Suitability Index CSI website.
- Since the data is made available via Google Drive, we will also learn how to use GD's API to download data.
- Once we have the data, we will import it as a
GeoRaster
, which is simply a maskednumpy
array with associated geographical information. - We can use many functions and properties of the
GeoRaster
to analyze our data. - Moreover, since it is based on
numpy
'sMaskedArray
object, any funtion that works onnumpy
arrays can be used on aGeoRaster
.
Download and Import Caloric Suitability Data (CSI)¶
Let's download the maximum pre- and post1500 CSI data, i.e. the maximum amount of calories that can be potentially preduced in a location with the crops available pre- and post-1500.
See the CSI website or the associated papers (e.g., Galor and Özak (2015,2016) for the construction and properties of the data).
Download¶
# Import GD API python package
from google_drive_downloader import GoogleDriveDownloader as gdd
# Check whether files have been already downloaded
# Otherwise download
if os.path.exists(pathout + 'pre1500MaxCalories.tif')==False:
gdd.download_file_from_google_drive(file_id='0By-h7HPv1NhVR1BTX0V6eUdmTW8', resourcekey='0-7-oOUj8ldKwWSmnieI4oog', dest_path=pathout+'pre1500MaxCalories.tif')
if os.path.exists(pathout + 'post1500MaxCalories.tif')==False:
gdd.download_file_from_google_drive(file_id='0By-h7HPv1NhVamdlWEtSSlpKOTA', resourcekey='0-nWBun0NiYSnYDCH_N2tr-w', dest_path=pathout+'post1500MaxCalories.tif')
Import Rasters¶
pre1500 = gr.from_file(pathout + 'pre1500MaxCalories.tif')
post1500 = gr.from_file(pathout + 'post1500MaxCalories.tif')
/Users/ozak/miniforge3/envs/GeoPython311env/lib/python3.11/site-packages/osgeo/gdal.py:312: FutureWarning: Neither gdal.UseExceptions() nor gdal.DontUseExceptions() has been explicitly called. In GDAL 4.0, exceptions will be enabled by default. warnings.warn(
Plot Rasters¶
We can plot this data easily using georasters
.
pre1500.plot()
<Axes: >
Not very nice looking, but provides the basic information we may want.
Of course we can improve using a few extra commands.
- Let's start by choosing a colormap and also normalizing the data.
- You can choose among the many colormaps provided by
matplotlib
.
myraster = pre1500
cmap = plt.cm.YlGn
norm = mpl.colors.Normalize(vmin=myraster.min(), vmax=myraster.max())
fig = plt.figure(figsize=(15,10), dpi=300, facecolor='w', edgecolor='k')
plt.matshow(pre1500.raster, cmap=cmap, norm=norm, rasterized=True)
plt.xticks([])
plt.yticks([])
plt.show()
<Figure size 4500x3000 with 0 Axes>
Let's add a colorbar and improve the figure a bit.
Then export it for using in our slides or paper.
myraster = pre1500
cmap = plt.cm.YlOrRd
norm = mpl.colors.Normalize(vmin=myraster.min(), vmax=myraster.max())
ax = myraster.plot(figsize=(15,10), cmap=cmap, norm=norm, rasterized=True)
plt.xticks([])
plt.yticks([])
plt.title('')
ax = plt.gca()
ax.set_aspect(1)
# create axes instance for colorbar on bottom.
ax = plt.gca()
pos = ax.get_position()
l, b, w, h = pos.bounds
cax = plt.axes([l+.3, b+0.03, .3, 0.01])
# draw colorbar on bottom.
cb = mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm, spacing='proportional', orientation='horizontal')
cax.set_title('Maximum Calories Pre-1500')
plt.savefig(pathgraphs + 'pre1500MaxCalories.pdf', dpi=150, bbox_inches='tight')
plt.show()
myraster = post1500
cmap = plt.cm.YlOrRd
norm = mpl.colors.Normalize(vmin=myraster.min(), vmax=myraster.max())
ax = myraster.plot(figsize=(15,10), cmap=cmap, norm=norm, rasterized=True)
plt.xticks([])
plt.yticks([])
plt.title('')
ax = plt.gca()
ax.set_aspect(1)
# create axes instance for colorbar on bottom.
ax = plt.gca()
pos = ax.get_position()
l, b, w, h = pos.bounds
cax = plt.axes([l+.3, b+0.03, .3, 0.01])
# draw colorbar on bottom.
cb = mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm, spacing='proportional', orientation='horizontal')
cax.set_title('Maximum Calories Post-1500')
plt.savefig(pathgraphs + 'post1500MaxCalories.pdf', dpi=150, bbox_inches='tight')
plt.show()
Not bad!¶
Now, let us add country borders so we can visualize a little bit better.
- We need to import a shapefile with the country borders.
- Let's use the same source as in the previous notebook.
import requests
import io
#headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'}
url = 'https://naturalearth.s3.amazonaws.com/10m_cultural/ne_10m_admin_0_countries.zip'
r = requests.get(url, headers=headers)
countries = gp.read_file(io.BytesIO(r.content))
#countries = gpd.read_file('https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip')
Let's remove Antarctica so we do not plot it.
countries = countries.query("CONTINENT!='Antarctica'")
myraster = pre1500
cmap = plt.cm.YlGn
norm = mpl.colors.Normalize(vmin=myraster.min(), vmax=myraster.max())
df3 = countries.copy()
df3.geometry = countries.boundary
df3['fake'] = 0
plt.figure(figsize=(15,10))
plt.xticks([])
plt.yticks([])
plt.title('')
ax =plt.gca()
ax.set_aspect(1)
img_extent = (myraster.xmin, myraster.xmax, myraster.ymin, myraster.ymax)
ax.imshow(myraster.raster, norm=norm, origin='upper',extent=img_extent, cmap=cmap, interpolation='bilinear', aspect=1)
df3.plot(ax=ax, color='black', edgecolor='k', linewidth=0.5, rasterized=True)
# create axes instance for colorbar on bottom.
ax = plt.gca()
pos = ax.get_position()
l, b, w, h = pos.bounds
cax = plt.axes([l+.3, b+0.05, .3, 0.01])
# draw colorbar on bottom.
cb = mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm, spacing='proportional', orientation='horizontal')
cax.set_title('Maximum Calories Pre-1500')
plt.savefig(pathgraphs + 'pre1500MaxCaloriesBorders.pdf', dpi=150, bbox_inches='tight')
plt.show()