# coding: utf-8

# # Numpy and Matplotlib #
# 
# These are two of the most fundamental parts of the scientific python "ecosystem". Most everything else is built on top of them.
# 

# In[2]:


import numpy as np


# What did we just do? We _imported_ a package. This brings new variables (mostly functions) into our interpreter. We access them as follows.

# In[ ]:


# find out what is in our namespace
dir()


# In[ ]:


# find out what's in numpy
dir(np)


# In[ ]:


# find out what version we have
np.__version__


# The numpy documentation is crucial!
# 
# http://docs.scipy.org/doc/numpy/reference/
# 
# ## NDArrays ##
# 
# The core class is the numpy ndarray (n-dimensional array).

# In[ ]:


from IPython.display import Image
Image(url='http://docs.scipy.org/doc/numpy/_images/threefundamental.png')


# In[ ]:


# create an array from a list
a = np.array([9,0,2,1,0])


# In[ ]:


# find out the datatype
a.dtype


# In[ ]:


# find out the shape
a.shape


# In[ ]:


# what is the shape
type(a.shape)


# In[ ]:


# another array with a different datatype and shape
b = np.array([[5,3,1,9],[9,2,3,0]], dtype=np.float64)


# In[ ]:


# check dtype and shape
b.dtype, b.shape


# __Important Concept__: The fastest varying dimension is the last dimension! The outer level of the hierarchy is the first dimension. (This is called "c-style" indexing)

# ## More array creation ##
# 
# There are lots of ways to create arrays.

# In[ ]:


# create some uniform arrays
c = np.zeros((9,9))
d = np.ones((3,6,3), dtype=np.complex128)
e = np.full((3,3), np.pi)
e = np.ones_like(c)
f = np.zeros_like(d)


# In[ ]:


# create some ranges
np.arange(10)


# In[ ]:


# arange is left inclusive, right exclusive
np.arange(2,4,0.25)


# In[ ]:


# linearly spaced
np.linspace(2,4,20)


# In[ ]:


# log spaced
np.logspace(1,2,10)


# In[ ]:


# two dimensional grids
x = np.linspace(-2*np.pi, 2*np.pi, 100)
y = np.linspace(-np.pi, np.pi, 50)
xx, yy = np.meshgrid(x, y)
xx.shape, yy.shape


# ## Indexing ##
# 
# Basic indexing is similar to lists

# In[ ]:


# get some individual elements of xx
xx[0,0], xx[-1,-1], xx[3,-5]


# In[ ]:


# get some whole rows and columns
xx[0].shape, xx[:,-1].shape


# In[ ]:


# get some ranges
xx[3:10,30:40].shape


# There are many advanced ways to index arrays. You can read about them in the manual. Here is one example.

# In[ ]:


# use a boolean array as an index
idx = xx<0
yy[idx].shape


# In[ ]:


# the array got flattened
xx.ravel().shape


# ## Array Operations ##
# 
# There are a huge number of operations available on arrays. All the familiar arithemtic operators are applied on an element-by-element basis.
# 
# ### Basic Math ##

# In[1]:


f = np.sin(xx) * np.cos(0.5*yy)


# At this point you might be getting curious what these arrays "look" like. So we need to introduce some visualization.

# In[ ]:


from matplotlib import pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[ ]:


plt.pcolormesh(f)


# ## Manipulating array dimensions ##

# In[ ]:


# transpose
plt.pcolormesh(f.T)


# In[ ]:


# reshape an array (wrong size)
#g = np.reshape(f, (8,9))


# In[ ]:


# reshape an array (right size) and mess it up
print(f.size)
g = np.reshape(f, (200,25))
plt.pcolormesh(g)


# In[ ]:


# tile an array
plt.pcolormesh(np.tile(f,(6,1)))


# ## Broadcasting ##
# 
# Broadcasting is an efficient way to multiply arrays of different sizes
# 

# In[ ]:


Image(url='http://scipy-lectures.github.io/_images/numpy_broadcasting.png',
     width=720)


# In[ ]:


# multiply f by x
print(f.shape, x.shape)
g = f * x
print(g.shape)


# In[ ]:


# multiply f by y
print(f.shape, y.shape)
h = f * y
print(h.shape)


# In[ ]:


# use newaxis special syntax
h = f * y[:,np.newaxis]
print(h.shape)


# In[ ]:


plt.pcolormesh(g)


# ## Reduction Operations ##

# In[ ]:


# sum
g.sum()


# In[ ]:


# mean
g.mean()


# In[ ]:


# std
g.std()


# In[ ]:


# apply on just one axis
g_ymean = g.mean(axis=0)
g_xmean = g.mean(axis=1)


# In[ ]:


plt.plot(x, g_ymean)


# In[ ]:


plt.plot(g_xmean, y)


# ## Fancy Plotting ##
# 
# Enough lessons, let's have some fun.

# In[ ]:


fig = plt.figure(figsize=(12,8))
ax1 = plt.subplot2grid((6,6),(0,1),colspan=5)
ax2 = plt.subplot2grid((6,6),(1,0),rowspan=5)


# In[ ]:


fig = plt.figure(figsize=(10,6))
ax1 = plt.subplot2grid((6,6),(0,1),colspan=5)
ax2 = plt.subplot2grid((6,6),(1,0),rowspan=5)
ax3 = plt.subplot2grid((6,6),(1,1),rowspan=5, colspan=5)

ax1.plot(x, g_ymean)
ax2.plot(g_xmean, y)
ax3.pcolormesh(x, y,  g)

ax1.set_xlim([x.min(), x.max()])
ax3.set_xlim([x.min(), x.max()])
ax2.set_ylim([y.min(), y.max()])
ax3.set_ylim([y.min(), y.max()])

plt.tight_layout()


# ## Real Data ##
# 
# ARGO float profile from North Atlantic

# In[ ]:


# download with curl
get_ipython().system('curl -O http://www.ldeo.columbia.edu/~rpa/argo_float_4901412.npz')


# In[ ]:


# load numpy file and examine keys
data = np.load('argo_float_4901412.npz')
data.keys()


# In[ ]:


# access some data
T = data['T']


# In[ ]:


# there are "nans", missing data, which screw up our routines
T.min()


# In[ ]:


ar_w_mask = np.ma.masked_array([1, 2, 3, 4, 5],
                        mask=[True, True, False, False, False])
ar_w_mask


# In[ ]:


ar_w_mask.mean()


# In[ ]:


T_ma = np.ma.masked_invalid(T)
T_ma.mean()


# ## Masked Arrays ##
# 
# This is how we deal with missing data in numpy

# In[ ]:


# create masked array
T = np.ma.masked_invalid(data['T'])
type(T)


# In[ ]:


# max and min
T.max(), T.min()


# In[ ]:


# load other data
S = np.ma.masked_invalid(data['S'])
P = np.ma.masked_invalid(data['P'])


# In[ ]:


# scatter plot
plt.scatter(S, T, c=P)
plt.grid()
plt.colorbar()