{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Calculating EOFs & PCs using the 'eofs' package\n", "\n", "## Import the sst data from a netcdf file or multiple netcdf files" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#! conda install -c conda-forge eofs" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", "from matplotlib import pyplot as plt\n", "from eofs.standard import Eof\n", "%matplotlib inline\n", "from glob import glob\n", "import os\n", "#import warnings\n", "#warnings.simplefilter(\"ignore\") " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## load the SST data from files using xarray" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dir_base = '/home/pangeo/data/ersst/'\n", "#!ls /home/pangeo/data/ersst/\n", "! ulimit -n \n", "monthly_files = sorted(glob(os.path.join(dir_base, 'ersst.*.nc')))\n", "num_files = np.shape(monthly_files)[0]; print('number of files',num_files)\n", "\n", "# we can't open all of the files at once since we are only allowd 1024 open files\n", "monthly_files = sorted(glob(os.path.join(dir_base, 'ersst.19[5-9]*.nc')))\\\n", " + sorted(glob(os.path.join(dir_base, 'ersst.2*.nc')))\n", "\n", "num_files = np.shape(monthly_files)[0]; print('number of files',num_files)\n", "dset = xr.open_mfdataset(monthly_files)\n", "ds = dset.drop('sst').sel(time=slice('1958-01-01','2014-12-01'),lat=slice(-40,40), lon=slice(120,290))\n", "dset.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds['time'] = pd.date_range('1/1/1958', periods=ds.ssta.shape[0], freq='MS').shift(15, freq='D')\n", "dpac = ds.rename({'ssta':'anom','lat':'Y','lon':'X'}).drop(['lev']).squeeze()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ds_annual = dpac.resample(time='AS').mean(dim='time')\n", "ds_anom = ds_annual - ds_annual.mean(dim='time')\n", "ds_anom.to_netcdf('anom.nc')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### define weights" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "coslat = np.cos(np.deg2rad(dpac.Y))\n", "ds_anom['wgts'] = np.sqrt(coslat) + 0*dpac.X\n", "dpac.Y" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### instantiates the EOF solver, passing the weights array as an optional argument" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "solver = Eof(ds_anom.anom.values, weights=ds_anom.wgts.values)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### get the EOFs (spatial patterns) as correlations" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "number_of_eofs = 4\n", "Ss = solver.eofsAsCovariance(neofs=number_of_eofs)\n", "ds_anom['ev'] = np.arange(0,number_of_eofs)\n", "ds_anom['Ss'] = (['ev','Y','X'],Ss)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### get the Principal Components (PCs), normalized values" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "Ts = solver.pcs(npcs=number_of_eofs, pcscaling=1)\n", "ds_anom['Ts'] = (['ev','time'],Ts.T)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### plots " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.figure(figsize=(8, 10))\n", "plt.subplot(211)\n", "ds_anom.Ss[0].plot()\n", "plt.subplot(212)\n", "ds_anom.Ss[1].plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ds_anom.Ts[0].plot(figsize=(10,5)); plt.title('PC1',fontsize=16)\n", "ds_anom.Ts[1].plot(figsize=(10,5)); plt.title('PC2',fontsize=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds_anom.to_netcdf('python_EOF.nc')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "gist_info": { "gist_id": null, "gist_url": null }, "kernelspec": { "display_name": "Py3 pangeo", "language": "python", "name": "pangeo" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }