This tutorial provides a more in-depth example of working with the Sea Surface Temperature Daily Analysis dataset (European Space Agency Climate Change Initiative product version 2.1), hosted on the AWS Open Data registry.
A brief summary of this dataset is:
Global daily-mean sea surface temperatures, presented on a 0.05° latitude-longitude grid, with gaps between available daily observations filled by statistical means, spanning late 1981 to recent time.
Synthesised from thousands of billions of individual measurements made by infrared imagers on multiple Earth-orbiting satellites. The underlying observation resolution ranges from 1 to 20 km, and after gap filling the typical feature resolution is ~20 km. Suitable for large-scale oceanographic, meteorological, and climatological applications, such as evaluating or constraining environmental models or case studies of marine heatwave events.
Adheres to community data standards and names. Includes temperature uncertainty information and auxiliary information about land-sea fraction and sea-ice coverage. To understand the data for your application, read the paper [1] (www.nature.com/articles/s41597-019-0236-x) and cite it in any published usage.
The v2.1 record is known to have biases associated with desert-dust aerosol and erratic calibration of early-record sensors [1]. Adjustments that reduce these biases and add extra uncertainty for these effects have been developed, as described in [2], and are applied to this data. These adjustments operate on monthly and >5 degree time-space scales.
[1] Merchant, C.J., Embury, O., Bulgin, C.E., Block, T., Corlett, G.K., Fiedler, E., Good, S.A., Mittaz, J., Rayner, N.A., Berry, D., Eastwood, S., Taylor, M., Tsushima, Y., Waterfall, A., Wilson, R. and Donlon, C. (2019) Satellite-based time-series of sea-surface temperature since 1981 for climate applications. Scientific Data, 6, 223. doi:10.1038/s41597-019-0236-x
[2] Merchant, C.J. and Embury, O. (2020) Adjusting for desert-dust-related biases in a climate data record of sea surface temperature. Remote Sensing, 12 (16), 2554. doi:10.3390/rs12162554
Although the entire dataset is over 300 GB in size, subsets of the data can be downloaded selectively to inspect particular areas, locations or time periods. This tutorial provides a worked example of doing this in order to investigate the occurrence of marine heatwaves.
import fsspec
import xarray as xr
import matplotlib
import numpy as np
The following code will open the data stored on AWS Open Data and create an SST dataset. Opening the Zarr store is lazy: data values are only downloaded when they are actually needed.
# if you are running on the AWS cloud you can use the line below and it will be faster to read and access the data
# sst_ds = xr.open_zarr(fsspec.get_mapper("s3://surftemp-sst/data/sst.zarr"), consolidated=True)
# if you are running anywhere other than the AWS cloud (e.g. your local computer or mybinder.org) use this line instead
sst_ds = xr.open_zarr('https://surftemp-sst.s3.us-west-2.amazonaws.com/data/sst.zarr')
sst_ds
<xarray.Dataset>
Dimensions:                   (time: 14367, lat: 3600, lon: 7200, bnds: 2)
Coordinates:
  * lat                       (lat) float32 -89.97 -89.93 -89.88 ... 89.93 89.97
  * lon                       (lon) float32 -180.0 -179.9 -179.9 ... 179.9 180.0
  * time                      (time) datetime64[ns] 1981-09-01T12:00:00 ... 2...
Dimensions without coordinates: bnds
Data variables:
    analysed_sst              (time, lat, lon) float32 dask.array<chunksize=(50, 360, 720), meta=np.ndarray>
    analysed_sst_uncertainty  (time, lat, lon) float32 dask.array<chunksize=(50, 360, 720), meta=np.ndarray>
    lat_bnds                  (lat, bnds) float32 dask.array<chunksize=(360, 2), meta=np.ndarray>
    lon_bnds                  (lon, bnds) float32 dask.array<chunksize=(720, 2), meta=np.ndarray>
    mask                      (time, lat, lon) float32 dask.array<chunksize=(50, 360, 720), meta=np.ndarray>
    sea_ice_fraction          (time, lat, lon) float32 dask.array<chunksize=(50, 360, 720), meta=np.ndarray>
Attributes: (12/61)
    Conventions:               CF-1.5, Unidata Observation Dataset v1.0
    Metadata_Conventions:      Unidata Dataset Discovery v1.0
    acknowledgment:            Funded by the Copernicus Climate Change ...
    cdm_data_type:             grid
    comment:                   These data were produced by the Met Offi...
    contact:                   http://copernicus-support.ecmwf.int
    ...                        ...
    time_coverage_resolution:  P1D
    time_coverage_start:       20201215T000000Z
    title:                     C3S SST L4 product
    tracking_id:               7fdf2639-26e5-4d4f-a60e-0bcfc9744204
    uuid:                      7fdf2639-26e5-4d4f-a60e-0bcfc9744204
    westernmost_longitude:     -180.0
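It is worth checking the units of analysed_sst before plotting; for this product the SSTs should be stored in Kelvin. A minimal check of the variable's attributes (an illustrative inspection, assuming CF-style metadata):
# inspect the metadata attached to the analysed_sst variable;
# .get() returns None rather than raising if an attribute is absent
print(sst_ds["analysed_sst"].attrs.get("units"))
print(sst_ds["analysed_sst"].attrs.get("long_name"))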
We'll look at an area of the Great Barrier Reef, near Heron Island.
lat_min = -25
lat_max = -21
lon_min = 150
lon_max = 154
Let's look at the sea surface temperatures in this region on a specific day.
sst_ds["analysed_sst"].sel(time="2000-01-01").sel(lat=slice(lat_min,lat_max),lon=slice(lon_min,lon_max)) \
.plot(cmap="coolwarm")
<matplotlib.collections.QuadMesh at 0x7fa096f8ba90>
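If degrees Celsius are preferred for plotting, the conversion from Kelvin is a simple subtraction (a small variation on the cell above; sst_c is a name introduced here for illustration):
# plot the same field converted from Kelvin to degrees Celsius
sst_c = sst_ds["analysed_sst"].sel(time="2000-01-01") \
    .sel(lat=slice(lat_min,lat_max),lon=slice(lon_min,lon_max)) - 273.15
sst_c.plot(cmap="coolwarm")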
We will choose a 28-year period for our climatology (the years 1982-2009) and compute the 90th-percentile SST for each day of the year using an 11-day sliding window, over the given region only. An extra "binomial smoothing" function is then applied to the 90th percentiles along the time axis.
climatology_start = "1982-01-01"
climatology_end = "2009-12-31"
climatology_raw_data = sst_ds["analysed_sst"] \
    .sel(time=slice(climatology_start,climatology_end)) \
    .sel(lat=slice(lat_min,lat_max),lon=slice(lon_min,lon_max))
Compute the 90th percentile for each day of the year over the 11-day window.
# pad the time axis so the rolling window has data at the start and end of the
# record (wrapping around), build an explicit 11-day "window" dimension,
# then trim off the padding
climatology_rolling = climatology_raw_data.chunk({"lat": 10, "lon": 10, "time": -1}) \
    .pad(pad_width={"time": 5}, mode="wrap") \
    .rolling(time=11, center=True) \
    .construct("window") \
    .isel(time=slice(5, -5))
# group by day of year and take the 90th percentile over all years and window positions
climatology_90th_percentile = climatology_rolling.chunk({"time": -1}) \
    .groupby("time.dayofyear") \
    .quantile(0.9, dim=["time", "window"]).load()
/Users/cv922550/miniconda3/envs/sstenv/lib/python3.7/site-packages/numpy/lib/nanfunctions.py:1396: RuntimeWarning: All-NaN slice encountered
(This warning is expected: grid cells over land contain only NaNs.)
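The groupby/quantile step replaces the time dimension with a dayofyear dimension. A quick check of the result's shape (an illustrative inspection, not part of the original workflow):
# the climatology is now indexed by day of year (1-366) rather than by time
print(climatology_90th_percentile.dims)                # expected: ('dayofyear', 'lat', 'lon')
print(climatology_90th_percentile.sizes["dayofyear"])  # expected: 366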
Perform the binomial smoothing
# coefficients of (0.5 + 0.5x)^30: a normalised 31-point binomial smoothing kernel
binomial_coefficients = (np.poly1d([0.5, 0.5]) ** 30).coeffs
# pad the day-of-year axis so the 31-day window wraps around the year boundary,
# construct the window dimension, then take the kernel-weighted sum over each window
climatology_90th_percentile_smoothed = (climatology_90th_percentile
    .pad(pad_width={"dayofyear": 15}, mode="wrap")
    .rolling(dayofyear=31, center=True)
    .construct("window")
    .isel(dayofyear=slice(15, -15))
    * binomial_coefficients).sum(dim="window", skipna=False)
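As a quick sanity check on the kernel (illustrative only), its 31 weights sum to 1 and peak at the centre, approximating a Gaussian:
print(len(binomial_coefficients))    # 31 weights
print(binomial_coefficients.sum())   # 1.0 (up to floating-point rounding)
print(binomial_coefficients.max())   # central weight, approximately 0.144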
Display a time series for a cell in the middle of the climatology region
climatology_90th_percentile_smoothed.sel(lat=(lat_min+lat_max)/2,lon=(lon_min+lon_max)/2,method="nearest").plot()
[<matplotlib.lines.Line2D at 0x7fa08edd2550>]
We can now compare sea surface temperatures against our climatology. We'll use a simple definition of a marine heatwave: a cell is in a heatwave when its temperature has been above the smoothed 90th percentile for at least 5 consecutive days. We'll search our region for such events over a particular time period.
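Before applying this to the whole grid, here is a minimal 1-D sketch of the running-count logic used below (the hot series holds made-up exceedance values for illustration):
# toy example: count consecutive "hot" days and flag runs of 5 or more
hot = np.array([0, 1, 1, 1, 1, 1, 0, 1, 1], dtype=bool)  # hypothetical exceedances
count = 0
flags = np.zeros(len(hot))
for i, h in enumerate(hot):
    count = count + 1 if h else 0    # a cool day resets the run length
    if count >= 5:                   # flag this day and the 4 preceding days
        flags[i - 4:i + 1] = 1
print(flags)  # [0. 1. 1. 1. 1. 1. 0. 0. 0.]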
# define the period of interest
period = ("2018-01-01","2018-12-31")
import datetime
time_window_days = 5
start_date = datetime.datetime.strptime(period[0],"%Y-%m-%d")
end_date = datetime.datetime.strptime(period[1],"%Y-%m-%d")
# need to widen the time range by +/- time_window_days to get accurate heatwave information for the entire period
data_start_date = start_date - datetime.timedelta(days=time_window_days)
data_end_date = end_date + datetime.timedelta(days=time_window_days)
# note: load the widened date range computed above, so that heatwaves spanning
# the period boundaries are detected correctly
sst_data = sst_ds["analysed_sst"].sel(time=slice(data_start_date, data_end_date)) \
    .sel(lat=slice(lat_min, lat_max), lon=slice(lon_min, lon_max)) \
    .load()
# create a new dataarray with dayofyear instead of time
# (the widened period may cross a year boundary, so dayofyear values can repeat;
# the detection loop below therefore selects days by position, not by label)
sst_data["dayofyear"] = sst_data.time.dt.dayofyear
sst_data_doy = xr.DataArray(data=sst_data.data, dims=["dayofyear", "lat", "lon"],
                            coords=[sst_data.dayofyear, sst_data.lat, sst_data.lon])
n_lats = len(sst_data_doy["lat"])
n_lons = len(sst_data_doy["lon"])
n_days = len(sst_data_doy["dayofyear"])
# create an array to hold a count of the numbers of consecutive hot days
# we will step through the time period and update this array
hot_day_count = np.zeros(shape=(n_lats,n_lons))
mhw_flags = np.zeros(shape=(n_days,n_lats,n_lons))
for day in range(n_days):
    # day of year for this time step, used to look up the climatology
    doy = sst_data_doy["dayofyear"].values[day]
    # update the hot_day_count for this day; a non-hot day resets the count to zero
    # (select the SST field by position, since dayofyear labels may repeat)
    hot_day_count = xr.where(sst_data_doy.isel(dayofyear=day) > climatology_90th_percentile_smoothed
                             .sel(dayofyear=doy), hot_day_count + 1, 0).values
    # wherever there are at least time_window_days of hot weather at a cell, flag the current day
    # and the preceding (time_window_days-1) days as heat wave days
    mhw = hot_day_count >= time_window_days
    for prev in range(max(0, day - time_window_days + 1), day + 1):
        mhw_flags[prev, :, :] = np.where(mhw, 1, mhw_flags[prev, :, :])
mhw_flags = np.where(np.isnan(sst_data),np.nan,mhw_flags)
mhw = xr.DataArray(data=mhw_flags,dims=["time","lat", "lon"],
coords=[sst_data.time, sst_data.lat, sst_data.lon])
# clip the dataset back to the original time period
mhw = mhw.sel(time=slice(period[0],period[1]))
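For example, to list the dates flagged as heatwave days at a single cell (an illustrative query; the point chosen here is simply the centre of the region):
# pick the cell nearest the centre of the region and list its flagged dates
point = mhw.sel(lat=(lat_min + lat_max) / 2, lon=(lon_min + lon_max) / 2, method="nearest")
print(point["time"].values[point.values == 1])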
Check the fraction of marine heatwave cells: first as a time series (averaging over the region), then as a map (averaging over time)
mhw.mean(dim=["lat","lon"],skipna=True).plot()
[<matplotlib.lines.Line2D at 0x7fa082e7ded0>]
mhw.mean(dim=["time"],skipna=True).plot()
<matplotlib.collections.QuadMesh at 0x7fa083358290>
Plot marine heatwave areas in red for particular dates
# check which areas are experiencing a marine heatwave on a particular date
mhw.sel(time="2018-07-01").plot(cmap="coolwarm",add_colorbar=False)
<matplotlib.collections.QuadMesh at 0x7fa083664690>
# plot the development of a marine heatwave "spike" over a two month period
mhw.sel(time=slice("2018-07-01","2018-08-31")) \
.plot(cmap="coolwarm", col="time",col_wrap=4,add_colorbar=False)
<xarray.plot.facetgrid.FacetGrid at 0x7fa08381f910>
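As a final illustrative query (not part of the original analysis), we can find the date on which the heatwave extent peaked:
# find the date with the largest fraction of cells flagged as heatwave
extent = mhw.mean(dim=["lat", "lon"], skipna=True)
print(extent["time"].values[int(extent.argmax())])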