{ "cells": [ { "cell_type": "markdown", "id": "9e96adc8-68d1-45d5-ab1e-15a3a72ae637", "metadata": {}, "source": [ "# COMPUTE AND SAVE SST STATS" ] }, { "cell_type": "code", "execution_count": 2, "id": "1aac342c-fb07-4020-9f63-b185496abe27", "metadata": {}, "outputs": [], "source": [ "import numpy as N\n", "import xarray as xr\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import sys\n", "import dask\n", "from os import path\n", "from dask.distributed import Client, LocalCluster, progress\n", "import time\n", "from tools import my_percentile\n", "from scipy.optimize import least_squares,curve_fit\n", "import dask_hpcconfig\n", "from dask_jobqueue import PBSCluster\n", "import glob" ] }, { "cell_type": "markdown", "id": "9064ab92-5d78-45a6-a6f8-2251f8eb3b9a", "metadata": { "tags": [] }, "source": [ "### Conseils dask\n", "\n", "- pour Datarmor il vaut mieux avoir 1 thread/worker\n", "- il vaut mieux avoir des chunks assez fa" ] }, { "cell_type": "code", "execution_count": 3, "id": "293e8282-f4c5-42e5-a5ec-602afa308123", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home1/datahome/mcaillau/conda-env/pydask3/lib/python3.10/site-packages/dask_jobqueue/core.py:255: FutureWarning: job_extra has been renamed to job_extra_directives. You are still using it (even if only set to []; please also check config files). If you did not set job_extra_directives yet, job_extra will be respected for now, but it will be removed in a future release. If you already set job_extra_directives, job_extra is ignored and you can remove it.\n", " warnings.warn(warn, FutureWarning)\n", "/home1/datahome/mcaillau/conda-env/pydask3/lib/python3.10/site-packages/dask_jobqueue/pbs.py:82: FutureWarning: project has been renamed to account as this kwarg was used wit -A option. You are still using it (please also check config files). If you did not set account yet, project will be respected for now, but it will be removed in a future release. 
If you already set account, project is ignored and you can remove it.\n", " warnings.warn(warn, FutureWarning)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "http://10.148.1.73:8787/status\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home1/datahome/mcaillau/conda-env/pydask3/lib/python3.10/site-packages/dask_jobqueue/core.py:255: FutureWarning: job_extra has been renamed to job_extra_directives. You are still using it (even if only set to []; please also check config files). If you did not set job_extra_directives yet, job_extra will be respected for now, but it will be removed in a future release. If you already set job_extra_directives, job_extra is ignored and you can remove it.\n", " warnings.warn(warn, FutureWarning)\n", "/home1/datahome/mcaillau/conda-env/pydask3/lib/python3.10/site-packages/dask_jobqueue/pbs.py:82: FutureWarning: project has been renamed to account as this kwarg was used wit -A option. You are still using it (please also check config files). If you did not set account yet, project will be respected for now, but it will be removed in a future release. 
If you already set account, project is ignored and you can remove it.\n", " warnings.warn(warn, FutureWarning)\n" ] } ], "source": [ "# Dask cluster used to process all the time steps.\n", "# An earlier configuration, PBSCluster(processes=3, cores=9), was also tried.\n", "# The trailing 16min 44 note is presumably a measured run time -- TODO confirm which configuration it refers to.\n", "cluster = PBSCluster(processes=3, cores=3)  # 16min 44\n", "\n", "cluster.scale(jobs=4)\n", "print(cluster.dashboard_link)" ] }, { "cell_type": "code", "execution_count": 4, "id": "7d54b8c0-7f90-4d6f-b55a-253be3db0568", "metadata": {}, "outputs": [], "source": [ "# explicitly connect to the cluster we just created\n", "client = Client(cluster)" ] }, { "cell_type": "code", "execution_count": 5, "id": "385847a6-d04f-40af-9538-caa1b8448669", "metadata": {}, "outputs": [], "source": [ "# Read the model grid; its coordinate variables are copied onto the model output below.\n", "ds_grid = xr.open_dataset('/home/shom_simuref/CROCO/ODC/CONFIGS/MEDITERRANEE_GLOBALE/CROCO_FILES/test2.nc')" ] }, { "cell_type": "code", "execution_count": 6, "id": "9ad6f687-3c5f-47b6-9f41-96f05d47808b", "metadata": {}, "outputs": [], "source": [ "# Directory containing the model output files.\n", "# NOTE(review): this rebinds `path` and shadows `from os import path` from the import cell;\n", "# the name is kept because cells outside this section may rely on it.\n", "path=\"/home/shom_simuref/CROCO/ODC/SIMU-RESULT/HINDCAST_2012_2013/OUTPUTS_201207_201307/\"" ] }, { "cell_type": "code", "execution_count": 7, "id": "d2ac1911-9fb8-4e41-803e-ae16d4600de3", "metadata": {}, "outputs": [], "source": [ "# glob.glob() returns matches in arbitrary order, while open_mfdataset(combine=\"nested\")\n", "# concatenates the files in list order, so the list is sorted to make that order deterministic\n", "# (assumes the file names sort chronologically -- TODO confirm).\n", "list_model = sorted(glob.glob(path + \"croco_his_surf_2*.nc\"))\n", "if not list_model:\n", "    raise FileNotFoundError(\"no croco_his_surf_2*.nc file found in \" + path)" ] }, { "cell_type": "code", "execution_count": 8, "id": "53ea47d2-001f-42fb-b5f4-3b122d4e8551", "metadata": {}, "outputs": [], "source": [ "# Chunk layout. A first assignment ({'time':121,'xi_rho':315,'eta_rho':878}) used to be\n", "# overwritten immediately by this one, so only the effective value is kept.\n", "chunks = {'time': 800, 's_rho': -1, 'xi_rho': 865, 'eta_rho': 936}\n", "\n", "# Time window later selected with ds.sel(time=time_range).\n", "date_start = \"2012-06-01\"\n", "date_end = \"2013-08-01\"\n", "time_range = slice(date_start, date_end)" ] }, { "cell_type": "code", "execution_count": 9, "id": "fc3b2c57-44a8-4c91-a0b6-e0b452e26694", "metadata": {}, "outputs": [], "source": [ "# directory where the data are stored\n", "stat_dir = '/home/shom_simuref/CROCO/ODC/POSTPROC/SST/'" ] }, { "cell_type": "code", "execution_count": 10, "id": "ae5157d2-511e-465e-a2a2-d29b72eeade7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "data size in GB 425.54\n", "\n" ] } ], "source": [ "# Open every file as a single dataset, concatenated along \"time\" in the order of list_model.\n", "ds = xr.open_mfdataset(\n", "    list_model, parallel=False, chunks=\"auto\",\n", "    concat_dim=\"time\", combine=\"nested\",\n", "    data_vars='minimal', coords='minimal', compat='override',\n", ")\n", "\n", "print('data size in GB {:0.2f}\\n'.format(ds.nbytes / 1e9))\n" ] }, { "cell_type": "code", "execution_count": 11, "id": "4738f6a4-f7ec-4ad7-818f-46e51be68685", "metadata": {}, "outputs": [], "source": [ "# Optional temporal subsampling (every 12th time step), currently disabled:\n", "#ds=ds.isel(time=slice(0,-1,12))" ] }, { "cell_type": "code", "execution_count": 12, "id": "982c01c8-90b3-4a8f-b192-c3bdd6d6e107", "metadata": {}, "outputs": [], "source": [ "# Overwrite the horizontal coordinates with those of the grid file because of MPINOLAND\n", "# (presumably the model output has no valid coordinates where land tiles were skipped -- TODO confirm).\n", "for var in [\"xi_rho\", \"eta_rho\", \"xi_u\", \"eta_v\", \"lon_rho\", \"lat_rho\", \"lon_u\", \"lat_v\"]:\n", "    ds[var] = ds_grid[var]" ] }, { "cell_type": "code", "execution_count": 13, "id": "508a1eca-ad12-4225-b7eb-368e473fc4d4", "metadata": {}, "outputs": [], "source": [ "# Remove duplicated time steps: unique() returns the sorted unique values and, with\n", "# return_index=True, the index of the first occurrence of each of them.\n", "_, index = N.unique(ds.time, return_index=True)\n", "ds = ds.isel(time=index)" ] }, { "cell_type": "code", "execution_count": 14, "id": "a8b631fc-8332-40a0-a6d7-5670ffacce7d", "metadata": {}, "outputs": [], "source": [ "# select time range\n", "ds = ds.sel(time=time_range)" ] }, { "cell_type": "code", "execution_count": 15, "id": "0e85cd68-226d-4cb1-a004-d0f277c01020", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
<xarray.DataArray 'temp' (time: 8760, eta_rho: 936, xi_rho: 2595)>\n", "dask.array<rechunk-merge, shape=(8760, 936, 2595), dtype=float32, chunksize=(800, 936, 865), chunktype=numpy.ndarray>\n", "Coordinates:\n", " * xi_rho (xi_rho) int64 0 1 2 3 4 5 6 ... 2588 2589 2590 2591 2592 2593 2594\n", " * eta_rho (eta_rho) int64 0 1 2 3 4 5 6 7 ... 928 929 930 931 932 933 934 935\n", " * time (time) datetime64[ns] 2012-07-01T13:00:00 ... 2013-07-01T12:00:00\n", " lon_rho (eta_rho, xi_rho) float64 dask.array<chunksize=(936, 865), meta=np.ndarray>\n", " lat_rho (eta_rho, xi_rho) float64 dask.array<chunksize=(936, 865), meta=np.ndarray>\n", "Attributes:\n", " long_name: SST\n", " units: Celsius\n", " field:
<xarray.DataArray 'temp' (time: 8760, Y: 936, X: 2595)>\n", "dask.array<rechunk-merge, shape=(8760, 936, 2595), dtype=float32, chunksize=(800, 936, 865), chunktype=numpy.ndarray>\n", "Coordinates:\n", " xi_rho (X) int64 0 1 2 3 4 5 6 7 ... 2588 2589 2590 2591 2592 2593 2594\n", " eta_rho (Y) int64 0 1 2 3 4 5 6 7 8 ... 927 928 929 930 931 932 933 934 935\n", " * time (time) datetime64[ns] 2012-07-01T13:00:00 ... 2013-07-01T12:00:00\n", " lon_rho (Y, X) float64 dask.array<chunksize=(936, 865), meta=np.ndarray>\n", " lat_rho (Y, X) float64 dask.array<chunksize=(936, 865), meta=np.ndarray>\n", " * X (X) float64 -7.0 -6.983 -6.967 -6.95 ... 36.18 36.2 36.22 36.23\n", " * Y (Y) float64 30.23 30.25 30.27 30.28 30.3 ... 45.77 45.78 45.8 45.82\n", "Attributes:\n", " long_name: SST\n", " units: Celsius\n", " field: