Source code for sloop.models.hycom3d.io

"""
Input/output routines

Source for .a/.b handling: http://code.env.duke.edu/projects/mget/wiki/HYCOM

"""

import re
import math
import logging
import os
import shutil

import numpy as np
from ...log import LoggingLevelContext

with LoggingLevelContext(logging.WARNING):
    import xarray as xr
    import pandas as pd
import scipy.io as sio

from .__init__ import (
    get_cf_specs,
    SloopHycomError,
    HYCOM3D_SIGMA_TO_STMT_FNS,
    HYCOM3D_CONTEXT_TO_STMT_FNS,
    HYCOM3D_IBC_GEN_INICON,
    HYCOM3D_IBC_GEN_INICON_VCONF,
    HYCOM3D_MODEL_DEFSTRETCH_FILES
)
from sloop.times import convert_from_julian_day, convert_to_julian_day

BIN2HYCOM_SPECS = {
    "h": {
        "header": "epaisseurs de couches deduites des donnees Mercator",
        "positive": True,
    },
    "temp": {
        "header": "temperatures deduites des donnees Mercator",
        "positive": False,
    },
    "saln": {
        "header": "salinites deduites des donnees Mercator",
        "positive": False,
    },
    "u": {
        "header": "vitesses zonales deduites des donnees Mercator",
        "positive": False,
    },
    "v": {
        "header": "vitesses meridiennes deduites des donnees Mercator",
        "positive": False,
    },
}

def get_ab_file_names(path):
    """Get the .a and .b file names from a single path

    Parameters
    ----------
    path: str
        Root, .a file or .b file

    Return
    ------
    str: afile
    str: bfile
    """
    m = re.match(r"(?P<root>.+)\.[ab]$", path)
    if m is not None:
        root = m.group(1)
    else:
        root = path
    if root.endswith("."):
        root = root[:-1]
    afile = root + ".a"
    bfile = root + ".b"
    return afile, bfile
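A quick usage sketch (the file names are hypothetical): whichever of the root, ``.a`` or ``.b`` spelling is given, the same pair of names comes back.

# Usage sketch with hypothetical file names
get_ab_file_names("regional.grid")    # -> ("regional.grid.a", "regional.grid.b")
get_ab_file_names("regional.grid.a")  # -> ("regional.grid.a", "regional.grid.b")
get_ab_file_names("regional.grid.b")  # -> ("regional.grid.a", "regional.grid.b")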
def read_regional_grid_b(grid_b_file):
    """Read the regional.grid.b file

    Parameters
    ----------
    grid_b_file: str

    Return
    ------
    dict
    """
    _, grid_b_file = get_ab_file_names(grid_b_file)
    grid_specs = {}
    with open(grid_b_file, "r") as f:
        grid_specs["idm"] = int(f.readline().split()[0])  # rows
        grid_specs["jdm"] = int(f.readline().split()[0])  # cols
        grid_specs["mapflg"] = int(f.readline().split()[0])
        for line in f:
            if line:
                sline = line[:-1].split()
                key = sline[0][:-1]
                grid_specs[key + "_min"] = float(sline[-2])
                grid_specs[key + "_max"] = float(sline[-1])
    return grid_specs
def read_regional_grid(grid_file, grid_loc=None):
    """Read the coordinates stored in a regional grid

    Parameters
    ----------
    grid_file: str
        Grid .a or .b file name
    grid_loc: None or str
        Grid to read as one or several of "p", "q", "u" or "v"

    Return
    ------
    xarray.Dataset
        Dataset that contains the coordinates
    """
    allowed_grid_locs = "pquv"
    grid_a_file, grid_b_file = get_ab_file_names(grid_file)
    grid_specs = read_regional_grid_b(grid_b_file)
    rows = grid_specs["idm"]
    cols = grid_specs["jdm"]

    # Compute the number of bytes of a variable stored in a HYCOM .a
    # file for a grid of this size: records are padded to a multiple
    # of 16384 bytes.
    var_byte_length = int(math.ceil(float(cols * rows * 4) / 16384) * 16384)

    # Read the longitude and latitude grids as numpy arrays
    coords = {}
    with open(grid_a_file, "rb") as f:
        for i, gl in enumerate(allowed_grid_locs):
            if grid_loc is not None and gl not in grid_loc:
                continue
            f.seek(i * 2 * var_byte_length)
            lon = np.ascontiguousarray(
                np.fromfile(f, ">f4", rows * cols)
                .astype(float)
                .reshape((cols, rows))
            )
            f.seek((i * 2 + 1) * var_byte_length)
            lat = np.ascontiguousarray(
                np.fromfile(f, ">f4", rows * cols)
                .astype(float)
                .reshape((cols, rows))
            )

            # According to Alan Wallcraft, the grid coordinates are only
            # accurate to four decimal digits (Fortran REAL*4 precision),
            # so round the coordinates to four digits.
            lon = lon.round(4)
            lat = lat.round(4)

            xname = gl + "lon"
            yname = gl + "lat"

            # Check consistency with the .b file
            assert (
                lon.min() == np.round(grid_specs[xname + "_min"], 4)
                and lon.max() == np.round(grid_specs[xname + "_max"], 4)
                and lat.min() == np.round(grid_specs[yname + "_min"], 4)
                and lat.max() == np.round(grid_specs[yname + "_max"], 4)
            ), "grid .a and .b files are inconsistent"

            if gl != "p":
                sn_suffix = "_at_{}_location".format(gl)
                ln_suffix = " at {} location".format(gl.upper())
            else:
                sn_suffix = ln_suffix = ""
            if grid_loc:
                xname = "lon"
                yname = "lat"
            coords[xname] = xr.DataArray(
                lon,
                name=xname,
                dims=("jdm", "idm"),
                attrs=dict(
                    long_name="Longitude" + ln_suffix,
                    standard_name="longitude" + sn_suffix,
                    units="degree_east",
                ),
            )
            coords[yname] = xr.DataArray(
                lat,
                name=yname,
                dims=("jdm", "idm"),
                attrs=dict(
                    long_name="Latitude" + ln_suffix,
                    standard_name="latitude" + sn_suffix,
                    units="degree_north",
                ),
            )

    # Create a dataset with coords only
    ds = xr.Dataset(coords=coords)
    ds.attrs.update(grid_specs)
    return ds
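A minimal usage sketch, assuming a hypothetical ``regional.grid.a``/``regional.grid.b`` pair is available; restricting ``grid_loc`` to ``"p"`` returns the pressure-point coordinates under the generic ``lon``/``lat`` names.

# Usage sketch with hypothetical file names
grid = read_regional_grid("regional.grid.a", grid_loc="p")
print(grid.lon.shape)         # (jdm, idm)
print(grid.attrs["mapflg"])   # map flag read from regional.grid.b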
def read_regional_depth(depth_a_file, lon, lat):
    """Read the regional depth binary file

    Parameters
    ----------
    depth_a_file: str
        Binary bathymetry file
    lon: xarray.DataArray
        Longitudes 2D array
    lat: xarray.DataArray
        Latitudes 2D array

    Return
    ------
    xarray.DataArray
        Bathymetry
    """
    depth_a_file, _ = get_ab_file_names(depth_a_file)
    jdm, idm = lon.shape
    with open(depth_a_file, mode="rb") as f:
        field = np.reshape(
            np.fromfile(f, dtype="float32", count=idm * jdm).byteswap(),
            (jdm, idm),
        )
    field[field > 2 ** 99] = np.nan
    return xr.DataArray(
        field,
        name="bathymetry",
        dims=lon.dims,
        coords={"plon": lon, "plat": lat},
        attrs=dict(
            long_name="Bathymetry",
            units="m",
            standard_name="model_sea_floor_depth_below_sea_level",
        ),
    )
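A usage sketch chaining the two readers, with hypothetical file names:

# Usage sketch with hypothetical file names
grid = read_regional_grid("regional.grid.a", grid_loc="p")
bathy = read_regional_depth("regional.depth.a", grid.lon, grid.lat)
print(float(bathy.max()))  # deepest point in metres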
def write_res(ds, outfile_pattern="{field}.res"):
    """Write a Dataset to unformatted Fortran files

    One file per variable.

    Parameters
    ----------
    ds: xarray.Dataset
    outfile_pattern: str
        File pattern that must contain ``{field}``

    Return
    ------
    dict
        Dict of written files, where keys are fields
    """
    logger = logging.getLogger(__name__)
    cfs = get_cf_specs()

    # Coordinates
    lon = cfs.coords.get(ds, "lon").values.astype(">f4")
    lat = cfs.coords.get(ds, "lat").values.astype(">f4")
    if lon.ndim == 1:
        lon, lat = np.meshgrid(lon, lat)
    depth = cfs.get(ds, "depth").values.astype(">f4")
    time = (
        (cfs.coords.get(ds, "time").values - np.datetime64("1950-01-01"))
        / np.timedelta64(1, "D")
    ).astype(">f4")

    # Loop on variables
    outfiles = {}
    for cf_var_name in ["temp", "sal", "ssh"]:
        var = cfs.get(ds, cf_var_name).fillna(-1e34)
        field = cfs.data_vars.get_name(cf_var_name, specialize=True)
        shape = list(var.shape[::-1])
        if var.ndim == 3:
            shape.insert(2, 1)
            dep = depth[0]
        else:
            dep = depth
        shape = np.array(shape, ">i4")
        outfile = outfile_pattern.format(**locals())
        with sio.FortranFile(outfile, "w", ">u4") as f:
            f.write_record(shape)
            f.write_record(lon)
            f.write_record(lat)
            f.write_record(dep)
            f.write_record(time)
            f.write_record(var.values.astype(">f4"))
        outfiles[field] = outfile
        logger.debug("Created .res file: " + outfile)
    return outfiles
def nc_to_res(
    ncfiles, outfile_pattern="{field}.res{ifile:03d}", preproc=None
):
    """Convert netcdf files to .res files

    There is one .res file per variable and per netcdf file.

    Parameters
    ----------
    ncfiles: list
        List of netcdf files
    outfile_pattern: str
        Pattern for .res file names. ``{field}`` refers to the Hycom
        variable name and ``{ifile}`` refers to the file index starting
        from 1.
    preproc: callable
        Callable that operates on each input dataset after reading it

    Return
    ------
    dict
        Dict of lists of output files, where keys are fields
    """
    resfiles = {}
    for ifile, ncfile in enumerate(ncfiles, start=1):
        ds = xr.open_dataset(ncfile)
        if preproc:
            ds = preproc(ds)
        # Fill {ifile} now but keep the {field} placeholder for write_res,
        # without overwriting the pattern between iterations
        outfile_pat = outfile_pattern.format(field="{field}", ifile=ifile)
        for vname, resfile in write_res(ds, outfile_pat).items():
            resfiles.setdefault(vname, []).append(resfile)
        ds.close()
    return resfiles
def check_grid_dimensions(ddims, ds):
    """Check the consistency of grid dimensions between config and grid file

    Parameters
    ----------
    ddims: dict
        Result from :func:`get_dimensions_dict_from_conf`
    ds: xarray.Dataset or str
        Result from :func:`read_regional_grid`
    """
    # From file
    if isinstance(ds, str):
        ds = read_regional_grid(ds)

    # Check
    if "itdm0" not in ddims:
        ddims["itdm0"] = ds.plon.shape[1]
    if "jtdm0" not in ddims:
        ddims["jtdm0"] = ds.plon.shape[0]
    if (
        int(ddims["itdm0"]) != ds.plon.shape[1]
        or int(ddims["jtdm0"]) != ds.plon.shape[0]
    ):
        raise SloopHycomError(
            "Dimensions inconsistency between "
            "config ({}, {}) "
            "and regional_grid file ({}, {})".format(
                ddims["itdm0"],
                ddims["jtdm0"],
                ds.plon.shape[1],
                ds.plon.shape[0],
            )
        )
def setup_stmt_fns(sigma, context):
    """Place the stmt_fns.h file depending on the context

    Parameters
    ----------
    sigma: str, int
        Sigma value like 0 or 2
    context: {'ibc', 'model3d'}
    """
    sigma = str(sigma)
    if sigma not in HYCOM3D_SIGMA_TO_STMT_FNS:
        raise SloopHycomError(
            "Invalid sigma value. Please choose one of: "
            + " ".join(HYCOM3D_SIGMA_TO_STMT_FNS)
        )
    if context not in HYCOM3D_CONTEXT_TO_STMT_FNS:
        raise SloopHycomError(
            "Invalid context name. Please choose one of: "
            + " ".join(HYCOM3D_CONTEXT_TO_STMT_FNS)
        )
    stmt_fns_in = HYCOM3D_SIGMA_TO_STMT_FNS[sigma]
    stmt_fns_out = HYCOM3D_CONTEXT_TO_STMT_FNS[context]
    if os.path.exists(stmt_fns_out):
        os.remove(stmt_fns_out)
    shutil.copy(stmt_fns_in, stmt_fns_out)
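A usage sketch; valid values are the keys of ``HYCOM3D_SIGMA_TO_STMT_FNS`` and ``HYCOM3D_CONTEXT_TO_STMT_FNS``.

# Usage sketch: select the sigma-2 equation of state for the 3D model
setup_stmt_fns(sigma=2, context="model3d")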
def setup_gen_inicon(vconf):
    """Place the gen_inicon_hycom.F file depending on the vconf

    Parameters
    ----------
    vconf: str
        Vconf value like manga, med, indien, gijon
    """
    logger = logging.getLogger(__name__)
    gen_inicon_out = HYCOM3D_IBC_GEN_INICON
    if vconf in HYCOM3D_IBC_GEN_INICON_VCONF:
        gen_inicon_in = HYCOM3D_IBC_GEN_INICON + f".{vconf}"
    else:
        gen_inicon_in = HYCOM3D_IBC_GEN_INICON + ".generic"
    logger.info(f"{gen_inicon_in} will be used as {gen_inicon_out}")
    if os.path.exists(gen_inicon_out):
        os.remove(gen_inicon_out)
    shutil.copy(gen_inicon_in, gen_inicon_out)
def setup_defstretch(vconf):
    """Place the defstretch.f/F file depending on the vconf

    Parameters
    ----------
    vconf: str
        Vconf value like manga, med, indien, gijon
    """
    logger = logging.getLogger(__name__)
    if vconf not in HYCOM3D_MODEL_DEFSTRETCH_FILES:
        vconf = "generic"
    defstretch_in = HYCOM3D_MODEL_DEFSTRETCH_FILES[vconf]
    defstretch_out = defstretch_in.replace(f".{vconf}", "")
    logger.info(f"{defstretch_in} will be used as {defstretch_out}")

    # Remove any stale defstretch.f/.F before copying
    defstretch_path = os.path.dirname(defstretch_out)
    for fmt in [".F", ".f"]:
        defstretch_file = os.path.join(defstretch_path, "defstretch" + fmt)
        if os.path.exists(defstretch_file):
            os.remove(defstretch_file)
    shutil.copy(defstretch_in, defstretch_out)
RE_SCALAR_DECLARATION_MATCH = re.compile( r"^\s*(\S+)\s*'(\w+)\s*'\s+= (.+)$" ).match
def parse_scalar_declaration(line):
    """Parse a string that declares a scalar

    Format: `` 720 'idm   ' = longitudinal array size``

    Parameters
    ----------
    line: str

    Return
    ------
    tuple
        ``(name, value, description)`` where ``value`` is converted to
        its numerical form with :func:`eval`.
    """
    groups = RE_SCALAR_DECLARATION_MATCH(line).groups()
    return groups[1], eval(groups[0]), groups[2]
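The example line quoted in the docstring parses as follows (the value goes through ``eval``, so integers and floats come back as numbers):

# Worked example on the line quoted in the docstring
name, value, descr = parse_scalar_declaration(
    "   720 'idm   ' = longitudinal array size"
)
assert (name, value, descr) == ("idm", 720, "longitudinal array size")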
def read_blkdat_input(blkdat_input_file):
    """Read and parse the blkdat.input file

    Parameters
    ----------
    blkdat_input_file: str

    Return
    ------
    xarray.Dataset
        All scalars are stored as 0D arrays with a long_name attribute.
        Sigma values are stored as a 1D array along the "lev" dimension
        of size kdm.
    """
    # Parse the file and build a dict
    content = {}
    sigmas = []
    header = ""
    with open(blkdat_input_file) as f:
        for i in range(3):
            header += f.readline()[:-1]
        f.readline()
        for line in f:
            if line:
                name, value, long_name = parse_scalar_declaration(line[:-1])
                if name == "sigma":
                    sigmas.append(value)
                else:
                    content[name] = xr.DataArray(
                        value, name=name, attrs={"long_name": long_name}
                    )

    # Sigma as array
    nlayer = len(sigmas)
    content["sigma"] = xr.DataArray(
        sigmas,
        coords=[
            ("lev", np.arange(1, nlayer + 1), {"long_name": "Layer level"})
        ],
    )
    return xr.Dataset(content, attrs={"header": header})
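A usage sketch, assuming a hypothetical ``blkdat.input`` file at hand:

# Usage sketch with a hypothetical blkdat.input
blkdat = read_blkdat_input("blkdat.input")
print(int(blkdat["idm"]), int(blkdat["jdm"]))  # horizontal dimensions
print(blkdat["sigma"].values)                  # one target density per layer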
def format_ds(ds):
    """Format the variables of a dataset or data array to be Hycom compliant"""
    # Time units
    time = get_cf_specs().coords.get(ds, "time")
    if time is not None:
        ds[time.name].encoding["units"] = "days since 1950-01-01"

    # Other encodings
    for vname, var in ds.items():
        for attr in "dtype", "scale_factor", "add_offset":
            if attr in ds[vname].encoding:
                del ds[vname].encoding[attr]
        if ds[vname].dtype.kind in "fd":
            ds[vname].encoding["_FillValue"] = -1e34
    return ds
def bin2hycom(
    resfile, nx, ny, nz, resdates, positive, fileab, header, field
):
    """Convert .res files to .a/.b files

    This does the job of the ``bin2hycom`` program.

    Parameters
    ----------
    resfile: str
    nx: int
    ny: int
    nz: int
    resdates: array_like
    positive: bool
    fileab: str
    header: str
    field: str
    """
    # Inits
    n2drec = int(((nx * ny + 4095) // 4096) * 4096)
    field1d = np.zeros(n2drec, dtype=">f4")
    filea, fileb = get_ab_file_names(fileab)

    # Read dates
    with open(resdates, "rb") as fd:
        nt = int(np.fromfile(fd, ">i4", 1)[0])
        dates = np.empty(nt, dtype=">f4")
        for it in range(nt):
            fd.seek(4 * (it + 1))
            dates[it] = np.fromfile(fd, ">f4", 1)[0]

    with open(fileb, "w") as fb, open(resfile, "rb") as fr, \
            open(filea, "wb") as fa:

        # Write header
        for line in header, "", "", "", f"i/jdm = {nx} {ny}":
            fb.write("{:<79}\n".format(line))

        # Loop on .res file records
        for it, date in enumerate(dates):
            for iz in range(nz):
                irec = it * nz + iz

                # Read .res
                fr.seek(irec * nx * ny * 8)
                field2d = np.fromfile(fr, ">f8", nx * ny)

                # Write to .a
                field1d[: nx * ny] = field2d.ravel()
                if positive:
                    field1d[field1d < 0] = 0
                fa.seek(irec * n2drec * 4)
                field1d.tofile(fa)

                # Write to .b
                # Fortran format: (a21,1x,E18.8,1x,E18.8,1x,E18.8)
                vmin = np.nanmin(field2d)
                vmax = np.nanmax(field2d)
                fb.write(
                    f"{field:>7}: date,range = "
                    f"{date:18e} {vmin:18e} {vmax:18e}\n"
                )
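The ``n2drec`` padding deserves a worked example: each 2D record of a HYCOM .a file is padded up to a whole number of 4096-value blocks.

# Worked example of the .a record padding for a hypothetical 720 x 540 grid:
# 388800 values are padded up to the next multiple of 4096, i.e. 389120
# values, or 1556480 bytes per record at 4 bytes per value.
nx, ny = 720, 540
n2drec = int(((nx * ny + 4095) // 4096) * 4096)
assert n2drec == 389120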
def run_bin2hycom(
    nx,
    ny,
    nz,
    inpat="{field}.res",
    resdates="time_values.res",
    outpat="{short_name}-nest",
):
    """Run the :func:`bin2hycom` function with the default file names

    Specifications are provided by the :data:`BIN2HYCOM_SPECS` content.

    Parameters
    ----------
    nx, ny, nz: int
        Grid dimensions
    inpat: str
        Input .res file name pattern with a ``{field}`` pattern inside.
    resdates: str
        Input .res file that contains dates.
    outpat: str
        Output .a and .b file name pattern with a ``{short_name}`` pattern
        inside and without extension.

    Return
    ------
    list
        The effective list of output files.
    """
    outfiles = []
    for field, specs in BIN2HYCOM_SPECS.items():
        resfile = inpat.format(**locals())
        short_name = field[:1]
        fileab = outpat.format(**locals())
        bin2hycom(
            resfile,
            nx,
            ny,
            nz,
            resdates,
            specs["positive"],
            fileab,
            specs["header"],
            field,
        )
        outfiles.append(fileab + ".a")
        outfiles.append(fileab + ".b")
    return outfiles
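A usage sketch with hypothetical dimensions; the input files must follow the default patterns.

# Usage sketch: expects h.res, temp.res, saln.res, u.res, v.res and
# time_values.res in the working directory, and writes h-nest.a/.b,
# t-nest.a/.b, s-nest.a/.b, u-nest.a/.b and v-nest.a/.b
outfiles = run_bin2hycom(nx=720, ny=540, nz=40)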
def rest_head(date, outfile="rest_new_head", xcycles=1):
    """Write a binary date file for restart

    Parameters
    ----------
    date:
        A pandas compatible date that is rounded down to midnight.
        It is typically at J-1.
    outfile: str
        Output file
    xcycles: int
        1 for a restart from an external source, else > 1
    """
    date = pd.to_datetime(date).floor("D")
    date = (date - pd.to_datetime("1950-01-01")) / pd.to_timedelta("1D")
    with open(outfile, "wb") as f:
        np.array([xcycles, date], dtype=">f8").tofile(f)
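A usage sketch; the date is floored to midnight before being converted to days since 1950-01-01.

# Usage sketch: restart header for a hypothetical run, date taken at J-1.
# 2023-01-01 12:00 is floored to 2023-01-01, i.e. 26663 days since
# 1950-01-01, so the file contains the two ">f8" values (1.0, 26663.0).
rest_head("2023-01-01 12:00", outfile="rest_new_head", xcycles=1)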
def read_patch_input(patch_input):
    """Read the patch.input MPI tiling file

    Parameters
    ----------
    patch_input: str

    Return
    ------
    xarray.Dataset
        2D arrays contain the start and end indices along i and
        1D arrays contain the start and end indices along j.
        Other numbers are stored as global attributes.
    """
    with open(patch_input) as f:

        # Read sizes
        names = f.readline()[:-1].split()
        numbers = [int(n) for n in f.readline()[:-1].split()]
        sizes = dict(zip(names, numbers))

        # Init arrays
        ispt = xr.DataArray(
            np.zeros((sizes["mpe"], sizes["npe"]), dtype="i"),
            dims=("mpe", "npe"),
            attrs={"long_name": "i index start"},
        )
        iipe = ispt.copy()
        iipe.attrs.update(long_name="i index end")
        jspt = xr.DataArray(
            np.zeros(sizes["mpe"], dtype="i"),
            dims="mpe",
            attrs={"long_name": "j index start"},
        )
        jjpe = jspt.copy()
        jjpe.attrs.update(long_name="j index end")

        # Read i indices
        f.readline()
        for j in range(sizes["mpe"]):
            ispt[j] = f.readline()[11:-1].split()
            iipe[j] = f.readline()[11:-1].split()

        # Read j indices
        f.readline()
        jspt[:] = f.readline()[11:-1].split()
        jjpe[:] = f.readline()[11:-1].split()

    # Create dataset
    ds = xr.Dataset(
        {"ispt": ispt, "iipe": iipe, "jspt": jspt, "jjpe": jjpe}
    )
    for name in set(names).difference({"npe", "mpe"}):
        ds.attrs[name] = sizes[name]
    return ds
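A usage sketch with a hypothetical ``patch.input`` file:

# Usage sketch with a hypothetical patch.input file
patch = read_patch_input("patch.input")
print(patch.ispt.shape)  # (mpe, npe): i-start index of each MPI tile
print(patch.attrs)       # remaining scalar sizes as global attributes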
def hycom2nc(nx, ny, nz, grid_file, hycom_file, outpat="{field}-nest.nc"):
    """Convert HYCOM .a and .b files to a netcdf file

    Parameters
    ----------
    nx: int
    ny: int
    nz: int
    grid_file: str
        Regional grid .a or .b file name
    hycom_file: str
        HYCOM .a or .b file name
    outpat: str
        Output netcdf file name pattern with a ``{field}`` pattern inside

    Return
    ------
    str
        The output file
    """
    ds = read_regional_grid(grid_file, grid_loc="p")
    depth = np.arange(0, nz, 1)
    filea, fileb = get_ab_file_names(hycom_file)

    # Parse the .b file to get the field name and the dates
    with open(fileb, "r") as fb:
        series = fb.readlines()[5:]
    time = []
    for iseries in np.arange(0, len(series), nz):
        matches = re.search(
            r"\s*(?P<field>\w*)\W\s*date,range\s=\s*"
            r"(?P<time>\d.\d*\w\W\d*)\s*"
            r"(?P<min>\d.\d*\w\W\d*)\s*"
            r"(?P<max>\d.\d*\w\W\d*)",
            series[iseries],
            re.DOTALL,
        )
        time.append(
            convert_from_julian_day(float(matches["time"]) * 86400, unit="s")
        )
        field = matches["field"]

    # Read the .a file records
    field_values = np.zeros((len(time), nz, ny, nx))
    n2drec = int(((nx * ny + 4095) // 4096) * 4096)
    with open(filea, "rb") as fa:
        for it in range(len(time)):
            for iz in range(len(depth)):
                irec = it * nz + iz
                fa.seek(irec * n2drec * 4)
                field_values[it, iz, :, :] = np.reshape(
                    np.fromfile(fa, ">f4", nx * ny), (ny, nx)
                )

    # Build the output dataset
    da = xr.DataArray(
        field_values, name=field, dims=("time", "lev", "y", "x")
    )
    da = da.where(da != 0)
    da.encoding["_FillValue"] = -1.0e34
    ds = xr.merge([ds, da])
    ds = ds.assign_coords(time=time, depth=depth)
    ds = format_ds(ds)
    ncout = outpat.format(**locals())
    ds.to_netcdf(ncout)
    return ncout
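A usage sketch with hypothetical file names and dimensions, reading a temperature nest back into netcdf:

# Usage sketch with hypothetical file names and dimensions
ncfile = hycom2nc(720, 540, 40, "regional.grid.a", "t-nest.a")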
def nc2hycom(varname, nc_in, header):
    """Convert a netcdf variable to HYCOM .a and .b files

    Parameters
    ----------
    varname: str
        Name of the netcdf variable to convert
    nc_in: str
        Input netcdf file; the .a and .b file names are derived from it
    header: str
        First header line written to the .b file
    """
    # When several names are given, keep only the first one
    varname = varname[0] if len(varname) > 1 else varname
    ds = xr.open_dataset(nc_in)
    ds = ds.fillna(0)
    nt, nz, ny, nx = np.shape(ds[varname])
    n2drec = int(((nx * ny + 4095) // 4096) * 4096)
    field1d = np.zeros(n2drec, dtype=">f4")
    filea, fileb = get_ab_file_names(nc_in.split(".")[0])

    with open(fileb, "w") as fb, open(filea, "wb") as fa:

        # Write header
        for line in header, "", "", "", f"i/jdm = {nx} {ny}":
            fb.write("{:<79}\n".format(line))

        # Loop on records
        for it in range(nt):
            for iz in range(nz):
                irec = it * nz + iz

                # Write to .a
                field1d[: nx * ny] = ds[varname][it, iz, :, :].values.ravel()
                fa.seek(irec * n2drec * 4)
                field1d.tofile(fa)

                # Write to .b
                date = convert_to_julian_day(ds.time.values[it])
                vmin = ds[varname][it, iz, :, :].values.min()
                vmax = ds[varname][it, iz, :, :].values.max()
                fb.write(
                    f"{varname:>7}: date,range = "
                    f"{date:18e} {vmin:18e} {vmax:18e}\n"
                )