"""
In/output routines
Source for .a/.b handling: http://code.env.duke.edu/projects/mget/wiki/HYCOM
"""
import re
import math
import logging
import os
import shutil
import numpy as np
from ...log import LoggingLevelContext
with LoggingLevelContext(logging.WARNING):
import xarray as xr
import pandas as pd
import scipy.io as sio
from .__init__ import (
get_cf_specs,
SloopHycomError,
HYCOM3D_SIGMA_TO_STMT_FNS,
HYCOM3D_CONTEXT_TO_STMT_FNS,
HYCOM3D_IBC_GEN_INICON,
HYCOM3D_IBC_GEN_INICON_VCONF,
HYCOM3D_MODEL_DEFSTRETCH_FILES
)
from sloop.times import convert_from_julian_day, convert_to_julian_day


#: Per-field specifications for :func:`bin2hycom`. The .b headers are
#: written verbatim to the output files and are kept in French; English
#: glosses are given in the comments. "positive" requests that negative
#: values be clamped to zero when writing the .a file.
BIN2HYCOM_SPECS = {
    "h": {
        # layer thicknesses derived from the Mercator data
        "header": "epaisseurs de couches deduites des donnees Mercator",
        "positive": True,
    },
    "temp": {
        # temperatures derived from the Mercator data
        "header": "temperatures deduites des donnees Mercator",
        "positive": False,
    },
    "saln": {
        # salinities derived from the Mercator data
        "header": "salinites deduites des donnees Mercator",
        "positive": False,
    },
    "u": {
        # zonal velocities derived from the Mercator data
        "header": "vitesses zonales deduites des donnees Mercator",
        "positive": False,
    },
    "v": {
        # meridional velocities derived from the Mercator data
        "header": "vitesses meridiennes deduites des donnees Mercator",
        "positive": False,
    },
}


def get_ab_file_names(path):
    """Get the .a and .b file names from a single path

    Parameters
    ----------
    path: str
        Root, .a file or .b file

    Return
    ------
    str
        The .a file name
    str
        The .b file name
    """
    m = re.match(r"(?P<root>.+)\.[ab]$", path)
    root = m.group("root") if m is not None else path
    if root.endswith("."):
        root = root[:-1]
    afile = root + ".a"
    bfile = root + ".b"
    return afile, bfile
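
# Usage sketch (hypothetical path; "regional.grid", "regional.grid.a" and
# "regional.grid.b" all yield the same pair):
#
#   afile, bfile = get_ab_file_names("regional.grid.b")
#   # afile == "regional.grid.a", bfile == "regional.grid.b"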


def read_regional_grid_b(grid_b_file):
    """Read the regional.grid.b file

    Parameters
    ----------
    grid_b_file: str

    Return
    ------
    dict
    """
    _, grid_b_file = get_ab_file_names(grid_b_file)
    grid_specs = {}
    with open(grid_b_file, "r") as f:
        grid_specs["idm"] = int(f.readline().split()[0])  # longitudinal size
        grid_specs["jdm"] = int(f.readline().split()[0])  # latitudinal size
        grid_specs["mapflg"] = int(f.readline().split()[0])
        for line in f:
            if line:
                sline = line[:-1].split()
                key = sline[0][:-1]
                grid_specs[key + "_min"] = float(sline[-2])
                grid_specs[key + "_max"] = float(sline[-1])
    return grid_specs
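
# A regional.grid.b file starts with three scalar declarations followed by
# per-grid min/max lines, e.g. (illustrative values, spacing approximate):
#
#     720   'idm   ' = longitudinal array size
#     540   'jdm   ' = latitudinal array size
#       0   'mapflg' = map flag
#     plon:  min,max =   -20.00000   15.00000
#     plat:  min,max =    40.00000   55.00000
#     ...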


def read_regional_grid(grid_file, grid_loc=None):
    """Read the coordinates stored in a regional grid

    Parameters
    ----------
    grid_file: str
        Grid .a or .b file name
    grid_loc: None or str
        Grid locations to read, as one or several of "p", "q", "u" or "v"

    Return
    ------
    xarray.Dataset
        Dataset that contains the coordinates
    """
    allowed_grid_locs = "pquv"
    grid_a_file, grid_b_file = get_ab_file_names(grid_file)
    grid_specs = read_regional_grid_b(grid_b_file)
    idm = grid_specs["idm"]  # longitudinal size
    jdm = grid_specs["jdm"]  # latitudinal size

    # Compute the number of bytes of a variable stored in a HYCOM .a
    # file for a grid of this size: each record is padded to a
    # multiple of 16384 bytes.
    varByteLength = int(math.ceil(float(idm * jdm * 4) / 16384) * 16384)

    # Read the longitude and latitude grids as numpy arrays. The p, q,
    # u and v grids are stored in that order, as (lon, lat) pairs.
    coords = {}
    with open(grid_a_file, "rb") as f:
        for i, gl in enumerate(allowed_grid_locs):
            if grid_loc is not None and gl not in grid_loc:
                continue
            f.seek(i * 2 * varByteLength)
            lon = np.ascontiguousarray(
                np.fromfile(f, ">f4", idm * jdm).astype("float64").reshape(
                    (jdm, idm)
                )
            )
            f.seek((i * 2 + 1) * varByteLength)
            lat = np.ascontiguousarray(
                np.fromfile(f, ">f4", idm * jdm).astype("float64").reshape(
                    (jdm, idm)
                )
            )

            # According to Alan Wallcraft, the grid coordinates are only
            # accurate to four decimal digits (Fortran REAL*4 precision),
            # so round the coordinates to four digits.
            lon = lon.round(4)
            lat = lat.round(4)
            xname = gl + "lon"
            yname = gl + "lat"

            # Check consistency between the .a and .b files
            assert (
                lon.min() == np.round(grid_specs[xname + "_min"], 4)
                and lon.max() == np.round(grid_specs[xname + "_max"], 4)
                and lat.min() == np.round(grid_specs[yname + "_min"], 4)
                and lat.max() == np.round(grid_specs[yname + "_max"], 4)
            ), "grid .a and .b files are inconsistent"

            if gl != "p":
                sn_suffix = "_at_{}_location".format(gl)
                ln_suffix = " at {} location".format(gl.upper())
            else:
                sn_suffix = ln_suffix = ""
            # Use generic names only when a single location is requested,
            # otherwise the locations would overwrite one another
            if grid_loc and len(grid_loc) == 1:
                xname = "lon"
                yname = "lat"
            coords[xname] = xr.DataArray(
                lon,
                name=xname,
                dims=("jdm", "idm"),
                attrs=dict(
                    long_name="Longitude" + ln_suffix,
                    standard_name="longitude" + sn_suffix,
                    units="degree_east",
                ),
            )
            coords[yname] = xr.DataArray(
                lat,
                name=yname,
                dims=("jdm", "idm"),
                attrs=dict(
                    long_name="Latitude" + ln_suffix,
                    standard_name="latitude" + sn_suffix,
                    units="degree_north",
                ),
            )

    # Create a dataset with coords only
    ds = xr.Dataset(coords=coords)
    ds.attrs.update(grid_specs)
    return ds
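
# Usage sketch (hypothetical file name):
#
#   ds = read_regional_grid("regional.grid.a", grid_loc="p")
#   # ds has 2D "lon" and "lat" coordinates with dims ("jdm", "idm")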


def read_regional_depth(depth_a_file, lon, lat):
    """Read the regional depth binary file

    Parameters
    ----------
    depth_a_file: str
        Binary bathymetry file
    lon: xarray.DataArray
        Longitudes 2D array
    lat: xarray.DataArray
        Latitudes 2D array

    Return
    ------
    xarray.DataArray
        Bathymetry
    """
    depth_a_file, _ = get_ab_file_names(depth_a_file)
    jdm, idm = lon.shape
    with open(depth_a_file, mode="rb") as f:
        field = np.reshape(
            np.fromfile(f, dtype="float32", count=idm * jdm).byteswap(),
            (jdm, idm),
        )
    # HYCOM marks land points with a huge value
    field[field > 2 ** 99] = np.nan
    return xr.DataArray(
        field,
        name="bathymetry",
        dims=lon.dims,
        coords={"plon": lon, "plat": lat},
        attrs=dict(
            long_name="Bathymetry",
            units="m",
            standard_name="model_sea_floor_depth_below_sea_level",
        ),
    )
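
# Usage sketch (hypothetical file names), combining the two readers:
#
#   grid = read_regional_grid("regional.grid", grid_loc="p")
#   bathy = read_regional_depth("regional.depth.a", grid.lon, grid.lat)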


def write_res(ds, outfile_pattern="{field}.res"):
    """Write a Dataset to unformatted Fortran files

    One file is written per variable.

    Parameters
    ----------
    ds: xarray.Dataset
    outfile_pattern: str
        File pattern that must contain ``{field}``

    Return
    ------
    dict
        Dict of written files, where keys are fields
    """
    logger = logging.getLogger(__name__)
    cfs = get_cf_specs()

    # Coordinates
    lon = cfs.coords.get(ds, "lon").values.astype(">f4")
    lat = cfs.coords.get(ds, "lat").values.astype(">f4")
    if lon.ndim == 1:
        lon, lat = np.meshgrid(lon, lat)
    depth = cfs.get(ds, "depth").values.astype(">f4")
    time = (
        (cfs.coords.get(ds, "time").values - np.datetime64("1950-01-01"))
        / np.timedelta64(1, "D")
    ).astype(">f4")

    # Loop on variables
    outfiles = {}
    for cf_var_name in ["temp", "sal", "ssh"]:
        var = cfs.get(ds, cf_var_name).fillna(-1e34)
        field = cfs.data_vars.get_name(cf_var_name, specialize=True)
        shape = list(var.shape[::-1])
        if var.ndim == 3:
            # Variable without a depth dimension (e.g. ssh):
            # add a single dummy level
            shape.insert(2, 1)
            dep = depth[0]
        else:
            dep = depth
        shape = np.array(shape, ">i4")
        outfile = outfile_pattern.format(**locals())
        with sio.FortranFile(outfile, "w", ">u4") as f:
            f.write_record(shape)
            f.write_record(lon)
            f.write_record(lat)
            f.write_record(dep)
            f.write_record(time)
            f.write_record(var.values.astype(">f4"))
        outfiles[field] = outfile
        logger.debug("Created .res file: " + outfile)
    return outfiles
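
# The resulting .res file is a sequence of big-endian Fortran unformatted
# records: shape, lon, lat, depth, time, then the variable itself.
# A minimal read-back sketch with scipy (assuming a "temp.res" file
# written by the function above):
#
#   with sio.FortranFile("temp.res", "r", ">u4") as f:
#       shape = f.read_record(">i4")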


def nc_to_res(
    ncfiles, outfile_pattern="{field}.res{ifile:03d}", preproc=None
):
    """Convert netcdf files to .res files

    There is one .res file per variable and per netcdf file.

    Parameters
    ----------
    ncfiles: list
        List of netcdf files
    outfile_pattern: str
        Pattern for .res file names. `{field}` refers to the
        Hycom variable name and `{ifile}` refers to the file index
        starting from 1.
    preproc: callable
        Callable that operates on each input dataset after it is read

    Return
    ------
    dict
        Dict of lists of output files, where keys are fields
    """
    resfiles = {}
    for ifile, ncfile in enumerate(ncfiles, start=1):
        ds = xr.open_dataset(ncfile)
        if preproc:
            ds = preproc(ds)
        # Format {ifile} now while keeping the {field} placeholder intact
        field = "{field}"
        pattern = outfile_pattern.format(**locals())
        for vname, resfile in write_res(ds, pattern).items():
            resfiles.setdefault(vname, []).append(resfile)
        ds.close()
    return resfiles
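
# Usage sketch (hypothetical file names):
#
#   resfiles = nc_to_res(["ibc_day1.nc", "ibc_day2.nc"])
#   # e.g. {"temp": ["temp.res001", "temp.res002"], ...}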


def check_grid_dimensions(ddims, ds):
    """Check the consistency of grid dimensions between config and grid file

    Parameters
    ----------
    ddims: dict
        Dimensions, as in the results of
        :func:`get_dimensions_dict_from_conf`
    ds: xarray.Dataset or str
        Result from :func:`read_regional_grid`, or a grid file name
    """
    # From file
    if isinstance(ds, str):
        ds = read_regional_grid(ds)

    # Check
    if "itdm0" not in ddims:
        ddims["itdm0"] = ds.plon.shape[1]
    if "jtdm0" not in ddims:
        ddims["jtdm0"] = ds.plon.shape[0]
    if (
        int(ddims["itdm0"]) != ds.plon.shape[1]
        or int(ddims["jtdm0"]) != ds.plon.shape[0]
    ):
        raise SloopHycomError(
            "Dimensions inconsistency between "
            "config ({}, {}) "
            "and regional_grid file ({}, {})".format(
                ddims["itdm0"],
                ddims["jtdm0"],
                ds.plon.shape[1],
                ds.plon.shape[0],
            )
        )


def setup_stmt_fns(sigma, context):
    """Put the stmt_fns.h file in place, depending on the context

    Parameters
    ----------
    sigma: str, int
        Sigma value like 0 or 2
    context: {'ibc', 'model3d'}
    """
    sigma = str(sigma)
    if sigma not in HYCOM3D_SIGMA_TO_STMT_FNS:
        raise SloopHycomError(
            "Invalid sigma value. Please choose one of: "
            + " ".join(HYCOM3D_SIGMA_TO_STMT_FNS)
        )
    if context not in HYCOM3D_CONTEXT_TO_STMT_FNS:
        raise SloopHycomError(
            "Invalid context name. Please choose one of: "
            + " ".join(HYCOM3D_CONTEXT_TO_STMT_FNS)
        )
    stmt_fns_in = HYCOM3D_SIGMA_TO_STMT_FNS[sigma]
    stmt_fns_out = HYCOM3D_CONTEXT_TO_STMT_FNS[context]
    if os.path.exists(stmt_fns_out):
        os.remove(stmt_fns_out)
    shutil.copy(stmt_fns_in, stmt_fns_out)


def setup_gen_inicon(vconf):
    """Put the gen_inicon_hycom.F file in place, depending on the vconf

    Parameters
    ----------
    vconf: str
        Vconf value like manga, med, indien, gijon
    """
    logger = logging.getLogger(__name__)
    gen_inicon_out = HYCOM3D_IBC_GEN_INICON
    if vconf in HYCOM3D_IBC_GEN_INICON_VCONF:
        gen_inicon_in = HYCOM3D_IBC_GEN_INICON + f".{vconf}"
    else:
        gen_inicon_in = HYCOM3D_IBC_GEN_INICON + ".generic"
    logger.info(f"{gen_inicon_in} will be used as {gen_inicon_out}")
    if os.path.exists(gen_inicon_out):
        os.remove(gen_inicon_out)
    shutil.copy(gen_inicon_in, gen_inicon_out)


def setup_defstretch(vconf):
    """Put the defstretch.f/F file in place, depending on the vconf

    Parameters
    ----------
    vconf: str
        Vconf value like manga, med, indien, gijon
    """
    logger = logging.getLogger(__name__)
    if vconf not in HYCOM3D_MODEL_DEFSTRETCH_FILES:
        vconf = "generic"
    defstretch_in = HYCOM3D_MODEL_DEFSTRETCH_FILES[vconf]
    defstretch_out = defstretch_in.replace(f".{vconf}", "")
    logger.info(f"{defstretch_in} will be used as {defstretch_out}")
    defstretch_path = os.path.dirname(defstretch_out)
    # Remove any stale defstretch.f/.F before copying the new one
    for fmt in [".F", ".f"]:
        defstretch_file = os.path.join(defstretch_path, "defstretch" + fmt)
        if os.path.exists(defstretch_file):
            os.remove(defstretch_file)
    shutil.copy(defstretch_in, defstretch_out)


RE_SCALAR_DECLARATION_MATCH = re.compile(
    r"^\s*(\S+)\s*'(\w+)\s*'\s+= (.+)$"
).match


def parse_scalar_declaration(line):
    """Parse a string that declares a scalar

    Format: `` 720 'idm ' = longitudinal array size``

    Parameters
    ----------
    line: str

    Return
    ------
    tuple
        ``(name, value, description)`` where ``value`` is converted to
        its numerical form with :func:`eval`.
    """
    groups = RE_SCALAR_DECLARATION_MATCH(line).groups()
    return groups[1], eval(groups[0]), groups[2]
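
# Usage sketch, based on the format documented above:
#
#   parse_scalar_declaration("  720 'idm   ' = longitudinal array size")
#   # -> ("idm", 720, "longitudinal array size")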


def bin2hycom(
    resfile, nx, ny, nz, resdates, positive, fileab, header, field
):
    """Convert a .res file to a .a/.b file pair

    This does the job of the `bin2hycom` program.

    Parameters
    ----------
    resfile: str
    nx: int
    ny: int
    nz: int
    resdates: array_like
    positive: bool
    fileab: str
    header: str
    field: str
    """
    # Inits: .a records are padded to a multiple of 4096 4-byte words
    n2drec = int(((nx * ny + 4095) // 4096) * 4096)
    field1d = np.zeros(n2drec, dtype=">f4")
    filea, fileb = get_ab_file_names(fileab)

    # Read the dates: a big-endian record count followed by the dates
    with open(resdates, "rb") as fd:
        nt = int(np.fromfile(fd, ">i4", 1)[0])
        fd.seek(4)
        dates = np.fromfile(fd, ">f4", nt)

    with open(fileb, "w") as fb:
        # Write the .b header
        for line in header, "", "", "", f"i/jdm = {nx} {ny}":
            fb.write("{:<79}\n".format(line))

        # Loop on .res file records
        with open(resfile, "rb") as fr, open(filea, "wb") as fa:
            for it, date in enumerate(dates):
                for iz in range(nz):
                    irec = it * nz + iz

                    # Read .res
                    fr.seek(irec * nx * ny * 8)
                    field2d = np.fromfile(fr, ">f8", nx * ny)

                    # Write to .a
                    field1d[: nx * ny] = field2d.ravel()
                    if positive:
                        field1d[field1d < 0] = 0
                    fa.seek(irec * n2drec * 4)
                    field1d.tofile(fa)

                    # Write to .b
                    # Fortran format: (a21,1x,E18.8,1x,E18.8,1x,E18.8)
                    vmin = np.nanmin(field2d)
                    vmax = np.nanmax(field2d)
                    fb.write(
                        f"{field:>7}: date,range = "
                        f"{date:18e} {vmin:18e} {vmax:18e}\n"
                    )
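
# For instance, with nx=720 and ny=540, nx*ny = 388800 words and each 2D
# record is padded to n2drec = 389120 words (the next multiple of 4096),
# i.e. 1556480 bytes per record in the .a file.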


def run_bin2hycom(
    nx,
    ny,
    nz,
    inpat="{field}.res",
    resdates="time_values.res",
    outpat="{short_name}-nest",
):
    """Run the :func:`bin2hycom` function with the default file names

    Specifications are provided by the :data:`BIN2HYCOM_SPECS` content.

    Parameters
    ----------
    nx: int
    ny: int
    nz: int
    inpat: str
        Input .res file name pattern with the "{field}" pattern inside.
    resdates: str
        Input .res file that contains the dates.
    outpat: str
        Output .a and .b file name pattern with the "{short_name}"
        pattern inside and without extension.

    Return
    ------
    list
        The effective list of output files.
    """
    outfiles = []
    for field, specs in BIN2HYCOM_SPECS.items():
        resfile = inpat.format(**locals())
        short_name = field[:1]
        fileab = outpat.format(**locals())
        bin2hycom(
            resfile,
            nx,
            ny,
            nz,
            resdates,
            specs["positive"],
            fileab,
            specs["header"],
            field,
        )
        outfiles.append(fileab + ".a")
        outfiles.append(fileab + ".b")
    return outfiles
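
# Usage sketch (hypothetical dimensions), producing the h/t/s/u/v nest files:
#
#   outfiles = run_bin2hycom(720, 540, 40)
#   # -> ["h-nest.a", "h-nest.b", "t-nest.a", "t-nest.b", ...]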


def rest_head(date, outfile="rest_new_head", xcycles=1):
    """Write a binary date file for a restart

    Parameters
    ----------
    date:
        A pandas-compatible date that is rounded down to midnight.
        It is typically at J-1.
    outfile: str
        Output file
    xcycles: int
        1 for a restart from an external source, else > 1
    """
    date = pd.to_datetime(date).floor("D")
    # Convert to days since 1950-01-01
    date = (date - pd.to_datetime("1950-01-01")) / pd.to_timedelta("1D")
    with open(outfile, "wb") as f:
        np.array([xcycles, date], dtype=">f8").tofile(f)
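
# Usage sketch (hypothetical date):
#
#   rest_head("2021-06-02 12:00", outfile="rest_new_head")
#   # writes two big-endian float64 values: xcycles and the day count
#   # of 2021-06-02 00:00 relative to 1950-01-01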


def hycom2nc(nx, ny, nz,
             grid_file,
             hycom_file,
             outpat="{field}-nest.nc",
             ):
    """Convert a .a/.b file pair to a netcdf file

    Parameters
    ----------
    nx: int
    ny: int
    nz: int
    grid_file: str
        Regional grid .a or .b file name
    hycom_file: str
        Input .a or .b file name
    outpat: str
        Output nc file name pattern with the "{field}" pattern inside

    Return
    ------
    str
        The output file
    """
    ds = read_regional_grid(grid_file, grid_loc='p')
    depth = np.arange(0, nz, 1)
    filea, fileb = get_ab_file_names(hycom_file)

    # Parse the .b file: skip the 5 header lines, then read the field
    # name and the date of the first record of each time block
    with open(fileb, 'r') as fb:
        series = fb.readlines()[5:]
    time = []
    for itime, iseries in enumerate(np.arange(0, len(series), nz)):
        matches = re.search(
            r"\s*(?P<field>\w*)\W\s*date,range\s=\s*(?P<time>\d.\d*\w\W\d*)"
            r"\s*(?P<min>\d.\d*\w\W\d*)\s*(?P<max>\d.\d*\w\W\d*)",
            series[iseries], re.DOTALL)
        time.append(convert_from_julian_day(
            float(matches['time']) * 86400, unit='s'))
        field = matches['field']

    # Read the padded big-endian records of the .a file
    field_values = np.zeros((len(time), nz, ny, nx))
    n2drec = int(((nx * ny + 4095) // 4096) * 4096)
    with open(filea, 'rb') as fa:
        for it in range(len(time)):
            for iz in range(len(depth)):
                irec = it * nz + iz
                fa.seek(irec * n2drec * 4)
                field_values[it, iz, :, :] = np.reshape(
                    np.fromfile(fa, ">f4", nx * ny), (ny, nx))

    da = xr.DataArray(field_values, name=field,
                      dims=('time', 'lev', 'y', 'x'),
                      )
    da = da.where(da != 0)  # zeros are land/fill points
    da.encoding["_FillValue"] = -1.e34
    ds = xr.merge([ds, da])
    ds = ds.assign_coords(time=time, depth=("lev", depth))
    ds = format_ds(ds)
    ncout = outpat.format(**locals())
    ds.to_netcdf(ncout)
    return ncout
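
# Usage sketch (hypothetical file names and dimensions):
#
#   ncfile = hycom2nc(720, 540, 40, "regional.grid", "t-nest",
#                     outpat="{field}-nest.nc")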


def nc2hycom(varname, nc_in, header):
    """Convert a netcdf file to a .a/.b file pair

    Parameters
    ----------
    varname: str or sequence of str
        Variable name; if a sequence is given, only its first element
        is used
    nc_in: str
        Input netcdf file; the .a/.b root is derived from its name
    header: str
        Header line written at the top of the .b file
    """
    if not isinstance(varname, str):
        varname = varname[0]
    ds = xr.open_dataset(nc_in)
    ds = ds.fillna(0)
    nt, nz, ny, nx = np.shape(ds[varname])
    n2drec = int(((nx * ny + 4095) // 4096) * 4096)
    field1d = np.zeros(n2drec, dtype=">f4")
    filea, fileb = get_ab_file_names(nc_in.split('.')[0])

    with open(fileb, "w") as fb, open(filea, "wb") as fa:
        # Write the .b header
        for line in header, "", "", "", f"i/jdm = {nx} {ny}":
            fb.write("{:<79}\n".format(line))

        # Loop on records
        for it in range(nt):
            date = convert_to_julian_day(ds.time.values[it])
            for iz in range(nz):
                irec = it * nz + iz

                # Write the padded record to the .a file
                field1d[: nx * ny] = ds[varname][it, iz, :, :].values.ravel()
                fa.seek(irec * n2drec * 4)
                field1d.tofile(fa)

                # Write the matching line to the .b file
                vmin = ds[varname][it, iz, :, :].values.min()
                vmax = ds[varname][it, iz, :, :].values.max()
                fb.write(
                    f"{varname:>7}: date,range = "
                    f"{date:18e} {vmin:18e} {vmax:18e}\n"
                )
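
# Usage sketch (hypothetical file and header): writes "t-nest.a" and
# "t-nest.b" from "t-nest.nc":
#
#   nc2hycom("temp", "t-nest.nc",
#            "temperatures deduites des donnees Mercator")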