
# *****************************COPYRIGHT******************************
# (C) Crown copyright Met Office. All rights reserved.
# For further details please refer to the file LICENCE.txt
# which you should have received as part of this distribution.
# *****************************COPYRIGHT******************************
#
# This file is part of Mule.
#
# Mule is free software: you can redistribute it and/or modify it under
# the terms of the Modified BSD License, as published by the
# Open Source Initiative.
#
# Mule is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# Modified BSD License for more details.
#
# You should have received a copy of the Modified BSD License
# along with Mule.  If not, see <http://opensource.org/licenses/BSD-3-Clause>.

"""
This module provides a series of classes to allow interaction with various
file formats produced and used by the UM (Unified Model) system.

The top-level :class:`UMFile` class provides an object representing a generic
UM file of the fieldsfile-like type, as covered in document UMDP F03.
This enables any file of this general form to be handled.

In practice, most files will be of a specific known subtype and it is then
simpler and safer to use the appropriate subclass, :class:`~mule.ff.FieldsFile`
or :class:`~mule.lbc.LBCFile`: these perform type-specific sanity checking,
and provide named attributes to access all of the header elements.

For example:

>>> ff = mule.FieldsFile.from_file(in_path)
>>> print('model = ', ff.fixed_length_header.model_version)
>>> ff.integer_constants.num_soil_levels = 0
>>> ff.fields = [fld for fld in ff.fields
...              if (fld.lbuser7 == 1 and fld.lbuser4 in (204, 207)
...                  and 1990 <= fld.lbyr < 2000)]
>>> ff.to_file(out_path)

The more general :class:`UMFile` class is provided to handle files of other
types, and can also be used to correct or adjust files of recognised types that
are invalid because of unexpected or inconsistent header information.

"""

from __future__ import (absolute_import, division, print_function)

import os
import numpy as np
import numpy.ma
import weakref
import six
from contextlib import contextmanager
from mule.stashmaster import STASHmaster

__version__ = "2025.10.1"

# UM fixed length header names and positions
_UM_FIXED_LENGTH_HEADER = [
    ('data_set_format_version',            1),
    ('sub_model',                          2),
    ('vert_coord_type',                    3),
    ('horiz_grid_type',                    4),
    ('dataset_type',                       5),
    ('run_identifier',                     6),
    ('experiment_number',                  7),
    ('calendar',                           8),
    ('grid_staggering',                    9),
    ('time_type',                         10),
    ('projection_number',                 11),
    ('model_version',                     12),
    ('obs_file_type',                     14),
    ('last_fieldop_type',                 15),
    ('t1_year',                           21),
    ('t1_month',                          22),
    ('t1_day',                            23),
    ('t1_hour',                           24),
    ('t1_minute',                         25),
    ('t1_second',                         26),
    ('t1_year_day_number',                27),
    ('t2_year',                           28),
    ('t2_month',                          29),
    ('t2_day',                            30),
    ('t2_hour',                           31),
    ('t2_minute',                         32),
    ('t2_second',                         33),
    ('t2_year_day_number',                34),
    ('t3_year',                           35),
    ('t3_month',                          36),
    ('t3_day',                            37),
    ('t3_hour',                           38),
    ('t3_minute',                         39),
    ('t3_second',                         40),
    ('t3_year_day_number',                41),
    ('integer_constants_start',          100),
    ('integer_constants_length',         101),
    ('real_constants_start',             105),
    ('real_constants_length',            106),
    ('level_dependent_constants_start',  110),
    ('level_dependent_constants_dim1',   111),
    ('level_dependent_constants_dim2',   112),
    ('row_dependent_constants_start',    115),
    ('row_dependent_constants_dim1',     116),
    ('row_dependent_constants_dim2',     117),
    ('column_dependent_constants_start', 120),
    ('column_dependent_constants_dim1',  121),
    ('column_dependent_constants_dim2',  122),
    ('additional_parameters_start',      125),
    ('additional_parameters_dim1',       126),
    ('additional_parameters_dim2',       127),
    ('extra_constants_start',            130),
    ('extra_constants_length',           131),
    ('temp_historyfile_start',           135),
    ('temp_historyfile_length',          136),
    ('compressed_field_index1_start',    140),
    ('compressed_field_index1_length',   141),
    ('compressed_field_index2_start',    142),
    ('compressed_field_index2_length',   143),
    ('compressed_field_index3_start',    144),
    ('compressed_field_index3_length',   145),
    ('lookup_start',                     150),
    ('lookup_dim1',                      151),
    ('lookup_dim2',                      152),
    ('total_prognostic_fields',          153),
    ('data_start',                       160),
    ('data_dim1',                        161),
    ('data_dim2',                        162),
    ]


# UM FieldsFile/PP LOOKUP default header mapping (contains the bare-minimum
# assumed elements for the purposes of associating the data and identifying
# the exact type of field).
_LOOKUP_HEADER_DEFAULT = [
    ('lblrec',  15),
    ('lbpack',  21),
    ('lbrel',   22),
    ('lbegin',  29),
    ('lbnrec',  30),
    ('bacc',    51),
    ]

# UM FieldsFile/PP LOOKUP header names and positions for header release vn.2
_LOOKUP_HEADER_2 = [
    ('lbyr',     1),
    ('lbmon',    2),
    ('lbdat',    3),
    ('lbhr',     4),
    ('lbmin',    5),
    ('lbday',    6),
    ('lbyrd',    7),
    ('lbmond',   8),
    ('lbdatd',   9),
    ('lbhrd',   10),
    ('lbmind',  11),
    ('lbdayd',  12),
    ('lbtim',   13),
    ('lbft',    14),
    ('lblrec',  15),
    ('lbcode',  16),
    ('lbhem',   17),
    ('lbrow',   18),
    ('lbnpt',   19),
    ('lbext',   20),
    ('lbpack',  21),
    ('lbrel',   22),
    ('lbfc',    23),
    ('lbcfc',   24),
    ('lbproc',  25),
    ('lbvc',    26),
    ('lbrvc',   27),
    ('lbexp',   28),
    ('lbegin',  29),
    ('lbnrec',  30),
    ('lbproj',  31),
    ('lbtyp',   32),
    ('lblev',   33),
    ('lbrsvd1', 34),
    ('lbrsvd2', 35),
    ('lbrsvd3', 36),
    ('lbrsvd4', 37),
    ('lbsrce',  38),
    ('lbuser1', 39),
    ('lbuser2', 40),
    ('lbuser3', 41),
    ('lbuser4', 42),
    ('lbuser5', 43),
    ('lbuser6', 44),
    ('lbuser7', 45),
    ('brsvd1',  46),
    ('brsvd2',  47),
    ('brsvd3',  48),
    ('brsvd4',  49),
    ('bdatum',  50),
    ('bacc',    51),
    ('blev',    52),
    ('brlev',   53),
    ('bhlev',   54),
    ('bhrlev',  55),
    ('bplat',   56),
    ('bplon',   57),
    ('bgor',    58),
    ('bzy',     59),
    ('bdy',     60),
    ('bzx',     61),
    ('bdx',     62),
    ('bmdi',    63),
    ('bmks',    64),
    ]

# UM FieldsFile/PP LOOKUP header names and positions for header release vn.3
# These are identical to header release vn.2 above apart from the 6th and 12th
# elements, which had their meanings changed from "day of year" to "second"
_LOOKUP_HEADER_3 = [(name, position) for name, position in _LOOKUP_HEADER_2]
_LOOKUP_HEADER_3[5] = ('lbsec', 6)
_LOOKUP_HEADER_3[11] = ('lbsecd', 12)

# Global default word (record) size (in bytes)
_DEFAULT_WORD_SIZE = 8

# Default missing values for **header** objects (not values in data!)
_INTEGER_MDI = -32768
_REAL_MDI = -1073741824.0


class _HeaderMetaclass(type):
    """
    Metaclass used to give named attributes to other classes.

    This metaclass is used in the construction of several header-like
    classes in this API; note that it is applied on *defining* the classes
    (i.e. when the module is imported), *not* later when a specific
    instance of the classes is initialised.

    The purpose of this class is to attach a set of named attributes to the
    header object and associate these with specific indices of the
    underlying array of header values.  The target class defines this
    "mapping" itself, allowing this metaclass to be used for multiple
    header-like objects.

    """
    def __new__(cls, classname, bases, class_dict):
        """
        Called upon definition of the target class to add the named
        attributes.  The target class should define a HEADER_MAPPING
        attribute to specify the mapping to be used for the attributes.
        The metaclass will assume the actual data values exist in an
        attribute of the target class called "_values".

        """
        # This method will return a new "getter"; which retrieves a set of
        # indices from the named attribute containing the actual value
        # array inside the target class
        def make_getter(array_attribute, indices):
            def getter(self):
                return getattr(self, array_attribute)[indices]
            return getter

        # ... and this one does the same thing but returns a "setter" to
        # allow assignment of values to the array inside the target class
        def make_setter(array_attribute, indices):
            def setter(self, values):
                getattr(self, array_attribute)[indices] = values
            return setter

        # Retrieve the desired mapping defined by the target class
        mapping = class_dict.get("HEADER_MAPPING")
        if mapping is not None:
            for name, indices in mapping:
                # Add a new named attribute to the class under the name
                # given in the mapping, and use the two functions above to
                # provide the methods to get + set the attribute
                # appropriately
                class_dict[name] = property(make_getter("_values", indices),
                                            make_setter("_values", indices))

        # Finish construction of the class
        return super(_HeaderMetaclass, cls).__new__(
            cls, classname, bases, class_dict)
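

# A minimal sketch of the metaclass mechanism (illustrative only; the
# "_Demo" class below is hypothetical and is not part of the API):
#
#     class _Demo(six.with_metaclass(_HeaderMetaclass, object)):
#         HEADER_MAPPING = [('first_word', 1)]
#         def __init__(self):
#             self._values = np.array([None, 42], dtype=object)
#
#     _Demo().first_word   # --> 42, read via the generated property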


class BaseHeaderComponent(six.with_metaclass(_HeaderMetaclass, object)):
    """
    Base class for a UM header component.

    .. Note::
        This class is not intended to be used directly; it acts only to
        group together the common parts of the
        :class:`BaseHeaderComponent1D` and :class:`BaseHeaderComponent2D`
        classes.

    """
    # The values in this base class should be overridden as they will
    # not do anything useful if left set to None.
    MDI = None
    """The value to use to indicate missing header values."""

    DTYPE = None
    """The data-type of the words in the header."""

    CREATE_DIMS = None
    """
    A tuple defining the default dimensions of the header to be produced
    by the :meth:`~BaseHeaderComponent.empty` method, when the caller
    provides incomplete shape information.  Where an element of the tuple
    is "None", the arguments to the empty method *must* specify a size for
    the corresponding dimension.

    """

    HEADER_MAPPING = None
    """
    A list containing a series of tuple-pairs; a named attribute and the
    raw index in the header to associate with it (see the help for the
    :class:`_HeaderMetaclass` for further details).

    """

    @property
    def shape(self):
        """Return the shape of the header object."""
        return self._values[..., 1:].shape

    @property
    def raw(self):
        """Return the raw values of the header object."""
        return self._values.view()

    def copy(self):
        """Create a copy of the header object."""
        return type(self)(self.raw[..., 1:])


class BaseHeaderComponent1D(BaseHeaderComponent):
    """1-Dimensional UM header component."""
    CREATE_DIMS = (None,)

    def __init__(self, values):
        """
        Initialise the object from a series of values.

        Args:

        * values:
            array-like object containing the values in this header.

        .. Note::
            The values are internally stored offset by 1 element (so that
            when the raw values are accessed their indexing is 1-based, to
            match up with their definitions in UMDP F03).

        """
        self._values = np.empty(len(values) + 1, dtype=object)
        self._values[1:] = np.asarray(values, dtype=self.DTYPE)

    @classmethod
    def empty(cls, num_words=None):
        """
        Create an instance of the class from-scratch.

        Kwargs:

        * num_words:
            The number of words to use to create the header.

        .. Note::
            Passing "num_words" may be optional or mandatory depending on
            the value of the class's CREATE_DIMS attribute.

        """
        if num_words is None:
            num_words = cls.CREATE_DIMS[0]
        if num_words is None:
            raise ValueError('"num_words" has no valid default')
        return cls([cls.MDI]*num_words)

    @classmethod
    def from_file(cls, source, num_words):
        """
        Create an instance of the class populated by values from a file.

        Args:

        * source:
            The (open) file object containing the header values, with its
            file pointer positioned at the start of this header.
        * num_words:
            The number of words to read in from the file to populate the
            header.

        """
        values = np.fromfile(source, dtype=cls.DTYPE, count=num_words)
        return cls(values)

    def to_file(self, output_file):
        """
        Write the header to a file object.

        Args:

        * output_file:
            The (open) file object for the header to be written to.

        """
        output_file.write(self._values[1:].astype(self.DTYPE))


class BaseHeaderComponent2D(BaseHeaderComponent):
    """2-Dimensional UM header component."""
    CREATE_DIMS = (None, None)

    def __init__(self, values):
        """
        Initialise the object from a series of values.

        Args:

        * values:
            2-dimensional array-like object containing the values in this
            header.

        .. Note::
            The values are internally stored offset by 1 element in their
            second dimension (so that when the raw values are accessed
            their indexing is 1-based, to match up with the definitions
            in UMDP F03).

        """
        self._values = np.empty((values.shape[0], values.shape[1] + 1),
                                dtype=object)
        self._values[:, 1:] = values

    @classmethod
    def empty(cls, dim1=None, dim2=None):
        """
        Create an instance of the class from-scratch.

        Kwargs:

        * dim1:
            The number of words to use for the header's first dimension.
        * dim2:
            The number of words to use for the header's second dimension.

        .. Note::
            Setting "dim1" and/or "dim2" may be optional or mandatory
            depending on the values of the class's CREATE_DIMS attribute.

        """
        if dim1 is None:
            dim1 = cls.CREATE_DIMS[0]
        if dim2 is None:
            dim2 = cls.CREATE_DIMS[1]
        if dim1 is None:
            raise ValueError('"dim1" has no valid default')
        if dim2 is None:
            raise ValueError('"dim2" has no valid default')
        values = np.empty((dim1, dim2), dtype=cls.DTYPE)
        values[:, :] = cls.MDI
        return cls(values)

    @classmethod
    def from_file(cls, source, dim1, dim2):
        """
        Create an instance of the class populated by values from a file.

        Args:

        * source:
            The (open) file object containing the header values, with its
            file pointer positioned at the start of this header.
        * dim1:
            The number of words to read in from the file to populate each
            row of the header.
        * dim2:
            The number of the above rows to read in from the file to
            populate the header.

        """
        values = np.fromfile(source, dtype=cls.DTYPE,
                             count=np.prod((dim1, dim2)))
        values = values.reshape((dim1, dim2), order="F")
        return cls(values)

    def to_file(self, output_file):
        """
        Write the header to a file object.

        Args:

        * output_file:
            The (open) file object for the header to be written to.

        """
        output_file.write(np.ravel(
            self._values[:, 1:].astype(self.DTYPE), order="F"))


class FixedLengthHeader(BaseHeaderComponent1D):
    """
    The fixed length header component of a UM file.

    This component is different to the others since its length cannot be
    altered at creation-time; the fixed length header is always a specific
    number of words in length.

    """
    HEADER_MAPPING = _UM_FIXED_LENGTH_HEADER
    MDI = _INTEGER_MDI
    DTYPE = ">i8"

    _NUM_WORDS = 256
    """The (fixed) number of words in a UM fixed length header."""

    def __init__(self, values):
        """
        Initialise the object from a series of values.

        Args:

        * values:
            array-like object containing the values in this header; must
            be exactly the length specified by _NUM_WORDS.

        .. Note::
            The values are internally stored offset by 1 element (so that
            when the raw values are accessed their indexing is 1-based, to
            match up with their definitions in UMDP F03).

        """
        if len(values) != self._NUM_WORDS:
            _msg = ('Incorrect size for fixed length header; given {0} '
                    'words but should be {1}.'.format(len(values),
                                                      self._NUM_WORDS))
            raise ValueError(_msg)
        super(FixedLengthHeader, self).__init__(values)

    @classmethod
    def empty(cls):
        """
        Create an instance of the class from-scratch.

        Unlike the other header components the fixed length header always
        creates a class of a fixed size (based on its _NUM_WORDS
        attribute).

        """
        return super(FixedLengthHeader, cls).empty(cls._NUM_WORDS)

    @classmethod
    def from_file(cls, source):
        """
        Create an instance of the class populated by values from a file.

        Unlike the other header components the fixed length header always
        reads a specific number of values (based on its _NUM_WORDS
        attribute).

        Args:

        * source:
            The (open) file object containing the header values, with its
            file pointer positioned at the start of this header.

        """
        return super(FixedLengthHeader, cls).from_file(source,
                                                       cls._NUM_WORDS)
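

# A minimal usage sketch (illustrative only, not executed on import):
# build an empty fixed-length header, set a word by name and inspect it
# through the 1-based raw view:
#
#     flh = FixedLengthHeader.empty()
#     flh.dataset_type = 3
#     assert flh.raw[5] == 3    # word 5 is "dataset_type" (UMDP F03)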


class IntegerConstants(BaseHeaderComponent1D):
    """The integer constants component of a UM file."""
    MDI = _INTEGER_MDI
    DTYPE = ">i8"


class RealConstants(BaseHeaderComponent1D):
    """The real constants component of a UM file."""
    MDI = _REAL_MDI
    DTYPE = ">f8"


class LevelDependentConstants(BaseHeaderComponent2D):
    """The level dependent constants component of a UM file."""
    MDI = _REAL_MDI
    DTYPE = ">f8"


class RowDependentConstants(BaseHeaderComponent2D):
    """The row dependent constants component of a UM file."""
    MDI = _REAL_MDI
    DTYPE = ">f8"


class ColumnDependentConstants(BaseHeaderComponent2D):
    """The column dependent constants component of a UM file."""
    MDI = _REAL_MDI
    DTYPE = ">f8"


class UnsupportedHeaderItem1D(BaseHeaderComponent1D):
    """An unsupported 1-dimensional component of a UM file."""
    MDI = _INTEGER_MDI
    DTYPE = ">i8"


class UnsupportedHeaderItem2D(BaseHeaderComponent2D):
    """An unsupported 2-dimensional component of a UM file."""
    MDI = _INTEGER_MDI
    DTYPE = ">i8"


class Field(six.with_metaclass(_HeaderMetaclass, object)):
    """
    Represents a single entry in the lookup table, and provides access to
    the data referenced by it.

    .. Note::
        This class assumes the (common) UM lookup header comprising 64
        words split between 45 integer and 19 real values.

    """
    HEADER_MAPPING = _LOOKUP_HEADER_DEFAULT

    # The expected number of lookup entries which are integers and reals.
    NUM_LOOKUP_INTS = 45
    NUM_LOOKUP_REALS = 19

    # The types of the integers and reals.
    DTYPE_INT = ">i8"
    DTYPE_REAL = ">f8"

    def __init__(self, int_headers, real_headers, data_provider):
        """
        Initialise the Field object.

        Args:

        * int_headers:
            A sequence of integer header values.
        * real_headers:
            A sequence of floating-point header values.
        * data_provider:
            An object representing the field data payload.  Typically,
            this is an object with a "_data_array" method, in which case
            the data can be fetched with :meth:`get_data`.

        """
        # Create a numpy object array to hold the entire lookup, leaving a
        # space for the zeroth index so that it behaves like the 1-based
        # indexing referred to in UMDP F03
        self._values = np.ndarray(len(int_headers) + len(real_headers) + 1,
                                  dtype=object)
        # Populate the first half with the integers
        self._values[1:len(int_headers)+1] = (
            np.asarray(int_headers, dtype=self.DTYPE_INT))
        # And the rest with the real values
        self._values[len(int_headers)+1:] = (
            np.asarray(real_headers, dtype=self.DTYPE_REAL))

        # Create views onto the above array to retrieve the integer/real
        # parts of the lookup header separately (for writing out)
        self._lookup_ints = self._values[1:len(int_headers)+1]
        self._lookup_reals = self._values[len(int_headers)+1:]

        # Save the reference to the given data provider.
        self._data_provider = data_provider

        # Initialise an empty stash entry (this may optionally be set by
        # the containing file object later on)
        self.stash = None

    @classmethod
    def empty(cls):
        """
        Create an instance of the class from-scratch.

        The instance will be filled with empty values (-99 for integers,
        and 0.0 for reals), and will have no data_provider set.

        """
        integers = np.empty(cls.NUM_LOOKUP_INTS, cls.DTYPE_INT)
        integers[:] = -99
        reals = np.empty(cls.NUM_LOOKUP_REALS, cls.DTYPE_REAL)
        reals[:] = 0.0
        return cls(integers, reals, None)

    @property
    def raw(self):
        """Return the raw values in the lookup array."""
        return self._values.view()

    def to_file(self, output_file):
        """
        Write the lookup header to a file object.

        Args:

        * output_file:
            The (open) file object for the lookup to be written to.

        """
        output_file.write(self._lookup_ints.astype(self.DTYPE_INT))
        output_file.write(self._lookup_reals.astype(self.DTYPE_REAL))

    def copy(self):
        """
        Create a Field which copies its header information from this one,
        and takes its data from the same data provider.

        """
        new_field = type(self)(self._lookup_ints.copy(),
                               self._lookup_reals.copy(),
                               self._data_provider)
        new_field.stash = self.stash
        return new_field

    def set_data_provider(self, data_provider):
        """
        Set the field data payload.

        Args:

        * data_provider:
            An object representing the field data payload.  Typically,
            this is an object with a "_data_array" method, which means
            the data can be accessed with :meth:`get_data`.

        """
        self._data_provider = data_provider

    def num_values(self):
        """Return the number of values defined by this header."""
        return len(self._values) - 1

    def get_data(self):
        """Return the data for this field as an array."""
        data = None
        if hasattr(self._data_provider, '_data_array'):
            data = self._data_provider._data_array()
        return data

    def _get_raw_payload_bytes(self):
        """
        Return a buffer containing the raw bytes of the data payload.

        The field data must be unmodified and using the same packing code
        as the original data (this can be tested by calling
        _can_copy_deferred_data).

        """
        data = None
        if hasattr(self._data_provider, "_read_bytes"):
            data = self._data_provider._read_bytes()
        return data

    def _can_copy_deferred_data(self, required_lbpack, required_bacc,
                                required_word):
        """
        Return whether or not it is possible to simply re-use the bytes
        making up the field; for this to be possible the data must be
        unmodified, and the requested output packing and disk word size
        must be the same as the input.

        """
        # Whether or not this is possible depends on if the Field's
        # data provider has been wrapped in any operations
        compatible = hasattr(self._data_provider, "_read_bytes")
        if compatible:
            # Is the packing code the same
            src_lbpack = self._data_provider.source.lbpack
            compatible = required_lbpack == src_lbpack
            # If it's WGDOS packing, the accuracy matters too
            if src_lbpack == 1:
                src_bacc = self._data_provider.source.bacc
                compatible = compatible and required_bacc == src_bacc
            else:
                # Otherwise the disk size matters
                src_word = self._data_provider.DISK_RECORD_SIZE
                compatible = compatible and required_word == src_word
        return compatible


class Field2(Field):
    """
    Represents an entry from the LOOKUP component with a header release
    number of 2.

    """
    HEADER_MAPPING = _LOOKUP_HEADER_2


class Field3(Field):
    """
    Represents an entry from the LOOKUP component with a header release
    number of 3.

    """
    HEADER_MAPPING = _LOOKUP_HEADER_3


class ArrayDataProvider(object):
    """
    A :class:`Field` data provider that contains an actual array of
    values.

    This is used to make a field with an ordinary array as its data
    payload.

    .. Note::
        This must be used with caution, as multiple fields with a
        concrete data payload can easily consume large amounts of space.
        By contrast, processing field payloads from an existing file will
        normally only load one at a time.

    """

    def __init__(self, array):
        """
        Create a data-provider which contains a concrete data array.

        Args:

        * array (array-like):
            The data payload.  It is converted to a numpy array.  It must
            be 2D unmasked data.

        """
        if numpy.ma.is_masked(array):
            raise ValueError('ArrayDataProvider does not handle masked '
                             'data.')
        array = numpy.asarray(array)
        shape = array.shape
        if len(shape) != 2:
            msg = ('ArrayDataProvider has shape {}, which is not '
                   '2-dimensional.')
            raise ValueError(msg.format(shape))
        self._array = array

    def _data_array(self):
        """Return the data payload."""
        return self._array
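

# A minimal usage sketch (illustrative only, not executed on import):
# wrap a concrete 2D array as a field's data payload and read it back:
#
#     fld = Field3.empty()
#     fld.lbrow, fld.lbnpt = 3, 4
#     fld.set_data_provider(ArrayDataProvider(np.zeros((3, 4))))
#     data = fld.get_data()    # returns the 3 x 4 array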


class _OperatorDataProvider(object):
    """
    A :class:`Field` data provider that fetches its data from a
    :class:`DataOperator`, by calling :meth:`transform`.

    .. Note::
        This should only really ever be instantiated from within the
        :class:`DataOperator`.

    """

    def __init__(self, operator, source, new_field):
        """
        Create a wrapper, including references to the operator, the
        original source data and the result field.

        Args:

        * operator:
            A reference to the :class:`DataOperator` instance which
            created this provider (to allow its :meth:`transform` method
            to be accessed in :meth:`_data_array` below).
        * source:
            The source object for the above :class:`DataOperator` - this
            can be anything, and is required here so that it can be
            passed onto the operator's :meth:`transform` method below.
        * new_field:
            The new field returned by the above :class:`DataOperator` -
            this is again needed by the operator's :meth:`transform`
            method.

        """
        self.operator = operator
        self.source = source
        # The reference which is passed to the transform below must be a
        # weakref.  The reason for this is to avoid a circular dependency
        # that will interfere with Python's garbage collection.  Since the
        # operator will ultimately be attached to the new_field object, it
        # *must not* hold a reference to it as well.
        self.result_field = weakref.ref(new_field)

    def _data_array(self):
        """Return the data using the provided operator."""
        return self.operator.transform(self.source, self.result_field)


class DataOperator(object):
    """
    Base class which should be sub-classed to perform manipulations on
    the data of a field.

    The :class:`Field` classes never store any data directly in memory;
    only the means to retrieve it from disk and perform any required
    operations (which will only be executed when explicitly requested -
    this would normally be at the point the file is being written/closed).

    .. Note::
        the user must override the "__init__", "new_field" and
        "transform" methods of this baseclass to create a valid operator.

    A DataOperator is used to produce new :class:`Field` s, which are
    calculated from existing source fields and which can also calculate
    their data results from the source data at a subsequent time.

    The normal usage occurs in 3 separate stages:

    * :meth:`__init__` creates a new operator with any instance-specific
      parameters.
    * :meth:`__call__` is used to produce new, transformed :class:`Field`
      objects from existing ones, via the user :meth:`new_field` method.
    * :meth:`transform` is called by an output field to calculate its
      data payload.

    For example:

    >>> class XSampler(DataOperator):
    ...     def __init__(self, factor):
    ...         self.factor = factor
    ...     def new_field(self, source_field):
    ...         fld = source_field.copy()
    ...         fld.lbnpt //= self.factor
    ...         fld.bdx *= self.factor
    ...         return fld
    ...     def transform(self, source_field, result_field):
    ...         data = source_field.get_data()
    ...         return data[:, ::self.factor]
    ...
    >>> XStep4 = XSampler(factor=4)
    >>> ff.fields = [XStep4(fld) for fld in ff.fields]
    >>> ff.to_file(out_path)

    """

    def __init__(self, *args, **kwargs):
        """
        Initialise the operator object - this should be overridden by the
        user.

        This method should accept any user arguments to be "baked" into
        the operator or to otherwise initialise it as-per the user's
        requirements; for example an operator which scales the values in
        fields by a constant amount might want to accept an argument
        giving that amount.

        """
        msg = ("The __init__ method of the DataOperator baseclass should "
               "be overridden by the user")
        raise NotImplementedError(msg)

    def __call__(self, source, *args, **kwargs):
        """
        Wrap the operator around a source object.

        This calls the user-supplied :meth:`new_field` method, and
        configures the resulting field to return its data from the
        :meth:`transform` method of the data operator.

        Args:

        * source:
            This can be an object of any type; it is typically an
            existing :class:`Field` which the result field is based on.

        Returns:

        * new_field (:class:`Field`):
            A new Field instance, which returns data generated via the
            :meth:`transform` method.

        """
        new_field = self.new_field(source, *args, **kwargs)
        provider = _OperatorDataProvider(self, source, new_field)
        new_field.set_data_provider(provider)
        return new_field

    def new_field(self, source, *args, **kwargs):
        """
        Produce a new output :class:`Field` from a source object - this
        method should be overridden by the user.

        This method encodes how to produce a new field, which is
        typically derived by calculation from an existing source field or
        fields.  It is called by the :meth:`__call__` method.

        Args:

        * source:
            This can be an object of any type; it is typically an
            existing :class:`Field` which the result field is based on.

        Returns:

        * new_field (:class:`Field`):
            A new Field instance, whose lookup attributes reflect the
            final state of the result: e.g. if the operator affects the
            number of rows in the field, then 'new_field' must have its
            row settings set accordingly.

        .. Note::
            It is advisable not to modify the "source" object inside this
            method; modifications should be confined to the new field
            object.

        """
        msg = ("The new_field method of the DataOperator baseclass should "
               "be overridden by the user")
        raise NotImplementedError(msg)

    def transform(self, source, result_field):
        """
        Calculate the data payload for a result field - this method
        should be overridden by the user.

        This method must return a 2D numpy array containing the field
        data.  Typically it will extract the data payload from a source
        field and manipulate it in some way.

        Args:

        * source:
            The original 'source' argument from the :meth:`__call__`
            invocation that created 'result_field'.  Usually, this is a
            pre-existing :class:`Field` object from which the result
            field is calculated.
        * result_field:
            The 'new' field that was created by a call to
            :meth:`__call__`, for which the data is now wanted.  This
            should not be modified, but provides access to any necessary
            context information determined when it was created.

        Returns:

        * data (array):
            The data array for 'result_field'.

        """
        msg = ("The transform method of the DataOperator baseclass should "
               "be overridden by the user")
        raise NotImplementedError(msg)


class RawReadProvider(object):
    """
    A generic 'data provider' object, which deals with the most
    basic/common data-provision operation of reading in Field data from a
    file.

    This class should not be used directly, since it does not define a
    "_data_array" method, and so cannot return any data.  A series of
    subclasses of this class are provided which define the '_data_array'
    method for the different packing types found in various types of
    :class:`UMFile`.

    """
    DISK_RECORD_SIZE = _DEFAULT_WORD_SIZE

    def __init__(self, source, sourcefile, offset):
        """
        Initialise the read provider.

        Args:

        * source:
            Initial field object reference (populated with the lookup
            values from the file given in sourcefile).
        * sourcefile:
            The open file object for the source file.
        * offset:
            Starting position of the Field data in sourcefile (in bytes).

        """
        self.source = source
        self.sourcefile = sourcefile
        self.offset = offset

    @contextmanager
    def _with_source(self):
        # Context manager to temporarily reopen the sourcefile if the
        # original provided at create time has been closed.
        reopen_required = self.sourcefile.closed
        close_required = False
        try:
            if reopen_required:
                self.sourcefile = open(self.sourcefile.name, "rb")
                close_required = True
            yield self.sourcefile
        finally:
            if close_required:
                self.sourcefile.close()

    def _read_bytes(self):
        # Return the raw data payload, as an array of bytes.
        # This is independent of the content type.
        field = self.source
        with self._with_source():
            self.sourcefile.seek(self.offset)
            data_size = field.lbnrec * self.DISK_RECORD_SIZE
            data_bytes = self.sourcefile.read(data_size)
        return data_bytes


class _NullReadProvider(RawReadProvider):
    """
    A 'raw' data provider object to be used when a packing code is
    unrecognised - to be able to represent unknown-type data in a
    :class:`Field`.

    """
    def _data_array(self):
        lbpack = self.source.raw[21]
        msg = "Packing code {0} unsupported".format(lbpack)
        raise NotImplementedError(msg)


class UMFile(object):
    """Represents the structure of a single UM file."""
    # The base UMFile object uses the base versions of the standard
    # components; these will allow any shape for each component, and do
    # not have associated mappings for the values (so they will not have
    # nicely named properties).
    COMPONENTS = (('integer_constants', IntegerConstants),
                  ('real_constants', RealConstants),
                  ('level_dependent_constants', LevelDependentConstants),
                  ('row_dependent_constants', RowDependentConstants),
                  ('column_dependent_constants', ColumnDependentConstants),
                  ('additional_parameters', UnsupportedHeaderItem2D),
                  ('extra_constants', UnsupportedHeaderItem1D),
                  ('temp_historyfile', UnsupportedHeaderItem1D),
                  ('compressed_field_index1', UnsupportedHeaderItem1D),
                  ('compressed_field_index2', UnsupportedHeaderItem1D),
                  ('compressed_field_index3', UnsupportedHeaderItem1D),
                  )
    """
    A series of tuples containing the name of a header component, and the
    class which should be used to represent it.  The name will become the
    final attribute name to store the component, but it must also
    correspond to a name in the HEADER_MAPPING of the fixed length header.

    """
    # The base UMFile object does not provide any read or write
    # operators, since these depend on the specific type of file.
    # Therefore this base class can only "pass-through" field data; it
    # can't change the values or the packing used for any of the fields.
    READ_PROVIDERS = {}
    """
    A dictionary which maps a string containing the trailing 3 digits
    (n3 - n1) of a field's lbpack (packing code) onto a suitable
    data-provider object to read the field.  Any packing code not in this
    list will default to using a :class:`_NullReadProvider` object (which
    can only be used to copy the raw byte-data of the field - not to
    unpack it or access the data).

    """
    WRITE_OPERATORS = {}
    """
    A dictionary which maps a string containing the trailing 3 digits
    (n3 - n1) of a field's lbpack (packing code) onto a suitable
    :class:`DataOperator` object to write the field.  Any packing code
    found in a field from this object's field list but not found here
    will cause an exception when trying to write to a file.

    """
    WORD_SIZE = _DEFAULT_WORD_SIZE
    """
    The word/record size for the file; for all supported UM file types
    this should be left as the default - 8 (i.e. 64-bit words).

    """
    # As well as setting the default release Field classes to use, a
    # reference is required to an unknown version of the Field class - to
    # assist in the initial reading of an unknown field.
    FIELD_CLASSES = {2: Field2, 3: Field3, -99: Field}
    """
    Maps the lbrel (header release number) of each field onto an
    appropriate :class:`Field` subclass to represent it.

    .. Note::
        This mapping *must* contain an entry for -99, and the
        :class:`Field` object it returns *must* at a minimum contain
        attribute mappings for the 5 key elements (lbrel, lblrec, lbnrec,
        lbegin and lbpack - see UMDP F03), as well as suitable shape
        information.

    """
    # Data alignment values (to match with UM definitions).
    _WORDS_PER_SECTOR = 512          # Padding for each field (in words).
    _DATA_START_ALIGNMENT = 524288   # Padding to start of data (in bytes).

    def __init__(self):
        """
        Create a blank UMFile instance.

        The initial creation contains only an empty
        :class:`FixedLengthHeader` object, plus an empty (None) named
        attribute for each component in the COMPONENTS attribute.

        In most cases this __init__ should not be called directly, but
        indirectly via the from_file or from_template classmethods.

        """
        self._source = None
        self._source_path = None

        # At the class definition level, WRITE_OPERATORS is a mapping
        # onto the write operator classes.  Before these can be used to
        # output data they need to be instantiated; the instances are
        # then re-attached to WRITE_OPERATORS to be called upon later.
        self._write_operators = {}
        for lbpack_write in self.WRITE_OPERATORS.keys():
            self._write_operators[lbpack_write] = (
                self.WRITE_OPERATORS[lbpack_write]())

        # Attach an empty fixed length header
        self.fixed_length_header = FixedLengthHeader.empty()

        # Add a blank entry for each required component.
        for name, _ in self.COMPONENTS:
            setattr(self, name, None)

        # Add a blank entry for the associated stashmaster
        self.stashmaster = None

        # Initialise the field list.
        self.fields = []

    def __del__(self):
        """
        Ensure any associated file is closed if this object goes out of
        scope.

        """
        if self._source and not self._source.closed:
            self._source.close()

    def __str__(self):
        items = []
        for name, kind in self.COMPONENTS:
            value = getattr(self, name)
            if value is not None:
                items.append('{0}={1}'.format(name, value.shape))
        if self.fields:
            items.append('fields={0}'.format(len(self.fields)))
        return '<{0}: {1}>'.format(type(self).__name__, ', '.join(items))

    def __repr__(self):
        fmt = '<{0}: fields={1}>'
        return fmt.format(type(self).__name__, len(self.fields))

    @classmethod
    def from_file(cls, file_or_filepath,
                  remove_empty_lookups=False, stashmaster=None):
        """
        Initialise a UMFile, populated using the contents of a file.

        Args:

        * file_or_filepath:
            An open file-like object, or file path.  A path is opened for
            read; a 'file-like' must support seeks.

        Kwargs:

        * remove_empty_lookups:
            If set to True, will remove any "empty" lookup headers from
            the field-list (UM files often have pre-allocated numbers of
            lookup entries, some of which are left unused).
        * stashmaster:
            A :class:`mule.stashmaster.STASHmaster` object containing the
            details of the STASHmaster to associate with the fields in
            the file (if not provided, a central STASHmaster will be
            loaded based on the version in the fixed length header).

        .. Note::
            As part of this the "validate" method will be called.  For
            the base :class:`UMFile` class this does nothing, but
            sub-classes may override it to provide specific validation
            checks.

        """
        # First create the class and then populate it from the file.
        new_umf = cls()
        new_umf._read_file(file_or_filepath)
        if remove_empty_lookups:
            new_umf.remove_empty_lookups()

        # Try to attach STASH entries to the fields, using the
        # STASHmaster associated with the model version found in the
        # header (note that this doesn't work for ancillary files)
        if stashmaster is not None:
            new_umf.attach_stashmaster_info(stashmaster)
        else:
            stashmaster = STASHmaster.from_umfile(new_umf)
            if stashmaster is not None:
                new_umf.attach_stashmaster_info(stashmaster)

        # Validate the new object, to check it has been created properly
        new_umf.validate(filename=new_umf._source_path, warn=True)

        return new_umf

    @classmethod
    def from_template(cls, template=None):
        """
        Create a fieldsfile from a template.

        The template is a dictionary of key:value, where 'key' is a
        component name and 'value' is a component settings dictionary.  A
        component given a component settings dictionary in the template
        is guaranteed to exist in the resulting file object.

        Within a component dictionary, key:value pairs indicate the
        values that named component properties must be set to.  If a
        component dictionary contains the special key 'dims', the
        associated value is a tuple of dimensions, which is passed to a
        component.empty() call to produce a new component of that type.
        Note that in some cases "None" may be used to indicate a
        dimension which the file-type fixes (e.g. the number of level
        types).

        For example::

            ff = FieldsFile.from_template({
                # Set a particular header word.
                'fixed_length_header': {'dataset_type': 3},
                # Add a standard-size 'real_constants' array.
                'real_constants': {},
                # Add level-constants for 20 levels.
                'level_dependent_constants': {'dims': (20, None)}})

        The resulting file is usually incomplete, but can be used as a
        convenient starting-point for creating files with a given
        structure.

        .. Note::
            When a particular component contains known values in any
            position of its "CREATE_DIMS" attribute (i.e. not "None"),
            the template may omit this dimension (as is done in the
            example above for the 'level_dependent_constants' 2nd
            dimension).

        """
        # First create the class and then populate it from the template.
        new_umf = cls()
        new_umf._apply_template(template)
        return new_umf

    def attach_stashmaster_info(self, stashmaster):
        """
        Attach references to the relevant entries in a provided
        :class:`mule.stashmaster.STASHmaster` object to each of the
        fields in this object.

        Args:

        * stashmaster:
            A :class:`mule.stashmaster.STASHmaster` instance which should
            be parsed and attached to any fields in the file.

        """
        self.stashmaster = stashmaster
        for field in self.fields:
            if hasattr(field, "lbuser4") and field.lbuser4 in stashmaster:
                field.stash = stashmaster[field.lbuser4]
            else:
                field.stash = None

    def copy(self, include_fields=False):
        """
        Make a copy of a UMFile object including all of its headers, and
        optionally also including copies of all of its fields.

        Kwargs:

        * include_fields:
            If True, the field list in the copied object will be
            populated with copies of the fields from the source object;
            otherwise the field list in the new object will be empty.

        """
        new_umf = self.__class__()
        new_umf.fixed_length_header = self.fixed_length_header.copy()
        for name, _ in self.COMPONENTS:
            component = getattr(self, name)
            if component is not None:
                setattr(new_umf, name, component.copy())
            else:
                setattr(new_umf, name, component)
        new_umf.stashmaster = self.stashmaster
        if include_fields:
            new_umf.fields = [field.copy() for field in self.fields]
        return new_umf

    def validate(self, filename=None, warn=False):
        """
        Apply any consistency checks to check the file is "valid".

        .. Note::
            In the base :class:`UMFile` class this routine does nothing,
            but a format-specific subclass can override this method to do
            whatever it considers appropriate to validate the file
            object.

        """
        pass

    def remove_empty_lookups(self):
        """
        Calling this method will delete any fields from the field list
        which are empty.

        """
        self.fields = [field for field in self.fields
                       if field.raw[1] != -99]

    def to_file(self, output_file_or_path):
        """
        Write to an output file or path.

        Args:

        * output_file_or_path (string or file-like):
            An open file or filepath.  If a path, it is opened and closed
            again afterwards.

        .. Note::
            As part of this the "validate" method will be called.  For
            the base :class:`UMFile` class this does nothing, but
            sub-classes may override it to provide specific validation
            checks.

        """
        # Call validate - to ensure the file about to be written out
        # doesn't contain obvious errors.  This is done here before any
        # new file is created, so that we don't create a blank file if
        # the validation fails
        if isinstance(output_file_or_path, six.string_types):
            self.validate(filename=output_file_or_path)
        else:
            self.validate(filename=output_file_or_path.name)

        if isinstance(output_file_or_path, six.string_types):
            with open(output_file_or_path, 'wb') as output_file:
                self._write_to_file(output_file)
        else:
            self._write_to_file(output_file_or_path)

    def _read_file(self, file_or_filepath):
        """Populate the class from an existing file object or file."""
        if isinstance(file_or_filepath, six.string_types):
            self._source_path = file_or_filepath
            # If a filename is provided, open the file and populate the
            # fixed_length_header using its contents
            self._source = open(self._source_path, "rb")
        else:
            # Treat the argument as an open file.
            self._source = file_or_filepath
            self._source_path = file_or_filepath.name
        source = self._source

        # Attach the fixed length header to the class
        self.fixed_length_header = (
            FixedLengthHeader.from_file(source))

        # Apply the appropriate headerclass from each component
        for name, headerclass in self.COMPONENTS:
            start = getattr(self.fixed_length_header, name+'_start')
            if start <= 0:
                continue
            if len(headerclass.CREATE_DIMS) == 1:
                length = getattr(self.fixed_length_header, name+'_length')
                header = headerclass.from_file(source, length)
            elif len(headerclass.CREATE_DIMS) == 2:
                dim1 = getattr(self.fixed_length_header, name+'_dim1')
                dim2 = getattr(self.fixed_length_header, name+'_dim2')
                header = headerclass.from_file(source, dim1, dim2)
            # Attach the component to the class
            setattr(self, name, header)

        # Now move onto reading in the lookup headers.
        lookup_start = self.fixed_length_header.lookup_start
        if lookup_start > 0:
            source.seek((lookup_start - 1) * self.WORD_SIZE)
            shape = (self.fixed_length_header.lookup_dim1,
                     self.fixed_length_header.lookup_dim2)
            lookup = np.fromfile(source,
                                 dtype='>i{0}'.format(self.WORD_SIZE),
                                 count=np.prod(shape))
            # Catch if the file has no lookups/data to read
            if len(lookup) > 0:
                lookup = lookup.reshape(shape, order="F")
            else:
                lookup = None
        else:
            lookup = None

        # Read and add all the fields.
        self.fields = []
        if lookup is not None:
            # A quick helper function to create the default field class
            # from a part of the raw array
            default_field_class = self.FIELD_CLASSES[-99]

            def default_from_raw(values):
                ints = values[:default_field_class.NUM_LOOKUP_INTS]
                reals = (values[default_field_class.NUM_LOOKUP_INTS:]
                         .view(default_field_class.DTYPE_REAL))
                return default_field_class(ints, reals, None)

            # Setup the first field, and check if it is using well-formed
            # records (i.e. the header defines the position of the field).
            first_field = default_from_raw(lookup.T[0])
            is_well_formed = (first_field.lbnrec != 0 and
                              first_field.lbegin != 0)
            if not is_well_formed:
                # If the file is not well formed, keep a running offset.
                running_offset = (
                    (self.fixed_length_header.data_start - 1) *
                    self.WORD_SIZE)

            for raw_headers in lookup.T:
                # Populate the default field class first - for now we
                # only need the minimum information about the field.
                default_field = default_from_raw(raw_headers)

                # Lookup what the final class for the field should be
                # (based on its release version).
                field_class = self.FIELD_CLASSES.get(
                    default_field.lbrel, self.FIELD_CLASSES[-99])

                # Update the field to the correct Field subclass.
                field = field_class(default_field._lookup_ints,
                                    default_field._lookup_reals,
                                    None)

                # Attach an appropriate data provider (unless the field
                # is empty - in which case it doesn't need a provider).
                if raw_headers[0] == -99:
                    provider = None
                else:
                    # Update the running offset if needed.
                    if not is_well_formed:
                        offset = running_offset
                    else:
                        offset = field.lbegin*self.WORD_SIZE

                    # Now select which type of basic reading and
                    # unpacking provider is suitable for the type of file
                    # and data, starting by checking the number format
                    # (N4 position)
                    num_format = (field.lbpack//1000) % 10

                    # Check number format is valid
                    if num_format not in (0, 2, 3):
                        msg = 'Unsupported number format (lbpack N4): {0}'
                        raise ValueError(msg.format(num_format))

                    # With that check out of the way remove the N4 digit
                    # and proceed with the N1 - N3 digits
                    lbpack321 = field.lbpack - num_format*1000

                    # Select an appropriate read provider for this
                    # packing code if one is available, otherwise use the
                    # default provider (which cannot actually decode the
                    # data)
                    read_provider = (
                        self.READ_PROVIDERS.get(
                            "{0:03d}".format(lbpack321),
                            _NullReadProvider))

                    # Create the provider, passing a reference to the
                    # field, the file object and the start position to
                    # read the data (Note that we pass a copy of the
                    # field, not the original - this is because we *do
                    # not* want that reference to be modified; since it
                    # will be needed to read the data).
                    provider = read_provider(field.copy(), source, offset)

                # Now attach the selected provider to the field object
                # and add it to the field-list
                field.set_data_provider(provider)
                self.fields.append(field)

                # Update the running offset if required
                if not is_well_formed:
                    running_offset += field.lblrec*self.WORD_SIZE

    def _apply_template(self, template):
        """Apply the assignments specified in a template."""
        # Apply changes to fixed-length-header and components, *in that
        # order*.
        # Note: flh *always* exists, so can safely add it to the list of
        # components with a "None" class.
        all_headers = (
            [('fixed_length_header', None)] + list(self.COMPONENTS))
        # Take a copy of the template and remove elements as they are set
        template = template.copy()
        for component_name, component_class in all_headers:
            settings_dict = template.pop(component_name, None)
            if settings_dict is not None:
                create_dims = settings_dict.pop('dims', [])
                component = getattr(self, component_name, None)
                if create_dims or component is None:
                    # Create a new component, or replace with given
                    # dimensions.
                    component = component_class.empty(*create_dims)
                    # Install new component.
                    setattr(self, component_name, component)
                # Assign to specific properties of the component.
                for item_name, value in six.iteritems(settings_dict):
                    if not hasattr(component, item_name):
                        msg = ('File header component "{0}" '
                               'has no element named "{1}"')
                        raise ValueError(msg.format(component_name,
                                                    item_name))
                    setattr(component, item_name, value)
        if template:
            # Complain if there are any unhandled entries in the template
            msg = "Template contains unrecognised header components: {0}"
            names = template.keys()
            names = ['"{0}"'.format(name) for name in names]
            names = ", ".join(names)
            raise ValueError(msg.format(names))

    def _calc_lookup_and_data_positions(self, lookup_start):
        """Set the lookup and data positional information in the header."""
        header = self.fixed_length_header
        if self.fields:
            header.lookup_start = lookup_start
            lookup_lengths = set([field.num_values()
                                  for field in self.fields])
            if len(lookup_lengths) != 1:
                msg = 'Inconsistent lookup header lengths - {0}'
                raise ValueError(msg.format(lookup_lengths))
            lookup_length = lookup_lengths.pop()
            n_fields = len(self.fields)
            header.lookup_dim1 = lookup_length
            header.lookup_dim2 = n_fields
            # Make space for the lookup
            word_number = lookup_start + lookup_length * n_fields
            # Round up to the nearest whole number of "sectors".
            offset = word_number - 1
            offset -= offset % -self._DATA_START_ALIGNMENT
            header.data_start = offset + 1

    def _write_singular_headers(self, output_file):
        """
        Write all components to the file, _except_ the fixed length
        header.

        Also updates all the component location and dimension records in
        the fixed-length header.  That is done here to ensure that these
        header words are in accord with the actual file locations.

        """
        # Go through each component defined for this file type
        for name, component_class in self.COMPONENTS:
            component = getattr(self, name)
            # Construct component position and shape info (or
            # missing-values).
            file_word_position = output_file.tell() // self.WORD_SIZE + 1
            if component is not None:
                shape = component.shape
                ndims = len(shape)
            else:
                # Missing component : Use all-empty values.
                ndims = len(component_class.CREATE_DIMS)
                MDI = FixedLengthHeader.MDI
                shape = [MDI] * ndims
                file_word_position = MDI
            if ndims not in (1, 2):
                msg = ('Component type {0} has {1} dimensions, '
                       'can not write.')
                raise ValueError(msg.format(name, ndims))
            # Record the position of this component in the fixed-length
            # header.
            flh = self.fixed_length_header
            setattr(flh, name+'_start', file_word_position)
            # Record the component dimensions in the fixed-length header.
            if ndims == 1:
                setattr(flh, name+'_length', shape[0])
            elif ndims == 2:
                setattr(flh, name+'_dim1', shape[0])
                setattr(flh, name+'_dim2', shape[1])
            # Write out the component itself (if there is one).
            if component:
                component.to_file(output_file)

    def _write_to_file(self, output_file):
        """Write out to an open output file."""
        # A reference to the header
        flh = self.fixed_length_header

        # Skip past the fixed length header for now
        output_file.seek(flh._NUM_WORDS * self.WORD_SIZE)

        # Write out the singular headers (i.e. all headers apart from the
        # lookups, which will be done below).
        # This also updates all the fixed-length-header entries that
        # specify the position and size of the other header components.
        self._write_singular_headers(output_file)

        # Update the fixed length header position entries corresponding
        # to the data and lookup
        single_headers_end = output_file.tell() // self.WORD_SIZE
        self._calc_lookup_and_data_positions(single_headers_end + 1)

        if self.fields:
            # Skip the LOOKUP component and write the DATA component.
            # We need to adjust the LOOKUP headers to match where the
            # DATA payloads end up, so to avoid repeatedly seeking
            # backwards and forwards it makes sense to wait until we've
            # adjusted them all and write them out in one go.
            output_file.seek((flh.data_start - 1) * self.WORD_SIZE)
            sector_size = self._WORDS_PER_SECTOR * self.WORD_SIZE

            # Write out all the field data payloads.
            for field in self.fields:
                if field.lbrel != -99.0:
                    # Output 'recognised' lookup types (not blank
                    # entries).
                    field.lbegin = output_file.tell() // self.WORD_SIZE

                    # WGDOS packed fields can be tagged with an accuracy
                    # of -99.0; this indicates that they should not be
                    # packed, so reset the packing code here accordingly
                    if field.lbpack % 10 == 1 and int(field.bacc) == -99:
                        field.lbpack = 10*(field.lbpack//10)

                    if field._can_copy_deferred_data(
                            field.lbpack, field.bacc, self.WORD_SIZE):
                        # The original, unread file data is encoded as
                        # wanted, so extract the raw bytes and write them
                        # back out again unchanged; however first trim
                        # off any existing padding to allow the code
                        # below to re-pad the output
                        data_bytes = field._get_raw_payload_bytes()
                        data_bytes = data_bytes[
                            :field.lblrec *
                            field._data_provider.DISK_RECORD_SIZE]
                        output_file.write(data_bytes)
                        # Calculate lblrec and lbnrec based on what will
                        # be written (just in case they are wrong or have
                        # come from a pp file)
                        field.lblrec = (
                            field._data_provider.DISK_RECORD_SIZE *
                            field.lblrec // self.WORD_SIZE)
                        field.lbnrec = (
                            field.lblrec -
                            (field.lblrec % -self._WORDS_PER_SECTOR))
                    else:
                        # Strip just the n1-n3 digits from the lbpack
                        # value since the later digits are not relevant
                        lbpack321 = "{0:03d}".format(
                            field.lbpack -
                            ((field.lbpack//1000) % 10)*1000)

                        # Select an appropriate operator for writing the
                        # data (if one is available for the given packing
                        # code)
                        if lbpack321 in self.WRITE_OPERATORS:
                            write_operator = (
                                self._write_operators[lbpack321])
                        else:
                            msg = ('Cannot save data with lbpack={0} : '
                                   'packing not supported.')
                            raise ValueError(msg.format(field.lbpack))

                        # Use the write operator to prepare the field
                        # data for writing to disk
                        data_bytes, data_size = (
                            write_operator.to_bytes(field))

                        # The bytes returned by the operator are in the
                        # exact format to be written
                        output_file.write(data_bytes)

                        # and the operator also returns the exact number
                        # of words/records taken up by the data; this is
                        # exactly what needs to go in the Field's lblrec
                        field.lblrec = data_size

                        # The other record header, lbnrec, is the number
                        # of words/records used to store the data; this
                        # may be different to the above in the case of
                        # packed data, if the packing method has a
                        # different word size.  Calculate the actual
                        # on-disk word size here
                        size_on_disk = (
                            (write_operator.WORD_SIZE*data_size) //
                            self.WORD_SIZE)

                        # Padding will also be applied to ensure that the
                        # next block of data is aligned with a sector
                        # boundary
                        field.lbnrec = (
                            size_on_disk -
                            (size_on_disk % -self._WORDS_PER_SECTOR))

                    # Pad out the data section to a whole number of
                    # sectors.
                    overrun = output_file.tell() % sector_size
                    if overrun != 0:
                        padding = np.zeros(sector_size - overrun, 'i1')
                        output_file.write(padding)

            # Update the fixed length header to reflect the extent of
            # the DATA component.
            flh.data_dim1 = ((output_file.tell() // self.WORD_SIZE) -
                             flh.data_start + 1)

            # Go back and write the LOOKUP component.
            output_file.seek((flh.lookup_start - 1) * self.WORD_SIZE)

            # Write out all the field lookups.
            for field in self.fields:
                field.to_file(output_file)

        # Write the fixed length header - now that we know how big the
        # DATA component was.
        output_file.seek(0)
        self.fixed_length_header.to_file(output_file)
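

# A minimal usage sketch of the file interface (illustrative only; the
# paths are hypothetical, and FieldsFile is imported just below):
#
#     src = FieldsFile.from_file("input.ff")
#     out = src.copy()                  # headers only, no fields
#     out.fields = [fld for fld in src.fields if fld.lbuser4 == 16004]
#     out.to_file("filtered.ff")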


# Import the derived UM File formats
from mule.ff import FieldsFile                   # noqa: E402
from mule.lbc import LBCFile                     # noqa: E402
from mule.ancil import AncilFile                 # noqa: E402
from mule.dump import DumpFile                   # noqa: E402
from mule.dumpfromgrib import DumpFromGribFile   # noqa: E402

# Mapping from known dataset types to the appropriate class to use with
# load_umfile
DATASET_TYPE_MAPPING = {
    1: DumpFile,
    2: DumpFile,
    3: FieldsFile,
    4: AncilFile,
    5: LBCFile,
    }


def load_umfile(unknown_umfile, stashmaster=None):
    """
    Load a UM file of undetermined type, by checking its dataset type and
    attempting to load it as the correct class.

    Args:

    * unknown_umfile:
        A file or file-like object containing an unknown file to be
        loaded based on its dataset_type.

    Kwargs:

    * stashmaster:
        A :class:`mule.stashmaster.STASHmaster` object containing the
        details of the STASHmaster to associate with the fields in the
        file (if not provided, a central STASHmaster will be loaded based
        on the version in the fixed length header).

    """
    def _load_umfile(file_path, open_file):
        # Read the fixed length header and use the dataset_type to obtain
        # a suitable subclass
        flh = FixedLengthHeader.from_file(open_file)
        file_class = DATASET_TYPE_MAPPING.get(flh.dataset_type)
        # Modify the file class if this is a dump on an Arakawa A grid.
        if (flh.dataset_type == 1) and (flh.grid_staggering == 1):
            file_class = DumpFromGribFile
        if not file_class:
            msg = ("Unknown dataset_type {0}, supported types are {1}"
                   .format(flh.dataset_type,
                           str(DATASET_TYPE_MAPPING.keys())))
            raise ValueError(msg)
        umf_new = file_class.from_file(file_path, stashmaster=stashmaster)
        return umf_new

    # Handle the case of the file being either the path to a file to be
    # opened (and closed again) or an existing open file object.
    if isinstance(unknown_umfile, six.string_types):
        file_path = unknown_umfile
        with open(file_path, "rb") as open_file:
            result = _load_umfile(file_path, open_file)
    else:
        open_file = unknown_umfile
        file_path = open_file.name
        result = _load_umfile(file_path, open_file)
    return result
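

# A minimal usage sketch (illustrative only; "input.ff" is a hypothetical
# path): load a file of unknown type and dispatch on its dataset_type.
#
#     umf = load_umfile("input.ff")
#     print(type(umf).__name__)    # e.g. "FieldsFile" for dataset_type 3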