
# *****************************COPYRIGHT******************************
# (C) Crown copyright Met Office. All rights reserved.
# For further details please refer to the file LICENCE.txt
# which you should have received as part of this distribution.
# *****************************COPYRIGHT******************************
#
# This file is part of Mule.
#
# Mule is free software: you can redistribute it and/or modify it under
# the terms of the Modified BSD License, as published by the
# Open Source Initiative.
#
# Mule is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# Modified BSD License for more details.
#
# You should have received a copy of the Modified BSD License
# along with Mule.  If not, see <http://opensource.org/licenses/BSD-3-Clause>.

"""
This module provides tools for interacting with "pp" files.

"""
from __future__ import (absolute_import, division, print_function)

import six
import mule
import struct
import numpy as np

GRID_STAGGER = {3: "new_dynamics", 6: "endgame"}
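# (Added note: the keys correspond to the fixed-length header's
# "grid_staggering" value, so e.g. GRID_STAGGER[6] == "endgame"; this is
# used when reconstructing variable resolution "extra data" vectors in
# fields_to_pp_file below.)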


# Borrow the Mule field class, since a Field in a pp file is essentially the
# same as a Field in a UM file; but adjust the data types since it is 32-bit
class PPField(mule.Field):
    DTYPE_INT = ">i4"
    DTYPE_REAL = ">f4"


# As above but with header release 2 headers
class PPField2(PPField, mule.Field2):
    pass


# As above but with header release 3 headers
class PPField3(PPField, mule.Field3):
    pass


# Mapping to go from release number to field object
FIELD_SELECT = {2: PPField2, 3: PPField3}

# Similarly, adjust the record size for the read providers required here
PP_WORD_SIZE = 4


class _ReadPPProviderUnpacked(mule.ff._ReadFFProviderCray32Packed):
    DISK_RECORD_SIZE = PP_WORD_SIZE


class _ReadPPProviderWGDOSPacked(mule.ff._ReadFFProviderWGDOSPacked):
    DISK_RECORD_SIZE = PP_WORD_SIZE


# Create mappings for the lbpack n3-n1 digits (similar to how the mule file
# classes contain mappings like these). The only real difference is that the
# "Unpacked" provider uses the 32-bit class (since PP files are 32-bit)
_READ_PROVIDERS = {
    "000": _ReadPPProviderUnpacked,
    "001": _ReadPPProviderWGDOSPacked,
    }

_WRITE_OPERATORS = {
    "000": mule.ff._WriteFFOperatorCray32Packed(),
    "001": mule.ff._WriteFFOperatorWGDOSPacked()
    }


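# For illustration (an added note, not from the original module): lookups
# into these mappings strip the "n4" (thousands) digit from LBPACK before
# matching, so e.g. a WGDOS field with lbpack=1 or lbpack=2001 both select
# the "001" entry:
#
#     >>> for lbpack in (0, 1, 2001):
#     ...     print(
#     ...         "{0:03d}".format(lbpack - ((lbpack // 1000) % 10) * 1000))
#     000
#     001
#     001

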
def file_is_pp_file(file_path):
    """
    Checks to see if a given file is a pp file.

    Args:
        * file_path:
            Path to the file to be checked.

    Returns:
        * True if file is a pp file, False otherwise.

    """
    # The logic behind this is that the first 32-bit word of a pp file should
    # be the record length of the first record (a lookup entry). Since this
    # contains 64 4-byte words we check to see if it is 64*4 = 256. In a
    # regular UM File the first 64-bit word should be either 15, 20 or IMDI,
    # and in each of these cases it is not possible for the first half of the
    # word to be 256, making this a safe way to detect a pp file.
    first_word = np.fromfile(file_path, dtype=">i4", count=1)
    return first_word == 256


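# A minimal usage sketch (an added illustration; "example.pp" is a
# hypothetical path). The check above reads only the first 4 bytes of the
# file, so it is cheap to call before attempting a full load:
#
#     if file_is_pp_file("example.pp"):
#         fields = fields_from_pp_file("example.pp")

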
def fields_from_pp_file(pp_file_obj_or_path):
    """
    Reads in a PP file as a list of field objects.

    Args:
        * pp_file_obj_or_path:
            Either an (opened) file object, or the path to a file
            containing the pp data.

    Returns:
        * pp_fields:
            List of :class:`mule.pp.PPField` objects.

    """
    if isinstance(pp_file_obj_or_path, six.string_types):
        pp_file = open(pp_file_obj_or_path, "rb")
    else:
        pp_file = pp_file_obj_or_path

    field_count = 0
    fields = []
    while True:
        # Increment counter
        field_count += 1

        # Read the record length
        reclen = np.fromfile(pp_file, ">i4", 1)

        # Check for end of file
        if len(reclen) == 0:
            break
        else:
            reclen = reclen[0]

        if reclen != 256:
            msg = "Field {0}; Incorrectly sized lookup record: {1}"
            raise ValueError(msg.format(field_count, reclen))

        # Read the record (the header)
        ints = np.fromfile(pp_file, ">i4", mule.Field.NUM_LOOKUP_INTS)
        reals = np.fromfile(pp_file, ">f4", mule.Field.NUM_LOOKUP_REALS)

        # Read the check record; it should match the opening record length
        reclen_check = np.fromfile(pp_file, ">i4", 1)[0]
        if reclen != reclen_check:
            msg = "Field {0}; Inconsistent header record lengths: {1} and {2}"
            raise ValueError(msg.format(field_count, reclen, reclen_check))

        # Load into the basic field class
        field_ref = PPField(ints, reals, None)

        # Use the release number to select a better class if possible
        fclass = FIELD_SELECT.get(field_ref.lbrel, None)
        if fclass is not None:
            field_ref = fclass(ints, reals, None)

        # Read the record length for the data
        reclen = np.fromfile(pp_file, ">i4", 1)[0]

        # This should be equivalent to lbnrec, but can sometimes be set to
        # zero... so to allow the existing provider to work add this value
        # to the reference field's headers
        field_ref.lbnrec = reclen // PP_WORD_SIZE

        # Associate the provider
        offset = pp_file.tell()

        # Strip just the n1-n3 digits from the lbpack value
        # and check for a suitable read provider
        lbpack321 = "{0:03d}".format(
            field_ref.lbpack - ((field_ref.lbpack // 1000) % 10) * 1000)

        if lbpack321 not in _READ_PROVIDERS:
            msg = "Field {0}; Cannot interpret unsupported packing code {1}"
            raise ValueError(msg.format(field_count, lbpack321))

        provider = _READ_PROVIDERS[lbpack321](field_ref, pp_file, offset)
        field = type(field_ref)(ints, reals, provider)

        # Change the DTYPE variables back to 64-bit - this is slightly hacky
        # but *only* the UM File logic in the main part of Mule utilises
        # this, and it will go wrong if it gets a PPField with it set to
        # 32-bit
        field.DTYPE_REAL = ">f8"
        field.DTYPE_INT = ">i8"

        # Now check if the field contains extra data
        if field.lbext > 0:
            # Skip past the field data only (relative seek to avoid
            # overflows)
            pp_file.seek((field.lblrec - field.lbext) * PP_WORD_SIZE, 1)

            # Save the current file position
            start = pp_file.tell()

            # Now load in the vectors as they are encountered until the
            # end of the record is reached
            vectors = {}
            ext_consumed = 0
            while pp_file.tell() - start < field.lbext * PP_WORD_SIZE:
                # First read the code
                vector_code = np.fromfile(pp_file, ">i4", 1)[0]

                # Split the code into its parts
                vector_points = vector_code // 1000
                vector_type = vector_code % 1000

                # Then read the vector into the dictionary
                vectors[vector_type] = (
                    np.fromfile(pp_file, ">f4", vector_points))
                ext_consumed += vector_points

            # Having finished populating the vectors dict, attach it
            # to the field object
            field.pp_extra_data = vectors
        else:
            # If there isn't any extra data, simply skip the whole record
            # (relative seek here to avoid overflows)
            field.pp_extra_data = None
            pp_file.seek(reclen, 1)

        # Read the check record; it should match the data record length
        reclen_check = np.fromfile(pp_file, ">i4", 1)[0]
        if reclen != reclen_check:
            msg = "Field {0}; Inconsistent data record lengths: {1} and {2}"
            raise ValueError(msg.format(field_count, reclen, reclen_check))

        fields.append(field)

    pp_file.close()
    return fields


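# An illustrative read sketch (added; "input.pp" is a hypothetical path).
# Each returned object behaves like a normal Mule field, with any variable
# resolution vectors available through the "pp_extra_data" attribute:
#
#     fields = fields_from_pp_file("input.pp")
#     for field in fields:
#         print(field.lbuser4, field.lbft)  # STASH code and forecast period
#         if field.pp_extra_data is not None:
#             print(sorted(field.pp_extra_data))  # vector types, e.g. [1, 2]

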
def fields_to_pp_file(pp_file_obj_or_path, field_or_fields, umfile=None,
                      keep_addressing=False):
    """
    Writes a list of field objects to a PP file.

    Args:
        * pp_file_obj_or_path:
            Either an (opened) file object, or the path to a file
            where the pp data should be written.
        * field_or_fields:
            A list of :class:`mule.Field` subclass instances to be
            written to the file.
        * umfile:
            If the fields being written contain data on a variable
            resolution grid, provide a :class:`mule.UMFile` subclass
            instance here to add the row + column dependent constant
            information to the pp field "extra data".
        * keep_addressing:
            Whether or not to preserve the values of LBNREC, LBEGIN and
            LBUSER(2) from the original file - these are not used by pp
            files so by default will be set to zero.

    """
    if isinstance(pp_file_obj_or_path, six.string_types):
        pp_file = open(pp_file_obj_or_path, "wb")
    else:
        pp_file = pp_file_obj_or_path

    for field in list(field_or_fields):

        if field.lbrel not in (2, 3):
            continue

        # Similar to the mule file classes, the unpacking of data can be
        # skipped if the packing and accuracy are unchanged and the fields
        # have the appropriate word size on disk
        if (field._can_copy_deferred_data(
                field.lbpack, field.bacc, PP_WORD_SIZE)):
            # Get the raw bytes containing the data
            data_bytes = field._get_raw_payload_bytes()
            # Remove any padding from WGDOS fields
            if field.lbpack == 1:
                true_len = struct.unpack(">i", data_bytes[:4])[0]
                data_bytes = data_bytes[:true_len*4]
        else:
            # If the field has been modified follow a similar set of steps
            # to the mule file classes
            lbpack321 = "{0:03d}".format(
                field.lbpack - ((field.lbpack // 1000) % 10) * 1000)
            if lbpack321 not in _WRITE_OPERATORS:
                msg = "Cannot write out packing code {0}"
                raise ValueError(msg.format(lbpack321))
            data_bytes, _ = _WRITE_OPERATORS[lbpack321].to_bytes(field)

            # Calculate LBLREC
            field.lblrec = len(data_bytes) // PP_WORD_SIZE

            # Ensure the value of LBLREC lies on a whole 8-byte word
            if field.lblrec % 2 != 0:
                field.lblrec += 1
                data_bytes += b"\x00\x00\x00\x00"

        # If the field appears to be variable resolution, attach the
        # relevant extra data (requires that a UM file object was given)
        vector = {}
        if (field.bzx == field.bmdi or field.bzy == field.bmdi or
                field.bdx == field.bmdi or field.bdy == field.bmdi):

            # The variable resolution data can either be already attached
            # to the field (most likely if it has already come from an
            # existing pp file) or be supplied via a umfile object + STASH
            # entry...
            if (hasattr(field, "pp_extra_data") and
                    field.pp_extra_data is not None):
                vector = field.pp_extra_data
                extra_len = 6 + 3 * field.lbnpt + 3 * field.lbrow
            else:
                if umfile is None:
                    msg = ("Variable resolution field/s found, but no "
                           "UM file object provided")
                    raise ValueError(msg)
                if not hasattr(field, "stash"):
                    msg = ("Variable resolution field/s found, but no "
                           "STASH information attached to field objects")
                    raise ValueError(msg)

                stagger = GRID_STAGGER[
                    umfile.fixed_length_header.grid_staggering]
                grid_type = field.stash.grid
                rdc = umfile.row_dependent_constants
                cdc = umfile.column_dependent_constants

                # Calculate U vectors
                if grid_type in (11, 18):
                    # U points
                    vector[1] = cdc.lambda_u
                    if stagger == "new_dynamics":
                        vector[12] = cdc.lambda_p
                        vector[13] = np.append(
                            cdc.lambda_p[1:-1],
                            [2.0 * cdc.lambda_u[-1] - cdc.lambda_p[-1]])
                    elif stagger == "endgame":
                        vector[12] = np.append(
                            [2.0 * cdc.lambda_u[0] - cdc.lambda_p[0]],
                            cdc.lambda_p[:-1])
                        vector[13] = cdc.lambda_p
                else:
                    # Any other grid types
                    vector[1] = cdc.lambda_p
                    if stagger == "new_dynamics":
                        vector[12] = np.append(
                            [2.0 * cdc.lambda_p[1] - cdc.lambda_v[1]],
                            cdc.lambda_u[1:])
                        vector[13] = cdc.lambda_u
                    elif stagger == "endgame":
                        vector[12] = cdc.lambda_u
                        vector[13] = np.append(
                            cdc.lambda_u[1:],
                            [2.0 * cdc.lambda_p[-1] - cdc.lambda_u[-1]])

                # Calculate V vectors
                if grid_type in (11, 19):
                    # V points
                    vector[2] = rdc.phi_v
                    if stagger == "new_dynamics":
                        vector[14] = rdc.phi_p
                        vector[15] = np.append(
                            rdc.phi_p[1:],
                            [2.0 * rdc.phi_v[-1] - rdc.phi_p[-1]])
                    elif stagger == "endgame":
                        vector[14] = np.append(
                            [2.0 * rdc.phi_v[0] - rdc.phi_p[0]],
                            rdc.phi_p[:-1])
                        vector[15] = np.append(
                            rdc.phi_p[:-1],
                            [2.0 * rdc.phi_v[-1] - rdc.phi_p[-1]])
                else:
                    # Any other grid types
                    vector[2] = rdc.phi_p[:-1]
                    if stagger == "new_dynamics":
                        vector[14] = np.append(
                            [2.0 * rdc.phi_p[0] - rdc.phi_v[0]],
                            rdc.phi_v[1:])
                        vector[15] = np.append(
                            rdc.phi_v[:-1],
                            [2.0 * rdc.phi_p[-1] - rdc.phi_v[-1]])
                    elif stagger == "endgame":
                        vector[14] = rdc.phi_v[:-1]
                        vector[15] = rdc.phi_v[1:]

                # Work out extra data sizes and adjust the headers
                # accordingly
                extra_len = 6 + 3 * field.lbnpt + 3 * field.lbrow
                field.lbext = extra_len
                field.lblrec += extra_len

        # (Optionally) zero LBNREC, LBEGIN and LBUSER2, since they have no
        # meaning in a pp file (because pp files have sequential records
        # rather than direct)
        if not keep_addressing:
            field.lbnrec = 0
            field.lbegin = 0
            field.lbuser2 = 0

        # Convert the numbers from the lookup to 32-bit
        ints = field._lookup_ints.astype(">i4")
        reals = field._lookup_reals.astype(">f4")

        # Calculate the record length (pp files are not direct-access, so
        # each record begins and ends with its own length)
        lookup_reclen = np.array(
            (len(ints) + len(reals)) * PP_WORD_SIZE).astype(">i4")

        # Write the first record (the field header)
        pp_file.write(lookup_reclen)
        pp_file.write(ints)
        pp_file.write(reals)
        pp_file.write(lookup_reclen)

        # Similarly echo the record length before and after the data
        reclen = len(data_bytes)
        if vector:
            reclen += extra_len * PP_WORD_SIZE
            keys = [1, 2, 12, 13, 14, 15]
            sizes = ([field.lbnpt, field.lbrow] +
                     [field.lbnpt] * 2 + [field.lbrow] * 2)

        pp_file.write(np.array(reclen).astype(">i4"))
        pp_file.write(data_bytes)
        if vector:
            for key, size in zip(keys, sizes):
                pp_file.write(np.array(1000 * size + key).astype(">i4"))
                pp_file.write(vector[key].astype(">f4"))
        pp_file.write(np.array(reclen).astype(">i4"))

    pp_file.close()
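

# A round-trip sketch (added for illustration; the paths are hypothetical).
# Fields taken from a UM fieldsfile can be written straight out as pp data;
# for variable resolution grids pass the source file as "umfile" (the fields
# then also need STASH information attached, e.g. via a STASHmaster):
#
#     ff = mule.FieldsFile.from_file("input.ff")
#     fields_to_pp_file("output.pp", ff.fields, umfile=ff)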