# *****************************COPYRIGHT******************************
# (C) Crown copyright Met Office. All rights reserved.
# For further details please refer to the file LICENCE.txt
# which you should have received as part of this distribution.
# *****************************COPYRIGHT******************************
#
# This file is part of Mule.
#
# Mule is free software: you can redistribute it and/or modify it under
# the terms of the Modified BSD License, as published by the
# Open Source Initiative.
#
# Mule is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Modified BSD License for more details.
#
# You should have received a copy of the Modified BSD License
# along with Mule. If not, see <http://opensource.org/licenses/BSD-3-Clause>.
"""
This module provides tools for interacting with "pp" files.
"""
from __future__ import (absolute_import, division, print_function)
import six
import mule
import struct
import numpy as np
GRID_STAGGER = {3: "new_dynamics", 6: "endgame"}
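# (For context: the fixed-length header's "grid_staggering" value indexes
# this mapping, e.g. GRID_STAGGER[3] == "new_dynamics"; see its use in
# fields_to_pp_file below)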
# Borrow the Mule field class, since a field in a pp file is essentially the
# same as a field in a UM file; but adjust the data types, since pp files
# hold 32-bit values
class PPField(mule.Field):
    DTYPE_INT = ">i4"
    DTYPE_REAL = ">f4"

# As above but with header release 2 headers
class PPField2(PPField, mule.Field2):
    pass

# As above but with header release 3 headers
class PPField3(PPField, mule.Field3):
    pass

# Mapping to go from release number to field object
FIELD_SELECT = {2: PPField2, 3: PPField3}
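# For example, a lookup whose release number (LBREL) is 3 will be loaded as
# a PPField3; a small sketch of how this mapping is used below:
#
#     fclass = FIELD_SELECT.get(3, None)   # -> PPField3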
# Similarly, adjust the record size for the read providers required here
PP_WORD_SIZE = 4
class _ReadPPProviderUnpacked(mule.ff._ReadFFProviderCray32Packed):
    DISK_RECORD_SIZE = PP_WORD_SIZE

class _ReadPPProviderWGDOSPacked(mule.ff._ReadFFProviderWGDOSPacked):
    DISK_RECORD_SIZE = PP_WORD_SIZE

# Create mappings for the lbpack n3-n1 digits (similar to how the mule file
# classes contain mappings like these). The only real difference is that the
# "Unpacked" provider uses the 32-bit class (since PP files are 32-bit)
_READ_PROVIDERS = {
"000": _ReadPPProviderUnpacked,
"001": _ReadPPProviderWGDOSPacked,
}
_WRITE_OPERATORS = {
"000": mule.ff._WriteFFOperatorCray32Packed(),
"001": mule.ff._WriteFFOperatorWGDOSPacked()
}
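# A small sketch of how these mappings are keyed (the value here is
# hypothetical): the thousands (n4) digit of LBPACK is masked off and the
# remaining n3-n1 digits form the key, e.g.
#
#     lbpack = 3001
#     key = "{0:03d}".format(lbpack - ((lbpack // 1000) % 10) * 1000)
#     # key == "001", selecting the WGDOS provider/operator above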
def file_is_pp_file(file_path):
"""
Checks to see if a given file is a pp file.
Args:
* file_path:
Path to the file to be checked.
Returns:
* True if file is a pp file, False otherwise.
"""
# The logic behind this is that the first 32-bit word of a pp file should
# be the record length of the first record (a lookup entry). Since this
# has 64, 4-byte words we check to see if it is 64*4 = 256. In a regular
# UM File the first 64-bit word should be either 15, 20 or IMDI, and in
# each of these cases it is not possible for the first half of the word
# to be 256, making this a safe way to detect a pp file.
first_word = np.fromfile(file_path, dtype=">i4", count=1)
return first_word == 256
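
# A minimal usage sketch of the check above (the path is hypothetical):
#
#     if file_is_pp_file("/path/to/fields.pp"):
#         fields = fields_from_pp_file("/path/to/fields.pp")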
def fields_from_pp_file(pp_file_obj_or_path):
"""
Reads in a PP file as a list of field objects.
Args:
* pp_file_obj_or_path:
Either an (opened) file object, or the path
to a file containing the pp data.
Returns:
* pp_fields
List of :class:`mule.pp.PPField` objects.
"""
    if isinstance(pp_file_obj_or_path, six.string_types):
        pp_file = open(pp_file_obj_or_path, "rb")
    else:
        pp_file = pp_file_obj_or_path

    field_count = 0
    fields = []
    while True:
        # Increment counter
        field_count += 1

        # Read the record length
        reclen = np.fromfile(pp_file, ">i4", 1)

        # Check for end of file
        if len(reclen) == 0:
            break
        else:
            reclen = reclen[0]

        if reclen != 256:
            msg = "Field {0}; Incorrectly sized lookup record: {1}"
            raise ValueError(msg.format(field_count, reclen))

        # Read the record (the header)
        ints = np.fromfile(pp_file, ">i4", mule.Field.NUM_LOOKUP_INTS)
        reals = np.fromfile(pp_file, ">f4", mule.Field.NUM_LOOKUP_REALS)

        # Read the check record
        reclen_check = np.fromfile(pp_file, ">i4", 1)[0]

        # They should match
        if reclen != reclen_check:
            msg = "Field {0}; Inconsistent header record lengths: {1} and {2}"
            raise ValueError(msg.format(field_count, reclen, reclen_check))

        # Load into the basic field class
        field_ref = PPField(ints, reals, None)

        # Use the release number to select a better class if possible
        fclass = FIELD_SELECT.get(field_ref.lbrel, None)
        if fclass is not None:
            field_ref = fclass(ints, reals, None)

        # Read the record length for the data
        reclen = np.fromfile(pp_file, ">i4", 1)[0]

        # This should be equivalent to lbnrec, but can sometimes be set to
        # zero... so to allow the existing provider to work add this value
        # to the reference field's headers
        field_ref.lbnrec = reclen // PP_WORD_SIZE

        # Associate the provider
        offset = pp_file.tell()
        # Strip just the n3-n1 digits from the lbpack value
        # and check for a suitable read provider
        lbpack321 = "{0:03d}".format(
            field_ref.lbpack - ((field_ref.lbpack // 1000) % 10) * 1000)
        if lbpack321 not in _READ_PROVIDERS:
            msg = "Field {0}; Cannot interpret unsupported packing code {1}"
            raise ValueError(msg.format(field_count, lbpack321))
        provider = _READ_PROVIDERS[lbpack321](field_ref, pp_file, offset)
        field = type(field_ref)(ints, reals, provider)

        # Change the DTYPE variables back to 64-bit - this is slightly hacky,
        # but *only* the UM file logic in the main part of Mule utilises this,
        # and it will go wrong if it gets a PPField with it set to 32-bit
        field.DTYPE_REAL = ">f8"
        field.DTYPE_INT = ">i8"

        # Now check if the field contains extra data
        if field.lbext > 0:
            # Skip past the field data only (relative seek to avoid overflows)
            pp_file.seek((field.lblrec - field.lbext) * PP_WORD_SIZE, 1)

            # Save the current file position
            start = pp_file.tell()

            # Now load in the vectors as they are encountered, until the
            # end of the record is reached
            vectors = {}
            ext_consumed = 0
            while pp_file.tell() - start < field.lbext * PP_WORD_SIZE:
                # First read the code; it encodes both the length and the
                # type of the vector (as 1000*number_of_points + type)
                vector_code = np.fromfile(pp_file, ">i4", 1)[0]

                # Split the code into its parts
                vector_points = vector_code // 1000
                vector_type = vector_code % 1000

                # Then read the vector into the dictionary
                vectors[vector_type] = (
                    np.fromfile(pp_file, ">f4", vector_points))
                ext_consumed += vector_points

            # Having finished populating the vectors dict, attach it
            # to the field object
            field.pp_extra_data = vectors
        else:
            # If there is no extra data, simply skip the whole record
            # (relative seek here to avoid overflows)
            field.pp_extra_data = None
            pp_file.seek(reclen, 1)

        # Read the check record
        reclen_check = np.fromfile(pp_file, ">i4", 1)[0]

        # They should match
        if reclen != reclen_check:
            msg = "Field {0}; Inconsistent data record lengths: {1} and {2}"
            raise ValueError(msg.format(field_count, reclen, reclen_check))
        fields.append(field)

    pp_file.close()
    return fields
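
# A minimal usage sketch (the path is hypothetical):
#
#     fields = fields_from_pp_file("/path/to/fields.pp")
#     for field in fields:
#         print(field.lbuser4)    # STASH code from each field's lookup
#         if field.pp_extra_data is not None:
#             print(sorted(field.pp_extra_data))  # extra data vector types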
def fields_to_pp_file(pp_file_obj_or_path, field_or_fields,
                      umfile=None, keep_addressing=False):
    """
    Writes a list of field objects to a PP file.

    Args:

    * pp_file_obj_or_path:
        Either an (opened) file object, or the path
        to a file where the pp data should be written.
    * field_or_fields:
        A list of :class:`mule.Field` subclass instances
        to be written to the file.
    * umfile:
        If the fields being written contain data on a variable
        resolution grid, provide a :class:`mule.UMFile` subclass
        instance here to add the row + column dependent
        constant information to the pp field "extra data".
    * keep_addressing:
        Whether or not to preserve the values of LBNREC, LBEGIN
        and LBUSER(2) from the original file - these are not used
        by pp files, so by default they will be set to zero.

    """
    if isinstance(pp_file_obj_or_path, six.string_types):
        pp_file = open(pp_file_obj_or_path, "wb")
    else:
        pp_file = pp_file_obj_or_path

    for field in list(field_or_fields):
        # Skip any field with an unrecognised header release number
        if field.lbrel not in (2, 3):
            continue

        # Similar to the mule file classes, the unpacking of data can be
        # skipped if the packing and accuracy are unchanged and the fields
        # have the appropriate word size on disk
        if (field._can_copy_deferred_data(
                field.lbpack, field.bacc, PP_WORD_SIZE)):
            # Get the raw bytes containing the data
            data_bytes = field._get_raw_payload_bytes()

            # Remove any padding from WGDOS fields
            if field.lbpack == 1:
                true_len = struct.unpack(">i", data_bytes[:4])[0]
                data_bytes = data_bytes[:true_len*4]
        else:
            # If the field has been modified, follow a similar set of steps
            # to the mule file classes
            lbpack321 = "{0:03d}".format(
                field.lbpack - ((field.lbpack // 1000) % 10) * 1000)
            if lbpack321 not in _WRITE_OPERATORS:
                msg = "Cannot write out packing code {0}"
                raise ValueError(msg.format(lbpack321))
            data_bytes, _ = _WRITE_OPERATORS[lbpack321].to_bytes(field)

            # Calculate LBLREC
            field.lblrec = len(data_bytes) // PP_WORD_SIZE

            # Ensure the value of LBLREC lies on a whole 8-byte word
            if field.lblrec % 2 != 0:
                field.lblrec += 1
                data_bytes += b"\x00\x00\x00\x00"
        # If the field appears to be variable resolution, attach the
        # relevant extra data (requires that a UM file object was given)
        vector = {}
        if (field.bzx == field.bmdi
                or field.bzy == field.bmdi
                or field.bdx == field.bmdi
                or field.bdy == field.bmdi):
            # The variable resolution data can either be already attached
            # to the field (most likely if it has already come from an
            # existing pp file) or be supplied via a umfile object + STASH
            # entry...
            if (hasattr(field, "pp_extra_data")
                    and field.pp_extra_data is not None):
                vector = field.pp_extra_data
                extra_len = 6 + 3 * field.lbnpt + 3 * field.lbrow
            else:
                if umfile is None:
                    msg = ("Variable resolution field/s found, but no "
                           "UM file object provided")
                    raise ValueError(msg)
                if not hasattr(field, "stash"):
                    msg = ("Variable resolution field/s found, but no "
                           "STASH information attached to field objects")
                    raise ValueError(msg)
                stagger = GRID_STAGGER[
                    umfile.fixed_length_header.grid_staggering]
                grid_type = field.stash.grid
                rdc = umfile.row_dependent_constants
                cdc = umfile.column_dependent_constants

                # Calculate U vectors
                if grid_type in (11, 18):  # U points
                    vector[1] = cdc.lambda_u
                    if stagger == "new_dynamics":
                        vector[12] = cdc.lambda_p
                        vector[13] = np.append(cdc.lambda_p[1:-1],
                                               [2.0 * cdc.lambda_u[-1]
                                                - cdc.lambda_p[-1]])
                    elif stagger == "endgame":
                        vector[12] = np.append([2.0 * cdc.lambda_u[0]
                                                - cdc.lambda_p[0]],
                                               cdc.lambda_p[:-1])
                        vector[13] = cdc.lambda_p
                else:  # Any other grid types
                    vector[1] = cdc.lambda_p
                    if stagger == "new_dynamics":
                        vector[12] = np.append([2.0 * cdc.lambda_p[1]
                                                - cdc.lambda_u[1]],
                                               cdc.lambda_u[1:])
                        vector[13] = cdc.lambda_u
                    elif stagger == "endgame":
                        vector[12] = cdc.lambda_u
                        vector[13] = np.append(cdc.lambda_u[1:],
                                               [2.0 * cdc.lambda_p[-1]
                                                - cdc.lambda_u[-1]])

                # Calculate V vectors
                if grid_type in (11, 19):  # V points
                    vector[2] = rdc.phi_v
                    if stagger == "new_dynamics":
                        vector[14] = rdc.phi_p
                        vector[15] = np.append(rdc.phi_p[1:],
                                               [2.0 * rdc.phi_v[-1]
                                                - rdc.phi_p[-1]])
                    elif stagger == "endgame":
                        vector[14] = np.append([2.0 * rdc.phi_v[0]
                                                - rdc.phi_p[0]],
                                               rdc.phi_p[:-1])
                        vector[15] = np.append(rdc.phi_p[:-1],
                                               [2.0 * rdc.phi_v[-1]
                                                - rdc.phi_p[-1]])
                else:  # Any other grid types
                    vector[2] = rdc.phi_p[:-1]
                    if stagger == "new_dynamics":
                        vector[14] = np.append([2.0 * rdc.phi_p[0]
                                                - rdc.phi_v[0]],
                                               rdc.phi_v[1:])
                        vector[15] = np.append(rdc.phi_v[:-1],
                                               [2.0 * rdc.phi_p[-1]
                                                - rdc.phi_v[-1]])
                    elif stagger == "endgame":
                        vector[14] = rdc.phi_v[:-1]
                        vector[15] = rdc.phi_v[1:]

                # Work out extra data sizes and adjust the headers
                # accordingly
                extra_len = 6 + 3 * field.lbnpt + 3 * field.lbrow
                field.lbext = extra_len
                field.lblrec += extra_len
        # (Optionally) zero LBNREC, LBEGIN and LBUSER2, since they have no
        # meaning in a pp file (because pp files have sequential records
        # rather than direct)
        if not keep_addressing:
            field.lbnrec = 0
            field.lbegin = 0
            field.lbuser2 = 0

        # Convert the numbers from the lookup to 32-bit
        ints = field._lookup_ints.astype(">i4")
        reals = field._lookup_reals.astype(">f4")

        # Calculate the record length (pp files are not direct-access, so
        # each record begins and ends with its own length)
        lookup_reclen = np.array(
            (len(ints) + len(reals)) * PP_WORD_SIZE).astype(">i4")

        # Write the first record (the field header)
        pp_file.write(lookup_reclen)
        pp_file.write(ints)
        pp_file.write(reals)
        pp_file.write(lookup_reclen)

        # Similarly echo the record length before and after the data
        reclen = len(data_bytes)
        if vector:
            reclen += extra_len * PP_WORD_SIZE
            keys = [1, 2, 12, 13, 14, 15]
            sizes = ([field.lbnpt, field.lbrow]
                     + [field.lbnpt] * 2 + [field.lbrow] * 2)
        pp_file.write(np.array(reclen).astype(">i4"))
        pp_file.write(data_bytes)
        if vector:
            for key, size in zip(keys, sizes):
                pp_file.write(np.array(1000 * size + key).astype(">i4"))
                pp_file.write(vector[key].astype(">f4"))
        pp_file.write(np.array(reclen).astype(">i4"))

    pp_file.close()
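
# A minimal round-trip sketch of the two functions above (paths are
# hypothetical):
#
#     fields = fields_from_pp_file("input.pp")
#     fields_to_pp_file("output.pp", fields)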