

;+
; :Description:
;   The FLOAT16_ENCODE function converts a scalar or array of 32-bit
;   floating-point numbers into 16-bit "half-precision" floating-point values.
;   Since IDL does not have a native 16-bit float data type, the result is
;   encoded as 16-bit unsigned integers using the IEEE 754 binary16
;   (or float16) format, where the bits are "seeeeeffffffffff",
;   where "s" is the sign bit, "eeeee" is the 5-bit exponent (with an offset of 15),
;   and the f's are 10 bits for the fractional part. The significand actually
;   has 11 bits of precision because it has an implicit lead bit of 1
;   (unless the exponent bits are all zero).
;   
;   Note that binary16 numbers have a limited range and precision, and are
;   only suitable for certain applications. The smallest representable
;   number is +/-5.96e-08, while the largest is +/-65504. Numbers smaller
;   than +/-5.96e-08 will be encoded as zero, while numbers larger than
;   +/-65504 will be encoded as +/-Infinity (0x7c00 and 0xfc00).
;   Not-a-number (NaN) values will be encoded as the quiet NaN 0x7fff.
;
; :Returns: Array<Uint>
;   A scalar or array of 16-bit unsigned integers (Uint) containing
;   the encoded binary16 values. If the input is an array, then the result
;   will be an array of the same dimensions.
;
; :Arguments:
;   array: in, required, Float
;     Set this argument to a scalar or array of numbers. If the argument
;     is not type Float, it will be converted first to type Float.
;
;-
function float16_encode, array
  compile_opt idl2
  on_error, 2

  ; Encode 32-bit floats as IEEE 754 binary16 bit patterns, returned as
  ; 16-bit unsigned integers with the same dimensions as the input.

  ; Convert to Float once and locate the NaNs *before* clipping.
  ; The minimum/maximum operators used for the clip are not guaranteed to
  ; pass a NaN operand through unchanged, so testing the clipped values
  ; could miss NaNs and encode them as Infinity instead of 0x7fff.
  f = float(array)
  nans = where(finite(f, /nan), /null)

  ; Clip to +/-65520, which is equal to Infinity in binary16.
  ; If we don't clip then bigger values will swamp the formulas below
  ; and produce nonsensical results. +/-65520 will return exactly Infinity.
  ; This avoids needing to do a "where" for out-of-range values.
  x = -65520.0 > temporary(f) < 65520.0

  ; Reinterpret the float32 bits as 32-bit integers and add half of a
  ; binary16 unit-in-the-last-place (bit 12 of the float32 mantissa), so
  ; that the truncating shifts below round to the nearest binary16 value
  ; (exact ties round away from zero). A carry out of the mantissa
  ; correctly bumps the exponent.
  b = long(x, 0, n_elements(x)) + 0x00001000
  ; LONG(x, 0, n) returns a vector: restore the input dimensions (or scalar).
  b = isa(array, /array) ? reform(b, array.dim, /overwrite) : b[0]
  ; Clear any accumulated floating-point exception status (e.g. from NaNs)
  ; so that no math-error warning is reported for this routine.
  !null = check_math()

  ; Move the float32 sign bit (bit 31) down to the binary16 sign bit (bit 15)
  signbit = ishft(b, -16) and 0b1000000000000000
  ; Mask off the 8 bits of the float32 exponent
  exponent = ishft(b, -23) and 0b011111111
  ; Pull out the 23 float32 mantissa bits
  mantissa = b and 0x007FFFFF

  ; Rebias the exponent from float32 (127) to binary16 (15): 127 - 15 = 112,
  ; then move it into bits 10-14.
  norm_exponent = ishft(exponent - 112, 10) and 0b0111110000000000
  ; The (exponent gt 112) will only let normalized exponents through.
  ; Other exponents will give a zero value.
  ; The top 10 mantissa bits become the binary16 fraction.
  normalized = (exponent gt 112) * (norm_exponent or ishft(mantissa, -13))

  ; 0x007FF000 = 0x00800000 - 0x00001000 = decimal indicator flag - initial rounding
  ; Shift the full significand right according to the exponent, keeping one
  ; extra bit, then add 1 and drop that bit to round to nearest.
  subnorm_mantissa = ishft(ishft(mantissa + 0x007FF000, -(125 - exponent)) + 1, -1)
  ; The (exponent gt/le) will only let subnormalized exponents through.
  ; Other exponents will give a zero value.
  subnormalized = (exponent gt 101 and exponent le 112) * subnorm_mantissa

  result = uint(signbit or normalized or subnormalized)
  ; If there were no NaNs then "nans" is !NULL and this assignment is a no-op.
  result[nans] = 0x7fff    ; quiet NaN
  return, result
end
