#!/usr/bin/env python

"""
convert netCDF file to HDF5 using Scientific.IO.NetCDF and PyTables.
Jeff Whitaker <jeffrey.s.whitaker@noaa.gov>

Added some flags to select filters, as well as some small improvements.
Francesc Altet <faltet@carabos.com>

This requires Scientific from 
http://starship.python.net/~hinsen/ScientificPython

"""
import Scientific.IO.NetCDF as NetCDF
import tables, sys, os.path, getopt, time, math
import Numeric as N

def _quantization_scale(least_significant_digit):
    """Return ``(bits, scale)`` for quantizing to a given decimal digit.

    ``bits`` is the number of binary digits needed to resolve
    ``10**-least_significant_digit`` and ``scale`` is ``2**bits``; data are
    quantized as ``around(scale*data)/scale``.
    """
    precision = math.pow(10, -least_significant_digit)
    exp = math.log(precision, 10)
    # Round the exponent away from zero so the precision is never
    # under-estimated because of floating point noise in the logarithm.
    if exp < 0:
        exp = int(math.floor(exp))
    else:
        exp = int(math.ceil(exp))
    bits = math.ceil(math.log(math.pow(10, -exp), 2))
    return bits, math.pow(2, bits)


def _copy_variable(ncfile, h5file, varname, filters, unpackshort, quantize):
    """Copy one netCDF variable into an EArray under the HDF5 root group.

    Returns the number of bytes held by the new array, or None when the
    variable was skipped because it has no dimensions (an EArray needs at
    least one dimension to be the enlargeable one).
    """
    var = ncfile.variables[varname]
    vardims = list(var.dimensions)
    if not vardims:
        # Without this guard the enlargeable-dimension bookkeeping below
        # fails with an IndexError and aborts the whole conversion.
        print("%s is a scalar variable (no dimensions) and cannot be stored in an enlargeable array - skipping" % varname)
        return None

    ispackedshort = (var.typecode() == 's' and
                     hasattr(var, 'scale_factor') and
                     hasattr(var, 'add_offset'))
    if unpackshort and not ispackedshort:
        print("%s has no scale_factor and add_offset attributes, or data is not short integers - not unpacking" % varname)

    scale = None
    quantizedata = quantize and hasattr(var, 'least_significant_digit')
    if quantizedata:
        bits, scale = _quantization_scale(var.least_significant_digit[0])
        print("%s  quantizing with  %s  bits of precision" % (varname, bits))
    elif quantize:
        print("%s  has no least_significant_digit attribute, not quantizing" % varname)

    vardimsizes = [ncfile.dimensions[vardim] for vardim in vardims]
    # The first dimension whose size is None becomes the enlargeable one;
    # when there is none, dimension 0 is used.
    extdim = 0
    for ndim, dimsize in enumerate(vardimsizes):
        if dimsize is None:
            extdim = ndim
            break

    # use long_name for title.
    if hasattr(var, 'long_name'):
        title = var.long_name
    else: # or, just use some bogus title.
        title = varname + ' array'

    # Remember the real number of rows *before* zeroing the enlargeable
    # dimension in the atom shape, so that expectedrows gets a meaningful
    # value instead of always being 0.
    nrows = var.shape[extdim]
    vardimsizes[extdim] = 0
    if unpackshort and ispackedshort:
        dtype = 'f'
    else:
        dtype = var.typecode()
    if dtype == 'c':
        # Special case for Numeric character objects
        # (on which base Scientific Python works)
        atom = tables.StringAtom(shape=tuple(vardimsizes), length=1)
    else:
        atom = tables.Atom(dtype=dtype, shape=tuple(vardimsizes))
    vardata = h5file.createEArray(h5file.root, varname,
                                  atom, title, filters=filters,
                                  expectedrows=nrows)

    # write data to enlargeable array one chunk of records at a time.
    # (so the whole array doesn't have to be kept in memory).
    nrowsinbuf = vardata._v_maxTuples
    # The slices parameter for var.__getitem__()
    slices = [slice(0, dim, 1) for dim in var.shape]
    for start in range(0, nrows, nrowsinbuf):
        stop = min(start + nrowsinbuf, nrows)
        # Set the proper slice in the extensible dimension
        slices[extdim] = slice(start, stop, 1)
        if unpackshort and ispackedshort:
            data = (var.scale_factor*var[tuple(slices)] + var.add_offset).astype('f')
        else:
            data = var[tuple(slices)]
        if quantizedata:
            data = (N.around(scale*data)/scale).astype(dtype)
        vardata.append(data)

    # set variable attributes.
    for key, val in var.__dict__.items():
        setattr(vardata.attrs, key, val)
    setattr(vardata.attrs, 'dimensions', tuple(vardims))

    nelements = 1
    for dimlen in vardata.shape:
        nelements *= dimlen
    return nelements * vardata.itemsize


def nctoh5(ncfilename, h5filename, filters, overwritefile, unpackshort, quantize):
    """Copy every variable and attribute of a netCDF file into an HDF5 file.

    ncfilename -- path of the netCDF file to read.
    h5filename -- path of the HDF5 file to write.
    filters -- value passed as ``filters`` to ``createEArray`` for every
        variable (may be None).
    overwritefile -- if true the HDF5 file is opened in "w" mode, otherwise
        in "a" mode.
    unpackshort -- if true, short integer variables that carry both
        scale_factor and add_offset attributes are stored as floats computed
        as ``scale_factor*data + add_offset``.
    quantize -- if true, variables that carry a least_significant_digit
        attribute are rounded to that precision before being stored.

    Each variable becomes an EArray under the root group, with the variable
    attributes plus a 'dimensions' attribute attached; the attributes of the
    netCDF file are copied onto the root group.  Variables without any
    dimension are skipped with a message.

    Returns a ``(nobjects, nbytes)`` tuple: the number of variables copied
    and the total number of bytes they occupy.

    Both files are closed even if the copy fails part-way.
    """
    # open netCDF file
    ncfile = NetCDF.NetCDFFile(ncfilename, mode = "r")
    try:
        # open h5 file
        if overwritefile:
            h5mode = "w"
        else:
            h5mode = "a"
        h5file = tables.openFile(h5filename, mode = h5mode)
        try:
            nobjects = 0; nbytes = 0  # Initialize counters
            # loop over variables in netCDF file.
            for varname in ncfile.variables.keys():
                varbytes = _copy_variable(ncfile, h5file, varname, filters,
                                          unpackshort, quantize)
                if varbytes is not None:
                    nobjects += 1
                    nbytes += varbytes
            # set global (file) attributes.
            for key, val in ncfile.__dict__.items():
                setattr(h5file.root._v_attrs, key, val)
        finally:
            h5file.close()
    finally:
        ncfile.close()
    return (nobjects, nbytes)

# Command-line help text.  The single %s placeholder is filled in with the
# base name of the script as it was invoked (sys.argv[0]).
usage = """usage: %s [-h] [-v] [-o] [--complevel=(0-9)] [--complib=lib] [--shuffle=(0|1)] [--fletcher32=(0|1)] [--unpackshort=(0|1)] [--quantize=(0|1)] netcdffilename hdf5filename
 -h -- Print usage message.
 -v -- Show more information.
 -o -- Overwite destination file.
 --complevel=(0-9) -- Set a compression level (0 for no compression, which
     is the default).
 --complib=lib -- Set the compression library to be used during the copy.
     lib can be set to "zlib", "lzo" or "ucl". Defaults to "zlib".
 --shuffle=(0|1) -- Activate or not the shuffling filter (default is active
     if complevel>0).
 --fletcher32=(0|1) -- Whether to activate or not the fletcher32 filter (not
     active by default).
 --unpackshort=(0|1) -- Unpack short integer variables to float variables
     using scale_factor and add_offset netCDF variable attributes 
     (not active by default).
 --quantize=(0|1) -- Quantize data to improve compression using 
     least_significant_digit netCDF variable attribute (not active by default).
     See http://www.cdc.noaa.gov/cdc/conventions/cdc_netcdf_standard.shtml
     for further explanation of what this attribute means.
\n""" % os.path.basename(sys.argv[0])

# Parse the command line: the short flags -h, -v and -o plus the long
# options, all of which take a value.
try:
    opts, pargs = getopt.getopt(sys.argv[1:], 'hvo',
                                ['complevel=',
                                 'complib=',
                                 'shuffle=',
                                 'fletcher32=',
                                 'unpackshort=',
                                 'quantize='
                                 ])
except getopt.GetoptError:
    # Only option-parsing failures are handled here; anything else (for
    # instance KeyboardInterrupt) is allowed to propagate.
    sys.stderr.write("Error parsing the options. The error was: %s\n"
                     % sys.exc_info()[1])
    sys.stderr.write(usage)
    sys.exit(1)

# default options
verbose = 0
overwritefile = 0
complevel = None
complib = None
shuffle = None
fletcher32 = None
unpackshort = 0
quantize = 0

# Get the options
for optname, optvalue in opts:
    try:
        if optname == '-h':
            # Asking for help is not an error, hence the 0 exit status.
            sys.stderr.write(usage)
            sys.exit(0)
        elif optname == '-v':
            verbose = 1
        elif optname == '-o':
            overwritefile = 1
        elif optname == '--complevel':
            complevel = int(optvalue)
        elif optname == '--complib':
            complib = optvalue
        elif optname == '--shuffle':
            shuffle = int(optvalue)
        elif optname == '--fletcher32':
            fletcher32 = int(optvalue)
        elif optname == '--unpackshort':
            unpackshort = int(optvalue)
        elif optname == '--quantize':
            quantize = int(optvalue)
        else:
            sys.stderr.write("%s : Unrecognized option\n" % optname)
            sys.stderr.write(usage)
            sys.exit(1)
    except ValueError:
        # int() rejected the value given to a numeric option.
        sys.stderr.write("%s: invalid value %r (an integer is expected)\n"
                         % (optname, optvalue))
        sys.stderr.write(usage)
        sys.exit(1)

# if we pass a number of files different from 2, abort
if len(pargs) != 2:
    sys.stderr.write("You need to pass both source and destination!.\n")
    sys.stderr.write(usage)
    sys.exit(1)

# Catch the files passed as the last arguments
ncfilename = pargs[0]
h5filename = pargs[1]

# Build the Filters instance.  When no filter option at all was given on the
# command line, filters stays None.
if (complevel, complib, shuffle, fletcher32) == (None,)*4:
    filters = None
else:
    if complevel is None: complevel = 0
    # Only choose a default for shuffle when the user did not pass
    # --shuffle; an explicit value must never be overridden.  The default is
    # to shuffle whenever compression is active.
    if shuffle is None:
        if complevel > 0:
            shuffle = 1
        else:
            shuffle = 0
    if complib is None: complib = "zlib"
    if fletcher32 is None: fletcher32 = 0
    filters = tables.Filters(complevel=complevel, complib=complib,
                             shuffle=shuffle, fletcher32=fletcher32)

# Some timing
t1 = time.time()
cpu1 = time.clock()
# Copy the file
if verbose:
    print("+=+"*20)
    print("Starting conversion from %s to %s" % (ncfilename, h5filename))
    print("Applying filters: %s" % (filters,))
    print("+=+"*20)

# Do the conversion
(nobjects, nbytes) = nctoh5(ncfilename, h5filename, filters, overwritefile,
                            unpackshort, quantize)

# Gather some statistics
t2 = time.time()
cpu2 = time.clock()
tcopy = round(t2-t1, 3)
cpucopy = round(cpu2-cpu1, 3)
# The elapsed time is rounded to milliseconds, so a very quick conversion
# (or a coarse system clock) can yield 0.0; never divide by it.
if tcopy > 0:
    tpercent = int(round(cpucopy/tcopy, 2)*100)
else:
    tpercent = 0
if verbose:
    print("Number of variables copied: %s" % nobjects)
    print("KBytes copied: %s" % round(nbytes/1024.,3))
    print("Time copying: %s s (real) %s s (cpu)  %s%%" %
          (tcopy, cpucopy, tpercent))
    if tcopy > 0:
        print("Copied variable/sec:  %s" % round(nobjects / float(tcopy),1))
        print("Copied KB/s : %s" % int(nbytes / (tcopy * 1024)))
    else:
        print("Copied variable/sec:  n/a (elapsed time too small to measure)")
        print("Copied KB/s : n/a (elapsed time too small to measure)")
