#!/usr/bin/env python
# vim: set syntax=python:

import os, sys, getopt, re, string 

helpMsg = '''

######################
       mkdes
######################

Version: 1.01
Author: Benyang Tang,  btang@pacific.jpl.nasa.gov


=== Introduction ===

mkdes is a tool to generate a descriptor file when given a set of netCDF files. The descriptor file is to be used in the visualization software Ferret.

mkdes needs Python. All Linux computers should have Python installed. Other
unix computers may have Python installed too. type 'which python' to test.


=== Download ===

http://www.ocgy.ubc.ca/~tang/softwares/mkdes/mkdes.html


=== Usage ===

mkdes [--help --des=desFile --timevar=timeVariableName --dir=dir_prefix --modulo --title=Tilte --f90 --ncdump=ncdumpCommand] netCDF1 [netCDF2 ...]

The arguments netCDF1, netCDF2, ... need not to be in any special order. mkdes is smart enough to figure the time order out.

The time grids of each netCDF file have to be evenly spaced. But each netCDF file can have its own time interval. 

The time grids from different netCDF files can overlap. mkdes will drop off the overlapping time levels. Here is an example:
netCDF1 has time grids of [101, 102, 103, 104, 105, 106]
netCDF2 has time grids of [103, 104]
netCDF3 has time grids of [104, 105, 106]
Then, mkdes will take the first 2 time levels from netCDF1, the first time level from netCDF2, and all the time levels from netCDF3.

=== options ===
 
--help  
print this help message.

--timevar=timeVariableName
Specify the time variable name. The default is, of caurse, time . It is case-insensitive.

--des=desFile
Specify the resulting descriptor file. The default is a file named temp999.des in the current directory.

--dir=dir_prefix
Specify a directory which prefixes the netCDF filenames in the descriptor file.
The default is no directory prefix. If you like to prefix the current
directory, use --dir='.' .

--modulo
Specify whether the time is modulo. The default is not.

--f90
Specify whether the descriptor file is f90 style (for Ferret in Linux) or f77 (for other platforms). The default is f77.

--ncdump=ncdumpCommand
This script needs a tool called ncdump. If your system has netCDF library
installed, it should have ncdump. mkdes will search several directories for ncdump.
If it fails to find, it will print a message telling that you need to use this
option to specify the ncdump path.

--title=Title
Specify the title of the descriptor file. The default is to be read from the netCDF files.


=== examples ===

# to print the help message and do nothing else:
mkdes --help

# to generate a f77 style descriptor file, no directory prefixing, outputing the descriptor file as temp999.des:
mkdes netCDF1 netCDF2

# to prefix the current directory to the netCDF file names in the descriptor file:
mkdes --dir='.' netCDF1 netCDF2

# to generate descriptor file myDes.des:
mkdes --des=myDes.des netCDF1 netCDF2

# to generate a f90 descriptor file:
mkdes --f90 netCDF1 netCDF2

# If the time variable in the netCDF files is not time, but timeZZZ
mkdes --timevar=timeZZZ netCDF1 netCDF2

'''

'''
=== To Do:
() date format
() other names for time?
() find the identical netCDF files
'''
#=== parameters

#=== get options
optFormat = ''
longOptFormat = ['help', 'timevar=', 'ncdump=', 'f90', 'title=', 'modulo', 'des=', 'dir=']
try:
  opts, ncFiles = getopt.getopt(sys.argv[1:],optFormat,longOptFormat)
except getopt.error, msg:
  print msg
  sys.exit(1)

ifSilent = 0

ncdumpCmd = ''
isF90 = 0
title0 = ''
isModulo = '.false.'
desFile = 'temp999.des'
dirPrefix = None
timeVar = 'time'

for o,a in opts:
  if o=='--ncdump':
    ncdumpCmd = a
  if o=='--title':
    title0 = a
  if o in ['--f90', '--F90']:
    isF90 = 1
  if o in ['--modulo']:
    isModulo = '.true.'
  if o in ['--des']:
    desFile = a
  if o in ['--dir']:
    dirPrefix = a
  if o in ['--timevar']:
    timeVar = a
  if o in ['--help']:
    print helpMsg
    sys.exit(0)
 
#--- what should be the dirPrefix
currentDir = os.getcwd()
if dirPrefix == '.':
  dirPrefix = currentDir
  
#--- if no ncFiles
if not ncFiles:
  print 'Please specify at least one netCDF files.'
  sys.exit(1)

#--- check whether netCDF files exist
ncFiles.sort()
fileIgnored = []
for i in range(len(ncFiles)):
  ncFile = ncFiles[i]
  if i > 0:
    if ncFile == ncFiles[i-1]:
      fileIgnored.append(i)
  if not os.path.isfile(ncFile):
    print 'file %s does not exist. Do nothing and exiting.' %(ncFile)
    sys.exit(1)
    
if fileIgnored:
  fileIgnored.reverse()
  for i in fileIgnored:
    del ncFiles[i]
  
#--- ncdump command
if not ncdumpCmd:
  searchPath = ['ncdump', '/usr/bin/ncdump', '/usr/local/bin/ncdump', '~/bin/ncdump']
  for a in searchPath:
    try:
      temp1 = os.popen(a,'r').read()
    except:
      print '\nCommand %s generates: \n%s\n\n' %(a,temp1)
    else:
      ncdumpCmd = a
      break

if not ncdumpCmd:
  print '\nCannot find ncdump. Please specify the absolute path of ncdump by option --ncdump .'
  sys.exit(1)
    
#--- derived parameters

#--- constants
monthName = {
'01':'jan',
'02':'feb',
'03':'mar',
'04':'apr',
'05':'may',
'06':'jun',
'07':'jul',
'08':'aug',
'09':'sep',
'10':'oct',
'11':'nov',
'12':'dec',
}

#=================
def main():
#=================

#=== get times of each netCDF file
# p1 =  re.compile(r'\s*\n\s*%s = ([\d\.].*?);'%timeVar,re.I | re.S)
  p1 =  re.compile(r'\s*\n\s*%s\w* = ([\d\.].*?);'%timeVar,re.I | re.S)
  f1 = lambda a: float(a)

  fileDict = {} # the structure of fileDict: {times[0]: [ncFile, times], ...}
  for ncFile in ncFiles:
    print '\nReading time info from %s .' %(ncFile)
    temp1 = os.popen('%s -c %s' %(ncdumpCmd, ncFile),'r').read()
    temp2 = p1.search(temp1)

    if not temp2:
      print 'Cannot find time axis in %s . The file is not used.' %(ncFile)
      continue

    temp3 = temp2.group(1)
    temp4 = string.split(temp3,',')
    try:
      times = map(f1,temp4)
    except:
      print 'Cannot read time in %s . The file is not used.' %(ncFile)
      continue

    if not times:
      print 'number of time levels = 0 in %s . The file is not used.' %(ncFile)
      continue
  
#--- check whether times are evenly spaced
    temp5 = times[:-1]
    temp6 = times[1:]
    f2 = lambda a1,a2: a2-a1
    temp7 = map(f2,temp5,temp6)
    dt1 = (max(temp7) + min(temp7))/2.
#   print min(temp7), max(temp7), abs( (max(temp7)-min(temp7))/max(temp7) ), dt1
    if abs( (max(temp7)-min(temp7))/max(temp7) ) > 0.01:
      print 'time is not evenly spaced in %s . The file is not used.' %(ncFile)
      continue

#--- check whether 2 files have the same starting time
    ifKeepOld = 0
    if fileDict.has_key(times[0]):
      if len(times) <= len(fileDict[times[0]][1]):
        ifKeepOld = 1

      print '\nThe following 2 files have the same starting time=%g : \n%s\n%s' %(times[0], fileDict[times[0]][0] ,ncFile)
      if ifKeepOld:
        print 'The first one is ignored.\n'
      else:
        print 'The second one is ignored.\n'

    if not ifKeepOld:
      fileDict[times[0]] = [ncFile, times, dt1]

  time0 = fileDict.keys()
  if not time0:
    print 'None of the netCDF files is qualified to be used in generating the descriptor file. Do nothing and exit.'
    sys.exit(1)

  time0.sort()

#=== get the time unit
  t0_1 = ''
  t0_2 = ''
  t0 = ''
  unit1 = ''
  
  p1 =  re.compile(r'\n\s*%s\w*:units = "(.*?)"'%timeVar,re.I)
  for time0I in time0[0:1]:
    ncFile = fileDict[time0I][0]
    print '\nGetting time unit from %s .' %(ncFile)
    temp1 = os.popen('%s -c %s' %(ncdumpCmd, ncFile),'r').read()
    temp2 = p1.search(temp1)
    if temp2:
      temp3 = temp2.group(1)

#--- whether time unit contains 'since'
      temp4 = string.split(temp3,' since ')
      if len(temp4)==2:

#--- whether the base time has date and time parts
        temp11 = string.split(temp4[1],' ')
        if len(temp11) == 2:

#--- correct the date part 
          temp12 = string.split(temp11[0],'-')
          if len(temp12)==3:
            if len(temp12[1])==3:
              if temp12[1] in monthName.values():
                t0_1 = '%s' %(temp11[0])
            else:
              if temp12[1] in monthName.keys():
                t0_1 = '%s-%s-%s' %(temp12[2], monthName[temp12[1]], temp12[0])

#--- correct the time part 
          temp12 = string.split(temp11[1],':')
          if len(temp12)==3:
            if len(temp12[2])>2:
              temp13 = string.split(temp12[2],'.')
              if len(temp13)==2:
                try: 
                  temp14 = int( temp13[0] )
                except: pass
                else:
                  t0_2 = '%s:%s:%02d' %(temp12[0], temp12[1], temp14)
            else: t0_2 = temp11[1]

#--- combine the date part and time part
          if t0_1 != '' and t0_2 != '' : 
            t0 = '%s %s' %(t0_1, t0_2)

#--- make the relative time unit
        if temp4[0] in ['minutes', 'minute']: unit1 = '%5.1f' %(60)
        if temp4[0] in ['hours','hour']: unit1 = '%5.1f' %(3600)
        if temp4[0] in ['days', 'day']: unit1 = '%5.1f' %(24*3600)
        if temp4[0] in ['years','year']: unit1 = '%5.1f' %(365.25*24*3600)
    else: continue

  if t0 == '' or unit1 == '':
    print 'Cannot figure out the following time unit in %s: \n%s. \nPlease modify the generated descriptor file to put time unit.' %(ncFile,temp3)

#--- file title
  p2 =  re.compile(r'\n\s*:title = "(.*?)"')
  if not title0:
    temp2 = p2.search(temp1)
    if temp2:
      title1 = temp2.group(1) 
  else: title1 = title0

#=== header
  headerInfo = (title1,t0,unit1,isModulo)

  if isF90:
    header1 = '''
 &FORMAT_RECORD
   D_TYPE        = '  MC',
   D_FORMAT      = '  1A',
 /

 &BACKGROUND_RECORD
   D_TITLE       = '%s',
   D_T0TIME      = '%s',
   D_TIME_UNIT   = %s,
   D_TIME_MODULO = %s,
 /

 &MESSAGE_RECORD
      D_MESSAGE     = ' ',
      D_ALERT_ON_OPEN      = F,
      D_ALERT_ON_OUTPUT    = F,
 /  

 &EXTRA_RECORD
 /  
''' %headerInfo       

  else:
    header1 = '''
 $FORMAT_RECORD
   D_TYPE        = '  MC',
   D_FORMAT      = '  1A',
 $END

 $BACKGROUND_RECORD
   D_TITLE       = '%s',
   D_T0TIME      = '%s',
   D_TIME_UNIT   = %s,
   D_TIME_MODULO = %s,
 $END
 
 $MESSAGE_RECORD 
      D_MESSAGE     = ' ',
      D_ALERT_ON_OPEN      = F,
      D_ALERT_ON_OUTPUT    = F,
 $END
   
 $EXTRA_RECORD
 $END           
''' %headerInfo

#=== tail
  if isF90:
    tail1 = '''
 &STEPFILE_RECORD
   S_FILENAME    = '**END OF STEPFILES**'
 /
'''

  else:
    tail1 = '''
 $STEPFILE_RECORD
   S_FILENAME    = '**END OF STEPFILES**',
 $END
'''

#=== lines for files
  body1 = ''
  for i in range(len(time0)):
    time0I = time0[i]
    ncFile = fileDict[time0I][0]
    timeI = fileDict[time0I][1]
    nTime = len(timeI)
    dt1 = fileDict[time0I][2]

#--- check for time overlap
    nTimeNew = nTime
    if i != len(time0)-1:
      timeI_ = fileDict[time0[i+1]][1]
      if timeI_[0] <= timeI[0]+(nTime-1)*dt1:
        nTimeNew = int( (timeI_[0] - timeI[0] - .0001*dt1) / dt1 ) + 1

    if nTime != nTimeNew:
      print '\n%s :\n%d time levels in file;  %d levels are used.' %(ncFile, nTime, nTimeNew)      
    else:
      print '\n%s :\n%d time levels in file;  all are used.' %(ncFile, nTime)      

#--- generate body
    nTime = nTimeNew

    if dirPrefix and (not os.path.isabs(ncFile)):
      ncFile1 = os.path.join(dirPrefix,ncFile)
    else: 
      ncFile1 = ncFile

    if isF90:
      temp1 = '''
 &STEPFILE_RECORD
   S_FILENAME    = '%s',
   S_AUX_SET_NUM = 0,
   S_START       = %5.1f,
   S_END         = %5.1f,
   S_DELTA       = %5.1f,
   S_NUM_OF_FILES       = 1,
   S_REGVARFLAG  = ' ',
 /
''' %(ncFile1, timeI[0], timeI[0]+(nTime-1)*dt1, dt1)

    else:
      temp1 = '''
 $STEPFILE_RECORD
   S_FILENAME    = '%s',
   S_AUX_SET_NUM = 0,
   S_START       = %5.1f,
   S_END         = %5.1f,
   S_DELTA       = %5.1f,
   S_NUM_OF_FILES       = 1,
   S_REGVARFLAG  = ' ',
 $END
''' %(ncFile1, timeI[0], timeI[0]+(nTime-1)*dt1, dt1)

#--- check to see whether the line is too long.
    if not isF90:
      temp2 = string.split(temp1,'\n')[2]
      if len(temp2)>75:
        print '''Warning: The following line may be too long:
%s 
If Ferret does not take the generated descriptor file, see here for help:
http://ferret.wrc.noaa.gov/Ferret/Mail_Archives/fu_2002/msg00164.html
''' %(temp2)
      
    body1 = body1 + temp1

#=== write to desFile
    open(desFile,'w').write( header1 + body1 + tail1)

main()