#!/usr/bin/env python # vim: set syntax=python: import os, sys, getopt, re, string helpMsg = ''' ###################### mkdes ###################### Version: 1.01 Author: Benyang Tang, btang@pacific.jpl.nasa.gov === Introduction === mkdes is a tool to generate a descriptor file when given a set of netCDF files. The descriptor file is to be used in the visualization software Ferret. mkdes needs Python. All Linux computers should have Python installed. Other unix computers may have Python installed too. type 'which python' to test. === Download === http://www.ocgy.ubc.ca/~tang/softwares/mkdes/mkdes.html === Usage === mkdes [--help --des=desFile --timevar=timeVariableName --dir=dir_prefix --modulo --title=Tilte --f90 --ncdump=ncdumpCommand] netCDF1 [netCDF2 ...] The arguments netCDF1, netCDF2, ... need not to be in any special order. mkdes is smart enough to figure the time order out. The time grids of each netCDF file have to be evenly spaced. But each netCDF file can have its own time interval. The time grids from different netCDF files can overlap. mkdes will drop off the overlapping time levels. Here is an example: netCDF1 has time grids of [101, 102, 103, 104, 105, 106] netCDF2 has time grids of [103, 104] netCDF3 has time grids of [104, 105, 106] Then, mkdes will take the first 2 time levels from netCDF1, the first time level from netCDF2, and all the time levels from netCDF3. === options === --help print this help message. --timevar=timeVariableName Specify the time variable name. The default is, of caurse, time . It is case-insensitive. --des=desFile Specify the resulting descriptor file. The default is a file named temp999.des in the current directory. --dir=dir_prefix Specify a directory which prefixes the netCDF filenames in the descriptor file. The default is no directory prefix. If you like to prefix the current directory, use --dir='.' . --modulo Specify whether the time is modulo. The default is not. --f90 Specify whether the descriptor file is f90 style (for Ferret in Linux) or f77 (for other platforms). The default is f77. --ncdump=ncdumpCommand This script needs a tool called ncdump. If your system has netCDF library installed, it should have ncdump. mkdes will search several directories for ncdump. If it fails to find, it will print a message telling that you need to use this option to specify the ncdump path. --title=Title Specify the title of the descriptor file. The default is to be read from the netCDF files. === examples === # to print the help message and do nothing else: mkdes --help # to generate a f77 style descriptor file, no directory prefixing, outputing the descriptor file as temp999.des: mkdes netCDF1 netCDF2 # to prefix the current directory to the netCDF file names in the descriptor file: mkdes --dir='.' netCDF1 netCDF2 # to generate descriptor file myDes.des: mkdes --des=myDes.des netCDF1 netCDF2 # to generate a f90 descriptor file: mkdes --f90 netCDF1 netCDF2 # If the time variable in the netCDF files is not time, but timeZZZ mkdes --timevar=timeZZZ netCDF1 netCDF2 ''' ''' === To Do: () date format () other names for time? () find the identical netCDF files ''' #=== parameters #=== get options optFormat = '' longOptFormat = ['help', 'timevar=', 'ncdump=', 'f90', 'title=', 'modulo', 'des=', 'dir='] try: opts, ncFiles = getopt.getopt(sys.argv[1:],optFormat,longOptFormat) except getopt.error, msg: print msg sys.exit(1) ifSilent = 0 ncdumpCmd = '' isF90 = 0 title0 = '' isModulo = '.false.' desFile = 'temp999.des' dirPrefix = None timeVar = 'time' for o,a in opts: if o=='--ncdump': ncdumpCmd = a if o=='--title': title0 = a if o in ['--f90', '--F90']: isF90 = 1 if o in ['--modulo']: isModulo = '.true.' if o in ['--des']: desFile = a if o in ['--dir']: dirPrefix = a if o in ['--timevar']: timeVar = a if o in ['--help']: print helpMsg sys.exit(0) #--- what should be the dirPrefix currentDir = os.getcwd() if dirPrefix == '.': dirPrefix = currentDir #--- if no ncFiles if not ncFiles: print 'Please specify at least one netCDF files.' sys.exit(1) #--- check whether netCDF files exist ncFiles.sort() fileIgnored = [] for i in range(len(ncFiles)): ncFile = ncFiles[i] if i > 0: if ncFile == ncFiles[i-1]: fileIgnored.append(i) if not os.path.isfile(ncFile): print 'file %s does not exist. Do nothing and exiting.' %(ncFile) sys.exit(1) if fileIgnored: fileIgnored.reverse() for i in fileIgnored: del ncFiles[i] #--- ncdump command if not ncdumpCmd: searchPath = ['ncdump', '/usr/bin/ncdump', '/usr/local/bin/ncdump', '~/bin/ncdump'] for a in searchPath: try: temp1 = os.popen(a,'r').read() except: print '\nCommand %s generates: \n%s\n\n' %(a,temp1) else: ncdumpCmd = a break if not ncdumpCmd: print '\nCannot find ncdump. Please specify the absolute path of ncdump by option --ncdump .' sys.exit(1) #--- derived parameters #--- constants monthName = { '01':'jan', '02':'feb', '03':'mar', '04':'apr', '05':'may', '06':'jun', '07':'jul', '08':'aug', '09':'sep', '10':'oct', '11':'nov', '12':'dec', } #================= def main(): #================= #=== get times of each netCDF file # p1 = re.compile(r'\s*\n\s*%s = ([\d\.].*?);'%timeVar,re.I | re.S) p1 = re.compile(r'\s*\n\s*%s\w* = ([\d\.].*?);'%timeVar,re.I | re.S) f1 = lambda a: float(a) fileDict = {} # the structure of fileDict: {times[0]: [ncFile, times], ...} for ncFile in ncFiles: print '\nReading time info from %s .' %(ncFile) temp1 = os.popen('%s -c %s' %(ncdumpCmd, ncFile),'r').read() temp2 = p1.search(temp1) if not temp2: print 'Cannot find time axis in %s . The file is not used.' %(ncFile) continue temp3 = temp2.group(1) temp4 = string.split(temp3,',') try: times = map(f1,temp4) except: print 'Cannot read time in %s . The file is not used.' %(ncFile) continue if not times: print 'number of time levels = 0 in %s . The file is not used.' %(ncFile) continue #--- check whether times are evenly spaced temp5 = times[:-1] temp6 = times[1:] f2 = lambda a1,a2: a2-a1 temp7 = map(f2,temp5,temp6) dt1 = (max(temp7) + min(temp7))/2. # print min(temp7), max(temp7), abs( (max(temp7)-min(temp7))/max(temp7) ), dt1 if abs( (max(temp7)-min(temp7))/max(temp7) ) > 0.01: print 'time is not evenly spaced in %s . The file is not used.' %(ncFile) continue #--- check whether 2 files have the same starting time ifKeepOld = 0 if fileDict.has_key(times[0]): if len(times) <= len(fileDict[times[0]][1]): ifKeepOld = 1 print '\nThe following 2 files have the same starting time=%g : \n%s\n%s' %(times[0], fileDict[times[0]][0] ,ncFile) if ifKeepOld: print 'The first one is ignored.\n' else: print 'The second one is ignored.\n' if not ifKeepOld: fileDict[times[0]] = [ncFile, times, dt1] time0 = fileDict.keys() if not time0: print 'None of the netCDF files is qualified to be used in generating the descriptor file. Do nothing and exit.' sys.exit(1) time0.sort() #=== get the time unit t0_1 = '' t0_2 = '' t0 = '' unit1 = '' p1 = re.compile(r'\n\s*%s\w*:units = "(.*?)"'%timeVar,re.I) for time0I in time0[0:1]: ncFile = fileDict[time0I][0] print '\nGetting time unit from %s .' %(ncFile) temp1 = os.popen('%s -c %s' %(ncdumpCmd, ncFile),'r').read() temp2 = p1.search(temp1) if temp2: temp3 = temp2.group(1) #--- whether time unit contains 'since' temp4 = string.split(temp3,' since ') if len(temp4)==2: #--- whether the base time has date and time parts temp11 = string.split(temp4[1],' ') if len(temp11) == 2: #--- correct the date part temp12 = string.split(temp11[0],'-') if len(temp12)==3: if len(temp12[1])==3: if temp12[1] in monthName.values(): t0_1 = '%s' %(temp11[0]) else: if temp12[1] in monthName.keys(): t0_1 = '%s-%s-%s' %(temp12[2], monthName[temp12[1]], temp12[0]) #--- correct the time part temp12 = string.split(temp11[1],':') if len(temp12)==3: if len(temp12[2])>2: temp13 = string.split(temp12[2],'.') if len(temp13)==2: try: temp14 = int( temp13[0] ) except: pass else: t0_2 = '%s:%s:%02d' %(temp12[0], temp12[1], temp14) else: t0_2 = temp11[1] #--- combine the date part and time part if t0_1 != '' and t0_2 != '' : t0 = '%s %s' %(t0_1, t0_2) #--- make the relative time unit if temp4[0] in ['minutes', 'minute']: unit1 = '%5.1f' %(60) if temp4[0] in ['hours','hour']: unit1 = '%5.1f' %(3600) if temp4[0] in ['days', 'day']: unit1 = '%5.1f' %(24*3600) if temp4[0] in ['years','year']: unit1 = '%5.1f' %(365.25*24*3600) else: continue if t0 == '' or unit1 == '': print 'Cannot figure out the following time unit in %s: \n%s. \nPlease modify the generated descriptor file to put time unit.' %(ncFile,temp3) #--- file title p2 = re.compile(r'\n\s*:title = "(.*?)"') if not title0: temp2 = p2.search(temp1) if temp2: title1 = temp2.group(1) else: title1 = title0 #=== header headerInfo = (title1,t0,unit1,isModulo) if isF90: header1 = ''' &FORMAT_RECORD D_TYPE = ' MC', D_FORMAT = ' 1A', / &BACKGROUND_RECORD D_TITLE = '%s', D_T0TIME = '%s', D_TIME_UNIT = %s, D_TIME_MODULO = %s, / &MESSAGE_RECORD D_MESSAGE = ' ', D_ALERT_ON_OPEN = F, D_ALERT_ON_OUTPUT = F, / &EXTRA_RECORD / ''' %headerInfo else: header1 = ''' $FORMAT_RECORD D_TYPE = ' MC', D_FORMAT = ' 1A', $END $BACKGROUND_RECORD D_TITLE = '%s', D_T0TIME = '%s', D_TIME_UNIT = %s, D_TIME_MODULO = %s, $END $MESSAGE_RECORD D_MESSAGE = ' ', D_ALERT_ON_OPEN = F, D_ALERT_ON_OUTPUT = F, $END $EXTRA_RECORD $END ''' %headerInfo #=== tail if isF90: tail1 = ''' &STEPFILE_RECORD S_FILENAME = '**END OF STEPFILES**' / ''' else: tail1 = ''' $STEPFILE_RECORD S_FILENAME = '**END OF STEPFILES**', $END ''' #=== lines for files body1 = '' for i in range(len(time0)): time0I = time0[i] ncFile = fileDict[time0I][0] timeI = fileDict[time0I][1] nTime = len(timeI) dt1 = fileDict[time0I][2] #--- check for time overlap nTimeNew = nTime if i != len(time0)-1: timeI_ = fileDict[time0[i+1]][1] if timeI_[0] <= timeI[0]+(nTime-1)*dt1: nTimeNew = int( (timeI_[0] - timeI[0] - .0001*dt1) / dt1 ) + 1 if nTime != nTimeNew: print '\n%s :\n%d time levels in file; %d levels are used.' %(ncFile, nTime, nTimeNew) else: print '\n%s :\n%d time levels in file; all are used.' %(ncFile, nTime) #--- generate body nTime = nTimeNew if dirPrefix and (not os.path.isabs(ncFile)): ncFile1 = os.path.join(dirPrefix,ncFile) else: ncFile1 = ncFile if isF90: temp1 = ''' &STEPFILE_RECORD S_FILENAME = '%s', S_AUX_SET_NUM = 0, S_START = %5.1f, S_END = %5.1f, S_DELTA = %5.1f, S_NUM_OF_FILES = 1, S_REGVARFLAG = ' ', / ''' %(ncFile1, timeI[0], timeI[0]+(nTime-1)*dt1, dt1) else: temp1 = ''' $STEPFILE_RECORD S_FILENAME = '%s', S_AUX_SET_NUM = 0, S_START = %5.1f, S_END = %5.1f, S_DELTA = %5.1f, S_NUM_OF_FILES = 1, S_REGVARFLAG = ' ', $END ''' %(ncFile1, timeI[0], timeI[0]+(nTime-1)*dt1, dt1) #--- check to see whether the line is too long. if not isF90: temp2 = string.split(temp1,'\n')[2] if len(temp2)>75: print '''Warning: The following line may be too long: %s If Ferret does not take the generated descriptor file, see here for help: http://ferret.wrc.noaa.gov/Ferret/Mail_Archives/fu_2002/msg00164.html ''' %(temp2) body1 = body1 + temp1 #=== write to desFile open(desFile,'w').write( header1 + body1 + tail1) main()