/** Compilation needs a C compiler which supports C99: cc -O -std=gnu99 -o hitranSort hitranSort.c Usage: Generate an intermediate input file by merging some of the HITRAN molecular line lists cat *.par > tmp.par Then sort this intermediate file (which is the only and mandatory command line argument) according to wavenumber, writing the result into another file: hitranSort tmp.par > HITRAN2004.par Clean up the intermediate file: rm tmp.par Usage: hitranSort [-l lowwn] [-h hiwn] inputfile where the option -l allows to set a lower limit on the wavenumber and where the option -h allows to set an upper limit on the wavenumber that will be passed to the output. Notes: The lines of the input file are sorted on the floating point numbers that take 12 bytes (skipping the first 3 bytes in the line). Since this is a common feature in the older (pre-2004) format with 100 bytes per line and a the newer (>=2004) format with 160 bytes per line, this program handles both formats. The program also sorts files of the 100 bytes-per-line format of the SAO92 database (so-called SAO92A format), because this is the same format as the two formats of the HITRAN database in the first 3+12 bytes. However, it does not make sense to mix both types of databases (ie, the 160 and 100-byte format) in the "cat" command (see above) that generates the input file to this program, because one would end up with an output that has mixed line lengths. See http://www.cfa.harvard.edu/firs/sao92.html . Richard J. Mathar, 2009-02-22 Richard J. Mathar home page www.strw.leidenuniv.nl/~mathar. **/ #include #include #include #include /** Maximum number of bytes per HITRAN input line, which is followed by \n or \r\n, then \0 * The bare format (without \r\n\0) contains 160 bytes. * @since 2004 Was 103 for the HITRAN2000 format, but extended for HITRAN2004 */ #define HITRANLL 163 /* Extract a wavenumber from a HITRAN line. * The information is organized as follows in each of the HITRAN lines: * molec Fortran I2 * isot Fortran I1 * waven Fortran F12.6 * strength Fortran E10.3 * dipole Fortran E10.3 * width Fortran F5.4 * width_s Fortran F5.4 * lowsta Fortran F10.4 cm-1 * @param lin the 100 (or 160) bytes of the ASCII l ine * @return the wavenumber of starting after the 3 initial bytes. */ static double waveNum(const void *lin) { double wv; sscanf(lin,"%*3s%12lf",&wv) ; return wv ; } /** Comparison function for qsort(3). * @param tr1 The first HITRAN line. * @param tr2 The second HITRAN line. * @return 1 if the wave number in the first line is larger than the * wave number in the second line, -1 otherwise. */ static int comparTrans(const void *tr1, const void *tr2) { double wv1 = waveNum(tr1), wv2 = waveNum(tr2); #ifdef DEBUG fprintf(stderr,"1: %s",tr1) ; fprintf(stderr,"2: %s",tr2) ; fprintf(stderr,"c: %lf %lf\n",wv1,wv2) ; #endif if ( wv1 > wv2) return 1; else if ( wv1 < wv2) return -1; else return 0; } /** Return HITRAN line count of file stream. * The only aim of this function is to estimate the count of bytes * of the input represented by f. * @param f the pointer to the HITRAN stream * @param wnrang lower and upper filter limit of wavenumbers. * @return the number of lines in f that match the wnrang pass band. */ static int prescan(FILE *f, double wnrang[2]) { char linebuf[HITRANLL] ; /* line count, initialized to zero. This is the value to be returned. */ int lc=0 ; /* scanner status: essentially telling whether we've reached EOF */ char * readStat ; rewind(f) ; readStat = fgets(linebuf,HITRANLL,f) ; while ( readStat ) { /* skip comment lines */ if ( linebuf[0] == '#') ; else { const double wn = waveNum(linebuf) ; if ( wn >= wnrang[0] && wn <= wnrang[1]) lc++ ; } readStat = fgets(linebuf,HITRANLL,f) ; } fprintf(stderr,"# %d transitions in file\n",lc) ; return lc; } /** Sort the HITRAN lines pointed to by f. * @param f The input stream with the HITRAN lines. * @param lc The line count to be expected in f, not including the comment lines. * @param wnrang lower and upper filter limit of wavenumbers. */ static void hitrSort(FILE *f, int lc, double wnrang[2]) { char *linebuf = malloc(HITRANLL) ; /* line count */ char * readStat ; int lineno =0 ; /* allocate lc lines with HITRANLL bytes per line. This * may swallow the entire file into memory. */ char *all = malloc(lc*HITRANLL) ; fprintf(stderr,"# allocated %d MBytes of main memory\n",lc*HITRANLL/1000000) ; rewind(f) ; readStat = fgets(linebuf,HITRANLL,f) ; while ( readStat ) { /* skip comment lines */ if ( linebuf[0] == '#') ; else { const double wn = waveNum(linebuf) ; if ( wn >= wnrang[0] && wn <= wnrang[1]) { /* add to big buffer */ memcpy(all+lineno*HITRANLL,linebuf,HITRANLL) ; lineno++ ; } } readStat = fgets(linebuf,HITRANLL,f) ; } fprintf(stderr,"# second scan %d lines\n",lineno) ; #ifdef DEBUG /* write sorted result to stdout */ for(int l =0 ; l < 10 ; l++) fprintf(stderr,"%d: %s",l,all+l*HITRANLL) ; #endif fprintf(stderr,"# start sort...\n") ; /* sort them all */ qsort(all,lc,HITRANLL, comparTrans) ; /* write sorted result to stdout */ for(lineno =0 ; lineno < lc ; lineno++) printf("%s",all+lineno*HITRANLL) ; /* de-allocate resources grabbed above */ free(all) ; free(linebuf) ; } /** Main entry. * There is currently only one mandatory command line argument: * The name of the UNIX file which contains the (un-sorted) HITRAN lines. * @since 2009-02-22 added command line options -l and -h. */ int main(int argc, char *argv[]) { FILE *f ; /* wave number range for filtering the input file. * Lines with wa wave number below wnrang[0] or above wnrang[1] are not * passed through. */ double wnrang[2] ; /* command line option character */ int c; /* default of pass band is "wide open". Since F12.6 is used, the upper * limit could be safely set to 10^6. */ wnrang[0] = -1.0 ; wnrang[1] = 1.e60 ; while( (c=getopt(argc,argv,"l:h:")) != -1 ) { switch(c) { case 'l': wnrang[0] = atof(optarg) ; break; case 'h': wnrang[1] = atof(optarg) ; break; case '?': fprintf(stderr,"unrecognized option...ignored\n") ; break; } } if( optind >= argc) { fprintf(stderr,"usage: %s [-l lowwaven] [-h highwaven] input-file.par\n",argv[0]) ; return 1 ; } /* test whether we can open the file named in the command line */ f = fopen(argv[optind],"r") ; if ( f== NULL) { fprintf(stderr,"Could not open %s for reading\n",argv[optind]) ; return 1 ; } else { /* count the total number of lines in the input file. * This may be less than obtained by 'wc -l' since we're not * counting comment lines here. */ int lc = prescan(f,wnrang) ; /* In a second scan, get the lc lines of the file, sort them, * and print them to stdout. */ hitrSort(f,lc,wnrang) ; fclose(f) ; return 0 ; } }