#!/usr/bin/perl -s # The following documentation is formatted to be read by running # unix> perldoc wdsToSql.pl =pod =head1 Description This is the script wdsToSql.pl that translates ASCII files representations of the Washington Double Star Catalog (WDS) into a SQL command sequence which can be fed into a SQL server (MySQL, sqlite, ...) to simplify selections. =head1 Syntax wdsToSql.pl [-o] perldoc wdsToSql.pl =head1 Options The only available option is B<-o> which must be used to tell the program that the standard input is the Brbital catalog, not the double star catalog. =head1 Usage =over 4 =item 1. Create the SQL command file (here: C or C respectively) by piping all available WDS text files with the "new" 130 or 264 bytes/row format through this shell script here. This creates a much bigger file than the original files combined. The switch -o must be used in case this is the orbital catalog: unix> cat wdsnew*.txt | wdsToSql.pl > wds.sql unix> wdsToSql.pl -o < orb6orbits.txt > orb6.sql =item 2. Start the database server (look for mysqld for example on Linux systems), if needed: unix> sqlite3 =item 3. execute the previously generated file(s) as commands sqlite> .read wds.sql sqlite> .read orb6.sql =item 4. use SQL commands to select data from the database. Examples are =over 2 =item * select all WDS entries where the denomination starts with two zeros followed by 10 and any other trailing letters: sqlite> SELECT den FROM wds WHERE den LIKE '0010%'; =item * select all WDS entries which have more than two component: sqlite> SELECT den FROM wds WHERE comp NOT LIKE ' '; sqlite> SELECT * FROM wds WHERE comp NOT LIKE ' '; =item * show all WDS denominations with proper motions of the primay less than 2 (in the original units) sqlite> SELECT den FROM wds WHERE pm1ra < 2 AND pm1dec < 2; sqlite> SELECT * FROM wds WHERE pm1ra < 2 AND pm1dec < 2; =item * show all WDS denominations with absolute values of the proper motions of the primay less than 2 (in the original units), probably closer to what one wants to get instead of example c: sqlite> SELECT * FROM wds WHERE ABS(pm1ra) < 2 AND ABS(pm1dec) < 2; =item * show the entire WDS DB sqlite> SELECT * FROM wds; =item * show all WDS stars observed after the year 2000 sqlite> SELECT * FROM wds WHERE datelst > 2000; =item * show all WDS stars observed after the year 2000 with spectral type A: sqlite> SELECT * FROM wds WHERE datelst > 2000 AND styp LIKE 'A%'; =item * show all WDS stars observed after the year 2000 with spectral type A or B: sqlite> SELECT * FROM wds WHERE datelst > 2000 AND styp LIKE 'A%' OR styp LIKE 'B%'; =item * show all orbits with best grade and periods less than 200 days sqlite> SELECT * FROM orb6 WHERE grade = 1 AND P < 200 AND Pu = 'd'; =item * show all WDS entries with a magnitude of the first component (numerically) smaller than 10 sqlite> SELECT * FROM wds WHERE magfst < 10; =item * list star separations of the WDS entries with a magnitude of the first component (numerically) smaller than 10 sqlite> SELECT sepfst FROM wds WHERE magfst < 10; list star separations of the WDS entries with an infrared magnitude of the first component (numerically) smaller than 11 sqlite> SELECT sepfst FROM wds WHERE note LIKE '%K%' AND magfst < 11; count entries within the WDS with an infrared magnitude of the first component (numerically) smaller than 11 sqlite> SELECT COUNT(sepfst) FROM wds WHERE note LIKE '%K%' AND magfst < 11; =item * show duration and unit for entries in the orbital catalog with magnitude brighter than 10 and periods between half a year and 4 years sqlite> SELECT P,Pu FROM orb6 WHERE magfst < '10' AND ( Pu='y' AND P<'4' AND P>'0.5' OR Pu='d' AND P>'182' AND P<'1460'); =item * dump star separations of the WDS entries with a magnitude of the first component (numerically) smaller than 10 into the file wdstmp sqlite> .output wdstmp sqlite> SELECT sepfst FROM wds WHERE magfst < 10; sqlite> .output stdout =back =item 5. Quit the server sqlite> .quit =back =head1 See also mysql(1) sqlite3(1) http://ad.usno.navy.mil/wds/orb6/newformat.html http://ad.usno.navy.mil/wds/wdsweb_format.txt http://aam.ugpl.de/node/236 http://dev.mysql.com/doc/ http://sourceforge.net/search/?type=soft&exact=0&q=mysql&offset=25 http://www.geocities.com/SiliconValley/Vista/2207/sql1.html http://www.1keydata.com/sql/sql.html http://www.dcs.napier.ac.uk/~andrew/sql/ http://www.gnu.org/directory/postgresql.html http://www.gnu.org/directory/mysql.html http://www.sql-tutorial.net/ =head1 To do The current conversion of the RA and DEC coordinates is done as if these were text strings, which makes selection of sky sections awkward. These might better be represented as (signed) floating point numbers in a +-DDMMSS.sss and HHMMSS.sss format to allow use of the SQL greater-equal-less operators instead of the "C" operators. =head1 Author Richard J. Mathar, http://www.strw.leidenuniv.nl/~mathar =head1 History # who when what # mathar 2005-09-27 created # mathar 2005-09-28 corrected bug of dot delimiters (instead of comma) in DEC(..,..) # mathar 2005-11-07 corrected typo in the usage documentation =cut $col = 0 ; if ( $o eq '1') { # the case of handling the orbital catalog of visual binaries # see http://ad.usno.navy.mil/wds/orb6/newformat.html $db = 'orb6' ; # @n[$col] = 'ra' ; @c[$col] = '1-9' ; @f[$col] = 'char(9)' ; $col++ ; # @n[$col] = 'dec' ; @c[$col] = '10-18' ; @f[$col] = 'char(9)' ; $col++ ; @n[$col] = 'coo' ; @c[$col] = '1-18' ; @f[$col] = 'char(18)' ; $col++ ; @n[$col] = 'den' ; @c[$col] = '20-29' ; @f[$col] = 'char(10)' ; $pkey=$col ; $col++ ; @n[$col] = 'disc' ; @c[$col] = '31-44' ; @f[$col] = 'char(14)' ; $col++ ; @n[$col] = 'ads' ; @c[$col] = '46-50' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'hip' ; @c[$col] = '52-57' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'magfstC' ; @c[$col] = '66-66' ; @f[$col] = 'char(1)' ; $col++ ; @n[$col] = 'magfst' ; @c[$col] = '67-71' ; @f[$col] = 'DEC(5,2)' ; $col++ ; @n[$col] = 'magfstF' ; @c[$col] = '72-72' ; @f[$col] = 'char(1)' ; $col++ ; @n[$col] = 'magsndC' ; @c[$col] = '73-73' ; @f[$col] = 'char(1)' ; $col++ ; @n[$col] = 'magsnd' ; @c[$col] = '74-78' ; @f[$col] = 'DEC(5,2)' ; $col++ ; @n[$col] = 'magsndF' ; @c[$col] = '79-79' ; @f[$col] = 'char(1)' ; $col++ ; @n[$col] = 'P' ; @c[$col] = '82-92' ; @f[$col] = 'DEC(11,6)' ; $skey=$col; $col++ ; @n[$col] = 'Pu' ; @c[$col] = '93-93' ; @f[$col] = 'char(1)' ; $col++ ; @n[$col] = 'a' ; @c[$col] = '106-114' ; @f[$col] = 'DEC(9,5)' ; $col++ ; @n[$col] = 'au' ; @c[$col] = '115-115' ; @f[$col] = 'char(1)' ; $col++ ; @n[$col] = 'i' ; @c[$col] = '126-133' ; @f[$col] = 'DEC(8,4)' ; $col++ ; @n[$col] = 'Omegg' ; @c[$col] = '144-151' ; @f[$col] = 'DEC(8,4)' ; $col++ ; @n[$col] = 'T0' ; @c[$col] = '163-174' ; @f[$col] = 'DEC(12,6)' ; $col++ ; @n[$col] = 'T0u' ; @c[$col] = '175-175' ; @f[$col] = 'char(1)' ; $col++ ; @n[$col] = 'e' ; @c[$col] = '188-195' ; @f[$col] = 'DEC(8,6)' ; $col++ ; @n[$col] = 'omeg' ; @c[$col] = '206-213' ; @f[$col] = 'DEC(8,4)' ; $col++ ; @n[$col] = 'equin' ; @c[$col] = '224-227' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'datelst' ; @c[$col] = '229-232' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'grade' ; @c[$col] = '234-234' ; @f[$col] = 'INTEGER' ; $tkey=$col ; $col++ ; } else { # the case of handling the double star catalog # see http://ad.usno.navy.mil/wds/wdsweb_format.txt $db = 'wds' ; @n[$col] = 'den' ; @c[$col] = '1-10' ; @f[$col] = 'char(10)' ; $pkey=$col ; $col++ ; @n[$col] = 'disc' ; @c[$col] = '11-17' ; @f[$col] = 'char(7)' ; $skey=$col ; $col++ ; @n[$col] = 'comp' ; @c[$col] = '18-22' ; @f[$col] = 'char(5)' ; $tkey=$col ; $col++ ; @n[$col] = 'datefst' ; @c[$col] = '24-27' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'datelst' ; @c[$col] = '29-32' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'nobs' ; @c[$col] = '34-37' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'posfst' ; @c[$col] = '39-41' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'poslst' ; @c[$col] = '43-45' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'sepfst' ; @c[$col] = '47-51' ; @f[$col] = 'DEC(5,1)' ; $col++ ; @n[$col] = 'seplst' ; @c[$col] = '53-57' ; @f[$col] = 'DEC(5,1)' ; $col++ ; @n[$col] = 'magfst' ; @c[$col] = '59-63' ; @f[$col] = 'DEC(5,2)' ; $col++ ; @n[$col] = 'magsnd' ; @c[$col] = '65-69' ; @f[$col] = 'DEC(5,2)' ; $col++ ; @n[$col] = 'styp' ; @c[$col] = '71-79' ; @f[$col] = 'char(9)' ; $col++ ; @n[$col] = 'pm1ra' ; @c[$col] = '81-84' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'pm1dec' ; @c[$col] = '85-88' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'pm2ra' ; @c[$col] = '90-93' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'pm2dec' ; @c[$col] = '94-97' ; @f[$col] = 'INTEGER' ; $col++ ; @n[$col] = 'dnum' ; @c[$col] = '99-106' ; @f[$col] = 'char(8)' ; $col++ ; @n[$col] = 'note' ; @c[$col] = '108-111' ; @f[$col] = 'char(4)' ; $col++ ; @n[$col] = 'coo' ; @c[$col] = '113-130' ; @f[$col] = 'char(18)' ; $col++ ; } print 'CREATE TABLE ',$db,' (' ; # this is the constant (standard) part of the SQL insert statement $stdinsrt = 'INSERT INTO ' . $db . ' (' ; for ( $col = 0 ; $col < @n ; $col++) { if ( $col != 0 ) { print ", " ; $stdinsrt = $stdinsrt . ',' ; } print $n[$col],' ',$f[$col] ; # define the SQL column structure $stdinsrt = $stdinsrt . '"' . $n[$col] . '"' ; (@c1[$col],@c2[$col]) = split("-",@c[$col]) ; # isolate start and stop byte number @c1[$col]-- ; # index starts one lower at 0 in perl @cd[$col] = $c2[$col] - $c1[$col] ; # width of the byte columns } $stdinsrt .= ') VALUES(' ; print ", PRIMARY KEY (",@n[$pkey],",",@n[$skey],",",@n[$tkey],") ) ;\n" ; while ( $line = <>) { # debugging ... print $line ; print $stdinsrt ; for( $col = 0 ; $col < @n ; $col++) { $val = substr($line,@c1[$col],@cd[$col]) ; # extract the value from this particular line if ( $col != 0 ) { print "," ; } if ( $f[$col] =~ m/char/ ) { print '\'' . $val . '\'' ; } else { printf "%g",$val ; } } print ");\n" ; }