/** * db1b_formats.sas * * SAS proc format labels for the DB1B data sets. * * Jason R. Blevins * Durham, January 16, 2007 */ /* Define value formats */ proc format; value geotype 1 = 'Non-contiguous Domestic' 2 = 'Contiguous Domestic' ; value yesno 0 = 'No' 1 = 'Yes' ; value distgrp 1 = "1-500 mi." 2 = "500-999 mi." 3 = "1000-1499 mi." 4 = "1500-1999 mi." 5 = "2000-2499 mi." 6 = "2500-2999 mi." 7 = "3000-3499 mi." 8 = "3500-3999 mi." 9 = "4000-4499 mi." 10 = "4500-4999 mi." 11 = "5000-5499 mi." 12 = "5500-5999 mi." 13 = "6000-6499 mi." 14 = "6500-6999 mi." 15 = "7000-7499 mi." 16 = "7500-7999 mi." 17 = "8000-8499 mi." 18 = "8500-8999 mi." 19 = "9000-9499 mi." 20 = "9500-9999 mi." 21 = "10000-10499 mi." 22 = "10500-10999 mi." 23 = "11000-11499 mi." 24 = "11500-11999 mi." 25 = "12000+ mi." ; value $cpntype 'A' = 'US Reporting Carrier Flying Between Two U.S. Points' 'D' = 'US Non-Reporting Carrier Flying Within N. America Or Surface Traffic' 'E' = 'Foreign Carrier Flying Between Two U.S. Points (Cabotage)' ; run; Content-Type: text/plain; charset=UTF-8 Content-Length: 3774 Content-Disposition: inline; filename="process_db1bc.sas" Last-Modified: Wed, 19 Now 2008 11:50:21 GMT Expires: Wed, 19 Now 2008 11:55:21 GMT /** * process_db1bc.sas * * SAS macro to load and process a raw DB1B Coupon datset. * * Before this file is included, a data step should be started and * in infile statement should have been issued. Furthermore, * the file 'db1bc_formats.sas' should have been included. * * Usage Example: * * %include 'db1bc_formats.sas' * * data db1bc; * infile 'db1b_coupon-1993_1.csv' dsd firstobs=2; * %include 'process_db1bc.sas' * keep ItinID Year Quarter CpnOrigin CpnDest; * run; * * Jason R. Blevins * Chapel Hill, January 15, 2007 */ /* Label the variables */ attrib ItinID length=7 label = 'Itinerary ID' MktID length=7 label = 'Market ID' CpnSeqNum length=3 label = 'Coupon Sequence Number' ItinCoupons length=3 label = 'No. Coupons in Itinerary' Year length=3 label = 'Year' Quarter length=3 label = 'Quarter' CpnOrigin length=$3 label = 'Origin Airport Code' CpnOriginAptInd length=3 label = 'Multiple Airports at Origin' CpnOriginCityNum length=4 label = 'Origin City Code' CpnOriginCountry length=$2 label = 'Origin Country Code' CpnOriginStateFips length=4 label = 'Origin State FIPS Code' CpnOriginState length=$2 label = 'Origin State Code' CpnOriginStateName length=$25 label = 'Origin State Name' CpnOriginWac length=3 label = 'Origin World Area Code' CpnDest length=$3 label = 'Dest. Airport Code' CpnDestAptInd length=3 label = 'Multiple Airports at Dest.' CpnDestCityNum length=4 label = 'Dest. City Code' CpnDestCountry length=$2 label = 'Dest. Country Code' CpnDestStateFips length=4 label = 'Dest. State FIPS Code' CpnDestState length=$2 label = 'Dest. State Code' CpnDestStateName length=$25 label = 'Dest. State Name' CpnDestWac length=3 label = 'Dest. World Area Code' CpnBreak length=$1 label = 'Trip Break Code' CpnType length=$1 label = 'Coupon Type Code' CpnTkCarrier length=$2 label = 'Coupon Ticketing Carrier' CpnOpCarrier length=$2 label = 'Coupon Operating Carrier' CpnRpCarrier length=$2 label = 'Itinerary Reporting Carrier' CpnPassengers length=4 label = 'No. Passengers on Coupon' CpnFareClass length=$1 label = 'Coupon Fare Class' CpnDistance length=6 label = 'Coupon Distance' CpnDistanceGrp length=3 label = 'Coupon Distance Group' CpnGateway length=3 label = 'Gateway Indicator' ItinGeoType length=3 label = 'Itinerary Geography Type' CpnGeoType length=3 label = 'Coupon Geography Type' ; /* Read the source CSV file */ input ItinID MktID CpnSeqNum ItinCoupons Year Quarter CpnOrigin $ CpnOriginAptInd CpnOriginCityNum CpnOriginCountry $ CpnOriginStateFips CpnOriginState $ CpnOriginStateName $ CpnOriginWac CpnDest $ CpnDestAptInd CpnDestCityNum CpnDestCountry $ CpnDestStateFips CpnDestState $ CpnDestStateName $ CpnDestWac CpnBreak $ CpnType $ CpnTkCarrier $ CpnOpCarrier $ CpnRpCarrier $ CpnPassengers CpnFareClass $ CpnDistance CpnDistanceGrp CpnGateway ItinGeoType CpnGeoType ; /* Associate variable labels */ format CpnDistanceGrp distgrp. CpnGateway yesno. ItinGeoType geotype. CpnGeoType geotype. ; Content-Type: text/plain; charset=UTF-8 Content-Length: 3817 Content-Disposition: inline; filename="process_db1bm.sas" Last-Modified: Wed, 19 Now 2008 11:50:21 GMT Expires: Wed, 19 Now 2008 11:55:21 GMT /** * process_db1bm.sas * * SAS macro to load and process a raw DB1B Market datset. It * is designed to be used within a DATA step. It defines the names * and storage-efficient data types for the columns of the DB1BMarket * dataset. * * Jason R. Blevins * Chapel Hill, January 15, 2007 */ /* Define the internal representations and labels */ attrib ItinID length=7 label='Itinerary ID' MktID length=7 label='Market ID' MktCoupons length=3 label='No. Coupons in Market' Year length=3 label='Year' Quarter length=3 label='Quarter' MktOrigin length=$3 label='Origin Airport Code' MktOriginAptInd length=3 label='Multiple Airports at Origin' MktOriginCityNum length=4 label='Origin City Code' MktOriginCountry length=$2 label='Origin Country Code' MktOriginStateFips length=4 label='Origin State FIPS Code' MktOriginState length=$2 label='Origin State Code' MktOriginStateName length=$25 label='Origin State Name' MktOriginWac length=3 label='Origin World Area Code' MktDest length=$3 label='Dest. Airport Code' MktDestAptInd length=3 label='Multiple Airports at Dest' MktDestCityNum length=4 label='Dest. City Code' MktDestCountry length=$2 label='Dest. Country Code' MktDestStateFips length=4 label='Dest. State FIPS Code' MktDestState length=$2 label='Dest. State Code' MktDestStateName length=$25 label='Dest. State Name' MktDestWac length=3 label='Dest. World Area Code' MktAirportGrp length=$30 label='Market Airport Group' MktWacGrp length=$30 label='Market World Area Code Group' MktTkCarrierChg length=3 label='Ticketing Carrier Change' MktTkCarrierGrp length=$30 label='Ticketing Carrier Group' MktOpCarrierChg length=3 label='Operating Carrier Change' MktOpCarrierGrp length=$30 label='Operating Carrier Group' ItinRpCarrier length=$2 label='Reporting Carrier Code' MktTkCarrier length=$2 label='Ticketing Carrier (Market)' MktOpCarrier length=$2 label='Operating Carrier (Market)' MktBulkFare length=3 label='Bulk Fare Indicator (Market)' MktPassengers length=4 label='Number of Passengers (Market)' MktFare length=6 label='Market Fare' MktDistance length=6 label='Market Distance' MktDistanceGrp length=3 label='Market Distance Group' MktMilesFlown length=6 label='Market Miles Flown' MktNonStopMiles length=6 label='Non-Stop Market Miles' ItinGeoType length=3 label='Itinerary Geography Type' MktGeoType length=3 label='Market Geography Type' ; /* Read the source CSV file */ input ItinID MktID MktCoupons Year Quarter MktOrigin $ MktOriginAptInd MktOriginCityNum MktOriginCountry $ MktOriginStateFips MktOriginState $ MktOriginStateName $ MktOriginWac MktDest $ MktDestAptInd MktDestCityNum MktDestCountry $ MktDestStateFips MktDestState $ MktDestStateName $ MktDestWac MktAirportGrp $ MktWacGrp $ MktTkCarrierChg MktTkCarrierGrp $ MktOpCarrierChg MktOpCarrierGrp $ ItinRpCarrier $ MktTkCarrier $ MktOpCarrier $ MktBulkFare MktPassengers MktFare MktDistance MktDistanceGrp MktMilesFlown MktNonStopMiles ItinGeoType MktGeoType ; Content-Type: text/plain; charset=UTF-8 Content-Length: 2515 Content-Disposition: inline; filename="process_db1bt.sas" Last-Modified: Wed, 19 Now 2008 11:50:21 GMT Expires: Wed, 19 Now 2008 11:55:21 GMT /** * process_db1bt.sas * * SAS macro to load and process a raw DB1B Ticket datset. This script is * designed to be used inside a SAS DATA step to process rows from a DB1B * Ticket dataset. * * Jason R. Blevins * Chapel Hill, January 15, 2007 */ /* Define the internal representations and labels */ attrib ItinID length=7 label='Itinerary ID' Coupons length=3 label='No. Coupons in Itinerary' Year length=3 label='Year' Quarter length=3 label='Quarter' ItinOrigin length=$3 label='Itinerary Origin Airport' ItinOriginAptInd length=3 label='Multiple Airports at Origin' ItinOriginCityNum length=4 label='Itinerary Origin City Code' ItinOriginCountry length=$2 label='Itinerary Origin Country Code' ItinOriginStateFips length=4 label='Itinerary Origin State FIPS' ItinOriginState length=$2 label='Itinerary Origin State Code' ItinOriginStateName length=$25 label='Itinerary Origin State Name' ItinOriginWac length=3 label='Itinerary Origin WAC' ItinRoundTrip length=3 label='Round Trip Itinerary' ItinOnLine length=3 label='OnLine Itinerary' ItinDollarCred length=3 label='Itinerary Fare Credibility' ItinYield length=6 label='Total Itinerary Yield' ItinRpCarrier length=$2 label='Itinerary Reporting Carrier' ItinPassengers length=4 label='No. Passengers (Itinerary)' ItinFare length=6 label='Total Itinerary Fare' ItinBulkFare length=3 label='Bulk Fare (Itinerary)' ItinDistance length=6 label='Itinerary Distance' ItinDistanceGrp length=3 label='Itinerary Distance Group' ItinMilesFlown length=6 label='Itinerary Miles Flown' ItinGeoType length=3 label='Itinerary Geography Type' ; /* Read the source CSV file */ input ItinID Coupons Year Quarter ItinOrigin $ ItinOriginAptInd ItinOriginCityNum ItinOriginCountry $ ItinOriginStateFips ItinOriginState $ ItinOriginStateName $ ItinOriginWac ItinRoundTrip ItinOnLine ItinDollarCred ItinYield ItinRpCarrier $ ItinPassengers ItinFare ItinBulkFare ItinDistance ItinDistanceGrp ItinMilesFlown ItinGeoType ; Content-Type: text/plain; charset=UTF-8 Content-Length: 3617 Content-Disposition: inline; filename="process_t100ds.sas" Last-Modified: Wed, 19 Now 2008 11:50:21 GMT Expires: Wed, 19 Now 2008 11:55:21 GMT /** * process_t100.sas * SAS macro to load raw T100 Domestic Segment data. * * Jason R. Blevins * Durham, January 16, 2007 * * This script processes a T100 Domestic Segment data set, storing * the variables as efficiently as possible by using the attrib statement * to declare the data types. * * This SAS code is designed to be used within a DATA step to assist in * naming the variables and setting the proper data types. Example: * * filename t100ds 't100d_segment-1993.csv'; * * data t100ds; * infile t100ds dsd firstobs=2 lrecl=8192; * * %include 'process_t100ds.sas'; * * keep * year quarter month origin dest unique_carrier distance * departures_performed seats passengers * ; * run; */ attrib year length=3 label='' quarter length=3 label='' month length=3 label='' origin length=$3 label='' origin_city_name length=$40 label='' origin_city_num length=4 label='' origin_state_abr length=$2 label='' origin_state_fips length=4 label='' origin_state_name length=$40 label='' origin_wac length=3 label='' dest length=$3 label='' dest_city_name length=$40 label='' dest_city_num length=4 label='' dest_state_abr length=$2 label='' dest_state_fips length=4 label='' dest_state_name length=$40 label='' dest_wac length=3 label='' airline_id length=4 label='' unique_carrier length=$2 label='' unique_carrier_name length=$40 label='' unique_carrier_entity length=$10 label='' carrier_region length=$1 label='' carrier length=$2 label='' carrier_name length=$40 label='' carrier_group length=3 label='' carrier_group_new length=3 label='' distance length=6 label='' distance_group length=3 label='' class length=$1 label='' aircraft_group length=3 label='' aircraft_type length=3 label='' aircraft_config length=3 label='' departures_scheduled length=4 label='' departures_performed length=4 label='' payload length=6 label='' seats length=6 label='' passengers length=6 label='' freight length=6 label='' mail length=6 label='' ramp_to_ramp length=6 label='' air_time length=6 label='' ; input year quarter month origin $ origin_city_name $ origin_city_num origin_state_abr $ origin_state_fips origin_state_name $ origin_wac dest $ dest_city_name $ dest_city_num dest_state_abr $ dest_state_fips dest_state_name $ dest_wac airline_id unique_carrier $ unique_carrier_name $ unique_carrier_entity $ carrier_region $ carrier $ carrier_name $ carrier_group carrier_group_new distance distance_group class $ aircraft_group aircraft_type aircraft_config departures_scheduled departures_performed payload seats passengers freight mail ramp_to_ramp air_time ; Content-Type: text/plain; charset=UTF-8 Content-Length: 1469 Content-Disposition: inline; filename="t100ds.rb" Last-Modified: Wed, 19 Now 2008 11:50:21 GMT Expires: Wed, 19 Now 2008 11:55:21 GMT #! /usr/bin/ruby # # Loads and processes a T100 Domestic Segment CSV file from Transtats. # # Usage: ruby t100ds.rb t100ds_2005.csv # # Jason R. Blevins , # Durham, February 19, 2007 require 'csv' # Take the filename as a command-line argument filename = ARGV[0] # Initialize counters count = 0 # Process the CSV file row by row CSV.open(filename, 'r') do |row| count += 1 # Count rows next if (count == 1) # Skip the header # Construct a hash with the row information entry = { :year => row[0].to_i, :quarter => row[1].to_i, :month => row[2].to_i, :origin => row[3], :dest => row[10], :carrier => row[22], :distance => row[26].to_i, :distance_group => row[27].to_i, :service_class => row[28], :aircraft => row[30].to_i, :aircraft_config => row[31].to_i, :departures_scheduled => row[32].to_i, :departures_performed => row[33].to_i, :payload => row[34].to_i, :seats => row[35].to_i, :passengers => row[36].to_i, :freight => row[37].to_i, :mail => row[38].to_i, :ramp_to_ramp => row[39].to_i, :air_time => row[40].to_i } # Process the entry hash here # ... end puts "Read #{count} rows" Content-Type: text/plain; charset=UTF-8 Content-Length: 381 Content-Disposition: inline; filename="t100ds_fmt.sed" Last-Modified: Wed, 19 Now 2008 11:50:21 GMT Expires: Wed, 19 Now 2008 11:55:21 GMT # t100ds_fmt.sed # # removes the quoting and changes the delimiter to a pipe. The # file can then be processed by a field-aware tool such as awk? # # Jason R. Blevins # Durham, January 18, 2007 # # Usage: # # $ cat t100d_segment-1993.csv | sed -f t100ds_fmt.sed s/\",\"/\|/g s/\",/\|/g s/,\"/\|/g s/\([[:digit:]]\),/\1\|/g s/,\([[:digit:]]\)/\|\1/g Content-Type: text/plain; charset=UTF-8 Content-Length: 1684 Content-Disposition: inline; filename="t100ds_qtr.awk" Last-Modified: Wed, 19 Now 2008 11:50:21 GMT Expires: Wed, 19 Now 2008 11:55:21 GMT # t100ds_qtr.awk # # The following awk? script aggregates a (monthly) T100 Domestic # Segment dataset to the quarter level for matching with other # quarterly datasets such as the Airline Origin and Destination # Survey. This script presumes that the file that has already been # processed into separate fields (for example, by using by the # t100ds_fmt.sed script). Only departures scheduled, departures # performed, seats, and passengers are aggregated, but adding # additional variables is straightforward. # # Usage: # # $ cat t100d_segment-1993.csv | sed -f t100ds_fmt.sed | awk -F\| -f t100ds_qtr.awk # # Example output: # # SFO DFW AA 4 728 709 128723 77447 # CMH FAR NW 3 1 1 100 81 # BOS MCO DL 3 276 276 48546 42182 # DTW MKE TW 2 2 2 268 52 # DFW OMA AA 2 364 361 51237 28839 # IND BNA AA 1 168 165 16003 8250 # SJC BUR QQ 4 89 88 12320 2439 # # Jason R. Blevins # Durham, January 18, 2007 BEGIN { } NR > 1 { # Strip headers # origin dest carrier quarter distance dep_sched dep_perf seats passengers origin=$4; dest=$11; carrier=$23; quarter=$2; distance=$27; dep_sched=$33; dep_perf=$34; seats=$36; passengers=$37; odcq_dep_sched[origin " " dest " " carrier " " quarter]+=dep_sched; odcq_dep_perf[origin " " dest " " carrier " " quarter]+=dep_perf; odcq_seats[origin " " dest " " carrier " " quarter]+=seats; odcq_passengers[origin " " dest " " carrier " " quarter]+=passengers; } END { for (i in odcq_dep_sched) { print i, odcq_dep_sched[i], odcq_dep_perf[i], odcq_seats[i], odcq_passengers[i]; } }