190 likes | 400 Views
Using SAS for Data Warehousing. Paul Hansen Anthem BCBS of Colorado paul.hansen@anthem.com. General Data Warehousing Strategy. Operational Data Store. Data Marts Dimensional Models. Base Tables. Base Tables. Very large Tend to mirror operational system Limited scalability
E N D
Using SAS for Data Warehousing Paul Hansen Anthem BCBS of Colorado paul.hansen@anthem.com
General Data Warehousing Strategy Operational Data Store Data Marts Dimensional Models Base Tables
Base Tables • Very large • Tend to mirror operational system • Limited scalability • Not efficient
Dimensional Model • Performance oriented • Size is kept manageable • Scales well • Designed for business needs • Efficient queries
Typical Data Mart • Fact tables are “long and skinny” • Dimensional tables are “short and wide” Dimensional Tables Dimensional Tables Fact Table
Dimensional Model • Fact tables contain facts we wish to measure, e.g., providers, members, employer groups, claims, etc. • Dimension tables contain attributes about facts we wish to “measure by”, e.g., dates, addresses, zip codes, lines of business, products, etc.
Variety of Data Marts/Warehouses • Small Region: 50 – 100M rows • Large Region: 500M – 1B rows • Platforms: HP UX, Sun Solaris, IBM AIX • DBMS: Oracle, Sybase, Terradata, DB2
And if that weren’t enough... • Add varying levels of granularity of data, different operating systems, DBMS, etc., how is meaningful analysis done across systems?
The Optimal Solution • In the end, there can be only one - SAS!
Out With the Old DW PC SAS
In With the New DW 1 DW 2 PC SAS SAS Server IBM Mainframe DM 1
Connect to Unix SAS Server1 of 2 • /***** • * NAME: h:\phansen\sas\sas_class\connect_sasserver_example.sas • *****/ • /***** • * Conect to UNIX • *****/ • %let sassvr=XXXX.XXXX.XXXX.XXXX; • filename rlink 'g:\sas\tcpunix.scr'; • options comamid=tcp remote=sassvr; • signon sassvr wait=yes; • rsubmit sassvr; • %let homevar=%sysget(HOME); • %put &homevar.; • %sysrput homevar=&homevar.; endrsubmit; • /* -- libnames that point to directories on server -- */ • libname home remote "&homevar." server=sassvr RENGINE=v9; • libname misc remote '/actuarial/misc' server=sassvr RENGINE=v9; • /* a libname to the Format Library by using the library keyword as the libref, you can then access the user formats as well as the sas built-in formats. */ • libname library remote '/actdata/stopwatch2/netbuild/formats' server=sassvr RENGINE=v9; • %let wdw=<db schema>; %syslput wdw=&wdw.; • %let WDWUID=<userid>; %syslput wdwuid=&wdwuid.; • %let WDWPWD=<PASSWORD>; %syslput wdwpwd=&wdwpwd.; • %let wdwpath=‘<database>.world'; %syslput wdwpath=&wdwpath.; • %let edw=EDWDBP; %syslput edw=&edw.; • %let edwschema=EDW; %syslput edwschema=&edwschema.; • %let EDWUID=<userid>; %syslput edwuid=&edwuid.; • %let EDWPWD=<PASSWORD>; %syslput edwpwd=&edwpwd.;
Connect to Unix SAS Server2 of 2 • %let edwt=EDWDBT; %syslput edwt=&edwt.; • %let edwtschema=EDW_UAT; %syslput edwtschema=&edwtschema.; • run; • LIBNAME convdb RENGINE=ORACLE • ROPTIONS="username=&wdwuid. password=&wdwpwd. • path=&wdwpath." • server=sassvr; • LIBNAME convdb_b RENGINE=ORACLE • ROPTIONS="username=&wdwuid. password=&wdwpwd. • path=&wdwpath. schema=rdwuser" • server=sassvr; • LIBNAME convdb_d RENGINE=ORACLE • ROPTIONS="username=&wdwuid. password=&wdwpwd. • path=&wdwpath. schema=dwhmgr" • server=sassvr; • LIBNAME edw RENGINE=DB2 • ROPTIONS="user=&edwuid. password=&edwpwd. • database=&edw. schema=edw" • server=sassvr; • LIBNAME edwt RENGINE=DB2 • ROPTIONS="user=&edwuid. password=&edwpwd. • database=&edwt. schema=edwt" • server=sassvr; • run;
Tcp Unix Script • /* trace on; */ • /* echo on; */ • log "NOTE: Script file 'edwp.scr' entered."; • if not tcp then goto notcp; • if signoff then goto signoff; • /* --------------- TCP SIGNON ------------------------------------*/ • waitfor 'Login:' • , 'Username:' • , 'Scripted signon not allowed' : noscript • , 120 seconds: noinit; • /*----------------UNIX LOGON---------------------------------------*/ • /*-- for some reason, it needs a LF to turn the line around --*/ • /*-- after the login name has been typed. (A CR will not do) --*/ • /*-----------------------------------------------------------------*/ • input 'Userid?'; • type LF; • waitfor "Password", 30 seconds : nolog; • input nodisplay 'Password?'; • type LF; • unx_log: • waitfor 'Hello>' : unxspawn /*- Unix spawner prompt-*/ • , '$' /*-- a common prompt character --*/ • , '>' /*-- another common prompt character --*/ • , '%' /*-- another common prompt character --*/ • , '}' /*-- another common prompt character --*/ • , 'Login incorrect' : nouser • , 'Enter terminal type' : unx_term • , 'TERM' : unx_term • , 30 seconds : timeout
TCP Unix Script • ; • log 'NOTE: Logged onto UNIX... Starting remote SAS now.'; • /* noterminal suppressses prompts from remote SAS session. */ • /* nosyntaxcheck prevents remote side from going into syntax */ • /* checking mode when a syntax error is encountered. */ • type 'sas -dmr -comamid tcp -device grlink -noterminal -nosyntaxcheck' LF; • waitfor 'SESSION ESTABLISHED', 90 seconds : nosas; • log 'NOTE: SAS/CONNECT conversation established.'; • stop; • unxspawn: • type 'sas -dmr -comamid tcp -device grlink -noterminal '; • type '-nosyntaxcheck' LF; • waitfor 'SESSION ESTABLISHED', 90 seconds : nosas; • stop; • /*---------------- TCP SIGNOFF --------------------------------------*/ • signoff: • waitfor '$' • , '>' /*-- another common prompt character --*/ • , '%' /*-- another common prompt character --*/ • , '}' /*-- another common prompt character --*/ • , 30 seconds • ; • type 'logout' LF; • log 'NOTE: SAS/CONNECT conversation terminated.'; • stop;
TCP Unix Script • /*--------------- SUBROUTINES -----------------------------------*/ • unx_term: • /*---------------------------------------------------------------*/ • /*-- some unixen want the terminal-type. --*/ • /*-- so tell them we are the most basic of terminals. --*/ • /*---------------------------------------------------------------*/ • type 'tty' LF; • goto unx_log; • /*--------------- ERROR ROUTINES --------------------------------*/ • timeout: • log 'ERROR: Timeout waiting for remote session response.'; • abort; • nouser: • log 'ERROR: Unrecognized userid or password.'; • abort; • notcp: • log 'ERROR: Incorrect communications access method.'; • log 'NOTE: You must set "OPTIONS COMAMID=TCP;" before using this'; • log ' script file.'; • abort; • noinit: • log 'ERROR: Did not understand remote session banner.'; • nolog: • log 'ERROR: Did not receive userid or password prompt.'; • abort;
TCP Unix Script • nosas: • log 'ERROR: Did not get SAS software startup messages.'; • abort; • noscript: • /* This is the result of trying to signon with a script file */ • /* to a Unix spawner that has been invoked with the -noscript */ • /* option. You need to clear any script file reference and */ • /* then re-execute SIGNON. */ • log 'ERROR: Scripted signons are not allowed.'; • log 'NOTE: Clear any script file reference and retry SIGNON.'; • abort;
Using Your Connections1 of 2 • /********** • * Program: h:\phansen\sas\sas_class\edw_procedure_code_modifiers_example.sas • * Date: 20051103 • * Comments: • **********/ • /* ------------------------------- Unix Pull --------------------------------- */ • rsubmit sassvr; • options symbolgen mprint • ls=95 ps=54 nocenter nodate nonumber; • /***** • * Assign libnames • *****/ • libname unixdata '/home/hansepl/data'; • libname dosdata 'h:\phansen\temp'; • run; • /***** • * Connect to DB and get data • *****/ • options obs=max; • procsql feedback; • connect to DB2 (user=&edwuid. password=&edwpwd. database=&edwt. schema=&edwtschema.); • create table unixdata.claim_sums_by_paid_dt as • select * • from connection to DB2 (
Using Your Connections2 of 2 • select paid_dt, • sum(billed_charge_amt) billed, • sum(allowed_amt) allowed, • sum(paid_amt) paid • from &schema..claim_line • where substr(line_of_business_cd, 4, 1) IN ('D', 'E') • and transaction_source_system_cd IN ('WGS20', 'STAR') • group by paid_dt • ); disconnect from DB2; • quit; • run; • /***** • * download data to network drive • *****/ • procdownload DATA=unixdata.claim_sums_by_paid_dt OUT=dosdata.claim_sums_by_paid_dt; • run; • endrsubmit; • signoff sassvr; • /* ----------------------------- End of Unix Pull ----------------------------- */