270 likes | 372 Views
Cary Miller DSUG Colorado Day September 13, 2007. An extensible directory-walking macro. The problem. Search a directory and all subdirectories for files with certain attributes such as Large files Files with a specific string in the file name Files containing a specific string inside.
E N D
Cary Miller DSUG Colorado Day September 13, 2007 An extensible directory-walking macro
The problem • Search a directory and all subdirectories for files with certain attributes such as • Large files • Files with a specific string in the file name • Files containing a specific string inside
Why? • Large file system • Unfamiliar system (new job) • Human memory failure • Automation/repetition • Duplicates Linux tools (grep, etc)
How to walk a directory • Recursion with a generic macro • Customize behavior with • Pluggable macros • Enclosing macros • Tools used • File functions • String functions • Modularity
Recursion • Simple in concept, tricky in practice • A macro that calls itself • Factorial • Fibonacci
%macro factorial(n);
  /* Function-style macro: generates the text of n! for an integer n.
     For n > 1 it multiplies n by factorial(n-1); for any n of 1 or less
     it generates 1, which ends the recursion. */
  %if &n GT 1 %then %eval(&n*%factorial(%eval(&n-1)));
  %else 1;
%mend factorial;

%let x = %factorial(1); *ok;
%let x = %factorial(2); *ok;
%let x = %factorial(3); *ok;
%put &x;
%put %factorial(5);
%macro dirWalk(fname); %if &fname is a directory %then; for each &file in &fname %dirWalk(&fname\&file); %ELSE * &fname is an ordinary file; %put &fname; %mend dirWalk;
%macro dirWalk(fname);
  /* Recursively walk fname.
     If fname can be opened as a directory, dirWalk is called on each of
     its members (joined to fname with a forward slash). Otherwise fname
     is treated as an ordinary file and written to the log. */
  %local i filrf rc did memcnt name;

  /* Try to open this fname as a directory. filrf starts out empty, so
     FILENAME is expected to generate a fileref and store it in filrf. */
  %let rc=%sysfunc(filename(filrf,&fname));
  %let did=%sysfunc(dopen(&filrf));

  %if &did > 0 %then %do;
    /* Seems to be a directory, so walk it. */
    %let memcnt=%sysfunc(dnum(&did));
    %if &memcnt > 0 %then %do i=1 %to &memcnt;
      %let name=&fname/%sysfunc(dread(&did,&i));
      %dirWalk(&name);
    %end;
    %let rc=%sysfunc(dclose(&did));
  %end;
  %else %put &fname;

  /* Deassign the fileref so that a deep walk does not leave one
     allocated fileref behind for every path it visited. */
  %let rc=%sysfunc(filename(filrf));
%mend dirWalk;
180 %dirwalk(s:\foo); s:\foo s:\foo/c.txt s:\foo/d.sas s:\foo/a s:\foo/a/x.txt s:\foo/a/y.txt s:\foo/b
Recursion works! • Add functionality by passing custom macro
%macro dirWalk(fname, fileMacro); %if &fname is a directory %then; for each &file in &fname %dirWalk(&fname\&file, &fileMacro); %ELSE * &fname is an ordinary file; %&fileMacro(&fname); %mend dirWalk; %macro parrot(fname); %put ..... &fname ...... parrot; %mend;
Great • Same result as the code that did not pass a macro name. • So why go to the extra trouble? • Because we want to pass a non-trivial macro.
%macro parrot(fname);
  /* Plug-in: echo the file name to the log between rows of dots. */
  %put ..... &fname ...... parrot;
%mend parrot;

%macro inFileName(fname, substr);
  /* Plug-in: write fname to the log when substr occurs in it. */
  %local pos;
  /* %index yields 0 when substr is absent and the 1-based position
     otherwise, so any non-zero value means a match. */
  %let pos = %index(&fname,&substr);
  %if &pos %then %put &fname;
%mend inFileName;
How to pass a macro that accepts parameters? %macro dirWalk(fname, fileMacro=, macroParams=); %if &fname is a directory %then; for each &file in &fname %dirWalk(&fname\&file, fileMacro=&fileMacro, macroParams=&macroParams); %ELSE * &fname is an ordinary file; %&fileMacro(&fname, &macroParams); %mend dirWalk; %macro parrot(fname, ignore); %put ..... &fname ...... parrot; %mend;
146 %dirwalk(s:\foo, fileMacro=parrot, macroParams=nothing); ..... s:\foo\c.txt ...... parrot ..... s:\foo\d.sas ...... parrot ..... s:\foo\a\x.txt ...... parrot ..... s:\foo\a\y.txt ...... parrot 148 %dirwalk(s:\~Cary\, fileMacro=inFileName, macroParams=txt); s:\~Cary\\bak\phone.txt s:\~Cary\\sas\sas.formats\win32user.txt s:\~Cary\\sas.items\bak\sas.item.macro.txt s:\~Cary\\sas.mm\notes.txt s:\~Cary\\to.go\data\RN99060.txt s:\~Cary\\to.go\data\states.54.txt s:\~Cary\\to.go\sas.cfmc\reports.1\plan.txt s:\~Cary\\vb.code\newReport.txt s:\~Cary\\vb.code\report.code.txt
It works! • But the macro call is unwieldy • Enclose the call in another macro
%macro listAll(dirName);
  /* Wrapper: walk dirName, handing each file to the parrot plug-in.
     parrot ignores its second argument, so a dummy value is passed. */
  %dirwalk(&dirName, fileMacro=parrot, macroParams=nothing);
%mend listAll;

%macro subInFN(dirName, substring);
  /* Wrapper: walk dirName, handing each file to the inFileName plug-in
     together with the substring to look for. */
  %dirwalk(&dirName, fileMacro=inFileName, macroParams=&substring);
%mend subInFN;
146 %dirwalk(s:\foo, fileMacro=parrot, macroParams=nothing); 146 %listAll(s:\foo); ..... s:\foo\c.txt ...... parrot ..... s:\foo\d.sas ...... parrot ..... s:\foo\a\x.txt ...... parrot ..... s:\foo\a\y.txt ...... parrot 148 %dirwalk(s:\~Cary\, fileMacro=inFileName, macroParams=txt); 148 %subInFN(s:\~Cary\, txt); s:\~Cary\\bak\phone.txt s:\~Cary\\sas\sas.formats\win32user.txt s:\~Cary\\sas.items\bak\sas.item.macro.txt s:\~Cary\\sas.mm\notes.txt s:\~Cary\\to.go\data\RN99060.txt s:\~Cary\\to.go\data\states.54.txt s:\~Cary\\to.go\sas.cfmc\reports.1\plan.txt s:\~Cary\\vb.code\newReport.txt s:\~Cary\\vb.code\report.code.txt
That’s it! • Adding plug-ins and wrappers is easy
Find big files
%macro findBigFile(fname, sizeCutoff);
  /* Plug-in: if the file is big, print out its size and name.
     The size comes from fileDataParse(fname, fsize) and is compared
     against sizeCutoff. */
  /* Keep fsize in this macro's own scope so the assignment cannot
     overwrite a same-named variable belonging to a caller. */
  %local fsize;
  %let fsize = %fileDataParse(&fname, fsize);
  %if &fsize GT &sizeCutoff %then %put &fsize &fname;
%mend findBigFile;

%macro findBig(dirName, cutoff=10000);
  /* Wrapper: walk dirName with the findBigFile plug-in, passing cutoff
     (default 10000) as the size threshold. */
  %dirwalk(&dirName, fileMacro=findBigFile, macroParams=&cutoff);
%mend findBig;
%macro fileData(filename);
  /* Grab the result of a DOS 'dir' command and return it.
     The command output is read through a pipe fileref one record at a
     time; only records that contain the value of filebase(filename) are
     emitted as the macro's result text. */
  %local fname fid str rc command ind;

  %let command = dir &filename;
  %let rc=%sysfunc(filename(fname,&command,pipe));
  %let fid=%sysfunc(fopen(&fname,s));

  /* FREAD returns 0 while there is another record to load. */
  %do %while(%sysfunc(fread(&fid)) EQ 0);
    %let rc=%sysfunc(fget(&fid,str,200));
    %let ind = %index("&str", %filebase(&filename));
    /* Blank out records that do not mention the file, so that the
       reference below contributes nothing for them. */
    %if &ind EQ 0 %then %let str=;
    &str
  %end;

  %let rc=%sysfunc(fclose(&fid));
  /* Deassign the pipe fileref so repeated calls do not accumulate
     allocated filerefs. */
  %let rc=%sysfunc(filename(fname));
%mend fileData;
/*%let foo = %fileData(&testFile1); %put &foo;*/
%macro fileDataParse(filename, attribute);
  /* Parse the line with the data. Grab the pieces. Return one piece.
     The line comes from fileData(filename). attribute is the name of the
     piece to return: modDate, modTime, fsize or fname. */
  /* This could probably be done better. */
  %local str len modDate modTime fsize;
  /* NOTE(review): fname is left out of the %local list on purpose.
     Assigning it below also updates an fname variable in the calling
     macro when one exists. The big-file example output lists bare file
     names, which suggests findBigFile depends on that. Confirm before
     making fname local. */

  %let str = %fileData(&filename);
  %let str = %sysfunc(compress("&str", ',')); /*remove commas*/
  %let str = %sysfunc(compbl(&str)); /*reduce space*/
  %let len = %eval(%sysfunc(length(&str)) -2);
  /* Nothing between the enclosing quotes means fileData returned no
     line for this file: return nothing instead of failing in %substr. */
  %if &len LT 1 %then %return;
  %let str = %substr(&str,2,&len); /*remove enclosing quotes*/

  /* Blank-delimited fields of the listing line, in order. */
  %let modDate = %sysfunc(scan(&str,1, ' '));
  %let modTime = %sysfunc(scan(&str,2, ' ')) %sysfunc(scan(&str,3, ' '));
  %let fsize = %sysfunc(scan(&str,4, ' '));
  %let fname = %scan(&str,5, ' ');

  /* Indirect reference: resolves to the value of the variable whose
     name is held in attribute. */
  &&&attribute.
%mend fileDataParse;
/*%let fs = %fileDataParse(&testFile1, fsize); %put fs &fs;*/
/*%let fn = %fileDataParse(&testFile1, fname); %put fn &fn;*/
/*%let md = %fileDataParse(&testFile1, modDate); %put md &md;*/
/*%let mt = %fileDataParse(&testFile1, modTime); %put mt &mt;*/
%findBig(&testDir2, cutoff=1000); 7338 annotate.png 3161 sas.item.macros.sas 17408 sas.items.xls 3216 dirwalk.1.sas 5142 dirwalk.2.sas 12485 recursion.1.sas 11125 recursion.sas 4585 recursion.whitlock.sas Big file example
String in file example %strInFile(s:\~Cary, goto, line) s:\~Cary\sas.items\flatten.sas :::: %if &i = 3 %then %goto continue ;
Same strategy works for common analytic tasks • Frequency tables • Histograms • etc
If you don’t want to reinvent the wheel • Roland Rashleigh-Berry has similar macros • DataSavantConsulting.com • SAS-L