SAS編程-宏:Compare宏程序的兩種思路

前面文章SAS編程:Compare結(jié)果輸出方式介紹 中,介紹了Compare過(guò)程步的4種輸出比較結(jié)果的方式。各家公司基于此,一般都有相關(guān)的Compare宏程序。這類(lèi)宏程序的思路大體有兩種:

第一種:結(jié)果輸出到SAS日志
第二種:結(jié)果輸出到SAS數(shù)據(jù)集

公司宏程序一般不會(huì)簡(jiǎn)單地輸出對(duì)上或沒(méi)對(duì)上的信息,而是基于自動(dòng)宏變量SYSINFO的返回值,輸出具體的Compare結(jié)果。下面簡(jiǎn)單介紹,這兩種思路的實(shí)現(xiàn)思路。

1. 結(jié)果輸出到SAS日志

上一篇文章已經(jīng)介紹過(guò)將自動(dòng)宏變量SYSINFO信息輸出到SAS日志,稍微改動(dòng)一下就可以作為宏程序。在沒(méi)有完全對(duì)上時(shí),輸出信息前添加Error字段,方便檢查識(shí)別。

data base;
  set sashelp.class;
run;

data comp;
  set sashelp.class;

  if _n_ = 1 then height =100;
  label weight = "W";
run;

proc compare base = base comp=comp out=df outbase outcomp outnoequal outdif; 
run;

%let rc=&sysinfo;

data _null_;

put '<<<< Proc Compare Results: ';

/* 0. No differences */
 if &rc = '0'b then
      put '<<<< No differences identified';
/* 1. Test for data set label */
   if &rc = '1'b then
      put '<<<< ' 'Err' 'or: '  'Data sets have different labels';
/* 2. Test for data set types */
   if &rc = '1.'b then
      put '<<<< ' 'Err' 'or: '  'Data set types differ';
/* 3. Test for variable informats */
   if &rc = '1..'b then
      put '<<<< ' 'Err' 'or: '  'Variable has different informat';
/* 4. Test for variable formats */
   if &rc = '1...'b then
      put '<<<< ' 'Err' 'or: '  'Variable has different format';
/* 5. Test for length */
   if &rc = '1....'b then
      put '<<<< ' 'Err' 'or: '  'Variable has different lengths between the base data set 
      and the comparison data set';
/* 6. Test for label */
   if &rc = '1.....'b then
      put '<<<< ' 'Err' 'or: '  'Variable has different label';
/* 7. Test for base observation */
if &rc = '1......'b then
      put '<<<< ' 'Err' 'or: '  'Base data set has observation not in comparison data set';
/* 8. Test for comparison observation */
   if &rc = '1.......'b then
      put  '<<<< ' 'Err' 'or: '  'Comparison data set has observation not in base';
/* 9. Test for base BY group */
if &rc = '1........'b then
      put '<<<< ' 'Err' 'or: '  'Base data set has BY group not in comparison';
/* 10. Test for comparison BY group */
   if &rc = '1.........'b then
      put  '<<<< ' 'Err' 'or: '  'Comparison data set has BY group not in base';
/* 11. Variable in base data set not in compare data set */
   if &rc ='1..........'b then 
      put  '<<<< ' 'Err' 'or: '  'Variable in base data set not found in comparison data set';
/* 12. Comparison data set has variable not in base data set */
   if &rc = '1...........'b then
      put  '<<<< ' 'Err' 'or: '  'Comparison data set has variable not contained in the 
      base data set';
/* 13. Test for values */
   if &rc = '1............'b then
      put '<<<< ' 'Err' 'or: '  'A value comparison was unequal';
/* 14. Conflicting variable types */
   if &rc ='1.............'b then
      put  '<<<< ' 'Err' 'or: '  'Conflicting variable types between the two data sets 
      being compared';
/* 15. Test for BY variables */
   if &rc = '1..............'b then
      put '<<<< ' 'Err' 'or: '  'BY variables do not match';
/* 16. Fatal error*/
   if &rc ='1...............'b then
      put  '<<<< ' 'Err' 'or: '  'Fatal error: comparison not done';
run;

以上程序運(yùn)行后,日志輸出結(jié)果如下,兩個(gè)數(shù)據(jù)集不一致信息都會(huì)輸出到日志。同時(shí),Error字段標(biāo)記,方便日志檢查。

Log

如果兩個(gè)數(shù)據(jù)集是完全比對(duì)上的,日志輸出結(jié)果如下:

Log

2. 結(jié)果輸出到SAS數(shù)據(jù)集

將Compare結(jié)果輸出到SAS數(shù)據(jù)集,跟輸出到Log有一些不同。

日志信息只是針對(duì)單個(gè)數(shù)據(jù)集的比對(duì)結(jié)果,即一個(gè)Log對(duì)應(yīng)一個(gè)比對(duì)結(jié)果;而數(shù)據(jù)集記錄可以不斷追加補(bǔ)充,可以將文件夾中的所有比對(duì)結(jié)果都輸出到數(shù)據(jù)集中。日志中是按行輸出不一致的信息,為了方便查閱,日志信息最好輸出到同一行記錄中。多次比對(duì)時(shí),日志文件會(huì)自動(dòng)替換,數(shù)據(jù)集的記錄需要考慮如何替換更新。

我這里提供一個(gè)實(shí)現(xiàn)思路。如果結(jié)果數(shù)據(jù)集不存在,新建一個(gè)數(shù)據(jù)集保存比對(duì)信息。對(duì)于所有可能的不一致信息,先保存在不同的變量中,最后有一個(gè)變量進(jìn)行拼接匯總。對(duì)于多次比對(duì)的情況,結(jié)果數(shù)據(jù)集中保存程序運(yùn)行時(shí)間較晚的那一條記錄。

實(shí)現(xiàn)宏程序如下:

data base;
  set sashelp.class;
run;

data comp;
  set sashelp.class;

  if _n_ = 1 then height =100;
  label weight = "W";
run;

%macro Compare(base=, comp=, outres=, outdif=);

proc compare base=&base. comp=&comp. out=&outdif outbase outcomp outnoequal outdif;
run;

**Get compare reslult code;
%let rc=&sysinfo;

**Get libname and memename of dataset

*Base;
%if %index(&base., .) %then %do;
  %let lib_base = %sysfunc(upcase( %scan(&base., 1, .)  ));
  %let mem_base = %sysfunc(lowcase(%scan(&base., 2, .) ));
%end;
%else %do;
  %let lib_base = WORK;
  %let mem_base = %sysfunc(lowcase(&base.));  
%end;

*comp;
%if %index(&comp., .) %then %do;
  %let lib_comp = %sysfunc(upcase(%scan(&comp., 1, .) ));
  %let mem_comp = %sysfunc(lowcase(%scan(&comp., 2, .) ));
%end;
%else %do;
  %let lib_comp = WORK;
  %let mem_comp = %sysfunc(lowcase(&comp. ));  
%end;

data _tmp1;

  retain lib_base  mem_base lib_comp mem_comp comp_dtm comp_code comp_res;
  length lib_base lib_comp $8 mem_base mem_comp $64 com_res $500 c0-c16 $50;

  lib_base = "&lib_base.";
  lib_comp = "&lib_comp.";

  mem_base = "&mem_base.";
  mem_comp = "&mem_comp.";

/* 0. No differences */
 if &rc = 0 then
      c0 = 'No differences identified';
/* 1. Test for data set label */
   if &rc = '1'b then
      c1 = '01: Data sets have different labels';
/* 2. Test for data set types */
   if &rc = '1.'b then
      c2 = '02: Data set types differ';
/* 3. Test for variable informats */
   if &rc = '1..'b then
      c3 = '03: Variable has different informat';
/* 4. Test for variable formats */
   if &rc = '1...'b then
      c4 = '04: Variable has different format';
/* 5. Test for length */
   if &rc = '1....'b then
      c5 = '05: Variable has different lengths between the base data set 
      and the comparison data set';
/* 6. Test for label */
   if &rc = '1.....'b then
      c6 = '06: Variable has different label';
/* 7. Test for base observation */
if &rc = '1......'b then
      c7 = '07: Base data set has observation not in comparison data set';
/* 8. Test for comparison observation */
   if &rc = '1.......'b then
      c8 = '08: Comparison data set has observation not in base';
/* 9. Test for base BY group */
if &rc = '1........'b then
      c9 = '09: Base data set has BY group not in comparison';
/* 10. Test for comparison BY group */
   if &rc = '1.........'b then
      c10 = '10: Comparison data set has BY group not in base';
/* 11. Variable in base data set not in compare data set */
   if &rc ='1..........'b then 
      c11 = '11: Variable in base data set not found in comparison data set';
/* 12. Comparison data set has variable not in base data set */
   if &rc = '1...........'b then
      c12 = '12: Comparison data set has variable not contained in the 
      base data set';
/* 13. Test for values */
   if &rc = '1............'b then
      c13 = '13: A value comparison was unequal';
/* 14. Conflicting variable types */
   if &rc ='1.............'b then
      c14 =   '14: Conflicting variable types between the two data sets 
      being compared';
/* 15. Test for BY variables */
   if &rc = '1..............'b then
      c15 = '15: BY variables do not match';
/* 16. Fatal error*/
   if &rc ='1...............'b then
      c16 = '16: Fatal error: comparison not done';
  
  format comp_dtm e8601dt.;
  comp_dtm = datetime();

  comp_code = &rc;
  comp_res = catx("; ", of c0-c16);

  keep lib_base  mem_base lib_comp mem_comp comp_dtm comp_code comp_res;
run;

**Create compare result dataset;
%if %sysfunc(exist(&outres.)) %then %do;
data _tmp2;
  set &outres. _tmp1;
run;

**Keep the latest record for one dataset;
proc sql noprint;
  create table &outres. as
    select * 
    from _tmp2
    group by lib_base, mem_base
    having comp_dtm = max(comp_dtm)
  ;
quit;

%end;
%else %do;
 data &outres.;
  set _tmp;
run;
%end;

%mend;

%compare(
  base = base
  ,comp = comp
  ,outres = comp_res
  ,outdif = df
);

總結(jié)

就我個(gè)人而言,我認(rèn)為最簡(jiǎn)潔的方式就是Compare過(guò)程步的Error選項(xiàng),簡(jiǎn)潔明了。知曉沒(méi)對(duì)上之后,直接查看out=選項(xiàng)輸出的結(jié)果。不過(guò),畢竟各家公司都有自己的SOP,需要按照自家公司流程進(jìn)行。

希望以上內(nèi)容能夠幫助讀者理解Compare流程。

感謝閱讀, 歡迎關(guān)注!
若有疑問(wèn),歡迎評(píng)論交流!

?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀(guān)點(diǎn),簡(jiǎn)書(shū)系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容