最后都匯到一張結(jié)果表格中
之前我用的是把表格轉(zhuǎn)置后,用nmiss和cmiss求橫向的數(shù)組的缺失值
這樣代碼比較簡單,但是后來發(fā)現(xiàn)proc transpose實在是太慢了,而且耗資源
這下改了下,直接用sql對列進行計算了,不需要轉(zhuǎn)置了
- %macro GetMissNum(SourceTable,TargetTable,InputVar);
- /**********************************************************************/
- /* 此宏的作用是統(tǒng)計原表中不同變量的缺失值數(shù)量。其中SourceTable是原始 */
- /* 表格,SourceTable是結(jié)果表格;TargetTable是結(jié)果表格;InputVar是原始 */
- /* 表格中的變量,可設(shè)多個變量,用空格分隔,也可如下設(shè)置:=_Numeric_表 */
- /* 示統(tǒng)計全部數(shù)值型變量,=_Character_表示統(tǒng)計全部字符型變量,=_All_表 */
- /* 示統(tǒng)計全部變量。 */
- /* */
- /* 最終結(jié)果表格中包含所有指定變量的名稱、類型和相應(yīng)的缺失值數(shù)量。 */
- /* */
- /* Created on 2013.5.8 */
- /* Modified on 2013.5.8 */
- /**********************************************************************/
- /* 檢查TargetTable的存在性 */
- %if &TargetTable EQ %STR() %then %do;
- %put ERROR: The TargetTable should not be blank, please check it again.;
- %goto exit;
- %end;
- /* 檢查InputVar的合法性 */
- %if %UPCASE(&InputVar) NE _NUMERIC_ AND %UPCASE(&InputVar) NE _CHARACTER_ AND %UPCASE(&InputVar) NE _ALL_ %then %do;
- %ChkVar(SourceTable=&SourceTable,InputVar=&InputVar,FlagVarExists=GMN_FlagVarExists);
- %if %SYSFUNC(FIND(&GMN_FlagVarExists,0)) NE 0 %then %do;
- %put ERROR: The InputVar should be _Numeric_, _Character_, _All_ or any variable name in SourceTable, case insensitive and without quotes.;
- %put ERROR: The InputVar "%SCAN(&InputVar,%SYSFUNC(FIND(&GMN_FlagVarExists,0)))" does not exist in SourceTable, please check it again.;
- %goto exit;
- %end;
- %end;
- %if %UPCASE(&InputVar) EQ _NUMERIC_ %then %do;
- proc contents data=&SourceTable position out=GMN_VarList(keep=name type varnum) noprint;
- run;
- /* 檢查數(shù)值型變量是否存在 */
- %ChkValue(SourceTable=GMN_VarList,
- InputVar=type,
- Value=1,
- FlagValueExists=GMN_FlagNumVarExists);
- %if &GMN_FlagNumVarExists GT 0 %then %do;
- proc sql noprint;
- select NAME into :GMN_NumVarList separated by ' '
- from GMN_VarList
- where TYPE EQ 1;
- quit;
- /* 化簡原始表格 */
- data &TargetTable(keep=&GMN_NumVarList drop=GMN_i);
- set &SourceTable;
- array VarList &GMN_NumVarList;
- do GMN_i=1 to dim(VarList);
- if VarList{GMN_i} NE . then VarList{GMN_i}=0;
- else VarList{GMN_i}=1;
- end;
- run;
- %GetStatsForTable(SourceTable=&TargetTable,
- TargetTable=&TargetTable,
- ByFactors=,
- InputVar=&GMN_NumVarList,
- InputVarType=,
- OutputVarType=,
- Weight=,
- Statistic=SUM);
- proc transpose data=&TargetTable out=&TargetTable;
- var &GMN_NumVarList;
- run;
- data &TargetTable(rename=(_NAME_=VarName _LABEL_=VarLabel COL1=MissNum));
- retain _NAME_ _LABEL_ VarType COL1;
- set &TargetTable;
- VarType='N';
- run;
- %end;
- %else %do;
- %put ERROR: There is no numeric variable existed in SourceTable, please check it again.;
- %goto exit;
- %end;
- %end;
- %else %if %UPCASE(&InputVar) EQ _CHARACTER_ %then %do;
- proc contents data=&SourceTable position out=GMN_VarList(keep=name type varnum) noprint;
- run;
- /* 檢查字符型變量是否存在 */
- %ChkValue(SourceTable=GMN_VarList,
- InputVar=type,
- Value=2,
- FlagValueExists=GMN_FlagCharVarExists);
- %if &GMN_FlagCharVarExists GT 0 %then %do;
- proc sql noprint;
- select NAME,STRIP(NAME)||'_Temp' into :GMN_CharVarList separated by ' ',:GMN_CharVarList_Temp separated by ' '
- from GMN_VarList
- where TYPE EQ 2;
- quit;
- /* 化簡原始表格 */
- data &TargetTable(keep=&GMN_CharVarList_Temp drop=GMN_j);
- set &SourceTable;
- array VarList &GMN_CharVarList;
- array VarList_Temp &GMN_CharVarList_Temp;
- do GMN_j=1 to dim(VarList);
- if VarList{GMN_j} NE "" then VarList_Temp{GMN_j}=0;
- else VarList_Temp{GMN_j}=1;
- end;
- run;
- %GetStatsForTable(SourceTable=&TargetTable,
- TargetTable=&TargetTable,
- ByFactors=,
- InputVar=&GMN_CharVarList_Temp,
- InputVarType=,
- OutputVarType=,
- Weight=,
- Statistic=SUM);
- proc transpose data=&TargetTable out=&TargetTable;
- var &GMN_CharVarList_Temp;
- run;
- data &TargetTable(rename=(_NAME_=VarName _LABEL_=VarLabel COL1=MissNum));
- retain _NAME_ _LABEL_ VarType COL1;
- set &TargetTable;
- VarType='C';
- _NAME_=SUBSTR(_NAME_,1,LENGTH(_NAME_)-5);
- run;
- %end;
- %else %do;
- %put ERROR: There is no character variable existed in SourceTable, please check it again.;
- %goto exit;
- %end;
- %end;
- %else %do;
- proc contents data=&SourceTable position out=GMN_VarList(keep=name type varnum) noprint;
- run;
- /* 篩選指定的變量 */
- %if %UPCASE(&InputVar) NE _ALL_ %then %do;
- %let InputVar_Comma=%PrxChange(InputString=&InputVar,PrxString=s/(\w+)/'$1'/); /* 給InputVar中的代碼加引號 */
- %let InputVar_Comma=%SYSFUNC(TRANSLATE(&InputVar_Comma,%STR(,),%STR( ))); /* 替換InputVar中的空格為逗號 */
- proc sql noprint;
- create table GMN_VarList as
- select * from GMN_VarList
- where Name in (&InputVar_Comma)
- order by Name;
- quit;
- %end;
- /* 檢查數(shù)值型變量是否存在 */
- %ChkValue(SourceTable=GMN_VarList,
- InputVar=type,
- Value=1,
- FlagValueExists=GMN_FlagNumVarExists);
- /* 檢查字符型變量是否存在 */
- %ChkValue(SourceTable=GMN_VarList,
- InputVar=type,
- Value=2,
- FlagValueExists=GMN_FlagCharVarExists);
- %if &GMN_FlagNumVarExists GT 0 %then %do;
- proc sql noprint;
- select NAME into :GMN_NumVarList separated by ' '
- from GMN_VarList
- where TYPE EQ 1;
- quit;
- /* 化簡原始表格 */
- data GMN_NumMiss(keep=&GMN_NumVarList drop=GMN_k);
- set &SourceTable;
- array VarList &GMN_NumVarList;
- do GMN_k=1 to dim(VarList);
- if VarList{GMN_k} NE . then VarList{GMN_k}=0;
- else VarList{GMN_k}=1;
- end;
- run;
- %GetStatsForTable(SourceTable=GMN_NumMiss,
- TargetTable=GMN_NumMiss,
- ByFactors=,
- InputVar=&GMN_NumVarList,
- InputVarType=,
- OutputVarType=,
- Weight=,
- Statistic=SUM);
- proc transpose data=GMN_NumMiss out=GMN_NumMiss;
- var &GMN_NumVarList;
- run;
- data GMN_NumMiss(rename=(_NAME_=VarName _LABEL_=VarLabel COL1=MissNum));
- retain _NAME_ _LABEL_ VarType COL1;
- set GMN_NumMiss;
- VarType='N';
- run;
- %end;
- %if &GMN_FlagCharVarExists GT 0 %then %do;
- proc sql noprint;
- select NAME,STRIP(NAME)||'_Temp' into :GMN_CharVarList separated by ' ',:GMN_CharVarList_Temp separated by ' '
- from GMN_VarList
- where TYPE EQ 2;
- quit;
- /* 化簡原始表格 */
- data GMN_CharMiss(keep=&GMN_CharVarList_Temp drop=GMN_l);
- set &SourceTable;
- array VarList &GMN_CharVarList;
- array VarList_Temp &GMN_CharVarList_Temp;
- do GMN_l=1 to dim(VarList);
- if VarList{GMN_l} NE "" then VarList_Temp{GMN_l}=0;
- else VarList_Temp{GMN_l}=1;
- end;
- run;
- %GetStatsForTable(SourceTable=GMN_CharMiss,
- TargetTable=GMN_CharMiss,
- ByFactors=,
- InputVar=&GMN_CharVarList_Temp,
- InputVarType=,
- OutputVarType=,
- Weight=,
- Statistic=SUM);
- proc transpose data=GMN_CharMiss out=GMN_CharMiss;
- var &GMN_CharVarList_Temp;
- run;
- data GMN_CharMiss(rename=(_NAME_=VarName _LABEL_=VarLabel COL1=MissNum));
- retain _NAME_ _LABEL_ VarType COL1;
- set GMN_CharMiss;
- VarType='C';
- _NAME_=SUBSTR(_NAME_,1,LENGTH(_NAME_)-5);
- run;
- %end;
- data &TargetTable;
- set
- %if &GMN_FlagNumVarExists GT 0 %then %do;
- GMN_NumMiss
- %end;
- %if &GMN_FlagCharVarExists GT 0 %then %do;
- GMN_CharMiss
- %end;
- ;
- run;
- %end;
- /* 刪除不必要的表格 */
- proc datasets lib=work nolist;
- delete GMN_:;
- quit;
- %exit:
- %mend;
- %macro Demo();
- %let SourceTable=Cars;
- %let TargetTable=MissNum;
- %let InputVar=Cylinders; /* =_Numeric_表示統(tǒng)計全部數(shù)值型變量,=_Character_表示統(tǒng)計全部字符型變量,=_All_表示統(tǒng)計全部變量,大小寫不敏感 */
- %GetMissNum(&SourceTable,&TargetTable,&InputVar);
- %mend;