In the post (https://statcompute.wordpress.com/2017/06/15/finer-monotonic-binning-based-on-isotonic-regression), it is shown how to do a finer monotonic binning with isotonic regression in R.
Below is a SAS macro implementing the monotonic binning with the same idea of isotonic regression. This macro is more efficient than the one shown in (https://statcompute.wordpress.com/2012/06/10/a-sas-macro-implementing-monotonic-woe-transformation-in-scorecard-development) without iterative binning and is also able to significantly increase the binning granularity.
%macro monobin(data = , y = , x = );
options mprint mlogic;
data _data_ (keep = _x _y);
set
&data;
where
&y
in
(0, 1)
and
&x ~= .;
_y = &y;
_x = &x;
run;
proc transreg data = _last_ noprint;
model identity(_y) = monotone(_x);
output
out
= _tmp1 tip = _t;
run;
proc summary data = _last_ nway;
class _t_x;
output
out
= _data_ (
drop
= _freq_ _type_) mean(_y) = _rate;
run;
proc sort data = _last_;
by
_rate;
run;
data _tmp2;
set
_last_;
by
_rate;
_idx = _n_;
if _rate = 0
then
_idx = _idx + 1;
if _rate = 1
then
_idx = _idx - 1;
run;
proc sql noprint;
create
table
_tmp3
as
select
a.*,
b._idx
from
_tmp1
as
a
inner
join
_tmp2
as
b
on
a._t_x = b._t_x;
create
table
_tmp4
as
select
a._idx,
min
(a._x)
as
_min_x,
max
(a._x)
as
_max_x,
sum
(a._y)
as
_bads,
count
(a._y)
as
_freq,
mean(a._y)
as
_rate,
sum
(a._y) / b.bads
as
_bpct,
sum
(1 - a._y) / (b.freq - b.bads)
as
_gpct,
log(calculated _bpct / calculated _gpct)
as
_woe,
(calculated _bpct - calculated _gpct) * calculated _woe
as
_iv
from
_tmp3
as
a, (
select
count
(*)
as
freq,
sum
(_y)
as
bads
from
_tmp3)
as
b
group
by
a._idx;
quit;
title
"Monotonic WoE Binning for %upcase(%trim(&x))"
;
proc print data = _last_ label noobs;
var _min_x _max_x _bads _freq _rate _woe _iv;
label
_min_x =
"Lower"
_max_x =
"Upper"
_bads =
"#Bads"
_freq =
"#Freq"
_rate =
"BadRate"
_woe =
"WoE"
_iv =
"IV"
;
sum
_bads _freq _iv;
run;
title;
%mend monobin;
Below is the sample output for LTV, showing an identical binning scheme to the one generated by the R isobin() function.
更多相关文章请看 https://statcompute.wordpress.com/category/scorecard/
原创文章,作者:xsmile,如若转载,请注明出处:http://www.17bigdata.com/granular-monotonic-binning-in-sas/
更多内容请访问:IT源点
注意:本文归作者所有,未经作者允许,不得转载