[英]jQuery loop through strings calculate percentage discount from string values
[英]SAS calculate CDF as the percentage of values <= any defined values in a weighted variable
對於數據集中的加權變量“ leadb”,其想法是從值B開始,然后找到CDF作為值<= B的百分比以及該百分比的置信區間。
我可以使用 PROC SURVEYMEANS 為指定的任何單個值執行此操作,但是我不知道如何讓 SAS 同時給我多個百分比。 如果我想計算介於 0 和 max + 1 之間的各個值的百分比(間隔為 (max + 1)/100),應該如何修改代碼?
謝謝!
/* Build the analysis data set from data2012:
     lead5 - 1 when leadb <= 5, 0 when leadb > 5, missing when leadb is missing
     wocba - 1 for records with gender = 2 and age 16-49 inclusive, else 0
             (presumably women of child-bearing age - confirm the gender coding) */
data Test;
  set data2012;

  /* SAS orders missing values below every number, so "leadb <= 5" is TRUE
     for a missing leadb. Test for missing first; otherwise missing readings
     are counted as <= 5 and the lead5 = . branch can never be reached. */
  if missing(leadb) then lead5 = .;
  else if leadb <= 5 then lead5 = 1;
  else lead5 = 0;

  if (gender = 2 and age >= 16 and age <= 49) then wocba = 1;
  else wocba = 0;
run;
/* Design-based statistics for the 0/1 indicator lead5 (its mean is the
   weighted proportion of records with lead5 = 1), reported by level of
   wocba. The ODS "Domain" table is captured in the data set mystats. */
proc surveymeans data=Test;
  ods output domain=mystats;
  weight  weight2;
  strata  stratum;
  cluster psu;
  domain  wocba;
  var     lead5;
run;
/* Keep only the wocba = 1 domain and clamp the confidence limits of the
   mean to the range a proportion can take, [0, 1]. */
data mystats;
  set mystats;
  where wocba = 1;
  /* The computed lower limit can fall below zero, but a proportion is >= 0. */
  lower = max(lowerclmean, 0);
  /* The computed upper limit can exceed one, but a proportion is <= 1.
     The floor at 0 is kept from the original code; the cap at 1 is the fix. */
  upper = min(max(upperclmean, 0), 1);
run;
/* Print the domain estimate. lead5 flags leadb <= 5, so the title states
   "<= 5" (the earlier ">= 5" wording described the opposite event). */
proc print data=mystats;
  title "Proportion of blood lead values <= 5 for women of child-bearing age (16-49)";
  title2 "Weighted by rates of giving birth by age and race";
  title3 "With a 95% confidence interval";
run;
要計算考慮了加權調查設計的累積分布函數,您需要執行幾個步驟。
(注意:下面的代碼把各區間行百分比的標準誤(RowStdErr)直接用於累計百分比來構造置信區間,我不確定這樣做在統計上是否合理,您必須自己判斷。但如果是我,我會使用它。)
請參見下面的代碼。 我希望這有幫助!
*** SIMULATE 200 RECORDS OF TEST DATA ***;
*** THE RANDOM-NUMBER CALLS ARE KEPT IN THEIR ORIGINAL ORDER SO THE SAME VALUES ARE DRAWN ***;
data have;
  do i = 1 to 200;
    *** ANALYSIS VARIABLE: EXPONENTIAL DRAW SCALED BY 5 ***;
    leadb = 5 * ranexp(123321);

    *** SURVEY DESIGN VARIABLES: STRATUM CYCLES 1-12, PSU IS 1 OR 2, WEIGHT IS IN (0, 1000) ***;
    stratum = 1 + mod(i, 12);
    psu     = 2 - (ranuni(456654) > 0.5);
    weight2 = 1000 * ranuni(1991);

    *** DOMAIN VARIABLE: 1 WHEN THE UNIFORM DRAW EXCEEDS 0.7, OTHERWISE 0 ***;
    wocba = (ranuni(789987) > 0.7);

    output;
  end;
run;
*** FIND THE SMALLEST AND LARGEST OBSERVED LEADB AND STORE THEM AS MIN AND MAX IN STATS ***;
proc means data=have noprint;
  var leadb;
  output out=stats min(leadb)=min max(leadb)=max;
run;
*** USE MIN/MAX TO CREATE INTERVALS TO BIN THE DATA BY APPLYING A FORMAT ***;
*** CREATE A CONTROL DATASET THAT WILL BE CONVERTED INTO A FORMAT ***;
/* One output row per bin [start, end); each bin is labelled with its lower
   bound, so formatting a value returns the lower bound of its bin.
   NOTE(review): MIN and MAX are also reserved variable names in a PROC FORMAT
   CNTLIN data set (minimum / maximum format width). They are left in the
   output here exactly as the original code had them - confirm this is
   harmless for your data, or drop them before PROC FORMAT. */
data control_dset;
  set stats (drop=_type_ _freq_);
  min = floor(min);
  max = ceil(max);

  *** CALCULATE INTERVAL BASED ON MIN AND MAX OF DATA ***;
  interval = round( (max - min + 1)/100 , 0.1);
  /* When the range is narrow (max - min + 1 < 5) the rounding above gives 0,
     and a BY increment of 0 makes the DO loop below invalid. Fall back to the
     smallest step the 0.1 rounding grid allows. */
  interval = max(interval, 0.1);

  fmtname = 'leadfmt';
  type    = 'n';
  eexcl   = 'Y'; *** END VALUE IS EXCLUDED FROM RANGE ***;

  do i = min to max by interval;
    start = i;
    end   = i + interval;
    label = start;
    output;
  end;
run;
*** BUILD THE FORMAT DESCRIBED BY CONTROL_DSET (STORED IN THE DEFAULT WORK CATALOG) ***;
proc format library=work cntlin=control_dset;
run;
*** BIN EACH LEADB VALUE: FORMAT IT WITH LEADFMT, THEN READ THE LABEL BACK AS A NUMBER ***;
data start;
  set have;
  lead_interval = input(put(leadb, leadfmt.), 32.);
run;
ODS TRACE ON / LISTING;

*** USE SURVEYFREQ TO GET WEIGHTED FREQUENCIES AND ROW PERCENTS FOR THE BINNED CATEGORIES ***;
*** SURVEYFREQ HAS NO DOMAIN STATEMENT, SO THE DOMAIN VARIABLE GOES INTO THE TABLES REQUEST ***;
*** AND THE ROW PERCENTS FOR THE DOMAIN OF INTEREST ARE PICKED OUT OF THE OUTPUT AFTERWARDS ***;
proc surveyfreq data=start;
  weight  weight2;
  strata  stratum;
  cluster psu;

  *** DOMAIN / SUBPOPULATION VARIABLE CROSSED WITH THE BINNED VALUE, ROW PERCENTS REQUESTED ***;
  tables wocba * lead_interval / row ;

  *** CAPTURE THE DESIGN SUMMARY AND THE CROSSTABULATION AS DATA SETS ***;
  ods output summary=summary;
  ods output crosstabs=crosstabs;
run;

ods trace off;
*** RUNNING TOTAL OF THE ROW PERCENTS WITHIN THE WOCBA = 1 DOMAIN ***;
data really_close;
  set crosstabs;

  *** KEEP ONLY DOMAIN ROWS THAT ARE NOT THE TOTAL ROW AND HAVE A NONZERO COUNT ***;
  where wocba = 1
    and strip(F_lead_interval) ne 'Total'
    and frequency > 0;

  *** SUM STATEMENT: STARTS AT 0, IS RETAINED ACROSS ROWS, TREATS A MISSING ROWPERCENT AS 0 ***;
  CumRowPercent + RowPercent;

  drop Percent StdErr StdDev;
run;
*** I AM NOT SURE HOW LEGITIMATE IT IS TO USE THE RowStdErr WITH THE CUMULATIVE ROW PERCENTS ***;
*** CONSULT YOUR FAVORITE STATISTICIAN FOR A FIRM OPINION!!! ***;
*** GET T-STATISTIC TO CALCULATE 95% CONFIDENCE INTERVAL ***;
/* Produces a one-row data set holding nclus, nstrat, df and tstat. */
data tstat;
  *** SUMMARY STATISTICS FROM PROC SURVEYFREQ ***;
  set summary end=lastrec;
  retain nclus nstrat;

  /* Pick the counts out of the rows whose label mentions strata / clusters. */
  if index( upcase(Label1), 'STRATA') then nstrat = nvalue1;
  else if index( upcase(Label1), 'CLUSTER') then nclus = nvalue1;

  /* Only the final row is output. Subsetting BEFORE the calculations means
     df and QUANTILE are evaluated once, after every summary row has been
     read, instead of on earlier rows where nclus or nstrat is still missing. */
  if lastrec;

  *** DEGREES OF FREEDOM = NUMBER OF CLUSTERS - NUMBER OF STRATA ***;
  df = nclus - nstrat;

  *** GET T-STATISTIC FOR 95% CONFIDENCE INTERVAL ***;
  tstat = abs( quantile('T', 0.05/2, df) );

  drop label1 cvalue1 nvalue1;
run;
*** ATTACH THE T-STATISTIC TO EVERY ROW AND BUILD THE 95% LIMITS, CLAMPED TO 0-100 PERCENT ***;
data want;
  set really_close ;
  *** TSTAT HAS ONE ROW: READ IT ONCE, ITS VALUES ARE THEN RETAINED FOR ALL ROWS ***;
  if _N_ = 1 then set tstat;

  _half_width = tstat * RowStdErr;
  _raw_upper  = CumRowPercent + _half_width;

  *** LOWER LIMIT IS FLOORED AT 0 ***;
  CumRowPct_Lower = max(CumRowPercent - _half_width, 0);
  *** UPPER LIMIT IS CAPPED AT 100, AND A MISSING VALUE STAYS MISSING ***;
  CumRowPct_Upper = ifn(_raw_upper > 100, 100, _raw_upper);

  keep lead_interval CumRowPercent CumRowPct_Lower CumRowPct_Upper;
run;
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.