150 likes | 184 Views
This analysis uses R to perform various statistical tests and generate summary statistics on a dataset of adult information.
E N D
STR Assignment 2 Solve using R Presented by , Shikha Rani Deo BA(2014-2015)
adult <- read.table("C:/Users/shikharanideo/Desktop/Praxis/statistics/adult.txt", header=FALSE, sep=",", na.strings="NA", dec=".", strip.white=TRUE)
STRATIFIED SAMPLING Sample_17082014_1756 <- StrataSample(data = adult, strata1 = 10, sstotal= 2000, ppstype = "fixed", nminimum = 100, neyvar = ) • View(Sample_17082014_1756) • showData(Sample_17082014_1756, placement='-20+200', font=getRcmdr('logFont'), maxwidth=80, maxheight=30) > .Table <- table(Sample_17082014_1756$V10) > .Table # counts for V10 Female Male 1000 1000
SUMMARY OF SAMPLE • summary(Sample_17082014_1756)
RATIO OF VARIANCE(AGE) • Input: var.test(V1 ~ V10, alternative='two.sided', conf.level=.95, data=Sample_17082014_1756) # V1=Age, V10 = sex • OutPut: Ratio of Variance (1.1)>1 Ratio of Standard Deviation = sqrt(Ratio of Variance)= sqrt (1.104924)=1.0511
RATIO OF VARIANCE(hours per week) • Input: var.test(V13 ~ V10, alternative='two.sided', conf.level=.95, data=Sample_17082014_1756) # V1=Age, V13 = hours per week • OutPut: Ratio of Variance(1.02) ~ 1 Ratio of Standard Deviation = sqrt(Ratio of Variance)= sqrt(1.015206) = 1.008
WELCH TWO SAMPLE T-TEST(Age) • Input: • t.test(V1~V10, alternative='two.sided', conf.level=.95, var.equal=FALSE, data=Sample_17082014_1756) # V1=Age, V10= sex . Here var.equal=FALSE • OutPut: • Mean Age for female < Mean Age for male
WELCH TWO SAMPLE T-TEST(hours per week) • Input: t.test(V13~V10, alternative='two.sided', conf.level=.95, var.equal=TRUE, data=Sample_17082014_1756) # V1=Age, V13= Hours per week . Here var.equal=TRUE • OutPut: Mean(# hours per week (female))< Mean(# hours per week (male))
WILCOX TEST(age) • Input: wilcox.test(V1 ~ V10, alternative="two.sided", data=Sample_17082014_1756) # # V1=Age, V10= sex Output:
WILCOX TEST(Hours per week) • Input: wilcox.test(V13 ~ V10, alternative="two.sided", data=Sample_17082014_1756) # V1=Age, V13= Hours per week Output:
SUMMARY(sample) • Input: summary(adult) • Output:
SUBSET 'sample'("Not-in-Family) • Input: NOFdata <- subset(Sample_17082014_2245, subset=V8=="Not-in-family") showData(NOFdata, placement='-20+200', font=getRcmdr('logFont'), maxwidth=80, maxheight=30) • Output:
Subset'sample' ("Not-in Family" and "self employed") • Input: SEdata <- subset(NOFdata, subset=V2=="Self-emp-not-inc"|V2=="Self-emp-inc") showData(SEdata, placement='-20+200', font=getRcmdr('logFont'), maxwidth=80, maxheight=30) • Output: • P("Self Employed Person" and "Not-in-family" )= 38/2000= 0.019
CODE(CONTINGENCY TABLE) • Input: .Table <- xtabs(~V10+V6, data=Sample_17082014_2245, subset=V6=="Divorced"| V6=="Married-civ-spouse"|V6=="Never-married") .Table .Test <- chisq.test(.Table, correct=FALSE) .Test round(.Test$residuals^2, 2) # Chi-square Components remove(.Test) remove(.Table)
OUTPUT(CONTINGENCY TABLE) Output: Since X-squared =NAN and p-value= NA. There is no correlation between these category and gender • Pearson's Chi-squared test • Chi-square Components