########################################################################
## DESCRIPTION
## A WeBWorK problem that asks students to construct and interpret confidence
## intervals for a mean when the population standard deviation is unknown, using raw data values.
## WeBWorK problem written by JoAnne Taormina, <joanne(dot)taormina(at)ncc(dot)edu>
## adapted from a question from Mario F. Triola's Elementary Statistics text book.
## ENDDESCRIPTION
##
## KEYWORDS('confidence interval', 'sample standard deviation', 't-score')
##
## Author('JoAnne Taormina')
## Institution('Nassau Community College')
########################################################################
DOCUMENT();
loadMacros(
"PGstandard.pl",
"MathObjects.pl",
"PGgraphmacros.pl",
"weightedGrader.pl",
"PGunion.pl",
"parserMultiAnswer.pl",
"PGcourse.pl",
"PGchoicemacros.pl",
);

# All MathObjects created below are built in the Numeric context.
Context("Numeric");

# Number of measurements generated for each time period.
$total_values=13;

# Raw samples. Every 4000 B.C. value is drawn before any 150 A.D. value so
# the sequence of random() calls (and therefore each seeded version of the
# problem) stays the same.
@data_150 = ();
@data_4000 = ();
$data_4000[$_] = random(118, 138, 1) foreach 0 .. $total_values - 1;
$data_150[$_] = random(126, 142, 1) foreach 0 .. $total_values - 1;

# Sample totals, then the (still unrounded) sample means.
foreach my $k (0 .. $total_values - 1) {
  $sum_150 += $data_150[$k];
  $sum_4000 += $data_4000[$k];
}
$mean_150 = $sum_150 / $total_values;
$mean_4000 = $sum_4000 / $total_values;

# Per-observation deviations from the sample mean and their squares, plus the
# running sums of the squared deviations, for both samples.
@deviation_from_mean_150 = ();
@deviation_from_mean_squared_150 = ();
$sum_deviation_from_mean_squared_150 = 0;

@deviation_from_mean_4000 = ();
@deviation_from_mean_squared_4000 = ();
$sum_deviation_from_mean_squared_4000 = 0;

# 150 A.D. sample.
foreach my $k (0 .. $total_values - 1) {
  my $offset = $data_150[$k] - $mean_150;
  $deviation_from_mean_150[$k] = $offset;
  $deviation_from_mean_squared_150[$k] = $offset**2;
  $sum_deviation_from_mean_squared_150 += $deviation_from_mean_squared_150[$k];
}

# 4000 B.C. sample.
foreach my $k (0 .. $total_values - 1) {
  my $offset = $data_4000[$k] - $mean_4000;
  $deviation_from_mean_4000[$k] = $offset;
  $deviation_from_mean_squared_4000[$k] = $offset**2;
  $sum_deviation_from_mean_squared_4000 += $deviation_from_mean_squared_4000[$k];
}

# Sample variance divides by (n - 1); the standard deviation is its square root.
$sample_variance_150 = $sum_deviation_from_mean_squared_150 / ($total_values - 1);
$sample_variance_4000 = $sum_deviation_from_mean_squared_4000 / ($total_values - 1);
$sample_std_dev_150 = sqrt($sample_variance_150);
$sample_std_dev_4000 = sqrt($sample_variance_4000);

# Round half away from zero: means to 2 decimal places, standard deviations
# to 4 decimal places. The sign term (x <=> 0) keeps the rounding symmetric.
$mean_150 = int(100 * $mean_150 + .5 * ($mean_150 <=> 0)) / 100;
$sample_std_dev_150 = int(10000 * $sample_std_dev_150 + .5 * ($sample_std_dev_150 <=> 0)) / 10000;

$mean_4000 = int(100 * $mean_4000 + .5 * ($mean_4000 <=> 0)) / 100;
$sample_std_dev_4000 = int(10000 * $sample_std_dev_4000 + .5 * ($sample_std_dev_4000 <=> 0)) / 10000;

# Critical t value used for the 95% intervals. It is hard-coded (2.18 is the
# two-decimal value for 12 degrees of freedom, i.e. 13 observations), so it
# has to be updated by hand if $total_values is ever changed.
$t95 = 2.18;

# Rounds to 2 decimal places, half away from zero. Using one helper for all
# four limits avoids repeating the variable name inside the formula; the
# original upper 4000 B.C. limit misspelled it there, which silently turned
# that rounding into a truncation.
my $round_to_hundredths = sub {
  my ($value) = @_;
  return int(100*$value + .5*($value <=> 0))/100;
};

# Margins of error, built from the already-rounded mean and standard
# deviation (still plain Perl numbers at this point).
my $margin_150 = $t95*($sample_std_dev_150/sqrt($total_values));
my $margin_4000 = $t95*($sample_std_dev_4000/sqrt($total_values));

# Interval limits, rounded to 2 decimal places and promoted to MathObjects.
$conf95_int_low_150 = Compute($round_to_hundredths->($mean_150 - $margin_150));
$conf95_int_high_150 = Compute($round_to_hundredths->($mean_150 + $margin_150));
$conf95_int_low_4000 = Compute($round_to_hundredths->($mean_4000 - $margin_4000));
$conf95_int_high_4000 = Compute($round_to_hundredths->($mean_4000 + $margin_4000));

# Promote the remaining answers to MathObjects only after the arithmetic
# above, which relies on them being plain numbers.
$mean_150 = Compute($mean_150);
$mean_4000 = Compute($mean_4000);

$sample_std_dev_150 = Compute($sample_std_dev_150);
$sample_std_dev_4000= Compute($sample_std_dev_4000);

$t95 = Compute($t95);
$sqrt_total_values = sqrt($total_values);
$sqrt_total_values = Compute($sqrt_total_values);

# Part (d): multiple-choice interpretation of the two intervals.
$radio_conf_int = new_multiple_choice();

# Two closed intervals overlap exactly when each one starts no later than the
# other one ends. The correct answer and the first distractor swap roles
# depending on whether the generated intervals overlap.
if ($conf95_int_low_150<=$conf95_int_high_4000 && $conf95_int_low_4000 <= $conf95_int_high_150)
{
  $correct_choice = "No.  The confidence intervals for skull breadths from 4000 B.C. and 150 A.D. overlap, so it's possible that the true population mean skull breadth for the two time periods could be the same.";

  $choice_a =  "Yes.  The confidence intervals for skull breadths from 4000 B.C. and 150 A.D. do not overlap, so the true population mean skull breadth for the two time periods could be different.";
}
else
{
  $correct_choice = "Yes.  The confidence intervals for skull breadths from 4000 B.C. and 150 A.D. do not overlap, so it's possible that the true population mean skull breadth for the two time periods could be different.";

  $choice_a =  "No.  The confidence intervals for skull breadths from 4000 B.C. and 150 A.D. overlap, so the true population mean skull breadth for the two time periods could be the same.";
}

# Second distractor: names whichever period has the higher lower limit.
if($conf95_int_low_150 > $conf95_int_low_4000)
{
  $choice_b = "Yes.  The true population mean skull breadth for 150 A.D. skulls must be higher than that of 4000 B.C. skulls since the confidence interval limits are higher for the 150 A.D. skulls.";
}
else
{
  $choice_b = "Yes.  The true population mean skull breadth for 4000 B.C. skulls must be higher than that of 150 A.D. skulls since the confidence interval limits are higher for the 4000 B.C. skulls.";
}

# Third distractor: an irrelevant argument based on the margins of error.
$choice_c = "No.  The margins of error of the confidence intervals for 150 A.D. skulls and 4000 B.C. skulls are unequal, so the true population mean skull breadths for the two time periods could be the same.";

# $PERCENT is used instead of a literal percent sign so the question also
# typesets correctly in TeX (hardcopy) mode, where a bare % starts a comment.
$radio_conf_int->qa("Based on these confidence intervals, can we conclude with 95$PERCENT certainty that the true population mean skull breadth of the Egyptian male has changed from 4000 B.C. to 150 A.D.?", $correct_choice);
$radio_conf_int->extra($choice_a, $choice_b, $choice_c);

Context()->texStrings;

# Builds the fill-in-the-blank layout for one confidence interval. Each side
# of the interval is a two-column table: "blank -/+ blank" next to a blank
# stacked over a horizontal rule and another blank. The two sides are then
# placed side by side, joined by the word "to".
#
# Every call creates eight new answer blanks, in left-to-right order, so the
# order of the calls below fixes the order of the answer checkers.
my $interval_template = sub {
  return ColumnTable(
    ColumnTable(
      ans_rule(5)." - ".ans_rule(5),
      ans_rule(5).$BR.$HR.ans_rule(5),
      indent => 0, separation => 10, valign => "MIDDLE"
    ),
    ColumnTable(
      "to".($SPACE x 7).ans_rule(5)." + ".ans_rule(5),
      ans_rule(5).$BR.$HR.ans_rule(5),
      indent => 0, separation => 10, valign => "MIDDLE"
    ),
    indent => 0, separation => 10, valign => "MIDDLE"
  );
};

# First template is shown for the 4000 B.C. sample, second for 150 A.D.
$showformula = $interval_template->();
$showformula2 = $interval_template->();

# Problem statement. Answer blanks created here come after the sixteen blanks
# already created for the part (b) formula tables.
# $PERCENT is used instead of a literal percent sign: in TeX (hardcopy) mode a
# bare % starts a comment and would swallow the rest of the line, including
# the closing $EBOLD.
TEXT(beginproblem());
Context()->texStrings;
BEGIN_TEXT
Archeologists have discovered a sample of ancient Egyptian skulls from two different time periods, 4000 B.C. and 150 A.D.  They believe that the mean maximum skull breadth (the width of the skull at its widest part) has increased from 4000 B.C. to 150 A.D.  A sample of 13 Egyptian skull breadths (in mm) from each time period is shown below:
$BR $BR
$BBOLD 4000 B.C. $EBOLD
\[ $data_4000[0], \ $data_4000[1], \ $data_4000[2], \ $data_4000[3], \ $data_4000[4], \ $data_4000[5], \ $data_4000[6], \ $data_4000[7], \ $data_4000[8], \ $data_4000[9], \ $data_4000[10], \ $data_4000[11], \ $data_4000[12] \] 
$BR 
$BBOLD 150 A.D. $EBOLD
\[ $data_150[0], \ $data_150[1], \ $data_150[2], \ $data_150[3], \ $data_150[4], \ $data_150[5], \ $data_150[6], \ $data_150[7], \ $data_150[8], \ $data_150[9], \ $data_150[10], \ $data_150[11], \ $data_150[12] \] 
$BR 
(a) Calculate the mean and standard deviation for each data set.  Round each mean to 2 decimal places, and each standard deviation to 4 decimal places.
$BR $BR
$BBOLD 4000 B.C. mean: $EBOLD \{ ans_rule(5) \} $SPACE $SPACE $BBOLD 150 A.D. mean: $EBOLD \{ ans_rule(5) \} $BR $BR
$BBOLD 4000 B.C. standard deviation: $EBOLD \{ ans_rule(5) \} $SPACE $SPACE
$BBOLD 150 A.D. standard deviation: $EBOLD \{ ans_rule(5) \}
$BR $BR

(b) Using your answers from part (a) and the formula for a $BBOLD 95$PERCENT confidence interval $EBOLD as presented in lecture, fill in the blanks with the appropriate values for this problem for calculating the confidence interval below.  To enter \( \sqrt x \) where x is any number, type  sqrt(x).  For example, \( \sqrt 2 \) should be written as sqrt(2).
$BR $BR $BBOLD 4000 B.C. skull breadths: $EBOLD 
$BCENTER $showformula   $ECENTER
$BR $BR $BBOLD 150 A.D. skull breadths: $EBOLD 
$BCENTER $showformula2   $ECENTER
$BR $BR

(c) Using the formulas from part (b), find the $BBOLD 95$PERCENT confidence intervals $EBOLD for the mean maximum skull breadths of all Egyptian skulls from each period.  Round each answer to the nearest hundredth (2 decimal places).  
$BR $BR $BBOLD 4000 B.C. skull breadths: $EBOLD
$BCENTER \{ ans_rule(5) \} to \{ ans_rule(5) \} $ECENTER
$BR $BR $BBOLD 150 A.D. skull breadths: $EBOLD
$BCENTER \{ ans_rule(5) \} to \{ ans_rule(5) \} $ECENTER
$BR $BR
(d)
\{ $radio_conf_int->print_q() \}
$BR
\{ $radio_conf_int->print_a() \}
$BR$BR

END_TEXT
Context()->normalStrings;

Context("Numeric");

# Checkers must be registered in the order the answer blanks were created:
# the sixteen part (b) table blanks first (the tables are built before the
# problem text), then parts (a), (c) and (d). The weights add up to 100.

# Shared checker options: an exact match for rounded values, and small
# absolute tolerances for sqrt(n) and for the interval limits.
my %exact        = (tolType=>'absolute', tolerance=>0);
my %root_tol     = (tolType=>'absolute', tolerance=>.0001);
my %interval_tol = (tolType=>'absolute', tolerance=>.02);

# Part (b): for each sample (4000 B.C., then 150 A.D.) the same four values
# are entered twice, once for the lower limit and once for the upper limit.
foreach my $sample ([$mean_4000, $sample_std_dev_4000], [$mean_150, $sample_std_dev_150]) {
  my ($sample_mean, $sample_std_dev) = @$sample;
  foreach my $limit (1 .. 2) {
    WEIGHTED_ANS($sample_mean->cmp(%exact), 2);
    WEIGHTED_ANS($t95->cmp(%exact), 2);
    WEIGHTED_ANS($sample_std_dev->cmp(%exact), 2);
    WEIGHTED_ANS($sqrt_total_values->cmp(%root_tol), 3);
  }
}

# Part (a): both means, then both standard deviations.
foreach my $statistic ($mean_4000, $mean_150, $sample_std_dev_4000, $sample_std_dev_150) {
  WEIGHTED_ANS($statistic->cmp(%exact), 2);
}

# Part (c): lower then upper limit, 4000 B.C. first.
foreach my $limit_value ($conf95_int_low_4000, $conf95_int_high_4000,
                         $conf95_int_low_150, $conf95_int_high_150) {
  WEIGHTED_ANS($limit_value->cmp(%interval_tol), 11);
}

# Part (d): the multiple-choice interpretation.
WEIGHTED_ANS( radio_cmp( $radio_conf_int->correct_ans() ), 12 );

ENDDOCUMENT();