########################################################################
## DESCRIPTION
## A WeBWorK problem that asks students to conduct a chi square hypothesis 
## test for independence.
## WeBWorK problem written by JoAnne Taormina, <joanne(dot)taormina(at)ncc(dot)edu>
## ENDDESCRIPTION
##
## KEYWORDS('hypothesis test', 'chi-square', 'independence', 'p-value')
##
## Author('JoAnne Taormina')
## Institution('Nassau Community College')
########################################################################

# Problem preamble: open the PG document, load the macro files used below,
# switch to the weighted grader, and emit the standard problem header.
DOCUMENT();        # This should be the first executable line in the problem.

# Macro files this problem relies on.  The statistics macros supply
# chisqrprob(), parserPopUp.pl supplies PopUp(), and weightedGrader.pl
# supplies the WEIGHTED_ANS() calls at the end of the file.
# NOTE(review): the load order is left untouched; later macro files may
# override definitions from earlier ones -- confirm before reordering.
loadMacros(
"PG.pl",
"PGbasicmacros.pl",
"PGchoicemacros.pl",
"PGanswermacros.pl",
"PGstatisticsmacros.pl",
"PGnumericalmacros.pl",
"weightedGrader.pl",
"PGstandard.pl",
"MathObjects.pl",
"parserPopUp.pl",
);
# Activate the grader that honours the per-answer weights passed to
# WEIGHTED_ANS() below (presumably defined in weightedGrader.pl).
install_weighted_grader();

TEXT(beginproblem());
# PG flag; presumably lets students see which individual parts are
# correct when only some of them are -- confirm against PG documentation.
$showPartialCorrectAnswers = 1;

# Randomised observed counts for the 2 x 3 contingency table.
# Cells 0-2 are the "Males" row and cells 3-5 the "Females" row; within a
# row the order follows the three time-per-day columns of the table below.
# The counts are drawn strictly in cell order so that a given problem seed
# always yields the same data.
my @count_ranges = ([50, 55], [30, 35], [25, 30], [45, 50], [25, 30], [45, 50]);
@observed = ();
foreach my $cell (0 .. $#count_ranges) {
  my ($lowest, $highest) = @{ $count_ranges[$cell] };
  $observed[$cell] = random($lowest, $highest, 1);
}

# Pieces of the displayed table; they are interpolated one by one into the
# problem text further down.
$table_start  = begintable(5);
$table_row[0] = row("    ", "   ", "Time spent on SmileBook");
$table_row[1] = row("    ", "    ", "1-30 minutes/day", "31-60 minutes/day", "more than 1 hour/day");
$table_row[2] = row("Gender", "Males", map { "$_" } @observed[0 .. 2]);
$table_row[3] = row("    ", "Females", map { "$_" } @observed[3 .. 5]);
$table_end    = endtable();

# Marginal totals: one per time column, one per gender row.
$column1_total = $observed[0] + $observed[3];
$column2_total = $observed[1] + $observed[4];
$column3_total = $observed[2] + $observed[5];

$row1_total = $observed[0] + $observed[1] + $observed[2];
$row2_total = $observed[3] + $observed[4] + $observed[5];

# Overall sample size.
$n = $row1_total + $row2_total;

# Expected count of each cell under independence:
#   (row total * column total) / n
# stored in the same row-major cell order as @observed.
my @row_totals    = ($row1_total, $row2_total);
my @column_totals = ($column1_total, $column2_total, $column3_total);

@expected = ();
foreach my $r (0 .. 1) {
  foreach my $c (0 .. 2) {
    $expected[3 * $r + $c] = ($row_totals[$r] * $column_totals[$c]) / $n;
  }
}

# Chi-square statistic: sum over the six cells of
#   (observed - expected)^2 / expected.
$chi_square = 0;
foreach my $cell (0 .. 5) {
  my $deviation = $observed[$cell] - $expected[$cell];
  $chi_square += ($deviation ** 2) / $expected[$cell];
}

# Degrees of freedom for a 2 x 3 table: (2 - 1) * (3 - 1).
$df = 2;

# Significance level, expressed as a percentage in the problem text.
$alpha = 5;

# Option pools for the hypothesis drop-downs.  Each menu gets its own
# independent shuffle, performed in the same sequence as the menus appear.
my @classification_options = ("population mean", "sample mean", "population proportion", "sample proportion", "population classifications", "sample classifications");
my @relationship_options = ("are dependent", "are independent", "have a correlation", "have no correlation");

# Part (a), null hypothesis: subject and relationship.
@choices = @classification_options[shuffle(6)];
$popup_H0_classifications = PopUp(["Choose:", @choices], "population classifications");

@choices = @relationship_options[shuffle(4)];
$popup_H0_independence = PopUp(["Choose:", @choices], "are independent");

# Part (a), alternative hypothesis: subject and relationship.
@choices = @classification_options[shuffle(6)];
$popup_Ha_classifications = PopUp(["Choose:", @choices], "population classifications");

@choices = @relationship_options[shuffle(4)];
$popup_Ha_dependence = PopUp(["Choose:", @choices], "are dependent");

# Part (b): radio buttons for the sampling distribution.
$radio_dist = new_multiple_choice();
$radio_dist->qa("Choose the correct distribution", "Chi-Square Distribution");
$radio_dist->extra("Normal Distribution", "T-Distribution");

# Part (c): drop-down for the p-value condition of the decision rule.
my @p_value_options = ("less than or equal to 0.05", "greater than or equal to 0.05", "less than or equal to 0.0005", "greater than or equal to 0.0005","equal to 0.05", "equal to 0.0005");
@perm = shuffle(6);
@choices = @p_value_options[@perm];
$popup_p_value = PopUp(["Choose:", @choices], "less than or equal to 0.05");

# Part (d): radio buttons for the symbol of the sample statistic.
$radio_samp_stat = new_multiple_choice();
$radio_samp_stat->qa("Symbol of the sample statistic:", "\( \chi^2 \)" );
$radio_samp_stat->extra("\( \bar{x}_2-\bar{x}_1 \)", "\( \mu_{\bar{x}} \)", "\( \bar{x} \)", "\( \mu_{\bar{x}_1-\bar{x}_2} \)", "\( \bar{x}_1-\bar{x}_2 \)");

# Round $value to the nearest 1/$scale, with halves rounded away from zero
# (the sign term keeps the behaviour symmetric for negative values).
my $round_half_away = sub {
  my ($value, $scale) = @_;
  return int($scale * $value + .5 * ($value <=> 0)) / $scale;
};

# The p-value is computed from the unrounded statistic; only afterwards
# are the p-value (4 decimal places) and the statistic (2 decimal places)
# rounded to the precision the students are asked to report.
$p_value     = $round_half_away->(chisqrprob($df, $chi_square), 10000);
$chi_square  = $round_half_away->($chi_square, 100);
$sample_stat = $chi_square;

# Turn the numeric answers into MathObjects so they provide ->cmp().
$alpha       = Compute($alpha);
$p_value     = Compute($p_value);
$sample_stat = Compute($sample_stat);
$df          = Compute($df);

# Expected counts, rounded to 2 decimal places and converted likewise.
foreach my $cell (0 .. 5) {
  my $rounded_count = $round_half_away->($expected[$cell], 100);
  $expected[$cell] = Compute($rounded_count);
}
# Part (e): drop-down for the test decision, in shuffled order.
@choices = ("Reject Ho and accept Ha", "Reject Ha and accept Ho", "Fail to reject Ho", "Fail to reject Ha");
@choices = @choices[shuffle(4)];

# The two logically consistent answers to part (f); which one is correct
# depends on whether the rounded p-value is at most 0.05.
my $answer_related   = "Yes, because there is a statistically significant relationship between gender and time spent on Smilebook.";
my $answer_unrelated = "No, because there is not a statistically significant relationship between gender and time spent on Smilebook.";

($correct_h, $correct_i, $extra_i[0]) = ($p_value <= .05)
  ? ("Reject Ho and accept Ha", $answer_related,   $answer_unrelated)
  : ("Fail to reject Ho",       $answer_unrelated, $answer_related);

$popup_conclusion = PopUp(["Choose:", @choices], $correct_h);

# Part (f): radio buttons for the plain-language answer.  The remaining
# two distractors are self-contradictory whichever way the test comes out.
$extra_i[1] = "Yes, because there is not a statistically significant relationship between gender and time spent on Smilebook.";
$extra_i[2] = "No, because there is a statistically significant relationship between gender and time spent on Smilebook.";
$radio_answer = new_multiple_choice();
$radio_answer->qa("Is gender related to time (minutes per day) spent on a Smilebook account?", $correct_i);
$radio_answer->extra(@extra_i[0 .. 2]);

# The four possible "truth vs. decision" statements used by the type I and
# type II error questions.
# NOTE(review): the two radio groups below are built here, but the problem
# text and the answer checkers in this file never reference them.
my $unrelated_rejected = "gender and time spent on Smilebook are unrelated, but we incorrectly rejected the null hypothesis.";
my $related_rejected   = "gender and time spent on Smilebook are related, but we incorrectly rejected the null hypothesis.";
my $unrelated_retained = "gender and time spent on Smilebook are unrelated, but we incorrectly failed to reject the null hypothesis.";
my $related_retained   = "gender and time spent on Smilebook are related, but we incorrectly failed to reject the null hypothesis.";

# Type I error question.
$radio_type1_error = new_multiple_choice();
$radio_type1_error->qa("If a type I error were made in this test, it would mean", $unrelated_rejected);
$radio_type1_error->extra($related_rejected, $unrelated_retained, $related_retained);

# Type II error question.
$radio_type2_error = new_multiple_choice();
$radio_type2_error->qa("If a type II error were made in this test, it would mean", $related_retained);
$radio_type2_error->extra($unrelated_rejected, $related_rejected, $unrelated_retained);
	   
# Problem statement shown to the student.  The answer blanks, menus and
# radio groups appear here in exactly the order in which the answer
# checkers are registered after END_TEXT, so keep the two in step.
#
# Percent signs are written as $PERCENT rather than a literal "%": a bare
# "%" starts a comment in TeX, which drops the sign inside math mode and
# swallows the rest of the line in hardcopy output.
#
# Only $table_row[0] .. $table_row[3] are interpolated because those are
# the only rows the setup code defines.
BEGIN_TEXT
Smilebook, a social networking site on the Internet, would like to determine if gender is related to number of minutes per day spent on a Smilebook account. A random sample of \($n \) users was taken with the following results:$BR$BR
$table_start
$table_row[0]
$table_row[1]
$table_row[2]
$table_row[3]
$table_end
$BR$BR

Conduct a hypothesis test based on this sample data, with \( \alpha = $alpha \)$PERCENT.  Based on the sample data can Smilebook conclude that gender is related to time (minutes per day) spent on a Smilebook account? 
$PAR
$BBOLD (a) $EBOLD State the hypotheses: $BR $BR
\(H_0:\) $SPACE The \{ $popup_H0_classifications->menu() \} $SPACE time spent on Smilebook and gender \{ $popup_H0_independence->menu() \}.  
$BR $BR
\(H_a:\) $SPACE The \{ $popup_Ha_classifications->menu() \} $SPACE time spent on Smilebook and gender \{ $popup_Ha_dependence->menu() \}.  
$BR $BR

$BBOLD (b) $EBOLD 
\{ $radio_dist->print_q() \}
\{ $radio_dist->print_a() \} $BR 

$BBOLD (c) $EBOLD  State the decision rule: $BR $BR
Reject \(H_0\) at \( \alpha \)= \{ ans_rule(5) \}$PERCENT if the p-value of the sample statistic is \{ $popup_p_value->menu() \}. $BR $BR

$BBOLD (d) Experiment $EBOLD $BR
\{ $radio_samp_stat->print_q() \}
\{ $radio_samp_stat->print_a() \} $BR 
Value of the sample statistic $BITALIC rounded to 2 decimal places$EITALIC: \{ ans_rule(7) \} $SPACE $BR$BR
P-value of the sample statistic $BITALIC rounded to 4 decimal places$EITALIC:  \{ ans_rule(5) \} $BR $BR

$BBOLD (e) Conclusion $EBOLD $BR
\{ $popup_conclusion->menu() \} at \( \alpha \)= \{ ans_rule(5) \}$PERCENT $BR $BR

$BBOLD (f) Answer the question $EBOLD $BR
\{ $radio_answer->print_q() \}
\{ $radio_answer->print_a() \} $BR $BR

$BR$BR

END_TEXT
Context("Numeric");

# Numeric answers that must match the stored value exactly.
my @exact_match = (tolType => 'absolute', tolerance => 0);

# One [checker, weight] pair per answer blank, listed in the order the
# blanks appear in the problem text.  The weights add up to 100.
my @graded_parts = (
  # (a) hypotheses
  [ $popup_H0_classifications->cmp(), 4 ],
  [ $popup_H0_independence->cmp(), 6 ],
  [ $popup_Ha_classifications->cmp(), 4 ],
  [ $popup_Ha_dependence->cmp(), 6 ],

  # (b) distribution
  [ radio_cmp( $radio_dist->correct_ans() ), 4 ],

  # (c) decision rule: alpha, then the p-value condition
  [ $alpha->cmp(@exact_match), 4 ],
  [ $popup_p_value->cmp(), 4 ],

  # (d) statistic symbol, statistic value, p-value
  [ radio_cmp( $radio_samp_stat->correct_ans() ), 4 ],
  [ $sample_stat->cmp(@exact_match), 15 ],
  [ $p_value->cmp(tolType => 'absolute', tolerance => .0001), 15 ],

  # (e) conclusion and alpha
  [ $popup_conclusion->cmp(), 15 ],
  [ $alpha->cmp(@exact_match), 4 ],

  # (f) plain-language answer
  [ radio_cmp( $radio_answer->correct_ans() ), 15 ],
);

# Register the checkers in display order.
foreach my $part (@graded_parts) {
  WEIGHTED_ANS( $part->[0], $part->[1] );
}

ENDDOCUMENT();       # This should be the last executable line in the problem.