########################################################################
## DESCRIPTION
## A WeBWorK problem that asks questions about the correlation between two sets of
## data values.
## WeBWorK problem written by JoAnne Taormina,
## ENDDESCRIPTION
##
## KEYWORDS('linear regression', 'correlation', 'coefficient of determination', 'correlation coefficient', 'scatter diagram')
##
## Author('JoAnne Taormina')
## Institution('Nassau Community College')
########################################################################

DOCUMENT();

loadMacros(
    "PGstandard.pl",
    "MathObjects.pl",
    "PGgraphmacros.pl",
    "weightedGrader.pl",
    "PGchoicemacros.pl",
    "unionTables.pl",
);

install_weighted_grader();

# make sure we're in the context we want
Context("Numeric");

# NOTE: problem-level variables are deliberately left as package globals.
# The problem text and the code embedded in it are evaluated separately and
# (presumably, per PG convention) cannot see file-level lexicals, so only
# short-lived temporaries inside loops and subs are declared with "my".

# Round $value to $places decimal places, rounding halves away from zero.
sub round_to_places {
    my ($value, $places) = @_;
    my $scale = 10**$places;
    return int($scale * $value + .5 * ($value <=> 0)) / $scale;
}

# Build one answer choice for the coefficient-of-determination question.
# $percent is the percentage quoted as "explained"; $claim states which
# variable is said to influence which.  $PERCENT is used instead of a bare
# percent sign so that the choices are not truncated in TeX (hardcopy) output.
sub interpretation_choice {
    my ($percent, $claim) = @_;
    my $remaining = 100 - $percent;
    return "$percent$PERCENT of the variation in $claim. "
        . "The remaining $remaining$PERCENT is due to unexplained factors outside of the context of this problem.";
}

# ----------------------------------------------------------------------
# The data; x is hours watching TV, y is GPA.
# Three clusters of four points each give a negative trend.  The "mock"
# data set is drawn from the same ranges and is only used for the
# distractor graphs.  The order of the random() calls is kept as
# x, y, mock x, mock y for each point so that a given seed yields the
# same values as before.
# ----------------------------------------------------------------------
@clusters = (
    # x low, x high, y low, y high
    [  2, 12, 2.9, 4.0 ],
    [ 10, 18, 2.2, 3.2 ],
    [ 16, 25, 1.0, 2.5 ],
);

@datax     = ();
@datay     = ();
@mockdatax = ();
@mockdatay = ();

for my $cluster (@clusters) {
    my ($x_low, $x_high, $y_low, $y_high) = @$cluster;
    for my $draw (1 .. 4) {
        push @datax,     random($x_low, $x_high, 1);
        push @datay,     random($y_low, $y_high, .1);
        push @mockdatax, random($x_low, $x_high, 1);
        push @mockdatay, random($y_low, $y_high, .1);
    }
}

$n = scalar @datax;    # 12 data points

# ----------------------------------------------------------------------
# Sums needed for the correlation coefficient and least-squares line.
# ----------------------------------------------------------------------
$sum_x  = 0;
$sum_x2 = 0;
$sum_y  = 0;
$sum_y2 = 0;
$sum_xy = 0;

for my $i (0 .. $n - 1) {
    my ($x, $y) = ($datax[$i], $datay[$i]);
    $sum_x  += $x;
    $sum_y  += $y;
    $sum_x2 += $x**2;
    $sum_y2 += $y**2;
    $sum_xy += $y * $x;
}

$r         = ($n*$sum_xy - $sum_x*$sum_y)/(sqrt($n*$sum_x2-$sum_x**2)*sqrt($n*$sum_y2-$sum_y**2));
$r_squared = $r**2;

# Regression line y' = slope * x + intercept.  The intercept is computed
# from the unrounded slope; everything is rounded afterwards.
$slope     = ($n*$sum_xy - $sum_x*$sum_y)/($n*$sum_x2-$sum_x**2);
$intercept = ($sum_y-$slope*$sum_x)/$n;

$slope     = round_to_places($slope, 2);
$intercept = round_to_places($intercept, 2);
$r         = round_to_places($r, 2);
$r_squared = round_to_places($r_squared, 2);

# ----------------------------------------------------------------------
# Multiple choice: symbol for the correlation coefficient.
# ----------------------------------------------------------------------
$radio_correlation_coefficient = new_multiple_choice();
$radio_correlation_coefficient->qa("The correct symbol for the correlation coefficient found in part (a) is ", "\( r \)");
$radio_correlation_coefficient->extra("\( \sigma \)", "\( \rho \)", "\( \bar{x} \)", "\( r^2 \)");

# ----------------------------------------------------------------------
# Multiple choice: symbol for the coefficient of determination.
# ----------------------------------------------------------------------
$radio_coefficient_det = new_multiple_choice();
$radio_coefficient_det->qa("The correct symbol for the coefficient of determination found in part (c) is ", "\( r^2 \)");
$radio_coefficient_det->extra("\( \sigma \)", "\( \rho \)", "\( \bar{x} \)", "\( r \)");

# ----------------------------------------------------------------------
# Multiple choice: interpreting a coefficient of determination.
# The distractors swap the roles of the two variables and/or (wrongly)
# square the given value.
# ----------------------------------------------------------------------
$random_coef_det                 = random(.50, .70, .01);
$random_coef_det_squared         = round_to_places($random_coef_det**2, 2);
$random_coef_det_percent         = $random_coef_det*100;
$random_coef_det_squared_percent = $random_coef_det_squared*100;

$gpa_explained_by_tv = "a student's GPA is influenced by the number of hours the student watches television";
$tv_explained_by_gpa = "the number of hours a student watches television is influenced by the student's GPA";

$coef_det_interpretation = new_multiple_choice();
$coef_det_interpretation->qa(
    "Suppose that the coefficient of determination for another sample was found to be $random_coef_det. How should the meaning be interpreted?",
    interpretation_choice($random_coef_det_percent, $gpa_explained_by_tv)
);
$coef_det_interpretation->extra(
    interpretation_choice($random_coef_det_percent,         $tv_explained_by_gpa),
    interpretation_choice($random_coef_det_squared_percent, $gpa_explained_by_tv),
    interpretation_choice($random_coef_det_squared_percent, $tv_explained_by_gpa)
);

# ----------------------------------------------------------------------
# Predictions from the (rounded) regression equation, rounded to the
# nearest tenth.  These are computed while slope and intercept are still
# plain Perl numbers.
# ----------------------------------------------------------------------
$random_hours = random(5, 12, 1);
$calc_GPA     = round_to_places($slope * $random_hours + $intercept, 1);

$random_hours1 = random(13, 25, 1);
$calc_GPA1     = round_to_places($slope * $random_hours1 + $intercept, 1);

# Promote the answers to MathObjects.
$r         = Compute($r);
$r_squared = Compute($r_squared);
$slope     = Compute($slope);
$intercept = Compute($intercept);
$calc_GPA  = Compute($calc_GPA);
$calc_GPA1 = Compute($calc_GPA1);

# ----------------------------------------------------------------------
# Six candidate graphs.  Odd-numbered graphs show the real data and
# even-numbered graphs show the mock data; the slope is perturbed on
# graphs 0-3.  Only graph 5 pairs the real data with the true line.
# ----------------------------------------------------------------------
$aplus1  = $slope + .03;
$aminus1 = $slope - .03;

@line_slopes = ($aplus1, $aminus1, $aminus1, $aplus1, $slope, $slope);
@f = map { $_ . "x+" . $intercept . " for x in <-3,35> using color:black and weight:2" } @line_slopes;

for my $i (0 .. 5) {
    my $graph = init_graph(-2,-.5,30,5,'axes'=>[0,0]);
    $graph->lb('reset');
    $graph->lb(new Label(28,.4,"x",'black','right','top'));
    $graph->lb(new Label(2,4.8,"y",'black','right','top'));
    $graph->h_ticks(0, "black", map { 2 * $_ } 1 .. 14);    # 2, 4, ..., 28
    $graph->v_ticks(0, "black", map { $_ / 2 } 1 .. 9);     # .5, 1, ..., 4.5

    # numeric labels along the y axis, then along the x axis
    for my $y_mark (1 .. 4) {
        $graph->lb(new Label(-1,$y_mark,$y_mark,'black','left','middle'));
    }
    for my $x_mark (4, 8, 12, 16, 20, 24, 28) {
        $graph->lb(new Label($x_mark,-.5,$x_mark,'black','center','bottom'));
    }

    $graph1[$i] = $graph;
}

@real_points = map { closed_circle($datax[$_],     $datay[$_],     'blue') } 0 .. $n - 1;
@mock_points = map { closed_circle($mockdatax[$_], $mockdatay[$_], 'blue') } 0 .. $n - 1;

for my $i (0 .. 5) {
    $graph1[$i]->stamps($i % 2 ? @real_points : @mock_points);
}

# Draw each line and render the images in index order 0..5.
for my $i (0 .. 5) {
    plot_functions($graph1[$i], $f[$i]);
    $fig1[$i] = image(insertGraph($graph1[$i]),width =>250,height =>200,tex_size =>310);
}

# Shuffle the graphs; the correct one (original index 5) ends up at
# position $inv[5].
@perm = shuffle(6);
@inv  = invert(@perm);

@fig1 = @fig1[@perm];

@letter  = ("A", "B", "C", "D", "E","F");
$correct = $letter[$inv[5]];

TEXT(beginproblem());

Context()->texStrings;
BEGIN_TEXT
A sample of 12 students were asked how many hours they each watched television per week, x, and their GPA, y.
$BR
$BR
$BCENTER
\{ begintable($n + 1) \}
\{ row( "\(x : \)", "\($datax[0] \)", "\($datax[1] \)", "\($datax[2] \)", "\($datax[3] \)", "\($datax[4] \)", "\($datax[5] \)", "\($datax[6] \)", "\($datax[7] \)", "\($datax[8] \)", "\($datax[9] \)", "\($datax[10] \)", "\($datax[11] \)" ) \}
\{ row( "\(y : \)", "\($datay[0] \)", "\($datay[1] \)", "\($datay[2] \)", "\($datay[3] \)", "\($datay[4] \)", "\($datay[5] \)", "\($datay[6] \)", "\($datay[7] \)", "\($datay[8] \)", "\($datay[9] \)", "\($datay[10] \)", "\($datay[11] \)" ) \}
\{ endtable() \}
$ECENTER
$BR
$BR
(a) Find the correlation coefficient for the sample data. Round to the nearest hundredth (2 decimal places).
$BR
$BR
$BCENTER
\{ ans_rule(5) \}
$BR
$BR
$ECENTER
(b) \{ $radio_correlation_coefficient->print_q() \}
\{ $radio_correlation_coefficient->print_a() \}
$BR$BR
(c) Find the coefficient of determination for the sample data. Round to the nearest hundredth (2 decimal places).
$BR
$BR
$BCENTER
\{ ans_rule(5) \}
$BR
$BR
$ECENTER
(d) \{ $radio_coefficient_det->print_q() \}
\{ $radio_coefficient_det->print_a() \}
$BR$BR
(e) \{ $coef_det_interpretation->print_q() \}
\{ $coef_det_interpretation->print_a() \}
$BR$BR
(f) Find the regression equation for the sample data in the form \(y' = ax + b \). Round $BITALIC a $EITALIC and $BITALIC b $EITALIC to the nearest hundredth (2 decimal places).
$BR
$BR
$BCENTER
y' = \{ ans_rule(5)\} x + \{ ans_rule(5) \}
$BR
$BR
$ECENTER
(g) Graph the scatter plot and the regression equation on your calculator. Which of the graphs A-F matches your graph?
\{ pop_up_list(["?","A","B","C","D","E","F"]) \}
$BR
$BCENTER
(Click on a graph to enlarge it)
$ECENTER
$PAR
\{
BeginTable().
AlignedRow([$fig1[0],$fig1[1],$fig1[2]]).
TableSpace(5,0).
AlignedRow(["A","B","C"]).
TableSpace(25,6).
AlignedRow([$fig1[3],$fig1[4],$fig1[5]]).
TableSpace(5,0).
AlignedRow(["D","E","F"]).
EndTable().
$BR.$BCENTER."(Click on a graph to enlarge it)".$ECENTER
\}
$BR
$BR
(h) Suppose that a student watches television $random_hours hours per week. Use the regression equation found in part (f) to predict the student's GPA. Round GPA to the nearest tenth (1 decimal place).
$BR
$BR
$BCENTER
\{ ans_rule(5)\}
$BR
$BR
$ECENTER
(i) Suppose that a student watches television $random_hours1 hours per week. Use the regression equation found in part (f) to predict the student's GPA. Round GPA to the nearest tenth (1 decimal place).
$BR
$BR
$BCENTER
\{ ans_rule(5)\}
$BR
$BR
$ECENTER
END_TEXT
Context()->normalStrings;

# ----------------------------------------------------------------------
# Answer checkers, in the same order as the answer blanks above.
# The weights add up to 100.  Parts (a), (c) and (f) must match the
# rounded value exactly; parts (h) and (i) allow about one tenth of slack.
# ----------------------------------------------------------------------

# (a) correlation coefficient and (b) its symbol
WEIGHTED_ANS( $r->cmp(tolType=>'absolute',tolerance=>0), 8 );
WEIGHTED_ANS( radio_cmp( $radio_correlation_coefficient->correct_ans() ), 4 );

# (c) coefficient of determination and (d) its symbol
WEIGHTED_ANS( $r_squared->cmp(tolType=>'absolute',tolerance=>0), 8 );
WEIGHTED_ANS( radio_cmp( $radio_coefficient_det->correct_ans() ), 4 );

# (e) interpretation
WEIGHTED_ANS( radio_cmp( $coef_det_interpretation->correct_ans() ), 15 );

# (f) slope and intercept of the regression line
WEIGHTED_ANS( $slope->cmp(tolType=>'absolute',tolerance=>0), 8 );
WEIGHTED_ANS( $intercept->cmp(tolType=>'absolute',tolerance=>0), 8 );

# (g) letter of the matching graph
WEIGHTED_ANS(str_cmp( $correct ),15);

# (h), (i) predicted GPAs
WEIGHTED_ANS( $calc_GPA->cmp(tolType=>'absolute',tolerance=>.11), 15 );
WEIGHTED_ANS( $calc_GPA1->cmp(tolType=>'absolute',tolerance=>.11), 15 );

ENDDOCUMENT();