/* -------------------------------------------------------------------------
// Figure 10.20: Linear regression.
// Figure 10.21: The get_data() function.
// Figure 10.22: The regression function: Calculate the regression line.
// Figure 10.18: Function average(), modified.
// Figure 10.23: The sq_sum() function.
// -------------------------------------------------------------------------
// This program produces a least-squares fit of a line to a
// collection of data values.  The user enters the data items
// at the keyboard as pairs of numbers (t, y).  The program
// calculates and prints the slope, a, and intercept, b, of the
// least-squares fit line y = a * t + b.  It also prints the
// correlation coefficient, r, of the data.
// -------------------------------------------------------------------------
*/
#include "tools.h"      /* project header; banner() and bye() presumably come from here */
#include <math.h>       /* sqrt() -- included explicitly rather than relying on tools.h */
#include <stdio.h>      /* printf(), puts(), scanf() */

#define NMAX 100        /* maximum number of data values */

/* Prototypes ------------------------------------------------------------ */
int    get_data( double t[], double y[] );
void   regression( double t[], double y[], int n );
double average( double x[], int n );
double sq_sum( double a[], double b[], int n, double a_mean, double b_mean );

/* ----------------------------------------------------------------------
// Program entry point: read the data pairs, then fit and report the line.
// Returns 0 to the host environment.
*/
int main( void )
{
    double t[NMAX], y[NMAX];    /* parallel arrays of data values. */
    int n;                      /* actual number of values */

    banner();
    puts( "\n Least squares fit of straight line\n" );

    n = get_data( t, y );       /* Read data values */
    regression( t, y, n );      /* Compute regression line, print answers. */

    bye();
    return 0;
}

/* ----------------------------------------------------------------------
// Read in data value pairs from the keyboard until sentinel input.
//   t, y    : parallel output arrays, each with room for NMAX values.
//   returns : the number of pairs stored (0..NMAX).
// Reading stops at the first of: the sentinel pair (-1, -1), NMAX pairs
// stored, or a read that does not yield two numbers (end of input or
// non-numeric text).  The sentinel pair is not counted.
*/
int get_data( double t[], double y[] )
{
    int k;                      /* Number of points to be fit. */

    puts(" Please enter data values when prompted, (-1, -1) to quit.\n" );
    for (k = 0; k < NMAX; k++) {
        printf( "\tEnter (t[%i] y[%i]): ", k, k );      /* Enter data pair */
        /* scanf() returns the number of items converted; anything other
           than 2 means t[k] and/or y[k] was not stored, so neither may
           be examined and the pair must not be counted. */
        if (scanf( "%lg%lg", &t[k], &y[k] ) != 2) break;
        if (t[k] == -1 && y[k] == -1) break;            /* quit for sentinel */
    }
    return k;                   /* Return actual number of data items */
}

/* --------------------------------------------------------------------------
// The following routine echoes the data, then computes and prints the
// slope and intercept of the regression line and the correlation
// coefficient of the data.
//   t, y : parallel arrays holding the n data pairs.
//   n    : number of pairs.
// Degenerate inputs are reported instead of being divided through:
//   - fewer than two points, or no spread in t: no line is printed;
//   - no spread in y: the line is printed, the correlation is reported
//     as undefined.
*/
void regression( double t[], double y[], int n )
{
    int k;                      /* loop counter */
    double t_mean, y_mean;      /* means of data values */
    double tt, ty, yy;          /* sums of products of deviations from the means */
    double slope, intercept;    /* line slope and intercept */
    double correlation;         /* correlation coefficient */

    puts( "\n For the following data values" );
    for (k = 0; k < n; k++) {   /* Echo print the data */
        printf( "\tData[%2i]: t = %.2f, y= %.2f \n", k, t[k], y[k] );
    }

    if (n < 2) {
        puts( "\n At least two data points are needed to fit a line." );
        return;
    }

    t_mean = average( t, n );   /* Compute means of coordinate values */
    y_mean = average( y, n );
    tt = sq_sum( t, t, n, t_mean, t_mean );
    ty = sq_sum( t, y, n, t_mean, y_mean );
    yy = sq_sum( y, y, n, y_mean, y_mean );

    /* tt is the divisor of the slope.  Written as !(tt > 0) so that a
       NaN produced by the input is rejected as well. */
    if (!(tt > 0.0)) {
        puts( "\n All t values are identical; no least squares line exists." );
        return;
    }

    slope = ty / tt;            /* Compute best-fit line parameters */
    intercept = y_mean - slope * t_mean;
    printf( "\n The least squares equation is: y = %.2f * t + %.2f \n",
            slope, intercept );

    if (yy > 0.0) {
        correlation = ty / sqrt( tt * yy );
        printf( " The correlation coefficient is: r = %.3f \n", correlation );
    } else {
        /* sqrt( tt * yy ) would be zero here. */
        puts( " The correlation coefficient is undefined (all y values are identical)." );
    }
}

/* -----------------------------------------------------------------
// Given an array of values, calculate the mean.
//   x       : array of at least n values.
//   n       : number of values to average.
//   returns : the arithmetic mean of x[0..n-1], or 0.0 when n <= 0
//             (so the division by n is never a division by zero).
*/
double average ( double x[], int n )
{
    double sum = 0.0;
    int k;                      /* Loop counter and array subscript. */

    if (n <= 0) return 0.0;

    for (k = 0; k < n; ++k) {
        sum += x[k];
    }
    return sum / n;
}

/* -----------------------------------------------------------------
// Compute the sum of the products of the differences from means.
// */
/* -----------------------------------------------------------------
// (The line above terminates the banner comment that begins at the
// end of the preceding source line.)
//
// sq_sum(): sum of products of deviations from the means.
//   a, b           : parallel arrays, each holding at least n values.
//   n              : number of elements to process.
//   a_mean, b_mean : the values subtracted from a[] and b[] elements.
//   returns        : the sum over i in 0..n-1 of
//                    (a[i] - a_mean) * (b[i] - b_mean);
//                    0 when n <= 0, since the loop body never runs.
*/
double sq_sum( double a[], double b[], int n, double a_mean, double b_mean )
{
    double total = 0;           /* Running sum; becomes the result. */
    int i = 0;                  /* Subscript into both arrays. */

    while (i < n) {
        const double dev_a = a[i] - a_mean;     /* deviation of a[i] */
        const double dev_b = b[i] - b_mean;     /* deviation of b[i] */

        total += dev_a * dev_b;
        ++i;
    }
    return total;
}