#include #include #include #include struct regression { long long SX; long long SY; long long SXX; long long SYY; long long SXY; }; struct point { int x; int y; }; #include "linear_regression_sample_datasets.h" void print_linear_regression_results(struct regression* r, int nb_points) { printf("SX=%lld\n", r->SX); printf("SXX=%lld\n", r->SXX); printf("SY=%lld\n", r->SY); printf("SXY=%lld\n", r->SXY); double a, b; b = ((double)nb_points*r->SXY - r->SX*r->SY)/((double)nb_points*r->SXX - r->SX*r->SX); a = ((double)r->SY - b*r->SX)/nb_points; printf("\nEquation of best fit is: y = %0.4f + %0.4fx\n",a,b); } void linear_regression_update(struct regression* r, struct point* point) { r->SX += (long long) point->x; r->SXX += (long long) point->x * point->x; r->SY += (long long) point->y; r->SXY += (long long) point->x * point->y; } void linear_regression(struct point* points, int n) { struct regression r; r.SX = 0; r.SXX = 0; r.SY = 0; r.SYY = 0; r.SXY = 0; for(int i = 0; i < n; i++) { linear_regression_update(&r, &points[i]); } print_linear_regression_results(&r, n); } /* Read the points from f and fill the points array. * * @param f: the input file * @param points: the array to fill * @param nb_points_max: the size of the array * @return the number of points that were read */ int read_points(FILE* f, struct point* points, int nb_points_max) { /* TODO */ int i = 0; while(fscanf(f, "%d,%d", &points[i].x, &points[i].y)>0) { // printf("read (%d, %d)\n", points[i].x, points[i].y); i++; } return i; } /* Return the size of f */ size_t file_size(FILE* f) { fseek(f, 0, SEEK_END); size_t ret = ftell(f); fseek(f, 0, SEEK_SET); printf("file size: %lu\n", ret); return ret; } /* Read points from a file, and perform a linear regression */ void process_file(char* input_file) { /* TODO: open the file */ FILE* f = fopen(input_file, "r"); if(!f) { fprintf(stderr, "cannot open %s: %s\n", input_file, strerror(errno)); return; } /* load the points to the points array */ size_t size = file_size(f); // size in byte of the file /* each line contains at least 4 char. Let's allocation a buffer large enough to store all the points*/ size_t nb_points_max = size / 4; struct point * points = malloc(sizeof(struct point) * nb_points_max); int nb_points = read_points(f, points, nb_points_max); linear_regression(points, nb_points); fclose(f); } void process_sample_dataset() { struct point *points = dataset_500; int nb_points = 500; linear_regression(points, nb_points); } int main(int argc, char**argv) { if(argc < 2) { printf("usage: %s input_file\n", argv[0]); return EXIT_FAILURE; } /* Uncomment this line to enable loading the dataset from the header file instead of reading it from a file */ //process_sample_dataset(); process_file(argv[1]); return(0); }