// generate_data.c
// compile with: gcc -o generate_data generate_data.c -std=c99 -lm

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define DATASET_SIZE 1000

double dist(double *a, double *b)
{
  double s = 0.0;
  for (int i = 0; i < 5; i++)
    s += (a[i] - b[i]) * (a[i] - b[i]); 
  return sqrt(s);
}

/* Generates DATASET_SIZE labelled points in 5-dimensional space and prints
 * them to stdout, one point per line: five coordinates (each as "%.5f ")
 * followed by the class label (1 or -1).
 *
 * Each point is produced by picking one of the 6 centers below at random and
 * adding an independent uniform offset in [-1.2, 1.2] to every coordinate.
 * Points drawn around the first three centers are labelled 1, the rest -1.
 *
 * Per the original author's note, the centers are chosen so that at least
 * three of the "spheres" around them lie approximately on a line, with the
 * central one classified with the opposite polarity - thus creating a
 * non-separable set of points.
 *
 * rand() is not seeded here, so the generator starts from its default state.
 *
 * Compile with -DPRINT_CENTER_DISTANCES to additionally print the pairwise
 * distances between the centers (to stdout, before the dataset).
 *
 * Returns 0 on success, EXIT_FAILURE if writing to stdout failed. */
int main(void)
{
  enum { NUM_CENTERS = 6, NUM_DIMS = 5, NUM_POSITIVE = 3 };

  /* each coordinate is jittered uniformly within +/- half_width */
  const double half_width = 1.2;
  const double width = 2.0 * half_width;

  double centers[NUM_CENTERS][NUM_DIMS] = {
    {  1.0,  1.0,  1.0,  1.0, 1.0 },
    {  3.0,  3.0,  3.0,  3.0, 3.0 },
    {  3.0, -1.0,  2.8, -1.4, 2.0 },
    {  2.9,  1.0,  2.9,  0.8, 2.6 },
    { -1.0, -0.9, -2.1, -4.0, 2.0 },
    {  2.1, -2.3,  0.3, -1.2, 0.4 }
  };

#ifdef PRINT_CENTER_DISTANCES
  /* debugging aid: print distances between every pair of centers */
  for (int i = 0; i < NUM_CENTERS; i++)
  {
    for (int j = 0; j < NUM_CENTERS; j++)
    {
      printf("d(%d,%d) = %.2f\n", i, j, dist(centers[i], centers[j]));
    }
  }
#endif

  /* generate and print the dataset */
  for (int i = 0; i < DATASET_SIZE; i++)
  {
    /* one rand() call selects the center, then one per coordinate */
    int c = rand() % NUM_CENTERS;

    for (int j = 0; j < NUM_DIMS; j++)
    {
      printf("%.5f ", centers[c][j] - half_width + rand() / (double)RAND_MAX * width);
    }
    printf("%d\n", (c < NUM_POSITIVE) ? 1 : (-1));
  }

  /* report truncated output (e.g. full disk, closed pipe) via the exit code */
  if (fflush(stdout) != 0 || ferror(stdout))
  {
    return EXIT_FAILURE;
  }

  return 0;
}

