// generate_data_pca.c
// compile with: gcc -o generate_data_pca generate_data_pca.c -std=c99 -lm

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

// number of points generated; main() prints one output line per point
#define DATASET_SIZE 1000
// number of coordinates per point; DIMENSIONS / 2 direction vectors are built
#define DIMENSIONS 20

/* Generates DATASET_SIZE points in DIMENSIONS-dimensional space.
 * The points are spread along DIMENSIONS/2 principal directions in this space,
 * each direction having a different amplitude, and all directions being
 * perpendicular to each other. */

// direction vectors the cloud is stretched along, one vector per row;
// row 0 is random, the remaining rows are derived from it in main()
double vects[DIMENSIONS / 2][DIMENSIONS];
// centre of the point cloud, one coordinate per dimension; filled in by main()
double origin[DIMENSIONS];

// number of uniform samples summed to form one approximately normal sample
#define NSUM 25

// Approximates a draw from the standard normal distribution.
// Adds up NSUM values of rand() / RAND_MAX (each uniform on [0, 1], with
// mean 1/2 and variance 1/12), then subtracts the sum's mean NSUM / 2 and
// divides by its standard deviation sqrt(NSUM / 12).
// The result is therefore bounded: it always lies within +/- sqrt(3 * NSUM).
double gaussrand()
{
	const double mean = NSUM / 2.0;
	const double stddev = sqrt(NSUM / 12.0);
	double total = 0.0;
	int remaining = NSUM;

	while (remaining-- > 0)
		total += (double)rand() / RAND_MAX;

	return (total - mean) / stddev;
}

// Builds a random cloud centre and DIMENSIONS / 2 direction vectors, then
// writes DATASET_SIZE points to stdout: one point per line, coordinates
// formatted with "%f" and separated by single spaces.
// srand() is not called here, so rand() runs from its default seed.
int main()
{
  const int ndirs = DIMENSIONS / 2;
  double *lead = vects[0];

  // centre of the cloud: each coordinate is random in [-5.357592, 4.641408]
  for (int d = 0; d < DIMENSIONS; d++)
  {
    origin[d] = -5.357592 + (rand() % 10000) / 1000.0;
  }

  // main principal vector: every component is random in [-5.12345, 4.86655]
  for (int d = 0; d < DIMENSIONS; d++)
  {
    lead[d] = -5.12345 + (rand() % 1000) / 100.0;
  }

  // Remaining ndirs - 1 vectors: zero everywhere except in coordinates
  // 2v and 2v+1, which hold the main vector's (2v, 2v+1) pair rotated by
  // 90 degrees and scaled. That makes each of them perpendicular to the main
  // vector, and their non-zero coordinates never overlap with each other.
  for (int v = 1; v < ndirs; v++)
  {
    double *dir = vects[v];
    const int even = 2 * v;
    const int odd = even + 1;

    for (int d = 0; d < DIMENSIONS; d++)
    {
      dir[d] = 0.0;
    }

    // scaling coefficient shared by the two non-zero components
    const double scale = 0.98765 + (rand() % 10000) / 1000.0;
    dir[even] = -scale * lead[odd];
    dir[odd] = scale * lead[even];
  }

  // generate the dataset, printing each point as soon as it is complete
  for (int n = 0; n < DATASET_SIZE; n++)
  {
    double sample[DIMENSIONS];

    // every point starts at the centre of the cloud ...
    for (int d = 0; d < DIMENSIONS; d++)
    {
      sample[d] = origin[d];
    }

    // ... and is displaced along each direction by a normally distributed
    // step whose amplitude shrinks by 4 per direction, from 2 * DIMENSIONS
    for (int v = 0; v < ndirs; v++)
    {
      const double *dir = vects[v];
      const double amplitude = DIMENSIONS * 2 - v * 4;
      const double step = gaussrand() * amplitude;

      for (int d = 0; d < DIMENSIONS; d++)
      {
        sample[d] += step * dir[d];
      }
    }

    // space between coordinates, newline after the last one
    for (int d = 0; d < DIMENSIONS; d++)
    {
      const char sep = (d == DIMENSIONS - 1) ? '\n' : ' ';
      printf("%f%c", sample[d], sep);
    }
  }

  return 0;
}

