/*
 * readdata.cpp, jonathan bober ,
 * anyone is free to modify this program in anyway they want, and to do
 * whatever they want with it, so long as they do not claim that shoddy
 * modifications were made by me.
 * if someone uses this for something useful, i would be happy to know.
 * 12/2002
 */

//this program reads in a file containing a list of primes
//and the longest increasing subsequences of the permutations
//they define. it may also read in a list if numbers and
//means about those numbers, or can be modified to do various
//other things.

//mostly, this is a collection of functions that are used in different
//ways depending on some simple modifications of the main() function

#include <cmath>
#include <fstream>
#include <iostream>
#include <list>

using namespace std;

const int top = 50000010; //the largest prime that will be read in
int * len; //array that will contain the lengths that will be read in

//takes a list of [sorted] numbers, (normally doubles) and creates a histogram
//(probabiliy density graph) of them. doesnt create an actual graph - simply
//outputs to standard out a list of x and y coordinates which can be used by
//(for example) the graph program of gnu plotutils (search www.gnu.org for
//plotutils for more info.) can be easily modified to produce a probability
//distribution graph (not sure if i am using the right terms here.)
template void hist(std::list input, int total = 0, double begin = -10, double inc = .02) { double area = 0; double ceiling = begin; //long total = input.count(); //long total = 0; if(total==0) { for(std::list::iterator i = input.begin(); i != input.end(); i++) total++; } long count = 0; T prev = begin; for(std::list::iterator i = input.begin(); i != input.end(); i++) { if(*i > ceiling && prev < ceiling) { std::cout << ceiling << " " << (double)count/(total*inc)<< std::endl; area += ((double)count/total); ceiling+=inc; count = 0; } else if (*i > ceiling) { while(*i > ceiling) ceiling+=inc; std::cout << ceiling - inc << " " << 0 << std::endl; } prev = *i; count++; } cerr << area << endl; } //gets the mean of the lengths in an interval centered at N double getmean(int N) { double l = log10((double)N); int A = (int)(300*(l * l)); // this is the interval length, which // can be changed easily int count = 0; int total = 0; for(int i = N - A; i < N + A; i++) { if(0 <= i && i <= top) { if(len[i] != -1) { count++; total += len[i]; } } } cout << count << " "; return total/(double)count; } //builds a list of data from the len array. take an interval //centered at N and builds a list of doubles, normalized somehow, //depending on how the code is modified. void buildList(int N, double mean, list * o) { double l = log10((double)N); int A = (int)(300*(l*l*l)); double two_twothird = pow(2, 2.0/3.0); double N_onesixth = pow(N, 1.0/6.0); double temp = two_twothird*N_onesixth; for(int i = N - A; i < N + A; i++) { if(0 <= i && i <= top) { if(len[i] != -1) { // cout << len[i] - mean << endl; o->push_back(((double)len[i] - (mean - 2 * sqrt(N) + 2 * sqrt(i)))/temp); //this can be modified to do various things //one modification simlpy inserts the lengths //and later the mean and variance of the list //is found and normalization uses this. 
} } } o->sort(); //code can be inserted here to normalize the list to have //mean 0 and standard deviation 1 } //prints out a list of a few stats of the distribution in the list o //can (and normally should) be modified to return these stats instead //of printing them out. void getStats(list * o) { int count = 0; double total = 0; double mean = 0; double mean_squared = 0; for(list::iterator i = o->begin(); i!=o->end(); i++) { count++; mean += *i; mean_squared += (*i) * (*i); } mean = mean / count; mean_squared = mean_squared / count; double variance = mean_squared - mean*mean; double stddev = sqrt(variance); cout << "mean: " << mean << endl; cout << "variance: " << variance << endl; cout << "standard deviation: " << stddev << endl; } //use depends on how this is put together. //currently, this function reads in a list of primes and lengths and then //computes some means and prints out some stuff about the lengths. it is //not too useful in its exact form. // //a good use modification i have made is to use this file to make a file //full of values for means, and then read this file in with the file //full of data, as computing many means can be time consuming. int main(int argc, char * argv[]) { len = new int[top]; for(int i = 0; i < top; i++) { len[i] = -1; } ifstream in(argv[1]); int prime; int curlen; in >> prime; in >> curlen; while(!in.eof()) { len[prime] = curlen; in >> prime; in >> curlen; } cerr << "got data" << endl; cout.precision(10); int N = 3000000; double mean = getmean(N); // for(int N = 1000000; N < 4500000; N+=10) cout << N << (char)9 << mean << (char)9 << 2 * sqrt(N) << endl; list l; buildList(N, mean, &l); for(list::iterator i = l.begin(); i != l.end(); i++) cout << *i << endl; getStats(&l); hist(l); return 0; }