#!/usr/local/bin/kermit + # # s t a t i s t i c s # # Given a file in which each line contains a pair of numbers, X and Y, # computes and prints the max, min, mean, variance, and standard deviation of # the X's and Y's, and the correlation coefficient of X and Y. The numbers in # file may (but need not) have decimal points and fractional parts. # # Usage: statistics # Requires: C-Kermit 7.0 or later. # Illustrates: Floating-point computation and formatting, file i/o. # # Note that floating-point arithmetic, unlike integer arithmetic, is done with # function calls. The floating point function names all begin with "ffp". # Also note the use of \fcontent() in the FOPEN statement; this to allow for # DOS filenames with backslash path-separators. # # Author: F. da Cruz, The Kermit Project, Columbia University, July 1999. # Version 2: Adds IF FLOAT checking. # if < \v(argc) 2 exit 1 Usage: \%0 filename ; A filename is required ; Output function for X and Y statistics. ; Args: Label, X value, Y value. ; define xxout { echo {\frpad(\%1:,16)\flpad(\ffprou(\%2,2),10)\%9\flpad(\ffprou(\%3,2),10)} } fopen /read \%c \fcontents(\%1) ; Open the data file if fail exit 1 ; Check .\%n = 0 ; Number of X,Y pairs .x:sum = 0 ; Initialize sums .y:sum = 0 ; (not strictly necessary) .x:sum2 = 0 .y:sum2 = 0 .xy:sum = 0 while not \f_eof(\%c) { ; Until end of input file... fread \%c line ; Read a line (record) if fail break ; Check if not def line continue ; Ignore empty lines .\%x := \fword(\m(line),1,{ }) ; Extract X and Y from the record .\%y := \fword(\m(line),2,{ }) if ( not ( float \%x && float \%y ) ) exit 1 Bad record: "\m(line)" .x:sum := \ffpadd(\m(x:sum),\%x) ; Get sums and check validity .y:sum := \ffpadd(\m(y:sum),\%y) if = \%n 0 { .x:max := \%x, .x:min := \%x, .y:max := \%y, .y:min := \%y } incr \%n ; Count this record .x:max := \ffpmax(\m(x:max),\%x) ; Accumulate max, min, other stats .x:min := \ffpmin(\m(x:min),\%x) .y:max := \ffpmax(\m(y:max),\%y) .y:min := \ffpmin(\m(y:min),\%y) .x:sum2 := \ffpadd(\m(x:sum2),\ffpmult(\%x,\%x)) ; Sum of squares (X) .y:sum2 := \ffpadd(\m(y:sum2),\ffpmult(\%y,\%y)) ; Sum of squares (Y) .xy:sum := \ffpadd(\m(xy:sum),\ffpmult(\%x,\%y)) ; Sum of products (XY) } fclose \%c ; Finished reading data - close file echo echo Records: \%n if < \%n 2 exit 1 {Sorry, at least two data pairs are required.} ; Compute means, intermediate quantities, variances. ; .x:mean := \ffpdiv(\m(x:sum),\%n) .y:mean := \ffpdiv(\m(y:sum),\%n) .x:ss := \ffpsub(\m(x:sum2),\ffpdiv(\ffpmult(\m(x:sum),\m(x:sum)),\%n)) .y:ss := \ffpsub(\m(y:sum2),\ffpdiv(\ffpmult(\m(y:sum),\m(y:sum)),\%n)) .xy:ss := \ffpsub(\m(xy:sum),\ffpdiv(\ffpmult(\m(x:sum),\m(y:sum)),\%n)) .x:var := \ffpdiv(\m(x:ss),\%n) .y:var := \ffpdiv(\m(y:ss),\%n) ; Print the results echo { X Y} xxout Sum \m(x:sum) \m(y:sum) xxout Minimum \m(x:min) \m(y:min) xxout Maximum \m(x:max) \m(y:max) xxout Mean \m(x:mean) \m(y:mean) xxout Variance \m(x:var) \m(y:var) xxout {Std deviation} \ffpsqrt(\m(x:var)) \ffpsqrt(\m(y:var)) echo echo Correlation coefficient: - \ffpround(\ffpdiv(\m(xy:ss),\ffpsqrt(\ffpmul(\m(x:ss),\m(y:ss)))),2) exit 0