C implementation of KLT

The committed files are a fully functioning implementation of the
Karhunen–Loève transform (KLT) as defined on Wikipedia, and verified
using multiple languages - Python, R and MATLAB. Note. It may be common
for libraries to call this algorithm 'covariance', 'eig' or similar to
distiguish from the SVD alogrithm or otherwise.

The source code is built entirely on the GNU GSL (2.7) library, to
utilise BLAS optimised functionality.
This commit is contained in:
Ethan Smith-Coss 2025-05-30 23:30:35 +01:00
commit 9fb5cefa08
Signed by: TheOnePath
GPG Key ID: 1D351CCC6D01F32B
8 changed files with 418 additions and 0 deletions

55
.gitignore vendored Normal file
View File

@ -0,0 +1,55 @@
# ---> C
# Prerequisites
*.d
# Object files
*.o
*.ko
*.obj
*.elf
# Linker output
*.ilk
*.map
*.exp
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
*.su
*.idb
*.pdb
# Kernel Module Compile Results
*.mod*
*.cmd
.tmp_versions/
modules.order
Module.symvers
Mkfile.old
dkms.conf
build/

64
Makefile Normal file
View File

@ -0,0 +1,64 @@
CC = gcc
INCLUDE_DIR = ./include
SOURCE_DIR = ./src
CFLAGS += -I$(INCLUDE_DIR) -ansi -std=c99
LIBS += -lgsl -lopenblas -lm
# Debug flags and release flags for GCC
ifeq ($(BUILD),release)
CFLAGS += -O2 -s -DNDEBUG
_BUILD_DIR = ./build/release
_OBJECT_DIR = ./obj/release
else
CFLAGS += -O0 -g
_BUILD_DIR = ./build/debug
_OBJECT_DIR = ./obj/debug
endif
# Check for required directories for GCC outputs
ifeq ($(wildcard $(OBJECT_DIR)/.),)
$(shell mkdir -p $(OBJECT_DIR))
endif
# Valgrind specific flags for debugging help
VALGRIND_ARGS += -s --leak-check=full --track-origins=yes \
--show-leak-kinds=all
# Get the system architecture if not specified
ifeq ($(ARCH),)
ARCH = $(shell uname -m)
BUILD_DIR = $(addsuffix $(ARCH), $(_BUILD_DIR)/)
OBJECT_DIR = $(addsuffix $(ARCH), $(_OBJECT_DIR)/)
endif
# Create directories if they don't exist
ifeq ($(wildcard $(BUILD_DIR)/.),)
$(shell mkdir -p $(BUILD_DIR))
endif
ifeq ($(wildcard $(OBJECT_DIR)/.),)
$(shell mkdir -p $(OBJECT_DIR))
endif
# Binary name for executable to compile
BINARY_NAME = main
# Source list files
SRC = pca matio
OBJ = $(patsubst %,$(OBJECT_DIR)/%.o,$(SRC) $(BINARY_NAME))
$(OBJECT_DIR)/%.o: $(SOURCE_DIR)/%.c
$(CC) -c -o $@ $< $(CFLAGS)
$(BUILD_DIR)/$(BINARY_NAME).$(ARCH): $(OBJ)
gcc -std=c99 $(CFLAGS) $(OBJ) $(LIBS) -o $(BUILD_DIR)/$(BINARY_NAME).$(ARCH)
.PHONY : release
release:
$(MAKE) "BUILD=release"
.PHONY : clean
clean:
rm -rf $(OBJECT_DIR)/*.o build
.PHONY : valgrind
valgrind:
valgrind $(VALGRIND_ARGS) $(BUILD_DIR)/$(BINARY_NAME).$(ARCH)

4
compile_flags.txt Normal file
View File

@ -0,0 +1,4 @@
-Iinclude/
-ansi
-std=c99
-Wall

19
include/matio.h Normal file
View File

@ -0,0 +1,19 @@
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>
#ifndef MATIO_H
#define MATIO_H
void
print_matrix ( gsl_matrix *matrix );
void
print_matrix_complex ( gsl_matrix_complex *matrix );
void
print_vector ( gsl_vector *vec );
void
print_flatten ( gsl_matrix *matrix );
#endif

24
include/pca.h Normal file
View File

@ -0,0 +1,24 @@
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>
#ifndef PCA_H
#define PCA_H
#define PCA_AUTO_ALGORITHM -1
#define PCA_KLT_ALGORITHM 1
#define PCA_SVD_ALGORITHM 2
#define PCA_ALS_ALGORITHM 3
#define PCA_CE_MAGIC -1
int
pca (gsl_matrix *A, int nc, int algorithm, gsl_matrix *m);
int
_klt (gsl_matrix *A, gsl_vector *eval, gsl_matrix *m);
int
_internal_eigen_sign_correction (gsl_matrix *evec);
#endif

51
src/main.c Normal file
View File

@ -0,0 +1,51 @@
#include <stdio.h>
#include <stdlib.h>
/*** GSL library imports ***/
#include <gsl/gsl_errno.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_eigen.h>
#include <gsl/gsl_sort_vector.h>
#include <gsl/gsl_permute_vector.h>
#include <gsl/gsl_permutation.h>
#include "matio.h"
#include "pca.h"
#ifndef DISABLE_ERROR_HANDLER
#define DISABLE_ERROR_HANDLER 1
#endif
int main(int argc, char *argv[])
{
if (DISABLE_ERROR_HANDLER == 0)
(void)gsl_set_error_handler_off();
gsl_rng *rng = gsl_rng_alloc(gsl_rng_taus);
double sds[5] = { 5,2,10, 5, 3, };
int col, row = 0;
gsl_matrix *X = gsl_matrix_alloc(50, 5);
for (col = 0; col < X->size2; col++) {
for (row = 0; row < X->size1; row++) {
int num = (int)(gsl_ran_gaussian(rng, sds[col]) * 1e6);
(void)gsl_matrix_set(X, row, col, (double)(num / 1e6));
}
}
gsl_matrix *m_pca = gsl_matrix_alloc(X->size1, 2);
pca(X, PCA_CE_MAGIC, PCA_KLT_ALGORITHM, m_pca);
print_matrix(m_pca);
gsl_rng_free(rng);
gsl_matrix_free(X);
gsl_matrix_free(m_pca);
return EXIT_SUCCESS;
}

44
src/matio.c Normal file
View File

@ -0,0 +1,44 @@
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>
#include "matio.h"
void print_matrix ( gsl_matrix *matrix ) {
printf("\n");
for ( int i = 0 ; i < matrix->size1 ; i++ ) {
for ( int j = 0 ; j < matrix->size2 ; j++ ) {
printf(" %.6f ", gsl_matrix_get(matrix, i, j));
}
printf("\n");
}
printf("\n");
}
void print_matrix_complex ( gsl_matrix_complex *matrix ) {
printf("\n");
for ( int i = 0 ; i < matrix->size1 ; i++ ) {
for ( int j = 0 ; j < matrix->size2 ; j++ ) {
printf(" %.6g ", gsl_matrix_complex_get(matrix, i, j));
}
printf("\n");
}
printf("\n");
}
void print_vector ( gsl_vector *vec ) {
printf("\n");
for ( int i = 0 ; i < vec->size ; i++ ) {
printf(" %.6f ", gsl_vector_get(vec, i));
}
printf("\n");
}
void print_flatten ( gsl_matrix *matrix ) {
printf("\n");
for ( int i = 0 ; i < matrix->size1 ; i++ ) {
for (int j = 0; j < matrix->size2; j++) {
printf("%g,", gsl_matrix_get(matrix, i, j));
}
}
printf("\n");
}

157
src/pca.c Normal file
View File

@ -0,0 +1,157 @@
#include <gsl/gsl_vector_double.h>
#include <math.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_eigen.h>
#include <gsl/gsl_blas.h>
#include <stdio.h>
#include "pca.h"
int pca (gsl_matrix *A, int nc, int algorithm, gsl_matrix *m) {
int i, exit_status = 0;
gsl_vector *eigenvals = gsl_vector_alloc(A->size2);
gsl_matrix *components = gsl_matrix_alloc(A->size1, A->size2);
switch (algorithm) {
case PCA_KLT_ALGORITHM: {
exit_status = _klt(A, eigenvals, components);
break;
}
default: exit_status = -1;
}
// Eigenvector subset based on eigenvalue cumulative energy (min. 90%)
if (nc == PCA_CE_MAGIC) {
double ce = 0, gp = gsl_vector_sum(eigenvals);
for (i = 0; i < eigenvals->size; i++) {
ce += gsl_vector_get(eigenvals, i);
if (ce / gp >= 0.9) {
nc = i + 1;
break;
}
}
// nc is different and possibly m is no longer sized correctly
gsl_matrix_free(m);
m = gsl_matrix_alloc(A->size1, nc);
}
// select L first columns to reduce by (L < p)
gsl_vector *tmp = gsl_vector_alloc(m->size1);
for (i = 0; i < nc; i++) {
exit_status = gsl_matrix_get_col(tmp, components, i);
exit_status = gsl_matrix_set_col(m, i, tmp);
}
gsl_vector_free(tmp);
gsl_vector_free(eigenvals);
gsl_matrix_free(components);
return exit_status;
}
int _klt (gsl_matrix *A, gsl_vector *eval, gsl_matrix *m) {
int exit_status, row, col = 0;
int size = A->size2;
// Covariance matrix of the data
gsl_matrix *C = gsl_matrix_alloc(size, size);
exit_status = gsl_blas_dgemm(CblasTrans,
CblasNoTrans,
1.0,
A,
A,
0.0,
C);
// Divide covariance matrix by 1/N-1 (Bessel's Correction)
for (col = 0; col < C->size2; col++) {
for (row = 0; row < C->size1; row++) {
gsl_matrix_set(C,
row,
col,
gsl_matrix_get(C, row, col) / (A->size1 - 1));
}
}
// allocate workspace for eigenvalue and eigenvector computation
gsl_eigen_symmv_workspace *eigen_work = gsl_eigen_symmv_alloc(size);
// vector/matrix allocation for eigenvalue/eigenvector
gsl_matrix *V = gsl_matrix_alloc(size, size);
// compute the unordered eigenvalues and eigenvectors
exit_status = gsl_eigen_symmv(C,
eval,
V,
eigen_work);
// sort the D and V in descending order of magnitude
exit_status = gsl_eigen_symmv_sort(eval,
V,
GSL_EIGEN_SORT_ABS_DESC);
exit_status = _internal_eigen_sign_correction(V);
// Project new data onto the new basis
exit_status = gsl_blas_dgemm(CblasNoTrans,
CblasNoTrans,
1.0,
A,
V,
0.0,
m);
gsl_eigen_symmv_free(eigen_work);
gsl_matrix_free(V);
gsl_matrix_free(C);
return exit_status;
}
int _internal_eigen_sign_correction(gsl_matrix *evec) {
/* enforce eigenvector sign convention: the largest element in each column
* will have positive sign.
* @NOTE: This is not mentioned on the PCA Wiki page and it's not well
* documented. Found in the MATLAB code for `pca` as the final step.
* The sign of all the elements in the vector are flipped (vector points in
* the opposite direction).
* */
// argmax each column in the eigenvector matrix
size_t argmax;
double sign;
int size = evec->size2;
gsl_vector *cols = gsl_vector_alloc(size);
gsl_matrix *sign_map = gsl_matrix_alloc(size, size);
int col, row, j, exit_status = 0;
for (col = 0; col < evec->size2; col++) {
exit_status = gsl_matrix_get_col(cols, evec, col);
for (j = 0; j < cols->size; j++) {
gsl_vector_set(cols,
j,
fabs(gsl_vector_get(cols, j)));
}
argmax = gsl_vector_max_index(cols);
sign = gsl_matrix_get(evec, argmax, col) < 0 ? -1.0 : 1.0;
for (row = 0; row < sign_map->size1; row++) {
gsl_matrix_set(sign_map, row, col, sign);
}
}
exit_status = gsl_matrix_mul_elements(evec, sign_map);
gsl_vector_free(cols);
gsl_matrix_free(sign_map);
return exit_status;
}