From 75b47ec45d8076732b3f30c1b3ebc3d35e6ed7fe Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 16 Jan 2012 14:27:00 +0200 Subject: [PATCH] compositor: implement inverse matrix transformation Implement 4x4 matrix inversion based on LU-decomposition with partial pivoting. Instead of simply computing the inverse matrix explicitly, introduce the type struct weston_inverse_matrix for storing the LU-decomposition and the permutation from pivoting. Using doubles, this struct has greater precision than struct weston_matrix. If you need only few (less than 5, presumably) multiplications with the inverse matrix, is it cheaper to use weston_inverse_matrix, and not compute the inverse matrix explicitly into a weston_matrix. Signed-off-by: Pekka Paalanen --- src/compositor.c | 2 +- src/compositor.h | 2 +- src/matrix.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++- src/matrix.h | 11 ++++- 4 files changed, 133 insertions(+), 5 deletions(-) diff --git a/src/compositor.c b/src/compositor.c index 69ad60e4..cb19f342 100644 --- a/src/compositor.c +++ b/src/compositor.c @@ -545,7 +545,7 @@ static void weston_surface_update_transform(struct weston_surface *surface) { struct weston_matrix *matrix = &surface->transform.matrix; - struct weston_matrix *inverse = &surface->transform.inverse; + struct weston_inverse_matrix *inverse = &surface->transform.inverse; struct weston_transform *tform; if (!surface->transform.dirty) diff --git a/src/compositor.h b/src/compositor.h index 53126651..2064d676 100644 --- a/src/compositor.h +++ b/src/compositor.h @@ -230,7 +230,7 @@ struct weston_surface { /* derived state, set up by weston_surface_update_transform */ struct weston_matrix matrix; - struct weston_matrix inverse; + struct weston_inverse_matrix inverse; int enabled; } transform; diff --git a/src/matrix.c b/src/matrix.c index 941e9584..06cdc5e4 100644 --- a/src/matrix.c +++ b/src/matrix.c @@ -1,5 +1,6 @@ /* * Copyright © 2011 Intel Corporation + * Copyright © 2012 Collabora, Ltd. * * Permission to use, copy, modify, distribute, and sell this software and * its documentation for any purpose is hereby granted without fee, provided @@ -22,6 +23,7 @@ #include #include +#include #include #include @@ -101,9 +103,126 @@ weston_matrix_transform(struct weston_matrix *matrix, struct weston_vector *v) *v = t; } +static inline void +swap_rows(double *a, double *b) +{ + unsigned k; + double tmp; + + for (k = 0; k < 13; k += 4) { + tmp = a[k]; + a[k] = b[k]; + b[k] = tmp; + } +} + +static inline unsigned +find_pivot(double *column, unsigned k) +{ + unsigned p = k; + for (++k; k < 4; ++k) + if (fabs(column[p]) < fabs(column[k])) + p = k; + + return p; +} + +/* + * reference: Gene H. Golub and Charles F. van Loan. Matrix computations. + * 3rd ed. The Johns Hopkins University Press. 1996. + * LU decomposition, forward and back substitution: Chapter 3. + */ + WL_EXPORT int -weston_matrix_invert(struct weston_matrix *inverse, +weston_matrix_invert(struct weston_inverse_matrix *inverse, const struct weston_matrix *matrix) { - return -1; /* fail */ + double A[16]; + unsigned p[4] = { 0, 1, 2, 3 }; + unsigned i, j, k; + unsigned pivot; + double pv; + + for (i = 16; i--; ) + A[i] = matrix->d[i]; + + /* LU decomposition with partial pivoting */ + for (k = 0; k < 4; ++k) { + pivot = find_pivot(&A[k * 4], k); + if (pivot != k) { + unsigned tmp = p[k]; + p[k] = p[pivot]; + p[pivot] = tmp; + swap_rows(&A[k], &A[pivot]); + } + + pv = A[k * 4 + k]; + if (fabs(pv) < 1e-9) + return -1; /* zero pivot, not invertible */ + + for (i = k + 1; i < 4; ++i) { + A[i + k * 4] /= pv; + + for (j = k + 1; j < 4; ++j) + A[i + j * 4] -= A[i + k * 4] * A[k + j * 4]; + } + } + + memcpy(inverse->LU, A, sizeof(A)); + memcpy(inverse->p, p, sizeof(p)); + return 0; +} + +WL_EXPORT void +weston_matrix_inverse_transform(struct weston_inverse_matrix *inverse, + struct weston_vector *v) +{ + /* Solve A * x = v, when we have P * A = L * U. + * P * A * x = P * v => L * U * x = P * v + * Let U * x = b, then L * b = P * v. + */ + unsigned *p = inverse->p; + double *LU = inverse->LU; + double b[4]; + unsigned k, j; + + /* Forward substitution, column version, solves L * b = P * v */ + /* The diagonal of L is all ones, and not explicitly stored. */ + b[0] = v->f[p[0]]; + b[1] = (double)v->f[p[1]] - b[0] * LU[1 + 0 * 4]; + b[2] = (double)v->f[p[2]] - b[0] * LU[2 + 0 * 4]; + b[3] = (double)v->f[p[3]] - b[0] * LU[3 + 0 * 4]; + b[2] -= b[1] * LU[2 + 1 * 4]; + b[3] -= b[1] * LU[3 + 1 * 4]; + b[3] -= b[2] * LU[3 + 2 * 4]; + + /* backward substitution, column version, solves U * y = b */ +#if 1 + /* hand-unrolled, 25% faster for whole function */ + b[3] /= LU[3 + 3 * 4]; + b[0] -= b[3] * LU[0 + 3 * 4]; + b[1] -= b[3] * LU[1 + 3 * 4]; + b[2] -= b[3] * LU[2 + 3 * 4]; + + b[2] /= LU[2 + 2 * 4]; + b[0] -= b[2] * LU[0 + 2 * 4]; + b[1] -= b[2] * LU[1 + 2 * 4]; + + b[1] /= LU[1 + 1 * 4]; + b[0] -= b[1] * LU[0 + 1 * 4]; + + b[0] /= LU[0 + 0 * 4]; +#else + for (j = 3; j > 0; --j) { + b[j] /= LU[j + j * 4]; + for (k = 0; k < j; ++k) + b[k] -= b[j] * LU[k + j * 4]; + } + + b[0] /= LU[0 + 0 * 4]; +#endif + + /* the result */ + for (j = 0; j < 4; ++j) + v->f[j] = b[j]; } diff --git a/src/matrix.h b/src/matrix.h index 2c3285a6..21c1e785 100644 --- a/src/matrix.h +++ b/src/matrix.h @@ -1,5 +1,6 @@ /* * Copyright © 2008-2011 Kristian Høgsberg + * Copyright © 2012 Collabora, Ltd. * * Permission to use, copy, modify, distribute, and sell this software and * its documentation for any purpose is hereby granted without fee, provided @@ -27,6 +28,11 @@ struct weston_matrix { GLfloat d[16]; }; +struct weston_inverse_matrix { + double LU[16]; /* column-major */ + unsigned p[4]; /* permutation */ +}; + struct weston_vector { GLfloat f[4]; }; @@ -44,7 +50,10 @@ void weston_matrix_transform(struct weston_matrix *matrix, struct weston_vector *v); int -weston_matrix_invert(struct weston_matrix *inverse, +weston_matrix_invert(struct weston_inverse_matrix *inverse, const struct weston_matrix *matrix); +void +weston_matrix_inverse_transform(struct weston_inverse_matrix *inverse, + struct weston_vector *v); #endif /* WESTON_MATRIX_H */