At the heart of the method is the following basic recurrence.
If square
, which computes one step of
this basic recurrence. I'm not trying to do any clever blocking.
You may choose to be more clever in your assignment, but it is not
required.
The return value for square
is true if l
and lnew
are
identical, and false otherwise.
int square(int n, // Number of nodes
int* restrict l, // Partial distance at step s
int* restrict lnew) // Partial distance at step s+1
{
int done = 1;
#pragma omp parallel for shared(l, lnew) reduction(&& : done)
for (int j = 0; j < n; ++j) {
for (int i = 0; i < n; ++i) {
int lij = lnew[j*n+i];
for (int k = 0; k < n; ++k) {
int lik = l[k*n+i];
int lkj = l[j*n+k];
if (lik + lkj < lij) {
lij = lik+lkj;
done = 0;
}
}
lnew[j*n+i] = lij;
}
}
return done;
}
The value infinitize
and deinfinitize
convert back
and forth between the zero-for-no-edge and
static inline void infinitize(int n, int* l)
{
for (int i = 0; i < n*n; ++i)
if (l[i] == 0)
l[i] = n+1;
}
static inline void deinfinitize(int n, int* l)
{
for (int i = 0; i < n*n; ++i)
if (l[i] == n+1)
l[i] = 0;
}
Of course, any loop-free path in a graph with
The shortest_path
routine attempts to save a little bit of work
by only repeatedly squaring until two successive matrices are the
same (as indicated by the return value of the square
routine).
void shortest_paths(int n, int* restrict l)
{
// Generate l_{ij}^0 from adjacency matrix representation
infinitize(n, l);
for (int i = 0; i < n*n; i += n+1)
l[i] = 0;
// Repeated squaring until nothing changes
int* restrict lnew = (int*) calloc(n*n, sizeof(int));
memcpy(lnew, l, n*n * sizeof(int));
for (int done = 0; !done; ) {
done = square(n, l, lnew);
memcpy(l, lnew, n*n * sizeof(int));
}
free(lnew);
deinfinitize(n, l);
}
Of course, we need to run the shortest path algorithm on something!
For the sake of keeping things interesting, let's use a simple random graph
model to generate the input data. The
int* gen_graph(int n, double p)
{
int* l = calloc(n*n, sizeof(int));
struct mt19937p state;
sgenrand(10302011UL, &state);
for (int j = 0; j < n; ++j) {
for (int i = 0; i < n; ++i)
l[j*n+i] = (genrand(&state) < p);
l[j*n+j] = 0;
}
return l;
}
Simple tests are always useful when tuning code, so I have included
two of them. Since this computation doesn't involve floating point
arithmetic, we should get bitwise identical results from run to
run, even if we do optimizations that change the associativity of
our computations. The function fletcher16
computes a simple
simple checksum. over the output of the
shortest_paths
routine, which we can then use to quickly tell
whether something has gone wrong. The write_matrix
routine
actually writes out a text representation of the matrix, in case we
want to load it into MATLAB to compare results.
int fletcher16(int* data, int count)
{
int sum1 = 0;
int sum2 = 0;
for(int index = 0; index < count; ++index) {
sum1 = (sum1 + data[index]) % 255;
sum2 = (sum2 + sum1) % 255;
}
return (sum2 << 8) | sum1;
}
void write_matrix(const char* fname, int n, int* a)
{
FILE* fp = fopen(fname, "w+");
if (fp == NULL) {
fprintf(stderr, "Could not open output file: %s\n", fname);
exit(-1);
}
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j)
fprintf(fp, "%d ", a[j*n+i]);
fprintf(fp, "\n");
}
fclose(fp);
}
const char* usage =
"path.x -- Parallel all-pairs shortest path on a random graph\n"
"Flags:\n"
" - n -- number of nodes (200)\n"
" - p -- probability of including edges (0.05)\n"
" - i -- file name where adjacency matrix should be stored (none)\n"
" - o -- file name where output matrix should be stored (none)\n";
int main(int argc, char** argv)
{
int n = 200; // Number of nodes
double p = 0.05; // Edge probability
const char* ifname = NULL; // Adjacency matrix file name
const char* ofname = NULL; // Distance matrix file name
// Option processing
extern char* optarg;
const char* optstring = "hn:d:p:o:i:";
int c;
while ((c = getopt(argc, argv, optstring)) != -1) {
switch (c) {
case 'h':
fprintf(stderr, "%s", usage);
return -1;
case 'n': n = atoi(optarg); break;
case 'p': p = atof(optarg); break;
case 'o': ofname = optarg; break;
case 'i': ifname = optarg; break;
}
}
// Graph generation + output
int* l = gen_graph(n, p);
if (ifname)
write_matrix(ifname, n, l);
// Time the shortest paths code
double t0 = omp_get_wtime();
shortest_paths(n, l);
double t1 = omp_get_wtime();
printf("== OpenMP with %d threads\n", omp_get_max_threads());
printf("n: %d\n", n);
printf("p: %g\n", p);
printf("Time: %g\n", t1-t0);
printf("Check: %X\n", fletcher16(l, n*n));
// Generate output file
if (ofname)
write_matrix(ofname, n, l);
// Clean up
free(l);
return 0;
}