Shonan Challenge for Staged HPC:
(Simplified) Hidden Markov Model

Kenichi Asai

May 23, 2012

This document in pdf.

1  Challenge Specification

Program in OCaml.

1.1  Input Program

A general matrix-vector multiplication program (to obtain the next state in the hidden Markov model):
(* f : int array array -> int array -> int array *)
let f a v =
  let v' = Array.make n 0 in
  for i = 0 to n-1 do
    for j = 0 to n-1 do
      v'.(i) <- v'.(i) + a.(i).(j) * v.(j)
    done
  done;
  v'
with a concrete sparse adjacency matrix. For example:
(* a : int array array *)
let a = [| [| 1; 0; 0; 1; 0 |];
           [| 0; 0; 1; 0; 0 |];
           [| 0; 1; 0; 0; 0 |];
           [| 0; 0; 1; 1; 1 |];
           [| 0; 0; 1; 0; 1 |] |]

1.2  Output Program 1

A program where the elements of the array are inlined:
(* f : int array -> int array *)
let f v =
  let v' = Array.make 5 0 in
  v'.(0) <- v.(0) + v.(3);
  v'.(1) <- v.(2);
  v'.(2) <- v.(1);
  v'.(3) <- v.(2) + v.(3) + v.(4);
  v'.(4) <- v.(2) + v.(4);
  v'

1.3  Output Program 2

A program where the elements of the array are inlined if the number of non-zero elements in a row is less than a threshold (e.g., three):
(* f : int array -> int array *)
let f v =
  let v' = Array.make 5 0 in
  v'.(0) <- v.(0) + v.(3);
  v'.(1) <- v.(2);
  v'.(2) <- v.(1);
  for j = 0 to 4 do
    v'.(3) <- v'.(3) + a.(3).(j) * v.(j)
  done;
  v'.(4) <- v.(2) + v.(4);
  v'

2  Solution 1

Program in MetaOCaml.

2.1  Hack for handling semicolor properly

The current MetaOCaml does not handle code-producing `for' loops in a nice way. Here is a hack to handle them properly (due to Ken Shan). It is essentially the CPS version of a `for' loop.
(* for_to : int -> int -> (int -> 'a -> 'a) -> 'a -> 'a *)
let rec for_to n p f init =
    if n > p then init else for_to n (p-1) f (f p init)

(* id : 'a -> 'a *)
let id c = c

(* semi : 'a -> 'b -> 'b *)
let semi c1 c2 = c1; c2

2.2  Rewriting the input program

Required rewriting of the input program for staging. Semantically, it is identical to the input program.
(* f1 : int array array -> int array -> int array *)
let f1 a = fun v ->
  let v' = Array.make n 0 in
  for_to 0 (n-1) (fun i ->
    for_to 0 (n-1) (fun j ->
      if a.(i).(j) = 1 then
        semi (v'.(i) <- v'.(i) + v.(j))
      else id))
  v'

2.3  Staging the rewritten program

Staging the rewritten program to turn it into a code-generating program.
(* semi' : ('a, 'b) code -> ('a, 'c) code -> ('a, 'c) code *)
let semi' c1 c2 = .<(.~c1; .~c2)>.

(* f1' : int array array -> ('a, int array -> int array) code *)
let f1' a = .<fun v ->
  let v' = Array.make n 0 in .~(
  for_to 0 (n-1) (fun i ->
    for_to 0 (n-1) (fun j ->
      if a.(i).(j) = 1 then
        semi' .< v'.(i) <- v'.(i) + v.(j) >.
      else id))
  .<v'>.)>.

2.4  Generated program

Generated program for the exmaple matrix. Variable names are changed for better readability.
# f1' a;;
- : ('a, int array -> int array) code =
.<fun v ->
   let v' = Array.make 5 0 in
   v'.(0) <- v'.(0) + v.(0);
   v'.(0) <- v'.(0) + v.(3);
   v'.(1) <- v'.(1) + v.(2);
   v'.(2) <- v'.(2) + v.(1);
   v'.(3) <- v'.(3) + v.(2);
   v'.(3) <- v'.(3) + v.(3);
   v'.(3) <- v'.(3) + v.(4);
   v'.(4) <- v'.(4) + v.(2);
   v'.(4) <- v'.(4) + v.(4);
   v'>.
If we want to optimize sequences of assigments further (to turn the first two assignments to v'.(0) <- v.(0) + v.(3);, for example), we need some more elaboration. (Possibly, the compiler can do such a rather simple optimization?)

3  Solution 2

Program in MetaOCaml.

3.1  Rewriting the input program

Required rewriting of the input program for staging (using the same hack for `for' loop).
(* non_zeros : int array -> int *)
let rec non_zeros v =
  let n = ref 0 in
  for i = 0 to Array.length v - 1 do
    if v.(i) <> 0 then n := !n + 1
  done;
  !n

(* example threshold *)
let threshold = 3

(* f2 : int array array -> int array -> int array *)
let f2 a = fun v ->
  let v' = Array.make n 0 in
  for_to 0 (n-1) (fun i ->
    if non_zeros a.(i) < threshold
    then for_to 0 (n-1) (fun j ->
           if a.(i).(j) = 1 then
             semi (v'.(i) <- v'.(i) + v.(j))
           else id)
    else semi
         (for j = 0 to (let n' = n-1 in n') do
            v'.(i) <- v'.(i) + a.(i).(j) * v.(j)
          done))
  v'
To handle the row with many non-zero elements, we explicitly introduce an if expression into the program. Since both the branches perform the same task, however, it is still semantically identical to the original program. Notice that CPS for_to is used in the `then' branch, while the original for is used in the `else' branch for the better staging result.

3.2  Staging the rewritten program

Staging the rewritten program to turn it into a code-generating program (semi' being the same as before).
(* f2' : int array array -> ('a, int array -> int array) code *)
let f2' a = .<fun v ->
  let v' = Array.make n 0 in .~(
  for_to 0 (n-1) (fun i ->
    if non_zeros a.(i) < threshold
    then for_to 0 (n-1) (fun j ->
           if a.(i).(j) = 1 then
             semi' .< v'.(i) <- v'.(i) + v.(j) >.
           else id)
    else semi'
      .< for j = 0 to .~(let n' = n-1 in .<n'>.) do
           v'.(i) <- v'.(i) + a.(i).(j) * v.(j)
         done >.)
  .<v'>.)>.

3.3  Generated program

Generated program for the exmaple matrix. Variable names are changed for better readability.
# f2' a;;
- : ('a, int array -> int array) code =
.<fun v ->
   let v' = Array.make 5 0 in
   v'.(0) <- v'.(0) + v.(0);
   v'.(0) <- v'.(0) + v.(3);
   v'.(1) <- v'.(1) + v.(2);
   v'.(2) <- v'.(2) + v.(1);
   for j = 0 to 4 do
     v'.(3) <- v'.(3) + a.(3).(j) * v.(j)
   done;
   v'.(4) <- v'.(4) + v.(2);
   v'.(4) <- v'.(4) + v.(4);
   v'>.

This document was translated from LATEX by HEVEA.