特集記事

マルチスレッドを意識しないマルチスレッド・ライブラリ「Intel Concurrent Collections」がおもしろい

2013/09/03 14:00

ポスト

list08

#include <iostream>
#include <iomanip>
#include <valarray>
#include <algorithm>
#include <utility>
#include <cassert>
#include <cnc/cnc.h>

using namespace std;

typedef std::pair<size_t,size_t> position;

// forward decl.
template<size_t J, size_t K, size_t L>
struct my_context;

// Step functor for the kick-off step of the graph. my_context holds it in the
// step collection `startStep`, which is prescribed by `startTag`.
template<size_t J, size_t K, size_t L>
struct start {
  // Runs the step for the given int tag against the owning context; the
  // definition puts one `position` tag per (row, column) into calculateTag.
  int execute(const int& tag, my_context<J,K,L>& ctx) const;
};

// Step functor that computes one element of the product matrix. my_context
// holds it in `produceStep`, prescribed by `calculateTag` and declared as a
// producer of `productItem`.
template<size_t J, size_t K, size_t L>
struct produce {
  // `pos` is the (row, column) of the element to compute; the definition puts
  // the resulting dot product into ctx.productItem under that key.
  int execute(const position& pos, my_context<J,K,L>& ctx) const;
};

// Step functor that copies one computed element into the result matrix.
// my_context holds it in `collectStep`, prescribed by `calculateTag` and
// declared as a consumer of `productItem`.
template<size_t J, size_t K, size_t L>
struct collect {
  // `pos` is the (row, column) of the element to fetch from ctx.productItem
  // and store into ctx.C.
  int execute(const position& pos, my_context<J,K,L>& ctx) const;
};

// CnC graph for the matrix product C = A * B, where A is JxK, B is KxL and C
// is JxL, each stored row-major in a valarray.
//
// Wiring established by the constructor:
//   startTag     --prescribes--> startStep
//   calculateTag --prescribes--> produceStep, collectStep
//   produceStep  --produces----> productItem
//   collectStep  --consumes----> productItem
template<size_t J, size_t K, size_t L>
struct my_context : public CnC::context< my_context<J,K,L> > {
  // One step collection per step functor.
  CnC::step_collection< start<J,K,L> >   startStep;
  CnC::step_collection< produce<J,K,L> > produceStep;
  CnC::step_collection< collect<J,K,L> > collectStep;

  // Tag collections that trigger the steps.
  CnC::tag_collection<int>      startTag;     // prescribes startStep
  CnC::tag_collection<position> calculateTag; // prescribes produceStep and collectStep

  // Dot products keyed by the (row, column) they belong to.
  CnC::item_collection<position,int> productItem;

  std::valarray<int> A; // JxK, left operand
  std::valarray<int> B; // KxL, right operand
  std::valarray<int> C; // JxL, result

  my_context()
    : startStep(*this),
      produceStep(*this),
      collectStep(*this),
      startTag(*this),
      calculateTag(*this),
      productItem(*this)
  {
    // Tag -> step prescriptions.
    startTag.prescribes(startStep, *this);
    calculateTag.prescribes(produceStep, *this);
    calculateTag.prescribes(collectStep, *this);

    // Step <-> item relations.
    produceStep.produces(productItem);
    collectStep.consumes(productItem);
  }

  // Loads the operands from `a` (J*K ints) and `b` (K*L ints) and replaces C
  // with a zero-filled array of J*L elements.
  void initialize(const int* a, const int* b) {
    const std::valarray<int> lhs(a, J*K);
    const std::valarray<int> rhs(b, K*L);
    A = lhs;
    B = rhs;
    C = std::valarray<int>(J*L);
  }
};

// Kick-off step: requests every element of the JxL result by putting one
// (row, column) tag into calculateTag, in row-major order. The incoming tag
// value is not used.
template<size_t J, size_t K, size_t L>
int start<J,K,L>::execute(const int& tag, my_context<J,K,L>& ctx) const {
  CnC::tag_collection<position>& requests = ctx.calculateTag;
  for ( size_t row = 0; row != J; ++row ) {
    for ( size_t col = 0; col != L; ++col ) {
      requests.put(position(row, col)); // "compute the value at (row, col)"
    }
  }
  return CnC::CNC_Success;
}

// Computes the element of the product at `pos` = (row, column) as the dot
// product of row `pos.first` of A and column `pos.second` of B, and puts it
// into productItem under the same key.
template<size_t J, size_t K, size_t L>
int produce<J,K,L>::execute(const position& pos, my_context<J,K,L>& ctx) const {
  // Row-major layout: a row of A is K contiguous elements, a column of B is
  // K elements spaced L apart.
  const std::slice rowOfA(pos.first*K, K, 1);
  const std::slice colOfB(pos.second,  K, L);

  std::valarray<int> lhs = ctx.A[rowOfA];
  std::valarray<int> rhs = ctx.B[colOfB];

  const int dot = (lhs*rhs).sum();
  ctx.productItem.put(pos, dot);
  return CnC::CNC_Success;
}

// Fetches the value stored in productItem for `pos` = (row, column) and
// writes it to the matching row-major slot of C.
template<size_t J, size_t K, size_t L>
int collect<J,K,L>::execute(const position& pos, my_context<J,K,L>& ctx) const {
  const size_t slot = pos.first*L + pos.second; // row-major index into C

  // NOTE(review): the store below relies on get() having filled `value`;
  // keep the get before the write.
  int value;
  ctx.productItem.get(pos, value);
  ctx.C[slot] = value;

  return CnC::CNC_Success;
}

// Computes the matrix product C = A * B on a single thread.
//
// Parameters:
//   J, K, L - dimensions: A is JxK, B is KxL, the result is JxL.
//   a       - the J*K elements of A in row-major order.
//   b       - the K*L elements of B in row-major order.
// Returns the J*L elements of C in row-major order.
//
// When K is 0 every dot product is an empty sum, so the zero-filled result is
// returned without reading `a` or `b`; calling valarray::sum() on an empty
// valarray would be undefined behaviour.
std::valarray<int> singlethread_calculation(size_t J, size_t K, size_t L,
                                            const int* a, const int* b) {
  std::valarray<int> C(J*L); // value-initialised, i.e. all zeros
  if ( K == 0 ) {
    return C;
  }

  const std::valarray<int> A(a, J*K);
  const std::valarray<int> B(b, K*L);

  for ( size_t r = 0; r < J; ++r ) {
    for ( size_t c = 0; c < L; ++c ) {
      const std::valarray<int> ar = A[std::slice(r*K, K, 1)]; // row r of A
      const std::valarray<int> bc = B[std::slice(c,   K, L)]; // column c of B
      C[r*L+c] = (ar*bc).sum();                               // their dot product is C[r][c]
    }
  }
  return C;
}

int main() {
  const size_t J = 2;
  const size_t K = 3;
  const size_t L = 4;

  int ma[J][K] = { {  2,  3,  4 },
                   {  5,  6,  7 } };
  int mb[K][L] = { { 10, 11, 12, 13 },
                   { 14, 15, 16, 17 },
                   { 18, 19, 20, 21 } };
  
  valarray<int> C = singlethread_calculation(J,K,L, &ma[0][0], &mb[0][0]);

  for ( size_t i = 0; i < J*L; ++i ) {
    if ( i % L == 0 ) cout << endl;
    cout << setw(4) << C[i];
  }
  cout << endl;

  // CnC で行列の積を求める
  my_context<J,K,L> ctx;
  ctx.initialize(&ma[0][0], &mb[0][0]);
  ctx.startTag.put(0);
  ctx.wait();

  for ( size_t i = 0; i < J*L; ++i ) {
    if ( i % L == 0 ) cout << endl;
    cout << setw(4) << ctx.C[i];
    assert( C[i] == ctx.C[i] );
  }
  cout << endl;

}

最後に

　さて、いくつかサンプルをご覧に入れましたが、これらサンプルのどれにも明示的にスレッドを起こしている箇所はありません。step/item/tagをつないでtagに着火すれば、各stepをいかにスケジュールするかは魔法の箱（CnC Runtime）が考えてくれます。論理プロセッサ数に応じたスケーラビリティもCnC Runtimeが善きに計らってくれます。マルチスレッドを意識せずにマルチスレッド・アプリケーションが作れちゃう。

　「CnC:Intel Concurrent Collections for C++」は現バージョンが0.9、正式リリースにはもうしばらくかかりそうです。CnC 1.0のお披露目を心待ちにしています。

この記事は参考になりましたか？

連載通知を行うには会員登録(無料)が必要です。
既に会員の方はログインを行ってください。

印刷用を表示

ポスト

特集記事連載記事一覧: 【随時更新・まとめ読み】開発者のための「AWS re:Invent 2024」関連記事

開発者のための「WWDC 2024」関連発表まとめ

LeSSやSAFeなど、大規模アジャイルにおけるソフトウェア品質の課題とは？

もっと読む

この記事の著者: επιστημη（エピステーメー）

C++に首まで浸かったプログラマ。Microsoft MVP, Visual C++ (2004.01～2018.06) "だった"りわんくま同盟でたまにセッションスピーカやったり中国茶淹れてにわか茶...

※プロフィールは、執筆時点、または直近の記事の寄稿時点での内容です

この著者の最近の執筆記事