本文整理汇总了C++中boost::compute::command_queue类的典型用法代码示例。如果您正苦于以下问题:C++ command_queue类的具体用法?C++ command_queue怎么用?C++ command_queue使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了command_queue类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: test_fill
inline void test_fill(T v1, T v2, T v3, bc::command_queue queue) {
if(boost::is_same<typename bc::scalar_type<T>::type, bc::double_>::value &&
!queue.get_device().supports_extension("cl_khr_fp64")) {
std::cerr << "Skipping test_fill<" << bc::type_name<T>() << ">() "
"on device which doesn't support cl_khr_fp64" << std::endl;
return;
}
bc::vector<T> vector(4, queue.get_context());
bc::fill(vector.begin(), vector.end(), v1, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v1, v1, v1, v1));
vector.resize(1000, queue);
bc::fill(vector.begin(), vector.end(), v2, queue);
queue.finish();
BOOST_CHECK_EQUAL(vector.front(), v2);
BOOST_CHECK_EQUAL(vector.back(), v2);
bc::fill(vector.begin() + 500, vector.end(), v3, queue);
queue.finish();
BOOST_CHECK_EQUAL(vector.front(), v2);
BOOST_CHECK_EQUAL(vector[499], v2);
BOOST_CHECK_EQUAL(vector[500], v3);
BOOST_CHECK_EQUAL(vector.back(), v3);
}
开发者ID:alexeykuzmin0,项目名称:ThePlatform,代码行数:26,代码来源:test_fill.cpp
示例2: decltype
static decltype(auto) call(std::vector<neu::layer::any_layer>& layers,
InputRange const& initial_delta,
OutputRange& result_prev_delta,
boost::compute::command_queue& queue) {
gpu_vector delta(initial_delta.begin(), initial_delta.end(), queue);
gpu_vector prev_delta(queue.get_context());
for(int i = layers.size()-1; i >= 0; --i) {
auto& l = layers.at(i);
prev_delta.resize(::neu::layer::whole_input_size(l), queue);
auto prev_delta_range = range::to_range(prev_delta);
#ifdef NEU_BENCHMARK_ENABLE
boost::timer t;
#endif //NEU_BENCHMARK_ENABLE
l.backward(
range::to_range(delta), prev_delta_range,
queue);
#ifdef NEU_BENCHMARK_ENABLE
queue.finish();
std::cout << "layer" << i << "\tbackward\t" << t.elapsed() << " secs" << std::endl;
#endif //NEU_BENCHMARK_ENABLE
delta.swap(prev_delta);
}
range::copy(delta, result_prev_delta, queue);
}
开发者ID:okdshin,项目名称:Neu,代码行数:25,代码来源:any_layer_vector.hpp
示例3: decltype
decltype(auto) matrix_transpose(
InputRange const& input, OutputRange& output,
int row_size, int col_size,
boost::compute::command_queue& queue) {
NEU_ASSERT(row_size*col_size == range::distance(input));
static auto transpose_kernel =
neu::make_kernel(neu::layer::impl::matrix_transpose_kernel_source,
"matrix_transpose", queue.get_context());
transpose_kernel.set_args(
range::get_buffer(input),
static_cast<cl_int>(range::get_begin_index(input)),
range::get_buffer(output),
static_cast<cl_int>(range::get_begin_index(output)),
static_cast<cl_int>(row_size),
static_cast<cl_int>(col_size));
std::size_t global[2] = {
static_cast<std::size_t>(((col_size-1)/32+1)*32),
static_cast<std::size_t>(((row_size-1)/32+1)*32)
};
std::size_t local[2] = {
static_cast<std::size_t>(32),
static_cast<std::size_t>(32)
};
queue.enqueue_nd_range_kernel(transpose_kernel, 2, nullptr, global, local);
}
开发者ID:okdshin,项目名称:Neu,代码行数:25,代码来源:matrix_transpose.hpp
示例4: perf_random_number_engine
void perf_random_number_engine(const size_t size,
const size_t trials,
compute::command_queue& queue)
{
typedef typename Engine::result_type T;
// create random number engine
Engine engine(queue);
// create vector on the device
std::cout << "size = " << size << std::endl;
compute::vector<T> vector(size, queue.get_context());
// generate random numbers
perf_timer t;
for(size_t i = 0; i < trials; i++){
t.start();
engine.generate(vector.begin(), vector.end(), queue);
queue.finish();
t.stop();
}
// print result
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "rate: " << perf_rate<T>(size, t.min_time()) << " MB/s" << std::endl;
}
开发者ID:junmuz,项目名称:compute,代码行数:26,代码来源:perf_random_number_engine.cpp
示例5: test_fill_n
inline void test_fill_n(T v1, T v2, T v3, bc::command_queue queue) {
if(boost::is_same<typename bc::scalar_type<T>::type, bc::double_>::value &&
!queue.get_device().supports_extension("cl_khr_fp64")) {
std::cerr << "Skipping test_fill_n<" << bc::type_name<T>() << ">() "
"on device which doesn't support cl_khr_fp64" << std::endl;
return;
}
bc::vector<T> vector(4, queue.get_context());
bc::fill_n(vector.begin(), 4, v1, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v1, v1, v1, v1));
bc::fill_n(vector.begin(), 3, v2, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v2, v2, v2, v1));
bc::fill_n(vector.begin() + 1, 2, v3, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v2, v3, v3, v1));
bc::fill_n(vector.begin(), 4, v2, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v2, v2, v2, v2));
// fill last element
bc::fill_n(vector.end() - 1, 1, v3, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v2, v2, v2, v3));
// fill first element
bc::fill_n(vector.begin(), 1, v1, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v1, v2, v2, v3));
}
开发者ID:alexeykuzmin0,项目名称:ThePlatform,代码行数:35,代码来源:test_fill.cpp
示例6: read
void read(boost::compute::command_queue q, size_t offset,
size_t size, T *host, bool blocking = false
) const
{
if (size) {
if (blocking) {
q.enqueue_read_buffer(
buffer, sizeof(T) * offset, sizeof(T) * size, host
);
} else {
q.enqueue_read_buffer_async(
buffer, sizeof(T) * offset, sizeof(T) * size, host
);
}
}
}
开发者ID:ddemidov,项目名称:vexcl,代码行数:16,代码来源:device_vector.hpp
示例7: test_fill_n
inline void test_fill_n(T v1, T v2, T v3, bc::command_queue queue) {
bc::vector<T> vector(4, queue.get_context());
bc::fill_n(vector.begin(), 4, v1, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v1, v1, v1, v1));
bc::fill_n(vector.begin(), 3, v2, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v2, v2, v2, v1));
bc::fill_n(vector.begin() + 1, 2, v3, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v2, v3, v3, v1));
bc::fill_n(vector.begin(), 4, v2, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v2, v2, v2, v2));
// fill last element
bc::fill_n(vector.end() - 1, 1, v3, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v2, v2, v2, v3));
// fill first element
bc::fill_n(vector.begin(), 1, v1, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v1, v2, v2, v3));
}
开发者ID:jpola,项目名称:compute,代码行数:28,代码来源:test_fill.cpp
示例8: test_fill
inline void test_fill(T v1, T v2, T v3, bc::command_queue queue) {
bc::vector<T> vector(4, queue.get_context());
bc::fill(vector.begin(), vector.end(), v1, queue);
queue.finish();
CHECK_RANGE_EQUAL(T, 4, vector, (v1, v1, v1, v1));
vector.resize(1000, queue);
bc::fill(vector.begin(), vector.end(), v2, queue);
queue.finish();
BOOST_CHECK_EQUAL(vector.front(), v2);
BOOST_CHECK_EQUAL(vector.back(), v2);
bc::fill(vector.begin() + 500, vector.end(), v3, queue);
queue.finish();
BOOST_CHECK_EQUAL(vector.front(), v2);
BOOST_CHECK_EQUAL(vector[499], v2);
BOOST_CHECK_EQUAL(vector[500], v3);
BOOST_CHECK_EQUAL(vector.back(), v3);
}
开发者ID:jpola,项目名称:compute,代码行数:19,代码来源:test_fill.cpp
示例9: device_vector
device_vector(const boost::compute::command_queue &q, size_t n,
const T *host = 0, mem_flags flags = MEM_READ_WRITE)
{
if (host && !(flags & CL_MEM_USE_HOST_PTR))
flags |= CL_MEM_COPY_HOST_PTR;
if (n)
buffer = boost::compute::buffer(q.get_context(), n * sizeof(T),
flags, static_cast<void*>(const_cast<T*>(host)));
}
开发者ID:ddemidov,项目名称:vexcl,代码行数:10,代码来源:device_vector.hpp
示例10: generate
void generate(OutputIterator first, OutputIterator last, Function op, boost::compute::command_queue &queue)
{
boost::compute::vector<T> tmp(std::distance(first, last), queue.get_context());
BOOST_COMPUTE_FUNCTION(T, max_random, (const T x),
{
if(get_global_id(0) < 1)
return (ValueType) MAX_RANDOM;
else
return (ValueType) 0;
});
开发者ID:2bbb,项目名称:compute,代码行数:11,代码来源:test_uniform_real_distribution.cpp
示例11: map
mapped_array map(boost::compute::command_queue q) const {
return mapped_array(
static_cast<T*>(
q.enqueue_map_buffer(
buffer, CL_MAP_READ,
0, size() * sizeof(T)
)
),
buffer_unmapper(q, buffer)
);
}
开发者ID:ddemidov,项目名称:vexcl,代码行数:11,代码来源:device_vector.hpp
示例12: saxpy
void saxpy(const int num, bool gen = true, int iter = 0)
{
static compute::device gpu;
static compute::context context;
static compute::command_queue queue;
static compute::vector<T> x;
static compute::vector<T> y;
static compute::vector<T> res;
static T alpha = 3.5;
using compute::lambda::_1;
using compute::lambda::_2;
if (gen) {
gpu = compute::system::default_device();
context = compute::context(gpu);
queue = compute::command_queue(context, gpu);
x = compute::vector<T>(num, context);
std::vector<T> h_x(num);
std::generate(h_x.begin(), h_x.end(), rand);
compute::copy(h_x.begin(), h_x.end(), x.begin(), queue);
y = compute::vector<T>(num, context);
std::vector<T> h_y(num);
std::generate(h_y.begin(), h_y.end(), rand);
compute::copy(h_y.begin(), h_y.end(), y.begin(), queue);
res = compute::vector<T>(num, context);
queue.finish();
}
for (int i = 0; i < iter; i++) {
compute::transform(x.begin(), x.end(),
y.begin(), res.begin(),
alpha * _1 + _2,
queue);
}
queue.finish();
}
开发者ID:Brainiarc7,项目名称:valhalla,代码行数:41,代码来源:saxpy.cpp
示例13: perf_accumulate
double perf_accumulate(const compute::vector<T>& data,
const size_t trials,
compute::command_queue& queue)
{
perf_timer t;
for(size_t trial = 0; trial < trials; trial++){
t.start();
compute::accumulate(data.begin(), data.end(), T(0), queue);
queue.finish();
t.stop();
}
return t.min_time();
}
开发者ID:Soledad89,项目名称:compute,代码行数:13,代码来源:perf_accumulate.cpp
示例14: test_copy_if_odd
void test_copy_if_odd(compute::command_queue &queue)
{
// create input and output vectors on the device
const compute::context &context = queue.get_context();
compute::vector<int> input(PERF_N, context);
compute::vector<int> output(PERF_N, context);
// generate random numbers between 1 and 10
compute::default_random_engine rng(queue);
compute::uniform_int_distribution<int> d(1, 10);
d.generate(input.begin(), input.end(), rng, queue);
BOOST_COMPUTE_FUNCTION(bool, is_odd, (int x),
{
return x & 1;
});
开发者ID:EdKeith,项目名称:compute,代码行数:16,代码来源:perf_copy_if.cpp
示例15: tesselate_sphere
// tesselates a sphere with radius, phi_slices, and theta_slices. returns
// a shared opencl/opengl buffer containing the vertex data.
compute::opengl_buffer tesselate_sphere(float radius,
size_t phi_slices,
size_t theta_slices,
compute::command_queue &queue)
{
using compute::dim;
const compute::context &context = queue.get_context();
const size_t vertex_count = phi_slices * theta_slices;
// create opengl buffer
GLuint vbo;
vtkgl::GenBuffersARB(1, &vbo);
vtkgl::BindBufferARB(vtkgl::ARRAY_BUFFER, vbo);
vtkgl::BufferDataARB(vtkgl::ARRAY_BUFFER,
sizeof(float) * 4 * vertex_count,
NULL,
vtkgl::STREAM_DRAW);
vtkgl::BindBufferARB(vtkgl::ARRAY_BUFFER, 0);
// create shared opengl/opencl buffer
compute::opengl_buffer vertex_buffer(context, vbo);
// tesselate_sphere kernel source
const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
__kernel void tesselate_sphere(float radius,
uint phi_slices,
uint theta_slices,
__global float4 *vertex_buffer)
{
const uint phi_i = get_global_id(0);
const uint theta_i = get_global_id(1);
const float phi = phi_i * 2.f * M_PI_F / phi_slices;
const float theta = theta_i * 2.f * M_PI_F / theta_slices;
float4 v;
v.x = radius * cos(theta) * cos(phi);
v.y = radius * cos(theta) * sin(phi);
v.z = radius * sin(theta);
v.w = 1.f;
vertex_buffer[phi_i*phi_slices+theta_i] = v;
}
);
开发者ID:2bbb,项目名称:compute,代码行数:48,代码来源:opengl_sphere.cpp
示例16: build_sources
/**
* If VEXCL_CACHE_KERNELS macro is defined, then program binaries are cached
* in filesystem and reused in the following runs.
*/
inline boost::compute::program build_sources(
const boost::compute::command_queue &queue,
const std::string &source,
const std::string &options = ""
)
{
#ifdef VEXCL_SHOW_KERNELS
std::cout << source << std::endl;
#else
if (getenv("VEXCL_SHOW_KERNELS"))
std::cout << source << std::endl;
#endif
return boost::compute::program::build_with_source(
source, queue.get_context(),
options + " " + get_compile_options(queue)
);
}
开发者ID:ddemidov,项目名称:vexcl,代码行数:22,代码来源:compiler.hpp
示例17: box_filter_image
inline void box_filter_image(const compute::image2d &input,
compute::image2d &output,
compute::uint_ box_height,
compute::uint_ box_width,
compute::command_queue &queue)
{
using compute::dim;
const compute::context &context = queue.get_context();
// simple box filter kernel source
const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
__kernel void box_filter(__read_only image2d_t input,
__write_only image2d_t output,
uint box_height,
uint box_width)
{
int x = get_global_id(0);
int y = get_global_id(1);
int h = get_image_height(input);
int w = get_image_width(input);
int k = box_width;
int l = box_height;
if(x < k/2 || y < l/2 || x >= w-(k/2) || y >= h-(l/2)){
write_imagef(output, (int2)(x, y), (float4)(0, 0, 0, 1));
}
else {
const sampler_t sampler = CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
float4 sum = { 0, 0, 0, 0 };
for(int i = 0; i < k; i++){
for(int j = 0; j < l; j++){
sum += read_imagef(input, sampler, (int2)(x+i-k, y+j-l));
}
}
sum /= (float) k * l;
float4 value = (float4)( sum.x, sum.y, sum.z, 1.f );
write_imagef(output, (int2)(x, y), value);
}
}
开发者ID:yielding,项目名称:code,代码行数:41,代码来源:qimage_blur.cpp
示例18: config
/// Select best launch configuration for the given shared memory requirements.
void config(const boost::compute::command_queue &queue, std::function<size_t(size_t)> smem) {
boost::compute::device dev = queue.get_device();
size_t ws;
if ( is_cpu(queue) ) {
ws = 1;
} else {
// Select workgroup size that would fit into the device.
ws = dev.get_info<std::vector<size_t>>(CL_DEVICE_MAX_WORK_ITEM_SIZES)[0] / 2;
size_t max_ws = max_threads_per_block(queue);
size_t max_smem = max_shared_memory_per_block(queue);
// Reduce workgroup size until it satisfies resource requirements:
while( (ws > max_ws) || (smem(ws) > max_smem) )
ws /= 2;
}
config(num_workgroups(queue), ws);
}
开发者ID:clouddon,项目名称:vexcl,代码行数:22,代码来源:kernel.hpp
示例19: tune_accumulate
void tune_accumulate(const compute::vector<T>& data,
const size_t trials,
compute::command_queue& queue)
{
boost::shared_ptr<compute::detail::parameter_cache>
params = compute::detail::parameter_cache::get_global_cache(queue.get_device());
const std::string cache_key =
std::string("__boost_reduce_on_gpu_") + compute::type_name<T>();
const compute::uint_ tpbs[] = { 4, 8, 16, 32, 64, 128, 256, 512, 1024 };
const compute::uint_ vpts[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
double min_time = std::numeric_limits<double>::max();
compute::uint_ best_tpb = 0;
compute::uint_ best_vpt = 0;
for(size_t i = 0; i < sizeof(tpbs) / sizeof(*tpbs); i++){
params->set(cache_key, "tpb", tpbs[i]);
for(size_t j = 0; j < sizeof(vpts) / sizeof(*vpts); j++){
params->set(cache_key, "vpt", vpts[j]);
try {
const double t = perf_accumulate(data, trials, queue);
if(t < min_time){
best_tpb = tpbs[i];
best_vpt = vpts[j];
min_time = t;
}
}
catch(compute::opencl_error&){
// invalid parameters for this device, skip
}
}
}
// store optimal parameters
params->set(cache_key, "tpb", best_tpb);
params->set(cache_key, "vpt", best_vpt);
}
开发者ID:Soledad89,项目名称:compute,代码行数:40,代码来源:perf_accumulate.cpp
示例20: vector
vector(vector_expression<E, gpu_tag> const& e, boost::compute::command_queue& queue)
: m_storage(e().size(), queue.get_context())
, m_queue(&queue){
assign(*this, e);
}
开发者ID:inglada,项目名称:Shark,代码行数:5,代码来源:vector.hpp
注:本文中的boost::compute::command_queue类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论