C++ AMP 简要范例

参考自：https://msdn.microsoft.com/en-us/library/hh265136.aspx

#include <amp.h>
#include <amp_math.h>
#include <iostream>

using namespace concurrency;
const int size = 5;

// C++AMP样例
void CppAmpMethod()
{
	int aCPP[] = { 1, 2, 3, 4, 5 };
	int bCPP[] = { 6, 7, 8, 9, 10 };

	int sumCPP[size];

	// Create C++ AMP objects.
	array_view<const int, 1> a(size, aCPP);
	array_view<const int, 1> b(size, bCPP);
	array_view<int, 1> sum(size, sumCPP);
	sum.discard_data();

	parallel_for_each(
		// Define the compute domain, which is the set of threads that are created
		sum.extent,
		// Define the code to run on each thread on the accelerator
		[=](index<1> idx) restrict(amp)
	{
		sum[idx] = a[idx] + b[idx];
	}
	);

	// print the results. The expected output is "7, 9, 11, 13, 15"
	for (int i = 0; i < size; i++)
	{
		std::cout << sum[i] << "\n";
	}
}

// array_view用法范例1
void index1()
{
	int aCPP[] = { 1, 2, 3, 4, 5 };
	array_view<int, 1> a(5, aCPP);
	index<1> idx(2);
	std::cout << a[idx] << "\n";
	// Output: 3
}

// array_view用法范例2
void index2()
{
	int aCPP[] = { 1, 2, 3,
				   4, 5, 6 };
	array_view<int, 2> a(2, 3, aCPP);
	index<2> idx(1, 2);
	std::cout << a[idx] << "\n";
	// Output: 6
}

// array_view用法范例3
void index3()
{
	int aCPP[] = {
		1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
		1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };

	array_view<int, 3> a(2, 3, 4, aCPP);

	// Specifies the element at 3, 1, 0
	index<3> idx(0, 1, 3);
	std::cout << a[idx] << "\n";
	// Output: 8
}

// extent用法范例1
void extent1()
{
	int aCPP[] = {
		1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
		1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
	// There are 3 rows and 4 columns, and the depth is two.
	array_view<int, 3> a(2, 3, 4, aCPP);
	std::cout << "The number of colmns is " << a.extent[2] << "\n";
	std::cout << "The number of rows is " << a.extent[1] << "\n";
	std::cout << "The depth is " << a.extent[0] << "\n";

	std::cout << "Length in most significant dimension is " << a.extent[0] << "\n";
}

// extent用法范例2
void extent2()
{
	int aCPP[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,		17, 18, 19, 20, 21, 22, 23, 24 };

	extent<3> e(2, 3, 4);
	array_view<int, 3> a(e, aCPP);
	std::cout << "The num of columns is " << a.extent[2] << "\n";
	std::cout << "The num of rows is " << a.extent[1] << "\n";
	std::cout << "The depth is " << a.extent[0] << "\n";
}

// araay范例
void array1()
{
	std::vector<int> data(5);
	for (int count = 0; count < 5; count++)
	{
		data[count] = count;
	}

	array<int, 1> a(5, data.begin(), data.end());

	parallel_for_each(
		a.extent,
		[=, &a](index<1> idx) restrict(amp)
		{
			a[idx] = a[idx] * 10;
		}
	);

	data = a;
	for (int i = 0; i < 5; i++)
	{
		std::cout << data[i] << "\n";
	}
}

// 和cpu共享内存
void shareMemory1()
{
	accelerator acc = accelerator(accelerator::default_accelerator);

	// Early out if the defult accelerator doesn‘t support shared memory.
	if (!acc.supports_cpu_shared_memory)
	{
		std::cout << "The defult acclerator does not support shared memory " << std::endl;
		return;
	}

	// Override the default CPU access type.
	//acc.default_cpu_access_type = access_type_read_write;

	// Create an accelerator_view from the default accelerator.
	// The accelerator_view inherits its default_cpu_access_type from acc.
	accelerator_view acc_v = acc.default_view;

	// Create an extent object to size the arrays.
	extent<1> ex(10);

	// Input array that can be written on the CPU.
	array<int, 1> arr_w(ex, acc_v, access_type_write);

	// Output array that can be read on the CPU
	array<int, 1> arr_r(ex, acc_v, access_type_read);

	// Read-write array that can be both written to and read from on the CPU.
	array<int, 1> arr_rm(ex, acc_v, access_type_read_write);
}

// parallel_for_each用法范例1
void AddArrays()
{
	int aCPP[] = { 1, 2, 3, 4, 5 };
	int bCPP[] = { 6, 7, 8, 9, 10 };
	int sumCPP[] = { 0, 0, 0, 0, 0 };

	array_view<int, 1> a(5, aCPP);
	array_view<int, 1> b(5, bCPP);
	array_view<int, 1> sum(5, sumCPP);

	parallel_for_each(
		sum.extent,
		[=](index<1> idx) restrict(amp)
		{
			sum[idx] = a[idx] + b[idx];
		}
	);

	for (int i = 0; i < 5; i++)
	{
		std::cout << sum[i] << "\n";
	}
}

// Kernel helper (restrict(amp)): stores a[idx] + b[idx] into sum[idx].
// The array_view arguments are taken by value.
void AddElements(index<1> idx, array_view<int, 1> sum, array_view<int, 1> a, array_view<int, 1> b) restrict(amp)
{
	const int total = a[idx] + b[idx];
	sum[idx] = total;
}

// parallel_for_each用法范例2
void AddArraysWitchFunction()
{
	int aCPP[] = { 1, 2, 3, 4, 5 };
	int bCPP[] = { 6, 7, 8, 9, 10 };
	int sumCPP[] = { 0, 0, 0, 0, 0 };

	array_view<int, 1> a(5, aCPP);
	array_view<int, 1> b(5, bCPP);
	array_view<int, 1> sum(5, sumCPP);

	parallel_for_each(
		sum.extent,
		[=](index<1> idx) restrict(amp)
		{
		AddElements(idx, sum, a, b);
		}
	);

	for (int i = 0; i < 5; i++)
	{
		std::cout << sum[i] << "\n";
	}
}

// 二维分割切块加速
void acceleratingCode()
{
	// Sample data:
	int sampledata[] = {
		2, 2, 9, 7, 1, 4,
		4, 4, 8, 8, 3, 4,
		1, 5, 1, 2, 5, 2,
		6, 8, 3, 2, 7, 2
	};

	// The tiles:
	// 2 2      9 7      1 4
	// 4 4      8 8      3 4
	//
	// 1 5      1 2      5 2
	// 6 8      3 2      7 2

	// Averages:
	int averagedata[] = {
		0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0,
	};

	array_view<int, 2> sample(4, 6, sampledata);
	array_view<int, 2> average(4, 6, averagedata);

	parallel_for_each(
		// Create threads for sample.extent and divide the extent into 2 x 2 tiles
		sample.extent.tile<2, 2>(),
		[=](tiled_index<2, 2> idx) restrict(amp)
		{
			// Create a 2 x 2 array to hold the values in this tile.
			tile_static int nums[2][2];
			// Copy the values for the tile into the 2 x 2 array.
			nums[idx.local[1]][idx.local[0]] = sample[idx.global];
			// When all the threads have executed and the 2 x 2 array is complete, find the average.
			idx.barrier.wait();
			int sum = nums[0][0] + nums[0][1] + nums[1][0] + nums[1][1];
			// Copy the average into the array_view.
			average[idx.global] = sum / 4;
		}
	);

	for (int i = 0; i < 4; i++)
	{
		for (int j = 0; j < 6; j++)
		{
			std::cout << average(i, j) << " ";
		}
		std::cout << "\n";
	}

	// Output
	// 3 3 8 8 3 3
	// 3 3 8 8 3 3
	// 5 5 2 2 4 4
	// 5 5 2 2 4 4
}

// parallel_for_each usage example 3: using the parallel math library.
// Replaces each value with its base-10 logarithm, in place, and prints it.
void MathExample()
{
	double values[] = { 1.0, 10.0, 60.0, 100.0, 600.0, 1000.0 };
	array_view<double, 1> logs(6, values);

	// NOTE(review): the data is double but the call goes through the
	// fast_math namespace; presumably fast_math works in single precision,
	// so values may be narrowed here - confirm against the amp_math docs.
	parallel_for_each(logs.extent, [=](index<1> i) restrict(amp)
	{
		logs[i] = concurrency::fast_math::log10(logs[i]);
	});

	// Print each logarithm on its own line.
	for (int pos = 0; pos < 6; ++pos)
	{
		std::cout << logs[pos] << "\n";
	}
}

// Program entry point: runs the basic C++ AMP sample. The other samples
// are left commented out; uncomment a call to run that sample.
// Returns 0 so that a normal run reports success to the calling process
// (a non-zero exit status conventionally signals failure).
int main()
{
	CppAmpMethod();
	//index1();
	//index2();
	//index3();

	//extent1();
	//extent2();
	//array1();

	//shareMemory1();
	//AddArrays();
	//AddArraysWitchFunction();
	//acceleratingCode();
	//MathExample();

	return 0;
}

时间： 2024-12-28 16:29:27

C++ AMP 简要范例的相关文章

Android 布局简要范例

Android的布局决定着实际的UI界面呈现情况,正是这些UI界面的组合与千变万化,才呈现出了各式各样的风格. 而这些基础的布局框架结构很重要,需要玩得很熟悉.我将以前参考的部分代码示例,所做的相关实践在这里记录一下,稍后相关代码也会删除掉,这里做个备忘. 好了,现在开始: 我的代码结构是这样子的: 每个布局文件一个类,如果要做具体的个性化,在其中的包里面扩展: Android的界面是由布局和组件协同完成的,布局好比是建筑里的框架,而组件则相当于建筑里的砖瓦.组件按照布局的要求依次排列,

狂刷Android范例之一：ReadAsset

狂刷Android范例之一:ReadAsset 说明狂刷Android范例系列文章开张了.每篇学习一个Android范例,将一个范例单独生成一个可运行的app,并对重点源代码进行简要分析.然后提供打包好的源代码下载. 功能功能很简单,读取app自带的资源,例如一个文本. 代码包在此,无需下载分: http://download.csdn.net/detail/logicteamleader/8780131 来源 ReadAsset例子来自于Android-20的com.example.and

狂刷Android范例之3：读写外部存储设备

狂刷Android范例之3:读写外部存储设备说明狂刷Android范例系列文章开张了.每篇学习一个Android范例,将一个范例单独生成一个可运行的app,并对重点源代码进行简要分析.然后提供打包好的源代码下载. 功能提供一个经典范例,监控Android外部存储设备状态,对公用目录,app私有目录进行读写操作,并展示在app界面上. 代码包在此,无需下载分: http://download.csdn.net/detail/logicteamleader/8790109 来源 ReadAss

react及flux架构范例Todomvc分析

react及flux架构范例Todomvc分析通过分析flux-todomvc源码,学习如何通过react构建web程序,了解编写react应用程序的一般步骤,同时掌握Flux的单向数据流动架构思想关于react react一个最吸引我的特性是组件,它是模块化的,所有的组件是独立的,又可以通过嵌套来构建更大型的组件,一个个小组件经过层层组装,最终形成web应用程序,它让我开始重新思考如何去构建大型的web应用程序. 关于Flux Flux是一个思想而非框架,强调数据自上而下传递的单向流动理念

狂刷Android范例之5：读取手机通讯录

狂刷Android范例之5:读取手机通讯录说明狂刷Android范例系列文章开张了.每篇学习一个Android范例,将一个范例单独生成一个可运行的app,并对重点源代码进行简要分析.然后提供打包好的源代码下载. 功能提供完整代码,通过ContentResolver,读取手机通讯录的内容. 代码包在此,无需下载分: http://download.csdn.net/detail/logicteamleader/8806135 来源例子来自于Android-20的com.example.and

狂刷Android范例之二：剪贴板范例

狂刷Android范例之二:剪贴板范例ClipboardSample 说明狂刷Android范例系列文章开张了.每篇学习一个Android范例,将一个范例单独生成一个可运行的app,并对重点源代码进行简要分析.然后提供打包好的源代码下载. 功能功能很简单,使用Android提供的剪贴板,复制不同类型的数据到剪贴板. 代码包在此,无需下载分: http://download.csdn.net/detail/logicteamleader/8786187 来源 ClipboardSample例子

uboot源码简要分析

uboot源码简要分析一.uboot源码整体框架源码解压以后,我们可以看到以下的文件和文件夹: cpu 与处理器相关的文件.每个子目录中都包括cpu.c和interrupt.c.start.S.u-boot.lds. cpu.c:初始化CPU.设置指令Cache和数据Cache等 interrupt.c:设置系统的各种中断和异常 start.S:是U-boot启动时执行的第一个文件,它主要做最早期的系统初始化,代码重定向和设置系统堆栈,为进入U-boot第二阶段的C程序奠定基础. u-boo

linux命令格式，获取帮助及其目录结构简要理解

我们都知道,一台计算机要是没通电,和一堆废铁没什么区别.那么,通电开机进入系统后,会进入交互界面,等待用户操作,人与计算机交互界面有两种: GUI:图形用户接口.如我们平时使用的Windows ,linux的X window,有KDE和GNOME. CLI:命令行接口,使用的SHELL类型有bash ,csh,tcshell,zshell等. 例如:[root@dxlcentOS ~]# commandbin root:当前登录的用户名. dxlcentOS:当前主机的主机名.@是一个分隔

Shell脚本编程知识点总结及范例

一:关于语言 1)编译性语言编译型语言多半运作于底层,所处理的是字节.整数.浮点数或其他机器层级的对象.处理过程为:源程序--预处理--编译--汇编--链接,编译性语言为静态语言. 2)解释性语言解释性语言读入程序代码并将其转化为内部的形式加以执行.处理过程:解释性(文本文件)-解释器去读取并执行.解释性语言为动态语言. 二:基础变量类型 linux脚本中的变量不需要事先声明,而是直接定义使用(这点不同于其他高级编程语言中变量的使用)bash变量类型分为本地变量和环境变量. 本地变量