Quantcast
Channel: Altera Forums
Viewing all articles
Browse latest Browse all 19390

multiple kernel in a .cl file

$
0
0
I have a question about putting multiple kernels in a single .cl file.
1. In the .cl file below, when it contains only clip_8b_kernel, ITransformOne_kernel and GetSSE_kernel, placing ITransformOne_kernel before GetSSE_kernel produces the error shown below. If the kernels are placed in the order clip_8b_kernel, GetSSE_kernel, ITransformOne_kernel, the file compiles successfully.
2. This .cl file contains multiple kernels. Each kernel compiles without error when compiled on its own, but ITransform_C_kernel and ITransformOne_kernel cannot coexist: if both are present in the file, compilation fails with the error shown below.


error content:
aoc: Linking with IP library ...
/build/swbuild/SJ/nightly/17.0/290/l64/p4/acl/llvm/include/acl/Analysis/LocalMemSizing.h(83) *******
Requested memsize for unavailable aspace


m_local_mem_size_per_workgroup.count(aspace) FAILED
0 libLLVM-3.0.so 0x00007f348ee2846f
1 libLLVM-3.0.so 0x00007f348ee2a3e2
2 libpthread.so.0 0x00007f348da905e0
3 libLLVM-3.0.so 0x00007f348f617ef9 acl::get_bits(llvm::Value const*, llvm::TargetData*, acl::LocalMemSizing*, bool, bool, bool) + 7289
4 libLLVM-3.0.so 0x00007f348e37a880 grif::Pass_AreaReport::add_private_var_resources() + 8800
5 libLLVM-3.0.so 0x00007f348e37c7e1 grif::Pass_AreaReport::runPass(grif::XNode*) + 913
6 libLLVM-3.0.so 0x00007f348e34c58a acl::DSDKGenerate::runOnModule(llvm::Module&) + 8010
7 libLLVM-3.0.so 0x00007f348f049fc1 llvm::MPPassManager::runOnModule(llvm::Module&) + 577
8 libLLVM-3.0.so 0x00007f348f04a16b llvm::PassManagerImpl::run(llvm::Module&) + 187
9 aocl-llc 0x000000000040be40 main + 5360
10 libc.so.6 0x00007f348ca9ec05 __libc_start_main + 245
11 aocl-llc 0x00000000004098e9
Stack dump:
0. Program arguments:
/home/****/inteldevstack/intelFPGA_pro/hld/linux64/bin/aocl-llc -march=griffin -board /home/****/inteldevstack/a10_gx_pac_ias_1_0_prq/opencl/opencl_bsp/hardware/pac_a10/board_spec.xml -dbg-info-enabled test.bc -o test.v
1. Running pass 'Generate DSDK netlist and HDL' on module 'test.bc'.
Error: Verilog generator FAILED.
Refer to test/test.log for details.

CL file:

/* Fixed-width style aliases built on the OpenCL C scalar types. */
typedef uchar uint8_t;
/* NOTE: despite the names, the next two aliases map to SIGNED types
 * (short and int). */
typedef short uint16_t;
typedef int uint32_t;
/* Element counts for the kernel wrappers' arguments. The *_in_*, *_inout_*
 * and *_out_* values size the staging arrays declared inside the kernels
 * below; the *_const_* values are not referenced by the visible code. */
#define clip_8b_const_v_LEN 1
#define clip_8b_out_clip_8b_return_LEN 1
#define ITransformOne_in_ref_LEN 200
#define ITransformOne_in_in_LEN 200
#define ITransformOne_inout_dst_LEN 200
#define GetSSE_in_a_LEN 200
#define GetSSE_in_b_LEN 200
#define GetSSE_const_w_LEN 1
#define GetSSE_const_h_LEN 1
#define GetSSE_out_GetSSE_return_LEN 1
#define ITransform_C_in_ref_LEN 200
#define ITransform_C_in_in_LEN 200
#define ITransform_C_inout_dst_LEN 200
#define ITransform_C_const_do_two_LEN 1
/****** VP8Calc -- VP8Transform *************************************************/
/* Inlining hint applied to the static helper functions below. */
#define WEBP_INLINE inline
/* Row stride, in elements, used when indexing the ref/dst buffers (STORE)
 * and when stepping to the next row in GetSSE. */
#define BPS 32


/*
 * Clamp an integer to the 8-bit range.
 * Returns v unchanged when it already fits in [0, 255], 0 when v is
 * negative, and 255 otherwise.
 */
static WEBP_INLINE uchar clip_8b(int v) {
    /* No bits set outside the low byte: value is already in range. */
    if ((v & ~0xff) == 0) {
        return v;
    }
    if (v < 0) {
        return 0;
    }
    return 255;
}


/*
 * STORE(x, y, v): writes clip_8b(ref[x + y * BPS] + (v >> 3)) to
 * dst[x + y * BPS]. The macro refers to `ref` and `dst` by name, so both
 * must be in scope wherever it is expanded.
 */
#define STORE(x, y, v) \
dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
/* Multiplier constants passed to MUL() by ITransformOne.
 * NOTE(review): presumably 16.16 fixed-point transform coefficients as in
 * libwebp -- confirm against the upstream source. */
#define kC1 (20091 + (1 << 16)) // DF: NEW
#define kC2 35468 // DF: NEW
/* Multiply a by b and discard the low 16 bits of the product. */
#define MUL(a, b) (((a) * (b)) >> 16)


/*
 * Two-pass 4x4 transform of the 16 coefficients in `in`.
 *
 * The first pass reads in[k], in[k+4], in[k+8], in[k+12] for k = 0..3 and
 * fills a 4x4 scratch matrix. The second pass walks that matrix the other
 * way and, through STORE, adds each result (shifted right by 3) to the
 * matching `ref` element and writes the 8-bit clipped sum to `dst`.
 * `ref` and `dst` are indexed with a row stride of BPS.
 */
static WEBP_INLINE void ITransformOne(const uint8_t * ref, const uint16_t* in,
uint8_t* dst) {
    int C[4 * 4];
    int k;

    /* vertical pass */
    for (k = 0; k < 4; ++k) {
        const uint16_t *src = in + k;
        int *out = C + 4 * k;
        const int sum = src[0] + src[8];
        const int diff = src[0] - src[8];
        const int rot_lo = MUL(src[4], kC2) - MUL(src[12], kC1);
        const int rot_hi = MUL(src[4], kC1) + MUL(src[12], kC2);
        out[0] = sum + rot_hi;
        out[1] = diff + rot_lo;
        out[2] = diff - rot_lo;
        out[3] = sum - rot_hi;
    }

    /* horizontal pass */
    for (k = 0; k < 4; ++k) {
        const int *src = C + k;
        /* +4 is folded into the DC term before the >> 3 done in STORE. */
        const int dc = src[0] + 4;
        const int sum = dc + src[8];
        const int diff = dc - src[8];
        const int rot_lo = MUL(src[4], kC2) - MUL(src[12], kC1);
        const int rot_hi = MUL(src[4], kC1) + MUL(src[12], kC2);
        STORE(0, k, sum + rot_hi);
        STORE(1, k, diff + rot_lo);
        STORE(2, k, diff - rot_lo);
        STORE(3, k, sum - rot_hi);
    }
}


/*
 * Runs ITransformOne on the block at (ref, in, dst). When do_two is
 * non-zero, also runs it on a second block located 4 elements further in
 * ref/dst and 16 coefficients further in `in`.
 */
void ITransform_C(const uint8_t * ref,const uint16_t * in,uint8_t * dst, int do_two) {
    ITransformOne(ref, in, dst);
    if (!do_two) {
        return;
    }
    ITransformOne(ref + 4, in + 16, dst + 4);
}


/*
 * Sum of squared differences between two w-by-h regions.
 * Rows of both `a` and `b` are BPS elements apart. Returns 0 when either
 * w or h is not positive.
 */
static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
int w, int h) {
    int sse = 0;
    for (int row = 0; row < h; ++row) {
        const uint8_t *row_a = a + row * BPS;
        const uint8_t *row_b = b + row * BPS;
        // DF: TBC, may need to unroll this
        for (int col = 0; col < w; ++col) {
            const int delta = (int)row_a[col] - row_b[col];
            sse += delta * delta;
        }
    }
    return sse;
}


/*
 * Kernel wrapper around clip_8b().
 *
 * v               value to clamp to [0, 255]
 * clip_8b_return  global output buffer holding
 *                 clip_8b_out_clip_8b_return_LEN elements; element 0
 *                 receives the clamped value
 */
__kernel void clip_8b_kernel( int v,__global uchar* restrict clip_8b_return){
    printf("start func:clip_8b\n");
    uchar clip_8b_return_[clip_8b_out_clip_8b_return_LEN];
    /* Stage the result in the private buffer so the copy-out loop below
     * publishes it. Writing the result straight to clip_8b_return[0] and
     * then copying from an unwritten staging buffer would replace it with
     * an indeterminate value. */
    clip_8b_return_[0] = clip_8b(v);
    for (int i = 0; i < clip_8b_out_clip_8b_return_LEN; i++) {
        clip_8b_return[i] = clip_8b_return_[i];
    }
}
/*
 * Kernel wrapper around ITransformOne().
 *
 * Copies the three global buffers into kernel-local staging arrays, runs
 * ITransformOne on the copies, then writes the staged dst back to global
 * memory.
 *
 * ref  global input,        ITransformOne_in_ref_LEN elements
 * in   global input,        ITransformOne_in_in_LEN elements
 * dst  global input/output, ITransformOne_inout_dst_LEN elements
 */
__kernel void ITransformOne_kernel(__global uint8_t * restrict ref,__global uint16_t* restrict in,__global uint8_t* restrict dst){
    printf("start func:ITransformOne\n");

    uint8_t ref_stage[ITransformOne_in_ref_LEN];
    uint16_t in_stage[ITransformOne_in_in_LEN];
    uint8_t dst_stage[ITransformOne_inout_dst_LEN];

    /* Copy in: global -> staging. */
    for (int r = 0; r < ITransformOne_in_ref_LEN; r++) {
        ref_stage[r] = ref[r];
    }
    for (int c = 0; c < ITransformOne_in_in_LEN; c++) {
        in_stage[c] = in[c];
    }
    for (int d = 0; d < ITransformOne_inout_dst_LEN; d++) {
        dst_stage[d] = dst[d];
    }

    ITransformOne(ref_stage, in_stage, dst_stage);

    /* Copy out: staging -> global. */
    for (int d = 0; d < ITransformOne_inout_dst_LEN; d++) {
        dst[d] = dst_stage[d];
    }
}
/*
 * Kernel wrapper around GetSSE().
 *
 * a, b           global inputs, GetSSE_in_a_LEN / GetSSE_in_b_LEN elements
 * w, h           width and height of the compared region
 * GetSSE_return  global output buffer holding
 *                GetSSE_out_GetSSE_return_LEN elements; element 0 receives
 *                the sum of squared differences
 *
 * NOTE: GetSSE indexes the staged copies at x + y * BPS for x < w, y < h,
 * so the caller must keep (h - 1) * BPS + w within the staging array
 * length; this kernel does not check it.
 */
__kernel void GetSSE_kernel(__global uint8_t* restrict a,__global uint8_t* restrict b, int w, int h,__global int* restrict GetSSE_return){
    printf("start func:GetSSE\n");
    uint8_t a_[GetSSE_in_a_LEN];
    uint8_t b_[GetSSE_in_b_LEN];
    int GetSSE_return_[GetSSE_out_GetSSE_return_LEN];
    for (int i = 0; i < GetSSE_in_a_LEN; i++) {
        a_[i] = a[i];
    }
    for (int i = 0; i < GetSSE_in_b_LEN; i++) {
        b_[i] = b[i];
    }
    /* Stage the result in the private buffer so the copy-out loop below
     * publishes it. Writing the result straight to GetSSE_return[0] and
     * then copying from an unwritten staging buffer would replace it with
     * an indeterminate value. */
    GetSSE_return_[0] = GetSSE(a_, b_, w, h);
    for (int i = 0; i < GetSSE_out_GetSSE_return_LEN; i++) {
        GetSSE_return[i] = GetSSE_return_[i];
    }
}
/*
 * Kernel wrapper around ITransform_C().
 *
 * Copies the three global buffers into kernel-local staging arrays, runs
 * ITransform_C on the copies with the given do_two flag, then writes the
 * staged dst back to global memory.
 *
 * ref     global input,        ITransform_C_in_ref_LEN elements
 * in      global input,        ITransform_C_in_in_LEN elements
 * dst     global input/output, ITransform_C_inout_dst_LEN elements
 * do_two  forwarded unchanged to ITransform_C
 */
__kernel void ITransform_C_kernel(__global uint8_t * restrict ref,__global uint16_t * restrict in,__global uint8_t * restrict dst, int do_two){
    printf("start func:ITransform_C\n");

    uint8_t ref_stage[ITransform_C_in_ref_LEN];
    uint16_t in_stage[ITransform_C_in_in_LEN];
    uint8_t dst_stage[ITransform_C_inout_dst_LEN];

    /* Copy in: global -> staging. */
    for (int r = 0; r < ITransform_C_in_ref_LEN; r++) {
        ref_stage[r] = ref[r];
    }
    for (int c = 0; c < ITransform_C_in_in_LEN; c++) {
        in_stage[c] = in[c];
    }
    for (int d = 0; d < ITransform_C_inout_dst_LEN; d++) {
        dst_stage[d] = dst[d];
    }

    ITransform_C(ref_stage, in_stage, dst_stage, do_two);

    /* Copy out: staging -> global. */
    for (int d = 0; d < ITransform_C_inout_dst_LEN; d++) {
        dst[d] = dst_stage[d];
    }
}






Who can help me? Thanks in advance.

Viewing all articles
Browse latest Browse all 19390

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>