more advancement with opencl

This commit is contained in:
Robert Plummer 2017-07-30 10:50:11 -04:00
parent 2a3945efe1
commit 33e841c9da
5 changed files with 77 additions and 915 deletions

View File

@ -1,60 +1,4 @@
"use strict";
// Dependency:
var fs = require("fs");
var path = require("path");
var cwd = __dirname;
var nooocl = require('nooocl');
var CLHost = nooocl.CLHost;
var CLContext = nooocl.CLContext;
var CLBuffer = nooocl.CLBuffer;
var CLCommandQueue = nooocl.CLCommandQueue;
var NDRange = nooocl.NDRange;
var CLError = nooocl.CLError;
var fastcall = require("fastcall");
var ref = fastcall.ref;
var double = ref.types.double;
// Initialize OpenCL then we get host, device, context, and a queue
var host = CLHost.createV11();
// OpenCL constant definitions exposed by the host; the CL_FP_* flags are read from here.
var defs = host.cl.defs;
var platforms = host.getPlatforms();
// Selected device; stays undefined until searchForDevice() assigns a match.
var device;
/**
 * Select an OpenCL device of the requested kind and store it in the
 * enclosing-scope `device` variable.
 *
 * Platforms are scanned in order and the first device reporting any of the
 * double precision capability flags is chosen. `device` is left untouched
 * when no device qualifies.
 *
 * @param {string} hardware - "gpu" searches GPU devices; any other value searches CPU devices.
 */
function searchForDevice(hardware) {
    // Is double precision supported?
    // See: https://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
    var doubleFpMask = defs.CL_FP_FMA | defs.CL_FP_ROUND_TO_NEAREST | defs.CL_FP_ROUND_TO_ZERO |
        defs.CL_FP_ROUND_TO_INF | defs.CL_FP_INF_NAN | defs.CL_FP_DENORM;
    for (var platformIndex = 0; platformIndex < platforms.length; platformIndex++) {
        var p = platforms[platformIndex];
        var devices = hardware === "gpu" ? p.gpuDevices() : p.cpuDevices();
        for (var deviceIndex = 0; deviceIndex < devices.length; deviceIndex++) {
            if (devices[deviceIndex].doubleFpConfig & doubleFpMask) {
                device = devices[deviceIndex];
                // Stop at the first capable device. A plain loop is used because
                // returning false from an Array#forEach callback does not end the iteration.
                return;
            }
        }
    }
}
// Prefer a GPU device; only look at CPU devices when no GPU was selected.
searchForDevice("gpu");
if (!device) {
    console.warn("No GPU device has been found, searching for a CPU fallback.");
    searchForDevice("cpu");
}
// Neither search produced a device: nothing to run on.
if (!device) {
    throw new Error("No capable OpenCL 1.1 device has been found.");
}
var deviceLabel = device.name + " - " + device.platform.name;
console.log("Running on device: " + deviceLabel);
// The context and the command queue are both bound to the selected device.
var context = new CLContext(device);
var queue = new CLCommandQueue(context, device);
'use strict';
// Initialize data on the host side:
var n = 1000;
@ -101,28 +45,28 @@ __kernel void vecAdd( __global double *a,
`;
var program = context.createProgram(kernelSourceCode);
console.log("Building ...");
console.log('Building ...');
// Building is always asynchronous in NOOOCL!
nooocl.scope(function () {
return program.build("-cl-fast-relaxed-math")
return program.build('-cl-fast-relaxed-math')
.then(function () {
var buildStatus = program.getBuildStatus(device);
var buildLog = program.getBuildLog(device);
console.log(buildLog);
if (buildStatus < 0) {
throw new CLError(buildStatus, "Build failed.");
throw new CLError(buildStatus, 'Build failed.');
}
console.log("Build completed.");
console.log('Build completed.');
// Kernel stuff:
var kernel = program.createKernel("vecAdd");
var kernel = program.createKernel('vecAdd');
kernel.setArg(0, d_a);
kernel.setArg(1, d_b);
kernel.setArg(2, d_c);
// Notice: in NOOOCL you have to specify the type of value arguments,
// because no C-compatible type system exists in JavaScript.
kernel.setArg(3, n, "uint");
kernel.setArg(3, n, 'uint');
// Ranges:
// Number of work items in each local work group
@ -130,7 +74,7 @@ nooocl.scope(function () {
// Number of total work items - localSize must be a divisor
var globalSize = new NDRange(Math.ceil(n / 64) * 64);
console.log("Launching the kernel.");
console.log('Launching the kernel.');
// Enqueue the kernel asynchronously
queue.enqueueNDRangeKernel(kernel, globalSize, localSize);
@ -139,7 +83,7 @@ nooocl.scope(function () {
// when the queue ends.
// We should query a waitable queue which returns an event for each enqueue operations,
// and the event's promise can be used for continuation of the control flow on the host side.
console.log("Waiting for result.");
console.log('Waiting for result.');
return queue.waitable().enqueueReadBuffer(d_c, 0, bytes, h_c).promise
.then(function() {
// Data gets back to host, we're done:
@ -150,9 +94,9 @@ nooocl.scope(function () {
sum += double.get(h_c, offset);
}
console.log("Final result: " + sum / n);
console.log('Final result: ' + sum / n);
});
});
});
console.log("(Everything after this point is asynchronous.)");
console.log('(Everything after this point is asynchronous.)');

View File

@ -1,41 +0,0 @@
const nooocl = require('nooocl');
const CLHost = nooocl.CLHost;
const host = CLHost.createV11();
const defs = host.cl.defs;
const platforms = host.getPlatforms();
let device;
/**
 * Select an OpenCL device of the requested kind and store it in the
 * module-level `device` variable.
 *
 * Platforms are scanned in order; the first device that reports any of the
 * double precision capability flags is chosen. `device` is left untouched
 * when no device qualifies.
 *
 * @param {string} hardware - 'gpu' searches GPU devices; any other value searches CPU devices.
 */
function searchForDevice(hardware) {
  // Is double precision supported?
  // See: https://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
  const doubleFpMask = defs.CL_FP_FMA | defs.CL_FP_ROUND_TO_NEAREST | defs.CL_FP_ROUND_TO_ZERO |
    defs.CL_FP_ROUND_TO_INF | defs.CL_FP_INF_NAN | defs.CL_FP_DENORM;
  for (const platform of platforms) {
    const candidates = hardware === 'gpu' ? platform.gpuDevices() : platform.cpuDevices();
    const capable = candidates.find((candidate) => candidate.doubleFpConfig & doubleFpMask);
    if (capable) {
      device = capable;
      // Stop at the first match. Returning false from an Array#forEach callback
      // does not end the iteration, so a later platform used to overwrite it.
      return;
    }
  }
}
module.exports = function() {
searchForDevice('gpu');
if (!device) {
console.warn('No GPU device has been found, searching for a CPU fallback.');
searchForDevice('cpu');
}
if (!device) {
throw new Error('No capable OpenCL 1.1 device has been found.');
}
else {
console.log('Running on device: ' + device.name + ' - ' + device.platform.name);
}
return device;
};

View File

@ -9,15 +9,24 @@ const CLCommandQueue = nooocl.CLCommandQueue;
const NDRange = nooocl.NDRange;
const CLError = nooocl.CLError;
const device = require('./device');
const fastcall = require('fastcall');
const ref = fastcall.ref;
const double = ref.types.double;
// Initialize OpenCL then we get host, device, context, and a queue
const host = CLHost.createV11();
const defs = host.cl.defs;
const platforms = host.getPlatforms();
const KernelBase = require('../kernel-base');
const utils = require('../../core/utils');
const Texture = require('../../core/texture');
const fragShaderString = require('./shader-frag');
const vertShaderString = require('./shader-vert');
const kernelString = require('./kernel-string');
const canvases = [];
const canvasTexSizes = {};
let device = null;
module.exports = class OpenCLKernel extends KernelBase {
/**
@ -45,24 +54,19 @@ module.exports = class OpenCLKernel extends KernelBase {
*/
constructor(fnString, settings) {
super(fnString, settings);
this.textureCache = {};
this.threadDim = {};
this.programUniformLocationCache = {};
this.framebuffer = null;
this.buffer = null;
this.program = null;
this.functionBuilder = settings.functionBuilder;
this.outputToTexture = settings.outputToTexture;
this.endianness = utils.systemEndianness();
this.subKernelOutputTextures = null;
this.subKernelOutputVariableNames = null;
this.paramTypes = null;
this.argumentsLength = 0;
this.ext = null;
this.compiledFragShaderString = null;
this.compiledVertShaderString = null;
if (!this._openCl) this._openCl = device();
if (!this._openCl) {
this._openCl = new CLContext(this.getDevice());
}
}
/**
@ -76,50 +80,7 @@ module.exports = class OpenCLKernel extends KernelBase {
*
*/
validateOptions() {
const isReadPixel = utils.isFloatReadPixelsSupported();
if (this.floatTextures === true && !utils.OES_texture_float) {
throw 'Float textures are not supported on this browser';
} else if (this.floatOutput === true && this.floatOutputForce !== true && !isReadPixel) {
throw 'Float texture outputs are not supported on this browser';
} else if (this.floatTextures === undefined && utils.OES_texture_float) {
//NOTE: handle
this.floatTextures = true;
this.floatOutput = isReadPixel && !this.graphical;
}
if (!this.dimensions || this.dimensions.length === 0) {
if (arguments.length !== 1) {
throw 'Auto dimensions only supported for kernels with only one input';
}
const argType = utils.getArgumentType(arguments[0]);
if (argType === 'Array') {
this.dimensions = utils.getDimensions(argType);
} else if (argType === 'Texture') {
this.dimensions = arguments[0].dimensions;
} else {
throw 'Auto dimensions not supported for input type: ' + argType;
}
}
this.texSize = utils.dimToTexSize({
floatTextures: this.floatTextures,
floatOutput: this.floatOutput
}, this.dimensions, true);
if (this.graphical) {
if (this.dimensions.length !== 2) {
throw 'Output must have 2 dimensions on graphical mode';
}
if (this.floatOutput) {
throw 'Cannot use graphical mode and float output at the same time';
}
this.texSize = utils.clone(this.dimensions);
} else if (this.floatOutput === undefined && utils.OES_texture_float) {
this.floatOutput = true;
}
}
/**
@ -135,72 +96,21 @@ module.exports = class OpenCLKernel extends KernelBase {
build() {
this.validateOptions();
this.setupParams(arguments);
const texSize = this.texSize;
const gl = this._openCl;
const canvas = this._canvas;
let canvasIndex = canvases.indexOf(canvas);
if (canvasIndex === -1) {
canvasIndex = canvases.length;
canvases.push(canvas);
canvasTexSizes[canvasIndex] = [];
}
const cl = this._openCl;
this.queue = new CLCommandQueue(cl, device);
const sizes = canvasTexSizes[canvasIndex];
sizes.push(texSize);
const maxTexSize = [0, 0];
for (let i = 0; i < sizes.length; i++) {
const size = sizes[i];
if (maxTexSize[0] < size[0]) {
maxTexSize[0] = size[0];
}
if (maxTexSize[1] < size[1]) {
maxTexSize[1] = size[1];
}
}
const compiledKernelString = `#pragma OPENCL EXTENSION cl_khr_fp64 : enable
${ this._addKernels() }
${ this.functionBuilder.getPrototypeString('kernel') }`;
gl.viewport(0, 0, maxTexSize[0], maxTexSize[1]);
const threadDim = this.threadDim = utils.clone(this.dimensions);
while (threadDim.length < 3) {
threadDim.push(1);
}
if (this.functionBuilder) this._addKernels();
const compiledVertShaderString = this._getVertShaderString(arguments);
const vertShader = gl.createShader(gl.VERTEX_SHADER);
gl.shaderSource(vertShader, compiledVertShaderString);
gl.compileShader(vertShader);
const compiledFragShaderString = this._getFragShaderString(arguments);
const fragShader = gl.createShader(gl.FRAGMENT_SHADER);
gl.shaderSource(fragShader, compiledFragShaderString);
gl.compileShader(fragShader);
if (!gl.getShaderParameter(vertShader, gl.COMPILE_STATUS)) {
console.log(compiledVertShaderString);
console.error('An error occurred compiling the shaders: ' + gl.getShaderInfoLog(vertShader));
throw 'Error compiling vertex shader';
}
if (!gl.getShaderParameter(fragShader, gl.COMPILE_STATUS)) {
console.log(compiledFragShaderString);
console.error('An error occurred compiling the shaders: ' + gl.getShaderInfoLog(fragShader));
throw 'Error compiling fragment shader';
}
if (this.debug) {
if (this.debug) {
console.log('Options:');
console.dir(this);
console.log('GLSL Shader Output:');
console.log(compiledFragShaderString);
console.log('OpenCL Shader Output:');
console.log(compiledKernelString);
}
const program = this.program = gl.createProgram();
gl.attachShader(program, vertShader);
gl.attachShader(program, fragShader);
gl.linkProgram(program);
this.framebuffer = gl.createFramebuffer();
this.framebuffer.width = texSize[0];
this.framebuffer.height = texSize[1];
this.program = cl.createProgram(compiledKernelString);
return this;
}
@ -501,37 +411,6 @@ module.exports = class OpenCLKernel extends KernelBase {
return location;
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getFragShaderArtifactMap
*
* @desc Generate Shader artifacts for the kernel program.
* The final object contains HEADER, KERNEL, MAIN_RESULT, and others.
*
* @param {Array} args - The actual parameters sent to the Kernel
*
* @returns {Object} An object containing the Shader Artifacts(CONSTANTS, HEADER, KERNEL, etc.)
*
*/
_getFragShaderArtifactMap(args) {
return {
HEADER: this._getHeaderString(),
LOOP_MAX: this._getLoopMaxString(),
CONSTANTS: this._getConstantsString(),
DECODE32_ENDIANNESS: this._getDecode32EndiannessString(),
ENCODE32_ENDIANNESS: this._getEncode32EndiannessString(),
GET_WRAPAROUND: this._getGetWraparoundString(),
GET_TEXTURE_CHANNEL: this._getGetTextureChannelString(),
GET_TEXTURE_INDEX: this._getGetTextureIndexString(),
GET_RESULT: this._getGetResultString(),
MAIN_PARAMS: this._getMainParamsString(args),
MAIN_CONSTANTS: this._getMainConstantsString(),
KERNEL: this._getKernelString(),
MAIN_RESULT: this._getMainResultString()
};
}
/**
* @memberOf OpenCLKernel#
* @function
@ -546,489 +425,26 @@ module.exports = class OpenCLKernel extends KernelBase {
*
*/
_addArgument(value, type, name) {
const gl = this._openCl;
const cl = this._openCl;
const argumentTexture = this.getArgumentTexture(name);
if (value.constructor === Texture) {
type = 'Texture';
}
switch (type) {
case 'Array':
{
const dim = utils.getDimensions(value, true);
const size = utils.dimToTexSize({
floatTextures: this.floatTextures,
floatOutput: this.floatOutput
}, dim);
// Initialize data on the host side:
const n = 1000;
const bytes = n * double.size;
const pointer = new Buffer(n * double.size);
gl.activeTexture(gl.TEXTURE0 + this.argumentsLength);
gl.bindTexture(gl.TEXTURE_2D, argumentTexture);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST);
// // Initialize vectors on host
// for (var i = 0; i < n; i++) {
// var offset = i * double.size;
// double.set(h_a, offset, 0.1 + 0.2);
// double.set(h_b, offset, 0);
// }
let length = size[0] * size[1];
if (this.floatTextures) {
length *= 4;
}
const valuesFlat = new Float32Array(length);
utils.flattenTo(value, valuesFlat);
let buffer;
if (this.floatTextures) {
buffer = new Float32Array(valuesFlat);
gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, size[0], size[1], 0, gl.RGBA, gl.FLOAT, buffer);
} else {
buffer = new Uint8Array((new Float32Array(valuesFlat)).buffer);
gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, size[0], size[1], 0, gl.RGBA, gl.UNSIGNED_BYTE, buffer);
}
const loc = this.getUniformLocation('user_' + name);
const locSize = this.getUniformLocation('user_' + name + 'Size');
const dimLoc = this.getUniformLocation('user_' + name + 'Dim');
if (!this.hardcodeConstants) {
gl.uniform3fv(dimLoc, dim);
gl.uniform2fv(locSize, size);
}
gl.uniform1i(loc, this.argumentsLength);
break;
}
case 'Number':
{
const loc = this.getUniformLocation('user_' + name);
gl.uniform1f(loc, value);
break;
}
case 'Texture':
{
const inputTexture = value;
const dim = utils.getDimensions(inputTexture.dimensions, true);
const size = inputTexture.size;
gl.activeTexture(gl.TEXTURE0 + this.argumentsLength);
gl.bindTexture(gl.TEXTURE_2D, inputTexture.texture);
const loc = this.getUniformLocation('user_' + name);
const locSize = this.getUniformLocation('user_' + name + 'Size');
const dimLoc = this.getUniformLocation('user_' + name + 'Dim');
gl.uniform3fv(dimLoc, dim);
gl.uniform2fv(locSize, size);
gl.uniform1i(loc, this.argumentsLength);
break;
}
default:
throw 'Input type not supported (WebGL): ' + value;
}
const buffer = new CLBuffer(cl, defs.CL_MEM_READ_ONLY, bytes);
this.queue.enqueueWriteBuffer(buffer, 0, bytes, pointer);
this.kernel.setArg(this.argumentsLength, buffer);
this.argumentsLength++;
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getHeaderString
*
* @desc Get the header string for the program.
* This returns an empty string if no sub-kernels are defined.
*
* @returns {String} result
*
*/
_getHeaderString() {
return (
this.subKernels !== null || this.subKernelProperties !== null ?
//webgl2 '#version 300 es\n' :
'#extension GL_EXT_draw_buffers : require\n' :
''
);
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getLoopMaxString
*
* @desc Get the maximum loop size String.
*
* @returns {String} result
*
*/
_getLoopMaxString() {
return (
this.loopMaxIterations ?
` ${ parseInt(this.loopMaxIterations) }.0;\n` :
' 100.0;\n'
);
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getConstantsString
*
* @desc Generate transpiled glsl Strings for constant parameters sent to a kernel
*
* They can be defined by *hardcodeConstants*
*
* @returns {String} result
*
*/
_getConstantsString() {
const result = [];
const threadDim = this.threadDim;
const texSize = this.texSize;
if (this.hardcodeConstants) {
result.push(
`highp vec3 uOutputDim = vec3(${ threadDim[0] },${ threadDim[1] }, ${ threadDim[2] })`,
`highp vec2 uTexSize = vec2(${ texSize[0] }, ${ texSize[1] })`
);
} else {
result.push(
'uniform highp vec3 uOutputDim',
'uniform highp vec2 uTexSize'
);
}
return this._linesToString(result);
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getTextureCoordinate
*
* @desc Get texture coordinate string for the program
*
* @returns {String} result
*
*/
_getTextureCoordinate() {
const names = this.subKernelOutputVariableNames;
if (names === null || names.length < 1) {
return 'varying highp vec2 vTexCoord;\n';
} else {
return 'out highp vec2 vTexCoord;\n';
}
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getDecode32EndiannessString
*
* @desc Get Decode32 endianness string for little-endian and big-endian
*
* @returns {String} result
*
*/
_getDecode32EndiannessString() {
return (
this.endianness === 'LE' ?
'' :
' rgba.rgba = rgba.abgr;\n'
);
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getEncode32EndiannessString
*
* @desc Get Encode32 endianness string for little-endian and big-endian
*
* @returns {String} result
*
*/
_getEncode32EndiannessString() {
return (
this.endianness === 'LE' ?
'' :
' rgba.rgba = rgba.abgr;\n'
);
}
/**
* @function
* @memberOf OpenCLKernel#
* @name _getGetWraparoundString
*
* @returns {String} wraparound string
*/
_getGetWraparoundString() {
return (
this.wraparound ?
' xyz = mod(xyz, texDim);\n' :
''
);
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getGetTextureChannelString
*
*/
_getGetTextureChannelString() {
if (!this.floatTextures) return '';
return this._linesToString([
' int channel = int(integerMod(index, 4.0))',
' index = float(int(index) / 4)'
]);
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getGetTextureIndexString
*
* @desc Get generic texture index string, if floatTextures flag is true.
*
* @example
* ' index = float(int(index)/4);\n'
*
*/
_getGetTextureIndexString() {
return (
this.floatTextures ?
' index = float(int(index)/4);\n' :
''
);
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getGetResultString
*
*/
_getGetResultString() {
if (!this.floatTextures) return ' return decode32(texel);\n';
return this._linesToString([
' if (channel == 0) return texel.r',
' if (channel == 1) return texel.g',
' if (channel == 2) return texel.b',
' if (channel == 3) return texel.a'
]);
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getMainParamsString
*
* @desc Generate transpiled glsl Strings for user-defined parameters sent to a kernel
*
* @param {Array} args - The actual parameters sent to the Kernel
*
* @returns {String} result
*
*/
_getMainParamsString(args) {
const result = [];
const paramTypes = this.paramTypes;
const paramNames = this.paramNames;
for (let i = 0; i < paramNames.length; i++) {
const param = args[i];
const paramName = paramNames[i];
const paramType = paramTypes[i];
if (this.hardcodeConstants) {
if (paramType === 'Array' || paramType === 'Texture') {
const paramDim = utils.getDimensions(param, true);
const paramSize = utils.dimToTexSize({
floatTextures: this.floatTextures,
floatOutput: this.floatOutput
}, paramDim);
result.push(
`uniform highp sampler2D user_${ paramName }`,
`highp vec2 user_${ paramName }Size = vec2(${ paramSize[0] }.0, ${ paramSize[1] }.0)`,
`highp vec3 user_${ paramName }Dim = vec3(${ paramDim[0] }.0, ${ paramDim[1]}.0, ${ paramDim[2] }.0)`
);
} else if (paramType === 'Number' && Number.isInteger(param)) {
result.push(`highp float user_${ paramName } = ${ param }.0`);
} else if (paramType === 'Number') {
result.push(`highp float user_${ paramName } = ${ param }`);
}
} else {
if (paramType === 'Array' || paramType === 'Texture') {
result.push(
`uniform highp sampler2D user_${ paramName }`,
`uniform highp vec2 user_${ paramName }Size`,
`uniform highp vec3 user_${ paramName }Dim`
);
} else if (paramType === 'Number') {
result.push(`uniform highp float user_${ paramName }`);
}
}
}
return this._linesToString(result);
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getMainConstantsString
*
*/
_getMainConstantsString() {
const result = [];
if (this.constants) {
for (let name in this.constants) {
if (!this.constants.hasOwnProperty(name)) continue;
let value = parseFloat(this.constants[name]);
if (Number.isInteger(value)) {
result.push('const float constants_' + name + ' = ' + parseInt(value) + '.0');
} else {
result.push('const float constants_' + name + ' = ' + parseFloat(value));
}
}
}
return this._linesToString(result);
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getKernelString
*
* @desc Get Kernel program string (in *glsl*) for a kernel.
*
* @returns {String} result
*
*/
_getKernelString() {
const result = [];
const names = this.subKernelOutputVariableNames;
if (names !== null) {
result.push('highp float kernelResult = 0.0');
for (let i = 0; i < names.length; i++) {
result.push(
`highp float ${ names[i] } = 0.0`
);
}
/* this is v2 prep
result.push('highp float kernelResult = 0.0');
result.push('layout(location = 0) out highp float fradData0 = 0.0');
for (let i = 0; i < names.length; i++) {
result.push(
`highp float ${ names[i] } = 0.0`,
`layout(location = ${ i + 1 }) out highp float fragData${ i + 1 } = 0.0`
);
}*/
} else {
result.push('highp float kernelResult = 0.0');
}
return this._linesToString(result) + this.functionBuilder.getPrototypeString('kernel');
}
/**
*
* @memberOf OpenCLKernel#
* @function
* @name _getMainResultString
*
* @desc Get main result string with checks for floatOutput, graphical, subKernelsOutputs, etc.
*
* @returns {String} result
*
*/
_getMainResultString() {
const names = this.subKernelOutputVariableNames;
const result = [];
if (this.floatOutput) {
result.push(' index *= 4.0');
}
if (this.graphical) {
result.push(
' threadId = indexTo3D(index, uOutputDim)',
' kernel()',
' gl_FragColor = actualColor'
);
} else if (this.floatOutput) {
result.push(
' threadId = indexTo3D(index, uOutputDim)',
' kernel()',
' gl_FragColor.r = kernelResult',
' index += 1.0',
' threadId = indexTo3D(index, uOutputDim)',
' kernel()',
' gl_FragColor.g = kernelResult',
' index += 1.0',
' threadId = indexTo3D(index, uOutputDim)',
' kernel()',
' gl_FragColor.b = kernelResult',
' index += 1.0',
' threadId = indexTo3D(index, uOutputDim)',
' kernel()',
' gl_FragColor.a = kernelResult'
);
} else if (names !== null) {
result.push(' threadId = indexTo3D(index, uOutputDim)');
result.push(' kernel()');
result.push(' gl_FragData[0] = encode32(kernelResult)');
for (let i = 0; i < names.length; i++) {
result.push(` gl_FragData[${ i + 1 }] = encode32(${ names[i] })`);
}
/* this is v2 prep
* result.push(' kernel()');
result.push(' fragData0 = encode32(kernelResult)');
for (let i = 0; i < names.length; i++) {
result.push(` fragData${ i + 1 } = encode32(${ names[i] })`);
}*/
} else {
result.push(
' threadId = indexTo3D(index, uOutputDim)',
' kernel()',
' gl_FragColor = encode32(kernelResult)'
);
}
return this._linesToString(result);
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _linesToString
*
* @param {Array} lines - An Array of strings
*
* @returns {String} Single combined String, seperated by *\n*
*
*/
_linesToString(lines) {
if (lines.length > 0) {
return lines.join(';\n') + ';\n';
} else {
return '\n';
}
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _replaceArtifacts
*
* @param {String} src - Shader string
* @param {Array} map - Variables/Constants associated with shader
*
*/
_replaceArtifacts(src, map) {
return src.replace(/[ ]*__([A-Z]+[0-9]*([_]?[A-Z])*)__;\n/g, (match, artifact) => {
if (map.hasOwnProperty(artifact)) {
return map[artifact];
}
throw `unhandled artifact ${ artifact }`;
});
}
/**
* @memberOf OpenCLKernel#
* @function
@ -1086,47 +502,35 @@ module.exports = class OpenCLKernel extends KernelBase {
}
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getFragShaderString
*
* @desc Get the fragment shader String.
* If the String hasn't been compiled yet,
* then this method compiles it as well
*
* @param {Array} args - The actual parameters sent to the Kernel
*
* @returns {String} Fragment Shader string
*
*/
_getFragShaderString(args) {
if (this.compiledFragShaderString !== null) {
return this.compiledFragShaderString;
}
return this.compiledFragShaderString = this._replaceArtifacts(fragShaderString, this._getFragShaderArtifactMap(args));
}
/**
* @memberOf OpenCLKernel#
* @function
* @name _getVertShaderString
*
* @desc Get the vertical shader String
*
* @param {Array} args - The actual parameters sent to the Kernel
*
* @returns {String} Vertical Shader string
*
*/
_getVertShaderString(args) {
if (this.compiledVertShaderString !== null) {
return this.compiledVertShaderString;
}
//TODO: webgl2 compile like frag shader
return this.compiledVertShaderString = vertShaderString;
}
getDevice() {
if (device !== null) return device;
const mode = this.mode;
for (let platformIndex = 0; platformIndex < platforms.length; platformIndex++) {
const devices = (mode === 'gpu'
? platforms[platformIndex].gpuDevices()
: platforms[platformIndex].cpuDevices());
for (let deviceIndex = 0; deviceIndex < devices.length; deviceIndex++) {
// Is double precision supported?
// See: https://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
if (devices[deviceIndex].doubleFpConfig
& (
defs.CL_FP_FMA
| defs.CL_FP_ROUND_TO_NEAREST
| defs.CL_FP_ROUND_TO_ZERO
| defs.CL_FP_ROUND_TO_INF
| defs.CL_FP_INF_NAN
| defs.CL_FP_DENORM
)) {
return devices[deviceIndex];
}
}
if (mode === 'auto') {
console.warn('No GPU device has been found, searching for a CPU fallback.');
return this.getDevice('cpu');
}
}
}
/**
* @memberOf OpenCLKernel#
* @function

View File

@ -1,132 +0,0 @@
// Fragment shader source template, exported as one String.
// Tokens of the form `__NAME__;` (for example __HEADER__, __CONSTANTS__, __KERNEL__,
// __MAIN_RESULT__) are placeholders inside the GLSL text.
// NOTE(review): presumably they are substituted with generated GLSL before the shader
// is compiled - confirm against the module that consumes this template.
// Everything between the backticks is emitted verbatim, so notes about the shader
// belong out here rather than inside the template literal.
module.exports = `__HEADER__;
precision highp float;
precision highp int;
precision highp sampler2D;
const float LOOP_MAX = __LOOP_MAX__;
#define EPSILON 0.0000001;
__CONSTANTS__;
varying highp vec2 vTexCoord;
vec4 round(vec4 x) {
return floor(x + 0.5);
}
highp float round(highp float x) {
return floor(x + 0.5);
}
vec2 integerMod(vec2 x, float y) {
vec2 res = floor(mod(x, y));
return res * step(1.0 - floor(y), -res);
}
vec3 integerMod(vec3 x, float y) {
vec3 res = floor(mod(x, y));
return res * step(1.0 - floor(y), -res);
}
vec4 integerMod(vec4 x, vec4 y) {
vec4 res = floor(mod(x, y));
return res * step(1.0 - floor(y), -res);
}
highp float integerMod(highp float x, highp float y) {
highp float res = floor(mod(x, y));
return res * (res > floor(y) - 1.0 ? 0.0 : 1.0);
}
highp int integerMod(highp int x, highp int y) {
return int(integerMod(float(x), float(y)));
}
// Here be dragons!
// DO NOT OPTIMIZE THIS CODE
// YOU WILL BREAK SOMETHING ON SOMEBODY\'S MACHINE
// LEAVE IT AS IT IS, LEST YOU WASTE YOUR OWN TIME
const vec2 MAGIC_VEC = vec2(1.0, -256.0);
const vec4 SCALE_FACTOR = vec4(1.0, 256.0, 65536.0, 0.0);
const vec4 SCALE_FACTOR_INV = vec4(1.0, 0.00390625, 0.0000152587890625, 0.0); // 1, 1/256, 1/65536
highp float decode32(highp vec4 rgba) {
__DECODE32_ENDIANNESS__;
rgba *= 255.0;
vec2 gte128;
gte128.x = rgba.b >= 128.0 ? 1.0 : 0.0;
gte128.y = rgba.a >= 128.0 ? 1.0 : 0.0;
float exponent = 2.0 * rgba.a - 127.0 + dot(gte128, MAGIC_VEC);
float res = exp2(round(exponent));
rgba.b = rgba.b - 128.0 * gte128.x;
res = dot(rgba, SCALE_FACTOR) * exp2(round(exponent-23.0)) + res;
res *= gte128.y * -2.0 + 1.0;
return res;
}
highp vec4 encode32(highp float f) {
highp float F = abs(f);
highp float sign = f < 0.0 ? 1.0 : 0.0;
highp float exponent = floor(log2(F));
highp float mantissa = (exp2(-exponent) * F);
// exponent += floor(log2(mantissa));
vec4 rgba = vec4(F * exp2(23.0-exponent)) * SCALE_FACTOR_INV;
rgba.rg = integerMod(rgba.rg, 256.0);
rgba.b = integerMod(rgba.b, 128.0);
rgba.a = exponent*0.5 + 63.5;
rgba.ba += vec2(integerMod(exponent+127.0, 2.0), sign) * 128.0;
rgba = floor(rgba);
rgba *= 0.003921569; // 1/255
__ENCODE32_ENDIANNESS__;
return rgba;
}
// Dragons end here
highp float index;
highp vec3 threadId;
highp vec3 indexTo3D(highp float idx, highp vec3 texDim) {
highp float z = floor(idx / (texDim.x * texDim.y));
idx -= z * texDim.x * texDim.y;
highp float y = floor(idx / texDim.x);
highp float x = integerMod(idx, texDim.x);
return vec3(x, y, z);
}
highp float get(highp sampler2D tex, highp vec2 texSize, highp vec3 texDim, highp float z, highp float y, highp float x) {
highp vec3 xyz = vec3(x, y, z);
xyz = floor(xyz + 0.5);
__GET_WRAPAROUND__;
highp float index = round(xyz.x + texDim.x * (xyz.y + texDim.y * xyz.z));
__GET_TEXTURE_CHANNEL__;
highp float w = round(texSize.x);
vec2 st = vec2(integerMod(index, w), float(int(index) / int(w))) + 0.5;
__GET_TEXTURE_INDEX__;
highp vec4 texel = texture2D(tex, st / texSize);
__GET_RESULT__;
}
highp float get(highp sampler2D tex, highp vec2 texSize, highp vec3 texDim, highp float y, highp float x) {
return get(tex, texSize, texDim, 0.0, y, x);
}
highp float get(highp sampler2D tex, highp vec2 texSize, highp vec3 texDim, highp float x) {
return get(tex, texSize, texDim, 0.0, 0.0, x);
}
highp vec4 actualColor;
void color(float r, float g, float b, float a) {
actualColor = vec4(r,g,b,a);
}
void color(float r, float g, float b) {
color(r,g,b,1.0);
}
__MAIN_PARAMS__;
__MAIN_CONSTANTS__;
__KERNEL__;
void main(void) {
index = floor(vTexCoord.s * float(uTexSize.x)) + floor(vTexCoord.t * float(uTexSize.y)) * uTexSize.x;
__MAIN_RESULT__;
}`;

View File

@ -1,13 +0,0 @@
// Vertex shader source, exported as one String.
// It forwards the aPos attribute as the vertex position and copies aTexCoord
// into the vTexCoord varying; it contains no placeholders.
// Everything between the backticks is emitted verbatim, so notes about the shader
// belong out here rather than inside the template literal.
module.exports = `precision highp float;
precision highp int;
precision highp sampler2D;
attribute highp vec2 aPos;
attribute highp vec2 aTexCoord;
varying highp vec2 vTexCoord;
void main(void) {
gl_Position = vec4(aPos, 0, 1);
vTexCoord = aTexCoord;
}`;