diff --git a/example.js b/example.js index 2202b8cd..2e6d6ebf 100644 --- a/example.js +++ b/example.js @@ -1,60 +1,4 @@ -"use strict"; - -// Dependency: -var fs = require("fs"); -var path = require("path"); -var cwd = __dirname; -var nooocl = require('nooocl'); -var CLHost = nooocl.CLHost; -var CLContext = nooocl.CLContext; -var CLBuffer = nooocl.CLBuffer; -var CLCommandQueue = nooocl.CLCommandQueue; -var NDRange = nooocl.NDRange; -var CLError = nooocl.CLError; -var fastcall = require("fastcall"); -var ref = fastcall.ref; -var double = ref.types.double; - -// Initialize OpenCL then we get host, device, context, and a queue -var host = CLHost.createV11(); -var defs = host.cl.defs; - -var platforms = host.getPlatforms(); -var device; -function searchForDevice(hardware) { - platforms.forEach(function (p) { - var devices = hardware === "gpu" ? p.gpuDevices() : p.cpuDevices(); - devices = devices.filter(function (d) { - // Is double precision supported? - // See: https://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html - return true; - return d.doubleFpConfig & - (defs.CL_FP_FMA | defs.CL_FP_ROUND_TO_NEAREST | defs.CL_FP_ROUND_TO_ZERO | defs.CL_FP_ROUND_TO_INF | defs.CL_FP_INF_NAN | defs.CL_FP_DENORM); - }); - if (devices.length) { - device = devices[0]; - } - if (device) { - return false; - } - }); -} - -searchForDevice("gpu"); -if (!device) { - console.warn("No GPU device has been found, searching for a CPU fallback."); - searchForDevice("cpu"); -} - -if (!device) { - throw new Error("No capable OpenCL 1.1 device has been found."); -} -else { - console.log("Running on device: " + device.name + " - " + device.platform.name); -} - -var context = new CLContext(device); -var queue = new CLCommandQueue(context, device); +'use strict'; // Initialize data on the host side: var n = 1000; @@ -101,28 +45,28 @@ __kernel void vecAdd( __global double *a, `; var program = context.createProgram(kernelSourceCode); -console.log("Building ..."); +console.log('Building ...'); // Building is always asynchronous in NOOOCL! nooocl.scope(function () { - return program.build("-cl-fast-relaxed-math") + return program.build('-cl-fast-relaxed-math') .then(function () { var buildStatus = program.getBuildStatus(device); var buildLog = program.getBuildLog(device); console.log(buildLog); if (buildStatus < 0) { - throw new CLError(buildStatus, "Build failed."); + throw new CLError(buildStatus, 'Build failed.'); } - console.log("Build completed."); + console.log('Build completed.'); // Kernel stuff: - var kernel = program.createKernel("vecAdd"); + var kernel = program.createKernel('vecAdd'); kernel.setArg(0, d_a); kernel.setArg(1, d_b); kernel.setArg(2, d_c); // Notice: in NOOOCL you have specify type of value arguments, // because there is no C compatible type system exists in JavaScript. - kernel.setArg(3, n, "uint"); + kernel.setArg(3, n, 'uint'); // Ranges: // Number of work items in each local work group @@ -130,7 +74,7 @@ nooocl.scope(function () { // Number of total work items - localSize must be devisor var globalSize = new NDRange(Math.ceil(n / 64) * 64); - console.log("Launching the kernel."); + console.log('Launching the kernel.'); // Enqueue the kernel asynchronously queue.enqueueNDRangeKernel(kernel, globalSize, localSize); @@ -139,7 +83,7 @@ nooocl.scope(function () { // when the queue ends. // We should query a waitable queue which returns an event for each enqueue operations, // and the event's promise can be used for continuation of the control flow on the host side. - console.log("Waiting for result."); + console.log('Waiting for result.'); return queue.waitable().enqueueReadBuffer(d_c, 0, bytes, h_c).promise .then(function() { // Data gets back to host, we're done: @@ -150,9 +94,9 @@ nooocl.scope(function () { sum += double.get(h_c, offset); } - console.log("Final result: " + sum / n); + console.log('Final result: ' + sum / n); }); }); }); -console.log("(Everything after this point is asynchronous.)"); \ No newline at end of file +console.log('(Everything after this point is asynchronous.)'); \ No newline at end of file diff --git a/src/backend/open-cl/device.js b/src/backend/open-cl/device.js deleted file mode 100644 index e45de965..00000000 --- a/src/backend/open-cl/device.js +++ /dev/null @@ -1,41 +0,0 @@ -const nooocl = require('nooocl'); -const CLHost = nooocl.CLHost; -const host = CLHost.createV11(); -const defs = host.cl.defs; -const platforms = host.getPlatforms(); -let device; - -function searchForDevice(hardware) { - platforms.forEach(function (p) { - let devices = hardware === 'gpu' ? p.gpuDevices() : p.cpuDevices(); - devices = devices.filter(function (d) { - // Is double precision supported? - // See: https://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html - return d.doubleFpConfig & - (defs.CL_FP_FMA | defs.CL_FP_ROUND_TO_NEAREST | defs.CL_FP_ROUND_TO_ZERO | defs.CL_FP_ROUND_TO_INF | defs.CL_FP_INF_NAN | defs.CL_FP_DENORM); - }); - if (devices.length) { - device = devices[0]; - } - if (device) { - return false; - } - }); -} - -module.exports = function() { - searchForDevice('gpu'); - if (!device) { - console.warn('No GPU device has been found, searching for a CPU fallback.'); - searchForDevice('cpu'); - } - - if (!device) { - throw new Error('No capable OpenCL 1.1 device has been found.'); - } - else { - console.log('Running on device: ' + device.name + ' - ' + device.platform.name); - } - - return device; -}; \ No newline at end of file diff --git a/src/backend/open-cl/kernel.js b/src/backend/open-cl/kernel.js index 7178aabd..b2198dec 100644 --- a/src/backend/open-cl/kernel.js +++ b/src/backend/open-cl/kernel.js @@ -9,15 +9,24 @@ const CLCommandQueue = nooocl.CLCommandQueue; const NDRange = nooocl.NDRange; const CLError = nooocl.CLError; -const device = require('./device'); + +const fastcall = require('fastcall'); +const ref = fastcall.ref; +const double = ref.types.double; + +// Initialize OpenCL then we get host, device, context, and a queue +const host = CLHost.createV11(); +const defs = host.cl.defs; +const platforms = host.getPlatforms(); + const KernelBase = require('../kernel-base'); const utils = require('../../core/utils'); const Texture = require('../../core/texture'); const fragShaderString = require('./shader-frag'); const vertShaderString = require('./shader-vert'); const kernelString = require('./kernel-string'); -const canvases = []; -const canvasTexSizes = {}; +let device = null; + module.exports = class OpenCLKernel extends KernelBase { /** @@ -45,24 +54,19 @@ module.exports = class OpenCLKernel extends KernelBase { */ constructor(fnString, settings) { super(fnString, settings); - this.textureCache = {}; - this.threadDim = {}; - this.programUniformLocationCache = {}; - this.framebuffer = null; - this.buffer = null; this.program = null; this.functionBuilder = settings.functionBuilder; this.outputToTexture = settings.outputToTexture; - this.endianness = utils.systemEndianness(); this.subKernelOutputTextures = null; this.subKernelOutputVariableNames = null; this.paramTypes = null; this.argumentsLength = 0; - this.ext = null; this.compiledFragShaderString = null; this.compiledVertShaderString = null; - if (!this._openCl) this._openCl = device(); + if (!this._openCl) { + this._openCl = new CLContext(this.getDevice()); + } } /** @@ -76,50 +80,7 @@ module.exports = class OpenCLKernel extends KernelBase { * */ validateOptions() { - const isReadPixel = utils.isFloatReadPixelsSupported(); - if (this.floatTextures === true && !utils.OES_texture_float) { - throw 'Float textures are not supported on this browser'; - } else if (this.floatOutput === true && this.floatOutputForce !== true && !isReadPixel) { - throw 'Float texture outputs are not supported on this browser'; - } else if (this.floatTextures === undefined && utils.OES_texture_float) { - //NOTE: handle - this.floatTextures = true; - this.floatOutput = isReadPixel && !this.graphical; - } - if (!this.dimensions || this.dimensions.length === 0) { - if (arguments.length !== 1) { - throw 'Auto dimensions only supported for kernels with only one input'; - } - - const argType = utils.getArgumentType(arguments[0]); - if (argType === 'Array') { - this.dimensions = utils.getDimensions(argType); - } else if (argType === 'Texture') { - this.dimensions = arguments[0].dimensions; - } else { - throw 'Auto dimensions not supported for input type: ' + argType; - } - } - - this.texSize = utils.dimToTexSize({ - floatTextures: this.floatTextures, - floatOutput: this.floatOutput - }, this.dimensions, true); - - if (this.graphical) { - if (this.dimensions.length !== 2) { - throw 'Output must have 2 dimensions on graphical mode'; - } - - if (this.floatOutput) { - throw 'Cannot use graphical mode and float output at the same time'; - } - - this.texSize = utils.clone(this.dimensions); - } else if (this.floatOutput === undefined && utils.OES_texture_float) { - this.floatOutput = true; - } } /** @@ -135,72 +96,21 @@ module.exports = class OpenCLKernel extends KernelBase { build() { this.validateOptions(); this.setupParams(arguments); - const texSize = this.texSize; - const gl = this._openCl; - const canvas = this._canvas; - let canvasIndex = canvases.indexOf(canvas); - if (canvasIndex === -1) { - canvasIndex = canvases.length; - canvases.push(canvas); - canvasTexSizes[canvasIndex] = []; - } + const cl = this._openCl; + this.queue = new CLCommandQueue(cl, device); - const sizes = canvasTexSizes[canvasIndex]; - sizes.push(texSize); - const maxTexSize = [0, 0]; - for (let i = 0; i < sizes.length; i++) { - const size = sizes[i]; - if (maxTexSize[0] < size[0]) { - maxTexSize[0] = size[0]; - } - if (maxTexSize[1] < size[1]) { - maxTexSize[1] = size[1]; - } - } + const compiledKernelString = `#pragma OPENCL EXTENSION cl_khr_fp64 : enable + ${ this._addKernels() } + ${ this.functionBuilder.getPrototypeString('kernel') }`; - gl.viewport(0, 0, maxTexSize[0], maxTexSize[1]); - const threadDim = this.threadDim = utils.clone(this.dimensions); - while (threadDim.length < 3) { - threadDim.push(1); - } - - if (this.functionBuilder) this._addKernels(); - - const compiledVertShaderString = this._getVertShaderString(arguments); - const vertShader = gl.createShader(gl.VERTEX_SHADER); - gl.shaderSource(vertShader, compiledVertShaderString); - gl.compileShader(vertShader); - - const compiledFragShaderString = this._getFragShaderString(arguments); - const fragShader = gl.createShader(gl.FRAGMENT_SHADER); - gl.shaderSource(fragShader, compiledFragShaderString); - gl.compileShader(fragShader); - - if (!gl.getShaderParameter(vertShader, gl.COMPILE_STATUS)) { - console.log(compiledVertShaderString); - console.error('An error occurred compiling the shaders: ' + gl.getShaderInfoLog(vertShader)); - throw 'Error compiling vertex shader'; - } - if (!gl.getShaderParameter(fragShader, gl.COMPILE_STATUS)) { - console.log(compiledFragShaderString); - console.error('An error occurred compiling the shaders: ' + gl.getShaderInfoLog(fragShader)); - throw 'Error compiling fragment shader'; - } - - if (this.debug) { + if (this.debug) { console.log('Options:'); console.dir(this); - console.log('GLSL Shader Output:'); - console.log(compiledFragShaderString); + console.log('OpenCL Shader Output:'); + console.log(compiledKernelString); } - const program = this.program = gl.createProgram(); - gl.attachShader(program, vertShader); - gl.attachShader(program, fragShader); - gl.linkProgram(program); - this.framebuffer = gl.createFramebuffer(); - this.framebuffer.width = texSize[0]; - this.framebuffer.height = texSize[1]; + this.program = cl.createProgram(compiledKernelString); return this; } @@ -501,37 +411,6 @@ module.exports = class OpenCLKernel extends KernelBase { return location; } - /** - * @memberOf OpenCLKernel# - * @function - * @name _getFragShaderArtifactMap - * - * @desc Generate Shader artifacts for the kernel program. - * The final object contains HEADER, KERNEL, MAIN_RESULT, and others. - * - * @param {Array} args - The actual parameters sent to the Kernel - * - * @returns {Object} An object containing the Shader Artifacts(CONSTANTS, HEADER, KERNEL, etc.) - * - */ - _getFragShaderArtifactMap(args) { - return { - HEADER: this._getHeaderString(), - LOOP_MAX: this._getLoopMaxString(), - CONSTANTS: this._getConstantsString(), - DECODE32_ENDIANNESS: this._getDecode32EndiannessString(), - ENCODE32_ENDIANNESS: this._getEncode32EndiannessString(), - GET_WRAPAROUND: this._getGetWraparoundString(), - GET_TEXTURE_CHANNEL: this._getGetTextureChannelString(), - GET_TEXTURE_INDEX: this._getGetTextureIndexString(), - GET_RESULT: this._getGetResultString(), - MAIN_PARAMS: this._getMainParamsString(args), - MAIN_CONSTANTS: this._getMainConstantsString(), - KERNEL: this._getKernelString(), - MAIN_RESULT: this._getMainResultString() - }; - } - /** * @memberOf OpenCLKernel# * @function @@ -546,489 +425,26 @@ module.exports = class OpenCLKernel extends KernelBase { * */ _addArgument(value, type, name) { - const gl = this._openCl; + const cl = this._openCl; const argumentTexture = this.getArgumentTexture(name); - if (value.constructor === Texture) { - type = 'Texture'; - } - switch (type) { - case 'Array': - { - const dim = utils.getDimensions(value, true); - const size = utils.dimToTexSize({ - floatTextures: this.floatTextures, - floatOutput: this.floatOutput - }, dim); + // Initialize data on the host side: + const n = 1000; + const bytes = n * double.size; + const pointer = new Buffer(n * double.size); - gl.activeTexture(gl.TEXTURE0 + this.argumentsLength); - gl.bindTexture(gl.TEXTURE_2D, argumentTexture); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST); +// // Initialize vectors on host +// for (var i = 0; i < n; i++) { +// var offset = i * double.size; +// double.set(h_a, offset, 0.1 + 0.2); +// double.set(h_b, offset, 0); +// } - let length = size[0] * size[1]; - if (this.floatTextures) { - length *= 4; - } - - const valuesFlat = new Float32Array(length); - utils.flattenTo(value, valuesFlat); - - let buffer; - if (this.floatTextures) { - buffer = new Float32Array(valuesFlat); - gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, size[0], size[1], 0, gl.RGBA, gl.FLOAT, buffer); - } else { - buffer = new Uint8Array((new Float32Array(valuesFlat)).buffer); - gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, size[0], size[1], 0, gl.RGBA, gl.UNSIGNED_BYTE, buffer); - } - - const loc = this.getUniformLocation('user_' + name); - const locSize = this.getUniformLocation('user_' + name + 'Size'); - const dimLoc = this.getUniformLocation('user_' + name + 'Dim'); - - if (!this.hardcodeConstants) { - gl.uniform3fv(dimLoc, dim); - gl.uniform2fv(locSize, size); - } - gl.uniform1i(loc, this.argumentsLength); - break; - } - case 'Number': - { - const loc = this.getUniformLocation('user_' + name); - gl.uniform1f(loc, value); - break; - } - case 'Texture': - { - const inputTexture = value; - const dim = utils.getDimensions(inputTexture.dimensions, true); - const size = inputTexture.size; - - gl.activeTexture(gl.TEXTURE0 + this.argumentsLength); - gl.bindTexture(gl.TEXTURE_2D, inputTexture.texture); - - const loc = this.getUniformLocation('user_' + name); - const locSize = this.getUniformLocation('user_' + name + 'Size'); - const dimLoc = this.getUniformLocation('user_' + name + 'Dim'); - - gl.uniform3fv(dimLoc, dim); - gl.uniform2fv(locSize, size); - gl.uniform1i(loc, this.argumentsLength); - break; - } - default: - throw 'Input type not supported (WebGL): ' + value; - } + const buffer = new CLBuffer(cl, defs.CL_MEM_READ_ONLY, bytes); + this.queue.enqueueWriteBuffer(buffer, 0, bytes, pointer); + this.kernel.setArg(this.argumentsLength, buffer); this.argumentsLength++; } - /** - * @memberOf OpenCLKernel# - * @function - * @name _getHeaderString - * - * @desc Get the header string for the program. - * This returns an empty string if no sub-kernels are defined. - * - * @returns {String} result - * - */ - _getHeaderString() { - return ( - this.subKernels !== null || this.subKernelProperties !== null ? - //webgl2 '#version 300 es\n' : - '#extension GL_EXT_draw_buffers : require\n' : - '' - ); - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _getLoopMaxString - * - * @desc Get the maximum loop size String. - * - * @returns {String} result - * - */ - _getLoopMaxString() { - return ( - this.loopMaxIterations ? - ` ${ parseInt(this.loopMaxIterations) }.0;\n` : - ' 100.0;\n' - ); - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _getConstantsString - * - * @desc Generate transpiled glsl Strings for constant parameters sent to a kernel - * - * They can be defined by *hardcodeConstants* - * - * @returns {String} result - * - */ - _getConstantsString() { - const result = []; - const threadDim = this.threadDim; - const texSize = this.texSize; - if (this.hardcodeConstants) { - result.push( - `highp vec3 uOutputDim = vec3(${ threadDim[0] },${ threadDim[1] }, ${ threadDim[2] })`, - `highp vec2 uTexSize = vec2(${ texSize[0] }, ${ texSize[1] })` - ); - } else { - result.push( - 'uniform highp vec3 uOutputDim', - 'uniform highp vec2 uTexSize' - ); - } - - return this._linesToString(result); - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _getTextureCoordinate - * - * @desc Get texture coordinate string for the program - * - * @returns {String} result - * - */ - _getTextureCoordinate() { - const names = this.subKernelOutputVariableNames; - if (names === null || names.length < 1) { - return 'varying highp vec2 vTexCoord;\n'; - } else { - return 'out highp vec2 vTexCoord;\n'; - } - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _getDecode32EndiannessString - * - * @desc Get Decode32 endianness string for little-endian and big-endian - * - * @returns {String} result - * - */ - _getDecode32EndiannessString() { - return ( - this.endianness === 'LE' ? - '' : - ' rgba.rgba = rgba.abgr;\n' - ); - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _getEncode32EndiannessString - * - * @desc Get Encode32 endianness string for little-endian and big-endian - * - * @returns {String} result - * - */ - _getEncode32EndiannessString() { - return ( - this.endianness === 'LE' ? - '' : - ' rgba.rgba = rgba.abgr;\n' - ); - } - - /** - * @function - * @memberOf OpenCLKernel# - * @name _getGetWraparoundString - * - * @returns {String} wraparound string - */ - _getGetWraparoundString() { - return ( - this.wraparound ? - ' xyz = mod(xyz, texDim);\n' : - '' - ); - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _getGetTextureChannelString - * - */ - _getGetTextureChannelString() { - if (!this.floatTextures) return ''; - - return this._linesToString([ - ' int channel = int(integerMod(index, 4.0))', - ' index = float(int(index) / 4)' - ]); - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _getGetTextureIndexString - * - * @desc Get generic texture index string, if floatTextures flag is true. - * - * @example - * ' index = float(int(index)/4);\n' - * - */ - _getGetTextureIndexString() { - return ( - this.floatTextures ? - ' index = float(int(index)/4);\n' : - '' - ); - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _getGetResultString - * - */ - _getGetResultString() { - if (!this.floatTextures) return ' return decode32(texel);\n'; - return this._linesToString([ - ' if (channel == 0) return texel.r', - ' if (channel == 1) return texel.g', - ' if (channel == 2) return texel.b', - ' if (channel == 3) return texel.a' - ]); - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _getMainParamsString - * - * @desc Generate transpiled glsl Strings for user-defined parameters sent to a kernel - * - * @param {Array} args - The actual parameters sent to the Kernel - * - * @returns {String} result - * - */ - _getMainParamsString(args) { - const result = []; - const paramTypes = this.paramTypes; - const paramNames = this.paramNames; - for (let i = 0; i < paramNames.length; i++) { - const param = args[i]; - const paramName = paramNames[i]; - const paramType = paramTypes[i]; - if (this.hardcodeConstants) { - if (paramType === 'Array' || paramType === 'Texture') { - const paramDim = utils.getDimensions(param, true); - const paramSize = utils.dimToTexSize({ - floatTextures: this.floatTextures, - floatOutput: this.floatOutput - }, paramDim); - - result.push( - `uniform highp sampler2D user_${ paramName }`, - `highp vec2 user_${ paramName }Size = vec2(${ paramSize[0] }.0, ${ paramSize[1] }.0)`, - `highp vec3 user_${ paramName }Dim = vec3(${ paramDim[0] }.0, ${ paramDim[1]}.0, ${ paramDim[2] }.0)` - ); - } else if (paramType === 'Number' && Number.isInteger(param)) { - result.push(`highp float user_${ paramName } = ${ param }.0`); - } else if (paramType === 'Number') { - result.push(`highp float user_${ paramName } = ${ param }`); - } - } else { - if (paramType === 'Array' || paramType === 'Texture') { - result.push( - `uniform highp sampler2D user_${ paramName }`, - `uniform highp vec2 user_${ paramName }Size`, - `uniform highp vec3 user_${ paramName }Dim` - ); - } else if (paramType === 'Number') { - result.push(`uniform highp float user_${ paramName }`); - } - } - } - return this._linesToString(result); - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _getMainConstantsString - * - */ - _getMainConstantsString() { - const result = []; - if (this.constants) { - for (let name in this.constants) { - if (!this.constants.hasOwnProperty(name)) continue; - let value = parseFloat(this.constants[name]); - - if (Number.isInteger(value)) { - result.push('const float constants_' + name + ' = ' + parseInt(value) + '.0'); - } else { - result.push('const float constants_' + name + ' = ' + parseFloat(value)); - } - } - } - return this._linesToString(result); - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _getKernelString - * - * @desc Get Kernel program string (in *glsl*) for a kernel. - * - * @returns {String} result - * - */ - _getKernelString() { - const result = []; - const names = this.subKernelOutputVariableNames; - if (names !== null) { - result.push('highp float kernelResult = 0.0'); - for (let i = 0; i < names.length; i++) { - result.push( - `highp float ${ names[i] } = 0.0` - ); - } - - /* this is v2 prep - result.push('highp float kernelResult = 0.0'); - result.push('layout(location = 0) out highp float fradData0 = 0.0'); - for (let i = 0; i < names.length; i++) { - result.push( - `highp float ${ names[i] } = 0.0`, - `layout(location = ${ i + 1 }) out highp float fragData${ i + 1 } = 0.0` - ); - }*/ - } else { - result.push('highp float kernelResult = 0.0'); - } - - return this._linesToString(result) + this.functionBuilder.getPrototypeString('kernel'); - } - - /** - * - * @memberOf OpenCLKernel# - * @function - * @name _getMainResultString - * - * @desc Get main result string with checks for floatOutput, graphical, subKernelsOutputs, etc. - * - * @returns {String} result - * - */ - _getMainResultString() { - const names = this.subKernelOutputVariableNames; - const result = []; - if (this.floatOutput) { - result.push(' index *= 4.0'); - } - - if (this.graphical) { - result.push( - ' threadId = indexTo3D(index, uOutputDim)', - ' kernel()', - ' gl_FragColor = actualColor' - ); - } else if (this.floatOutput) { - result.push( - ' threadId = indexTo3D(index, uOutputDim)', - ' kernel()', - ' gl_FragColor.r = kernelResult', - ' index += 1.0', - ' threadId = indexTo3D(index, uOutputDim)', - ' kernel()', - ' gl_FragColor.g = kernelResult', - ' index += 1.0', - ' threadId = indexTo3D(index, uOutputDim)', - ' kernel()', - ' gl_FragColor.b = kernelResult', - ' index += 1.0', - ' threadId = indexTo3D(index, uOutputDim)', - ' kernel()', - ' gl_FragColor.a = kernelResult' - ); - } else if (names !== null) { - result.push(' threadId = indexTo3D(index, uOutputDim)'); - result.push(' kernel()'); - result.push(' gl_FragData[0] = encode32(kernelResult)'); - for (let i = 0; i < names.length; i++) { - result.push(` gl_FragData[${ i + 1 }] = encode32(${ names[i] })`); - } - /* this is v2 prep - * result.push(' kernel()'); - result.push(' fragData0 = encode32(kernelResult)'); - for (let i = 0; i < names.length; i++) { - result.push(` fragData${ i + 1 } = encode32(${ names[i] })`); - }*/ - } else { - result.push( - ' threadId = indexTo3D(index, uOutputDim)', - ' kernel()', - ' gl_FragColor = encode32(kernelResult)' - ); - } - - return this._linesToString(result); - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _linesToString - * - * @param {Array} lines - An Array of strings - * - * @returns {String} Single combined String, seperated by *\n* - * - */ - _linesToString(lines) { - if (lines.length > 0) { - return lines.join(';\n') + ';\n'; - } else { - return '\n'; - } - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _replaceArtifacts - * - * @param {String} src - Shader string - * @param {Array} map - Variables/Constants associated with shader - * - */ - _replaceArtifacts(src, map) { - return src.replace(/[ ]*__([A-Z]+[0-9]*([_]?[A-Z])*)__;\n/g, (match, artifact) => { - if (map.hasOwnProperty(artifact)) { - return map[artifact]; - } - throw `unhandled artifact ${ artifact }`; - }); - } - /** * @memberOf OpenCLKernel# * @function @@ -1086,47 +502,35 @@ module.exports = class OpenCLKernel extends KernelBase { } } - /** - * @memberOf OpenCLKernel# - * @function - * @name _getFragShaderString - * - * @desc Get the fragment shader String. - * If the String hasn't been compiled yet, - * then this method compiles it as well - * - * @param {Array} args - The actual parameters sent to the Kernel - * - * @returns {String} Fragment Shader string - * - */ - _getFragShaderString(args) { - if (this.compiledFragShaderString !== null) { - return this.compiledFragShaderString; - } - return this.compiledFragShaderString = this._replaceArtifacts(fragShaderString, this._getFragShaderArtifactMap(args)); - } - - /** - * @memberOf OpenCLKernel# - * @function - * @name _getVertShaderString - * - * @desc Get the vertical shader String - * - * @param {Array} args - The actual parameters sent to the Kernel - * - * @returns {String} Vertical Shader string - * - */ - _getVertShaderString(args) { - if (this.compiledVertShaderString !== null) { - return this.compiledVertShaderString; - } - //TODO: webgl2 compile like frag shader - return this.compiledVertShaderString = vertShaderString; - } + getDevice() { + if (device !== null) return device; + const mode = this.mode; + for (let platformIndex = 0; platformIndex < platforms.length; platformIndex++) { + const devices = (mode === 'gpu' + ? platforms[platformIndex].gpuDevices() + : platforms[platformIndex].cpuDevices()); + for (let deviceIndex = 0; deviceIndex < devices.length; deviceIndex++) { + // Is double precision supported? + // See: https://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html + if (devices[deviceIndex].doubleFpConfig + & ( + defs.CL_FP_FMA + | defs.CL_FP_ROUND_TO_NEAREST + | defs.CL_FP_ROUND_TO_ZERO + | defs.CL_FP_ROUND_TO_INF + | defs.CL_FP_INF_NAN + | defs.CL_FP_DENORM + )) { + return devices[deviceIndex]; + } + } + if (mode === 'auto') { + console.warn('No GPU device has been found, searching for a CPU fallback.'); + return this.getDevice('cpu'); + } + } + } /** * @memberOf OpenCLKernel# * @function diff --git a/src/backend/open-cl/shader-frag.js b/src/backend/open-cl/shader-frag.js deleted file mode 100644 index 544bdd7f..00000000 --- a/src/backend/open-cl/shader-frag.js +++ /dev/null @@ -1,132 +0,0 @@ -module.exports = `__HEADER__; -precision highp float; -precision highp int; -precision highp sampler2D; - -const float LOOP_MAX = __LOOP_MAX__; -#define EPSILON 0.0000001; - -__CONSTANTS__; - -varying highp vec2 vTexCoord; - -vec4 round(vec4 x) { - return floor(x + 0.5); -} - -highp float round(highp float x) { - return floor(x + 0.5); -} - -vec2 integerMod(vec2 x, float y) { - vec2 res = floor(mod(x, y)); - return res * step(1.0 - floor(y), -res); -} - -vec3 integerMod(vec3 x, float y) { - vec3 res = floor(mod(x, y)); - return res * step(1.0 - floor(y), -res); -} - -vec4 integerMod(vec4 x, vec4 y) { - vec4 res = floor(mod(x, y)); - return res * step(1.0 - floor(y), -res); -} - -highp float integerMod(highp float x, highp float y) { - highp float res = floor(mod(x, y)); - return res * (res > floor(y) - 1.0 ? 0.0 : 1.0); -} - -highp int integerMod(highp int x, highp int y) { - return int(integerMod(float(x), float(y))); -} - -// Here be dragons! -// DO NOT OPTIMIZE THIS CODE -// YOU WILL BREAK SOMETHING ON SOMEBODY\'S MACHINE -// LEAVE IT AS IT IS, LEST YOU WASTE YOUR OWN TIME -const vec2 MAGIC_VEC = vec2(1.0, -256.0); -const vec4 SCALE_FACTOR = vec4(1.0, 256.0, 65536.0, 0.0); -const vec4 SCALE_FACTOR_INV = vec4(1.0, 0.00390625, 0.0000152587890625, 0.0); // 1, 1/256, 1/65536 -highp float decode32(highp vec4 rgba) { - __DECODE32_ENDIANNESS__; - rgba *= 255.0; - vec2 gte128; - gte128.x = rgba.b >= 128.0 ? 1.0 : 0.0; - gte128.y = rgba.a >= 128.0 ? 1.0 : 0.0; - float exponent = 2.0 * rgba.a - 127.0 + dot(gte128, MAGIC_VEC); - float res = exp2(round(exponent)); - rgba.b = rgba.b - 128.0 * gte128.x; - res = dot(rgba, SCALE_FACTOR) * exp2(round(exponent-23.0)) + res; - res *= gte128.y * -2.0 + 1.0; - return res; -} - -highp vec4 encode32(highp float f) { - highp float F = abs(f); - highp float sign = f < 0.0 ? 1.0 : 0.0; - highp float exponent = floor(log2(F)); - highp float mantissa = (exp2(-exponent) * F); - // exponent += floor(log2(mantissa)); - vec4 rgba = vec4(F * exp2(23.0-exponent)) * SCALE_FACTOR_INV; - rgba.rg = integerMod(rgba.rg, 256.0); - rgba.b = integerMod(rgba.b, 128.0); - rgba.a = exponent*0.5 + 63.5; - rgba.ba += vec2(integerMod(exponent+127.0, 2.0), sign) * 128.0; - rgba = floor(rgba); - rgba *= 0.003921569; // 1/255 - __ENCODE32_ENDIANNESS__; - return rgba; -} -// Dragons end here - -highp float index; -highp vec3 threadId; - -highp vec3 indexTo3D(highp float idx, highp vec3 texDim) { - highp float z = floor(idx / (texDim.x * texDim.y)); - idx -= z * texDim.x * texDim.y; - highp float y = floor(idx / texDim.x); - highp float x = integerMod(idx, texDim.x); - return vec3(x, y, z); -} - -highp float get(highp sampler2D tex, highp vec2 texSize, highp vec3 texDim, highp float z, highp float y, highp float x) { - highp vec3 xyz = vec3(x, y, z); - xyz = floor(xyz + 0.5); - __GET_WRAPAROUND__; - highp float index = round(xyz.x + texDim.x * (xyz.y + texDim.y * xyz.z)); - __GET_TEXTURE_CHANNEL__; - highp float w = round(texSize.x); - vec2 st = vec2(integerMod(index, w), float(int(index) / int(w))) + 0.5; - __GET_TEXTURE_INDEX__; - highp vec4 texel = texture2D(tex, st / texSize); - __GET_RESULT__; -} - -highp float get(highp sampler2D tex, highp vec2 texSize, highp vec3 texDim, highp float y, highp float x) { - return get(tex, texSize, texDim, 0.0, y, x); -} - -highp float get(highp sampler2D tex, highp vec2 texSize, highp vec3 texDim, highp float x) { - return get(tex, texSize, texDim, 0.0, 0.0, x); -} - -highp vec4 actualColor; -void color(float r, float g, float b, float a) { - actualColor = vec4(r,g,b,a); -} - -void color(float r, float g, float b) { - color(r,g,b,1.0); -} - -__MAIN_PARAMS__; -__MAIN_CONSTANTS__; -__KERNEL__; - -void main(void) { - index = floor(vTexCoord.s * float(uTexSize.x)) + floor(vTexCoord.t * float(uTexSize.y)) * uTexSize.x; - __MAIN_RESULT__; -}`; \ No newline at end of file diff --git a/src/backend/open-cl/shader-vert.js b/src/backend/open-cl/shader-vert.js deleted file mode 100644 index 0df7980f..00000000 --- a/src/backend/open-cl/shader-vert.js +++ /dev/null @@ -1,13 +0,0 @@ -module.exports = `precision highp float; -precision highp int; -precision highp sampler2D; - -attribute highp vec2 aPos; -attribute highp vec2 aTexCoord; - -varying highp vec2 vTexCoord; - -void main(void) { - gl_Position = vec4(aPos, 0, 1); - vTexCoord = aTexCoord; -}`; \ No newline at end of file