mirror of
https://github.com/gpujs/gpu.js.git
synced 2026-01-25 16:08:02 +00:00
more advancement with opencl
This commit is contained in:
parent
2a3945efe1
commit
33e841c9da
78
example.js
78
example.js
@ -1,60 +1,4 @@
|
||||
"use strict";
|
||||
|
||||
// Dependency:
|
||||
var fs = require("fs");
|
||||
var path = require("path");
|
||||
var cwd = __dirname;
|
||||
var nooocl = require('nooocl');
|
||||
var CLHost = nooocl.CLHost;
|
||||
var CLContext = nooocl.CLContext;
|
||||
var CLBuffer = nooocl.CLBuffer;
|
||||
var CLCommandQueue = nooocl.CLCommandQueue;
|
||||
var NDRange = nooocl.NDRange;
|
||||
var CLError = nooocl.CLError;
|
||||
var fastcall = require("fastcall");
|
||||
var ref = fastcall.ref;
|
||||
var double = ref.types.double;
|
||||
|
||||
// Initialize OpenCL then we get host, device, context, and a queue
|
||||
var host = CLHost.createV11();
|
||||
var defs = host.cl.defs;
|
||||
|
||||
var platforms = host.getPlatforms();
|
||||
var device;
|
||||
function searchForDevice(hardware) {
|
||||
platforms.forEach(function (p) {
|
||||
var devices = hardware === "gpu" ? p.gpuDevices() : p.cpuDevices();
|
||||
devices = devices.filter(function (d) {
|
||||
// Is double precision supported?
|
||||
// See: https://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
|
||||
return true;
|
||||
return d.doubleFpConfig &
|
||||
(defs.CL_FP_FMA | defs.CL_FP_ROUND_TO_NEAREST | defs.CL_FP_ROUND_TO_ZERO | defs.CL_FP_ROUND_TO_INF | defs.CL_FP_INF_NAN | defs.CL_FP_DENORM);
|
||||
});
|
||||
if (devices.length) {
|
||||
device = devices[0];
|
||||
}
|
||||
if (device) {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
searchForDevice("gpu");
|
||||
if (!device) {
|
||||
console.warn("No GPU device has been found, searching for a CPU fallback.");
|
||||
searchForDevice("cpu");
|
||||
}
|
||||
|
||||
if (!device) {
|
||||
throw new Error("No capable OpenCL 1.1 device has been found.");
|
||||
}
|
||||
else {
|
||||
console.log("Running on device: " + device.name + " - " + device.platform.name);
|
||||
}
|
||||
|
||||
var context = new CLContext(device);
|
||||
var queue = new CLCommandQueue(context, device);
|
||||
'use strict';
|
||||
|
||||
// Initialize data on the host side:
|
||||
var n = 1000;
|
||||
@ -101,28 +45,28 @@ __kernel void vecAdd( __global double *a,
|
||||
`;
|
||||
var program = context.createProgram(kernelSourceCode);
|
||||
|
||||
console.log("Building ...");
|
||||
console.log('Building ...');
|
||||
// Building is always asynchronous in NOOOCL!
|
||||
nooocl.scope(function () {
|
||||
return program.build("-cl-fast-relaxed-math")
|
||||
return program.build('-cl-fast-relaxed-math')
|
||||
.then(function () {
|
||||
var buildStatus = program.getBuildStatus(device);
|
||||
var buildLog = program.getBuildLog(device);
|
||||
console.log(buildLog);
|
||||
if (buildStatus < 0) {
|
||||
throw new CLError(buildStatus, "Build failed.");
|
||||
throw new CLError(buildStatus, 'Build failed.');
|
||||
}
|
||||
console.log("Build completed.");
|
||||
console.log('Build completed.');
|
||||
|
||||
// Kernel stuff:
|
||||
var kernel = program.createKernel("vecAdd");
|
||||
var kernel = program.createKernel('vecAdd');
|
||||
|
||||
kernel.setArg(0, d_a);
|
||||
kernel.setArg(1, d_b);
|
||||
kernel.setArg(2, d_c);
|
||||
// Notice: in NOOOCL you have specify type of value arguments,
|
||||
// because there is no C compatible type system exists in JavaScript.
|
||||
kernel.setArg(3, n, "uint");
|
||||
kernel.setArg(3, n, 'uint');
|
||||
|
||||
// Ranges:
|
||||
// Number of work items in each local work group
|
||||
@ -130,7 +74,7 @@ nooocl.scope(function () {
|
||||
// Number of total work items - localSize must be devisor
|
||||
var globalSize = new NDRange(Math.ceil(n / 64) * 64);
|
||||
|
||||
console.log("Launching the kernel.");
|
||||
console.log('Launching the kernel.');
|
||||
|
||||
// Enqueue the kernel asynchronously
|
||||
queue.enqueueNDRangeKernel(kernel, globalSize, localSize);
|
||||
@ -139,7 +83,7 @@ nooocl.scope(function () {
|
||||
// when the queue ends.
|
||||
// We should query a waitable queue which returns an event for each enqueue operations,
|
||||
// and the event's promise can be used for continuation of the control flow on the host side.
|
||||
console.log("Waiting for result.");
|
||||
console.log('Waiting for result.');
|
||||
return queue.waitable().enqueueReadBuffer(d_c, 0, bytes, h_c).promise
|
||||
.then(function() {
|
||||
// Data gets back to host, we're done:
|
||||
@ -150,9 +94,9 @@ nooocl.scope(function () {
|
||||
sum += double.get(h_c, offset);
|
||||
}
|
||||
|
||||
console.log("Final result: " + sum / n);
|
||||
console.log('Final result: ' + sum / n);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
console.log("(Everything after this point is asynchronous.)");
|
||||
console.log('(Everything after this point is asynchronous.)');
|
||||
@ -1,41 +0,0 @@
|
||||
const nooocl = require('nooocl');
|
||||
const CLHost = nooocl.CLHost;
|
||||
const host = CLHost.createV11();
|
||||
const defs = host.cl.defs;
|
||||
const platforms = host.getPlatforms();
|
||||
let device;
|
||||
|
||||
function searchForDevice(hardware) {
|
||||
platforms.forEach(function (p) {
|
||||
let devices = hardware === 'gpu' ? p.gpuDevices() : p.cpuDevices();
|
||||
devices = devices.filter(function (d) {
|
||||
// Is double precision supported?
|
||||
// See: https://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
|
||||
return d.doubleFpConfig &
|
||||
(defs.CL_FP_FMA | defs.CL_FP_ROUND_TO_NEAREST | defs.CL_FP_ROUND_TO_ZERO | defs.CL_FP_ROUND_TO_INF | defs.CL_FP_INF_NAN | defs.CL_FP_DENORM);
|
||||
});
|
||||
if (devices.length) {
|
||||
device = devices[0];
|
||||
}
|
||||
if (device) {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
module.exports = function() {
|
||||
searchForDevice('gpu');
|
||||
if (!device) {
|
||||
console.warn('No GPU device has been found, searching for a CPU fallback.');
|
||||
searchForDevice('cpu');
|
||||
}
|
||||
|
||||
if (!device) {
|
||||
throw new Error('No capable OpenCL 1.1 device has been found.');
|
||||
}
|
||||
else {
|
||||
console.log('Running on device: ' + device.name + ' - ' + device.platform.name);
|
||||
}
|
||||
|
||||
return device;
|
||||
};
|
||||
@ -9,15 +9,24 @@ const CLCommandQueue = nooocl.CLCommandQueue;
|
||||
const NDRange = nooocl.NDRange;
|
||||
const CLError = nooocl.CLError;
|
||||
|
||||
const device = require('./device');
|
||||
|
||||
const fastcall = require('fastcall');
|
||||
const ref = fastcall.ref;
|
||||
const double = ref.types.double;
|
||||
|
||||
// Initialize OpenCL then we get host, device, context, and a queue
|
||||
const host = CLHost.createV11();
|
||||
const defs = host.cl.defs;
|
||||
const platforms = host.getPlatforms();
|
||||
|
||||
const KernelBase = require('../kernel-base');
|
||||
const utils = require('../../core/utils');
|
||||
const Texture = require('../../core/texture');
|
||||
const fragShaderString = require('./shader-frag');
|
||||
const vertShaderString = require('./shader-vert');
|
||||
const kernelString = require('./kernel-string');
|
||||
const canvases = [];
|
||||
const canvasTexSizes = {};
|
||||
let device = null;
|
||||
|
||||
module.exports = class OpenCLKernel extends KernelBase {
|
||||
|
||||
/**
|
||||
@ -45,24 +54,19 @@ module.exports = class OpenCLKernel extends KernelBase {
|
||||
*/
|
||||
constructor(fnString, settings) {
|
||||
super(fnString, settings);
|
||||
this.textureCache = {};
|
||||
this.threadDim = {};
|
||||
this.programUniformLocationCache = {};
|
||||
this.framebuffer = null;
|
||||
|
||||
this.buffer = null;
|
||||
this.program = null;
|
||||
this.functionBuilder = settings.functionBuilder;
|
||||
this.outputToTexture = settings.outputToTexture;
|
||||
this.endianness = utils.systemEndianness();
|
||||
this.subKernelOutputTextures = null;
|
||||
this.subKernelOutputVariableNames = null;
|
||||
this.paramTypes = null;
|
||||
this.argumentsLength = 0;
|
||||
this.ext = null;
|
||||
this.compiledFragShaderString = null;
|
||||
this.compiledVertShaderString = null;
|
||||
if (!this._openCl) this._openCl = device();
|
||||
if (!this._openCl) {
|
||||
this._openCl = new CLContext(this.getDevice());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -76,50 +80,7 @@ module.exports = class OpenCLKernel extends KernelBase {
|
||||
*
|
||||
*/
|
||||
validateOptions() {
|
||||
const isReadPixel = utils.isFloatReadPixelsSupported();
|
||||
if (this.floatTextures === true && !utils.OES_texture_float) {
|
||||
throw 'Float textures are not supported on this browser';
|
||||
} else if (this.floatOutput === true && this.floatOutputForce !== true && !isReadPixel) {
|
||||
throw 'Float texture outputs are not supported on this browser';
|
||||
} else if (this.floatTextures === undefined && utils.OES_texture_float) {
|
||||
//NOTE: handle
|
||||
this.floatTextures = true;
|
||||
this.floatOutput = isReadPixel && !this.graphical;
|
||||
}
|
||||
|
||||
if (!this.dimensions || this.dimensions.length === 0) {
|
||||
if (arguments.length !== 1) {
|
||||
throw 'Auto dimensions only supported for kernels with only one input';
|
||||
}
|
||||
|
||||
const argType = utils.getArgumentType(arguments[0]);
|
||||
if (argType === 'Array') {
|
||||
this.dimensions = utils.getDimensions(argType);
|
||||
} else if (argType === 'Texture') {
|
||||
this.dimensions = arguments[0].dimensions;
|
||||
} else {
|
||||
throw 'Auto dimensions not supported for input type: ' + argType;
|
||||
}
|
||||
}
|
||||
|
||||
this.texSize = utils.dimToTexSize({
|
||||
floatTextures: this.floatTextures,
|
||||
floatOutput: this.floatOutput
|
||||
}, this.dimensions, true);
|
||||
|
||||
if (this.graphical) {
|
||||
if (this.dimensions.length !== 2) {
|
||||
throw 'Output must have 2 dimensions on graphical mode';
|
||||
}
|
||||
|
||||
if (this.floatOutput) {
|
||||
throw 'Cannot use graphical mode and float output at the same time';
|
||||
}
|
||||
|
||||
this.texSize = utils.clone(this.dimensions);
|
||||
} else if (this.floatOutput === undefined && utils.OES_texture_float) {
|
||||
this.floatOutput = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -135,72 +96,21 @@ module.exports = class OpenCLKernel extends KernelBase {
|
||||
build() {
|
||||
this.validateOptions();
|
||||
this.setupParams(arguments);
|
||||
const texSize = this.texSize;
|
||||
const gl = this._openCl;
|
||||
const canvas = this._canvas;
|
||||
let canvasIndex = canvases.indexOf(canvas);
|
||||
if (canvasIndex === -1) {
|
||||
canvasIndex = canvases.length;
|
||||
canvases.push(canvas);
|
||||
canvasTexSizes[canvasIndex] = [];
|
||||
}
|
||||
const cl = this._openCl;
|
||||
this.queue = new CLCommandQueue(cl, device);
|
||||
|
||||
const sizes = canvasTexSizes[canvasIndex];
|
||||
sizes.push(texSize);
|
||||
const maxTexSize = [0, 0];
|
||||
for (let i = 0; i < sizes.length; i++) {
|
||||
const size = sizes[i];
|
||||
if (maxTexSize[0] < size[0]) {
|
||||
maxTexSize[0] = size[0];
|
||||
}
|
||||
if (maxTexSize[1] < size[1]) {
|
||||
maxTexSize[1] = size[1];
|
||||
}
|
||||
}
|
||||
const compiledKernelString = `#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
${ this._addKernels() }
|
||||
${ this.functionBuilder.getPrototypeString('kernel') }`;
|
||||
|
||||
gl.viewport(0, 0, maxTexSize[0], maxTexSize[1]);
|
||||
const threadDim = this.threadDim = utils.clone(this.dimensions);
|
||||
while (threadDim.length < 3) {
|
||||
threadDim.push(1);
|
||||
}
|
||||
|
||||
if (this.functionBuilder) this._addKernels();
|
||||
|
||||
const compiledVertShaderString = this._getVertShaderString(arguments);
|
||||
const vertShader = gl.createShader(gl.VERTEX_SHADER);
|
||||
gl.shaderSource(vertShader, compiledVertShaderString);
|
||||
gl.compileShader(vertShader);
|
||||
|
||||
const compiledFragShaderString = this._getFragShaderString(arguments);
|
||||
const fragShader = gl.createShader(gl.FRAGMENT_SHADER);
|
||||
gl.shaderSource(fragShader, compiledFragShaderString);
|
||||
gl.compileShader(fragShader);
|
||||
|
||||
if (!gl.getShaderParameter(vertShader, gl.COMPILE_STATUS)) {
|
||||
console.log(compiledVertShaderString);
|
||||
console.error('An error occurred compiling the shaders: ' + gl.getShaderInfoLog(vertShader));
|
||||
throw 'Error compiling vertex shader';
|
||||
}
|
||||
if (!gl.getShaderParameter(fragShader, gl.COMPILE_STATUS)) {
|
||||
console.log(compiledFragShaderString);
|
||||
console.error('An error occurred compiling the shaders: ' + gl.getShaderInfoLog(fragShader));
|
||||
throw 'Error compiling fragment shader';
|
||||
}
|
||||
|
||||
if (this.debug) {
|
||||
if (this.debug) {
|
||||
console.log('Options:');
|
||||
console.dir(this);
|
||||
console.log('GLSL Shader Output:');
|
||||
console.log(compiledFragShaderString);
|
||||
console.log('OpenCL Shader Output:');
|
||||
console.log(compiledKernelString);
|
||||
}
|
||||
|
||||
const program = this.program = gl.createProgram();
|
||||
gl.attachShader(program, vertShader);
|
||||
gl.attachShader(program, fragShader);
|
||||
gl.linkProgram(program);
|
||||
this.framebuffer = gl.createFramebuffer();
|
||||
this.framebuffer.width = texSize[0];
|
||||
this.framebuffer.height = texSize[1];
|
||||
this.program = cl.createProgram(compiledKernelString);
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -501,37 +411,6 @@ module.exports = class OpenCLKernel extends KernelBase {
|
||||
return location;
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getFragShaderArtifactMap
|
||||
*
|
||||
* @desc Generate Shader artifacts for the kernel program.
|
||||
* The final object contains HEADER, KERNEL, MAIN_RESULT, and others.
|
||||
*
|
||||
* @param {Array} args - The actual parameters sent to the Kernel
|
||||
*
|
||||
* @returns {Object} An object containing the Shader Artifacts(CONSTANTS, HEADER, KERNEL, etc.)
|
||||
*
|
||||
*/
|
||||
_getFragShaderArtifactMap(args) {
|
||||
return {
|
||||
HEADER: this._getHeaderString(),
|
||||
LOOP_MAX: this._getLoopMaxString(),
|
||||
CONSTANTS: this._getConstantsString(),
|
||||
DECODE32_ENDIANNESS: this._getDecode32EndiannessString(),
|
||||
ENCODE32_ENDIANNESS: this._getEncode32EndiannessString(),
|
||||
GET_WRAPAROUND: this._getGetWraparoundString(),
|
||||
GET_TEXTURE_CHANNEL: this._getGetTextureChannelString(),
|
||||
GET_TEXTURE_INDEX: this._getGetTextureIndexString(),
|
||||
GET_RESULT: this._getGetResultString(),
|
||||
MAIN_PARAMS: this._getMainParamsString(args),
|
||||
MAIN_CONSTANTS: this._getMainConstantsString(),
|
||||
KERNEL: this._getKernelString(),
|
||||
MAIN_RESULT: this._getMainResultString()
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
@ -546,489 +425,26 @@ module.exports = class OpenCLKernel extends KernelBase {
|
||||
*
|
||||
*/
|
||||
_addArgument(value, type, name) {
|
||||
const gl = this._openCl;
|
||||
const cl = this._openCl;
|
||||
const argumentTexture = this.getArgumentTexture(name);
|
||||
if (value.constructor === Texture) {
|
||||
type = 'Texture';
|
||||
}
|
||||
switch (type) {
|
||||
case 'Array':
|
||||
{
|
||||
const dim = utils.getDimensions(value, true);
|
||||
const size = utils.dimToTexSize({
|
||||
floatTextures: this.floatTextures,
|
||||
floatOutput: this.floatOutput
|
||||
}, dim);
|
||||
// Initialize data on the host side:
|
||||
const n = 1000;
|
||||
const bytes = n * double.size;
|
||||
const pointer = new Buffer(n * double.size);
|
||||
|
||||
gl.activeTexture(gl.TEXTURE0 + this.argumentsLength);
|
||||
gl.bindTexture(gl.TEXTURE_2D, argumentTexture);
|
||||
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
|
||||
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
|
||||
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
|
||||
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST);
|
||||
// // Initialize vectors on host
|
||||
// for (var i = 0; i < n; i++) {
|
||||
// var offset = i * double.size;
|
||||
// double.set(h_a, offset, 0.1 + 0.2);
|
||||
// double.set(h_b, offset, 0);
|
||||
// }
|
||||
|
||||
let length = size[0] * size[1];
|
||||
if (this.floatTextures) {
|
||||
length *= 4;
|
||||
}
|
||||
|
||||
const valuesFlat = new Float32Array(length);
|
||||
utils.flattenTo(value, valuesFlat);
|
||||
|
||||
let buffer;
|
||||
if (this.floatTextures) {
|
||||
buffer = new Float32Array(valuesFlat);
|
||||
gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, size[0], size[1], 0, gl.RGBA, gl.FLOAT, buffer);
|
||||
} else {
|
||||
buffer = new Uint8Array((new Float32Array(valuesFlat)).buffer);
|
||||
gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, size[0], size[1], 0, gl.RGBA, gl.UNSIGNED_BYTE, buffer);
|
||||
}
|
||||
|
||||
const loc = this.getUniformLocation('user_' + name);
|
||||
const locSize = this.getUniformLocation('user_' + name + 'Size');
|
||||
const dimLoc = this.getUniformLocation('user_' + name + 'Dim');
|
||||
|
||||
if (!this.hardcodeConstants) {
|
||||
gl.uniform3fv(dimLoc, dim);
|
||||
gl.uniform2fv(locSize, size);
|
||||
}
|
||||
gl.uniform1i(loc, this.argumentsLength);
|
||||
break;
|
||||
}
|
||||
case 'Number':
|
||||
{
|
||||
const loc = this.getUniformLocation('user_' + name);
|
||||
gl.uniform1f(loc, value);
|
||||
break;
|
||||
}
|
||||
case 'Texture':
|
||||
{
|
||||
const inputTexture = value;
|
||||
const dim = utils.getDimensions(inputTexture.dimensions, true);
|
||||
const size = inputTexture.size;
|
||||
|
||||
gl.activeTexture(gl.TEXTURE0 + this.argumentsLength);
|
||||
gl.bindTexture(gl.TEXTURE_2D, inputTexture.texture);
|
||||
|
||||
const loc = this.getUniformLocation('user_' + name);
|
||||
const locSize = this.getUniformLocation('user_' + name + 'Size');
|
||||
const dimLoc = this.getUniformLocation('user_' + name + 'Dim');
|
||||
|
||||
gl.uniform3fv(dimLoc, dim);
|
||||
gl.uniform2fv(locSize, size);
|
||||
gl.uniform1i(loc, this.argumentsLength);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw 'Input type not supported (WebGL): ' + value;
|
||||
}
|
||||
const buffer = new CLBuffer(cl, defs.CL_MEM_READ_ONLY, bytes);
|
||||
this.queue.enqueueWriteBuffer(buffer, 0, bytes, pointer);
|
||||
this.kernel.setArg(this.argumentsLength, buffer);
|
||||
this.argumentsLength++;
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getHeaderString
|
||||
*
|
||||
* @desc Get the header string for the program.
|
||||
* This returns an empty string if no sub-kernels are defined.
|
||||
*
|
||||
* @returns {String} result
|
||||
*
|
||||
*/
|
||||
_getHeaderString() {
|
||||
return (
|
||||
this.subKernels !== null || this.subKernelProperties !== null ?
|
||||
//webgl2 '#version 300 es\n' :
|
||||
'#extension GL_EXT_draw_buffers : require\n' :
|
||||
''
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getLoopMaxString
|
||||
*
|
||||
* @desc Get the maximum loop size String.
|
||||
*
|
||||
* @returns {String} result
|
||||
*
|
||||
*/
|
||||
_getLoopMaxString() {
|
||||
return (
|
||||
this.loopMaxIterations ?
|
||||
` ${ parseInt(this.loopMaxIterations) }.0;\n` :
|
||||
' 100.0;\n'
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getConstantsString
|
||||
*
|
||||
* @desc Generate transpiled glsl Strings for constant parameters sent to a kernel
|
||||
*
|
||||
* They can be defined by *hardcodeConstants*
|
||||
*
|
||||
* @returns {String} result
|
||||
*
|
||||
*/
|
||||
_getConstantsString() {
|
||||
const result = [];
|
||||
const threadDim = this.threadDim;
|
||||
const texSize = this.texSize;
|
||||
if (this.hardcodeConstants) {
|
||||
result.push(
|
||||
`highp vec3 uOutputDim = vec3(${ threadDim[0] },${ threadDim[1] }, ${ threadDim[2] })`,
|
||||
`highp vec2 uTexSize = vec2(${ texSize[0] }, ${ texSize[1] })`
|
||||
);
|
||||
} else {
|
||||
result.push(
|
||||
'uniform highp vec3 uOutputDim',
|
||||
'uniform highp vec2 uTexSize'
|
||||
);
|
||||
}
|
||||
|
||||
return this._linesToString(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getTextureCoordinate
|
||||
*
|
||||
* @desc Get texture coordinate string for the program
|
||||
*
|
||||
* @returns {String} result
|
||||
*
|
||||
*/
|
||||
_getTextureCoordinate() {
|
||||
const names = this.subKernelOutputVariableNames;
|
||||
if (names === null || names.length < 1) {
|
||||
return 'varying highp vec2 vTexCoord;\n';
|
||||
} else {
|
||||
return 'out highp vec2 vTexCoord;\n';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getDecode32EndiannessString
|
||||
*
|
||||
* @desc Get Decode32 endianness string for little-endian and big-endian
|
||||
*
|
||||
* @returns {String} result
|
||||
*
|
||||
*/
|
||||
_getDecode32EndiannessString() {
|
||||
return (
|
||||
this.endianness === 'LE' ?
|
||||
'' :
|
||||
' rgba.rgba = rgba.abgr;\n'
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getEncode32EndiannessString
|
||||
*
|
||||
* @desc Get Encode32 endianness string for little-endian and big-endian
|
||||
*
|
||||
* @returns {String} result
|
||||
*
|
||||
*/
|
||||
_getEncode32EndiannessString() {
|
||||
return (
|
||||
this.endianness === 'LE' ?
|
||||
'' :
|
||||
' rgba.rgba = rgba.abgr;\n'
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @function
|
||||
* @memberOf OpenCLKernel#
|
||||
* @name _getGetWraparoundString
|
||||
*
|
||||
* @returns {String} wraparound string
|
||||
*/
|
||||
_getGetWraparoundString() {
|
||||
return (
|
||||
this.wraparound ?
|
||||
' xyz = mod(xyz, texDim);\n' :
|
||||
''
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getGetTextureChannelString
|
||||
*
|
||||
*/
|
||||
_getGetTextureChannelString() {
|
||||
if (!this.floatTextures) return '';
|
||||
|
||||
return this._linesToString([
|
||||
' int channel = int(integerMod(index, 4.0))',
|
||||
' index = float(int(index) / 4)'
|
||||
]);
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getGetTextureIndexString
|
||||
*
|
||||
* @desc Get generic texture index string, if floatTextures flag is true.
|
||||
*
|
||||
* @example
|
||||
* ' index = float(int(index)/4);\n'
|
||||
*
|
||||
*/
|
||||
_getGetTextureIndexString() {
|
||||
return (
|
||||
this.floatTextures ?
|
||||
' index = float(int(index)/4);\n' :
|
||||
''
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getGetResultString
|
||||
*
|
||||
*/
|
||||
_getGetResultString() {
|
||||
if (!this.floatTextures) return ' return decode32(texel);\n';
|
||||
return this._linesToString([
|
||||
' if (channel == 0) return texel.r',
|
||||
' if (channel == 1) return texel.g',
|
||||
' if (channel == 2) return texel.b',
|
||||
' if (channel == 3) return texel.a'
|
||||
]);
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getMainParamsString
|
||||
*
|
||||
* @desc Generate transpiled glsl Strings for user-defined parameters sent to a kernel
|
||||
*
|
||||
* @param {Array} args - The actual parameters sent to the Kernel
|
||||
*
|
||||
* @returns {String} result
|
||||
*
|
||||
*/
|
||||
_getMainParamsString(args) {
|
||||
const result = [];
|
||||
const paramTypes = this.paramTypes;
|
||||
const paramNames = this.paramNames;
|
||||
for (let i = 0; i < paramNames.length; i++) {
|
||||
const param = args[i];
|
||||
const paramName = paramNames[i];
|
||||
const paramType = paramTypes[i];
|
||||
if (this.hardcodeConstants) {
|
||||
if (paramType === 'Array' || paramType === 'Texture') {
|
||||
const paramDim = utils.getDimensions(param, true);
|
||||
const paramSize = utils.dimToTexSize({
|
||||
floatTextures: this.floatTextures,
|
||||
floatOutput: this.floatOutput
|
||||
}, paramDim);
|
||||
|
||||
result.push(
|
||||
`uniform highp sampler2D user_${ paramName }`,
|
||||
`highp vec2 user_${ paramName }Size = vec2(${ paramSize[0] }.0, ${ paramSize[1] }.0)`,
|
||||
`highp vec3 user_${ paramName }Dim = vec3(${ paramDim[0] }.0, ${ paramDim[1]}.0, ${ paramDim[2] }.0)`
|
||||
);
|
||||
} else if (paramType === 'Number' && Number.isInteger(param)) {
|
||||
result.push(`highp float user_${ paramName } = ${ param }.0`);
|
||||
} else if (paramType === 'Number') {
|
||||
result.push(`highp float user_${ paramName } = ${ param }`);
|
||||
}
|
||||
} else {
|
||||
if (paramType === 'Array' || paramType === 'Texture') {
|
||||
result.push(
|
||||
`uniform highp sampler2D user_${ paramName }`,
|
||||
`uniform highp vec2 user_${ paramName }Size`,
|
||||
`uniform highp vec3 user_${ paramName }Dim`
|
||||
);
|
||||
} else if (paramType === 'Number') {
|
||||
result.push(`uniform highp float user_${ paramName }`);
|
||||
}
|
||||
}
|
||||
}
|
||||
return this._linesToString(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getMainConstantsString
|
||||
*
|
||||
*/
|
||||
_getMainConstantsString() {
|
||||
const result = [];
|
||||
if (this.constants) {
|
||||
for (let name in this.constants) {
|
||||
if (!this.constants.hasOwnProperty(name)) continue;
|
||||
let value = parseFloat(this.constants[name]);
|
||||
|
||||
if (Number.isInteger(value)) {
|
||||
result.push('const float constants_' + name + ' = ' + parseInt(value) + '.0');
|
||||
} else {
|
||||
result.push('const float constants_' + name + ' = ' + parseFloat(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
return this._linesToString(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getKernelString
|
||||
*
|
||||
* @desc Get Kernel program string (in *glsl*) for a kernel.
|
||||
*
|
||||
* @returns {String} result
|
||||
*
|
||||
*/
|
||||
_getKernelString() {
|
||||
const result = [];
|
||||
const names = this.subKernelOutputVariableNames;
|
||||
if (names !== null) {
|
||||
result.push('highp float kernelResult = 0.0');
|
||||
for (let i = 0; i < names.length; i++) {
|
||||
result.push(
|
||||
`highp float ${ names[i] } = 0.0`
|
||||
);
|
||||
}
|
||||
|
||||
/* this is v2 prep
|
||||
result.push('highp float kernelResult = 0.0');
|
||||
result.push('layout(location = 0) out highp float fradData0 = 0.0');
|
||||
for (let i = 0; i < names.length; i++) {
|
||||
result.push(
|
||||
`highp float ${ names[i] } = 0.0`,
|
||||
`layout(location = ${ i + 1 }) out highp float fragData${ i + 1 } = 0.0`
|
||||
);
|
||||
}*/
|
||||
} else {
|
||||
result.push('highp float kernelResult = 0.0');
|
||||
}
|
||||
|
||||
return this._linesToString(result) + this.functionBuilder.getPrototypeString('kernel');
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getMainResultString
|
||||
*
|
||||
* @desc Get main result string with checks for floatOutput, graphical, subKernelsOutputs, etc.
|
||||
*
|
||||
* @returns {String} result
|
||||
*
|
||||
*/
|
||||
_getMainResultString() {
|
||||
const names = this.subKernelOutputVariableNames;
|
||||
const result = [];
|
||||
if (this.floatOutput) {
|
||||
result.push(' index *= 4.0');
|
||||
}
|
||||
|
||||
if (this.graphical) {
|
||||
result.push(
|
||||
' threadId = indexTo3D(index, uOutputDim)',
|
||||
' kernel()',
|
||||
' gl_FragColor = actualColor'
|
||||
);
|
||||
} else if (this.floatOutput) {
|
||||
result.push(
|
||||
' threadId = indexTo3D(index, uOutputDim)',
|
||||
' kernel()',
|
||||
' gl_FragColor.r = kernelResult',
|
||||
' index += 1.0',
|
||||
' threadId = indexTo3D(index, uOutputDim)',
|
||||
' kernel()',
|
||||
' gl_FragColor.g = kernelResult',
|
||||
' index += 1.0',
|
||||
' threadId = indexTo3D(index, uOutputDim)',
|
||||
' kernel()',
|
||||
' gl_FragColor.b = kernelResult',
|
||||
' index += 1.0',
|
||||
' threadId = indexTo3D(index, uOutputDim)',
|
||||
' kernel()',
|
||||
' gl_FragColor.a = kernelResult'
|
||||
);
|
||||
} else if (names !== null) {
|
||||
result.push(' threadId = indexTo3D(index, uOutputDim)');
|
||||
result.push(' kernel()');
|
||||
result.push(' gl_FragData[0] = encode32(kernelResult)');
|
||||
for (let i = 0; i < names.length; i++) {
|
||||
result.push(` gl_FragData[${ i + 1 }] = encode32(${ names[i] })`);
|
||||
}
|
||||
/* this is v2 prep
|
||||
* result.push(' kernel()');
|
||||
result.push(' fragData0 = encode32(kernelResult)');
|
||||
for (let i = 0; i < names.length; i++) {
|
||||
result.push(` fragData${ i + 1 } = encode32(${ names[i] })`);
|
||||
}*/
|
||||
} else {
|
||||
result.push(
|
||||
' threadId = indexTo3D(index, uOutputDim)',
|
||||
' kernel()',
|
||||
' gl_FragColor = encode32(kernelResult)'
|
||||
);
|
||||
}
|
||||
|
||||
return this._linesToString(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _linesToString
|
||||
*
|
||||
* @param {Array} lines - An Array of strings
|
||||
*
|
||||
* @returns {String} Single combined String, seperated by *\n*
|
||||
*
|
||||
*/
|
||||
_linesToString(lines) {
|
||||
if (lines.length > 0) {
|
||||
return lines.join(';\n') + ';\n';
|
||||
} else {
|
||||
return '\n';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _replaceArtifacts
|
||||
*
|
||||
* @param {String} src - Shader string
|
||||
* @param {Array} map - Variables/Constants associated with shader
|
||||
*
|
||||
*/
|
||||
_replaceArtifacts(src, map) {
|
||||
return src.replace(/[ ]*__([A-Z]+[0-9]*([_]?[A-Z])*)__;\n/g, (match, artifact) => {
|
||||
if (map.hasOwnProperty(artifact)) {
|
||||
return map[artifact];
|
||||
}
|
||||
throw `unhandled artifact ${ artifact }`;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
@ -1086,47 +502,35 @@ module.exports = class OpenCLKernel extends KernelBase {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getFragShaderString
|
||||
*
|
||||
* @desc Get the fragment shader String.
|
||||
* If the String hasn't been compiled yet,
|
||||
* then this method compiles it as well
|
||||
*
|
||||
* @param {Array} args - The actual parameters sent to the Kernel
|
||||
*
|
||||
* @returns {String} Fragment Shader string
|
||||
*
|
||||
*/
|
||||
_getFragShaderString(args) {
|
||||
if (this.compiledFragShaderString !== null) {
|
||||
return this.compiledFragShaderString;
|
||||
}
|
||||
return this.compiledFragShaderString = this._replaceArtifacts(fragShaderString, this._getFragShaderArtifactMap(args));
|
||||
}
|
||||
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
* @name _getVertShaderString
|
||||
*
|
||||
* @desc Get the vertical shader String
|
||||
*
|
||||
* @param {Array} args - The actual parameters sent to the Kernel
|
||||
*
|
||||
* @returns {String} Vertical Shader string
|
||||
*
|
||||
*/
|
||||
_getVertShaderString(args) {
|
||||
if (this.compiledVertShaderString !== null) {
|
||||
return this.compiledVertShaderString;
|
||||
}
|
||||
//TODO: webgl2 compile like frag shader
|
||||
return this.compiledVertShaderString = vertShaderString;
|
||||
}
|
||||
getDevice() {
|
||||
if (device !== null) return device;
|
||||
const mode = this.mode;
|
||||
for (let platformIndex = 0; platformIndex < platforms.length; platformIndex++) {
|
||||
const devices = (mode === 'gpu'
|
||||
? platforms[platformIndex].gpuDevices()
|
||||
: platforms[platformIndex].cpuDevices());
|
||||
|
||||
for (let deviceIndex = 0; deviceIndex < devices.length; deviceIndex++) {
|
||||
// Is double precision supported?
|
||||
// See: https://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
|
||||
if (devices[deviceIndex].doubleFpConfig
|
||||
& (
|
||||
defs.CL_FP_FMA
|
||||
| defs.CL_FP_ROUND_TO_NEAREST
|
||||
| defs.CL_FP_ROUND_TO_ZERO
|
||||
| defs.CL_FP_ROUND_TO_INF
|
||||
| defs.CL_FP_INF_NAN
|
||||
| defs.CL_FP_DENORM
|
||||
)) {
|
||||
return devices[deviceIndex];
|
||||
}
|
||||
}
|
||||
if (mode === 'auto') {
|
||||
console.warn('No GPU device has been found, searching for a CPU fallback.');
|
||||
return this.getDevice('cpu');
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @memberOf OpenCLKernel#
|
||||
* @function
|
||||
|
||||
@ -1,132 +0,0 @@
|
||||
module.exports = `__HEADER__;
|
||||
precision highp float;
|
||||
precision highp int;
|
||||
precision highp sampler2D;
|
||||
|
||||
const float LOOP_MAX = __LOOP_MAX__;
|
||||
#define EPSILON 0.0000001;
|
||||
|
||||
__CONSTANTS__;
|
||||
|
||||
varying highp vec2 vTexCoord;
|
||||
|
||||
vec4 round(vec4 x) {
|
||||
return floor(x + 0.5);
|
||||
}
|
||||
|
||||
highp float round(highp float x) {
|
||||
return floor(x + 0.5);
|
||||
}
|
||||
|
||||
vec2 integerMod(vec2 x, float y) {
|
||||
vec2 res = floor(mod(x, y));
|
||||
return res * step(1.0 - floor(y), -res);
|
||||
}
|
||||
|
||||
vec3 integerMod(vec3 x, float y) {
|
||||
vec3 res = floor(mod(x, y));
|
||||
return res * step(1.0 - floor(y), -res);
|
||||
}
|
||||
|
||||
vec4 integerMod(vec4 x, vec4 y) {
|
||||
vec4 res = floor(mod(x, y));
|
||||
return res * step(1.0 - floor(y), -res);
|
||||
}
|
||||
|
||||
highp float integerMod(highp float x, highp float y) {
|
||||
highp float res = floor(mod(x, y));
|
||||
return res * (res > floor(y) - 1.0 ? 0.0 : 1.0);
|
||||
}
|
||||
|
||||
highp int integerMod(highp int x, highp int y) {
|
||||
return int(integerMod(float(x), float(y)));
|
||||
}
|
||||
|
||||
// Here be dragons!
|
||||
// DO NOT OPTIMIZE THIS CODE
|
||||
// YOU WILL BREAK SOMETHING ON SOMEBODY\'S MACHINE
|
||||
// LEAVE IT AS IT IS, LEST YOU WASTE YOUR OWN TIME
|
||||
const vec2 MAGIC_VEC = vec2(1.0, -256.0);
|
||||
const vec4 SCALE_FACTOR = vec4(1.0, 256.0, 65536.0, 0.0);
|
||||
const vec4 SCALE_FACTOR_INV = vec4(1.0, 0.00390625, 0.0000152587890625, 0.0); // 1, 1/256, 1/65536
|
||||
highp float decode32(highp vec4 rgba) {
|
||||
__DECODE32_ENDIANNESS__;
|
||||
rgba *= 255.0;
|
||||
vec2 gte128;
|
||||
gte128.x = rgba.b >= 128.0 ? 1.0 : 0.0;
|
||||
gte128.y = rgba.a >= 128.0 ? 1.0 : 0.0;
|
||||
float exponent = 2.0 * rgba.a - 127.0 + dot(gte128, MAGIC_VEC);
|
||||
float res = exp2(round(exponent));
|
||||
rgba.b = rgba.b - 128.0 * gte128.x;
|
||||
res = dot(rgba, SCALE_FACTOR) * exp2(round(exponent-23.0)) + res;
|
||||
res *= gte128.y * -2.0 + 1.0;
|
||||
return res;
|
||||
}
|
||||
|
||||
highp vec4 encode32(highp float f) {
|
||||
highp float F = abs(f);
|
||||
highp float sign = f < 0.0 ? 1.0 : 0.0;
|
||||
highp float exponent = floor(log2(F));
|
||||
highp float mantissa = (exp2(-exponent) * F);
|
||||
// exponent += floor(log2(mantissa));
|
||||
vec4 rgba = vec4(F * exp2(23.0-exponent)) * SCALE_FACTOR_INV;
|
||||
rgba.rg = integerMod(rgba.rg, 256.0);
|
||||
rgba.b = integerMod(rgba.b, 128.0);
|
||||
rgba.a = exponent*0.5 + 63.5;
|
||||
rgba.ba += vec2(integerMod(exponent+127.0, 2.0), sign) * 128.0;
|
||||
rgba = floor(rgba);
|
||||
rgba *= 0.003921569; // 1/255
|
||||
__ENCODE32_ENDIANNESS__;
|
||||
return rgba;
|
||||
}
|
||||
// Dragons end here
|
||||
|
||||
highp float index;
|
||||
highp vec3 threadId;
|
||||
|
||||
highp vec3 indexTo3D(highp float idx, highp vec3 texDim) {
|
||||
highp float z = floor(idx / (texDim.x * texDim.y));
|
||||
idx -= z * texDim.x * texDim.y;
|
||||
highp float y = floor(idx / texDim.x);
|
||||
highp float x = integerMod(idx, texDim.x);
|
||||
return vec3(x, y, z);
|
||||
}
|
||||
|
||||
highp float get(highp sampler2D tex, highp vec2 texSize, highp vec3 texDim, highp float z, highp float y, highp float x) {
|
||||
highp vec3 xyz = vec3(x, y, z);
|
||||
xyz = floor(xyz + 0.5);
|
||||
__GET_WRAPAROUND__;
|
||||
highp float index = round(xyz.x + texDim.x * (xyz.y + texDim.y * xyz.z));
|
||||
__GET_TEXTURE_CHANNEL__;
|
||||
highp float w = round(texSize.x);
|
||||
vec2 st = vec2(integerMod(index, w), float(int(index) / int(w))) + 0.5;
|
||||
__GET_TEXTURE_INDEX__;
|
||||
highp vec4 texel = texture2D(tex, st / texSize);
|
||||
__GET_RESULT__;
|
||||
}
|
||||
|
||||
highp float get(highp sampler2D tex, highp vec2 texSize, highp vec3 texDim, highp float y, highp float x) {
|
||||
return get(tex, texSize, texDim, 0.0, y, x);
|
||||
}
|
||||
|
||||
highp float get(highp sampler2D tex, highp vec2 texSize, highp vec3 texDim, highp float x) {
|
||||
return get(tex, texSize, texDim, 0.0, 0.0, x);
|
||||
}
|
||||
|
||||
highp vec4 actualColor;
|
||||
void color(float r, float g, float b, float a) {
|
||||
actualColor = vec4(r,g,b,a);
|
||||
}
|
||||
|
||||
void color(float r, float g, float b) {
|
||||
color(r,g,b,1.0);
|
||||
}
|
||||
|
||||
__MAIN_PARAMS__;
|
||||
__MAIN_CONSTANTS__;
|
||||
__KERNEL__;
|
||||
|
||||
void main(void) {
|
||||
index = floor(vTexCoord.s * float(uTexSize.x)) + floor(vTexCoord.t * float(uTexSize.y)) * uTexSize.x;
|
||||
__MAIN_RESULT__;
|
||||
}`;
|
||||
@ -1,13 +0,0 @@
|
||||
// GLSL shader source exported as a string. Its main() writes gl_Position
// from the aPos attribute (z = 0, w = 1) and copies the aTexCoord attribute
// into the vTexCoord varying. Unlike the fragment template it contains no
// `__NAME__;` placeholders.
module.exports = `precision highp float;
|
||||
precision highp int;
|
||||
precision highp sampler2D;
|
||||
|
||||
attribute highp vec2 aPos;
|
||||
attribute highp vec2 aTexCoord;
|
||||
|
||||
varying highp vec2 vTexCoord;
|
||||
|
||||
void main(void) {
|
||||
gl_Position = vec4(aPos, 0, 1);
|
||||
vTexCoord = aTexCoord;
|
||||
}`;
|
||||
Loading…
x
Reference in New Issue
Block a user