mirror of
https://github.com/trekhleb/javascript-algorithms.git
synced 2025-12-08 19:06:00 +00:00
* Add "Binary representation of floating point numbers" section. * Adding a half-precision explanatory picture. * Binary representation of the floating-point numbers.
120 lines
3.6 KiB
JavaScript
120 lines
3.6 KiB
JavaScript
/**
 * Sequence of 0s and 1s.
 * @typedef {number[]} Bits
 */
|
|
|
|
/**
 * @typedef {{
 *   signBitsCount: number,
 *   exponentBitsCount: number,
 *   fractionBitsCount: number,
 * }} PrecisionConfig
 */
|
|
|
|
/**
 * @typedef {{
 *   half: PrecisionConfig,
 *   single: PrecisionConfig,
 *   double: PrecisionConfig
 * }} PrecisionConfigs
 */
|
|
|
|
/**
 * Bit layouts of the supported floating point formats.
 *
 * Every format is laid out as: sign bit, then exponent bits, then fraction bits.
 * Half-precision example (1 + 5 + 10 bits):
 *
 *   ┌───────────────── sign bit
 *   │   ┌───────────── exponent bits
 *   │   │       ┌───── fraction bits
 *   │   │       │
 *   X XXXXX XXXXXXXXXX
 *
 * @type {PrecisionConfigs}
 */
const precisionConfigs = {
  // 16 bits. @see: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
  half: { signBitsCount: 1, exponentBitsCount: 5, fractionBitsCount: 10 },

  // 32 bits. @see: https://en.wikipedia.org/wiki/Single-precision_floating-point_format
  single: { signBitsCount: 1, exponentBitsCount: 8, fractionBitsCount: 23 },

  // 64 bits. @see: https://en.wikipedia.org/wiki/Double-precision_floating-point_format
  double: { signBitsCount: 1, exponentBitsCount: 11, fractionBitsCount: 52 },
};
|
|
|
|
/**
 * Converts the binary representation of the floating point number to decimal float number.
 *
 * The bits are read as: sign bit, then `exponentBitsCount` exponent bits, then
 * all remaining bits as the fraction. Besides normal numbers, the reserved
 * exponent encodings are decoded as well:
 *   - exponent all 0s: signed zero (fraction is 0) or a subnormal number,
 *   - exponent all 1s: signed Infinity (fraction is 0) or NaN.
 *
 * @param {Bits} bits - sequence of bits that represents the floating point number.
 * @param {PrecisionConfig} precisionConfig - half/single/double precision config.
 * @return {number} - floating point number decoded from its binary representation.
 */
function bitsToFloat(bits, precisionConfig) {
  const { signBitsCount, exponentBitsCount } = precisionConfig;

  // Figuring out the sign: (-1)^1 = -1, (-1)^0 = 1.
  const sign = (-1) ** bits[0];

  // Calculating the stored (biased) exponent value.
  const exponentBias = 2 ** (exponentBitsCount - 1) - 1;
  const exponentBits = bits.slice(signBitsCount, signBitsCount + exponentBitsCount);
  const exponentBiased = exponentBits.reduce(
    (exponentSoFar, currentBit, bitIndex) => {
      // The first exponent bit is the most significant one.
      const bitPowerOfTwo = 2 ** (exponentBitsCount - bitIndex - 1);
      return exponentSoFar + currentBit * bitPowerOfTwo;
    },
    0,
  );

  // Calculating the fraction value: the i-th fraction bit weighs 2^-(i + 1).
  const fractionBits = bits.slice(signBitsCount + exponentBitsCount);
  const fraction = fractionBits.reduce(
    (fractionSoFar, currentBit, bitIndex) => {
      const bitPowerOfTwo = 2 ** -(bitIndex + 1);
      return fractionSoFar + currentBit * bitPowerOfTwo;
    },
    0,
  );

  // Exponent of all 1s is reserved for Infinity (zero fraction) and NaN.
  const exponentAllOnes = 2 ** exponentBitsCount - 1;
  if (exponentBiased === exponentAllOnes) {
    return fraction === 0 ? sign * Infinity : NaN;
  }

  // Exponent of all 0s encodes zero and subnormal numbers: there is no
  // implicit leading 1 and the exponent is fixed at (1 - bias).
  // Multiplying by the sign keeps the distinction between 0 and -0.
  if (exponentBiased === 0) {
    return sign * (2 ** (1 - exponentBias)) * fraction;
  }

  // Normal number: putting all parts together with the implicit leading 1.
  const exponent = exponentBiased - exponentBias;
  return sign * (2 ** exponent) * (1 + fraction);
}
|
|
|
|
/**
 * Decodes a 16-bit (half-precision) binary representation into a float number.
 *
 * Delegates to bitsToFloat() using the half-precision config.
 *
 * @param {Bits} bits - sequence of bits that represents the floating point number.
 * @return {number} - floating point number decoded from its binary representation.
 */
export function bitsToFloat16(bits) {
  const { half: halfPrecisionConfig } = precisionConfigs;
  return bitsToFloat(bits, halfPrecisionConfig);
}
|
|
|
|
/**
 * Decodes a 32-bit (single-precision) binary representation into a float number.
 *
 * Delegates to bitsToFloat() using the single-precision config.
 *
 * @param {Bits} bits - sequence of bits that represents the floating point number.
 * @return {number} - floating point number decoded from its binary representation.
 */
export function bitsToFloat32(bits) {
  const { single: singlePrecisionConfig } = precisionConfigs;
  return bitsToFloat(bits, singlePrecisionConfig);
}
|
|
|
|
/**
 * Decodes a 64-bit (double-precision) binary representation into a float number.
 *
 * Delegates to bitsToFloat() using the double-precision config.
 *
 * @param {Bits} bits - sequence of bits that represents the floating point number.
 * @return {number} - floating point number decoded from its binary representation.
 */
export function bitsToFloat64(bits) {
  const { double: doublePrecisionConfig } = precisionConfigs;
  return bitsToFloat(bits, doublePrecisionConfig);
}
|