Colab Notebook
I'm struggling to do this in JS, but it's super easy in Python!
A Note About Entropy in Data
In the context of data, entropy is a measure of uncertainty, randomness, or information content.
1. Information Theory (Shannon Entropy)
In information theory, entropy (denoted H) quantifies how much information is contained in a message or dataset — or how unpredictable the data is.
Formal definition:
For a discrete random variable $X$ with possible outcomes $x_1, \dots, x_n$ and probabilities $P(x_i)$, the Shannon entropy is: $$H(X) = -\sum_{i=1}^{n} P(x_i) \log_2 P(x_i)$$
- High entropy → Data is more random or surprising (e.g., raw noise).
- Low entropy → Data is more predictable or structured (e.g., a file full of zeros).
Example:
- A fair coin toss (50/50): high entropy (you can’t guess the outcome easily).
- A biased coin that always lands heads: zero entropy (you always know the outcome).
2. Data Compression
Entropy gives a lower bound on how much you can compress data:
- If a dataset has high entropy, it contains more unique information, and can't be compressed much.
- If it has low entropy, it has redundancy, and can be compressed effectively.
JPEG, ZIP, and other compression formats exploit this by removing redundancy (patterns and repetitions); lossy formats such as JPEG additionally discard detail we are unlikely to notice.
3. Entropy in Data Corruption or Degradation
When data loses structure (e.g., through corruption, noise, or glitching), entropy tends to increase, since the data becomes more random or less ordered.
This is relevant in your exploration of "structured erosion" of media—entropy can describe how order decays over time.
Would you like a small Python example showing how to compute entropy on an image or dataset?
Attempt in Javascript...
/**
 * Loads an image and passes its pixels to `callback` as an ImageData object.
 *
 * The image is drawn onto an off-screen canvas of the same size and read back
 * with getImageData.
 *
 * @param {string} imagePath - URL assigned to the image's `src`.
 * @param {function(ImageData): void} callback - Called with the image pixels once
 *   the image has loaded. It is not called when loading fails; the failure is
 *   reported through console.error instead.
 */
function loadImageData(imagePath, callback) {
  const img = new Image()
  // Attach both handlers before assigning src so no load/error event can be missed.
  img.onload = () => {
    const w = img.width
    const h = img.height
    // draw into a canvas to get image data
    const canvas = document.createElement('canvas')
    canvas.width = w
    canvas.height = h
    const ctx = canvas.getContext('2d')
    ctx.drawImage(img, 0, 0, w, h)
    callback(ctx.getImageData(0, 0, w, h))
  }
  img.onerror = () => {
    // Without this handler a failed load is silent: the callback simply never runs.
    console.error(`loadImageData: failed to load image "${imagePath}"`)
  }
  img.src = imagePath
}
JPEGs are fricking magic
It's like that scene in Willy Wonka where the kid is sent through the TV
The Process
1: Convert to YCbCr color space
We perceive fine detail in brightness much better than fine detail in color (this comes down to the rods and cones in our eyes), and we are more sensitive to green than to other colors. JPEG (and many other technologies) exploit this "hack" by converting the image to the YCbCr color space, which separates brightness (Y) from color (Cb and Cr).
Original
Y
Cb
Cr
Conversion code:
/**
 * Splits an RGBA image into three grayscale images, one per YCbCr component.
 *
 * Each output pixel stores its component value in R, G and B with alpha 255,
 * so every component can be drawn directly onto a canvas. The input alpha
 * channel is ignored.
 *
 * @param {ImageData} imageData - Source RGBA pixels.
 * @returns {{yData: ImageData, cbData: ImageData, crData: ImageData}} One image per
 *   component, each the same size as the input.
 */
function convertToYCbCr(imageData) {
  const { width, height, data: rgba } = imageData
  const yData = new ImageData(width, height)
  const cbData = new ImageData(width, height)
  const crData = new ImageData(width, height)

  // Keep the value inside the displayable 0-255 range
  const clampByte = (value) => Math.max(0, Math.min(255, value))

  // Store one opaque gray pixel whose three colour channels all equal `level`
  const writeGray = (target, offset, level) => {
    target[offset] = level
    target[offset + 1] = level
    target[offset + 2] = level
    target[offset + 3] = 255
  }

  for (let offset = 0; offset < rgba.length; offset += 4) {
    const red = rgba[offset]
    const green = rgba[offset + 1]
    const blue = rgba[offset + 2]

    // BT.601 "full range" style formulas (similar to JPEG); chroma is offset by 128
    const luma = 0.299 * red + 0.587 * green + 0.114 * blue
    const chromaBlue = -0.168736 * red - 0.331264 * green + 0.5 * blue + 128
    const chromaRed = 0.5 * red - 0.418688 * green - 0.081312 * blue + 128

    writeGray(yData.data, offset, clampByte(luma))
    writeGray(cbData.data, offset, clampByte(chromaBlue))
    writeGray(crData.data, offset, clampByte(chromaRed))
  }

  return { yData, cbData, crData }
}
/**
 * Recombines separate Y, Cb and Cr channel images into one RGB image.
 *
 * Each input stores its component in the red channel, with chroma offset by
 * 128. The output has the dimensions of `yData` and is fully opaque.
 *
 * Chroma is looked up by pixel coordinate rather than by byte offset, so a
 * chroma plane whose size differs from the luma plane (for example after a
 * downsample/upsample round trip on an image whose size is not a multiple of
 * the factor) still lines up. Coordinates beyond a chroma plane's edge reuse
 * its last row/column. When all three planes have the same size the result is
 * the same as a plain offset-for-offset lookup.
 *
 * @param {ImageData} yData - Luma channel; defines the output size.
 * @param {ImageData} cbData - Blue-difference chroma channel.
 * @param {ImageData} crData - Red-difference chroma channel.
 * @returns {ImageData} The recombined RGB image.
 */
function convertToRGB(yData, cbData, crData) {
  const { width, height } = yData
  // Create single RGB ImageData object
  const rgbData = new ImageData(width, height)
  const clampByte = (value) => Math.max(0, Math.min(255, value))

  for (let py = 0; py < height; py++) {
    // First pixel index of the matching chroma rows (clamped to the last row)
    const cbRow = Math.min(py, cbData.height - 1) * cbData.width
    const crRow = Math.min(py, crData.height - 1) * crData.width
    for (let px = 0; px < width; px++) {
      const i = (py * width + px) * 4
      const cbIdx = (cbRow + Math.min(px, cbData.width - 1)) * 4
      const crIdx = (crRow + Math.min(px, crData.width - 1)) * 4

      const y = yData.data[i]
      const cb = cbData.data[cbIdx] - 128 // Subtract 128 to get back to signed values
      const cr = crData.data[crIdx] - 128

      // Convert YCbCr back to RGB using BT.601 "full range" inverse formulas
      rgbData.data[i] = clampByte(y + 1.402 * cr)
      rgbData.data[i + 1] = clampByte(y - 0.344136 * cb - 0.714136 * cr)
      rgbData.data[i + 2] = clampByte(y + 1.772 * cb)
      rgbData.data[i + 3] = 255
    }
  }
  return rgbData
}
/**
 * Splits an RGBA image into three grayscale images, one per YCbCr component.
 *
 * Each output pixel stores its component value in R, G and B with alpha 255,
 * so every component can be drawn directly onto a canvas. The input alpha
 * channel is ignored.
 *
 * @param {ImageData} imageData - Source RGBA pixels.
 * @returns {{yData: ImageData, cbData: ImageData, crData: ImageData}} One image per
 *   component, each the same size as the input.
 */
function convertToYCbCr(imageData) {
  const { width, height, data: rgba } = imageData
  const yData = new ImageData(width, height)
  const cbData = new ImageData(width, height)
  const crData = new ImageData(width, height)

  // Keep the value inside the displayable 0-255 range
  const clampByte = (value) => Math.max(0, Math.min(255, value))

  // Store one opaque gray pixel whose three colour channels all equal `level`
  const writeGray = (target, offset, level) => {
    target[offset] = level
    target[offset + 1] = level
    target[offset + 2] = level
    target[offset + 3] = 255
  }

  for (let offset = 0; offset < rgba.length; offset += 4) {
    const red = rgba[offset]
    const green = rgba[offset + 1]
    const blue = rgba[offset + 2]

    // BT.601 "full range" style formulas (similar to JPEG); chroma is offset by 128
    const luma = 0.299 * red + 0.587 * green + 0.114 * blue
    const chromaBlue = -0.168736 * red - 0.331264 * green + 0.5 * blue + 128
    const chromaRed = 0.5 * red - 0.418688 * green - 0.081312 * blue + 128

    writeGray(yData.data, offset, clampByte(luma))
    writeGray(cbData.data, offset, clampByte(chromaBlue))
    writeGray(crData.data, offset, clampByte(chromaRed))
  }

  return { yData, cbData, crData }
}
/**
 * Loads an image and passes its pixels to `callback` as an ImageData object.
 *
 * The image is drawn onto an off-screen canvas of the same size and read back
 * with getImageData.
 *
 * @param {string} imagePath - URL assigned to the image's `src`.
 * @param {function(ImageData): void} callback - Called with the image pixels once
 *   the image has loaded. It is not called when loading fails; the failure is
 *   reported through console.error instead.
 */
function loadImageData(imagePath, callback) {
  const img = new Image()
  // Attach both handlers before assigning src so no load/error event can be missed.
  img.onload = () => {
    const w = img.width
    const h = img.height
    // draw into a canvas to get image data
    const canvas = document.createElement('canvas')
    canvas.width = w
    canvas.height = h
    const ctx = canvas.getContext('2d')
    ctx.drawImage(img, 0, 0, w, h)
    callback(ctx.getImageData(0, 0, w, h))
  }
  img.onerror = () => {
    // Without this handler a failed load is silent: the callback simply never runs.
    console.error(`loadImageData: failed to load image "${imagePath}"`)
  }
  img.src = imagePath
}
// Canvases that show the source image and each of its YCbCr components,
// together with their 2D drawing contexts.
const originalImageCanvas = document.getElementById('originalImageCanvas')
const yCanvas = document.getElementById('yCanvas')
const cbCanvas = document.getElementById('cbCanvas')
const crCanvas = document.getElementById('crCanvas')
const ctxOriginal = originalImageCanvas.getContext('2d')
const ctxY = yCanvas.getContext('2d')
const ctxCb = cbCanvas.getContext('2d')
const ctxCr = crCanvas.getContext('2d')

// Once the image has loaded, draw it and its three component images.
loadImageData('/images/_notes/jpeg/mad_cat.jpg', (imageData) => {
  const { width, height } = imageData

  // Every canvas takes the image's dimensions
  for (const canvas of [originalImageCanvas, yCanvas, cbCanvas, crCanvas]) {
    canvas.width = width
    canvas.height = height
  }

  // Source image first, then one grayscale image per component
  ctxOriginal.putImageData(imageData, 0, 0)
  const channels = convertToYCbCr(imageData)
  ctxY.putImageData(channels.yData, 0, 0)
  ctxCb.putImageData(channels.cbData, 0, 0)
  ctxCr.putImageData(channels.crData, 0, 0)
})
2: Downsample the chroma channels (Cb and Cr) by a factor of 2, 4, etc
Since we are less sensitive to color, we can downsample the chroma channels by a factor of 2, 4, etc. Here we are downsampling by a factor of 4 to exaggerate the effect. Downsampling is done by averaging the values of the pixels in the block.
/**
 * Shrinks a grayscale channel image by averaging square blocks of pixels.
 *
 * Only the red channel of the input is read; the average is written to R, G
 * and B of the output pixel with alpha 255.
 *
 * Output dimensions are rounded up, so an image whose size is not a multiple
 * of the factor is still fully covered: blocks on the right/bottom edge are
 * averaged over the pixels that actually exist instead of reading past the
 * edge or computing a fractional output index.
 *
 * @param {ImageData} imageData - Channel image to shrink.
 * @param {number} [downsampleFactor=4] - Block edge length in pixels; a positive integer.
 * @returns {ImageData} Image of size ceil(width / factor) x ceil(height / factor).
 * @throws {RangeError} If downsampleFactor is not a positive integer.
 */
function downsample(imageData, downsampleFactor = 4) {
  if (!Number.isInteger(downsampleFactor) || downsampleFactor < 1) {
    throw new RangeError(`downsampleFactor must be a positive integer, got ${downsampleFactor}`)
  }
  const { width: srcWidth, height: srcHeight, data: src } = imageData
  const dstWidth = Math.ceil(srcWidth / downsampleFactor)
  const dstHeight = Math.ceil(srcHeight / downsampleFactor)
  const downsampledData = new ImageData(dstWidth, dstHeight)
  const dst = downsampledData.data

  for (let by = 0; by < dstHeight; by++) {
    const yStart = by * downsampleFactor
    const yEnd = Math.min(yStart + downsampleFactor, srcHeight) // clip bottom-edge blocks
    for (let bx = 0; bx < dstWidth; bx++) {
      const xStart = bx * downsampleFactor
      const xEnd = Math.min(xStart + downsampleFactor, srcWidth) // clip right-edge blocks
      let sum = 0
      for (let y = yStart; y < yEnd; y++) {
        for (let x = xStart; x < xEnd; x++) {
          sum += src[(y * srcWidth + x) * 4]
        }
      }
      // Average over the pixels actually present in this block
      const avg = sum / ((yEnd - yStart) * (xEnd - xStart))
      // Set the downsampled pixel
      const dstIdx = (by * dstWidth + bx) * 4
      dst[dstIdx] = avg
      dst[dstIdx + 1] = avg
      dst[dstIdx + 2] = avg
      dst[dstIdx + 3] = 255
    }
  }
  return downsampledData
}
/**
 * Enlarges an image by pixel replication (nearest neighbour): every source
 * pixel becomes a factor x factor block of identical RGBA pixels.
 *
 * @param {ImageData} imageData - Image to enlarge.
 * @param {number} [upsampleFactor=4] - Scale applied to both dimensions; a positive integer.
 * @returns {ImageData} Image of size (width * factor) x (height * factor).
 * @throws {RangeError} If upsampleFactor is not a positive integer.
 */
function upsample(imageData, upsampleFactor = 4) {
  if (!Number.isInteger(upsampleFactor) || upsampleFactor < 1) {
    throw new RangeError(`upsampleFactor must be a positive integer, got ${upsampleFactor}`)
  }
  const { width: originalWidth, height: originalHeight, data: src } = imageData
  const upsampledWidth = originalWidth * upsampleFactor
  const upsampledHeight = originalHeight * upsampleFactor
  const upsampledData = new ImageData(upsampledWidth, upsampledHeight)
  const dst = upsampledData.data

  for (let destY = 0; destY < upsampledHeight; destY++) {
    // First pixel index of the source row this destination row replicates
    const srcRow = Math.floor(destY / upsampleFactor) * originalWidth
    for (let destX = 0; destX < upsampledWidth; destX++) {
      const srcIdx = (srcRow + Math.floor(destX / upsampleFactor)) * 4
      const dstIdx = (destY * upsampledWidth + destX) * 4
      dst[dstIdx] = src[srcIdx]
      dst[dstIdx + 1] = src[srcIdx + 1]
      dst[dstIdx + 2] = src[srcIdx + 2]
      dst[dstIdx + 3] = src[srcIdx + 3]
    }
  }
  return upsampledData
}
Cb
Cr
Recombined Image After Downsampling (split with original for comparison)
I can't tell the difference between the original and the recombined image, can you?
/**
 * Enlarges an image by pixel replication (nearest neighbour): every source
 * pixel becomes a factor x factor block of identical RGBA pixels.
 *
 * @param {ImageData} imageData - Image to enlarge.
 * @param {number} [upsampleFactor=4] - Scale applied to both dimensions; a positive integer.
 * @returns {ImageData} Image of size (width * factor) x (height * factor).
 * @throws {RangeError} If upsampleFactor is not a positive integer.
 */
function upsample(imageData, upsampleFactor = 4) {
  if (!Number.isInteger(upsampleFactor) || upsampleFactor < 1) {
    throw new RangeError(`upsampleFactor must be a positive integer, got ${upsampleFactor}`)
  }
  const { width: originalWidth, height: originalHeight, data: src } = imageData
  const upsampledWidth = originalWidth * upsampleFactor
  const upsampledHeight = originalHeight * upsampleFactor
  const upsampledData = new ImageData(upsampledWidth, upsampledHeight)
  const dst = upsampledData.data

  for (let destY = 0; destY < upsampledHeight; destY++) {
    // First pixel index of the source row this destination row replicates
    const srcRow = Math.floor(destY / upsampleFactor) * originalWidth
    for (let destX = 0; destX < upsampledWidth; destX++) {
      const srcIdx = (srcRow + Math.floor(destX / upsampleFactor)) * 4
      const dstIdx = (destY * upsampledWidth + destX) * 4
      dst[dstIdx] = src[srcIdx]
      dst[dstIdx + 1] = src[srcIdx + 1]
      dst[dstIdx + 2] = src[srcIdx + 2]
      dst[dstIdx + 3] = src[srcIdx + 3]
    }
  }
  return upsampledData
}
/**
 * Recombines separate Y, Cb and Cr channel images into one RGB image.
 *
 * Each input stores its component in the red channel, with chroma offset by
 * 128. The output has the dimensions of `yData` and is fully opaque.
 *
 * Chroma is looked up by pixel coordinate rather than by byte offset, so a
 * chroma plane whose size differs from the luma plane (for example after a
 * downsample/upsample round trip on an image whose size is not a multiple of
 * the factor) still lines up. Coordinates beyond a chroma plane's edge reuse
 * its last row/column. When all three planes have the same size the result is
 * the same as a plain offset-for-offset lookup.
 *
 * @param {ImageData} yData - Luma channel; defines the output size.
 * @param {ImageData} cbData - Blue-difference chroma channel.
 * @param {ImageData} crData - Red-difference chroma channel.
 * @returns {ImageData} The recombined RGB image.
 */
function convertToRGB(yData, cbData, crData) {
  const { width, height } = yData
  // Create single RGB ImageData object
  const rgbData = new ImageData(width, height)
  const clampByte = (value) => Math.max(0, Math.min(255, value))

  for (let py = 0; py < height; py++) {
    // First pixel index of the matching chroma rows (clamped to the last row)
    const cbRow = Math.min(py, cbData.height - 1) * cbData.width
    const crRow = Math.min(py, crData.height - 1) * crData.width
    for (let px = 0; px < width; px++) {
      const i = (py * width + px) * 4
      const cbIdx = (cbRow + Math.min(px, cbData.width - 1)) * 4
      const crIdx = (crRow + Math.min(px, crData.width - 1)) * 4

      const y = yData.data[i]
      const cb = cbData.data[cbIdx] - 128 // Subtract 128 to get back to signed values
      const cr = crData.data[crIdx] - 128

      // Convert YCbCr back to RGB using BT.601 "full range" inverse formulas
      rgbData.data[i] = clampByte(y + 1.402 * cr)
      rgbData.data[i + 1] = clampByte(y - 0.344136 * cb - 0.714136 * cr)
      rgbData.data[i + 2] = clampByte(y + 1.772 * cb)
      rgbData.data[i + 3] = 255
    }
  }
  return rgbData
}
/**
 * Shrinks a grayscale channel image by averaging square blocks of pixels.
 *
 * Only the red channel of the input is read; the average is written to R, G
 * and B of the output pixel with alpha 255.
 *
 * Output dimensions are rounded up, so an image whose size is not a multiple
 * of the factor is still fully covered: blocks on the right/bottom edge are
 * averaged over the pixels that actually exist instead of reading past the
 * edge or computing a fractional output index.
 *
 * @param {ImageData} imageData - Channel image to shrink.
 * @param {number} [downsampleFactor=4] - Block edge length in pixels; a positive integer.
 * @returns {ImageData} Image of size ceil(width / factor) x ceil(height / factor).
 * @throws {RangeError} If downsampleFactor is not a positive integer.
 */
function downsample(imageData, downsampleFactor = 4) {
  if (!Number.isInteger(downsampleFactor) || downsampleFactor < 1) {
    throw new RangeError(`downsampleFactor must be a positive integer, got ${downsampleFactor}`)
  }
  const { width: srcWidth, height: srcHeight, data: src } = imageData
  const dstWidth = Math.ceil(srcWidth / downsampleFactor)
  const dstHeight = Math.ceil(srcHeight / downsampleFactor)
  const downsampledData = new ImageData(dstWidth, dstHeight)
  const dst = downsampledData.data

  for (let by = 0; by < dstHeight; by++) {
    const yStart = by * downsampleFactor
    const yEnd = Math.min(yStart + downsampleFactor, srcHeight) // clip bottom-edge blocks
    for (let bx = 0; bx < dstWidth; bx++) {
      const xStart = bx * downsampleFactor
      const xEnd = Math.min(xStart + downsampleFactor, srcWidth) // clip right-edge blocks
      let sum = 0
      for (let y = yStart; y < yEnd; y++) {
        for (let x = xStart; x < xEnd; x++) {
          sum += src[(y * srcWidth + x) * 4]
        }
      }
      // Average over the pixels actually present in this block
      const avg = sum / ((yEnd - yStart) * (xEnd - xStart))
      // Set the downsampled pixel
      const dstIdx = (by * dstWidth + bx) * 4
      dst[dstIdx] = avg
      dst[dstIdx + 1] = avg
      dst[dstIdx + 2] = avg
      dst[dstIdx + 3] = 255
    }
  }
  return downsampledData
}
/**
 * Splits an RGBA image into three grayscale images, one per YCbCr component.
 *
 * Each output pixel stores its component value in R, G and B with alpha 255,
 * so every component can be drawn directly onto a canvas. The input alpha
 * channel is ignored.
 *
 * @param {ImageData} imageData - Source RGBA pixels.
 * @returns {{yData: ImageData, cbData: ImageData, crData: ImageData}} One image per
 *   component, each the same size as the input.
 */
function convertToYCbCr(imageData) {
  const { width, height, data: rgba } = imageData
  const yData = new ImageData(width, height)
  const cbData = new ImageData(width, height)
  const crData = new ImageData(width, height)

  // Keep the value inside the displayable 0-255 range
  const clampByte = (value) => Math.max(0, Math.min(255, value))

  // Store one opaque gray pixel whose three colour channels all equal `level`
  const writeGray = (target, offset, level) => {
    target[offset] = level
    target[offset + 1] = level
    target[offset + 2] = level
    target[offset + 3] = 255
  }

  for (let offset = 0; offset < rgba.length; offset += 4) {
    const red = rgba[offset]
    const green = rgba[offset + 1]
    const blue = rgba[offset + 2]

    // BT.601 "full range" style formulas (similar to JPEG); chroma is offset by 128
    const luma = 0.299 * red + 0.587 * green + 0.114 * blue
    const chromaBlue = -0.168736 * red - 0.331264 * green + 0.5 * blue + 128
    const chromaRed = 0.5 * red - 0.418688 * green - 0.081312 * blue + 128

    writeGray(yData.data, offset, clampByte(luma))
    writeGray(cbData.data, offset, clampByte(chromaBlue))
    writeGray(crData.data, offset, clampByte(chromaRed))
  }

  return { yData, cbData, crData }
}
/**
 * Loads an image and passes its pixels to `callback` as an ImageData object.
 *
 * The image is drawn onto an off-screen canvas of the same size and read back
 * with getImageData.
 *
 * @param {string} imagePath - URL assigned to the image's `src`.
 * @param {function(ImageData): void} callback - Called with the image pixels once
 *   the image has loaded. It is not called when loading fails; the failure is
 *   reported through console.error instead.
 */
function loadImageData(imagePath, callback) {
  const img = new Image()
  // Attach both handlers before assigning src so no load/error event can be missed.
  img.onload = () => {
    const w = img.width
    const h = img.height
    // draw into a canvas to get image data
    const canvas = document.createElement('canvas')
    canvas.width = w
    canvas.height = h
    const ctx = canvas.getContext('2d')
    ctx.drawImage(img, 0, 0, w, h)
    callback(ctx.getImageData(0, 0, w, h))
  }
  img.onerror = () => {
    // Without this handler a failed load is silent: the callback simply never runs.
    console.error(`loadImageData: failed to load image "${imagePath}"`)
  }
  img.src = imagePath
}
// Canvases for the downsampled chroma previews and the recombined comparison image.
const cbDownsampledCanvas = document.getElementById('cbDownsampled')
const crDownsampledCanvas = document.getElementById('crDownsampled')
const ctxCbDownsampled = cbDownsampledCanvas.getContext('2d')
const ctxCrDownsampled = crDownsampledCanvas.getContext('2d')
const recombinedCanvas = document.getElementById('recombined')
const recombinedCtx = recombinedCanvas.getContext('2d')

// Downsample the chroma channels, show them, then rebuild the image from full-size
// luma plus upsampled chroma and show it next to the original.
loadImageData('/images/_notes/jpeg/mad_cat.jpg', (imageData) => {
  const { width, height } = imageData
  const { yData, cbData, crData } = convertToYCbCr(imageData)

  // Downsample the chroma channels
  const downsampledCbImageData = downsample(cbData)
  const downsampledCrImageData = downsample(crData)

  // Size each preview canvas from the data it displays. Dividing the image size
  // by 4 yields a fractional value when the size is not a multiple of 4.
  cbDownsampledCanvas.width = downsampledCbImageData.width
  cbDownsampledCanvas.height = downsampledCbImageData.height
  crDownsampledCanvas.width = downsampledCrImageData.width
  crDownsampledCanvas.height = downsampledCrImageData.height

  // display the downsampled chroma channels
  ctxCbDownsampled.putImageData(downsampledCbImageData, 0, 0)
  ctxCrDownsampled.putImageData(downsampledCrImageData, 0, 0)

  // Upsample the chroma channels so we can recombine and compare
  const upsampledCbImageData = upsample(downsampledCbImageData)
  const upsampledCrImageData = upsample(downsampledCrImageData)

  // Recombine the Y, Cb, and Cr channels
  const recombinedImageData = convertToRGB(yData, upsampledCbImageData, upsampledCrImageData)

  // The loop below uses one byte offset for the canvas buffer, the original and
  // the recombined image, so the canvas must have exactly the image's size.
  recombinedCanvas.width = width
  recombinedCanvas.height = height

  // display the recombined image split with original for compare:
  // left half (including the centre column) recombined, right half original
  const displayData = recombinedCtx.getImageData(0, 0, width, height)
  for (let i = 0; i < displayData.data.length; i += 4) {
    const x = (i / 4) % width
    const source = x > width / 2 ? imageData : recombinedImageData
    displayData.data[i] = source.data[i]
    displayData.data[i + 1] = source.data[i + 1]
    displayData.data[i + 2] = source.data[i + 2]
    displayData.data[i + 3] = source.data[i + 3]
  }
  recombinedCtx.putImageData(displayData, 0, 0)
})
3: Apply the DCT (Discrete Cosine Transform)
This is the magical step in JPEG compression: converting image data into frequency data, i.e. going from the spatial domain to the frequency domain.
To do this we use the DCT-II (Discrete Cosine Transform). DCT involves a lot of cosine terms, which are expensive to compute. We can precompute them to speed up the process. Below is a somewhat optimized version of the DCT.
/**
 * Computes a 2D Discrete Cosine Transform (DCT-II) over a whole image.
 *
 * Samples are taken from the red channel and level-shifted by subtracting 128.
 * The transform is separable, so it runs as a 1D pass over every row followed
 * by a 1D pass over every column. Cosine terms and the normalization factors
 * (sqrt(1/N) for frequency 0, sqrt(2/N) otherwise) are tabulated up front.
 *
 * @param {object} imageData - Object with `width`, `height` and `data`, where `data`
 *   holds 4 values per pixel (RGBA) in row-major order, like canvas ImageData.
 * @returns {Array<Array<number>>} Coefficients indexed as [verticalFrequency][horizontalFrequency]
 *   (height x width).
 */
function dctOptimizedWithPrecomputation(imageData) {
  const { width, height, data } = imageData

  // table[k][n] = cos(((2n + 1) * k * PI) / (2 * size))
  const buildCosineTable = (size) => {
    const table = new Array(size)
    for (let k = 0; k < size; k++) {
      const entries = new Array(size)
      for (let n = 0; n < size; n++) {
        entries[n] = Math.cos(((2 * n + 1) * k * Math.PI) / (2 * size))
      }
      table[k] = entries
    }
    return table
  }

  // Normalization factor per frequency index
  const buildAlpha = (size) => {
    const factors = new Array(size)
    for (let k = 0; k < size; k++) {
      factors[k] = k === 0 ? Math.sqrt(1 / size) : Math.sqrt(2 / size)
    }
    return factors
  }

  const cosX = buildCosineTable(width)
  const cosY = buildCosineTable(height)
  const alphaWidth = buildAlpha(width)
  const alphaHeight = buildAlpha(height)

  // Horizontal pass: un-normalized 1D DCT-II of each level-shifted row
  const rowPass = new Array(height)
  for (let r = 0; r < height; r++) {
    const samples = new Array(width)
    for (let x = 0; x < width; x++) {
      samples[x] = data[(r * width + x) * 4] - 128 // red channel, level-shifted
    }
    const transformed = new Array(width)
    for (let v = 0; v < width; v++) {
      const basis = cosX[v]
      let acc = 0
      for (let x = 0; x < width; x++) {
        acc += samples[x] * basis[x]
      }
      transformed[v] = acc
    }
    rowPass[r] = transformed
  }

  // Vertical pass: 1D DCT-II down each column, applying both normalizations at the end
  const dctCoefficients = new Array(height)
  for (let u = 0; u < height; u++) {
    const basis = cosY[u]
    const coefficientRow = new Array(width)
    for (let v = 0; v < width; v++) {
      let acc = 0
      for (let y = 0; y < height; y++) {
        acc += rowPass[y][v] * basis[y]
      }
      coefficientRow[v] = alphaHeight[u] * alphaWidth[v] * acc
    }
    dctCoefficients[u] = coefficientRow
  }
  return dctCoefficients
}
/**
 * Renders DCT coefficient magnitudes as a grayscale image.
 *
 * This is a picture of the frequency magnitudes, not an inverse DCT. Each
 * coefficient becomes |c| (or log(1 + |c|) on the log scale), and the values
 * are then stretched linearly so the smallest maps to 0 and the largest to 255.
 * If every value is identical, the image is uniformly 128 when that value is
 * positive and 0 otherwise.
 *
 * @param {Array<Array<number>>} dctCoefficients - Coefficients indexed as [row][column].
 * @param {number} width - Number of columns to render.
 * @param {number} height - Number of rows to render.
 * @param {boolean} useLogScale - Use log(1 + |c|) so small coefficients stay visible.
 * @returns {ImageData} Opaque grayscale image of size width x height.
 */
function visualizeDctCoefficients(dctCoefficients, width, height, useLogScale = true) {
  const newImageData = new ImageData(width, height)
  const pixels = newImageData.data

  // The "+ 1" keeps zero coefficients finite under the logarithm
  const toMagnitude = useLogScale
    ? (coeff) => Math.log(1 + Math.abs(coeff))
    : (coeff) => Math.abs(coeff)

  // First sweep: gather magnitudes (row-major) and track their extremes
  const magnitudes = []
  let lowest = Infinity
  let highest = -Infinity
  for (let u = 0; u < height; u++) {
    const coefficientRow = dctCoefficients[u]
    for (let v = 0; v < width; v++) {
      const magnitude = toMagnitude(coefficientRow[v])
      magnitudes.push(magnitude)
      if (magnitude < lowest) lowest = magnitude
      if (magnitude > highest) highest = magnitude
    }
  }

  // Second sweep: stretch to 0-255 and write opaque gray pixels
  const span = highest - lowest
  const flatLevel = lowest > 0 ? 128 : 0 // used when there is no spread to stretch
  for (let p = 0; p < magnitudes.length; p++) {
    const stretched = span > 0 ? ((magnitudes[p] - lowest) / span) * 255 : flatLevel
    const gray = Math.max(0, Math.min(255, stretched))
    const offset = p * 4
    pixels[offset] = gray
    pixels[offset + 1] = gray
    pixels[offset + 2] = gray
    pixels[offset + 3] = 255
  }
  return newImageData
}
Results of the DCT:
/**
 * Renders DCT coefficient magnitudes as a grayscale image.
 *
 * This is a picture of the frequency magnitudes, not an inverse DCT. Each
 * coefficient becomes |c| (or log(1 + |c|) on the log scale), and the values
 * are then stretched linearly so the smallest maps to 0 and the largest to 255.
 * If every value is identical, the image is uniformly 128 when that value is
 * positive and 0 otherwise.
 *
 * @param {Array<Array<number>>} dctCoefficients - Coefficients indexed as [row][column].
 * @param {number} width - Number of columns to render.
 * @param {number} height - Number of rows to render.
 * @param {boolean} useLogScale - Use log(1 + |c|) so small coefficients stay visible.
 * @returns {ImageData} Opaque grayscale image of size width x height.
 */
function visualizeDctCoefficients(dctCoefficients, width, height, useLogScale = true) {
  const newImageData = new ImageData(width, height)
  const pixels = newImageData.data

  // The "+ 1" keeps zero coefficients finite under the logarithm
  const toMagnitude = useLogScale
    ? (coeff) => Math.log(1 + Math.abs(coeff))
    : (coeff) => Math.abs(coeff)

  // First sweep: gather magnitudes (row-major) and track their extremes
  const magnitudes = []
  let lowest = Infinity
  let highest = -Infinity
  for (let u = 0; u < height; u++) {
    const coefficientRow = dctCoefficients[u]
    for (let v = 0; v < width; v++) {
      const magnitude = toMagnitude(coefficientRow[v])
      magnitudes.push(magnitude)
      if (magnitude < lowest) lowest = magnitude
      if (magnitude > highest) highest = magnitude
    }
  }

  // Second sweep: stretch to 0-255 and write opaque gray pixels
  const span = highest - lowest
  const flatLevel = lowest > 0 ? 128 : 0 // used when there is no spread to stretch
  for (let p = 0; p < magnitudes.length; p++) {
    const stretched = span > 0 ? ((magnitudes[p] - lowest) / span) * 255 : flatLevel
    const gray = Math.max(0, Math.min(255, stretched))
    const offset = p * 4
    pixels[offset] = gray
    pixels[offset + 1] = gray
    pixels[offset + 2] = gray
    pixels[offset + 3] = 255
  }
  return newImageData
}
/**
 * Computes a 2D Discrete Cosine Transform (DCT-II) over a whole image.
 *
 * Samples are taken from the red channel and level-shifted by subtracting 128.
 * The transform is separable, so it runs as a 1D pass over every row followed
 * by a 1D pass over every column. Cosine terms and the normalization factors
 * (sqrt(1/N) for frequency 0, sqrt(2/N) otherwise) are tabulated up front.
 *
 * @param {object} imageData - Object with `width`, `height` and `data`, where `data`
 *   holds 4 values per pixel (RGBA) in row-major order, like canvas ImageData.
 * @returns {Array<Array<number>>} Coefficients indexed as [verticalFrequency][horizontalFrequency]
 *   (height x width).
 */
function dctOptimizedWithPrecomputation(imageData) {
  const { width, height, data } = imageData

  // table[k][n] = cos(((2n + 1) * k * PI) / (2 * size))
  const buildCosineTable = (size) => {
    const table = new Array(size)
    for (let k = 0; k < size; k++) {
      const entries = new Array(size)
      for (let n = 0; n < size; n++) {
        entries[n] = Math.cos(((2 * n + 1) * k * Math.PI) / (2 * size))
      }
      table[k] = entries
    }
    return table
  }

  // Normalization factor per frequency index
  const buildAlpha = (size) => {
    const factors = new Array(size)
    for (let k = 0; k < size; k++) {
      factors[k] = k === 0 ? Math.sqrt(1 / size) : Math.sqrt(2 / size)
    }
    return factors
  }

  const cosX = buildCosineTable(width)
  const cosY = buildCosineTable(height)
  const alphaWidth = buildAlpha(width)
  const alphaHeight = buildAlpha(height)

  // Horizontal pass: un-normalized 1D DCT-II of each level-shifted row
  const rowPass = new Array(height)
  for (let r = 0; r < height; r++) {
    const samples = new Array(width)
    for (let x = 0; x < width; x++) {
      samples[x] = data[(r * width + x) * 4] - 128 // red channel, level-shifted
    }
    const transformed = new Array(width)
    for (let v = 0; v < width; v++) {
      const basis = cosX[v]
      let acc = 0
      for (let x = 0; x < width; x++) {
        acc += samples[x] * basis[x]
      }
      transformed[v] = acc
    }
    rowPass[r] = transformed
  }

  // Vertical pass: 1D DCT-II down each column, applying both normalizations at the end
  const dctCoefficients = new Array(height)
  for (let u = 0; u < height; u++) {
    const basis = cosY[u]
    const coefficientRow = new Array(width)
    for (let v = 0; v < width; v++) {
      let acc = 0
      for (let y = 0; y < height; y++) {
        acc += rowPass[y][v] * basis[y]
      }
      coefficientRow[v] = alphaHeight[u] * alphaWidth[v] * acc
    }
    dctCoefficients[u] = coefficientRow
  }
  return dctCoefficients
}
/**
 * Loads an image and passes its pixels to `callback` as an ImageData object.
 *
 * The image is drawn onto an off-screen canvas of the same size and read back
 * with getImageData.
 *
 * @param {string} imagePath - URL assigned to the image's `src`.
 * @param {function(ImageData): void} callback - Called with the image pixels once
 *   the image has loaded. It is not called when loading fails; the failure is
 *   reported through console.error instead.
 */
function loadImageData(imagePath, callback) {
  const img = new Image()
  // Attach both handlers before assigning src so no load/error event can be missed.
  img.onload = () => {
    const w = img.width
    const h = img.height
    // draw into a canvas to get image data
    const canvas = document.createElement('canvas')
    canvas.width = w
    canvas.height = h
    const ctx = canvas.getContext('2d')
    ctx.drawImage(img, 0, 0, w, h)
    callback(ctx.getImageData(0, 0, w, h))
  }
  img.onerror = () => {
    // Without this handler a failed load is silent: the callback simply never runs.
    console.error(`loadImageData: failed to load image "${imagePath}"`)
  }
  img.src = imagePath
}
// Canvas that shows the DCT coefficient magnitudes.
const dctResultsCanvas = document.getElementById('dct_results')
const ctxDctResults = dctResultsCanvas.getContext('2d')

// Run the DCT over the loaded image, log timing and a sample of coefficients,
// then draw the magnitude image.
loadImageData('/images/_notes/jpeg/mad_cat.jpg', (imageData) => {
  console.time('DCT Optimized')
  const dctResult = dctOptimizedWithPrecomputation(imageData)
  console.timeEnd('DCT Optimized')

  // Log the top-left 5x5 corner (the lowest frequencies), formatted to 2 decimals
  console.log("DCT Coefficients (first few rows/cols of first block if it's block-based):")
  for (const row of dctResult.slice(0, 5)) {
    console.log(row.slice(0, 5).map((val) => val.toFixed(2)))
  }

  // Visualize DCT coefficients.
  // The coefficient matrix has the image's dimensions, so never ask for more rows
  // or columns than it contains: a canvas larger than the image would otherwise
  // index past the matrix and throw.
  const visWidth = Math.min(dctResultsCanvas.width, imageData.width)
  const visHeight = Math.min(dctResultsCanvas.height, imageData.height)
  const dctImageData = visualizeDctCoefficients(dctResult, visWidth, visHeight, true)
  ctxDctResults.putImageData(dctImageData, 0, 0)
})
4: Quantize (lossy part)
Most images shot by camera don't contain high frequency changes. Also, our vision is less sensitive to it. Let's remove high frequency changes to save some space.
How are quantization tables made?
5: Encode the data (lossless part)
???
X: Reverse the process
Further Exploration
- Painting app where you paint the coefficients
- Make your own quantization table interactively