feat: Add waveforms to voice messages (#180)

Co-authored-by: Edgard <edgardmessias@gmail.com>
2025-10-13 00:32:22 +00:00 · 2023-07-14 03:08:11 +03:00
parent e0e7d40847
commit 2c76713565
7 changed files with 622 additions and 455 deletions
--- a/src/Utils/generics.ts
+++ b/src/Utils/generics.ts
@@ -213,6 +213,7 @@ export const printQRIfNecessaryListener = (ev: BaileysEventEmitter, logger: Logg
 	ev.on('connection.update', async({ qr }) => {
 		if(qr) {
 			const QR = await import('qrcode-terminal')
+				.then(m => m.default || m)
 				.catch(() => {
 					logger.error('QR code terminal not added as dependency')
 				})
--- a/src/Utils/messages-media.ts
+++ b/src/Utils/messages-media.ts
@@ -204,6 +204,45 @@ export async function getAudioDuration(buffer: Buffer | string | Readable) {
 	return metadata.format.duration
 }

+/**
+ * Builds the 64-point waveform preview for a voice note; adapted from
+ * https://github.com/wppconnect-team/wa-js/blob/main/src/chat/functions/prepareAudioWaveform.ts
+ * Resolves to 64 bytes in the range 0-100, or to undefined (after a debug log) if reading/decoding throws.
+ */
+export async function getAudioWaveform(bodyPath: string, logger?: Logger): Promise<Uint8Array | undefined> {
+	try {
+		const { default: audioDecode } = await import('audio-decode')
+		const fileBuffer = await fs.readFile(bodyPath)
+		const audioBuffer = await audioDecode.default(fileBuffer)
+
+		const rawData = audioBuffer.getChannelData(0) // only the first channel is sampled
+		const samples = 64 // number of points in the final waveform
+		const blockSize = Math.floor(rawData.length / samples) // source samples averaged per point
+		if(blockSize === 0) {
+			return new Uint8Array(samples) // fewer than 64 source samples: flat waveform instead of 0 / 0 averages
+		}
+
+		const filteredData: number[] = []
+		for(let i = 0; i < samples; i++) {
+			const blockStart = blockSize * i // index of the first sample in this block
+			let sum = 0
+			for(let j = 0; j < blockSize; j++) {
+				sum += Math.abs(rawData[blockStart + j])
+			}
+
+			filteredData.push(sum / blockSize) // mean absolute amplitude of the block
+		}
+
+		// Scale so the loudest block maps to 1; silent audio (peak 0) keeps a zero multiplier instead of 0 * Infinity = NaN.
+		const peak = Math.max(...filteredData)
+		const multiplier = peak > 0 ? Math.pow(peak, -1) : 0
+		// Generate waveform like WhatsApp: one byte per point, 0-100
+		return new Uint8Array(filteredData.map((n) => Math.floor(100 * (n * multiplier))))
+	} catch(e) {
+		logger?.debug('Failed to generate waveform: ' + e)
+	}
+}
+
+
 export const toReadable = (buffer: Buffer) => {
 	const readable = new Readable({ read: () => {} })
 	readable.push(buffer)
--- a/src/Utils/messages.ts
+++ b/src/Utils/messages.ts
@@ -26,7 +26,7 @@ import {
 import { isJidGroup, jidNormalizedUser } from '../WABinary'
 import { sha256 } from './crypto'
 import { generateMessageID, getKeyAuthor, unixTimestampSeconds } from './generics'
-import { downloadContentFromMessage, encryptedStream, generateThumbnail, getAudioDuration, MediaDownloadOptions } from './messages-media'
+import { downloadContentFromMessage, encryptedStream, generateThumbnail, getAudioDuration, getAudioWaveform, MediaDownloadOptions } from './messages-media'

 type MediaUploadData = {
 	media: WAMediaUpload
@@ -39,6 +39,7 @@ type MediaUploadData = {
 	mimetype?: string
 	width?: number
 	height?: number
+	waveform?: Uint8Array
 }

 const MIMETYPE_MAP: { [T in MediaType]?: string } = {
@@ -138,6 +139,7 @@ export const prepareWAMessageMedia = async(
 	const requiresDurationComputation = mediaType === 'audio' && typeof uploadData.seconds === 'undefined'
 	const requiresThumbnailComputation = (mediaType === 'image' || mediaType === 'video') &&
 										(typeof uploadData['jpegThumbnail'] === 'undefined')
+	const requiresWaveformProcessing = mediaType === 'audio' && uploadData?.ptt === true
 	const requiresOriginalForSomeProcessing = requiresDurationComputation || requiresThumbnailComputation
 	const {
 		mediaKey,
@@ -188,6 +190,11 @@ export const prepareWAMessageMedia = async(
 					uploadData.seconds = await getAudioDuration(bodyPath!)
 					logger?.debug('computed audio duration')
 				}
+
+				if(requiresWaveformProcessing) {
+					uploadData.waveform = await getAudioWaveform(bodyPath!, logger)
+					logger?.debug('processed waveform')
+				}
 			} catch(error) {
 				logger?.warn({ trace: error.stack }, 'failed to obtain extra info')
 			}