feat: Add waveforms to voice messages (#180)

Co-authored-by: Edgard <edgardmessias@gmail.com>
2025-10-13 00:32:22 +00:00 · 2023-07-14 03:08:11 +03:00
parent e0e7d40847
commit 2c76713565
7 changed files with 622 additions and 455 deletions
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -1,5 +1,10 @@
 {
    "extends": "@adiwajshing",
+    "parserOptions": {
+        "sourceType": "module",
+        "project": "./tsconfig.json"
+    },
+    "ignorePatterns": ["src/Tests/*"],
    "rules": {
        "@typescript-eslint/no-explicit-any": [
            "warn",
@@ -7,6 +12,15 @@
                "ignoreRestArgs": true
            }
        ],
+        "@typescript-eslint/no-inferrable-types": [
+            "warn"
+        ],
+        "@typescript-eslint/no-redundant-type-constituents": [
+            "warn"
+        ],
+        "@typescript-eslint/no-unnecessary-type-assertion": [
+            "warn"
+        ],
        "no-restricted-syntax": [
            "warn",
            {
--- a/package.json
+++ b/package.json
@@ -34,8 +34,8 @@
    "changelog:update": "conventional-changelog -p angular -i CHANGELOG.md -s -r 0",
    "example": "node --inspect -r ts-node/register Example/example.ts",
    "gen:protobuf": "sh WAProto/GenerateStatics.sh",
-    "lint": "eslint . --ext .js,.ts,.jsx,.tsx",
-    "lint:fix": "eslint . --fix --ext .js,.ts,.jsx,.tsx",
+    "lint": "eslint src --ext .js,.ts,.jsx,.tsx",
+    "lint:fix": "eslint src --fix --ext .js,.ts,.jsx,.tsx",
    "prepack": "tsc",
    "prepare": "tsc",
    "release": "release-it",
@@ -44,11 +44,12 @@
  "dependencies": {
    "@adiwajshing/keyed-db": "^0.2.4",
    "@hapi/boom": "^9.1.3",
+    "audio-decode": "^2.1.3",
    "axios": "^1.3.3",
    "cache-manager": "^5.2.2",
    "futoin-hkdf": "^1.5.1",
    "libphonenumber-js": "^1.10.20",
-    "libsignal": "https://github.com/adiwajshing/libsignal-node.git",
+    "libsignal": "github:adiwajshing/libsignal-node",
    "music-metadata": "^7.12.3",
    "node-cache": "^5.1.2",
    "pino": "^7.0.0",
@@ -57,7 +58,7 @@
    "ws": "^8.13.0"
  },
  "devDependencies": {
-    "@adiwajshing/eslint-config": "https://github.com/adiwajshing/eslint-config.git",
+    "@adiwajshing/eslint-config": "github:adiwajshing/eslint-config",
    "@types/got": "^9.6.11",
    "@types/jest": "^27.5.1",
    "@types/node": "^16.0.0",
@@ -74,7 +75,7 @@
    "ts-jest": "^27.0.3",
    "ts-node": "^10.8.1",
    "typedoc": "^0.24.7",
-    "typescript": "^4.0.0"
+    "typescript": "^4.6.4"
  },
  "peerDependencies": {
    "jimp": "^0.16.1",
--- a/src/Utils/generics.ts
+++ b/src/Utils/generics.ts
@@ -213,6 +213,7 @@ export const printQRIfNecessaryListener = (ev: BaileysEventEmitter, logger: Logg
 	ev.on('connection.update', async({ qr }) => {
 		if(qr) {
 			const QR = await import('qrcode-terminal')
+				.then(m => m.default || m)
 				.catch(() => {
 					logger.error('QR code terminal not added as dependency')
 				})
--- a/src/Utils/messages-media.ts
+++ b/src/Utils/messages-media.ts
@@ -204,6 +204,45 @@ export async function getAudioDuration(buffer: Buffer | string | Readable) {
 	return metadata.format.duration
 }

+/**
+ * Builds the 64-bar amplitude waveform (each value 0-100) that is attached to voice messages.
+ * Adapted from https://github.com/wppconnect-team/wa-js/blob/main/src/chat/functions/prepareAudioWaveform.ts
+ * @param bodyPath path of the audio file to read and decode
+ * @param logger optional logger that receives a debug entry when generation fails
+ * @returns the waveform, or undefined when reading, decoding or processing the file throws
+ */
+export async function getAudioWaveform(bodyPath: string, logger?: Logger): Promise<Uint8Array | undefined> {
+	try {
+		const { default: audioDecode } = await import('audio-decode')
+		const fileBuffer = await fs.readFile(bodyPath)
+		const audioBuffer = await audioDecode.default(fileBuffer)
+
+		const rawData = audioBuffer.getChannelData(0) // a single channel is enough for the preview
+		const samples = 64 // number of bars in the final waveform
+		const blockSize = Math.floor(rawData.length / samples) // raw samples averaged into each bar
+		const averages: number[] = []
+		for(let i = 0; i < samples; i++) {
+			const blockStart = blockSize * i // index of the first raw sample in this block
+			let sum = 0
+			for(let j = 0; j < blockSize; j++) {
+				sum += Math.abs(rawData[blockStart + j])
+			}
+
+			averages.push(sum / blockSize) // mean absolute amplitude of the block
+		}
+
+		// peak is 0 for silent audio and NaN when there are fewer raw samples than bars: both yield all zeros.
+		// Dividing by peak (not multiplying by 1 / peak, which can round below 1) keeps the loudest bar at exactly 100.
+		const peak = Math.max(...averages)
+		const toBar = (avg: number) => (peak > 0 ? Math.floor(100 * (avg / peak)) : 0)
+
+		return new Uint8Array(averages.map(toBar))
+	} catch(e) {
+		logger?.debug('Failed to generate waveform: ' + e)
+	}
+}
+
+
 export const toReadable = (buffer: Buffer) => {
 	const readable = new Readable({ read: () => {} })
 	readable.push(buffer)
--- a/src/Utils/messages.ts
+++ b/src/Utils/messages.ts
@@ -26,7 +26,7 @@ import {
 import { isJidGroup, jidNormalizedUser } from '../WABinary'
 import { sha256 } from './crypto'
 import { generateMessageID, getKeyAuthor, unixTimestampSeconds } from './generics'
-import { downloadContentFromMessage, encryptedStream, generateThumbnail, getAudioDuration, MediaDownloadOptions } from './messages-media'
+import { downloadContentFromMessage, encryptedStream, generateThumbnail, getAudioDuration, getAudioWaveform, MediaDownloadOptions } from './messages-media'

 type MediaUploadData = {
 	media: WAMediaUpload
@@ -39,6 +39,7 @@ type MediaUploadData = {
 	mimetype?: string
 	width?: number
 	height?: number
+	waveform?: Uint8Array
 }

 const MIMETYPE_MAP: { [T in MediaType]?: string } = {
@@ -138,6 +139,7 @@ export const prepareWAMessageMedia = async(
 	const requiresDurationComputation = mediaType === 'audio' && typeof uploadData.seconds === 'undefined'
 	const requiresThumbnailComputation = (mediaType === 'image' || mediaType === 'video') &&
 										(typeof uploadData['jpegThumbnail'] === 'undefined')
+	const requiresWaveformProcessing = mediaType === 'audio' && uploadData?.ptt === true
 	const requiresOriginalForSomeProcessing = requiresDurationComputation || requiresThumbnailComputation
 	const {
 		mediaKey,
@@ -188,6 +190,11 @@ export const prepareWAMessageMedia = async(
 					uploadData.seconds = await getAudioDuration(bodyPath!)
 					logger?.debug('computed audio duration')
 				}
+
+				if(requiresWaveformProcessing) {
+					uploadData.waveform = await getAudioWaveform(bodyPath!, logger)
+					logger?.debug('processed waveform')
+				}
 			} catch(error) {
 				logger?.warn({ trace: error.stack }, 'failed to obtain extra info')
 			}
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -1,7 +1,8 @@
 {
  "compilerOptions": {
    "target": "es2018",
-    "module": "commonjs",
+    "module": "CommonJS",
+    "moduleResolution": "NodeNext",
    "experimentalDecorators": true,
    "allowJs": false,
    "checkJs": false,
--- a/yarn.lock
+++ b/yarn.lock