Example: voice note transcription
Build a plugin that detects voice notes, sends audio to a speech-to-text provider, and replies with the text.

Example: voice note transcription
This example builds a plugin that listens for WhatsApp voice notes, sends the decrypted audio to OpenAI's transcription API, and replies to the sender with the text.
What the plugin does
When someone sends a voice note to your WhatsApp session:
- The plugin detects it is a voice note
- Decrypts the audio data
- Sends it to a speech-to-text API
- Replies to the sender with the transcription as a quoted message
Detect voice notes
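Voice notes arrive as messages whose type is 'audio' and whose mimetype contains 'ogg'. The event payload is untyped, so validate it before reading any field: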
/** The subset of an incoming message that this plugin needs from a voice note. */
type VoiceNoteMessage = {
  type: 'audio';
  mimetype: string;
  from: string;
  id: string;
};

/**
 * Narrows an untyped event payload to a voice note.
 *
 * @param message - Raw payload; nothing about its shape is assumed.
 * @returns A new object holding only the four validated fields, or `null`
 *   when the payload is not an object, is not of type `'audio'`, has a
 *   mimetype that does not mention OGG, or lacks string `from` / `id` fields.
 */
function getVoiceNote(message: unknown): VoiceNoteMessage | null {
  if (!message || typeof message !== 'object') return null;

  const { type, mimetype, from, id } = message as Partial<
    Record<keyof VoiceNoteMessage, unknown>
  >;

  if (type !== 'audio') return null;
  // Media type names are case-insensitive, so match against a lower-cased
  // copy; the original string is what gets returned.
  if (typeof mimetype !== 'string' || !mimetype.toLowerCase().includes('ogg')) return null;
  if (typeof from !== 'string' || typeof id !== 'string') return null;

  return { type: 'audio', mimetype, from, id };
}
'message.received': async ({ message, logger }) => {
  // Anything getVoiceNote rejects is ignored.
  if (getVoiceNote(message) === null) return;

  logger.info('Voice note received');
}
Decrypt media
Use client.decryptMedia to get the raw audio bytes:
/**
 * Turns any thrown value into text suitable for a log entry.
 *
 * @param error - Whatever a `catch` clause received.
 * @returns The `message` of an `Error` instance; otherwise `String(error)`.
 */
function errorMessage(error: unknown) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
'message.received': async ({ message, client, logger }) => {
  if (getVoiceNote(message) === null) return;

  try {
    // mediaData is a Buffer containing the OGG audio
    const mediaData = await client.decryptMedia(message);
    logger.info('Media decrypted', { size: mediaData.length });
  } catch (err) {
    // Log the failure and leave this message unprocessed.
    const reason = errorMessage(err);
    logger.error('Failed to decrypt media', { error: reason });
  }
}
Call the transcription API
Send the audio to a speech-to-text service. This example uses OpenAI's audio transcription endpoint:
/**
 * Uploads OGG audio to OpenAI's transcription endpoint and returns the text.
 *
 * @param audioBuffer - Decrypted voice-note bytes.
 * @param apiKey - Sent as a Bearer token in the Authorization header.
 * @returns The `text` field of the JSON response.
 * @throws Error when the HTTP status is not OK, or when the response body
 *   has no string `text` field.
 */
async function transcribe(audioBuffer: Buffer, apiKey: string): Promise<string> {
  const formData = new FormData();
  // Label the part as OGG: a Blob without a type is uploaded as
  // application/octet-stream.
  formData.append('file', new Blob([audioBuffer], { type: 'audio/ogg' }), 'audio.ogg');
  formData.append('model', 'whisper-1');

  // No Content-Type header here: fetch derives the multipart boundary from
  // the FormData body.
  const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
    },
    body: formData,
  });
  if (!response.ok) {
    throw new Error(`STT API error: ${response.status}`);
  }

  // The body is untrusted JSON, so check `text` before returning it.
  const result = await response.json() as { text?: unknown };
  if (typeof result.text !== 'string') {
    throw new Error('STT API response did not include text');
  }
  return result.text;
}
Reply with the transcript
Use client.reply to send the transcription as a quoted reply:
'message.received': async ({ message, client, logger }) => {
  const voiceNote = getVoiceNote(message);
  if (voiceNote === null) return;

  // ... mediaData comes from the decryption step; config is the plugin config ...

  const transcription = await transcribe(mediaData, config.apiKey);
  // The voice note's id is passed as the third argument to client.reply.
  await client.reply(voiceNote.from, transcription, voiceNote.id);
  logger.info('Transcription sent');
}
Configuration
Use Zod for configuration validation so the plugin fails at startup when the API key is missing:
import { createPlugin } from '@open-wa/plugin-sdk';
import { z } from 'zod';
// Zod schema for the plugin's configuration.
const configSchema = z.object({
// Required: an empty string is rejected with 'STT API key is required'.
apiKey: z.string().min(1, 'STT API key is required'),
// Optional: defaults to 'en'.
language: z.string().default('en'),
// Optional: defaults to true.
enabled: z.boolean().default(true),
});
export default createPlugin({
  meta: { name: 'voice-transcriber' },
  configSchema,
  init: async ({ events, logger, config, client }) => {
    // Respect the `enabled` switch: no listener is registered when it is off.
    if (!config.enabled) {
      logger.info('Plugin disabled');
      return;
    }
    logger.info('Voice transcriber loaded');

    events.on('message.received', async ({ message }) => {
      const voiceNote = getVoiceNote(message);
      if (voiceNote === null) return;

      const { from: sender, id: quotedId } = voiceNote;
      try {
        // Decrypt, transcribe, then reply; a failure at any step lands in
        // the catch below.
        const mediaData = await client.decryptMedia(message);
        const transcription = await transcribe(mediaData, config.apiKey);
        await client.reply(sender, transcription, quotedId);
        logger.info('Transcription sent');
      } catch (err) {
        logger.error('Transcription failed', { error: errorMessage(err) });
      }
    });
  },
});
Full code example
// voice-transcriber.ts
import { createPlugin } from '@open-wa/plugin-sdk';
import { z } from 'zod';
/** The subset of an incoming message that this plugin needs from a voice note. */
type VoiceNoteMessage = {
  type: 'audio';
  mimetype: string;
  from: string;
  id: string;
};

/**
 * Narrows an untyped event payload to a voice note.
 *
 * @param message - Raw payload; nothing about its shape is assumed.
 * @returns A new object holding only the four validated fields, or `null`
 *   when the payload is not an object, is not of type `'audio'`, has a
 *   mimetype that does not mention OGG, or lacks string `from` / `id` fields.
 */
function getVoiceNote(message: unknown): VoiceNoteMessage | null {
  if (!message || typeof message !== 'object') return null;

  const { type, mimetype, from, id } = message as Partial<
    Record<keyof VoiceNoteMessage, unknown>
  >;

  if (type !== 'audio') return null;
  // Media type names are case-insensitive, so match against a lower-cased
  // copy; the original string is what gets returned.
  if (typeof mimetype !== 'string' || !mimetype.toLowerCase().includes('ogg')) return null;
  if (typeof from !== 'string' || typeof id !== 'string') return null;

  return { type: 'audio', mimetype, from, id };
}
/**
 * Turns any thrown value into text suitable for a log entry.
 *
 * @param error - Whatever a `catch` clause received.
 * @returns The `message` of an `Error` instance; otherwise `String(error)`.
 */
function errorMessage(error: unknown) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
// Zod schema for the plugin's configuration.
const configSchema = z.object({
// Required: an empty string is rejected with 'STT API key is required'.
apiKey: z.string().min(1, 'STT API key is required'),
// Optional: defaults to 'en'.
language: z.string().default('en'),
// Optional: defaults to true.
enabled: z.boolean().default(true),
});
/**
 * Uploads OGG audio to OpenAI's transcription endpoint and returns the text.
 *
 * @param audioBuffer - Decrypted voice-note bytes.
 * @param apiKey - Sent as a Bearer token in the Authorization header.
 * @param language - Forwarded as the `language` form field.
 * @returns The `text` field of the JSON response.
 * @throws Error when the HTTP status is not OK, or when the response body
 *   has no string `text` field.
 */
async function transcribe(audioBuffer: Buffer, apiKey: string, language: string): Promise<string> {
  const formData = new FormData();
  // Label the part as OGG: a Blob without a type is uploaded as
  // application/octet-stream.
  formData.append('file', new Blob([audioBuffer], { type: 'audio/ogg' }), 'audio.ogg');
  formData.append('model', 'whisper-1');
  formData.append('language', language);

  // No Content-Type header here: fetch derives the multipart boundary from
  // the FormData body.
  const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
    },
    body: formData,
  });
  if (!response.ok) {
    throw new Error(`STT API error: ${response.status} ${response.statusText}`);
  }

  // The body is untrusted JSON, so check `text` before returning it.
  const result = await response.json() as { text?: unknown };
  if (typeof result.text !== 'string') {
    throw new Error('STT API response did not include text');
  }
  return result.text;
}
export default createPlugin({
  meta: { name: 'voice-transcriber' },
  configSchema,
  init: async ({ events, logger, config, client }) => {
    // Respect the `enabled` switch: no listener is registered when it is off.
    if (!config.enabled) {
      logger.info('Voice transcriber disabled');
      return;
    }
    logger.info('Voice transcriber loaded');

    events.on('message.received', async ({ message }) => {
      const voiceNote = getVoiceNote(message);
      if (voiceNote === null) return;

      const { from: sender, id: quotedId } = voiceNote;
      try {
        // Decrypt, transcribe, then reply; a failure at any step lands in
        // the catch below.
        const mediaData = await client.decryptMedia(message);
        logger.info('Media decrypted', { size: mediaData.length });

        const transcription = await transcribe(mediaData, config.apiKey, config.language);
        logger.info('Transcription complete', { length: transcription.length });

        await client.reply(sender, transcription, quotedId);
      } catch (err) {
        logger.error('Transcription failed', { error: errorMessage(err) });
      }
    });
  },
});
Load and test it
Register the plugin and its settings in your wa.config.js:
// wa.config.js
export default {
plugins: [
'./plugins/voice-transcriber', // Local path
],
pluginConfig: {
'voice-transcriber': {
apiKey: process.env.STT_API_KEY,
language: 'en',
enabled: true,
},
},
};Start a named session for testing:
npx @open-wa/wa-automate --session-id dictation-test --port 8080
Send a voice note to your session. The plugin should reply with the transcription. If nothing happens, check that the message is an OGG audio voice note and that STT_API_KEY is set.
What can go wrong
The example catches errors and logs them without crashing:
- Decryption failure: Logged, message skipped
- API failure: Logged, message skipped
- Empty transcription: Still sent (the user may want to know the API heard nothing)
Related
- Plugin getting started, build your first plugin
- PluginClient reference, available client methods
- External API patterns, calling external services

Was this helpful?
Wally and his cute companion coffee mug are coding day and night to keep this up-to-date!
