Files
Baileys/proto-extract/index.js
2023-05-13 18:10:02 -03:00

383 lines
13 KiB
JavaScript

const request = require('request-promise-native')
const acorn = require('acorn')
const walk = require('acorn-walk')
const fs = require('fs/promises')
/**
 * Returns a new array in which every entry of `lines` has `prefix` put in front of it.
 * @param {string[]} lines - lines to prefix
 * @param {string} prefix - text to prepend to each line
 * @returns {string[]} the prefixed lines, in the same order
 */
const addPrefix = (lines, prefix) => {
    const prepend = (line) => prefix + line
    return lines.map(prepend)
}
/**
 * Collects `node`, its `expression` (when present) and, recursively, everything
 * reachable through `node.expression.expressions`.
 * @param {object} node - AST node to inspect
 * @returns {object[]} flat list starting with `node` itself
 */
const extractAllExpressions = (node) => {
    const { expression } = node
    const found = expression ? [node, expression] : [node]
    const nested = expression?.expressions
    if(nested?.length) {
        // each nested entry contributes itself plus whatever it wraps
        for(const child of nested) {
            found.push(...extractAllExpressions(child))
        }
    }
    return found
}
/**
 * Downloads the WhatsApp Web application bundle and returns the AST property
 * nodes of the bundled modules that contain an assignment to an `internalSpec`
 * property.
 *
 * The bundle URL is taken from the first `assets-manifest-<version>.json`
 * referenced by `serviceworker.js`; when the service worker references none,
 * the URL is scraped from the index page instead.
 *
 * @returns {Promise<object[]>} acorn `Property` nodes (module id -> module function)
 * @throws {Error} when the bundle URL cannot be determined from the downloaded pages
 */
async function findAppModules() {
    const ua = {
        headers: {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0',
            'Sec-Fetch-Dest': 'script',
            'Sec-Fetch-Mode': 'no-cors',
            'Sec-Fetch-Site': 'same-origin',
            'Referer': 'https://web.whatsapp.com/',
            'Accept': '*/*',
            // only the value belongs here; the header name must not be repeated inside it
            'Accept-Language': 'en-US,en;q=0.5',
        }
    }
    const baseURL = 'https://web.whatsapp.com'

    const serviceworker = await request.get(`${baseURL}/serviceworker.js`, ua)
    const versions = [...serviceworker.matchAll(/assets-manifest-([\d.]+)\.json/g)].map(r => r[1])
    const version = versions[0]

    let bootstrapQRURL = ''
    if(version) {
        const asset = await request.get(`${baseURL}/assets-manifest-${version}.json`, ua)
        const hashFiles = JSON.parse(asset)
        const app = Object.keys(hashFiles).find(f => /^app\./.test(f))
        if(!app) {
            // without this check the URL below would silently become ".../undefined"
            throw new Error(`no "app." bundle listed in assets-manifest-${version}.json`)
        }

        bootstrapQRURL = `${baseURL}/${app}`
    } else {
        const index = await request.get(baseURL, ua)
        const bootstrapQRID = index.match(/src="\/app\.([0-9a-z]{10,})\.js"/)?.[1]
        if(!bootstrapQRID) {
            throw new Error(`unable to locate the app bundle script on ${baseURL}`)
        }

        bootstrapQRURL = baseURL + '/app.' + bootstrapQRID + '.js'
    }

    console.error('Found source JS URL:', bootstrapQRURL)

    const qrData = await request.get(bootstrapQRURL, ua)
    // the version is only reported, so a failed lookup must not abort the extraction
    const waVersion = qrData.match(/appVersion:"(\d\.\d+\.\d+)"/)?.[1] ?? 'unknown'
    console.log('Current version:', waVersion)

    // This one list of types is so long that it's split into two JavaScript declarations.
    // The module finder below can't handle it, so just patch it manually here.
    const patchedQrData = qrData.replace('t.ActionLinkSpec=void 0,t.TemplateButtonSpec', 't.ActionLinkSpec=t.TemplateButtonSpec')

    // the module table is expected at this fixed position of the bundle's first statement
    const qrModules = acorn.parse(patchedQrData).body[0].expression.arguments[0].elements[1].properties

    // keep only the modules whose top-level statements assign an `internalSpec` property
    return qrModules.filter(m => m.value.body.body.some(statement => (
        extractAllExpressions(statement).some(e => e?.left?.property?.name === 'internalSpec')
    )))
}
/**
 * Script entry point. Obtains the protobuf-carrying modules from
 * `findAppModules()`, reconstructs enums and messages from their ASTs and
 * writes the result as a proto2 schema to `../WAProto/WAProto.proto`
 * (a relative path, so it depends on the current working directory).
 *
 * The work is done in four passes over the modules:
 *  1. record variables initialised by calling the module function's third parameter
 *     with one argument (cross references to other modules),
 *  2. collect the identifiers initialised to `void 0`, their aliases and enum values,
 *  3. decode every `internalSpec` object into message members,
 *  4. stringify all top-level entities (nested ones are emitted inside their parent).
 *
 * Any failure is logged and reported through a non-zero exit code.
 */
(async() => {
    // "FooSpec" -> "Foo"
    const unspecName = name => name.endsWith('Spec') ? name.slice(0, -4) : name
    // "A$B$C" -> "C"
    const unnestName = name => name.split('$').slice(-1)[0]
    // "A$B$C" -> "A$B" ('' when the name contains no '$')
    const getNesting = name => name.split('$').slice(0, -1).join('$')
    const makeRenameFunc = () => (
        name => unspecName(name)
    )
    // property keys are either identifiers (`a: 1`) or literals (`'a': 1`)
    const keyName = key => key.type === 'Identifier' ? key.name : key.value
    // flattens `a | b | c` into [a, b, c]
    const unwrapBinaryOr = n => (n.type === 'BinaryExpression' && n.operator === '|') ? [].concat(unwrapBinaryOr(n.left), unwrapBinaryOr(n.right)) : [n]

    // The constructor IDs that can be used for enum types
    // const enumConstructorIDs = [76672, 54302]

    const modules = await findAppModules()

    // find aliases of cross references between the wanted modules
    const modulesInfo = {}
    const moduleIndentationMap = {}
    modules.forEach(({ key, value }) => {
        // a module function with fewer than three parameters has nothing to record
        const requiringParam = value.params[2]?.name
        modulesInfo[key.value] = { crossRefs: [] }
        walk.simple(value, {
            VariableDeclarator(node) {
                if(requiringParam && node.init && node.init.type === 'CallExpression' && node.init.callee.name === requiringParam && node.init.arguments.length === 1) {
                    modulesInfo[key.value].crossRefs.push({ alias: node.id.name, module: node.init.arguments[0].value })
                }
            }
        })
    })

    // find all identifiers and, for enums, their array of values
    for(const mod of modules) {
        const modInfo = modulesInfo[mod.key.value]
        const rename = makeRenameFunc()

        // all identifiers will be initialized to "void 0" (i.e. "undefined") at the start, so capture them here
        walk.ancestor(mod, {
            UnaryExpression(node, anc) {
                if(!modInfo.identifiers && node.operator === 'void') {
                    const assignments = []
                    // `anc` belongs to acorn-walk (presumably its live ancestor stack),
                    // so reverse a copy instead of mutating it in place
                    const chain = [...anc].reverse()
                    let i = 1
                    while(chain[i]?.type === 'AssignmentExpression') {
                        assignments.push(chain[i++].left)
                    }

                    const makeBlankIdent = a => {
                        const key = rename(a.property.name)
                        const indentation = getNesting(key)
                        const value = { name: key }

                        moduleIndentationMap[key] = moduleIndentationMap[key] || { }
                        moduleIndentationMap[key].indentation = indentation

                        // register the entity with its parent so the parent can emit it as a nested type
                        if(indentation.length) {
                            moduleIndentationMap[indentation] = moduleIndentationMap[indentation] || { }
                            moduleIndentationMap[indentation].members = moduleIndentationMap[indentation].members || new Set()
                            moduleIndentationMap[indentation].members.add(key)
                        }

                        return [key, value]
                    }

                    modInfo.identifiers = Object.fromEntries(assignments.map(makeBlankIdent).reverse())
                }
            }
        })
        // a module without any `void` expression still needs a table for the passes below
        modInfo.identifiers = modInfo.identifiers || {}

        const enumAliases = {}
        // enums are defined directly, and both enums and messages get a one-letter alias
        walk.simple(mod, {
            VariableDeclarator(node) {
                if(
                    node.init?.type === 'CallExpression'
                    // && enumConstructorIDs.includes(node.init.callee?.arguments?.[0]?.value)
                    && !!node.init.arguments.length
                    && node.init.arguments[0].type === 'ObjectExpression'
                    && node.init.arguments[0].properties.length
                ) {
                    const values = node.init.arguments[0].properties.map(p => ({
                        name: p.key.name,
                        id: p.value.value
                    }))
                    enumAliases[node.id.name] = values
                }
            },
            AssignmentExpression(node) {
                // computed literal keys (`a[0] = ...`) have no `name` and cannot be an identifier of interest
                const propName = node.left.type === 'MemberExpression' ? node.left.property.name : undefined
                const ident = typeof propName === 'string' ? modInfo.identifiers[rename(propName)] : undefined
                if(ident) {
                    ident.alias = node.right.name
                    ident.enumValues = enumAliases[ident.alias]
                }
            },
        })
    }

    // find the contents for all protobuf messages
    for(const mod of modules) {
        const modInfo = modulesInfo[mod.key.value]
        const rename = makeRenameFunc()

        // message specifications are stored in a "internalSpec" attribute of the respective identifier alias
        walk.simple(mod, {
            AssignmentExpression(node) {
                if(node.left.type === 'MemberExpression' && node.left.property.name === 'internalSpec' && node.right.type === 'ObjectExpression') {
                    const targetIdent = Object.values(modInfo.identifiers).find(v => v.alias === node.left.object.name)
                    if(!targetIdent) {
                        console.warn(`found message specification for unknown identifier alias: ${node.left.object.name}`)
                        return
                    }

                    // partition spec properties by normal members and constraints (like "__oneofs__") which will be processed afterwards
                    const constraints = []
                    let members = []
                    for(const p of node.right.properties) {
                        p.key.name = keyName(p.key)
                        const arr = p.key.name.startsWith('__') ? constraints : members
                        arr.push(p)
                    }

                    members = members.map(({ key: { name }, value: { elements } }) => {
                        let type
                        const flags = []

                        // find type and flags
                        unwrapBinaryOr(elements[1]).forEach(m => {
                            if(m.type === 'MemberExpression' && m.object.type === 'MemberExpression') {
                                if(m.object.property.name === 'TYPES') {
                                    type = m.property.name.toLowerCase()
                                } else if(m.object.property.name === 'FLAGS') {
                                    flags.push(m.property.name.toLowerCase())
                                }
                            }
                        })

                        // determine cross reference name from alias if this member has type "message" or "enum"
                        if(type === 'message' || type === 'enum') {
                            const currLoc = ` from member '${name}' of message '${targetIdent.name}'`
                            if(elements[2].type === 'Identifier') {
                                const resolved = Object.values(modInfo.identifiers).find(v => v.alias === elements[2].name)?.name
                                if(resolved) {
                                    type = resolved
                                } else {
                                    // keep the "message"/"enum" placeholder, like the branch below does,
                                    // so that stringifying later does not fail on an undefined type
                                    console.warn(`unable to find reference of alias '${elements[2].name}'` + currLoc)
                                }
                            } else if(elements[2].type === 'MemberExpression') {
                                const crossRef = modInfo.crossRefs.find(r => r.alias === elements[2].object.name)
                                // the referenced module is not necessarily one of the extracted modules
                                if(crossRef && modulesInfo[crossRef.module]?.identifiers?.[rename(elements[2].property.name)]) {
                                    type = rename(elements[2].property.name)
                                } else {
                                    console.warn(`unable to find reference of alias to other module '${elements[2].object.name}' or to message ${elements[2].property.name} of this module` + currLoc)
                                }
                            }
                        }

                        return { name, id: elements[0].value, type, flags }
                    })

                    // resolve constraints for members
                    constraints.forEach(c => {
                        if(c.key.name === '__oneofs__' && c.value.type === 'ObjectExpression') {
                            const newOneOfs = c.value.properties.map(p => ({
                                name: keyName(p.key),
                                type: '__oneof__',
                                // move every listed member out of the flat member list into the oneof
                                members: p.value.elements.flatMap(e => {
                                    const idx = members.findIndex(m => m.name === e.value)
                                    if(idx === -1) {
                                        // splice(-1, 1) would remove an unrelated member, so skip instead
                                        console.warn(`oneof '${keyName(p.key)}' of message '${targetIdent.name}' references unknown member '${e.value}'`)
                                        return []
                                    }

                                    return members.splice(idx, 1)
                                })
                            }))
                            members.push(...newOneOfs)
                        }
                    })

                    targetIdent.members = members
                }
            }
        })
    }

    const decodedProtoMap = { }
    const spaceIndent = ' '.repeat(4)
    for(const mod of modules) {
        const modInfo = modulesInfo[mod.key.value]
        const identifiers = Object.values(modInfo.identifiers)

        // enum stringifying function
        const stringifyEnum = (ident, overrideName = null) => [].concat(
            [`enum ${overrideName || ident.displayName || ident.name} {`],
            addPrefix(ident.enumValues.map(v => `${v.name} = ${v.id};`), spaceIndent),
            ['}']
        )

        // message specification member stringifying function
        const stringifyMessageSpecMember = (info, completeFlags, parentName = undefined) => {
            if(info.type === '__oneof__') {
                return [].concat(
                    [`oneof ${info.name} {`],
                    addPrefix([].concat(...info.members.map(m => stringifyMessageSpecMember(m, false))), spaceIndent),
                    ['}']
                )
            }

            const packedIdx = info.flags.indexOf('packed')
            if(packedIdx !== -1) {
                // remove exactly this one flag; it is emitted as a field option instead
                info.flags.splice(packedIdx, 1)
                info.packed = ' [packed=true]'
            }

            if(completeFlags && info.flags.length === 0) {
                info.flags.push('optional')
            }

            const ret = []
            const indentation = moduleIndentationMap[info.type]?.indentation
            let typeName = unnestName(info.type)
            // qualify the type with its parents unless it is nested directly in the current message
            if(indentation !== parentName && indentation) {
                typeName = `${indentation.replaceAll('$', '.')}.${typeName}`
            }

            ret.push(`${info.flags.join(' ') + (info.flags.length === 0 ? '' : ' ')}${typeName} ${info.name} = ${info.id}${info.packed || ''};`)
            return ret
        }

        // message specification stringifying function
        const stringifyMessageSpec = (ident) => {
            const members = moduleIndentationMap[ident.name]?.members
            const result = []
            result.push(
                `message ${ident.displayName || ident.name} {`,
                ...addPrefix([].concat(...ident.members.map(m => stringifyMessageSpecMember(m, true, ident.name))), spaceIndent),
            )

            if(members?.size) {
                const sortedMembers = Array.from(members).sort()
                for(const memberName of sortedMembers) {
                    let entity = modInfo.identifiers[memberName]
                    if(entity) {
                        // nested entities are printed without the "<parent>$" part of their name
                        const displayName = entity.name.slice(ident.name.length + 1)
                        entity = { ...entity, displayName }
                        result.push(...addPrefix(getEntity(entity), spaceIndent))
                    } else {
                        console.log('missing nested entity ', memberName)
                    }
                }
            }

            result.push('}')
            result.push('')

            return result
        }

        const getEntity = (v) => {
            let result
            if(v.members) {
                result = stringifyMessageSpec(v)
            } else if(v.enumValues?.length) {
                result = stringifyEnum(v)
            } else {
                result = ['// Unknown entity ' + v.name]
            }

            return result
        }

        const stringifyEntity = v => {
            return {
                content: getEntity(v).join('\n'),
                name: v.name
            }
        }

        for(const value of identifiers) {
            const { name, content } = stringifyEntity(value)
            // nested entities are already part of their parent's output
            if(!moduleIndentationMap[name]?.indentation?.length) {
                decodedProtoMap[name] = content
            }
        }
    }

    // emit entities sorted by name so the order does not depend on the module order
    const decodedProto = Object.keys(decodedProtoMap).sort()
    const sortedStr = decodedProto.map(d => decodedProtoMap[d]).join('\n')

    const decodedProtoStr = `syntax = "proto2";\npackage proto;\n\n${sortedStr}`
    const destinationPath = '../WAProto/WAProto.proto'
    await fs.writeFile(destinationPath, decodedProtoStr)

    console.log(`Extracted protobuf schema to "${destinationPath}"`)
})().catch(error => {
    // surface failures explicitly instead of leaving the promise unhandled
    console.error('Failed to extract protobuf schema:', error)
    process.exitCode = 1
})