wasm: Parse wasm binaries to get dependencies

Currently we are searching binaries for strings ending with ".so" in
order to find shared library dependencies. This is not the cleanest
solution.
Parse wasm binaries instead, and extract the information in a
structured way.

Fixes: QTBUG-121833
Change-Id: Id0efb07af4efe7641ed4fd66f76c395722478f95
Reviewed-by: Morten Johan Sørvig <morten.sorvig@qt.io>
This commit is contained in:
Piotr Wiercinski 2024-02-20 10:18:59 +01:00
parent cd67684c89
commit 382ee7e200
2 changed files with 82 additions and 11 deletions

View File

@ -8,6 +8,8 @@ import subprocess
import json
import re
from wasm_binary_tools import WasmBinary
# Paths to shared libraries and qml imports on the Qt installation on the web server.
# "$QTDIR" is replaced by qtloader.js at load time (defaults to "qt"), and makes
# possible to relocate the application build relative to the Qt build on the web server.
@ -29,17 +31,8 @@ def preload_file(source, destination):
def find_dependencies(filepath):
# Very basic dependency finder which scans for ".so" strings in the file
try:
with open(filepath, "rb") as file:
content = file.read()
return [
m.group(0).decode("utf-8")
for m in re.finditer(rb"[\w\-.]+\.so", content)
]
except IOError as e:
eprint(f"Error: {e}")
return []
binary = WasmBinary(filepath)
return binary.get_dependencies()
def extract_preload_files_from_imports(imports):

View File

@ -0,0 +1,78 @@
#!/usr/bin/env python3
# Copyright (C) 2024 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
import sys
import struct
class WasmBinary:
    """Extract shared library dependencies from a wasm shared object.

    Only the leading ``dylink.0`` custom section is parsed. For reference of
    the binary format see Emscripten source code, especially
    library_dylink.js.
    """

    # Little-endian value of the four magic bytes b"\0asm".
    _WASM_MAGIC = 0x6d736100
    # Type tag of the dylink.0 subsection that lists the needed libraries.
    _WASM_DYLINK_NEEDED = 0x2

    def __init__(self, filepath):
        """Read *filepath* and collect the libraries it depends on.

        Raises:
            OSError: the file cannot be opened or read.
            ValueError: the file is not a wasm module that starts with a
                well-formed ``dylink.0`` section.
        """
        self._offset = 0  # read cursor into self._binary
        self._end = 0  # offset one past the last byte of the dylink.0 section
        self._dependencies = []

        with open(filepath, 'rb') as file:
            self._binary = file.read()

        self._check_preamble()
        self._parse_subsections()

    def get_dependencies(self):
        """Return the names of the needed shared libraries, in file order.

        A copy is returned so callers cannot alter the parsed state.
        """
        return list(self._dependencies)

    def _get_leb(self):
        """Decode the unsigned LEB128 integer at the cursor and step past it.

        Raises:
            ValueError: the data ends before the integer is complete.
        """
        value = 0
        shift = 0
        while True:
            if self._offset >= len(self._binary):
                raise ValueError("unexpected end of file while reading LEB128 value")
            byte = self._binary[self._offset]
            self._offset += 1
            # The low seven bits carry payload, least significant group first.
            value |= (byte & 0x7f) << shift
            shift += 7
            # A clear high bit marks the final byte of the integer.
            if not byte & 0x80:
                return value

    def _get_string(self):
        """Decode the length-prefixed UTF-8 string at the cursor and step past it.

        Raises:
            ValueError: the string runs past the end of the data or is not
                valid UTF-8 (UnicodeDecodeError is a ValueError subclass).
        """
        length = self._get_leb()
        start = self._offset
        stop = start + length
        if stop > len(self._binary):
            # Slicing would silently hand back a shortened string.
            raise ValueError("unexpected end of file while reading string")
        self._offset = stop
        return self._binary[start:stop].decode('utf-8')

    def _check_preamble(self):
        """Validate the module header and position the cursor in dylink.0.

        On success the cursor points at the first subsection and
        ``self._end`` marks the end of the section.

        Raises:
            ValueError: the magic number is missing, the first section is not
                a custom section named ``dylink.0``, or the section is
                truncated.
        """
        # Explicit raises instead of assert: asserts vanish under "python -O".
        # Only the first four bytes are inspected, so short files still parse.
        if (len(self._binary) < 4
                or struct.unpack_from('<I', self._binary)[0] != self._WASM_MAGIC):
            raise ValueError("magic number not found")

        # Byte 8 follows the 4-byte magic and the 4-byte version; it is the id
        # of the first section and must be 0 for the dylink section.
        if len(self._binary) < 9:
            raise ValueError("section dylink.0 not found")
        if self._binary[8] != 0:
            raise ValueError("dynlink section needs to be first")

        self._offset = 9
        section_size = self._get_leb()
        self._end = self._offset + section_size
        if self._end > len(self._binary):
            raise ValueError("dylink.0 section is truncated")

        name = self._get_string()
        if name != "dylink.0":
            raise ValueError("section dylink.0 not found")

    def _parse_subsections(self):
        """Walk the dylink.0 subsections and record the needed libraries.

        Raises:
            ValueError: a subsection is larger than the enclosing section, or
                its content runs past its declared size.
        """
        while self._offset < self._end:
            subsection_type = self._binary[self._offset]
            self._offset += 1
            subsection_size = self._get_leb()
            subsection_end = self._offset + subsection_size
            if subsection_end > self._end:
                raise ValueError("dylink.0 subsection extends past the end of the section")

            if subsection_type == self._WASM_DYLINK_NEEDED:
                needed_dynlibs_count = self._get_leb()
                for _ in range(needed_dynlibs_count):
                    self._dependencies.append(self._get_string())
                if self._offset > subsection_end:
                    raise ValueError("needed libraries list overruns its subsection")

            # Jump to the declared end: this skips the subsections we don't
            # care about for now and keeps the cursor aligned after NEEDED.
            self._offset = subsection_end
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the path to a
    # wasm shared object, and prints each of its dependencies on its own line.
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print("Usage: python wasm_binary_tools.py <shared_object>")
        sys.exit(1)

    (shared_object,) = arguments
    for dependency in WasmBinary(shared_object).get_dependencies():
        print(dependency)