Source code for autobahn.nvx._compile_args

###############################################################################
#
# The MIT License (MIT)
#
# Copyright (c) typedef int GmbH
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
###############################################################################

"""
Centralized compiler flag selection for NVX (Native Vector Extensions) builds.

This module determines appropriate architecture-specific optimization flags
for building native C extensions using CFFI. It balances portability and
performance based on the build context.

Strategy
--------

**By default** - for *every* build context (PyPI wheels, local source installs,
and cross-compilation) - a safe, portable baseline architecture is used. This
keeps the resulting binaries runnable on a wide range of CPUs without SIGILL
(Illegal Instruction) crashes, and never hands a cross-compilation toolchain the
host-only ``-march=native`` flag (which it rejects).

Maximum-performance ``-march=native`` code generation is **opt-in** via
``AUTOBAHN_ARCH_TARGET=native``; use it only when the build host is also the run
host (e.g. Gentoo/Arch packages, dedicated single-machine deployments).

Architecture Baselines
----------------------

x86-64 (portable baseline):
    -march=x86-64-v2 (microarchitecture level 2, 2009+)
    Includes: SSE4.2, POPCNT, SSSE3, SSE4.1
    Compatible with: Intel Nehalem+, AMD Bulldozer+ (2009+)
    Coverage: ~99% of x86-64 CPUs from 2010 onwards

ARM64 (portable baseline):
    -march=armv8-a (baseline ARMv8-A architecture)
    Compatible with: Raspberry Pi 4/5, AWS Graviton, Apple M1/M2, etc.
    Coverage: All 64-bit ARM CPUs

Environment Variables
---------------------

AUTOBAHN_ARCH_TARGET : str, optional
    User/distro override for architecture target:
    - "native" : Force -march=native (maximum performance, build host only;
                 unsafe for distributed wheels and cross-compilation)
    - "safe"   : Force portable baseline (explicit; same as the default)
    - Not set  : Portable baseline (the safe default; works for cross-compilation)

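AUTOBAHN_WHEEL_BUILD : str, optional
    Explicit marker for wheel builds ("true" or "1").
    Read only by ``is_building_wheel()``. Since #1834 it does not change the
    flags returned by ``get_compile_args()``: the portable baseline is already
    the default, and ``AUTOBAHN_ARCH_TARGET=native`` still takes effect.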

CI : str, optional
    Standard CI environment variable. When "true" or "1", assumes wheel build.

CIBUILDWHEEL : str, optional
    Set by cibuildwheel. Indicates wheel build for distribution.

AUDITWHEEL_PLAT : str, optional
    Set by auditwheel/manylinux builds. Indicates wheel build.

Examples
--------

**GitHub Actions building wheels (default):**
    >>> # Default is the portable baseline
    >>> get_compile_args()
    ['-std=c99', '-Wall', '-Wno-strict-prototypes', '-O3', '-march=x86-64-v2']

**User installing from source (default):**
    >>> # Default is the portable baseline (safe for cross-compilation too)
    >>> get_compile_args()
    ['-std=c99', '-Wall', '-Wno-strict-prototypes', '-O3', '-march=x86-64-v2']

**Opting in to -march=native (build host == run host):**
    >>> import os
    >>> os.environ['AUTOBAHN_ARCH_TARGET'] = 'native'
    >>> get_compile_args()
    ['-std=c99', '-Wall', '-Wno-strict-prototypes', '-O3', '-march=native']

**Gentoo building package:**
    >>> # Gentoo wants -march=native for optimized binaries
    >>> import os
    >>> os.environ['AUTOBAHN_ARCH_TARGET'] = 'native'
    >>> get_compile_args()
    ['-std=c99', '-Wall', '-Wno-strict-prototypes', '-O3', '-march=native']

**Debian building package:**
    >>> # Debian wants portable binaries for all users
    >>> import os
    >>> os.environ['AUTOBAHN_ARCH_TARGET'] = 'safe'
    >>> get_compile_args()
    ['-std=c99', '-Wall', '-Wno-strict-prototypes', '-O3', '-march=x86-64-v2']

Notes
-----

This module is used by all NVX components:
- autobahn.nvx._xormasker (WebSocket frame XOR masking)
- autobahn.nvx._utf8validator (WebSocket UTF-8 validation)

Related Issues
--------------
- #1717: SIGILL crashes from -march=native in distributed wheels
- #1834: -march=native breaks cross-compilation; the default is now the portable
  baseline for all build contexts, with -march=native available opt-in.

See Also
--------
- https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
- https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
"""

import os
import sys
import sysconfig
import platform


def is_building_wheel():
    """
    Report whether the environment looks like a wheel build for distribution.

    .. note::
       As of #1834 the default architecture target is the portable baseline
       for *every* build context (wheels, local source installs,
       cross-compilation), so this helper no longer influences
       :func:`get_compile_args`. It is retained for backward compatibility
       and for external callers/tooling.

    Returns
    -------
    bool
        True if any wheel-build marker is present in the environment,
        False otherwise (a local source build is assumed).

    Notes
    -----
    Two kinds of environment markers are inspected:

    1. Flag-style variables, which must equal exactly "true" or "1":
       ``CI`` (generic CI systems) and ``AUTOBAHN_WHEEL_BUILD`` (explicit
       marker from the project's own workflows).
    2. Presence-style variables, where any non-empty value counts:
       ``CIBUILDWHEEL`` (cibuildwheel) and ``AUDITWHEEL_PLAT``
       (manylinux/auditwheel builds).
    """
    enabled_values = ("true", "1")
    flag_style_vars = ("CI", "AUTOBAHN_WHEEL_BUILD")
    presence_style_vars = ("CIBUILDWHEEL", "AUDITWHEEL_PLAT")

    # Flag-style markers only count when set to an accepted "on" value.
    if any(os.environ.get(name) in enabled_values for name in flag_style_vars):
        return True

    # Presence-style markers count as soon as they hold a non-empty value.
    return any(os.environ.get(name) for name in presence_style_vars)


def get_compile_args():
    """
    Return the compiler arguments used to build the NVX native extensions.

    The selection depends on:

    - The platform: on Windows (``sys.platform == "win32"``) a fixed MSVC
      flag list is returned and no architecture flag is added.
    - The target architecture, as reported by :func:`_get_target_machine`.
    - The ``AUTOBAHN_ARCH_TARGET`` environment variable (compared
      case-insensitively): ``native`` opts in to ``-march=native``; any
      other value, or leaving it unset, selects the portable baseline from
      :func:`_get_safe_march_flag`.

    Wheel-build detection (:func:`is_building_wheel`) is not consulted here.

    Returns
    -------
    list of str
        Compiler arguments for the current build.
        For MSVC: ['/O2', '/W3']
        For GCC/Clang: ['-std=c99', '-Wall', '-Wno-strict-prototypes', '-O3']
        followed by a single '-march=...' flag when one applies.

    Examples
    --------
    >>> args = get_compile_args()
    >>> # Use with CFFI:
    >>> ffi.set_source("_nvx_module", c_source, extra_compile_args=args)
    """
    # MSVC on Windows: -march is not supported (MSVC uses /arch instead), so
    # only optimisation and warning levels are set.
    if sys.platform == "win32":
        return ["/O2", "/W3"]

    # Everything below targets GCC/Clang on POSIX (Linux, macOS, *BSD).
    target_arch = _get_target_machine()
    common_flags = ["-std=c99", "-Wall", "-Wno-strict-prototypes", "-O3"]

    requested_target = os.environ.get("AUTOBAHN_ARCH_TARGET", "").lower()
    if requested_target == "native":
        # Explicit opt-in only: -march=native generates code for the exact
        # CPU of the *build* host. Intended for builds where the build host
        # is also the run host (e.g. Gentoo, Arch Linux); NOT safe for
        # distributed wheels or cross-compilation.
        march = "-march=native"
    else:
        # Default (variable unset, "safe", or anything else): portable
        # baseline. This keeps cross toolchains working, which reject
        # -march=native (#1834), and avoids SIGILL crashes from
        # over-optimised distributed wheels (#1717).
        march = _get_safe_march_flag(target_arch)

    if not march:
        # Unknown / cross target architecture: emit no -march flag and let
        # the toolchain defaults (or distro-supplied CFLAGS, e.g.
        # Buildroot/Yocto) govern code generation.
        return common_flags

    return common_flags + [march]


def _get_target_machine():
    """
    Determine the *target* machine architecture of the build.

    The architecture is taken from the last dash-separated field of
    ``sysconfig.get_platform()`` instead of ``platform.machine()``. The
    intent is correct detection when cross-compiling (e.g. Buildroot/Yocto
    building aarch64 on an x86-64 host): ``platform.machine()`` reports the
    build host (``uname``), whereas ``sysconfig`` reflects the interpreter's
    configured target platform.

    ``platform.machine()`` is used only as a fallback, when that last field
    turns out to be empty.

    (Target-detection approach contributed in PR #1835 by @jameshilliard.)

    Returns
    -------
    str
        Lower-cased target architecture token, e.g. "x86_64", "aarch64",
        "arm64".
    """
    # Typical values: 'linux-x86_64', 'linux-aarch64', 'macosx-11.0-arm64',
    # 'win-amd64', 'win32'.
    platform_tag = sysconfig.get_platform().lower()

    # 'win32' carries no separate architecture field; map it explicitly.
    if platform_tag == "win32":
        return "x86"

    # Text after the final '-' (the whole tag when there is no '-').
    _, _, arch_token = platform_tag.rpartition("-")
    if arch_token:
        return arch_token

    return platform.machine().lower()


def _get_safe_march_flag(machine):
    """
    Look up the portable ``-march`` baseline for a machine architecture.

    Parameters
    ----------
    machine : str
        Lower-cased architecture token (matching is exact and
        case-sensitive), e.g. the value produced by
        :func:`_get_target_machine`.

    Returns
    -------
    str or None
        The baseline ``-march`` flag for the architecture, or None when the
        architecture is not recognised, so that the caller adds no ``-march``
        flag and the compiler falls back to its own defaults.
    """
    portable_baselines = (
        # x86-64: microarchitecture level 2 (2009+ CPUs).
        # Includes: SSE4.2, POPCNT, SSSE3, SSE4.1
        # Compatible with: Intel Nehalem+, AMD Bulldozer+
        (("x86_64", "amd64", "x64"), "-march=x86-64-v2"),
        # ARM64: ARMv8-A baseline (all 64-bit ARM).
        # Compatible with: Raspberry Pi 4/5, AWS Graviton, Apple Silicon
        (("aarch64", "arm64"), "-march=armv8-a"),
    )

    for arch_aliases, march_flag in portable_baselines:
        if machine in arch_aliases:
            return march_flag

    # Unknown architecture: no -march flag (generic code for the target).
    return None