Skip to content

Optimizations

optimizations

Performance optimizations — Step 10.

Installs GPU acceleration libraries from a config-driven package list.

Each package in dependencies.json under optimizations.packages[] declares: - requires: environment filters (e.g. ["nvidia", "linux"]) - pypi_package: pip name, optionally per-platform - torch_constraints: version-aware pip specifiers - install_options / retry_options: uv pip install flags

Skipped entirely if no NVIDIA GPU is detected (all current packages require "nvidia" in their requires list).

No external scripts are downloaded or executed.

install_sageattention(python_exe, install_path, deps, log)

Install SageAttention wheel(s) based on GPU compute capability.

Iterates all entries in deps.pip_packages.sageattention_wheels whose compute-capability range matches the detected GPU. On Blackwell GPUs this installs both SageAttention 2 (stable INT8/FP16 backend used by --use-sage-attention) and SageAttention 3 (experimental FP4 backend selectable via KJNodes).

Checksums are looked up from tools_manifest.json rather than hardcoded in dependencies.json. Falls back to PyPI sageattention with --no-build-isolation only when no pre-built wheel was installed.

Parameters:

Name Type Description Default
python_exe Path

Path to the venv Python executable.

required
install_path Path

Root installation directory.

required
deps DependenciesConfig

Parsed dependencies.json.

required
log InstallerLogger

Installer logger for user-facing messages.

required
Source code in src/installer/optimizations.py
def install_sageattention(
    python_exe: Path,
    install_path: Path,
    deps: DependenciesConfig,
    log: InstallerLogger,
) -> None:
    """Install pre-built SageAttention wheel(s) matched to the detected GPU.

    Every entry in ``deps.pip_packages.sageattention_wheels`` whose
    compute-capability range covers the detected GPU is installed.  On
    Blackwell GPUs that means both SageAttention 2 (the stable INT8/FP16
    backend behind ``--use-sage-attention``) and SageAttention 3 (the
    experimental FP4 backend selectable via KJNodes).

    Wheel checksums come from ``tools_manifest.json``; values embedded in
    ``dependencies.json`` serve only as a legacy fallback.  Only when no
    pre-built wheel could be installed at all is a source build of the
    PyPI ``sageattention`` package (``--no-build-isolation``) attempted.

    Args:
        python_exe: Path to the venv Python executable.
        install_path: Root installation directory.
        deps: Parsed ``dependencies.json``.
        log: Installer logger for user-facing messages.
    """
    from src.utils.download import download_file

    # Resolve the GPU compute capability: native probe first, then ask
    # torch inside the venv, and bail out quietly if neither works.
    capability = get_compute_capability()
    if capability is None:
        capability = _get_compute_capability_from_torch(python_exe)
    if capability is None:
        log.info("Could not detect GPU compute capability — skipping SageAttention.")
        return

    major, minor = capability[0], capability[1]
    log.sub(f"GPU compute capability: {major}.{minor}")

    # Wheel checksums are verified against the tools manifest.
    from src.installer.environment import load_tools_manifest, lookup_wheel_checksum

    manifest = load_tools_manifest(install_path)

    wheel_entries = deps.pip_packages.sageattention_wheels
    # NOTE(review): assumes the installer runs on the same Python version as
    # the venv interpreter — confirm if the venv can use a different Python.
    py_version = (sys.version_info.major, sys.version_info.minor)

    # Start from the default CUDA tag for our builds; refine it with the
    # CUDA version torch reports, when available.
    cuda_tag = "cu130"
    cuda_ver = _get_cuda_version_from_torch(python_exe)
    if cuda_ver:
        pieces = cuda_ver.split(".")
        if len(pieces) >= 2:
            from src.utils.gpu import cuda_tag_from_version

            refined = cuda_tag_from_version((int(pieces[0]), int(pieces[1])))
            if refined:
                cuda_tag = refined

    installed = 0

    for entry in wheel_entries:
        if not entry.matches_gpu(capability):
            continue

        # Skip (but count) packages that are already importable in the venv.
        module_name = entry.name.replace("-", "_")
        existing = _check_package_installed(python_exe, module_name)
        if existing:
            log.sub(f"{entry.name} already installed: v{existing}", style="success")
            installed += 1
            continue

        resolution = entry.resolve(py_version, cuda_tag)
        if resolution is None:
            log.info(f"{entry.name}: no wheel for Python {py_version[0]}.{py_version[1]}, skipping.")
            continue

        wheel_name, wheel_url, _legacy_checksum = resolution
        # Manifest checksum wins; the config-embedded one is a legacy fallback.
        checksum = lookup_wheel_checksum(manifest, wheel_url) or _legacy_checksum
        wheel_file = python_exe.parent.parent / f"{wheel_name}.whl"
        log.sub(f"Installing {entry.name} from pre-built wheel...")

        try:
            download_file(wheel_url, wheel_file, checksum=checksum, mirrors=deps.mirrors)
            uv_install(python_exe, [str(wheel_file)], ignore_errors=True)
            version = _check_package_installed(python_exe, module_name)
            if version:
                log.sub(f"{entry.name} installed: v{version}", style="success")
                installed += 1
            else:
                log.warning(f"{entry.name} wheel installed but not importable.", level=2)
        except Exception as exc:
            # Best-effort: a failed wheel must not abort the installer.
            log.warning(f"Failed to install {entry.name} wheel: {exc}", level=2)
        finally:
            # Always remove the downloaded wheel, even on failure.
            wheel_file.unlink(missing_ok=True)

    if installed:
        return

    # No pre-built wheel worked — last resort: compile from PyPI sources.
    log.sub("No pre-built SageAttention wheel available — trying PyPI (may need to compile)...")
    try:
        uv_install(
            python_exe,
            ["sageattention"],
            no_build_isolation=True,
            ignore_errors=True,
            timeout=600,
        )
        pypi_version = _check_package_installed(python_exe, "sageattention")
        if pypi_version:
            log.sub(f"sageattention installed from PyPI: v{pypi_version}", style="success")
        else:
            log.warning("sageattention could not be installed from PyPI.", level=2)
    except CommandError:
        log.warning("sageattention compilation from PyPI failed.", level=2)

install_optimizations(python_exe, comfy_path, install_path, deps, log)

Install GPU optimization packages from the config-driven list.

Iterates over deps.optimizations.packages, filters by platform and GPU, and installs each compatible package via uv.

Skipped entirely if no NVIDIA GPU is detected (all current packages require "nvidia").

Parameters:

Name Type Description Default
python_exe Path

Path to the venv Python executable.

required
comfy_path Path

ComfyUI repository directory.

required
install_path Path

Root installation directory.

required
deps DependenciesConfig

Parsed dependencies.json.

required
log InstallerLogger

Installer logger for user-facing messages.

required
Source code in src/installer/optimizations.py
def install_optimizations(
    python_exe: Path,
    comfy_path: Path,
    install_path: Path,
    deps: DependenciesConfig,
    log: InstallerLogger,
) -> None:
    """Install GPU optimization packages from the config-driven list.

    Iterates over ``deps.optimizations.packages``, filters by platform
    and GPU, and installs each compatible package via ``uv``.

    Skipped entirely if no NVIDIA GPU is detected (all current packages
    require ``"nvidia"``).

    Args:
        python_exe: Path to the venv Python executable.
        comfy_path: ComfyUI repository directory.
        install_path: Root installation directory.
        deps: Parsed ``dependencies.json``.
        log: Installer logger for user-facing messages.
    """
    has_nvidia = detect_nvidia_gpu()

    # Fallback to a PyTorch sanity check if nvidia-smi failed.  This probe
    # is best-effort: a hung, missing, or broken venv interpreter must not
    # abort the installer, so subprocess errors (TimeoutExpired, missing
    # executable) are treated the same as "no GPU detected".
    if not has_nvidia:
        code = "import torch; print('YES' if torch.cuda.is_available() and torch.version.cuda else 'NO')"
        try:
            result = subprocess.run(
                [str(python_exe), "-c", code],
                capture_output=True, text=True, timeout=30
            )
        except (OSError, subprocess.SubprocessError):
            result = None
        if result is not None and result.returncode == 0 and "YES" in result.stdout:
            log.sub("NVIDIA GPU detected via PyTorch fallback.", style="success")
            has_nvidia = True

    if not has_nvidia:
        log.info("No NVIDIA GPU — skipping GPU optimizations.")
        return

    platform = _get_current_platform()

    # Gather the list of packages from config.
    packages: list[OptimizationPackage] = []
    if deps.optimizations:
        packages = deps.optimizations.packages

    if not packages:
        log.info("No optimization packages configured.")
        return

    log.item("Installing GPU optimization packages...")

    # Some packages build CUDA extensions and expect CUDA_HOME; mirror it
    # from CUDA_PATH (the Windows convention) when available.
    cuda_path = os.environ.get("CUDA_PATH")
    if cuda_path:
        os.environ["CUDA_HOME"] = cuda_path

    # Detect the CUDA and torch versions once, up front, for all packages.
    cuda_ver = _get_cuda_version_from_torch(python_exe)
    if cuda_ver:
        log.sub(f"CUDA {cuda_ver} detected from torch.", style="success")
    else:
        log.warning("Could not detect CUDA from torch.", level=2)

    torch_ver = _get_torch_version(python_exe)

    # Install each compatible package (skip sageattention — handled separately).
    for pkg in packages:
        if pkg.name == "sageattention":
            continue  # Handled by install_sageattention()

        if not _check_requirements(
            pkg.requires,
            has_nvidia=has_nvidia,
            platform=platform,
        ):
            log.info(f"{pkg.name}: skipped (requires {pkg.requires}, env={platform}).")
            continue

        _install_package(pkg, python_exe, platform, torch_ver, log)

    # SageAttention: uses dedicated wheel-based installer.
    install_sageattention(python_exe, install_path, deps, log)