Source code for onnxscript.tools.transformers_models

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
# pylint: disable=import-outside-toplevel
from __future__ import annotations

import contextlib
import random
from typing import Any, Sequence

import onnx
import onnx.inliner
import torch

import onnxscript.optimizer
import onnxscript.rewriter


def export_to_onnx(
    model: Any,
    *args: Sequence[Any],
    optimize: bool = True,
    export_api: bool = True,
    no_grad: bool = False,
) -> onnx.ModelProto:
    """
    Export a model to ONNX.
    If optimize is True, it calls *onnxscript.optimizer.optimize*,
    *onnxscript.rewriter.rewriter*, *onnx.inliner.inline_local_functions*.
    If *export_api* is True, the function uses ``torch.onnx.export``
    and not ``torch.onnx.dynamo_export``.
    """
    # torch.no_grad() disables gradient tracking during export; otherwise a
    # null context keeps the same code path without duplicating it.
    context = torch.no_grad() if no_grad else contextlib.nullcontext()
    with context:
        if export_api:
            prog = torch.onnx.export(model, args, dynamo=True)  # pylint: disable=no-value-for-parameter
        else:
            prog = torch.onnx.dynamo_export(model, *args)
    assert prog is not None
    model_proto = prog.model_proto
    if optimize:
        model_proto = onnxscript.optimizer.optimize(
            model_proto,
            num_iterations=2,
            onnx_shape_inference=True,
        )
        model_proto = onnxscript.rewriter.rewrite(model_proto)
        model_proto = onnx.inliner.inline_local_functions(model_proto)
    return model_proto
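

# A minimal usage sketch, not part of the original module: exporting a tiny
# hypothetical torch.nn.Module through export_to_onnx. TinyMLP is an
# illustrative stand-in for any model.
def _example_export_to_onnx() -> onnx.ModelProto:
    class TinyMLP(torch.nn.Module):  # hypothetical stand-in for any module
        def __init__(self):
            super().__init__()
            self.fc = torch.nn.Linear(8, 4)

        def forward(self, x):
            return torch.relu(self.fc(x))

    # no_grad avoids tracking gradients during export; optimize runs the
    # onnxscript optimizer, the rewriter, and the ONNX inliner on the result.
    return export_to_onnx(TinyMLP(), torch.randn(2, 8), optimize=True, no_grad=True)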


def ids_tensor(
    shape: Sequence[int],
    vocab_size: int,
    rng: random.Random | None = None,
    name: str | None = None,
):
    """Creates a random int32 tensor of the shape within the vocab size."""
    del name  # unused

    if rng is None:
        rng = random.Random()

    total_dims = 1
    for dim in shape:
        total_dims *= dim

    values = []
    for _ in range(total_dims):
        values.append(rng.randint(0, vocab_size - 1))

    return torch.tensor(data=values, dtype=torch.long).view(shape).contiguous()
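

# Usage sketch, not part of the original module: draw a reproducible (2, 8)
# batch of token ids; the vocabulary size 32000 is an arbitrary example value.
def _example_ids_tensor() -> torch.Tensor:
    rng = random.Random(0)  # seeded for reproducibility
    tokens = ids_tensor((2, 8), vocab_size=32000, rng=rng)
    assert tokens.dtype == torch.int64
    assert int(tokens.min()) >= 0 and int(tokens.max()) < 32000
    return tokens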


def get_input_dims_for_llm(
    dynamic_shapes: bool, warmup: int, repeat: int
) -> list[tuple[int, int]]:
    """Returns input dimensions for model such as llama, phi, ..."""
    if not dynamic_shapes:
        return [(2, 1024)] * (warmup + repeat)
    w = [(2, 1024), (3, 1024), (2, 1096)] * warmup
    w = w[:warmup]
    r = [(2, 1024), (3, 1024), (4, 1024), (2, 1096), (2, 1112)] * repeat
    r = r[:repeat]
    return w + r
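

# Illustration derived from the lists above, not part of the original module:
# with warmup=2 and repeat=3, static shapes repeat one (batch, sequence) pair,
# while dynamic shapes cycle through several pairs.
def _example_get_input_dims_for_llm() -> None:
    assert get_input_dims_for_llm(False, warmup=2, repeat=3) == [(2, 1024)] * 5
    assert get_input_dims_for_llm(True, warmup=2, repeat=3) == [
        (2, 1024), (3, 1024),             # warmup: first two entries of the warmup cycle
        (2, 1024), (3, 1024), (4, 1024),  # repeat: first three entries of the repeat cycle
    ]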


def get_model_and_inputs(
    model: str,
    config: str,
    dynamic_shapes: bool,
    device: str = "cpu",
    num_hidden_layers: int = 1,
    with_mask: bool = True,
    implementation: str = "eager",
    dtype: str | None = None,
    warmup: int = 5,
    repeat: int = 10,
) -> tuple[Any, list[tuple[torch.Tensor, ...]], dict | None]:
    """
    Returns a model and a list of dummy inputs.

    Args:
        model: Model name: 'phi', 'llama', 'phi3', ...
        config: Configuration: 'small', 'medium', 'large', ...
        dynamic_shapes: Dynamic or static shapes.
        device: 'cpu' or 'cuda'.
        num_hidden_layers: Number of hidden layers.
        with_mask: One input or two inputs.
        implementation: 'eager' or 'sdpa'.
        dtype: If specified, cast the model and the inputs to this type.
        warmup: Number of input sets to generate for warmup.
        repeat: Number of input sets to generate for the repeated runs.

    Returns:
        The model, the list of input sets, and the dynamic shapes definition
        (or None).
    """
    if model == "llama":
        import onnxscript.tools.transformers_models.llama as m_llama

        tmodel, inputs, dynamic_shapes_def = m_llama.get_llama_model_from_config(
            warmup=warmup,
            repeat=repeat,
            implementation=implementation,
            with_mask=with_mask,
            num_hidden_layers=num_hidden_layers,
            dynamic_shapes=dynamic_shapes,
            config=config,
        )
    elif model == "mistral":
        import onnxscript.tools.transformers_models.mistral as m_mistral

        tmodel, inputs, dynamic_shapes_def = m_mistral.get_mistral_model_from_config(
            warmup=warmup,
            repeat=repeat,
            implementation=implementation,
            with_mask=with_mask,
            num_hidden_layers=num_hidden_layers,
            dynamic_shapes=dynamic_shapes,
            config=config,
        )
    elif model == "phi":
        import onnxscript.tools.transformers_models.phi as m_phi

        tmodel, inputs, dynamic_shapes_def = m_phi.get_phi_model_from_config(
            warmup=warmup,
            repeat=repeat,
            implementation=implementation,
            with_mask=with_mask,
            num_hidden_layers=num_hidden_layers,
            dynamic_shapes=dynamic_shapes,
            config=config,
        )
    elif model == "phi3":
        import onnxscript.tools.transformers_models.phi3 as m_phi3

        tmodel, inputs, dynamic_shapes_def = m_phi3.get_phi3_model_from_config(
            warmup=warmup,
            repeat=repeat,
            implementation=implementation,
            with_mask=with_mask,
            num_hidden_layers=num_hidden_layers,
            dynamic_shapes=dynamic_shapes,
            config=config,
        )
    else:
        raise ValueError(f"Model {model!r} is unknown.")

    if dtype is not None:
        dt = getattr(torch, dtype)
        tmodel = tmodel.to(dt)
        # Integer (token id) inputs keep their dtype; floating-point inputs are cast.
        inputs = [
            tuple((i if i.dtype in {torch.int64, torch.int32} else i.to(dt)) for i in inp)
            for inp in inputs
        ]

    if device == "cuda":
        tmodel = tmodel.to("cuda")
        inputs = [tuple(i.to("cuda") for i in inp) for inp in inputs]

    return tmodel, inputs, dynamic_shapes_def
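

# End-to-end sketch, not part of the original module: build a one-layer llama
# variant and export its first dummy input set to ONNX. Assumes the optional
# `transformers` dependency used by the llama submodule is installed; the
# output path is an arbitrary example.
def _example_get_model_and_inputs(path: str = "llama_small.onnx") -> None:
    tmodel, inputs, _dynamic_shapes = get_model_and_inputs(
        model="llama",
        config="small",
        dynamic_shapes=False,
        num_hidden_layers=1,
        warmup=1,
        repeat=1,
    )
    proto = export_to_onnx(tmodel, *inputs[0], optimize=True, no_grad=True)
    onnx.save(proto, path)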