AMD ROCm for local training and inferencing

Hello everyone! Has anyone tried installing AMD ROCm on the FW16 and training/running small local models? I'm planning to use Ubuntu with ROCm + PyTorch to get started on my Uni projects. Any advice?

Hey there 🙂

I didn't actually try it, because I don't work with ML at all. But if you don't have a FW16 yet, I could run some tests for you?

Here is a useful thread:

I am using it on EndeavourOS (Arch). I had to install rocm-hip-sdk and set the environment variable HSA_OVERRIDE_GFX_VERSION=11.0.0

(more on this requirement here: RuntimeError: HIP error: invalid device function - if there is a solution already existed against this issue. · Issue #2536 · ROCm/ROCm · GitHub)
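If you would rather set the override from inside Python, here is a minimal sketch (my own addition, assuming the variable just needs to be in the environment before PyTorch initializes HIP; exporting it in your shell works just as well):

import os

# Must be in the environment before PyTorch touches the GPU.
os.environ["HSA_OVERRIDE_GFX_VERSION"] = "11.0.0"

import torch

if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))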

Keep in mind that the FW16 dGPU, the Radeon™ RX 7700S, is not officially supported:

Also, here are some useful scripts to test your installation (PyTorch's ROCm builds expose the GPU through the cuda device name, which is why everything below says cuda). I used Python 3.11:
Check ROCm:

import grp
import os
import pwd
import subprocess

import torch

devices = []
try:
    print("\n\nChecking ROCm support...")
    # rocminfo lists every HSA agent (CPUs and GPUs). Agent-level fields
    # are indented exactly two spaces; the deeper-indented "Name:" lines
    # belong to ISA entries and are skipped on purpose.
    result = subprocess.run(["rocminfo"], stdout=subprocess.PIPE)
    for line in result.stdout.decode("utf-8").splitlines():
        if line.startswith("  Name:"):
            devices.append(line.split("Name:", 1)[1].strip())
    if devices:
        print("GOOD: ROCm devices found:", len(devices))
    else:
        print("BAD: No ROCm devices found.")

    print("Checking PyTorch...")
    # A trivial tensor op proves the torch install itself is healthy.
    x = torch.rand(5, 3)
    if x.shape == (5, 3):
        print("GOOD: PyTorch is working fine.")
    else:
        print("BAD: PyTorch is NOT working.")

    print("Checking user groups...")
    # ROCm is accessed through /dev/kfd and /dev/dri, which on most
    # distros are only accessible to the render and video groups.
    user = os.getlogin()
    groups = [g.gr_name for g in grp.getgrall() if user in g.gr_mem]
    gid = pwd.getpwnam(user).pw_gid
    groups.append(grp.getgrgid(gid).gr_name)
    if "render" in groups and "video" in groups:
        print("GOOD: The user", user, "is in the RENDER and VIDEO groups.")
    else:
        print(
            "BAD: The user",
            user,
            "is NOT in the RENDER and VIDEO groups. PyTorch needs these to use HIP resources.",
        )

    # ROCm builds of PyTorch report the GPU through the regular cuda API.
    if torch.cuda.is_available():
        print("GOOD: PyTorch ROCm support found.")
        print("Testing PyTorch ROCm support...")
        t = torch.tensor([5, 5, 5], dtype=torch.int64, device="cuda")
        if str(t) == "tensor([5, 5, 5], device='cuda:0')":
            print("Everything fine! You can run PyTorch code inside of:")
            for device in devices:
                print(f"---> {device}")
    else:
        print("BAD: PyTorch ROCm support NOT found.")
except Exception:
    print(
        "Cannot find rocminfo command information. Unable to determine if AMDGPU drivers with ROCm support were installed."
    )
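As a quicker sanity check (my own two cents, not part of the script above), the wheels also expose their build info directly; torch.version.hip is a version string on ROCm builds and None on CUDA/CPU builds:

import torch

print(torch.__version__)  # ROCm wheels carry a +rocm suffix
print(torch.version.hip)  # HIP version string, or None on non-ROCm builds
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))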

Check CUDA GPU:

from os import environ
from time import time

import torch


class SmallModel(torch.nn.Module):
    """A stack of large Linear layers, sized to keep the GPU busy."""

    def __init__(self, in_f: int) -> None:
        super().__init__()
        layers = [torch.nn.Linear(in_f, 10000), torch.nn.ReLU()]
        for _ in range(8):
            layers += [torch.nn.Linear(10000, 10000), torch.nn.ReLU()]
        layers.append(torch.nn.Linear(10000, 100))
        self.mlp = torch.nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.mlp(x)


# Uncomment on unsupported cards; this must happen before torch
# initializes HIP:
# environ["HSA_OVERRIDE_GFX_VERSION"] = "11.0.0"
device = "cuda:0"  # ROCm builds of PyTorch use the "cuda" device name
# device = "cpu"   # switch to compare against the CPU

a = torch.randn((100, 100), device=device)
m = SmallModel(100).to(device)

start_time = time()
with torch.no_grad():  # pure inference, no autograd graph needed
    for _ in range(100):
        b = m(a)
if device.startswith("cuda"):
    torch.cuda.synchronize()  # GPU kernels run asynchronously; wait for them

print(f"Total time: {(time() - start_time) * 1000:.1f} ms with device {device}")