Merge tag 'char-misc-5.15-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull habanalabs updates from Greg KH:
 "Here is another round of misc driver patches for 5.15-rc1. In here is
  only updates for the Habanalabs driver. This request is late because
  the previously-objected-to dma-buf patches are all removed and some
  fixes that you and others found are now included in here as well.

  All of these have been in linux-next for well over a week with no
  reports of problems, and they are all self-contained to only this one
  driver. Full details are in the shortlog"

* tag 'char-misc-5.15-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (61 commits)
  habanalabs/gaudi: hwmon default card name
  habanalabs: add support for f/w reset
  habanalabs/gaudi: block ICACHE_BASE_ADDERESS_HIGH in TPC
  habanalabs: cannot sleep while holding spinlock
  habanalabs: never copy_from_user inside spinlock
  habanalabs: remove unnecessary device status check
  habanalabs: disable IRQ in user interrupts spinlock
  habanalabs: add "in device creation" status
  habanalabs/gaudi: invalidate PMMU mem cache on init
  habanalabs/gaudi: size should be printed in decimal
  habanalabs/gaudi: define DC POWER for secured PMC
  habanalabs/gaudi: unmask out of bounds SLM access interrupt
  habanalabs: add userptr_lookup node in debugfs
  habanalabs/gaudi: fetch TPC/MME ECC errors from F/W
  habanalabs: modify multi-CS to wait on stream masters
  habanalabs/gaudi: add monitored SOBs to state dump
  habanalabs/gaudi: restore user registers when context opens
  habanalabs/gaudi: increase boot fit timeout
  habanalabs: update to latest firmware headers
  habanalabs/gaudi: minimize number of register reads
  ...
@@ -215,6 +215,17 @@ Description:    Sets the skip reset on timeout option for the device. Value of
                 "0" means device will be reset in case some CS has timed out,
                 otherwise it will not be reset.
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/state_dump
+Date:           Oct 2021
+KernelVersion:  5.15
+Contact:        ynudelman@habana.ai
+Description:    Gets the state dump occurring on a CS timeout or failure.
+                State dump is used for debug and is created each time in case of
+                a problem in a CS execution, before reset.
+                Reading from the node returns the newest state dump available.
+                Writing an integer X discards X state dumps, so that the
+                next read would return X+1-st newest state dump.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
 Date:           Mar 2020
 KernelVersion:  5.6
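As an illustration (not part of the patch), a minimal userspace sketch of how the new state_dump node could be exercised. Device index 0, the default debugfs mount point, and the fixed buffer size are assumptions; error handling is deliberately minimal.

/* Hypothetical userspace sketch for the state_dump debugfs node. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *node = "/sys/kernel/debug/habanalabs/hl0/state_dump";
	static char buf[65536];
	ssize_t n;
	int fd;

	fd = open(node, O_RDWR);
	if (fd < 0)
		return 1;

	/* Reading returns the newest state dump available */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("%s", buf);
	}

	/* Writing "1" discards one dump, so the next read returns the
	 * second-newest state dump.
	 */
	if (write(fd, "1", 1) < 0)
		perror("write");

	close(fd);
	return 0;
}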
@@ -230,6 +241,14 @@ Description:    Displays a list with information about the currently user
                 pointers (user virtual addresses) that are pinned and mapped
                 to DMA addresses
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
+Date:           Aug 2021
+KernelVersion:  5.15
+Contact:        ogabbay@kernel.org
+Description:    Allows to search for specific user pointers (user virtual
+                addresses) that are pinned and mapped to DMA addresses, and see
+                their resolution to the specific dma address.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/vm
 Date:           Jan 2019
 KernelVersion:  5.1
@@ -10,4 +10,5 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
 		common/asid.o common/habanalabs_ioctl.o \
 		common/command_buffer.o common/hw_queue.o common/irq.o \
 		common/sysfs.o common/hwmon.o common/memory.o \
-		common/command_submission.o common/firmware_if.o
+		common/command_submission.o common/firmware_if.o \
+		common/state_dump.o
@@ -314,8 +314,6 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 
 	spin_lock(&mgr->cb_lock);
 	rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
-	if (rc < 0)
-		rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL);
 	spin_unlock(&mgr->cb_lock);
 
 	if (rc < 0) {
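A side note on the hunk above, with a hedged sketch that is not the driver's fix (which simply drops the sleeping retry): the removed GFP_KERNEL fallback could sleep while cb_lock was held. When a sleeping allocation is genuinely needed to back an ID allocated under a spinlock, the usual kernel idiom is idr_preload(), roughly:

/* Illustrative idr_preload() pattern, assuming a caller-provided idr
 * and spinlock; shown only to contrast with the GFP_ATOMIC-only fix.
 */
#include <linux/idr.h>
#include <linux/spinlock.h>

static int alloc_handle_sketch(struct idr *handles, spinlock_t *lock,
				void *obj)
{
	int id;

	idr_preload(GFP_KERNEL);	/* may sleep, done before locking */

	spin_lock(lock);
	/* GFP_NOWAIT never sleeps; it consumes the preloaded buffer */
	id = idr_alloc(handles, obj, 1, 0, GFP_NOWAIT);
	spin_unlock(lock);

	idr_preload_end();

	return id;
}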
@@ -552,7 +550,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
 
 	vma->vm_private_data = cb;
 
-	rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
+	rc = hdev->asic_funcs->mmap(hdev, vma, cb->kernel_address,
 					cb->bus_address, cb->size);
 	if (rc) {
 		spin_lock(&cb->lock);
[File diff suppressed because it is too large]
@@ -9,16 +9,70 @@
 
 #include <linux/slab.h>
 
+void hl_encaps_handle_do_release(struct kref *ref)
+{
+	struct hl_cs_encaps_sig_handle *handle =
+		container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
+	struct hl_ctx *ctx = handle->hdev->compute_ctx;
+	struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;
+
+	spin_lock(&mgr->lock);
+	idr_remove(&mgr->handles, handle->id);
+	spin_unlock(&mgr->lock);
+
+	kfree(handle);
+}
+
+static void hl_encaps_handle_do_release_sob(struct kref *ref)
+{
+	struct hl_cs_encaps_sig_handle *handle =
+		container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
+	struct hl_ctx *ctx = handle->hdev->compute_ctx;
+	struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;
+
+	/* if we're here, then there was a signals reservation but cs with
+	 * encaps signals wasn't submitted, so need to put refcount
+	 * to hw_sob taken at the reservation.
+	 */
+	hw_sob_put(handle->hw_sob);
+
+	spin_lock(&mgr->lock);
+	idr_remove(&mgr->handles, handle->id);
+	spin_unlock(&mgr->lock);
+
+	kfree(handle);
+}
+
+static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr)
+{
+	spin_lock_init(&mgr->lock);
+	idr_init(&mgr->handles);
+}
+
+static void hl_encaps_sig_mgr_fini(struct hl_device *hdev,
+			struct hl_encaps_signals_mgr *mgr)
+{
+	struct hl_cs_encaps_sig_handle *handle;
+	struct idr *idp;
+	u32 id;
+
+	idp = &mgr->handles;
+
+	if (!idr_is_empty(idp)) {
+		dev_warn(hdev->dev, "device released while some encaps signals handles are still allocated\n");
+		idr_for_each_entry(idp, handle, id)
+			kref_put(&handle->refcount,
+					hl_encaps_handle_do_release_sob);
+	}
+
+	idr_destroy(&mgr->handles);
+}
+
 static void hl_ctx_fini(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
 	int i;
 
-	/* Release all allocated pending cb's, those cb's were never
-	 * scheduled so it is safe to release them here
-	 */
-	hl_pending_cb_list_flush(ctx);
-
 	/* Release all allocated HW block mapped list entries and destroy
 	 * the mutex.
 	 */
@@ -53,6 +107,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 	hl_cb_va_pool_fini(ctx);
 	hl_vm_ctx_fini(ctx);
 	hl_asid_free(hdev, ctx->asid);
+	hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
 
 	/* Scrub both SRAM and DRAM */
 	hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
@@ -130,9 +185,6 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
 {
 	if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
 		return;
-
-	dev_warn(hdev->dev,
-		"user process released device but its command submissions are still executing\n");
 }
 
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
@@ -144,11 +196,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 	kref_init(&ctx->refcount);
 
 	ctx->cs_sequence = 1;
-	INIT_LIST_HEAD(&ctx->pending_cb_list);
-	spin_lock_init(&ctx->pending_cb_lock);
 	spin_lock_init(&ctx->cs_lock);
 	atomic_set(&ctx->thread_ctx_switch_token, 1);
-	atomic_set(&ctx->thread_pending_cb_token, 1);
 	ctx->thread_ctx_switch_wait_token = 0;
 	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
 				sizeof(struct hl_fence *),
@@ -200,6 +249,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 			goto err_cb_va_pool_fini;
 		}
 
+		hl_encaps_sig_mgr_init(&ctx->sig_mgr);
+
 		dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
 	}
 
@@ -229,31 +280,86 @@ int hl_ctx_put(struct hl_ctx *ctx)
 	return kref_put(&ctx->refcount, hl_ctx_do_release);
 }
 
-struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
+/*
+ * hl_ctx_get_fence_locked - get CS fence under CS lock
+ *
+ * @ctx: pointer to the context structure.
+ * @seq: CS sequences number
+ *
+ * @return valid fence pointer on success, NULL if fence is gone, otherwise
+ * error pointer.
+ *
+ * NOTE: this function shall be called with cs_lock locked
+ */
+static struct hl_fence *hl_ctx_get_fence_locked(struct hl_ctx *ctx, u64 seq)
 {
 	struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
 	struct hl_fence *fence;
 
-	spin_lock(&ctx->cs_lock);
-
-	if (seq >= ctx->cs_sequence) {
-		spin_unlock(&ctx->cs_lock);
+	if (seq >= ctx->cs_sequence)
 		return ERR_PTR(-EINVAL);
-	}
 
-	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
-		spin_unlock(&ctx->cs_lock);
+	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence)
 		return NULL;
-	}
 
 	fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
 	hl_fence_get(fence);
+
+	return fence;
+}
+
+struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
+{
+	struct hl_fence *fence;
+
+	spin_lock(&ctx->cs_lock);
+
+	fence = hl_ctx_get_fence_locked(ctx, seq);
+
 	spin_unlock(&ctx->cs_lock);
 
 	return fence;
 }
 
+/*
+ * hl_ctx_get_fences - get multiple CS fences under the same CS lock
+ *
+ * @ctx: pointer to the context structure.
+ * @seq_arr: array of CS sequences to wait for
+ * @fence: fence array to store the CS fences
+ * @arr_len: length of seq_arr and fence_arr
+ *
+ * @return 0 on success, otherwise non 0 error code
+ */
+int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
+				struct hl_fence **fence, u32 arr_len)
+{
+	struct hl_fence **fence_arr_base = fence;
+	int i, rc = 0;
+
+	spin_lock(&ctx->cs_lock);
+
+	for (i = 0; i < arr_len; i++, fence++) {
+		u64 seq = seq_arr[i];
+
+		*fence = hl_ctx_get_fence_locked(ctx, seq);
+
+		if (IS_ERR(*fence)) {
+			dev_err(ctx->hdev->dev,
+				"Failed to get fence for CS with seq 0x%llx\n",
+					seq);
+			rc = PTR_ERR(*fence);
+			break;
+		}
+	}
+
+	spin_unlock(&ctx->cs_lock);
+
+	if (rc)
+		hl_fences_put(fence_arr_base, i);
+
+	return rc;
+}
+
 /*
  * hl_ctx_mgr_init - initialize the context manager
  *
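For orientation, a hedged sketch of how a caller might consume the new hl_ctx_get_fences() helper. The function name is hypothetical and the actual wait step is elided; note that on failure the helper already drops the fences it managed to take, so the caller only puts them on success.

/* Illustrative caller, not from the patch: fetch all fences for a
 * multi-CS wait under a single cs_lock acquisition, then release them.
 */
static int multi_cs_wait_sketch(struct hl_ctx *ctx, u64 *seq_arr, u32 arr_len)
{
	struct hl_fence **fence_arr;
	int rc;

	fence_arr = kcalloc(arr_len, sizeof(*fence_arr), GFP_KERNEL);
	if (!fence_arr)
		return -ENOMEM;

	/* On error the helper has already put the fences it acquired */
	rc = hl_ctx_get_fences(ctx, seq_arr, fence_arr, arr_len);
	if (rc)
		goto free_arr;

	/* ... wait on the fences here ... */

	hl_fences_put(fence_arr, arr_len);

free_arr:
	kfree(fence_arr);
	return rc;
}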
@@ -209,12 +209,12 @@ static int userptr_show(struct seq_file *s, void *data)
 		if (first) {
 			first = false;
 			seq_puts(s, "\n");
-			seq_puts(s, " user virtual address     size     dma dir\n");
+			seq_puts(s, " pid      user virtual address     size     dma dir\n");
 			seq_puts(s, "----------------------------------------------------------\n");
 		}
-		seq_printf(s,
-			" 0x%-14llx      %-10u    %-30s\n",
-			userptr->addr, userptr->size, dma_dir[userptr->dir]);
+		seq_printf(s, " %-7d  0x%-14llx      %-10llu    %-30s\n",
+			userptr->pid, userptr->addr, userptr->size,
+			dma_dir[userptr->dir]);
 	}
 
 	spin_unlock(&dev_entry->userptr_spinlock);
@@ -235,7 +235,7 @@ static int vm_show(struct seq_file *s, void *data)
 	struct hl_vm_hash_node *hnode;
 	struct hl_userptr *userptr;
 	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
-	enum vm_type_t *vm_type;
+	enum vm_type *vm_type;
 	bool once = true;
 	u64 j;
 	int i;
@@ -261,7 +261,7 @@ static int vm_show(struct seq_file *s, void *data)
 		if (*vm_type == VM_TYPE_USERPTR) {
 			userptr = hnode->ptr;
 			seq_printf(s,
-				" 0x%-14llx      %-10u\n",
+				" 0x%-14llx      %-10llu\n",
 				hnode->vaddr, userptr->size);
 		} else {
 			phys_pg_pack = hnode->ptr;
@@ -320,6 +320,77 @@ static int vm_show(struct seq_file *s, void *data)
 	return 0;
 }
 
+static int userptr_lookup_show(struct seq_file *s, void *data)
+{
+	struct hl_debugfs_entry *entry = s->private;
+	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+	struct scatterlist *sg;
+	struct hl_userptr *userptr;
+	bool first = true;
+	u64 total_npages, npages, sg_start, sg_end;
+	dma_addr_t dma_addr;
+	int i;
+
+	spin_lock(&dev_entry->userptr_spinlock);
+
+	list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
+		if (dev_entry->userptr_lookup >= userptr->addr &&
+		dev_entry->userptr_lookup < userptr->addr + userptr->size) {
+			total_npages = 0;
+			for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents,
+					i) {
+				npages = hl_get_sg_info(sg, &dma_addr);
+				sg_start = userptr->addr +
+					total_npages * PAGE_SIZE;
+				sg_end = userptr->addr +
+					(total_npages + npages) * PAGE_SIZE;
+
+				if (dev_entry->userptr_lookup >= sg_start &&
+				    dev_entry->userptr_lookup < sg_end) {
+					dma_addr += (dev_entry->userptr_lookup -
+							sg_start);
+					if (first) {
+						first = false;
+						seq_puts(s, "\n");
+						seq_puts(s, " user virtual address         dma address       pid        region start     region size\n");
+						seq_puts(s, "---------------------------------------------------------------------------------------\n");
+					}
+					seq_printf(s, " 0x%-18llx  0x%-16llx  %-8u  0x%-16llx %-12llu\n",
+						dev_entry->userptr_lookup,
+						(u64)dma_addr, userptr->pid,
+						userptr->addr, userptr->size);
+				}
+				total_npages += npages;
+			}
+		}
+	}
+
+	spin_unlock(&dev_entry->userptr_spinlock);
+
+	if (!first)
+		seq_puts(s, "\n");
+
+	return 0;
+}
+
+static ssize_t userptr_lookup_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *f_pos)
+{
+	struct seq_file *s = file->private_data;
+	struct hl_debugfs_entry *entry = s->private;
+	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+	ssize_t rc;
+	u64 value;
+
+	rc = kstrtoull_from_user(buf, count, 16, &value);
+	if (rc)
+		return rc;
+
+	dev_entry->userptr_lookup = value;
+
+	return count;
+}
+
 static int mmu_show(struct seq_file *s, void *data)
 {
 	struct hl_debugfs_entry *entry = s->private;
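A possible userspace use of the new node, as a hedged sketch (not from the patch): write a pinned user virtual address, then read back its DMA resolution. The path, device index and address are assumptions; per the write handler above, the input is parsed as hex (kstrtoull_from_user, base 16).

/* Hypothetical userspace sketch for the userptr_lookup debugfs node. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *node = "/sys/kernel/debug/habanalabs/hl0/userptr_lookup";
	const char *addr = "7f0000001000";	/* hex, no 0x prefix needed */
	char out[1024];
	ssize_t n;
	int fd;

	fd = open(node, O_WRONLY);
	if (fd < 0)
		return 1;
	write(fd, addr, strlen(addr));
	close(fd);

	fd = open(node, O_RDONLY);
	if (fd < 0)
		return 1;
	n = read(fd, out, sizeof(out) - 1);
	if (n > 0) {
		out[n] = '\0';
		printf("%s", out);	/* table with dma address, pid, region */
	}
	close(fd);
	return 0;
}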
@@ -349,7 +420,7 @@ static int mmu_show(struct seq_file *s, void *data)
 		return 0;
 	}
 
-	phys_addr = hops_info.hop_info[hops_info.used_hops - 1].hop_pte_val;
+	hl_mmu_va_to_pa(ctx, virt_addr, &phys_addr);
 
 	if (hops_info.scrambled_vaddr &&
 		(dev_entry->mmu_addr != hops_info.scrambled_vaddr))
@@ -491,11 +562,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	struct hl_ctx *ctx = hdev->compute_ctx;
 	struct hl_vm_hash_node *hnode;
+	u64 end_address, range_size;
 	struct hl_userptr *userptr;
-	enum vm_type_t *vm_type;
+	enum vm_type *vm_type;
 	bool valid = false;
-	u64 end_address;
-	u32 range_size;
 	int i, rc = 0;
 
 	if (!ctx) {
@@ -1043,6 +1113,60 @@ static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
 	return 0;
 }
 
+static ssize_t hl_state_dump_read(struct file *f, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	ssize_t rc;
+
+	down_read(&entry->state_dump_sem);
+	if (!entry->state_dump[entry->state_dump_head])
+		rc = 0;
+	else
+		rc = simple_read_from_buffer(
+			buf, count, ppos,
+			entry->state_dump[entry->state_dump_head],
+			strlen(entry->state_dump[entry->state_dump_head]));
+	up_read(&entry->state_dump_sem);
+
+	return rc;
+}
+
+static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	struct hl_device *hdev = entry->hdev;
+	ssize_t rc;
+	u32 size;
+	int i;
+
+	rc = kstrtouint_from_user(buf, count, 10, &size);
+	if (rc)
+		return rc;
+
+	if (size <= 0 || size >= ARRAY_SIZE(entry->state_dump)) {
+		dev_err(hdev->dev, "Invalid number of dumps to skip\n");
+		return -EINVAL;
+	}
+
+	if (entry->state_dump[entry->state_dump_head]) {
+		down_write(&entry->state_dump_sem);
+		for (i = 0; i < size; ++i) {
+			vfree(entry->state_dump[entry->state_dump_head]);
+			entry->state_dump[entry->state_dump_head] = NULL;
+			if (entry->state_dump_head > 0)
+				entry->state_dump_head--;
+			else
+				entry->state_dump_head =
+					ARRAY_SIZE(entry->state_dump) - 1;
+		}
+		up_write(&entry->state_dump_sem);
+	}
+
+	return count;
+}
+
 static const struct file_operations hl_data32b_fops = {
 	.owner = THIS_MODULE,
 	.read = hl_data_read32,
@@ -1110,12 +1234,19 @@ static const struct file_operations hl_security_violations_fops = {
 	.read = hl_security_violations_read
 };
 
+static const struct file_operations hl_state_dump_fops = {
+	.owner = THIS_MODULE,
+	.read = hl_state_dump_read,
+	.write = hl_state_dump_write
+};
+
 static const struct hl_info_list hl_debugfs_list[] = {
 	{"command_buffers", command_buffers_show, NULL},
 	{"command_submission", command_submission_show, NULL},
 	{"command_submission_jobs", command_submission_jobs_show, NULL},
 	{"userptr", userptr_show, NULL},
 	{"vm", vm_show, NULL},
+	{"userptr_lookup", userptr_lookup_show, userptr_lookup_write},
 	{"mmu", mmu_show, mmu_asid_va_write},
 	{"engines", engines_show, NULL}
 };
@@ -1172,6 +1303,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 	INIT_LIST_HEAD(&dev_entry->userptr_list);
 	INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
 	mutex_init(&dev_entry->file_mutex);
+	init_rwsem(&dev_entry->state_dump_sem);
 	spin_lock_init(&dev_entry->cb_spinlock);
 	spin_lock_init(&dev_entry->cs_spinlock);
 	spin_lock_init(&dev_entry->cs_job_spinlock);
@@ -1283,6 +1415,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 				dev_entry->root,
 				&hdev->skip_reset_on_timeout);
 
+	debugfs_create_file("state_dump",
+				0600,
+				dev_entry->root,
+				dev_entry,
+				&hl_state_dump_fops);
+
 	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
 		debugfs_create_file(hl_debugfs_list[i].name,
 					0444,
@@ -1297,6 +1435,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 void hl_debugfs_remove_device(struct hl_device *hdev)
 {
 	struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
+	int i;
 
 	debugfs_remove_recursive(entry->root);
 
@@ -1304,6 +1443,9 @@ void hl_debugfs_remove_device(struct hl_device *hdev)
 
 	vfree(entry->blob_desc.data);
 
+	for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i)
+		vfree(entry->state_dump[i]);
+
 	kfree(entry->entry_arr);
 }
 
@@ -1416,6 +1558,28 @@ void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
 	spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
 }
 
+/**
+ * hl_debugfs_set_state_dump - register state dump making it accessible via
+ *                             debugfs
+ * @hdev: pointer to the device structure
+ * @data: the actual dump data
+ * @length: the length of the data
+ */
+void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
+					unsigned long length)
+{
+	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+	down_write(&dev_entry->state_dump_sem);
+
+	dev_entry->state_dump_head = (dev_entry->state_dump_head + 1) %
+					ARRAY_SIZE(dev_entry->state_dump);
+	vfree(dev_entry->state_dump[dev_entry->state_dump_head]);
+	dev_entry->state_dump[dev_entry->state_dump_head] = data;
+
+	up_write(&dev_entry->state_dump_sem);
+}
+
 void __init hl_debugfs_init(void)
 {
 	hl_debug_root = debugfs_create_dir("habanalabs", NULL);
@@ -7,11 +7,11 @@
 
 #define pr_fmt(fmt)	"habanalabs: " fmt
 
+#include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
 
 #include <linux/pci.h>
 #include <linux/hwmon.h>
-#include <uapi/misc/habanalabs.h>
 
 enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
@@ -23,6 +23,8 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
 		status = HL_DEVICE_STATUS_NEEDS_RESET;
 	else if (hdev->disabled)
 		status = HL_DEVICE_STATUS_MALFUNCTION;
+	else if (!hdev->init_done)
+		status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
 	else
 		status = HL_DEVICE_STATUS_OPERATIONAL;
 
@@ -44,6 +46,7 @@ bool hl_device_operational(struct hl_device *hdev,
 	case HL_DEVICE_STATUS_NEEDS_RESET:
 		return false;
 	case HL_DEVICE_STATUS_OPERATIONAL:
+	case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
 	default:
 		return true;
 	}
@@ -129,8 +132,8 @@ static int hl_device_release(struct inode *inode, struct file *filp)
 	hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
 
 	if (!hl_hpriv_put(hpriv))
-		dev_warn(hdev->dev,
-			"Device is still in use because there are live CS and/or memory mappings\n");
+		dev_notice(hdev->dev,
+			"User process closed FD but device still in use\n");
 
 	hdev->last_open_session_duration_jif =
 		jiffies - hdev->last_successful_open_jif;
@@ -308,9 +311,15 @@ static void device_hard_reset_pending(struct work_struct *work)
 		container_of(work, struct hl_device_reset_work,
 				reset_work.work);
 	struct hl_device *hdev = device_reset_work->hdev;
+	u32 flags;
 	int rc;
 
-	rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD);
+	flags = HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD;
+
+	if (device_reset_work->fw_reset)
+		flags |= HL_RESET_FW;
+
+	rc = hl_device_reset(hdev, flags);
 	if ((rc == -EBUSY) && !hdev->device_fini_pending) {
 		dev_info(hdev->dev,
 			"Could not reset device. will try again in %u seconds",
@@ -682,6 +691,44 @@ out:
 	return rc;
 }
 
+static void take_release_locks(struct hl_device *hdev)
+{
+	/* Flush anyone that is inside the critical section of enqueue
+	 * jobs to the H/W
+	 */
+	hdev->asic_funcs->hw_queues_lock(hdev);
+	hdev->asic_funcs->hw_queues_unlock(hdev);
+
+	/* Flush processes that are sending message to CPU */
+	mutex_lock(&hdev->send_cpu_message_lock);
+	mutex_unlock(&hdev->send_cpu_message_lock);
+
+	/* Flush anyone that is inside device open */
+	mutex_lock(&hdev->fpriv_list_lock);
+	mutex_unlock(&hdev->fpriv_list_lock);
+}
+
+static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset)
+{
+	if (hard_reset)
+		device_late_fini(hdev);
+
+	/*
+	 * Halt the engines and disable interrupts so we won't get any more
+	 * completions from H/W and we won't have any accesses from the
+	 * H/W to the host machine
+	 */
+	hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);
+
+	/* Go over all the queues, release all CS and their jobs */
+	hl_cs_rollback_all(hdev);
+
+	/* Release all pending user interrupts, each pending user interrupt
+	 * holds a reference to user context
+	 */
+	hl_release_pending_user_interrupts(hdev);
+}
+
 /*
  * hl_device_suspend - initiate device suspend
  *
@@ -707,16 +754,7 @@ int hl_device_suspend(struct hl_device *hdev)
 	/* This blocks all other stuff that is not blocked by in_reset */
 	hdev->disabled = true;
 
-	/*
-	 * Flush anyone that is inside the critical section of enqueue
-	 * jobs to the H/W
-	 */
-	hdev->asic_funcs->hw_queues_lock(hdev);
-	hdev->asic_funcs->hw_queues_unlock(hdev);
-
-	/* Flush processes that are sending message to CPU */
-	mutex_lock(&hdev->send_cpu_message_lock);
-	mutex_unlock(&hdev->send_cpu_message_lock);
+	take_release_locks(hdev);
 
 	rc = hdev->asic_funcs->suspend(hdev);
 	if (rc)
@@ -819,6 +857,11 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
 			usleep_range(1000, 10000);
 
 			put_task_struct(task);
+		} else {
+			dev_warn(hdev->dev,
+				"Can't get task struct for PID so giving up on killing process\n");
+			mutex_unlock(&hdev->fpriv_list_lock);
+			return -ETIME;
 		}
 	}
 
@@ -885,7 +928,7 @@ static void device_disable_open_processes(struct hl_device *hdev)
 int hl_device_reset(struct hl_device *hdev, u32 flags)
 {
 	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
-	bool hard_reset, from_hard_reset_thread, hard_instead_soft = false;
+	bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false;
 	int i, rc;
 
 	if (!hdev->init_done) {
@@ -894,8 +937,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 		return 0;
 	}
 
-	hard_reset = (flags & HL_RESET_HARD) != 0;
-	from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0;
+	hard_reset = !!(flags & HL_RESET_HARD);
+	from_hard_reset_thread = !!(flags & HL_RESET_FROM_RESET_THREAD);
+	fw_reset = !!(flags & HL_RESET_FW);
 
 	if (!hard_reset && !hdev->supports_soft_reset) {
 		hard_instead_soft = true;
@@ -947,11 +991,13 @@ do_reset:
 	else
 		hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
 
-	/*
-	 * if reset is due to heartbeat, device CPU is no responsive in
-	 * which case no point sending PCI disable message to it
+	/* If reset is due to heartbeat, device CPU is no responsive in
+	 * which case no point sending PCI disable message to it.
+	 *
+	 * If F/W is performing the reset, no need to send it a message to disable
+	 * PCI access
 	 */
-	if (hard_reset && !(flags & HL_RESET_HEARTBEAT)) {
+	if (hard_reset && !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
 		/* Disable PCI access from device F/W so he won't send
 		 * us additional interrupts. We disable MSI/MSI-X at
 		 * the halt_engines function and we can't have the F/W
@@ -970,15 +1016,7 @@ do_reset:
 		/* This also blocks future CS/VM/JOB completion operations */
 		hdev->disabled = true;
 
-		/* Flush anyone that is inside the critical section of enqueue
-		 * jobs to the H/W
-		 */
-		hdev->asic_funcs->hw_queues_lock(hdev);
-		hdev->asic_funcs->hw_queues_unlock(hdev);
-
-		/* Flush anyone that is inside device open */
-		mutex_lock(&hdev->fpriv_list_lock);
-		mutex_unlock(&hdev->fpriv_list_lock);
+		take_release_locks(hdev);
 
 		dev_err(hdev->dev, "Going to RESET device!\n");
 	}
@@ -989,6 +1027,8 @@ again:
 
 		hdev->process_kill_trial_cnt = 0;
 
+		hdev->device_reset_work.fw_reset = fw_reset;
+
 		/*
 		 * Because the reset function can't run from heartbeat work,
 		 * we need to call the reset function from a dedicated work.
@@ -999,31 +1039,7 @@ again:
 		return 0;
 	}
 
-	if (hard_reset) {
-		device_late_fini(hdev);
-
-		/*
-		 * Now that the heartbeat thread is closed, flush processes
-		 * which are sending messages to CPU
-		 */
-		mutex_lock(&hdev->send_cpu_message_lock);
-		mutex_unlock(&hdev->send_cpu_message_lock);
-	}
-
-	/*
-	 * Halt the engines and disable interrupts so we won't get any more
-	 * completions from H/W and we won't have any accesses from the
-	 * H/W to the host machine
-	 */
-	hdev->asic_funcs->halt_engines(hdev, hard_reset);
-
-	/* Go over all the queues, release all CS and their jobs */
-	hl_cs_rollback_all(hdev);
-
-	/* Release all pending user interrupts, each pending user interrupt
-	 * holds a reference to user context
-	 */
-	hl_release_pending_user_interrupts(hdev);
+	cleanup_resources(hdev, hard_reset, fw_reset);
 
 kill_processes:
 	if (hard_reset) {
@@ -1057,12 +1073,15 @@ kill_processes:
 	}
 
 	/* Reset the H/W. It will be in idle state after this returns */
-	hdev->asic_funcs->hw_fini(hdev, hard_reset);
+	hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
 
 	if (hard_reset) {
+		hdev->fw_loader.linux_loaded = false;
+
 		/* Release kernel context */
 		if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
 			hdev->kernel_ctx = NULL;
+
 		hl_vm_fini(hdev);
 		hl_mmu_fini(hdev);
 		hl_eq_reset(hdev, &hdev->event_queue);
@@ -1292,6 +1311,10 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	if (rc)
 		goto user_interrupts_fini;
 
+
+	/* initialize completion structure for multi CS wait */
+	hl_multi_cs_completion_init(hdev);
+
 	/*
 	 * Initialize the H/W queues. Must be done before hw_init, because
 	 * there the addresses of the kernel queue are being written to the
@@ -1361,6 +1384,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 
 	hdev->compute_ctx = NULL;
 
+	hdev->asic_funcs->state_dump_init(hdev);
+
 	hl_debugfs_add_device(hdev);
 
 	/* debugfs nodes are created in hl_ctx_init so it must be called after
@@ -1567,31 +1592,13 @@ void hl_device_fini(struct hl_device *hdev)
 	/* Mark device as disabled */
 	hdev->disabled = true;
 
-	/* Flush anyone that is inside the critical section of enqueue
-	 * jobs to the H/W
-	 */
-	hdev->asic_funcs->hw_queues_lock(hdev);
-	hdev->asic_funcs->hw_queues_unlock(hdev);
-
-	/* Flush anyone that is inside device open */
-	mutex_lock(&hdev->fpriv_list_lock);
-	mutex_unlock(&hdev->fpriv_list_lock);
+	take_release_locks(hdev);
 
 	hdev->hard_reset_pending = true;
 
 	hl_hwmon_fini(hdev);
 
-	device_late_fini(hdev);
-
-	/*
-	 * Halt the engines and disable interrupts so we won't get any more
-	 * completions from H/W and we won't have any accesses from the
-	 * H/W to the host machine
-	 */
-	hdev->asic_funcs->halt_engines(hdev, true);
-
-	/* Go over all the queues, release all CS and their jobs */
-	hl_cs_rollback_all(hdev);
+	cleanup_resources(hdev, true, false);
 
 	/* Kill processes here after CS rollback. This is because the process
 	 * can't really exit until all its CSs are done, which is what we
@@ -1610,7 +1617,9 @@ void hl_device_fini(struct hl_device *hdev)
 	hl_cb_pool_fini(hdev);
 
 	/* Reset the H/W. It will be in idle state after this returns */
-	hdev->asic_funcs->hw_fini(hdev, true);
+	hdev->asic_funcs->hw_fini(hdev, true, false);
+
+	hdev->fw_loader.linux_loaded = false;
 
 	/* Release kernel context */
 	if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -240,11 +240,15 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 	/* set fence to a non valid value */
 	pkt->fence = cpu_to_le32(UINT_MAX);
 
-	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
-	if (rc) {
-		dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
-		goto out;
-	}
+	/*
+	 * The CPU queue is a synchronous queue with an effective depth of
+	 * a single entry (although it is allocated with room for multiple
+	 * entries). We lock on it using 'send_cpu_message_lock' which
+	 * serializes accesses to the CPU queue.
+	 * Which means that we don't need to lock the access to the entire H/W
+	 * queues module when submitting a JOB to the CPU queue.
+	 */
+	hl_hw_queue_submit_bd(hdev, queue, 0, len, pkt_dma_addr);
 
 	if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
 		expected_ack_val = queue->pi;
@@ -663,17 +667,15 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
 	hdev->event_queue.check_eqe_index = false;
 
 	/* Read FW application security bits again */
-	if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid) {
-		hdev->asic_prop.fw_app_cpu_boot_dev_sts0 =
-			RREG32(sts_boot_dev_sts0_reg);
-		if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+	if (prop->fw_cpu_boot_dev_sts0_valid) {
+		prop->fw_app_cpu_boot_dev_sts0 = RREG32(sts_boot_dev_sts0_reg);
+		if (prop->fw_app_cpu_boot_dev_sts0 &
 				CPU_BOOT_DEV_STS0_EQ_INDEX_EN)
 			hdev->event_queue.check_eqe_index = true;
 	}
 
-	if (hdev->asic_prop.fw_cpu_boot_dev_sts1_valid)
-		hdev->asic_prop.fw_app_cpu_boot_dev_sts1 =
-			RREG32(sts_boot_dev_sts1_reg);
+	if (prop->fw_cpu_boot_dev_sts1_valid)
+		prop->fw_app_cpu_boot_dev_sts1 = RREG32(sts_boot_dev_sts1_reg);
 
 out:
 	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
@@ -1008,6 +1010,11 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev)
 	} else {
 		WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE);
 		msleep(static_loader->cpu_reset_wait_msec);
+
+		/* Must clear this register in order to prevent preboot
+		 * from reading WFE after reboot
+		 */
+		WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_NA);
 	}
 
 	hdev->device_cpu_is_halted = true;
@@ -1055,6 +1062,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 		dev_err(hdev->dev,
 			"Device boot progress - Thermal Sensor initialization failed\n");
 		break;
+	case CPU_BOOT_STATUS_SECURITY_READY:
+		dev_err(hdev->dev,
+			"Device boot progress - Stuck in preboot after security initialization\n");
+		break;
 	default:
 		dev_err(hdev->dev,
 			"Device boot progress - Invalid status code %d\n",
@@ -1238,11 +1249,6 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
 	 *                  b. Check whether hard reset is done by boot cpu
 	 * 3. FW application - a. Fetch fw application security status
 	 *                     b. Check whether hard reset is done by fw app
-	 *
-	 * Preboot:
-	 * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED). If set, then-
-	 * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
-	 * If set, then mark GIC controller to be disabled.
 	 */
 	prop->hard_reset_done_by_fw =
 		!!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
@@ -1953,8 +1959,8 @@ static void hl_fw_dynamic_update_linux_interrupt_if(struct hl_device *hdev)
 	if (!hdev->asic_prop.gic_interrupts_enable &&
 			!(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
 				CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN)) {
-		dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_irq_ctrl;
-		dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_irq_ctrl;
+		dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_pi_upd_irq;
+		dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_pi_upd_irq;
 
 		dev_warn(hdev->dev,
 			"Using a single interrupt interface towards cpucp");
@@ -2122,8 +2128,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
 
 	/* Read FW application security bits */
 	if (prop->fw_cpu_boot_dev_sts0_valid) {
-		prop->fw_app_cpu_boot_dev_sts0 =
-			RREG32(cpu_boot_dev_sts0_reg);
+		prop->fw_app_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg);
 
 		if (prop->fw_app_cpu_boot_dev_sts0 &
 				CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
@@ -2143,8 +2148,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
 	}
 
 	if (prop->fw_cpu_boot_dev_sts1_valid) {
-		prop->fw_app_cpu_boot_dev_sts1 =
-			RREG32(cpu_boot_dev_sts1_reg);
+		prop->fw_app_cpu_boot_dev_sts1 = RREG32(cpu_boot_dev_sts1_reg);
 
 		dev_dbg(hdev->dev,
 			"Firmware application CPU status1 %#x\n",
@@ -2235,6 +2239,10 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 	dev_info(hdev->dev,
 		"Loading firmware to device, may take some time...\n");
 
+	/*
+	 * In this stage, "cpu_dyn_regs" contains only LKD's hard coded values!
+	 * It will be updated from FW after hl_fw_dynamic_request_descriptor().
+	 */
 	dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
 
 	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE,
@@ -20,6 +20,7 @@
 #include <linux/scatterlist.h>
 #include <linux/hashtable.h>
 #include <linux/debugfs.h>
+#include <linux/rwsem.h>
 #include <linux/bitfield.h>
 #include <linux/genalloc.h>
 #include <linux/sched/signal.h>
@@ -65,6 +66,11 @@
 
 #define HL_COMMON_USER_INTERRUPT_ID	0xFFF
 
+#define HL_STATE_DUMP_HIST_LEN		5
+
+#define OBJ_NAMES_HASH_TABLE_BITS	7 /* 1 << 7 buckets */
+#define SYNC_TO_ENGINE_HASH_TABLE_BITS	7 /* 1 << 7 buckets */
+
 /* Memory */
 #define MEM_HASH_TABLE_BITS		7 /* 1 << 7 buckets */
 
@@ -122,12 +128,17 @@ enum hl_mmu_page_table_location {
 *
 * - HL_RESET_DEVICE_RELEASE
 *       Set if reset is due to device release
+ *
+ * - HL_RESET_FW
+ *       F/W will perform the reset. No need to ask it to reset the device. This is relevant
+ *       only when running with secured f/w
 */
 #define HL_RESET_HARD			(1 << 0)
 #define HL_RESET_FROM_RESET_THREAD	(1 << 1)
 #define HL_RESET_HEARTBEAT		(1 << 2)
 #define HL_RESET_TDR			(1 << 3)
 #define HL_RESET_DEVICE_RELEASE		(1 << 4)
+#define HL_RESET_FW			(1 << 5)
 
 #define HL_MAX_SOBS_PER_MONITOR	8
 
@@ -236,7 +247,9 @@ enum hl_cs_type {
 	CS_TYPE_DEFAULT,
 	CS_TYPE_SIGNAL,
 	CS_TYPE_WAIT,
-	CS_TYPE_COLLECTIVE_WAIT
+	CS_TYPE_COLLECTIVE_WAIT,
+	CS_RESERVE_SIGNALS,
+	CS_UNRESERVE_SIGNALS
 };
 
 /*
@@ -281,13 +294,17 @@ enum queue_cb_alloc_flags {
 * @hdev: habanalabs device structure.
 * @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
 * @sob_id: id of this SOB.
+ * @sob_addr: the sob offset from the base address.
 * @q_idx: the H/W queue that uses this SOB.
+ * @need_reset: reset indication set when switching to the other sob.
 */
 struct hl_hw_sob {
 	struct hl_device	*hdev;
 	struct kref		kref;
 	u32			sob_id;
+	u32			sob_addr;
 	u32			q_idx;
+	bool			need_reset;
 };
 
 enum hl_collective_mode {
@@ -317,11 +334,11 @@ struct hw_queue_properties {
 };
 
 /**
- * enum vm_type_t - virtual memory mapping request information.
+ * enum vm_type - virtual memory mapping request information.
 * @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
 * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
 */
-enum vm_type_t {
+enum vm_type {
 	VM_TYPE_USERPTR = 0x1,
 	VM_TYPE_PHYS_PACK = 0x2
 };
@@ -381,6 +398,16 @@ struct hl_mmu_properties {
 	u8	host_resident;
 };
 
+/**
+ * struct hl_hints_range - hint addresses reserved va range.
+ * @start_addr: start address of the va range.
+ * @end_addr: end address of the va range.
+ */
+struct hl_hints_range {
+	u64 start_addr;
+	u64 end_addr;
+};
+
 /**
 * struct asic_fixed_properties - ASIC specific immutable properties.
 * @hw_queues_props: H/W queues properties.
@@ -392,6 +419,10 @@ struct hl_mmu_properties {
 * @pmmu: PCI (host) MMU address translation properties.
 * @pmmu_huge: PCI (host) MMU address translation properties for memory
 *             allocated with huge pages.
+ * @hints_dram_reserved_va_range: dram hint addresses reserved range.
+ * @hints_host_reserved_va_range: host hint addresses reserved range.
+ * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved
+ *                                      range.
 * @sram_base_address: SRAM physical start address.
 * @sram_end_address: SRAM physical end address.
 * @sram_user_base_address - SRAM physical start address for user access.
@@ -412,6 +443,10 @@ struct hl_mmu_properties {
 *                          to the device's MMU.
 * @cb_va_end_addr: virtual end address of command buffers which are mapped to
 *                  the device's MMU.
+ * @dram_hints_align_mask: dram va hint addresses alignment mask which is used
+ *                  for hints validity check.
+ * device_dma_offset_for_host_access: the offset to add to host DMA addresses
+ *                                    to enable the device to access them.
 * @mmu_pgt_size: MMU page tables total size.
 * @mmu_pte_size: PTE size in MMU page tables.
 * @mmu_hop_table_size: MMU hop table size.
@@ -459,6 +494,8 @@ struct hl_mmu_properties {
|
|||||||
* reserved for the user
|
* reserved for the user
|
||||||
* @first_available_cq: first available CQ for the user.
|
* @first_available_cq: first available CQ for the user.
|
||||||
* @user_interrupt_count: number of user interrupts.
|
* @user_interrupt_count: number of user interrupts.
|
||||||
|
* @server_type: Server type that the ASIC is currently installed in.
|
||||||
|
* The value is according to enum hl_server_type in uapi file.
|
||||||
* @tpc_enabled_mask: which TPCs are enabled.
|
* @tpc_enabled_mask: which TPCs are enabled.
|
||||||
* @completion_queues_count: number of completion queues.
|
* @completion_queues_count: number of completion queues.
|
||||||
* @fw_security_enabled: true if security measures are enabled in firmware,
|
* @fw_security_enabled: true if security measures are enabled in firmware,
|
||||||
@@ -470,6 +507,7 @@ struct hl_mmu_properties {
|
|||||||
* @dram_supports_virtual_memory: is there an MMU towards the DRAM
|
* @dram_supports_virtual_memory: is there an MMU towards the DRAM
|
||||||
* @hard_reset_done_by_fw: true if firmware is handling hard reset flow
|
* @hard_reset_done_by_fw: true if firmware is handling hard reset flow
|
||||||
* @num_functional_hbms: number of functional HBMs in each DCORE.
|
* @num_functional_hbms: number of functional HBMs in each DCORE.
|
||||||
|
* @hints_range_reservation: device support hint addresses range reservation.
|
||||||
* @iatu_done_by_fw: true if iATU configuration is being done by FW.
|
* @iatu_done_by_fw: true if iATU configuration is being done by FW.
|
||||||
* @dynamic_fw_load: is dynamic FW load is supported.
|
* @dynamic_fw_load: is dynamic FW load is supported.
|
||||||
* @gic_interrupts_enable: true if FW is not blocking GIC controller,
|
* @gic_interrupts_enable: true if FW is not blocking GIC controller,
|
||||||
@@ -483,6 +521,9 @@ struct asic_fixed_properties {
|
|||||||
struct hl_mmu_properties dmmu;
|
struct hl_mmu_properties dmmu;
|
||||||
struct hl_mmu_properties pmmu;
|
struct hl_mmu_properties pmmu;
|
||||||
struct hl_mmu_properties pmmu_huge;
|
struct hl_mmu_properties pmmu_huge;
|
||||||
|
struct hl_hints_range hints_dram_reserved_va_range;
|
||||||
|
struct hl_hints_range hints_host_reserved_va_range;
|
||||||
|
struct hl_hints_range hints_host_hpage_reserved_va_range;
|
||||||
u64 sram_base_address;
|
u64 sram_base_address;
|
||||||
u64 sram_end_address;
|
u64 sram_end_address;
|
||||||
u64 sram_user_base_address;
|
u64 sram_user_base_address;
|
||||||
@@ -500,6 +541,8 @@ struct asic_fixed_properties {
|
|||||||
u64 mmu_dram_default_page_addr;
|
u64 mmu_dram_default_page_addr;
|
||||||
u64 cb_va_start_addr;
|
u64 cb_va_start_addr;
|
||||||
u64 cb_va_end_addr;
|
u64 cb_va_end_addr;
|
||||||
|
u64 dram_hints_align_mask;
|
||||||
|
u64 device_dma_offset_for_host_access;
|
||||||
u32 mmu_pgt_size;
|
u32 mmu_pgt_size;
|
||||||
u32 mmu_pte_size;
|
u32 mmu_pte_size;
|
||||||
u32 mmu_hop_table_size;
|
u32 mmu_hop_table_size;
|
||||||
@@ -534,6 +577,7 @@ struct asic_fixed_properties {
|
|||||||
u16 first_available_user_msix_interrupt;
|
u16 first_available_user_msix_interrupt;
|
||||||
u16 first_available_cq[HL_MAX_DCORES];
|
u16 first_available_cq[HL_MAX_DCORES];
|
||||||
u16 user_interrupt_count;
|
u16 user_interrupt_count;
|
||||||
|
u16 server_type;
|
||||||
u8 tpc_enabled_mask;
|
u8 tpc_enabled_mask;
|
||||||
u8 completion_queues_count;
|
u8 completion_queues_count;
|
||||||
u8 fw_security_enabled;
|
u8 fw_security_enabled;
|
||||||
@@ -542,6 +586,7 @@ struct asic_fixed_properties {
|
|||||||
u8 dram_supports_virtual_memory;
|
u8 dram_supports_virtual_memory;
|
||||||
u8 hard_reset_done_by_fw;
|
u8 hard_reset_done_by_fw;
|
||||||
u8 num_functional_hbms;
|
u8 num_functional_hbms;
|
||||||
|
u8 hints_range_reservation;
|
||||||
u8 iatu_done_by_fw;
|
u8 iatu_done_by_fw;
|
||||||
u8 dynamic_fw_load;
|
u8 dynamic_fw_load;
|
||||||
u8 gic_interrupts_enable;
|
u8 gic_interrupts_enable;
|
||||||
@@ -552,40 +597,45 @@ struct asic_fixed_properties {
|
|||||||
* @completion: fence is implemented using completion
|
* @completion: fence is implemented using completion
|
||||||
* @refcount: refcount for this fence
|
* @refcount: refcount for this fence
|
||||||
* @cs_sequence: sequence of the corresponding command submission
|
* @cs_sequence: sequence of the corresponding command submission
|
||||||
|
* @stream_master_qid_map: streams masters QID bitmap to represent all streams
|
||||||
|
* masters QIDs that multi cs is waiting on
|
||||||
* @error: mark this fence with error
|
* @error: mark this fence with error
|
||||||
* @timestamp: timestamp upon completion
|
* @timestamp: timestamp upon completion
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
struct hl_fence {
|
struct hl_fence {
|
||||||
struct completion completion;
|
struct completion completion;
|
||||||
struct kref refcount;
|
struct kref refcount;
|
||||||
u64 cs_sequence;
|
u64 cs_sequence;
|
||||||
|
u32 stream_master_qid_map;
|
||||||
int error;
|
int error;
|
||||||
ktime_t timestamp;
|
ktime_t timestamp;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct hl_cs_compl - command submission completion object.
|
* struct hl_cs_compl - command submission completion object.
|
||||||
* @sob_reset_work: workqueue object to run SOB reset flow.
|
|
||||||
* @base_fence: hl fence object.
|
* @base_fence: hl fence object.
|
||||||
* @lock: spinlock to protect fence.
|
* @lock: spinlock to protect fence.
|
||||||
* @hdev: habanalabs device structure.
|
* @hdev: habanalabs device structure.
|
||||||
* @hw_sob: the H/W SOB used in this signal/wait CS.
|
* @hw_sob: the H/W SOB used in this signal/wait CS.
|
||||||
|
* @encaps_sig_hdl: encaps signals hanlder.
|
||||||
* @cs_seq: command submission sequence number.
|
* @cs_seq: command submission sequence number.
|
||||||
* @type: type of the CS - signal/wait.
|
* @type: type of the CS - signal/wait.
|
||||||
* @sob_val: the SOB value that is used in this signal/wait CS.
|
* @sob_val: the SOB value that is used in this signal/wait CS.
|
||||||
* @sob_group: the SOB group that is used in this collective wait CS.
|
* @sob_group: the SOB group that is used in this collective wait CS.
|
||||||
|
* @encaps_signals: indication whether it's a completion object of cs with
|
||||||
|
* encaps signals or not.
|
||||||
*/
|
*/
|
||||||
struct hl_cs_compl {
|
struct hl_cs_compl {
|
||||||
struct work_struct sob_reset_work;
|
|
||||||
struct hl_fence base_fence;
|
struct hl_fence base_fence;
|
||||||
spinlock_t lock;
|
spinlock_t lock;
|
||||||
struct hl_device *hdev;
|
struct hl_device *hdev;
|
||||||
struct hl_hw_sob *hw_sob;
|
struct hl_hw_sob *hw_sob;
|
||||||
|
struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
|
||||||
u64 cs_seq;
|
u64 cs_seq;
|
||||||
enum hl_cs_type type;
|
enum hl_cs_type type;
|
||||||
u16 sob_val;
|
u16 sob_val;
|
||||||
u16 sob_group;
|
u16 sob_group;
|
||||||
|
bool encaps_signals;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -697,6 +747,17 @@ struct hl_sync_stream_properties {
|
|||||||
u8 curr_sob_offset;
|
u8 curr_sob_offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct hl_encaps_signals_mgr - describes sync stream encapsulated signals
|
||||||
|
* handlers manager
|
||||||
|
* @lock: protects handles.
|
||||||
|
* @handles: an idr to hold all encapsulated signals handles.
|
||||||
|
*/
|
||||||
|
struct hl_encaps_signals_mgr {
|
||||||
|
spinlock_t lock;
|
||||||
|
struct idr handles;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct hl_hw_queue - describes a H/W transport queue.
|
* struct hl_hw_queue - describes a H/W transport queue.
|
||||||
* @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
|
* @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
|
||||||
@@ -875,7 +936,7 @@ struct pci_mem_region {
|
|||||||
u64 region_base;
|
u64 region_base;
|
||||||
u64 region_size;
|
u64 region_size;
|
||||||
u64 bar_size;
|
u64 bar_size;
|
||||||
u32 offset_in_bar;
|
u64 offset_in_bar;
|
||||||
u8 bar_id;
|
u8 bar_id;
|
||||||
u8 used;
|
u8 used;
|
||||||
};
|
};
|
||||||
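
Widening offset_in_bar from u32 to u64 is presumably needed because a memory region can start many gigabytes into a large BAR (Gaudi exposes its HBM through one), at which point the byte offset no longer fits in 32 bits. A stand-alone illustration with made-up numbers:

	#include <inttypes.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t region_base = 0x1000000000ULL;	/* device address   */
		uint64_t bar_base = 0x0800000000ULL;	/* start of the BAR */
		uint64_t offset_in_bar = region_base - bar_base; /* 32 GiB  */

		printf("offset_in_bar = 0x%" PRIx64 "\n", offset_in_bar);
		printf("fits in u32: %s\n",
		       offset_in_bar <= 0xffffffffULL ? "yes" : "no");
		return 0;
	}
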
@@ -996,7 +1057,7 @@ struct fw_load_mgr {
  *                    hw_fini and before CS rollback.
  * @suspend: handles IP specific H/W or SW changes for suspend.
  * @resume: handles IP specific H/W or SW changes for resume.
- * @cb_mmap: maps a CB.
+ * @mmap: maps a memory.
  * @ring_doorbell: increment PI on a given QMAN.
  * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
  *             function because the PQs are located in different memory areas
@@ -1101,6 +1162,10 @@ struct fw_load_mgr {
  *                         generic f/w compatible PLL Indexes
  * @init_firmware_loader: initialize data for FW loader.
  * @init_cpu_scrambler_dram: Enable CPU specific DRAM scrambling
+ * @state_dump_init: initialize constants required for state dump
+ * @get_sob_addr: get SOB base address offset.
+ * @set_pci_memory_regions: setting properties of PCI memory regions
+ * @get_stream_master_qid_arr: get pointer to stream masters QID array
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -1110,11 +1175,11 @@ struct hl_asic_funcs {
 	int (*sw_init)(struct hl_device *hdev);
 	int (*sw_fini)(struct hl_device *hdev);
 	int (*hw_init)(struct hl_device *hdev);
-	void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
-	void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
+	void (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
+	void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
 	int (*suspend)(struct hl_device *hdev);
 	int (*resume)(struct hl_device *hdev);
-	int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+	int (*mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size);
 	void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
 	void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
@@ -1210,10 +1275,11 @@ struct hl_asic_funcs {
 	void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group);
 	void (*set_dma_mask_from_fw)(struct hl_device *hdev);
 	u64 (*get_device_time)(struct hl_device *hdev);
-	void (*collective_wait_init_cs)(struct hl_cs *cs);
+	int (*collective_wait_init_cs)(struct hl_cs *cs);
 	int (*collective_wait_create_jobs)(struct hl_device *hdev,
-			struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
-			u32 collective_engine_id);
+			struct hl_ctx *ctx, struct hl_cs *cs,
+			u32 wait_queue_id, u32 collective_engine_id,
+			u32 encaps_signal_offset);
 	u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
 	u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
 	void (*ack_protection_bits_errors)(struct hl_device *hdev);
@@ -1226,6 +1292,10 @@ struct hl_asic_funcs {
 	int (*map_pll_idx_to_fw_idx)(u32 pll_idx);
 	void (*init_firmware_loader)(struct hl_device *hdev);
 	void (*init_cpu_scrambler_dram)(struct hl_device *hdev);
+	void (*state_dump_init)(struct hl_device *hdev);
+	u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
+	void (*set_pci_memory_regions)(struct hl_device *hdev);
+	u32* (*get_stream_master_qid_arr)(void);
 };


@@ -1282,20 +1352,6 @@ struct hl_cs_counters_atomic {
 	atomic64_t validation_drop_cnt;
 };

-/**
- * struct hl_pending_cb - pending command buffer structure
- * @cb_node: cb node in pending cb list
- * @cb: command buffer to send in next submission
- * @cb_size: command buffer size
- * @hw_queue_id: destination queue id
- */
-struct hl_pending_cb {
-	struct list_head	cb_node;
-	struct hl_cb		*cb;
-	u32			cb_size;
-	u32			hw_queue_id;
-};
-
 /**
  * struct hl_ctx - user/kernel context.
  * @mem_hash: holds mapping from virtual address to virtual memory area
@@ -1312,28 +1368,21 @@ struct hl_pending_cb {
  *            MMU hash or walking the PGT requires talking this lock.
  * @hw_block_list_lock: protects the HW block memory list.
  * @debugfs_list: node in debugfs list of contexts.
- * pending_cb_list: list of pending command buffers waiting to be sent upon
- *                  next user command submission context.
  * @hw_block_mem_list: list of HW block virtual mapped addresses.
  * @cs_counters: context command submission counters.
  * @cb_va_pool: device VA pool for command buffers which are mapped to the
  *              device's MMU.
+ * @sig_mgr: encaps signals handle manager.
  * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
  *            to user so user could inquire about CS. It is used as
  *            index to cs_pending array.
  * @dram_default_hops: array that holds all hops addresses needed for default
  *                     DRAM mapping.
- * @pending_cb_lock: spinlock to protect pending cb list
  * @cs_lock: spinlock to protect cs_sequence.
  * @dram_phys_mem: amount of used physical DRAM memory by this context.
  * @thread_ctx_switch_token: token to prevent multiple threads of the same
  *                            context from running the context switch phase.
  *                            Only a single thread should run it.
- * @thread_pending_cb_token: token to prevent multiple threads from processing
- *                           the pending CB list. Only a single thread should
- *                           process the list since it is protected by a
- *                           spinlock and we don't want to halt the entire
- *                           command submission sequence.
  * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
  *                                the context switch phase from moving to their
  *                                execution phase before the context switch phase
@@ -1353,17 +1402,15 @@ struct hl_ctx {
 	struct mutex			mmu_lock;
 	struct mutex			hw_block_list_lock;
 	struct list_head		debugfs_list;
-	struct list_head		pending_cb_list;
 	struct list_head		hw_block_mem_list;
 	struct hl_cs_counters_atomic	cs_counters;
 	struct gen_pool			*cb_va_pool;
+	struct hl_encaps_signals_mgr	sig_mgr;
 	u64				cs_sequence;
 	u64				*dram_default_hops;
-	spinlock_t			pending_cb_lock;
 	spinlock_t			cs_lock;
 	atomic64_t			dram_phys_mem;
 	atomic_t			thread_ctx_switch_token;
-	atomic_t			thread_pending_cb_token;
 	u32				thread_ctx_switch_wait_token;
 	u32				asid;
 	u32				handle;
@@ -1394,20 +1441,22 @@ struct hl_ctx_mgr {
  * @sgt: pointer to the scatter-gather table that holds the pages.
  * @dir: for DMA unmapping, the direction must be supplied, so save it.
  * @debugfs_list: node in debugfs list of command submissions.
+ * @pid: the pid of the user process owning the memory
  * @addr: user-space virtual address of the start of the memory area.
  * @size: size of the memory area to pin & map.
  * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
  */
 struct hl_userptr {
-	enum vm_type_t		vm_type; /* must be first */
+	enum vm_type		vm_type; /* must be first */
 	struct list_head	job_node;
 	struct page		**pages;
 	unsigned int		npages;
 	struct sg_table		*sgt;
 	enum dma_data_direction dir;
 	struct list_head	debugfs_list;
+	pid_t			pid;
 	u64			addr;
-	u32			size;
+	u64			size;
 	u8			dma_mapped;
 };

@@ -1426,12 +1475,14 @@ struct hl_userptr {
  * @mirror_node : node in device mirror list of command submissions.
  * @staged_cs_node: node in the staged cs list.
  * @debugfs_list: node in debugfs list of command submissions.
+ * @encaps_sig_hdl: holds the encaps signals handle.
  * @sequence: the sequence number of this CS.
  * @staged_sequence: the sequence of the staged submission this CS is part of,
  *                   relevant only if staged_cs is set.
  * @timeout_jiffies: cs timeout in jiffies.
  * @submission_time_jiffies: submission time of the cs
  * @type: CS_TYPE_*.
+ * @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
  * @submitted: true if CS was submitted to H/W.
  * @completed: true if CS was completed by device.
  * @timedout : true if CS was timedout.
@@ -1445,6 +1496,7 @@ struct hl_userptr {
  * @staged_cs: true if this CS is part of a staged submission.
  * @skip_reset_on_timeout: true if we shall not reset the device in case
  *                         timeout occurs (debug scenario).
+ * @encaps_signals: true if this CS has encaps reserved signals.
  */
 struct hl_cs {
 	u16 *jobs_in_queue_cnt;
@@ -1459,11 +1511,13 @@ struct hl_cs {
 	struct list_head	mirror_node;
 	struct list_head	staged_cs_node;
 	struct list_head	debugfs_list;
+	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
 	u64			sequence;
 	u64			staged_sequence;
 	u64			timeout_jiffies;
 	u64			submission_time_jiffies;
 	enum hl_cs_type		type;
+	u32			encaps_sig_hdl_id;
 	u8			submitted;
 	u8			completed;
 	u8			timedout;
@@ -1474,6 +1528,7 @@ struct hl_cs {
 	u8			staged_first;
 	u8			staged_cs;
 	u8			skip_reset_on_timeout;
+	u8			encaps_signals;
 };

 /**
@@ -1493,6 +1548,8 @@ struct hl_cs {
  * @hw_queue_id: the id of the H/W queue this job is submitted to.
  * @user_cb_size: the actual size of the CB we got from the user.
  * @job_cb_size: the actual size of the CB that we put on the queue.
+ * @encaps_sig_wait_offset: encapsulated signals offset, which allow user
+ *                          to wait on part of the reserved signals.
  * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
  *                          handle to a kernel-allocated CB object, false
  *                          otherwise (SRAM/DRAM/host address).
@@ -1517,6 +1574,7 @@ struct hl_cs_job {
 	u32			hw_queue_id;
 	u32			user_cb_size;
 	u32			job_cb_size;
+	u32			encaps_sig_wait_offset;
 	u8			is_kernel_allocated_cb;
 	u8			contains_dma_pkt;
 };
@@ -1613,7 +1671,7 @@ struct hl_vm_hw_block_list_node {
  * @created_from_userptr: is product of host virtual address.
  */
 struct hl_vm_phys_pg_pack {
-	enum vm_type_t		vm_type; /* must be first */
+	enum vm_type		vm_type; /* must be first */
 	u64			*pages;
 	u64			npages;
 	u64			total_size;
@@ -1759,9 +1817,13 @@ struct hl_debugfs_entry {
  * @ctx_mem_hash_list: list of available contexts with MMU mappings.
  * @ctx_mem_hash_spinlock: protects cb_list.
  * @blob_desc: descriptor of blob
+ * @state_dump: data of the system states in case of a bad cs.
+ * @state_dump_sem: protects state_dump.
  * @addr: next address to read/write from/to in read/write32.
  * @mmu_addr: next virtual address to translate to physical address in mmu_show.
+ * @userptr_lookup: the target user ptr to look up for on demand.
  * @mmu_asid: ASID to use while translating in mmu_show.
+ * @state_dump_head: index of the latest state dump
  * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
 * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
 * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
@@ -1783,14 +1845,149 @@ struct hl_dbg_device_entry {
 	struct list_head		ctx_mem_hash_list;
 	spinlock_t			ctx_mem_hash_spinlock;
 	struct debugfs_blob_wrapper	blob_desc;
+	char				*state_dump[HL_STATE_DUMP_HIST_LEN];
+	struct rw_semaphore		state_dump_sem;
 	u64				addr;
 	u64				mmu_addr;
+	u64				userptr_lookup;
 	u32				mmu_asid;
+	u32				state_dump_head;
 	u8				i2c_bus;
 	u8				i2c_addr;
 	u8				i2c_reg;
 };

+/**
+ * struct hl_hw_obj_name_entry - single hw object name, member of
+ * hl_state_dump_specs
+ * @node: link to the containing hash table
+ * @name: hw object name
+ * @id: object identifier
+ */
+struct hl_hw_obj_name_entry {
+	struct hlist_node	node;
+	const char		*name;
+	u32			id;
+};
+
+enum hl_state_dump_specs_props {
+	SP_SYNC_OBJ_BASE_ADDR,
+	SP_NEXT_SYNC_OBJ_ADDR,
+	SP_SYNC_OBJ_AMOUNT,
+	SP_MON_OBJ_WR_ADDR_LOW,
+	SP_MON_OBJ_WR_ADDR_HIGH,
+	SP_MON_OBJ_WR_DATA,
+	SP_MON_OBJ_ARM_DATA,
+	SP_MON_OBJ_STATUS,
+	SP_MONITORS_AMOUNT,
+	SP_TPC0_CMDQ,
+	SP_TPC0_CFG_SO,
+	SP_NEXT_TPC,
+	SP_MME_CMDQ,
+	SP_MME_CFG_SO,
+	SP_NEXT_MME,
+	SP_DMA_CMDQ,
+	SP_DMA_CFG_SO,
+	SP_DMA_QUEUES_OFFSET,
+	SP_NUM_OF_MME_ENGINES,
+	SP_SUB_MME_ENG_NUM,
+	SP_NUM_OF_DMA_ENGINES,
+	SP_NUM_OF_TPC_ENGINES,
+	SP_ENGINE_NUM_OF_QUEUES,
+	SP_ENGINE_NUM_OF_STREAMS,
+	SP_ENGINE_NUM_OF_FENCES,
+	SP_FENCE0_CNT_OFFSET,
+	SP_FENCE0_RDATA_OFFSET,
+	SP_CP_STS_OFFSET,
+	SP_NUM_CORES,
+
+	SP_MAX
+};
+
+enum hl_sync_engine_type {
+	ENGINE_TPC,
+	ENGINE_DMA,
+	ENGINE_MME,
+};
+
+/**
+ * struct hl_mon_state_dump - represents a state dump of a single monitor
+ * @id: monitor id
+ * @wr_addr_low: address monitor will write to, low bits
+ * @wr_addr_high: address monitor will write to, high bits
+ * @wr_data: data monitor will write
+ * @arm_data: register value containing monitor configuration
+ * @status: monitor status
+ */
+struct hl_mon_state_dump {
+	u32		id;
+	u32		wr_addr_low;
+	u32		wr_addr_high;
+	u32		wr_data;
+	u32		arm_data;
+	u32		status;
+};
+
+/**
+ * struct hl_sync_to_engine_map_entry - sync object id to engine mapping entry
+ * @engine_type: type of the engine
+ * @engine_id: id of the engine
+ * @sync_id: id of the sync object
+ */
+struct hl_sync_to_engine_map_entry {
+	struct hlist_node		node;
+	enum hl_sync_engine_type	engine_type;
+	u32				engine_id;
+	u32				sync_id;
+};
+
+/**
+ * struct hl_sync_to_engine_map - maps sync object id to associated engine id
+ * @tb: hash table containing the mapping, each element is of type
+ *      struct hl_sync_to_engine_map_entry
+ */
+struct hl_sync_to_engine_map {
+	DECLARE_HASHTABLE(tb, SYNC_TO_ENGINE_HASH_TABLE_BITS);
+};
+
+/**
+ * struct hl_state_dump_specs_funcs - virtual functions used by the state dump
+ * @gen_sync_to_engine_map: generate a hash map from sync obj id to its engine
+ * @print_single_monitor: format monitor data as string
+ * @monitor_valid: return true if given monitor dump is valid
+ * @print_fences_single_engine: format fences data as string
+ */
+struct hl_state_dump_specs_funcs {
+	int (*gen_sync_to_engine_map)(struct hl_device *hdev,
+				struct hl_sync_to_engine_map *map);
+	int (*print_single_monitor)(char **buf, size_t *size, size_t *offset,
+				struct hl_device *hdev,
+				struct hl_mon_state_dump *mon);
+	int (*monitor_valid)(struct hl_mon_state_dump *mon);
+	int (*print_fences_single_engine)(struct hl_device *hdev,
+				u64 base_offset,
+				u64 status_base_offset,
+				enum hl_sync_engine_type engine_type,
+				u32 engine_id, char **buf,
+				size_t *size, size_t *offset);
+};
+
+/**
+ * struct hl_state_dump_specs - defines ASIC known hw objects names
+ * @so_id_to_str_tb: sync objects names index table
+ * @monitor_id_to_str_tb: monitors names index table
+ * @funcs: virtual functions used for state dump
+ * @sync_namager_names: readable names for sync manager if available (ex: N_E)
+ * @props: pointer to a per asic const props array required for state dump
+ */
+struct hl_state_dump_specs {
+	DECLARE_HASHTABLE(so_id_to_str_tb, OBJ_NAMES_HASH_TABLE_BITS);
+	DECLARE_HASHTABLE(monitor_id_to_str_tb, OBJ_NAMES_HASH_TABLE_BITS);
+	struct hl_state_dump_specs_funcs	funcs;
+	const char * const			*sync_namager_names;
+	s64					*props;
+};
+
+
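
The print_* callbacks above format monitor and fence descriptions into a buffer whose final size is unknown up front; hl_snprintf_resize(), declared further down in this header, grows the buffer on demand. A user-space sketch of that resize-on-overflow pattern (an illustration, not the driver's implementation):

	#include <stdarg.h>
	#include <stdio.h>
	#include <stdlib.h>

	static int snprintf_resize(char **buf, size_t *size, size_t *offset,
				   const char *fmt, ...)
	{
		va_list ap;
		int n;

		va_start(ap, fmt);
		n = vsnprintf(*buf + *offset, *size - *offset, fmt, ap);
		va_end(ap);
		if (n < 0)
			return n;

		if ((size_t)n >= *size - *offset) {	/* did not fit: grow */
			size_t new_size = (*size + n) * 2;
			char *tmp = realloc(*buf, new_size);

			if (!tmp)
				return -1;
			*buf = tmp;
			*size = new_size;
			va_start(ap, fmt);
			n = vsnprintf(*buf + *offset, *size - *offset, fmt, ap);
			va_end(ap);
		}
		*offset += n;
		return 0;
	}

	int main(void)
	{
		size_t size = 8, offset = 0;
		char *buf = malloc(size);

		snprintf_resize(&buf, &size, &offset, "mon %u: ", 3);
		snprintf_resize(&buf, &size, &offset, "status 0x%x", 0x11);
		puts(buf);	/* "mon 3: status 0x11" */
		free(buf);
		return 0;
	}
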
 /*
  * DEVICES
@@ -1798,7 +1995,7 @@ struct hl_dbg_device_entry {

 #define HL_STR_MAX	32

-#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1)
+#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1)

 /* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
  * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
@@ -1946,11 +2143,13 @@ struct hwmon_chip_info;
  * @wq: work queue for device reset procedure.
  * @reset_work: reset work to be done.
  * @hdev: habanalabs device structure.
+ * @fw_reset: whether f/w will do the reset without us sending them a message to do it.
  */
 struct hl_device_reset_work {
 	struct workqueue_struct		*wq;
 	struct delayed_work		reset_work;
 	struct hl_device		*hdev;
+	bool				fw_reset;
 };

 /**
@@ -2064,6 +2263,58 @@ struct hl_mmu_funcs {
 			u64 virt_addr, struct hl_mmu_hop_info *hops);
 };

+/**
+ * number of user contexts allowed to call wait_for_multi_cs ioctl in
+ * parallel
+ */
+#define MULTI_CS_MAX_USER_CTX	2
+
+/**
+ * struct multi_cs_completion - multi CS wait completion.
+ * @completion: completion of any of the CS in the list
+ * @lock: spinlock for the completion structure
+ * @timestamp: timestamp for the multi-CS completion
+ * @stream_master_qid_map: bitmap of all stream masters on which the multi-CS
+ *                         is waiting
+ * @used: 1 if in use, otherwise 0
+ */
+struct multi_cs_completion {
+	struct completion	completion;
+	spinlock_t		lock;
+	s64			timestamp;
+	u32			stream_master_qid_map;
+	u8			used;
+};
+
+/**
+ * struct multi_cs_data - internal data for multi CS call
+ * @ctx: pointer to the context structure
+ * @fence_arr: array of fences of all CSs
+ * @seq_arr: array of CS sequence numbers
+ * @timeout_us: timeout in usec for waiting for CS to complete
+ * @timestamp: timestamp of first completed CS
+ * @wait_status: wait for CS status
+ * @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
+ * @stream_master_qid_map: bitmap of all stream master QIDs on which the
+ *                         multi-CS is waiting
+ * @arr_len: fence_arr and seq_arr array length
+ * @gone_cs: indication of gone CS (1- there was gone CS, otherwise 0)
+ * @update_ts: update timestamp. 1- update the timestamp, otherwise 0.
+ */
+struct multi_cs_data {
+	struct hl_ctx	*ctx;
+	struct hl_fence	**fence_arr;
+	u64		*seq_arr;
+	s64		timeout_us;
+	s64		timestamp;
+	long		wait_status;
+	u32		completion_bitmap;
+	u32		stream_master_qid_map;
+	u8		arr_len;
+	u8		gone_cs;
+	u8		update_ts;
+};
+
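
The two stream_master_qid_map bitmaps are how a multi-CS wait decides relevance: a completing fence carries the stream-master QIDs it signals, and a waiter is woken only when that map intersects the one it registered. A user-space sketch of the bitmap test (the QID numbers are made up):

	#include <stdbool.h>
	#include <stdio.h>

	static bool multi_cs_relevant(unsigned int fence_map,
				      unsigned int wait_map)
	{
		/* any common stream master QID bit satisfies the wait */
		return (fence_map & wait_map) != 0;
	}

	int main(void)
	{
		unsigned int fence_map = 1U << 2;	/* CS signals QID 2 */
		unsigned int wait_map = (1U << 2) | (1U << 5); /* waits 2,5 */

		printf("%d\n", multi_cs_relevant(fence_map, wait_map)); /* 1 */
		return 0;
	}
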
 /**
  * struct hl_device - habanalabs device structure.
  * @pdev: pointer to PCI device, can be NULL in case of simulator device.
@@ -2129,6 +2380,8 @@ struct hl_mmu_funcs {
  * @mmu_func: device-related MMU functions.
  * @fw_loader: FW loader manager.
  * @pci_mem_region: array of memory regions in the PCI
+ * @state_dump_specs: constants and dictionaries needed to dump system state.
+ * @multi_cs_completion: array of multi-CS completion.
  * @dram_used_mem: current DRAM memory consumption.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
@@ -2205,6 +2458,7 @@ struct hl_mmu_funcs {
  *                        halted. We can't halt it again because the COMMS
  *                        protocol will throw an error. Relevant only for
  *                        cases where Linux was not loaded to device CPU
+ * @supports_wait_for_multi_cs: true if wait for multi CS is supported
  */
 struct hl_device {
 	struct pci_dev			*pdev;
@@ -2273,6 +2527,11 @@ struct hl_device {

 	struct pci_mem_region		pci_mem_region[PCI_REGION_NUMBER];

+	struct hl_state_dump_specs	state_dump_specs;
+
+	struct multi_cs_completion	multi_cs_completion[
+							MULTI_CS_MAX_USER_CTX];
+	u32				*stream_master_qid_arr;
 	atomic64_t			dram_used_mem;
 	u64				timeout_jiffies;
 	u64				max_power;
@@ -2322,6 +2581,8 @@ struct hl_device {
 	u8				curr_reset_cause;
 	u8				skip_reset_on_timeout;
 	u8				device_cpu_is_halted;
+	u8				supports_wait_for_multi_cs;
+	u8				stream_master_qid_arr_size;

 	/* Parameters for bring-up */
 	u64				nic_ports_mask;
@@ -2343,6 +2604,29 @@ struct hl_device {
 };


+/**
+ * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure
+ * @refcount: refcount used to protect removing this id when several
+ *            wait cs are used to wait of the reserved encaps signals.
+ * @hdev: pointer to habanalabs device structure.
+ * @hw_sob: pointer to H/W SOB used in the reservation.
+ * @cs_seq: staged cs sequence which contains encapsulated signals
+ * @id: idr handler id to be used to fetch the handler info
+ * @q_idx: stream queue index
+ * @pre_sob_val: current SOB value before reservation
+ * @count: signals number
+ */
+struct hl_cs_encaps_sig_handle {
+	struct kref refcount;
+	struct hl_device *hdev;
+	struct hl_hw_sob *hw_sob;
+	u64  cs_seq;
+	u32  id;
+	u32  q_idx;
+	u32  pre_sob_val;
+	u32  count;
+};
+
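
The pre_sob_val/count pair defines the window of reserved signals, and a wait CS picks a point inside it by offset; as the init_wait_cs() change further below spells out, offset 1 means the first reserved signal, so the target SOB value is pre_sob_val + offset - 1. A stand-alone check of that arithmetic with made-up values:

	#include <stdio.h>

	static unsigned int encaps_wait_sob_val(unsigned int pre_sob_val,
						unsigned int offset)
	{
		/* offset 1 -> pre_sob_val, offset N -> pre_sob_val + N - 1 */
		return pre_sob_val + (offset - 1);
	}

	int main(void)
	{
		/* SOB was at 10 before reserving; wait on the 3rd signal */
		printf("%u\n", encaps_wait_sob_val(10, 3));	/* prints 12 */
		return 0;
	}
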
 /*
  * IOCTLs
  */
@@ -2372,6 +2656,23 @@ struct hl_ioctl_desc {
  * Kernel module functions that can be accessed by entire module
  */

+/**
+ * hl_get_sg_info() - get number of pages and the DMA address from SG list.
+ * @sg: the SG list.
+ * @dma_addr: pointer to DMA address to return.
+ *
+ * Calculate the number of consecutive pages described by the SG list. Take the
+ * offset of the address in the first page, add to it the length and round it up
+ * to the number of needed pages.
+ */
+static inline u32 hl_get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
+{
+	*dma_addr = sg_dma_address(sg);
+
+	return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
+			(PAGE_SIZE - 1)) >> PAGE_SHIFT;
+}
+
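
The rounding in hl_get_sg_info() counts how many pages a DMA chunk touches: the chunk's offset inside its first page is added to its length before rounding up. A stand-alone recomputation of the same expression, assuming 4 KiB pages:

	#include <stdio.h>

	#define PAGE_SIZE 4096UL
	#define PAGE_SHIFT 12

	static unsigned int npages(unsigned long dma_addr, unsigned long len)
	{
		/* same formula as hl_get_sg_info() above */
		return (((dma_addr & (PAGE_SIZE - 1)) + len) + (PAGE_SIZE - 1))
				>> PAGE_SHIFT;
	}

	int main(void)
	{
		/* 8 KiB starting 256 bytes into a page spans 3 pages */
		printf("%u\n", npages(0x1100, 8192));	/* prints 3 */
		return 0;
	}
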
 /**
  * hl_mem_area_inside_range() - Checks whether address+size are inside a range.
  * @address: The start address of the area we want to validate.
@@ -2436,7 +2737,9 @@ void destroy_hdev(struct hl_device *hdev);
 int hl_hw_queues_create(struct hl_device *hdev);
 void hl_hw_queues_destroy(struct hl_device *hdev);
 int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
 		u32 cb_size, u64 cb_ptr);
+void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
+		u32 ctl, u32 len, u64 ptr);
 int hl_hw_queue_schedule_cs(struct hl_cs *cs);
 u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
 void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
@@ -2470,6 +2773,8 @@ void hl_ctx_do_release(struct kref *ref);
 void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
 int hl_ctx_put(struct hl_ctx *ctx);
 struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
+int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
+				struct hl_fence **fence, u32 arr_len);
 void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
 void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);

@@ -2511,18 +2816,19 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx);
 void hl_cb_va_pool_fini(struct hl_ctx *ctx);

 void hl_cs_rollback_all(struct hl_device *hdev);
-void hl_pending_cb_list_flush(struct hl_ctx *ctx);
 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 		enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
 void hl_sob_reset_error(struct kref *ref);
 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
 void hl_fence_put(struct hl_fence *fence);
+void hl_fences_put(struct hl_fence **fence, int len);
 void hl_fence_get(struct hl_fence *fence);
 void cs_get(struct hl_cs *cs);
 bool cs_needs_completion(struct hl_cs *cs);
 bool cs_needs_timeout(struct hl_cs *cs);
 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq);
+void hl_multi_cs_completion_init(struct hl_device *hdev);

 void goya_set_asic_funcs(struct hl_device *hdev);
 void gaudi_set_asic_funcs(struct hl_device *hdev);
@@ -2650,9 +2956,25 @@ int hl_set_voltage(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value);
 int hl_set_current(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value);
+void hw_sob_get(struct hl_hw_sob *hw_sob);
+void hw_sob_put(struct hl_hw_sob *hw_sob);
+void hl_encaps_handle_do_release(struct kref *ref);
+void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
+			struct hl_cs *cs, struct hl_cs_job *job,
+			struct hl_cs_compl *cs_cmpl);
 void hl_release_pending_user_interrupts(struct hl_device *hdev);
 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
-			struct hl_hw_sob **hw_sob, u32 count);
+			struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig);
+
+int hl_state_dump(struct hl_device *hdev);
+const char *hl_state_dump_get_sync_name(struct hl_device *hdev, u32 sync_id);
+const char *hl_state_dump_get_monitor_name(struct hl_device *hdev,
+					struct hl_mon_state_dump *mon);
+void hl_state_dump_free_sync_to_engine_map(struct hl_sync_to_engine_map *map);
+__printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
+					const char *format, ...);
+char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
+const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);

 #ifdef CONFIG_DEBUG_FS

@@ -2673,6 +2995,8 @@ void hl_debugfs_remove_userptr(struct hl_device *hdev,
 				struct hl_userptr *userptr);
 void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
 void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
+void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
+					unsigned long length);

 #else

@@ -2746,6 +3070,11 @@ static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev,
 {
 }

+static inline void hl_debugfs_set_state_dump(struct hl_device *hdev,
+					char *data, unsigned long length)
+{
+}
+
 #endif

 /* IOCTLs */

diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -141,7 +141,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 	hl_cb_mgr_init(&hpriv->cb_mgr);
 	hl_ctx_mgr_init(&hpriv->ctx_mgr);

-	hpriv->taskpid = find_get_pid(current->pid);
+	hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);

 	mutex_lock(&hdev->fpriv_list_lock);

@@ -194,7 +194,6 @@ int hl_device_open(struct inode *inode, struct file *filp)

 out_err:
 	mutex_unlock(&hdev->fpriv_list_lock);
-
 	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
 	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
 	filp->private_data = NULL;
@@ -318,12 +317,16 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 	hdev->asic_prop.fw_security_enabled = false;

 	/* Assign status description string */
-	strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
-					"disabled", HL_STR_MAX);
+	strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL],
+					"operational", HL_STR_MAX);
 	strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET],
 					"in reset", HL_STR_MAX);
+	strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
+					"disabled", HL_STR_MAX);
 	strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET],
 					"needs reset", HL_STR_MAX);
+	strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
+					"in device creation", HL_STR_MAX);

 	hdev->major = hl_major;
 	hdev->reset_on_lockup = reset_on_lockup;
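
HL_DEV_STS_MAX now derives from HL_DEVICE_STATUS_LAST (see the habanalabs.h hunk above), so this status-string table stays sized correctly as states are added. The uapi enum presumably looks like the sketch below after this series; the authoritative ordering is in include/uapi/misc/habanalabs.h:

	enum hl_device_status {
		HL_DEVICE_STATUS_OPERATIONAL,
		HL_DEVICE_STATUS_IN_RESET,
		HL_DEVICE_STATUS_MALFUNCTION,
		HL_DEVICE_STATUS_NEEDS_RESET,
		HL_DEVICE_STATUS_IN_DEVICE_CREATION,
		/* keep LAST pointing at the final real state */
		HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
	};
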
@@ -532,7 +535,7 @@ hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
 		result = PCI_ERS_RESULT_NONE;
 	}

-	hdev->asic_funcs->halt_engines(hdev, true);
+	hdev->asic_funcs->halt_engines(hdev, true, false);

 	return result;
 }

diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -94,6 +94,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)

 	hw_ip.first_available_interrupt_id =
 			prop->first_available_user_msix_interrupt;
+	hw_ip.server_type = prop->server_type;
+
 	return copy_to_user(out, &hw_ip,
 		min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
 }

diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
@@ -65,7 +65,7 @@ void hl_hw_queue_update_ci(struct hl_cs *cs)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
|
* hl_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
|
||||||
* H/W queue.
|
* H/W queue.
|
||||||
* @hdev: pointer to habanalabs device structure
|
* @hdev: pointer to habanalabs device structure
|
||||||
* @q: pointer to habanalabs queue structure
|
* @q: pointer to habanalabs queue structure
|
||||||
@@ -80,8 +80,8 @@ void hl_hw_queue_update_ci(struct hl_cs *cs)
|
|||||||
* This function must be called when the scheduler mutex is taken
|
* This function must be called when the scheduler mutex is taken
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
|
void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
|
||||||
struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
|
u32 ctl, u32 len, u64 ptr)
|
||||||
{
|
{
|
||||||
struct hl_bd *bd;
|
struct hl_bd *bd;
|
||||||
|
|
||||||
@@ -222,8 +222,8 @@ static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
|
|||||||
* @cb_size: size of CB
|
* @cb_size: size of CB
|
||||||
* @cb_ptr: pointer to CB location
|
* @cb_ptr: pointer to CB location
|
||||||
*
|
*
|
||||||
* This function sends a single CB, that must NOT generate a completion entry
|
* This function sends a single CB, that must NOT generate a completion entry.
|
||||||
*
|
* Sending CPU messages can be done instead via 'hl_hw_queue_submit_bd()'
|
||||||
*/
|
*/
|
||||||
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
|
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
|
||||||
u32 cb_size, u64 cb_ptr)
|
u32 cb_size, u64 cb_ptr)
|
||||||
@@ -231,16 +231,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
|
|||||||
struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
|
struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
|
|
||||||
/*
|
hdev->asic_funcs->hw_queues_lock(hdev);
|
||||||
* The CPU queue is a synchronous queue with an effective depth of
|
|
||||||
* a single entry (although it is allocated with room for multiple
|
|
||||||
* entries). Therefore, there is a different lock, called
|
|
||||||
* send_cpu_message_lock, that serializes accesses to the CPU queue.
|
|
||||||
* As a result, we don't need to lock the access to the entire H/W
|
|
||||||
* queues module when submitting a JOB to the CPU queue
|
|
||||||
*/
|
|
||||||
if (q->queue_type != QUEUE_TYPE_CPU)
|
|
||||||
hdev->asic_funcs->hw_queues_lock(hdev);
|
|
||||||
|
|
||||||
if (hdev->disabled) {
|
if (hdev->disabled) {
|
||||||
rc = -EPERM;
|
rc = -EPERM;
|
||||||
@@ -258,11 +249,10 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
|
hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
if (q->queue_type != QUEUE_TYPE_CPU)
|
hdev->asic_funcs->hw_queues_unlock(hdev);
|
||||||
hdev->asic_funcs->hw_queues_unlock(hdev);
|
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@@ -328,7 +318,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
|
|||||||
cq->pi = hl_cq_inc_ptr(cq->pi);
|
cq->pi = hl_cq_inc_ptr(cq->pi);
|
||||||
|
|
||||||
submit_bd:
|
submit_bd:
|
||||||
ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
|
hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -407,7 +397,7 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
|
|||||||
else
|
else
|
||||||
ptr = (u64) (uintptr_t) job->user_cb;
|
ptr = (u64) (uintptr_t) job->user_cb;
|
||||||
|
|
||||||
ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
|
hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int init_signal_cs(struct hl_device *hdev,
|
static int init_signal_cs(struct hl_device *hdev,
|
||||||
@@ -426,8 +416,9 @@ static int init_signal_cs(struct hl_device *hdev,
|
|||||||
cs_cmpl->sob_val = prop->next_sob_val;
|
cs_cmpl->sob_val = prop->next_sob_val;
|
||||||
|
|
||||||
dev_dbg(hdev->dev,
|
dev_dbg(hdev->dev,
|
||||||
"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
|
"generate signal CB, sob_id: %d, sob val: %u, q_idx: %d, seq: %llu\n",
|
||||||
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
|
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx,
|
||||||
|
cs_cmpl->cs_seq);
|
||||||
|
|
||||||
/* we set an EB since we must make sure all oeprations are done
|
/* we set an EB since we must make sure all oeprations are done
|
||||||
* when sending the signal
|
* when sending the signal
|
||||||
@@ -435,17 +426,37 @@ static int init_signal_cs(struct hl_device *hdev,
 	hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
 				cs_cmpl->hw_sob->sob_id, 0, true);
 
-	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1);
+	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1,
+								false);
 
 	return rc;
 }
 
-static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
+void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
+			struct hl_cs *cs, struct hl_cs_job *job,
+			struct hl_cs_compl *cs_cmpl)
+{
+	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
+
+	cs_cmpl->hw_sob = handle->hw_sob;
+
+	/* Note that encaps_sig_wait_offset was validated earlier in the flow
+	 * for offset value which exceeds the max reserved signal count.
+	 * always decrement 1 of the offset since when the user
+	 * set offset 1 for example he mean to wait only for the first
+	 * signal only, which will be pre_sob_val, and if he set offset 2
+	 * then the value required is (pre_sob_val + 1) and so on...
+	 */
+	cs_cmpl->sob_val = handle->pre_sob_val +
+			(job->encaps_sig_wait_offset - 1);
+}
+
+static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
 {
-	struct hl_cs_compl *signal_cs_cmpl;
-	struct hl_sync_stream_properties *prop;
 	struct hl_gen_wait_properties wait_prop;
+	struct hl_sync_stream_properties *prop;
+	struct hl_cs_compl *signal_cs_cmpl;
 	u32 q_idx;
 
 	q_idx = job->hw_queue_id;
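A quick illustration of the wait-offset arithmetic the new hl_hw_queue_encaps_sig_set_sob_info() helper performs: offset N means "wait for the N-th reserved signal", so the SOB target is pre_sob_val plus N minus one. A minimal userspace sketch of the same calculation (the struct here is a stand-in for the driver's hl_cs_encaps_sig_handle, and the values are made up):

#include <assert.h>
#include <stdint.h>

/* Stand-in for the SOB bookkeeping of an encapsulated-signals handle. */
struct encaps_handle {
	uint32_t pre_sob_val;	/* SOB value before any reserved signal fires */
	uint32_t count;		/* number of reserved signals */
};

/* Offset N selects the N-th reserved signal, i.e. the SOB value
 * pre_sob_val + (N - 1), exactly as the comment in the helper explains.
 */
static uint32_t wait_sob_val(const struct encaps_handle *h, uint32_t offset)
{
	assert(offset >= 1 && offset <= h->count);
	return h->pre_sob_val + (offset - 1);
}

int main(void)
{
	struct encaps_handle h = { .pre_sob_val = 100, .count = 4 };

	assert(wait_sob_val(&h, 1) == 100);	/* first signal only */
	assert(wait_sob_val(&h, 2) == 101);
	assert(wait_sob_val(&h, 4) == 103);	/* all reserved signals */
	return 0;
}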
@@ -455,14 +466,51 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
 					struct hl_cs_compl,
 					base_fence);
 
-	/* copy the SOB id and value of the signal CS */
-	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
-	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+	if (cs->encaps_signals) {
+		/* use the encaps signal handle stored earlier in the flow
+		 * and set the SOB information from the encaps
+		 * signals handle
+		 */
+		hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl);
+
+		dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, offset: %u\n",
+				cs->encaps_sig_hdl->q_idx,
+				cs->encaps_sig_hdl->cs_seq,
+				cs_cmpl->sob_val,
+				job->encaps_sig_wait_offset);
+	} else {
+		/* Copy the SOB id and value of the signal CS */
+		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+	}
+
+	/* check again if the signal cs already completed.
+	 * if yes then don't send any wait cs since the hw_sob
+	 * could be in reset already. if signal is not completed
+	 * then get refcount to hw_sob to prevent resetting the sob
+	 * while wait cs is not submitted.
+	 * note that this check is protected by two locks,
+	 * hw queue lock and completion object lock,
+	 * and the same completion object lock also protects
+	 * the hw_sob reset handler function.
+	 * The hw_queue lock prevent out of sync of hw_sob
+	 * refcount value, changed by signal/wait flows.
+	 */
+	spin_lock(&signal_cs_cmpl->lock);
+
+	if (completion_done(&cs->signal_fence->completion)) {
+		spin_unlock(&signal_cs_cmpl->lock);
+		return -EINVAL;
+	}
+
+	kref_get(&cs_cmpl->hw_sob->kref);
+
+	spin_unlock(&signal_cs_cmpl->lock);
 
 	dev_dbg(hdev->dev,
-		"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
+		"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n",
 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
-		prop->base_mon_id, q_idx);
+		prop->base_mon_id, q_idx, cs->sequence);
 
 	wait_prop.data = (void *) job->patched_cb;
 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
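The hunk above moves the SOB refcount grab under the completion object's lock, so the "is the signal already done?" test and the pin happen atomically with respect to the SOB reset handler. A userspace sketch of the same test-then-reference pattern, using a pthread mutex in place of the kernel spinlock (types and names here are illustrative only):

#include <pthread.h>
#include <stdbool.h>

/* Analog of the pattern above: the same lock the release path takes
 * before resetting the object also guards the "already completed?"
 * test and the refcount increment, so the wait path can never pin an
 * object whose teardown has already started.
 */
struct sob {
	pthread_mutex_t lock;
	int refcount;
	bool completed;	/* set by the signal side before reset may run */
};

/* Returns true if a reference was taken, false if we lost the race. */
static bool sob_try_get(struct sob *s)
{
	bool ok = false;

	pthread_mutex_lock(&s->lock);
	if (!s->completed) {	/* signal not done yet: safe to pin */
		s->refcount++;
		ok = true;
	}
	pthread_mutex_unlock(&s->lock);
	return ok;
}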
@@ -471,17 +519,14 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
 	wait_prop.mon_id = prop->base_mon_id;
 	wait_prop.q_idx = q_idx;
 	wait_prop.size = 0;
 
 	hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);
 
-	kref_get(&cs_cmpl->hw_sob->kref);
-	/*
-	 * Must put the signal fence after the SOB refcnt increment so
-	 * the SOB refcnt won't turn 0 and reset the SOB before the
-	 * wait CS was submitted.
-	 */
 	mb();
 	hl_fence_put(cs->signal_fence);
 	cs->signal_fence = NULL;
 
+	return 0;
 }
 
 /*
@@ -506,7 +551,60 @@ static int init_signal_wait_cs(struct hl_cs *cs)
 	if (cs->type & CS_TYPE_SIGNAL)
 		rc = init_signal_cs(hdev, job, cs_cmpl);
 	else if (cs->type & CS_TYPE_WAIT)
-		init_wait_cs(hdev, cs, job, cs_cmpl);
+		rc = init_wait_cs(hdev, cs, job, cs_cmpl);
+
+	return rc;
+}
+
+static int encaps_sig_first_staged_cs_handler
+			(struct hl_device *hdev, struct hl_cs *cs)
+{
+	struct hl_cs_compl *cs_cmpl =
+			container_of(cs->fence,
+					struct hl_cs_compl, base_fence);
+	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
+	struct hl_encaps_signals_mgr *mgr;
+	int rc = 0;
+
+	mgr = &hdev->compute_ctx->sig_mgr;
+
+	spin_lock(&mgr->lock);
+	encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id);
+	if (encaps_sig_hdl) {
+		/*
+		 * Set handler CS sequence,
+		 * the CS which contains the encapsulated signals.
+		 */
+		encaps_sig_hdl->cs_seq = cs->sequence;
+		/* store the handle and set encaps signal indication,
+		 * to be used later in cs_do_release to put the last
+		 * reference to encaps signals handlers.
+		 */
+		cs_cmpl->encaps_signals = true;
+		cs_cmpl->encaps_sig_hdl = encaps_sig_hdl;
+
+		/* set hw_sob pointer in completion object
+		 * since it's used in cs_do_release flow to put
+		 * refcount to sob
+		 */
+		cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob;
+		cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val +
+						encaps_sig_hdl->count;
+
+		dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob(%u), val(%u)\n",
+				cs->sequence, encaps_sig_hdl->id,
+				encaps_sig_hdl->count,
+				encaps_sig_hdl->q_idx,
+				cs_cmpl->hw_sob->sob_id,
+				cs_cmpl->sob_val);
+
+	} else {
+		dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n",
+				cs->encaps_sig_hdl_id);
+		rc = -EINVAL;
+	}
+
+	spin_unlock(&mgr->lock);
+
 	return rc;
 }
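For readers unfamiliar with the handle manager queried above, here is a minimal kernel-style sketch of the same IDR pattern. The linux/idr.h calls are the real API; the my_mgr/my_handle types and the GFP choice are illustrative stand-ins, not the driver's actual structures:

#include <linux/idr.h>
#include <linux/spinlock.h>

struct my_mgr {
	spinlock_t lock;
	struct idr handles;
};

struct my_handle {
	u32 id;
};

/* Allocate an id for a handle; GFP_ATOMIC because the idr is touched
 * under a spinlock, mirroring the signals manager above.
 */
static int my_mgr_add(struct my_mgr *mgr, struct my_handle *h)
{
	int id;

	spin_lock(&mgr->lock);
	id = idr_alloc(&mgr->handles, h, 1, 0, GFP_ATOMIC);
	spin_unlock(&mgr->lock);
	if (id < 0)
		return id;
	h->id = id;
	return 0;
}

/* Look a handle up by id. The pointer is only safe to use while the
 * lock is held unless the caller takes its own reference, which is why
 * the CS flow above annotates the completion object under mgr->lock.
 */
static struct my_handle *my_mgr_find(struct my_mgr *mgr, u32 id)
{
	struct my_handle *h;

	spin_lock(&mgr->lock);
	h = idr_find(&mgr->handles, id);
	spin_unlock(&mgr->lock);
	return h;
}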
@@ -581,14 +679,21 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 
 	if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) {
 		rc = init_signal_wait_cs(cs);
-		if (rc) {
-			dev_err(hdev->dev, "Failed to submit signal cs\n");
+		if (rc)
 			goto unroll_cq_resv;
-		}
-	} else if (cs->type == CS_TYPE_COLLECTIVE_WAIT)
-		hdev->asic_funcs->collective_wait_init_cs(cs);
+	} else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) {
+		rc = hdev->asic_funcs->collective_wait_init_cs(cs);
+		if (rc)
+			goto unroll_cq_resv;
+	}
 
 
+	if (cs->encaps_signals && cs->staged_first) {
+		rc = encaps_sig_first_staged_cs_handler(hdev, cs);
+		if (rc)
+			goto unroll_cq_resv;
+	}
+
 	spin_lock(&hdev->cs_mirror_lock);
 
 	/* Verify staged CS exists and add to the staged list */
@@ -613,6 +718,11 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 		}
 
 		list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);
+
+		/* update stream map of the first CS */
+		if (hdev->supports_wait_for_multi_cs)
+			staged_cs->fence->stream_master_qid_map |=
+					cs->fence->stream_master_qid_map;
 	}
 
 	list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
@@ -834,6 +944,8 @@ static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
 		hw_sob = &sync_stream_prop->hw_sob[sob];
 		hw_sob->hdev = hdev;
 		hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
+		hw_sob->sob_addr =
+			hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
 		hw_sob->q_idx = q_idx;
 		kref_init(&hw_sob->kref);
 	}

drivers/misc/habanalabs/common/memory.c
@@ -124,7 +124,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 
 	spin_lock(&vm->idr_lock);
 	handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
-				GFP_KERNEL);
+				GFP_ATOMIC);
 	spin_unlock(&vm->idr_lock);
 
 	if (handle < 0) {
@@ -528,6 +528,33 @@ static inline int add_va_block(struct hl_device *hdev,
 	return rc;
 }
 
+/**
+ * is_hint_crossing_range() - check if hint address crossing specified reserved
+ * range.
+ */
+static inline bool is_hint_crossing_range(enum hl_va_range_type range_type,
+		u64 start_addr, u32 size, struct asic_fixed_properties *prop) {
+	bool range_cross;
+
+	if (range_type == HL_VA_RANGE_TYPE_DRAM)
+		range_cross =
+			hl_mem_area_crosses_range(start_addr, size,
+			prop->hints_dram_reserved_va_range.start_addr,
+			prop->hints_dram_reserved_va_range.end_addr);
+	else if (range_type == HL_VA_RANGE_TYPE_HOST)
+		range_cross =
+			hl_mem_area_crosses_range(start_addr, size,
+			prop->hints_host_reserved_va_range.start_addr,
+			prop->hints_host_reserved_va_range.end_addr);
+	else
+		range_cross =
+			hl_mem_area_crosses_range(start_addr, size,
+			prop->hints_host_hpage_reserved_va_range.start_addr,
+			prop->hints_host_hpage_reserved_va_range.end_addr);
+
+	return range_cross;
+}
+
 /**
  * get_va_block() - get a virtual block for the given size and alignment.
  *
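The helper above delegates to hl_mem_area_crosses_range(); a runnable sketch of the interval-overlap test it is assumed to perform (the addresses are made-up, not real reserved ranges):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Two inclusive ranges overlap unless one ends before the other begins. */
static bool area_crosses_range(uint64_t addr, uint32_t size,
			       uint64_t range_start, uint64_t range_end)
{
	uint64_t end = addr + size - 1;

	return !(end < range_start || addr > range_end);
}

int main(void)
{
	/* Reserved hint range [0x1000, 0x1fff], illustrative numbers. */
	assert(area_crosses_range(0x0800, 0x1000, 0x1000, 0x1fff));	/* tail overlaps */
	assert(!area_crosses_range(0x2000, 0x100, 0x1000, 0x1fff));	/* fully after */
	assert(!area_crosses_range(0x0000, 0x1000, 0x1000, 0x1fff));	/* ends at 0xfff */
	return 0;
}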
@@ -536,6 +563,8 @@ static inline int add_va_block(struct hl_device *hdev,
  * @size: requested block size.
  * @hint_addr: hint for requested address by the user.
  * @va_block_align: required alignment of the virtual block start address.
+ * @range_type: va range type (host, dram)
+ * @flags: additional memory flags, currently only uses HL_MEM_FORCE_HINT
  *
  * This function does the following:
  * - Iterate on the virtual block list to find a suitable virtual block for the
@@ -545,13 +574,19 @@ static inline int add_va_block(struct hl_device *hdev,
  */
 static u64 get_va_block(struct hl_device *hdev,
 			struct hl_va_range *va_range,
-			u64 size, u64 hint_addr, u32 va_block_align)
+			u64 size, u64 hint_addr, u32 va_block_align,
+			enum hl_va_range_type range_type,
+			u32 flags)
 {
 	struct hl_vm_va_block *va_block, *new_va_block = NULL;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end,
-		align_mask, reserved_valid_start = 0, reserved_valid_size = 0;
+		align_mask, reserved_valid_start = 0, reserved_valid_size = 0,
+		dram_hint_mask = prop->dram_hints_align_mask;
 	bool add_prev = false;
 	bool is_align_pow_2 = is_power_of_2(va_range->page_size);
+	bool is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr);
+	bool force_hint = flags & HL_MEM_FORCE_HINT;
 
 	if (is_align_pow_2)
 		align_mask = ~((u64)va_block_align - 1);
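A short demonstration of the mask arithmetic used here, since it recurs throughout the allocator: for a power-of-two alignment, ~(align - 1) clears the low bits, so ANDing rounds an address down to the alignment boundary. Runnable userspace check with illustrative values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t align = 0x200000;		/* 2 MB, e.g. a huge page */
	uint64_t mask = ~(align - 1);		/* same form as align_mask above */

	assert((0x12345678ULL & mask) == 0x12200000ULL);	/* rounded down */
	assert((0x00200000ULL & mask) == 0x00200000ULL);	/* already aligned */
	/* "addr & (align - 1)" is the complementary test used below to
	 * decide whether a hint address is aligned at all.
	 */
	assert((0x12345678ULL & (align - 1)) != 0);
	return 0;
}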
@@ -564,12 +599,20 @@ static u64 get_va_block(struct hl_device *hdev,
 		size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
 							va_range->page_size;
 
-	tmp_hint_addr = hint_addr;
+	tmp_hint_addr = hint_addr & ~dram_hint_mask;
 
 	/* Check if we need to ignore hint address */
 	if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
-			(!is_align_pow_2 &&
+			(!is_align_pow_2 && is_hint_dram_addr &&
 			do_div(tmp_hint_addr, va_range->page_size))) {
+
+		if (force_hint) {
+			/* Hint must be respected, so here we just fail */
+			dev_err(hdev->dev,
+				"Hint address 0x%llx is not page aligned - cannot be respected\n",
+				hint_addr);
+			return 0;
+		}
+
 		dev_dbg(hdev->dev,
 			"Hint address 0x%llx will be ignored because it is not aligned\n",
@@ -596,6 +639,16 @@ static u64 get_va_block(struct hl_device *hdev,
 		if (valid_size < size)
 			continue;
 
+		/*
+		 * In case hint address is 0, and arc_hints_range_reservation
+		 * property enabled, then avoid allocating va blocks from the
+		 * range reserved for hint addresses
+		 */
+		if (prop->hints_range_reservation && !hint_addr)
+			if (is_hint_crossing_range(range_type, valid_start,
+					size, prop))
+				continue;
+
 		/* Pick the minimal length block which has the required size */
 		if (!new_va_block || (valid_size < reserved_valid_size)) {
 			new_va_block = va_block;
@@ -618,6 +671,17 @@ static u64 get_va_block(struct hl_device *hdev,
 		goto out;
 	}
 
+	if (force_hint && reserved_valid_start != hint_addr) {
+		/* Hint address must be respected. If we are here - this means
+		 * we could not respect it.
+		 */
+		dev_err(hdev->dev,
+			"Hint address 0x%llx could not be respected\n",
+			hint_addr);
+		reserved_valid_start = 0;
+		goto out;
+	}
+
 	/*
 	 * Check if there is some leftover range due to reserving the new
 	 * va block, then return it to the main virtual addresses list.
@@ -670,7 +734,8 @@ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
 		enum hl_va_range_type type, u32 size, u32 alignment)
 {
 	return get_va_block(hdev, ctx->va_range[type], size, 0,
-			max(alignment, ctx->va_range[type]->page_size));
+			max(alignment, ctx->va_range[type]->page_size),
+			type, 0);
 }
 
 /**
@@ -731,29 +796,16 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
 	return rc;
 }
 
-/**
- * get_sg_info() - get number of pages and the DMA address from SG list.
- * @sg: the SG list.
- * @dma_addr: pointer to DMA address to return.
- *
- * Calculate the number of consecutive pages described by the SG list. Take the
- * offset of the address in the first page, add to it the length and round it up
- * to the number of needed pages.
- */
-static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
-{
-	*dma_addr = sg_dma_address(sg);
-
-	return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
-			(PAGE_SIZE - 1)) >> PAGE_SHIFT;
-}
-
 /**
  * init_phys_pg_pack_from_userptr() - initialize physical page pack from host
  *                                    memory
  * @ctx: pointer to the context structure.
 * @userptr: userptr to initialize from.
  * @pphys_pg_pack: result pointer.
+ * @force_regular_page: tell the function to ignore huge page optimization,
+ *                      even if possible. Needed for cases where the device VA
+ *                      is allocated before we know the composition of the
+ *                      physical pages
  *
  * This function does the following:
  * - Pin the physical pages related to the given virtual block.
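The get_sg_info() helper removed above (renamed hl_get_sg_info() and moved to a shared header, per the call sites below) does a small but easy-to-misread rounding: take the DMA address's offset inside its first page, add the segment length, and round the sum up to whole pages. A runnable userspace replay of that arithmetic:

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)

/* Same computation as the helper: offset-in-first-page plus length,
 * rounded up to the number of pages needed to cover the segment.
 */
static uint32_t sg_npages(uint64_t dma_addr, uint32_t len)
{
	return ((dma_addr & (PAGE_SIZE - 1)) + len + PAGE_SIZE - 1)
			>> PAGE_SHIFT;
}

int main(void)
{
	assert(sg_npages(0x1000, 0x1000) == 1);	/* aligned, exactly one page */
	assert(sg_npages(0x1800, 0x1000) == 2);	/* offset spills into a 2nd page */
	assert(sg_npages(0x1000, 0x1001) == 2);	/* one byte into a 2nd page */
	return 0;
}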
@@ -762,17 +814,18 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
  */
 static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 				struct hl_userptr *userptr,
-				struct hl_vm_phys_pg_pack **pphys_pg_pack)
+				struct hl_vm_phys_pg_pack **pphys_pg_pack,
+				bool force_regular_page)
 {
-	struct hl_vm_phys_pg_pack *phys_pg_pack;
-	struct scatterlist *sg;
-	dma_addr_t dma_addr;
-	u64 page_mask, total_npages;
 	u32 npages, page_size = PAGE_SIZE,
 		huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
-	bool first = true, is_huge_page_opt = true;
-	int rc, i, j;
 	u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
+	struct hl_vm_phys_pg_pack *phys_pg_pack;
+	bool first = true, is_huge_page_opt;
+	u64 page_mask, total_npages;
+	struct scatterlist *sg;
+	dma_addr_t dma_addr;
+	int rc, i, j;
 
 	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
 	if (!phys_pg_pack)
@@ -783,6 +836,8 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 	phys_pg_pack->asid = ctx->asid;
 	atomic_set(&phys_pg_pack->mapping_cnt, 1);
 
+	is_huge_page_opt = (force_regular_page ? false : true);
+
 	/* Only if all dma_addrs are aligned to 2MB and their
 	 * sizes is at least 2MB, we can use huge page mapping.
 	 * We limit the 2MB optimization to this condition,
@@ -791,7 +846,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 	 */
 	total_npages = 0;
 	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
-		npages = get_sg_info(sg, &dma_addr);
+		npages = hl_get_sg_info(sg, &dma_addr);
 
 		total_npages += npages;
 
@@ -820,7 +875,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 
 	j = 0;
 	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
-		npages = get_sg_info(sg, &dma_addr);
+		npages = hl_get_sg_info(sg, &dma_addr);
 
 		/* align down to physical page size and save the offset */
 		if (first) {
@@ -1001,11 +1056,12 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	struct hl_userptr *userptr = NULL;
 	struct hl_vm_hash_node *hnode;
 	struct hl_va_range *va_range;
-	enum vm_type_t *vm_type;
+	enum vm_type *vm_type;
 	u64 ret_vaddr, hint_addr;
 	u32 handle = 0, va_block_align;
 	int rc;
 	bool is_userptr = args->flags & HL_MEM_USERPTR;
+	enum hl_va_range_type va_range_type = 0;
 
 	/* Assume failure */
 	*device_addr = 0;
@@ -1023,7 +1079,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	}
 
 	rc = init_phys_pg_pack_from_userptr(ctx, userptr,
-						&phys_pg_pack);
+						&phys_pg_pack, false);
 	if (rc) {
 		dev_err(hdev->dev,
 			"unable to init page pack for vaddr 0x%llx\n",
@@ -1031,14 +1087,14 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		goto init_page_pack_err;
 	}
 
-	vm_type = (enum vm_type_t *) userptr;
+	vm_type = (enum vm_type *) userptr;
 	hint_addr = args->map_host.hint_addr;
 	handle = phys_pg_pack->handle;
 
 	/* get required alignment */
 	if (phys_pg_pack->page_size == page_size) {
 		va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
+		va_range_type = HL_VA_RANGE_TYPE_HOST;
 		/*
 		 * huge page alignment may be needed in case of regular
 		 * page mapping, depending on the host VA alignment
@@ -1053,6 +1109,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		 * mapping
 		 */
 			va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
+			va_range_type = HL_VA_RANGE_TYPE_HOST_HUGE;
 			va_block_align = huge_page_size;
 		}
 	} else {
@@ -1072,12 +1129,13 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 
 		spin_unlock(&vm->idr_lock);
 
-		vm_type = (enum vm_type_t *) phys_pg_pack;
+		vm_type = (enum vm_type *) phys_pg_pack;
 
 		hint_addr = args->map_device.hint_addr;
 
 		/* DRAM VA alignment is the same as the MMU page size */
 		va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
+		va_range_type = HL_VA_RANGE_TYPE_DRAM;
 		va_block_align = hdev->asic_prop.dmmu.page_size;
 	}
 
@@ -1100,8 +1158,23 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		goto hnode_err;
 	}
 
+	if (hint_addr && phys_pg_pack->offset) {
+		if (args->flags & HL_MEM_FORCE_HINT) {
+			/* Fail if hint must be respected but it can't be */
+			dev_err(hdev->dev,
+				"Hint address 0x%llx cannot be respected because source memory is not aligned 0x%x\n",
+				hint_addr, phys_pg_pack->offset);
+			rc = -EINVAL;
+			goto va_block_err;
+		}
+		dev_dbg(hdev->dev,
+			"Hint address 0x%llx will be ignored because source memory is not aligned 0x%x\n",
+			hint_addr, phys_pg_pack->offset);
+	}
+
 	ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
-					hint_addr, va_block_align);
+					hint_addr, va_block_align,
+					va_range_type, args->flags);
 	if (!ret_vaddr) {
 		dev_err(hdev->dev, "no available va block for handle %u\n",
 				handle);
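The new hint/offset check above reflects a mapping invariant: a pinned host buffer that starts mid-page keeps that in-page offset in its device mapping, so a page-aligned hint can only be honored when the offset is zero. A runnable sketch of the same predicate (the struct and field are simplified stand-ins for hl_vm_phys_pg_pack):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct pg_pack {
	uint32_t offset;	/* host address offset inside its first page */
};

/* A hint fails only when one was actually given and the source memory
 * carries a nonzero in-page offset, matching the check in the hunk.
 */
static bool hint_can_be_respected(uint64_t hint_addr,
				  const struct pg_pack *pack)
{
	return !(hint_addr && pack->offset);
}

int main(void)
{
	struct pg_pack aligned = { .offset = 0 };
	struct pg_pack unaligned = { .offset = 0x80 };

	assert(hint_can_be_respected(0x10000000, &aligned));
	assert(!hint_can_be_respected(0x10000000, &unaligned));
	assert(hint_can_be_respected(0, &unaligned));	/* no hint given */
	return 0;
}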
@@ -1181,17 +1254,19 @@ init_page_pack_err:
 static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 				bool ctx_free)
 {
-	struct hl_device *hdev = ctx->hdev;
-	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
+	u64 vaddr = args->unmap.device_virt_addr;
 	struct hl_vm_hash_node *hnode = NULL;
+	struct asic_fixed_properties *prop;
+	struct hl_device *hdev = ctx->hdev;
 	struct hl_userptr *userptr = NULL;
 	struct hl_va_range *va_range;
-	u64 vaddr = args->unmap.device_virt_addr;
-	enum vm_type_t *vm_type;
+	enum vm_type *vm_type;
 	bool is_userptr;
 	int rc = 0;
 
+	prop = &hdev->asic_prop;
+
 	/* protect from double entrance */
 	mutex_lock(&ctx->mem_hash_lock);
 	hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
@@ -1214,8 +1289,9 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	if (*vm_type == VM_TYPE_USERPTR) {
 		is_userptr = true;
 		userptr = hnode->ptr;
-		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
-							&phys_pg_pack);
+		rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack,
+							false);
 		if (rc) {
 			dev_err(hdev->dev,
 				"unable to init page pack for vaddr 0x%llx\n",
@@ -1299,7 +1375,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	kfree(hnode);
 
 	if (is_userptr) {
-		rc = free_phys_pg_pack(hdev, phys_pg_pack);
+		free_phys_pg_pack(hdev, phys_pg_pack);
 		dma_unmap_host_va(hdev, userptr);
 	}
 
@@ -1669,6 +1745,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 		return -EINVAL;
 	}
 
+	userptr->pid = current->pid;
 	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
 	if (!userptr->sgt)
 		return -ENOMEM;
@@ -2033,7 +2110,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 	 * another side effect error
 	 */
 	if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
-		dev_notice(hdev->dev,
+		dev_dbg(hdev->dev,
 			"user released device without removing its memory mappings\n");
 
 	hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {

drivers/misc/habanalabs/common/mmu/mmu_v1.c
@@ -470,13 +470,13 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
 	if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
 		kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
 		gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
-	}
 
-	/* Make sure that if we arrive here again without init was called we
-	 * won't cause kernel panic. This can happen for example if we fail
-	 * during hard reset code at certain points
-	 */
-	hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
+		/* Make sure that if we arrive here again without init was
+		 * called we won't cause kernel panic. This can happen for
+		 * example if we fail during hard reset code at certain points
+		 */
+		hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
+	}
 }
 
 /**
drivers/misc/habanalabs/common/pci/pci.c

@@ -436,6 +436,8 @@ int hl_pci_init(struct hl_device *hdev)
 		goto unmap_pci_bars;
 	}
 
+	dma_set_max_seg_size(&pdev->dev, U32_MAX);
+
 	return 0;
 
 unmap_pci_bars:
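For context, raising the maximum DMA segment size lets the mapping layer merge scatter-gather entries into segments larger than the 64 KB default. A hedged sketch of typical probe-time DMA setup around this call (the 48-bit mask width is an illustrative assumption, not necessarily what this driver uses; error handling trimmed):

#include <linux/dma-mapping.h>
#include <linux/pci.h>

/* Configure DMA addressing and segment limits during probe. */
static int example_pci_dma_setup(struct pci_dev *pdev)
{
	int rc;

	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
	if (rc)
		return rc;

	/* Allow arbitrarily large SG segments instead of the small default */
	dma_set_max_seg_size(&pdev->dev, U32_MAX);
	return 0;
}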
drivers/misc/habanalabs/common/state_dump.c (new file, 718 lines)

@@ -0,0 +1,718 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2021 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <linux/vmalloc.h>
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+
+/**
+ * hl_format_as_binary - helper function, format an integer as binary
+ * using supplied scratch buffer
+ * @buf: the buffer to use
+ * @buf_len: buffer capacity
+ * @n: number to format
+ *
+ * Returns pointer to buffer
+ */
+char *hl_format_as_binary(char *buf, size_t buf_len, u32 n)
+{
+	int i;
+	u32 bit;
+	bool leading0 = true;
+	char *wrptr = buf;
+
+	if (buf_len > 0 && buf_len < 3) {
+		*wrptr = '\0';
+		return buf;
+	}
+
+	wrptr[0] = '0';
+	wrptr[1] = 'b';
+	wrptr += 2;
+	/* Remove 3 characters from length for '0b' and '\0' termination */
+	buf_len -= 3;
+
+	for (i = 0; i < sizeof(n) * BITS_PER_BYTE && buf_len; ++i, n <<= 1) {
+		/* Writing bit calculation in one line would cause a false
+		 * positive static code analysis error, so splitting.
+		 */
+		bit = n & (1 << (sizeof(n) * BITS_PER_BYTE - 1));
+		bit = !!bit;
+		leading0 &= !bit;
+		if (!leading0) {
+			*wrptr = '0' + bit;
+			++wrptr;
+		}
+	}
+
+	*wrptr = '\0';
+
+	return buf;
+}
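To make the formatter's behavior concrete, here is a runnable userspace replay of the same logic (u32 and BITS_PER_BYTE spelled out; kept faithful to the kernel version above, including its structure):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static char *format_as_binary(char *buf, size_t buf_len, uint32_t n)
{
	int i;
	int leading0 = 1;
	char *wrptr = buf;

	if (buf_len > 0 && buf_len < 3) {
		*wrptr = '\0';
		return buf;
	}
	wrptr[0] = '0';
	wrptr[1] = 'b';
	wrptr += 2;
	buf_len -= 3;
	for (i = 0; i < 32 && buf_len; ++i, n <<= 1) {
		uint32_t bit = !!(n & (1u << 31));

		leading0 &= !bit;	/* suppress leading zeros */
		if (!leading0)
			*wrptr++ = '0' + bit;
	}
	*wrptr = '\0';
	return buf;
}

int main(void)
{
	char buf[40];

	assert(!strcmp(format_as_binary(buf, sizeof(buf), 0x5), "0b101"));
	assert(!strcmp(format_as_binary(buf, sizeof(buf), 0), "0b"));
	printf("%s\n", format_as_binary(buf, sizeof(buf), 0xA0));	/* 0b10100000 */
	return 0;
}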
+
+/**
+ * resize_to_fit - helper function, resize buffer to fit given amount of data
+ * @buf: destination buffer double pointer
+ * @size: pointer to the size container
+ * @desired_size: size the buffer must contain
+ *
+ * Returns 0 on success or error code on failure.
+ * On success, the size of buffer is at least desired_size. Buffer is allocated
+ * via vmalloc and must be freed with vfree.
+ */
+static int resize_to_fit(char **buf, size_t *size, size_t desired_size)
+{
+	char *resized_buf;
+	size_t new_size;
+
+	if (*size >= desired_size)
+		return 0;
+
+	/* Not enough space to print all, have to resize */
+	new_size = max_t(size_t, PAGE_SIZE, round_up(desired_size, PAGE_SIZE));
+	resized_buf = vmalloc(new_size);
+	if (!resized_buf)
+		return -ENOMEM;
+	memcpy(resized_buf, *buf, *size);
+	vfree(*buf);
+	*buf = resized_buf;
+	*size = new_size;
+
+	return 1;
+}
+
+/**
+ * hl_snprintf_resize() - print formatted data to buffer, resize as needed
+ * @buf: buffer double pointer, to be written to and resized, must be either
+ *       NULL or allocated with vmalloc.
+ * @size: current size of the buffer
+ * @offset: current offset to write to
+ * @format: format of the data
+ *
+ * This function will write formatted data into the buffer. If buffer is not
+ * large enough, it will be resized using vmalloc. Size may be modified if the
+ * buffer was resized, offset will be advanced by the number of bytes written
+ * not including the terminating character
+ *
+ * Returns 0 on success or error code on failure
+ *
+ * Note that the buffer has to be manually released using vfree.
+ */
+int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
+			   const char *format, ...)
+{
+	va_list args;
+	size_t length;
+	int rc;
+
+	if (*buf == NULL && (*size != 0 || *offset != 0))
+		return -EINVAL;
+
+	va_start(args, format);
+	length = vsnprintf(*buf + *offset, *size - *offset, format, args);
+	va_end(args);
+
+	rc = resize_to_fit(buf, size, *offset + length + 1);
+	if (rc < 0)
+		return rc;
+	else if (rc > 0) {
+		/* Resize was needed, write again */
+		va_start(args, format);
+		length = vsnprintf(*buf + *offset, *size - *offset, format,
+				args);
+		va_end(args);
+	}
+
+	*offset += length;
+
+	return 0;
+}
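The pair of helpers above implements a classic two-pass pattern: print once to measure, grow the buffer if truncated, then print again. A self-contained userspace analog using malloc/realloc in place of vmalloc, for readers who want to experiment with the flow:

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

/* Measure with vsnprintf, grow on truncation, then print again. */
static int snprintf_resize(char **buf, size_t *size, size_t *offset,
			   const char *fmt, ...)
{
	va_list args;
	size_t need;
	int len;

	va_start(args, fmt);
	len = vsnprintf(*buf ? *buf + *offset : NULL,
			*buf ? *size - *offset : 0, fmt, args);
	va_end(args);
	if (len < 0)
		return -1;

	need = *offset + (size_t)len + 1;
	if (need > *size) {
		char *p = realloc(*buf, need);

		if (!p)
			return -1;
		*buf = p;
		*size = need;
		va_start(args, fmt);
		vsnprintf(*buf + *offset, *size - *offset, fmt, args);
		va_end(args);
	}
	*offset += (size_t)len;
	return 0;
}

int main(void)
{
	char *buf = NULL;
	size_t size = 0, offset = 0;

	snprintf_resize(&buf, &size, &offset, "sync id: %u", 7u);
	snprintf_resize(&buf, &size, &offset, ", value: %u\n", 3u);
	fputs(buf, stdout);	/* prints: sync id: 7, value: 3 */
	free(buf);
	return 0;
}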
+
+/**
+ * hl_sync_engine_to_string - convert engine type enum to string literal
+ * @engine_type: engine type (TPC/MME/DMA)
+ *
+ * Return the resolved string literal
+ */
+const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type)
+{
+	switch (engine_type) {
+	case ENGINE_DMA:
+		return "DMA";
+	case ENGINE_MME:
+		return "MME";
+	case ENGINE_TPC:
+		return "TPC";
+	}
+	return "Invalid Engine Type";
+}
+
+/**
+ * hl_print_resize_sync_engine - helper function, format engine name and ID
+ * using hl_snprintf_resize
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ * @engine_type: engine type (TPC/MME/DMA)
+ * @engine_id: engine numerical id
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int hl_print_resize_sync_engine(char **buf, size_t *size, size_t *offset,
+				enum hl_sync_engine_type engine_type,
+				u32 engine_id)
+{
+	return hl_snprintf_resize(buf, size, offset, "%s%u",
+			hl_sync_engine_to_string(engine_type), engine_id);
+}
+
+/**
+ * hl_state_dump_get_sync_name - transform sync object id to name if available
+ * @hdev: pointer to the device
+ * @sync_id: sync object id
+ *
+ * Returns a name literal or NULL if not resolved.
+ * Note: returning NULL shall not be considered as a failure, as not all
+ * sync objects are named.
+ */
+const char *hl_state_dump_get_sync_name(struct hl_device *hdev, u32 sync_id)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	struct hl_hw_obj_name_entry *entry;
+
+	hash_for_each_possible(sds->so_id_to_str_tb, entry,
+				node, sync_id)
+		if (sync_id == entry->id)
+			return entry->name;
+
+	return NULL;
+}
+
+/**
+ * hl_state_dump_get_monitor_name - transform monitor object dump to monitor
+ * name if available
+ * @hdev: pointer to the device
+ * @mon: monitor state dump
+ *
+ * Returns a name literal or NULL if not resolved.
+ * Note: returning NULL shall not be considered as a failure, as not all
+ * monitors are named.
+ */
+const char *hl_state_dump_get_monitor_name(struct hl_device *hdev,
+					struct hl_mon_state_dump *mon)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	struct hl_hw_obj_name_entry *entry;
+
+	hash_for_each_possible(sds->monitor_id_to_str_tb,
+				entry, node, mon->id)
+		if (mon->id == entry->id)
+			return entry->name;
+
+	return NULL;
+}
+
+/**
+ * hl_state_dump_free_sync_to_engine_map - free sync object to engine map
+ * @map: sync object to engine map
+ *
+ * Note: generic free implementation, the allocation is implemented per ASIC.
+ */
+void hl_state_dump_free_sync_to_engine_map(struct hl_sync_to_engine_map *map)
+{
+	struct hl_sync_to_engine_map_entry *entry;
+	struct hlist_node *tmp_node;
+	int i;
+
+	hash_for_each_safe(map->tb, i, tmp_node, entry, node) {
+		hash_del(&entry->node);
+		kfree(entry);
+	}
+}
+
+/**
+ * hl_state_dump_get_sync_to_engine - transform sync_id to
+ * hl_sync_to_engine_map_entry if available for current id
+ * @map: sync object to engine map
+ * @sync_id: sync object id
+ *
+ * Returns the translation entry if found or NULL if not.
+ * Note, returned NULL shall not be considered as a failure as the map
+ * does not cover all possible, it is a best effort sync ids.
+ */
+static struct hl_sync_to_engine_map_entry *
+hl_state_dump_get_sync_to_engine(struct hl_sync_to_engine_map *map, u32 sync_id)
+{
+	struct hl_sync_to_engine_map_entry *entry;
+
+	hash_for_each_possible(map->tb, entry, node, sync_id)
+		if (entry->sync_id == sync_id)
+			return entry;
+	return NULL;
+}
+
+/**
+ * hl_state_dump_read_sync_objects - read sync objects array
+ * @hdev: pointer to the device
+ * @index: sync manager block index starting with E_N
+ *
+ * Returns array of size SP_SYNC_OBJ_AMOUNT on success or NULL on failure
+ */
+static u32 *hl_state_dump_read_sync_objects(struct hl_device *hdev, u32 index)
+{
+	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+	u32 *sync_objects;
+	s64 base_addr; /* Base addr can be negative */
+	int i;
+
+	base_addr = sds->props[SP_SYNC_OBJ_BASE_ADDR] +
+			sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index;
+
+	sync_objects = vmalloc(sds->props[SP_SYNC_OBJ_AMOUNT] * sizeof(u32));
+	if (!sync_objects)
+		return NULL;
+
+	for (i = 0; i < sds->props[SP_SYNC_OBJ_AMOUNT]; ++i)
+		sync_objects[i] = RREG32(base_addr + i * sizeof(u32));
+
+	return sync_objects;
+}
+
+/**
+ * hl_state_dump_free_sync_objects - free sync objects array allocated by
+ * hl_state_dump_read_sync_objects
+ * @sync_objects: sync objects array
+ */
+static void hl_state_dump_free_sync_objects(u32 *sync_objects)
+{
+	vfree(sync_objects);
+}
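The per-block register addressing used by the reader above follows a simple stride scheme: block N starts at the base plus N strides, and sync object i sits one 32-bit register further per i. A runnable check of that arithmetic with illustrative values (these are not real Gaudi offsets):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	int64_t sync_obj_base = 0x7ffc000;	/* stands in for SP_SYNC_OBJ_BASE_ADDR */
	int64_t next_block = 0x10000;		/* stands in for SP_NEXT_SYNC_OBJ_ADDR */
	uint32_t index = 2, i = 5;

	int64_t block_base = sync_obj_base + next_block * index;
	int64_t reg = block_base + i * (int64_t)sizeof(uint32_t);

	assert(block_base == 0x801c000);	/* base of block 2 */
	assert(reg == 0x801c014);		/* sync object 5 inside it */
	return 0;
}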
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* hl_state_dump_print_syncs_single_block - print active sync objects on a
|
||||||
|
* single block
|
||||||
|
* @hdev: pointer to the device
|
||||||
|
* @index: sync manager block index starting with E_N
|
||||||
|
* @buf: destination buffer double pointer to be used with hl_snprintf_resize
|
||||||
|
* @size: pointer to the size container
|
||||||
|
* @offset: pointer to the offset container
|
||||||
|
* @map: sync engines names map
|
||||||
|
*
|
||||||
|
* Returns 0 on success or error code on failure
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
hl_state_dump_print_syncs_single_block(struct hl_device *hdev, u32 index,
|
||||||
|
char **buf, size_t *size, size_t *offset,
|
||||||
|
struct hl_sync_to_engine_map *map)
|
||||||
|
{
|
||||||
|
struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
|
||||||
|
const char *sync_name;
|
||||||
|
u32 *sync_objects = NULL;
|
||||||
|
int rc = 0, i;
|
||||||
|
|
||||||
|
if (sds->sync_namager_names) {
|
||||||
|
rc = hl_snprintf_resize(
|
||||||
|
buf, size, offset, "%s\n",
|
||||||
|
sds->sync_namager_names[index]);
|
||||||
|
if (rc)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
sync_objects = hl_state_dump_read_sync_objects(hdev, index);
|
||||||
|
if (!sync_objects) {
|
||||||
|
rc = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < sds->props[SP_SYNC_OBJ_AMOUNT]; ++i) {
|
||||||
|
struct hl_sync_to_engine_map_entry *entry;
|
||||||
|
u64 sync_object_addr;
|
||||||
|
|
||||||
|
if (!sync_objects[i])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
sync_object_addr = sds->props[SP_SYNC_OBJ_BASE_ADDR] +
|
||||||
|
sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index +
|
||||||
|
i * sizeof(u32);
|
||||||
|
|
||||||
|
rc = hl_snprintf_resize(buf, size, offset, "sync id: %u", i);
|
||||||
|
if (rc)
|
||||||
|
goto free_sync_objects;
|
||||||
|
sync_name = hl_state_dump_get_sync_name(hdev, i);
|
||||||
|
if (sync_name) {
|
||||||
|
rc = hl_snprintf_resize(buf, size, offset, " %s",
|
||||||
|
sync_name);
|
||||||
|
if (rc)
|
||||||
|
goto free_sync_objects;
|
||||||
|
}
|
||||||
|
rc = hl_snprintf_resize(buf, size, offset, ", value: %u",
|
||||||
|
sync_objects[i]);
|
||||||
|
if (rc)
|
||||||
|
goto free_sync_objects;
|
||||||
|
|
||||||
|
/* Append engine string */
|
||||||
|
entry = hl_state_dump_get_sync_to_engine(map,
|
||||||
|
(u32)sync_object_addr);
|
||||||
|
if (entry) {
|
||||||
|
rc = hl_snprintf_resize(buf, size, offset,
|
||||||
|
", Engine: ");
|
||||||
|
if (rc)
|
||||||
|
goto free_sync_objects;
|
||||||
|
rc = hl_print_resize_sync_engine(buf, size, offset,
|
||||||
|
entry->engine_type,
|
||||||
|
entry->engine_id);
|
||||||
|
if (rc)
|
||||||
|
goto free_sync_objects;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = hl_snprintf_resize(buf, size, offset, "\n");
|
||||||
|
if (rc)
|
||||||
|
goto free_sync_objects;
|
||||||
|
}
|
||||||
|
|
||||||
|
free_sync_objects:
|
||||||
|
hl_state_dump_free_sync_objects(sync_objects);
|
||||||
|
out:
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* hl_state_dump_print_syncs - print active sync objects
|
||||||
|
* @hdev: pointer to the device
|
||||||
|
* @buf: destination buffer double pointer to be used with hl_snprintf_resize
|
||||||
|
* @size: pointer to the size container
|
||||||
|
* @offset: pointer to the offset container
|
||||||
|
*
|
||||||
|
* Returns 0 on success or error code on failure
|
||||||
|
*/
|
||||||
|
static int hl_state_dump_print_syncs(struct hl_device *hdev,
|
||||||
|
char **buf, size_t *size,
|
||||||
|
size_t *offset)
|
||||||
|
|
||||||
|
{
|
||||||
|
struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
|
||||||
|
struct hl_sync_to_engine_map *map;
|
||||||
|
u32 index;
|
||||||
|
int rc = 0;
|
||||||
|
|
||||||
|
map = kzalloc(sizeof(*map), GFP_KERNEL);
|
||||||
|
if (!map)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
rc = sds->funcs.gen_sync_to_engine_map(hdev, map);
|
||||||
|
if (rc)
|
||||||
|
goto free_map_mem;
|
||||||
|
|
||||||
|
rc = hl_snprintf_resize(buf, size, offset, "Non zero sync objects:\n");
|
||||||
|
if (rc)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
if (sds->sync_namager_names) {
|
||||||
|
for (index = 0; sds->sync_namager_names[index]; ++index) {
|
||||||
|
rc = hl_state_dump_print_syncs_single_block(
|
||||||
|
hdev, index, buf, size, offset, map);
|
||||||
|
if (rc)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (index = 0; index < sds->props[SP_NUM_CORES]; ++index) {
|
||||||
|
rc = hl_state_dump_print_syncs_single_block(
|
||||||
|
hdev, index, buf, size, offset, map);
|
||||||
|
if (rc)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
hl_state_dump_free_sync_to_engine_map(map);
|
||||||
|
free_map_mem:
|
||||||
|
kfree(map);
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* hl_state_dump_alloc_read_sm_block_monitors - read monitors for a specific
|
||||||
|
* block
|
||||||
|
* @hdev: pointer to the device
|
||||||
|
* @index: sync manager block index starting with E_N
|
||||||
|
*
|
||||||
|
* Returns an array of monitor data of size SP_MONITORS_AMOUNT or NULL
|
||||||
|
* on error
|
||||||
|
*/
|
||||||
|
static struct hl_mon_state_dump *
|
||||||
|
hl_state_dump_alloc_read_sm_block_monitors(struct hl_device *hdev, u32 index)
|
||||||
|
{
|
||||||
|
struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
|
||||||
|
struct hl_mon_state_dump *monitors;
|
||||||
|
s64 base_addr; /* Base addr can be negative */
|
||||||
|
int i;
|
||||||
|
|
||||||
|
monitors = vmalloc(sds->props[SP_MONITORS_AMOUNT] *
|
||||||
|
sizeof(struct hl_mon_state_dump));
|
||||||
|
if (!monitors)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
base_addr = sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index;
|
||||||
|
|
||||||
|
for (i = 0; i < sds->props[SP_MONITORS_AMOUNT]; ++i) {
|
||||||
|
monitors[i].id = i;
|
||||||
|
monitors[i].wr_addr_low =
|
||||||
|
RREG32(base_addr + sds->props[SP_MON_OBJ_WR_ADDR_LOW] +
|
||||||
|
i * sizeof(u32));
|
||||||
|
|
||||||
|
monitors[i].wr_addr_high =
|
||||||
|
RREG32(base_addr + sds->props[SP_MON_OBJ_WR_ADDR_HIGH] +
|
||||||
|
i * sizeof(u32));
|
||||||
|
|
||||||
|
monitors[i].wr_data =
|
||||||
|
RREG32(base_addr + sds->props[SP_MON_OBJ_WR_DATA] +
|
||||||
|
i * sizeof(u32));
|
||||||
|
|
||||||
|
monitors[i].arm_data =
|
||||||
|
RREG32(base_addr + sds->props[SP_MON_OBJ_ARM_DATA] +
|
||||||
|
i * sizeof(u32));
|
||||||
|
|
||||||
|
monitors[i].status =
|
||||||
|
RREG32(base_addr + sds->props[SP_MON_OBJ_STATUS] +
|
||||||
|
i * sizeof(u32));
|
||||||
|
}
|
||||||
|
|
||||||
|
return monitors;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* hl_state_dump_free_monitors - free the monitors structure
|
||||||
|
* @monitors: monitors array created with
|
||||||
|
* hl_state_dump_alloc_read_sm_block_monitors
|
||||||
|
*/
|
||||||
|
static void hl_state_dump_free_monitors(struct hl_mon_state_dump *monitors)
|
||||||
|
{
|
||||||
|
vfree(monitors);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* hl_state_dump_print_monitors_single_block - print active monitors on a
|
||||||
|
* single block
|
||||||
|
* @hdev: pointer to the device
|
||||||
|
* @index: sync manager block index starting with E_N
|
||||||
|
* @buf: destination buffer double pointer to be used with hl_snprintf_resize
|
||||||
|
* @size: pointer to the size container
|
||||||
|
* @offset: pointer to the offset container
|
||||||
|
*
|
||||||
|
* Returns 0 on success or error code on failure
|
||||||
|
*/
|
||||||
|
static int hl_state_dump_print_monitors_single_block(struct hl_device *hdev,
|
||||||
|
u32 index,
|
||||||
|
char **buf, size_t *size,
|
||||||
|
size_t *offset)
|
||||||
|
{
|
||||||
|
struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
|
||||||
|
struct hl_mon_state_dump *monitors = NULL;
|
||||||
|
int rc = 0, i;
|
||||||
|
|
||||||
|
if (sds->sync_namager_names) {
|
||||||
|
rc = hl_snprintf_resize(
|
||||||
|
buf, size, offset, "%s\n",
|
||||||
|
sds->sync_namager_names[index]);
|
||||||
|
if (rc)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
monitors = hl_state_dump_alloc_read_sm_block_monitors(hdev, index);
|
||||||
|
if (!monitors) {
|
||||||
|
rc = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < sds->props[SP_MONITORS_AMOUNT]; ++i) {
|
||||||
|
if (!(sds->funcs.monitor_valid(&monitors[i])))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Monitor is valid, dump it */
|
||||||
|
rc = sds->funcs.print_single_monitor(buf, size, offset, hdev,
|
||||||
|
&monitors[i]);
|
||||||
|
if (rc)
|
||||||
|
goto free_monitors;
|
||||||
|
|
||||||
|
hl_snprintf_resize(buf, size, offset, "\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
free_monitors:
|
||||||
|
hl_state_dump_free_monitors(monitors);
|
||||||
|
out:
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* hl_state_dump_print_monitors - print active monitors
|
||||||
|
* @hdev: pointer to the device
|
||||||
|
* @buf: destination buffer double pointer to be used with hl_snprintf_resize
|
||||||
|
* @size: pointer to the size container
|
||||||
|
* @offset: pointer to the offset container
|
||||||
|
*
|
||||||
|
* Returns 0 on success or error code on failure
|
||||||
|
*/
|
||||||
|
static int hl_state_dump_print_monitors(struct hl_device *hdev,
|
||||||
|
char **buf, size_t *size,
|
||||||
|
size_t *offset)
|
||||||
|
{
|
||||||
|
struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
|
||||||
|
u32 index;
|
||||||
|
int rc = 0;
|
||||||
|
|
||||||
|
rc = hl_snprintf_resize(buf, size, offset,
|
||||||
|
"Valid (armed) monitor objects:\n");
|
||||||
|
if (rc)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
if (sds->sync_namager_names) {
|
||||||
|
for (index = 0; sds->sync_namager_names[index]; ++index) {
|
||||||
|
rc = hl_state_dump_print_monitors_single_block(
|
||||||
|
hdev, index, buf, size, offset);
|
||||||
|
if (rc)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (index = 0; index < sds->props[SP_NUM_CORES]; ++index) {
|
||||||
|
rc = hl_state_dump_print_monitors_single_block(
|
||||||
|
hdev, index, buf, size, offset);
|
||||||
|
if (rc)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* hl_state_dump_print_engine_fences - print active fences for a specific
|
||||||
|
* engine
|
||||||
|
* @hdev: pointer to the device
|
||||||
|
* @engine_type: engine type to use
|
||||||
|
* @buf: destination buffer double pointer to be used with hl_snprintf_resize
|
||||||
|
* @size: pointer to the size container
|
||||||
|
* @offset: pointer to the offset container
|
||||||
|
*/
|
static int
hl_state_dump_print_engine_fences(struct hl_device *hdev,
				  enum hl_sync_engine_type engine_type,
				  char **buf, size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = 0, i, n_fences;
	u64 base_addr, next_fence;

	switch (engine_type) {
	case ENGINE_TPC:
		n_fences = sds->props[SP_NUM_OF_TPC_ENGINES];
		base_addr = sds->props[SP_TPC0_CMDQ];
		next_fence = sds->props[SP_NEXT_TPC];
		break;
	case ENGINE_MME:
		n_fences = sds->props[SP_NUM_OF_MME_ENGINES];
		base_addr = sds->props[SP_MME_CMDQ];
		next_fence = sds->props[SP_NEXT_MME];
		break;
	case ENGINE_DMA:
		n_fences = sds->props[SP_NUM_OF_DMA_ENGINES];
		base_addr = sds->props[SP_DMA_CMDQ];
		next_fence = sds->props[SP_DMA_QUEUES_OFFSET];
		break;
	default:
		return -EINVAL;
	}
	for (i = 0; i < n_fences; ++i) {
		rc = sds->funcs.print_fences_single_engine(
			hdev,
			base_addr + next_fence * i +
				sds->props[SP_FENCE0_CNT_OFFSET],
			base_addr + next_fence * i +
				sds->props[SP_CP_STS_OFFSET],
			engine_type, i, buf, size, offset);
		if (rc)
			goto out;
	}
out:
	return rc;
}

/**
 * hl_state_dump_print_fences - print active fences
 * @hdev: pointer to the device
 * @buf: destination buffer double pointer to be used with hl_snprintf_resize
 * @size: pointer to the size container
 * @offset: pointer to the offset container
 */
static int hl_state_dump_print_fences(struct hl_device *hdev, char **buf,
					size_t *size, size_t *offset)
{
	int rc = 0;

	rc = hl_snprintf_resize(buf, size, offset, "Valid (armed) fences:\n");
	if (rc)
		goto out;

	rc = hl_state_dump_print_engine_fences(hdev, ENGINE_TPC, buf, size, offset);
	if (rc)
		goto out;

	rc = hl_state_dump_print_engine_fences(hdev, ENGINE_MME, buf, size, offset);
	if (rc)
		goto out;

	rc = hl_state_dump_print_engine_fences(hdev, ENGINE_DMA, buf, size, offset);
	if (rc)
		goto out;

out:
	return rc;
}

/**
 * hl_state_dump() - dump system state
 * @hdev: pointer to device structure
 */
int hl_state_dump(struct hl_device *hdev)
{
	char *buf = NULL;
	size_t offset = 0, size = 0;
	int rc;

	rc = hl_snprintf_resize(&buf, &size, &offset,
				"Timestamp taken on: %llu\n\n",
				ktime_to_ns(ktime_get()));
	if (rc)
		goto err;

	rc = hl_state_dump_print_syncs(hdev, &buf, &size, &offset);
	if (rc)
		goto err;

	hl_snprintf_resize(&buf, &size, &offset, "\n");

	rc = hl_state_dump_print_monitors(hdev, &buf, &size, &offset);
	if (rc)
		goto err;

	hl_snprintf_resize(&buf, &size, &offset, "\n");

	rc = hl_state_dump_print_fences(hdev, &buf, &size, &offset);
	if (rc)
		goto err;

	hl_snprintf_resize(&buf, &size, &offset, "\n");

	hl_debugfs_set_state_dump(hdev, buf, size);

	return 0;
err:
	vfree(buf);
	return rc;
}
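The hl_snprintf_resize() helper that hl_state_dump() leans on appends printf-style output to a vmalloc-backed buffer (note the vfree() in the error path), growing the buffer as the dump gets longer. A minimal userspace sketch of the same append-and-grow pattern, with an assumed doubling policy rather than the driver's exact implementation:

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

/* Append formatted text at *offset, doubling the buffer when needed. */
static int snprintf_resize(char **buf, size_t *size, size_t *offset,
			   const char *fmt, ...)
{
	va_list args;
	int len;

	va_start(args, fmt);
	len = vsnprintf(NULL, 0, fmt, args);	/* measure first */
	va_end(args);
	if (len < 0)
		return -1;

	while (*offset + (size_t)len + 1 > *size) {
		size_t new_size = *size ? *size * 2 : 128;
		char *tmp = realloc(*buf, new_size);

		if (!tmp)
			return -1;
		*buf = tmp;
		*size = new_size;
	}

	va_start(args, fmt);
	vsnprintf(*buf + *offset, *size - *offset, fmt, args);
	va_end(args);
	*offset += (size_t)len;
	return 0;
}

int main(void)
{
	char *buf = NULL;
	size_t size = 0, offset = 0;

	snprintf_resize(&buf, &size, &offset, "Valid (armed) fences:\n");
	snprintf_resize(&buf, &size, &offset, "fence %d: %s\n", 0, "idle");
	fputs(buf, stdout);
	free(buf);
	return 0;
}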
@@ -9,8 +9,7 @@

 #include <linux/pci.h>

-long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
-				bool curr)
+long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
 {
 	struct cpucp_packet pkt;
 	u32 used_pll_idx;
@@ -44,8 +43,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
 	return (long) result;
 }

-void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
-				u64 freq)
+void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
 {
 	struct cpucp_packet pkt;
 	u32 used_pll_idx;
@@ -285,16 +283,12 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
 				char *buf)
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
-	char *str;
+	char str[HL_STR_MAX];

-	if (atomic_read(&hdev->in_reset))
-		str = "In reset";
-	else if (hdev->disabled)
-		str = "Malfunction";
-	else if (hdev->needs_reset)
-		str = "Needs Reset";
-	else
-		str = "Operational";
+	strscpy(str, hdev->status[hl_device_status(hdev)], HL_STR_MAX);
+
+	/* use uppercase for backward compatibility */
+	str[0] = 'A' + (str[0] - 'a');

 	return sprintf(buf, "%s\n", str);
 }
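The rewritten status_show() above replaces the open-coded if/else chain with a lookup through hdev->status[hl_device_status(hdev)], then upper-cases only the first letter to preserve the old sysfs output format. A standalone sketch of that transformation; the status-table contents here are assumed:

#include <stdio.h>
#include <string.h>

#define HL_STR_MAX 32

int main(void)
{
	/* hypothetical stand-in for the driver's hdev->status[] table */
	const char *status[] = { "operational", "in reset", "needs reset" };
	char str[HL_STR_MAX];

	strncpy(str, status[0], HL_STR_MAX - 1);
	str[HL_STR_MAX - 1] = '\0';

	/* same first-letter uppercase trick as the new status_show() */
	str[0] = 'A' + (str[0] - 'a');
	printf("%s\n", str);	/* prints "Operational" */
	return 0;
}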
File diff suppressed because it is too large
@@ -36,6 +36,8 @@
 #define NUMBER_OF_INTERRUPTS	(NUMBER_OF_CMPLT_QUEUES + \
 					NUMBER_OF_CPU_HW_QUEUES)

+#define GAUDI_STREAM_MASTER_ARR_SIZE	8
+
 #if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES)
 #error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
 #endif
@@ -50,6 +52,8 @@
 #define DC_POWER_DEFAULT_PCI	60000		/* 60W */
 #define DC_POWER_DEFAULT_PMC	60000		/* 60W */

+#define DC_POWER_DEFAULT_PMC_SEC	97000	/* 97W */
+
 #define GAUDI_CPU_TIMEOUT_USEC	30000000	/* 30s */

 #define TPC_ENABLED_MASK	0xFF
@@ -62,7 +66,7 @@

 #define DMA_MAX_TRANSFER_SIZE	U32_MAX

-#define GAUDI_DEFAULT_CARD_NAME	"HL2000"
+#define GAUDI_DEFAULT_CARD_NAME	"HL205"

 #define GAUDI_MAX_PENDING_CS	SZ_16K

@@ -117,6 +121,7 @@
 	(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 - \
 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0) + 4) >> 2)

+#define MONITOR_MAX_SOBS	8

 /* DRAM Memory Map */

@@ -200,6 +205,18 @@
 #define HW_CAP_TPC_MASK		GENMASK(31, 24)
 #define HW_CAP_TPC_SHIFT	24

+#define NEXT_SYNC_OBJ_ADDR_INTERVAL \
+	(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 - \
+	 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)
+#define NUM_OF_MME_ENGINES	2
+#define NUM_OF_MME_SUB_ENGINES	2
+#define NUM_OF_TPC_ENGINES	8
+#define NUM_OF_DMA_ENGINES	8
+#define NUM_OF_QUEUES		5
+#define NUM_OF_STREAMS		4
+#define NUM_OF_FENCES		4
+
+
 #define GAUDI_CPU_PCI_MSB_ADDR(addr)	(((addr) & GENMASK_ULL(49, 39)) >> 39)
 #define GAUDI_PCI_TO_CPU_ADDR(addr)			\
 	do {						\
@@ -622,11 +622,6 @@ static int gaudi_config_etr(struct hl_device *hdev,
 		return -EINVAL;
 	}

-	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER,
-					hdev->compute_ctx->asid);
-	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER,
-					hdev->compute_ctx->asid);
-
 	msb = upper_32_bits(input->buffer_address) >> 8;
 	msb &= PSOC_GLOBAL_CONF_TRACE_ADDR_MSB_MASK;
 	WREG32(mmPSOC_GLOBAL_CONF_TRACE_ADDR, msb);
@@ -9559,6 +9559,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC0_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC0_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC0_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -10013,6 +10014,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC1_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC1_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC1_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC1_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC1_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC1_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC1_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -10466,6 +10468,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC2_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC2_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC2_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC2_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC2_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC2_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC2_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -10919,6 +10922,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC3_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC3_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC3_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC3_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC3_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC3_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC3_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -11372,6 +11376,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC4_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC4_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC4_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC4_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC4_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC4_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC4_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -11825,6 +11830,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC5_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC5_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC5_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC5_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC5_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC5_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC5_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -12280,6 +12286,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC6_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC6_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC6_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC6_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC6_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC6_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC6_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -12735,6 +12742,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
 	mask |= 1U << ((mmTPC7_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC7_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC7_CFG_TPC_STALL & 0x7F) >> 2);
+	mask |= 1U << ((mmTPC7_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC7_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC7_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
 	mask |= 1U << ((mmTPC7_CFG_MSS_CONFIG & 0x7F) >> 2);
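Each of the eight hunks above whitelists mmTPCn_CFG_ICACHE_BASE_ADDERESS_HIGH in a per-TPC protection mask. The recurring expression (reg & 0x7F) >> 2 maps a register address to a bit inside a 32-bit protection word covering a 0x80-byte window: the low seven address bits give the byte offset inside the window, and dividing by four (the register size) yields the bit index. A worked example with an invented offset:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical register offset standing in for a mmTPCn_CFG_* address. */
#define REG_OFFSET 0xE06B94u

int main(void)
{
	uint32_t bit = (REG_OFFSET & 0x7F) >> 2;	/* 0x14 / 4 = 5 */
	uint32_t mask = 1U << bit;

	printf("bit %u, mask 0x%08X\n", bit, mask);
	return 0;
}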
@@ -350,6 +350,8 @@ static u32 goya_all_events[] = {
 	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
 };

+static s64 goya_state_dump_specs_props[SP_MAX] = {0};
+
 static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
 static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
@@ -387,6 +389,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
 	}

+	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

 	prop->dram_base_address = DRAM_PHYS_BASE;
@@ -466,6 +469,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 	prop->hard_reset_done_by_fw = false;
 	prop->gic_interrupts_enable = true;

+	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
+
 	return 0;
 }

@@ -649,14 +654,14 @@ pci_init:
 			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
 	if (rc) {
 		if (hdev->reset_on_preboot_fail)
-			hdev->asic_funcs->hw_fini(hdev, true);
+			hdev->asic_funcs->hw_fini(hdev, true, false);
 		goto pci_fini;
 	}

 	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
 		dev_info(hdev->dev,
 			"H/W state is dirty, must reset before initializing\n");
-		hdev->asic_funcs->hw_fini(hdev, true);
+		hdev->asic_funcs->hw_fini(hdev, true, false);
 	}

 	if (!hdev->pldm) {
@@ -955,8 +960,9 @@ static int goya_sw_init(struct hl_device *hdev)
 	hdev->supports_coresight = true;
 	hdev->supports_soft_reset = true;
 	hdev->allow_external_soft_reset = true;
+	hdev->supports_wait_for_multi_cs = false;

-	goya_set_pci_memory_regions(hdev);
+	hdev->asic_funcs->set_pci_memory_regions(hdev);

 	return 0;

@@ -2374,7 +2380,7 @@ static void goya_disable_timestamp(struct hl_device *hdev)
 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
 }

-static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
+static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
 	u32 wait_timeout_ms;

@@ -2493,6 +2499,7 @@ static void goya_init_firmware_loader(struct hl_device *hdev)
 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;

 	/* fill common fields */
+	fw_loader->linux_loaded = false;
 	fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
 	fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
 	fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
@@ -2696,14 +2703,7 @@ disable_queues:
 	return rc;
 }

-/*
- * goya_hw_fini - Goya hardware tear-down code
- *
- * @hdev: pointer to hl_device structure
- * @hard_reset: should we do hard reset to all engines or just reset the
- *              compute/dma engines
- */
-static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
+static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
 	struct goya_device *goya = hdev->asic_specific;
 	u32 reset_timeout_ms, cpu_timeout_ms, status;
@@ -2796,7 +2796,7 @@ int goya_resume(struct hl_device *hdev)
 	return goya_init_iatu(hdev);
 }

-static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
 {
 	int rc;
@@ -4797,6 +4797,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 				>> EQ_CTL_EVENT_TYPE_SHIFT);
 	struct goya_device *goya = hdev->asic_specific;

+	if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
+		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
+				event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
+		return;
+	}
+
 	goya->events_stat[event_type]++;
 	goya->events_stat_aggregate[event_type]++;

@@ -5475,14 +5481,14 @@ u64 goya_get_device_time(struct hl_device *hdev)
 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
 }

-static void goya_collective_wait_init_cs(struct hl_cs *cs)
+static int goya_collective_wait_init_cs(struct hl_cs *cs)
 {
+	return 0;
 }

 static int goya_collective_wait_create_jobs(struct hl_device *hdev,
 		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
-		u32 collective_engine_id)
+		u32 collective_engine_id, u32 encaps_signal_offset)
 {
 	return -EINVAL;
 }
@@ -5524,6 +5530,62 @@ static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
 	}
 }

+static int goya_gen_sync_to_engine_map(struct hl_device *hdev,
+				struct hl_sync_to_engine_map *map)
+{
+	/* Not implemented */
+	return 0;
+}
+
+static int goya_monitor_valid(struct hl_mon_state_dump *mon)
+{
+	/* Not implemented */
+	return 0;
+}
+
+static int goya_print_single_monitor(char **buf, size_t *size, size_t *offset,
+				struct hl_device *hdev,
+				struct hl_mon_state_dump *mon)
+{
+	/* Not implemented */
+	return 0;
+}
+
+
+static int goya_print_fences_single_engine(
+	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
+	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
+	size_t *size, size_t *offset)
+{
+	/* Not implemented */
+	return 0;
+}
+
+
+static struct hl_state_dump_specs_funcs goya_state_dump_funcs = {
+	.monitor_valid = goya_monitor_valid,
+	.print_single_monitor = goya_print_single_monitor,
+	.gen_sync_to_engine_map = goya_gen_sync_to_engine_map,
+	.print_fences_single_engine = goya_print_fences_single_engine,
+};
+
+static void goya_state_dump_init(struct hl_device *hdev)
+{
+	/* Not implemented */
+	hdev->state_dump_specs.props = goya_state_dump_specs_props;
+	hdev->state_dump_specs.funcs = goya_state_dump_funcs;
+}
+
+static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id)
+{
+	return 0;
+}
+
+static u32 *goya_get_stream_master_qid_arr(void)
+{
+	return NULL;
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -5536,7 +5598,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.halt_engines = goya_halt_engines,
 	.suspend = goya_suspend,
 	.resume = goya_resume,
-	.cb_mmap = goya_cb_mmap,
+	.mmap = goya_mmap,
 	.ring_doorbell = goya_ring_doorbell,
 	.pqe_write = goya_pqe_write,
 	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
@@ -5609,7 +5671,11 @@ static const struct hl_asic_funcs goya_funcs = {
 	.enable_events_from_fw = goya_enable_events_from_fw,
 	.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
 	.init_firmware_loader = goya_init_firmware_loader,
-	.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram
+	.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
+	.state_dump_init = goya_state_dump_init,
+	.get_sob_addr = &goya_get_sob_addr,
+	.set_pci_memory_regions = goya_set_pci_memory_regions,
+	.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
 };

 /*
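Goya gains the new state-dump, SOB and stream-master callbacks as deliberate stubs: registering no-op functions keeps the common code free of NULL checks when it dispatches through the hl_asic_funcs table. The same design in miniature; the types and names below are illustrative only, not the driver's:

#include <stdio.h>

struct device_ops {
	int (*monitor_valid)(void);
	int (*print_single_monitor)(void);
};

/* No-op stubs, analogous to goya's "Not implemented" callbacks above. */
static int stub_monitor_valid(void) { return 0; }
static int stub_print_single_monitor(void) { return 0; }

static const struct device_ops goya_like_ops = {
	.monitor_valid = stub_monitor_valid,
	.print_single_monitor = stub_print_single_monitor,
};

int main(void)
{
	/* Core code can always call through the table unconditionally. */
	printf("%d\n", goya_like_ops.monitor_valid());
	return 0;
}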
@@ -98,6 +98,18 @@ struct hl_eq_fw_alive {
 	__u8 pad[7];
 };

+enum hl_pcie_addr_dec_cause {
+	PCIE_ADDR_DEC_HBW_ERR_RESP,
+	PCIE_ADDR_DEC_LBW_ERR_RESP,
+	PCIE_ADDR_DEC_TLP_BLOCKED_BY_RR
+};
+
+struct hl_eq_pcie_addr_dec_data {
+	/* enum hl_pcie_addr_dec_cause */
+	__u8 addr_dec_cause;
+	__u8 pad[7];
+};
+
 struct hl_eq_entry {
 	struct hl_eq_header hdr;
 	union {
@@ -106,6 +118,7 @@ struct hl_eq_entry {
 		struct hl_eq_sm_sei_data sm_sei_data;
 		struct cpucp_pkt_sync_err pkt_sync_err;
 		struct hl_eq_fw_alive fw_alive;
+		struct hl_eq_pcie_addr_dec_data pcie_addr_dec_data;
 		__le64 data[7];
 	};
 };
@@ -116,7 +129,7 @@ struct hl_eq_entry {
 #define EQ_CTL_READY_MASK		0x80000000

 #define EQ_CTL_EVENT_TYPE_SHIFT		16
-#define EQ_CTL_EVENT_TYPE_MASK		0x03FF0000
+#define EQ_CTL_EVENT_TYPE_MASK		0x0FFF0000

 #define EQ_CTL_INDEX_SHIFT		0
 #define EQ_CTL_INDEX_MASK		0x0000FFFF
@@ -300,7 +313,7 @@ enum pq_init_status {
 *	The packet's arguments specify the desired sensor and the field to
 *	set.
 *
- * CPUCP_PACKET_PCIE_THROUGHPUT_GET
+ * CPUCP_PACKET_PCIE_THROUGHPUT_GET -
 *	Get throughput of PCIe.
 *	The packet's arguments specify the transaction direction (TX/RX).
 *	The window measurement is 10[msec], and the return value is in KB/sec.
@@ -309,19 +322,19 @@ enum pq_init_status {
 *	Replay count measures the number of "replay" events, which is basically
 *	the number of retries done by PCIe.
 *
- * CPUCP_PACKET_TOTAL_ENERGY_GET
+ * CPUCP_PACKET_TOTAL_ENERGY_GET -
 *	Total Energy is a measurement of energy from the time FW Linux
 *	is loaded. It is calculated by multiplying the average power
 *	by time (passed from armcp start). The units are MilliJoules.
 *
- * CPUCP_PACKET_PLL_INFO_GET
+ * CPUCP_PACKET_PLL_INFO_GET -
 *	Fetch frequencies of PLL from the required PLL IP.
 *	The packet's arguments specify the device PLL type.
 *	Pll type is the PLL from the device pll_index enum.
 *	The result is composed of 4 outputs, each a 16-bit
 *	frequency in MHz.
 *
- * CPUCP_PACKET_POWER_GET
+ * CPUCP_PACKET_POWER_GET -
 *	Fetch the present power consumption of the device (Current * Voltage).
 *
 * CPUCP_PACKET_NIC_PFC_SET -
@@ -345,6 +358,24 @@ enum pq_init_status {
 * CPUCP_PACKET_MSI_INFO_SET -
 *	set the index number for each supported msi type going from
 *	host to device
+ *
+ * CPUCP_PACKET_NIC_XPCS91_REGS_GET -
+ *	Fetch the un/correctable counters values from the NIC MAC.
+ *
+ * CPUCP_PACKET_NIC_STAT_REGS_GET -
+ *	Fetch various NIC MAC counters from the NIC STAT.
+ *
+ * CPUCP_PACKET_NIC_STAT_REGS_CLR -
+ *	Clear the various NIC MAC counters in the NIC STAT.
+ *
+ * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET -
+ *	Fetch all NIC MAC counters from the NIC STAT.
+ *
+ * CPUCP_PACKET_IS_IDLE_CHECK -
+ *	Check if the device is IDLE in regard to the DMA/compute engines
+ *	and QMANs. The f/w will return a bitmask where each bit represents
+ *	a different engine or QMAN according to enum cpucp_idle_mask.
+ *	The bit will be 1 if the engine is NOT idle.
 */

 enum cpucp_packet_id {
@@ -385,6 +416,11 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_NIC_LPBK_SET,		/* internal */
 	CPUCP_PACKET_NIC_MAC_CFG,		/* internal */
 	CPUCP_PACKET_MSI_INFO_SET,		/* internal */
+	CPUCP_PACKET_NIC_XPCS91_REGS_GET,	/* internal */
+	CPUCP_PACKET_NIC_STAT_REGS_GET,		/* internal */
+	CPUCP_PACKET_NIC_STAT_REGS_CLR,		/* internal */
+	CPUCP_PACKET_NIC_STAT_REGS_ALL_GET,	/* internal */
+	CPUCP_PACKET_IS_IDLE_CHECK,		/* internal */
 };

 #define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
@@ -414,6 +450,11 @@ enum cpucp_packet_id {
 #define CPUCP_PKT_VAL_LPBK_IN2_SHIFT	1
 #define CPUCP_PKT_VAL_LPBK_IN2_MASK	0x000000000000001Eull

+#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT	0
+#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK	0x0000000000000001ull
+#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT	1
+#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK	0x00000000FFFFFFFEull
+
 /* heartbeat status bits */
 #define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT	0
 #define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK	0x00000001
@@ -467,7 +508,8 @@ struct cpucp_packet {
 		__le32 status_mask;
 	};

-	__le32 reserved;
+	/* For NIC requests */
+	__le32 port_index;
 };

 struct cpucp_unmask_irq_arr_packet {
@@ -476,6 +518,12 @@ struct cpucp_unmask_irq_arr_packet {
 	__le32 irqs[0];
 };

+struct cpucp_nic_status_packet {
+	struct cpucp_packet cpucp_pkt;
+	__le32 length;
+	__le32 data[0];
+};
+
 struct cpucp_array_data_packet {
 	struct cpucp_packet cpucp_pkt;
 	__le32 length;
@@ -595,6 +643,18 @@ enum pll_index {
 	PLL_MAX
 };

+enum rl_index {
+	TPC_RL = 0,
+	MME_RL,
+};
+
+enum pvt_index {
+	PVT_SW,
+	PVT_SE,
+	PVT_NW,
+	PVT_NE
+};
+
 /* Event Queue Packets */

 struct eq_generic_event {
@@ -700,6 +760,15 @@ struct cpucp_mac_addr {
 	__u8 mac_addr[ETH_ALEN];
 };

+enum cpucp_serdes_type {
+	TYPE_1_SERDES_TYPE,
+	TYPE_2_SERDES_TYPE,
+	HLS1_SERDES_TYPE,
+	HLS1H_SERDES_TYPE,
+	UNKNOWN_SERDES_TYPE,
+	MAX_NUM_SERDES_TYPE = UNKNOWN_SERDES_TYPE
+};
+
 struct cpucp_nic_info {
 	struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS];
 	__le64 link_mask[CPUCP_NIC_MASK_ARR_LEN];
@@ -708,6 +777,40 @@ struct cpucp_nic_info {
 	__le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN];
 	__u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN];
 	__le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN];
+	__le16 serdes_type; /* enum cpucp_serdes_type */
+	__u8 reserved[6];
+};
+
+/*
+ * struct cpucp_nic_status - describes the status of a NIC port.
+ * @port: NIC port index.
+ * @bad_format_cnt: e.g. CRC.
+ * @responder_out_of_sequence_psn_cnt: e.g. NAK.
+ * @high_ber_reinit_cnt: link reinit due to high BER.
+ * @correctable_err_cnt: e.g. bit-flip.
+ * @uncorrectable_err_cnt: e.g. MAC errors.
+ * @retraining_cnt: re-training counter.
+ * @up: is port up.
+ * @pcs_link: has PCS link.
+ * @phy_ready: is PHY ready.
+ * @auto_neg: is Autoneg enabled.
+ * @timeout_retransmission_cnt: timeout retransmission events.
+ * @high_ber_cnt: high BER events.
+ */
+struct cpucp_nic_status {
+	__le32 port;
+	__le32 bad_format_cnt;
+	__le32 responder_out_of_sequence_psn_cnt;
+	__le32 high_ber_reinit;
+	__le32 correctable_err_cnt;
+	__le32 uncorrectable_err_cnt;
+	__le32 retraining_cnt;
+	__u8 up;
+	__u8 pcs_link;
+	__u8 phy_ready;
+	__u8 auto_neg;
+	__le32 timeout_retransmission_cnt;
+	__le32 high_ber_cnt;
 };

 #endif /* CPUCP_IF_H */
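Widening EQ_CTL_EVENT_TYPE_MASK from 0x03FF0000 to 0x0FFF0000 grows the event-type field from 10 to 12 bits while the shift stays at 16, so decoding remains a plain mask-and-shift. A standalone sketch with an invented control word:

#include <stdio.h>
#include <stdint.h>

#define EQ_CTL_EVENT_TYPE_SHIFT	16
#define EQ_CTL_EVENT_TYPE_MASK	0x0FFF0000
#define EQ_CTL_INDEX_MASK	0x0000FFFF

int main(void)
{
	uint32_t ctl = 0x8ABC4123;	/* hypothetical EQ control word */
	uint32_t type = (ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT;
	uint32_t index = ctl & EQ_CTL_INDEX_MASK;

	/* Event types up to 0xFFF now fit; the old mask capped them at 0x3FF. */
	printf("type 0x%03X index 0x%04X\n", type, index);
	return 0;
}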
@@ -78,6 +78,26 @@
 * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL	Device is unusable and customer support
 *					should be contacted.
 *
+ * CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD	HALT ACK from ARC0 is not received
+ *					within the specified retries after
+ *					issuing a HALT request. ARC0 appears
+ *					to be in bad reset.
+ *
+ * CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD	HALT ACK from ARC1 is not received
+ *					within the specified retries after
+ *					issuing a HALT request. ARC1 appears
+ *					to be in bad reset.
+ *
+ * CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD	RUN ACK from ARC0 is not received
+ *					within the specified timeout after
+ *					issuing a RUN request. ARC0 appears
+ *					to be in bad reset.
+ *
+ * CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD	RUN ACK from ARC1 is not received
+ *					within the specified timeout after
+ *					issuing a RUN request. ARC1 appears
+ *					to be in bad reset.
+ *
 * CPU_BOOT_ERR0_ENABLED		Error registers enabled.
 *					This is a main indication that the
 *					running FW populates the error
@@ -98,6 +118,10 @@
 #define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL		(1 << 11)
 #define CPU_BOOT_ERR0_PLL_FAIL			(1 << 12)
 #define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL	(1 << 13)
+#define CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD	(1 << 14)
+#define CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD	(1 << 15)
+#define CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD	(1 << 16)
+#define CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD	(1 << 17)
 #define CPU_BOOT_ERR0_ENABLED			(1 << 31)
 #define CPU_BOOT_ERR1_ENABLED			(1 << 31)

@@ -186,6 +210,10 @@
 *					configured and is ready for use.
 *					Initialized in: ppboot
 *
+ * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN	NIC MAC channels init is done by FW and
+ *					any access to them is done via the FW.
+ *					Initialized in: linux
+ *
 * CPU_BOOT_DEV_STS0_DYN_PLL_EN		Dynamic PLL configuration is enabled.
 *					FW sends to host a bitmap of supported
 *					PLLs.
@@ -209,6 +237,21 @@
 *					prevent IRQs overriding each other.
 *					Initialized in: linux
 *
+ * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN
+ *					NIC STAT and XPCS91 access is restricted
+ *					and is done via FW only.
+ *					Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN
+ *					NIC STAT get-all is supported.
+ *					Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN
+ *					F/W checks if the device is idle by
+ *					reading a defined set of registers. It
+ *					returns a bitmask of all the engines,
+ *					where a bit is set if the engine is
+ *					not idle.
+ *					Initialized in: linux
+ *
 * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
 *					This is a main indication that the
 *					running FW populates the device status
@@ -236,10 +279,14 @@
 #define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN			(1 << 15)
 #define CPU_BOOT_DEV_STS0_FW_LD_COM_EN			(1 << 16)
 #define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN		(1 << 17)
+#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN			(1 << 18)
 #define CPU_BOOT_DEV_STS0_DYN_PLL_EN			(1 << 19)
 #define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN		(1 << 20)
 #define CPU_BOOT_DEV_STS0_EQ_INDEX_EN			(1 << 21)
 #define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN		(1 << 22)
+#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN		(1 << 23)
+#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN		(1 << 24)
+#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN		(1 << 25)
 #define CPU_BOOT_DEV_STS0_ENABLED			(1 << 31)
 #define CPU_BOOT_DEV_STS1_ENABLED			(1 << 31)

@@ -313,10 +360,7 @@ struct cpu_dyn_regs {
 	__le32 hw_state;
 	__le32 kmd_msg_to_cpu;
 	__le32 cpu_cmd_status_to_host;
-	union {
-		__le32 gic_host_irq_ctrl;
-		__le32 gic_host_pi_upd_irq;
-	};
+	__le32 gic_host_pi_upd_irq;
 	__le32 gic_tpc_qm_irq_ctrl;
 	__le32 gic_mme_qm_irq_ctrl;
 	__le32 gic_dma_qm_irq_ctrl;
@@ -324,7 +368,9 @@ struct cpu_dyn_regs {
 	__le32 gic_dma_core_irq_ctrl;
 	__le32 gic_host_halt_irq;
 	__le32 gic_host_ints_irq;
-	__le32 reserved1[24];		/* reserve for future use */
+	__le32 gic_host_soft_rst_irq;
+	__le32 gic_rot_qm_irq_ctrl;
+	__le32 reserved1[22];		/* reserve for future use */
 };

 /* TODO: remove the desc magic after the code is updated to use message */
@@ -462,6 +508,11 @@ struct lkd_fw_comms_msg {
 *				Do not wait for BMC response.
 *
 * COMMS_LOW_PLL_OPP		Initialize PLLs for low OPP.
+ *
+ * COMMS_PREP_DESC_ELBI		Same as COMMS_PREP_DESC only that the memory
+ *				space is allocated in an ELBI access only
+ *				address range.
+ *
 */
 enum comms_cmd {
 	COMMS_NOOP = 0,
@@ -474,6 +525,7 @@ enum comms_cmd {
 	COMMS_GOTO_WFE = 7,
 	COMMS_SKIP_BMC = 8,
 	COMMS_LOW_PLL_OPP = 9,
+	COMMS_PREP_DESC_ELBI = 10,
 	COMMS_INVLD_LAST
 };
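The new ARC HALT/RUN ACK failures are reported through CPU_BOOT_ERR0, and per the comment block above the register's content is only meaningful once CPU_BOOT_ERR0_ENABLED is set. A minimal decoding sketch with an invented register value:

#include <stdio.h>
#include <stdint.h>

#define CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD	(1u << 14)
#define CPU_BOOT_ERR0_ENABLED			(1u << 31)

int main(void)
{
	uint32_t err0 = (1u << 31) | (1u << 14);	/* hypothetical value */

	/* ENABLED signals that the running FW populates the error bits */
	if (err0 & CPU_BOOT_ERR0_ENABLED) {
		if (err0 & CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD)
			printf("ARC0 HALT ACK not received\n");
	}
	return 0;
}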
@@ -126,6 +126,9 @@
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1	0x4F2004
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047	0x4F3FFC
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0	0x4F4000
+#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0	0x4F4800
+#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0	0x4F5000
+#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0	0x4F5800
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0	0x4F6000
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511	0x4F67FC
@@ -449,4 +449,21 @@ enum axi_id {
 #define PCIE_AUX_FLR_CTRL_HW_CTRL_MASK	0x1
 #define PCIE_AUX_FLR_CTRL_INT_MASK_MASK	0x2

+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_SHIFT		0
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK		0x1
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_SHIFT		1
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK		0x1FE
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_SHIFT		0
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK			0xFF
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_SHIFT		8
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK		0xFF00
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOP_SHIFT		16
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOP_MASK			0x10000
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_SHIFT		17
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK			0xFFFE0000
+#define TPC0_QM_CP_STS_0_FENCE_ID_SHIFT			20
+#define TPC0_QM_CP_STS_0_FENCE_ID_MASK			0x300000
+#define TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_SHIFT	22
+#define TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK		0x400000
+
 #endif /* GAUDI_MASKS_H_ */
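The new MON_ARM shift/mask pairs describe how a sync-manager monitor arm word packs its SID, MASK, SOP and SOD fields. A sketch of composing such a word from the shifts above; the example field values, and what each field means to the hardware, are assumptions:

#include <stdio.h>
#include <stdint.h>

#define MON_ARM_SID_SHIFT	0
#define MON_ARM_MASK_SHIFT	8
#define MON_ARM_SOP_SHIFT	16
#define MON_ARM_SOD_SHIFT	17

int main(void)
{
	/* hypothetical values: sync id 3, mask 0xF0, op 1, data 100 */
	uint32_t arm = (3u << MON_ARM_SID_SHIFT) |
		       (0xF0u << MON_ARM_MASK_SHIFT) |
		       (1u << MON_ARM_SOP_SHIFT) |
		       (100u << MON_ARM_SOD_SHIFT);

	printf("MON_ARM = 0x%08X\n", arm);
	return 0;
}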
@@ -12,8 +12,6 @@
 * PSOC scratch-pad registers
 */
 #define mmHW_STATE			mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
-/* TODO: remove mmGIC_HOST_IRQ_CTRL_POLL_REG */
-#define mmGIC_HOST_IRQ_CTRL_POLL_REG	mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
 #define mmGIC_HOST_PI_UPD_IRQ_POLL_REG	mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
 #define mmGIC_TPC_QM_IRQ_CTRL_POLL_REG	mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
 #define mmGIC_MME_QM_IRQ_CTRL_POLL_REG	mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
@@ -276,7 +276,17 @@ enum hl_device_status {
 	HL_DEVICE_STATUS_OPERATIONAL,
 	HL_DEVICE_STATUS_IN_RESET,
 	HL_DEVICE_STATUS_MALFUNCTION,
-	HL_DEVICE_STATUS_NEEDS_RESET
+	HL_DEVICE_STATUS_NEEDS_RESET,
+	HL_DEVICE_STATUS_IN_DEVICE_CREATION,
+	HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
+};
+
+enum hl_server_type {
+	HL_SERVER_TYPE_UNKNOWN = 0,
+	HL_SERVER_GAUDI_HLS1 = 1,
+	HL_SERVER_GAUDI_HLS1H = 2,
+	HL_SERVER_GAUDI_TYPE1 = 3,
+	HL_SERVER_GAUDI_TYPE2 = 4
 };

 /* Opcode for management ioctl
@@ -337,17 +347,49 @@ enum hl_device_status {
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16

+/**
+ * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC
+ * @sram_base_address: The first SRAM physical base address that is free to be
+ *                     used by the user.
+ * @dram_base_address: The first DRAM virtual or physical base address that is
+ *                     free to be used by the user.
+ * @dram_size: The DRAM size that is available to the user.
+ * @sram_size: The SRAM size that is available to the user.
+ * @num_of_events: The number of events that can be received from the f/w. This
+ *                 is needed so the user knows the size of the h/w events array
+ *                 he needs to pass to the kernel when he wants to fetch the
+ *                 event counters.
+ * @device_id: PCI device ID of the ASIC.
+ * @module_id: Module ID of the ASIC for mezzanine cards in servers
+ *             (from the OCP spec).
+ * @first_available_interrupt_id: The first available interrupt ID for the user
+ *                                to be used when it works with user interrupts.
+ * @server_type: Server type that the Gaudi ASIC is currently installed in.
+ *               The value is according to enum hl_server_type.
+ * @cpld_version: CPLD version on the board.
+ * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler
+ *                           in some ASICs.
+ * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant
+ *                    for Goya/Gaudi only.
+ * @dram_enabled: Whether the DRAM is enabled.
+ * @cpucp_version: The CPUCP f/w version.
+ * @card_name: The card name as passed by the f/w.
+ * @dram_page_size: The DRAM physical page size.
+ */
 struct hl_info_hw_ip_info {
 	__u64 sram_base_address;
 	__u64 dram_base_address;
 	__u64 dram_size;
 	__u32 sram_size;
 	__u32 num_of_events;
-	__u32 device_id; /* PCI Device ID */
-	__u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
+	__u32 device_id;
+	__u32 module_id;
 	__u32 reserved;
 	__u16 first_available_interrupt_id;
-	__u16 reserved2;
+	__u16 server_type;
 	__u32 cpld_version;
 	__u32 psoc_pci_pll_nr;
 	__u32 psoc_pci_pll_nf;
@@ -358,7 +400,7 @@ struct hl_info_hw_ip_info {
 	__u8 pad[2];
 	__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
 	__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
-	__u64 reserved3;
+	__u64 reserved2;
 	__u64 dram_page_size;
 };

@@ -628,12 +670,21 @@ struct hl_cs_chunk {
 		__u64 cb_handle;

 		/* Relevant only when HL_CS_FLAGS_WAIT or
-		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set.
-		 * This holds address of array of u64 values that contain
-		 * signal CS sequence numbers. The wait described by this job
-		 * will listen on all those signals (wait event per signal)
+		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set.
+		 * This holds the address of an array of u64 values that
+		 * contain signal CS sequence numbers. The wait described by
+		 * this job will listen on all those signals
+		 * (wait event per signal)
 		 */
 		__u64 signal_seq_arr;
+
+		/*
+		 * Relevant only when HL_CS_FLAGS_WAIT or
+		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set
+		 * along with HL_CS_FLAGS_ENCAP_SIGNALS.
+		 * This is the CS sequence which has the encapsulated signals.
+		 */
+		__u64 encaps_signal_seq;
 	};

 	/* Index of queue to put the CB on */
@@ -651,6 +702,17 @@ struct hl_cs_chunk {
 	 * Number of entries in signal_seq_arr
 	 */
 	__u32 num_signal_seq_arr;
+
+	/* Relevant only when HL_CS_FLAGS_WAIT or
+	 * HL_CS_FLAGS_COLLECTIVE_WAIT is set along
+	 * with HL_CS_FLAGS_ENCAP_SIGNALS.
+	 * This sets the signals range that the user wants to wait for
+	 * out of the whole reserved signals range.
+	 * e.g. if the signals range is 20, and the user doesn't want
+	 * to wait for signal 8, he sets this offset to 7, then
+	 * calls the API again with 9 and so on till 20.
+	 */
+	__u32 encaps_signal_offset;
 };

 /* HL_CS_CHUNK_FLAGS_* */
@@ -678,6 +740,28 @@ struct hl_cs_chunk {
 #define HL_CS_FLAGS_CUSTOM_TIMEOUT		0x200
 #define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT	0x400

+/*
+ * The encapsulated signals CS is merged into the existing CS ioctls.
+ * In order to use this feature one needs to follow the procedure below:
+ * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY.
+ *    The output of this API will be the SOB offset from CFG_BASE.
+ *    This address will be used to patch CB cmds to do the signaling for this
+ *    SOB by incrementing its value.
+ *    For reverting the reservation use the HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY
+ *    CS type; note that this might fail if the SOB value went out of sync,
+ *    in case another signaling request to the same SOB occurred between the
+ *    reserve and unreserve calls.
+ * 2. Use the staged CS to do the encapsulated signaling jobs.
+ *    Use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+ *    along with the HL_CS_FLAGS_ENCAP_SIGNALS flag, and set the
+ *    encaps_signal_offset field. This offset allows the app to wait on part
+ *    of the reserved signals.
+ * 3. Use WAIT/COLLECTIVE WAIT CS along with the HL_CS_FLAGS_ENCAP_SIGNALS flag
+ *    to wait for the encapsulated signals.
+ */
+#define HL_CS_FLAGS_ENCAP_SIGNALS		0x800
+#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY	0x1000
+#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY	0x2000
+
 #define HL_CS_STATUS_SUCCESS		0

 #define HL_MAX_JOBS_PER_CS		512
@@ -690,10 +774,35 @@ struct hl_cs_in {
 	/* holds address of array of hl_cs_chunk for execution phase */
 	__u64 chunks_execute;

-	/* Sequence number of a staged submission CS
-	 * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
-	 */
-	__u64 seq;
+	union {
+		/*
+		 * Sequence number of a staged submission CS.
+		 * Valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and
+		 * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset.
+		 */
+		__u64 seq;
+
+		/*
+		 * Encapsulated signals handle id.
+		 * Valid for two flows:
+		 * 1. CS with encapsulated signals:
+		 *    when HL_CS_FLAGS_STAGED_SUBMISSION and
+		 *    HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+		 *    and HL_CS_FLAGS_ENCAP_SIGNALS are set.
+		 * 2. unreserve signals:
+		 *    valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set.
+		 */
+		__u32 encaps_sig_handle_id;
+
+		/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
+		struct {
+			/* Encapsulated signals number */
+			__u32 encaps_signals_count;
+
+			/* Encapsulated signals queue index (stream) */
+			__u32 encaps_signals_q_idx;
+		};
+	};

 	/* Number of chunks in restore phase array. Maximum number is
 	 * HL_MAX_JOBS_PER_CS
@@ -718,14 +827,31 @@ struct hl_cs_in {
 };

 struct hl_cs_out {
-	/*
-	 * seq holds the sequence number of the CS to pass to wait ioctl. All
-	 * values are valid except for 0 and ULLONG_MAX
-	 */
-	__u64 seq;
-	/* HL_CS_STATUS_* */
+	union {
+		/*
+		 * seq holds the sequence number of the CS to pass to wait
+		 * ioctl. All values are valid except for 0 and ULLONG_MAX
+		 */
+		__u64 seq;
+
+		/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
+		struct {
+			/* This is the reserved signal handle id */
+			__u32 handle_id;
+
+			/* This is the signals count */
+			__u32 count;
+		};
+	};
+
+	/* HL_CS_STATUS */
 	__u32 status;
-	__u32 pad;
+
+	/*
+	 * SOB base address offset
+	 * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set
+	 */
+	__u32 sob_base_addr_offset;
 };

 union hl_cs_args {
@@ -735,11 +861,18 @@ union hl_cs_args {

 #define HL_WAIT_CS_FLAGS_INTERRUPT	0x2
 #define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
+#define HL_WAIT_CS_FLAGS_MULTI_CS	0x4
+
+#define HL_WAIT_MULTI_CS_LIST_MAX_LEN	32

 struct hl_wait_cs_in {
 	union {
 		struct {
-			/* Command submission sequence number */
+			/*
+			 * In case of wait_cs holds the CS sequence number.
+			 * In case of wait for multi CS holds a user pointer to
+			 * an array of CS sequence numbers
+			 */
 			__u64 seq;
 			/* Absolute timeout to wait for command submission
 			 * in microseconds
@@ -767,12 +900,17 @@ struct hl_wait_cs_in {

 	/* Context ID - Currently not in use */
 	__u32 ctx_id;
+
 	/* HL_WAIT_CS_FLAGS_*
 	 * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include
 	 * the interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK; in
 	 * order not to specify an interrupt id, set the mask to all 1s.
 	 */
 	__u32 flags;
+
+	/* Multi CS API info - valid entries in the multi-CS array */
+	__u8 seq_arr_len;
+	__u8 pad[7];
 };

 #define HL_WAIT_CS_STATUS_COMPLETED	0
@@ -789,8 +927,15 @@ struct hl_wait_cs_out {
 	__u32 status;
 	/* HL_WAIT_CS_STATUS_FLAG* */
 	__u32 flags;
-	/* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */
+	/*
+	 * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set
+	 * for wait_cs: timestamp of CS completion
+	 * for wait_multi_cs: timestamp of FIRST CS completion
+	 */
 	__s64 timestamp_nsec;
+
+	/* multi CS completion bitmap */
+	__u32 cs_completion_map;
+	__u32 pad;
 };

 union hl_wait_cs_args {
@@ -813,6 +958,7 @@ union hl_wait_cs_args {
 #define HL_MEM_CONTIGUOUS	0x1
 #define HL_MEM_SHARED		0x2
 #define HL_MEM_USERPTR		0x4
+#define HL_MEM_FORCE_HINT	0x8

 struct hl_mem_in {
 	union {
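The reworked hl_cs_in and hl_cs_out keep their size by overlaying the new encapsulated-signals fields in anonymous unions, with the flags word telling the driver which interpretation is active. A compact userspace illustration of that flag-selected union pattern; the struct layout and names below are simplified stand-ins, not the real uapi:

#include <stdio.h>
#include <stdint.h>

#define FLAGS_RESERVE_SIGNALS_ONLY 0x1000

/* simplified echo of the hl_cs_in union idea */
struct cs_in {
	union {
		uint64_t seq;
		uint32_t encaps_sig_handle_id;
		struct {
			uint32_t encaps_signals_count;
			uint32_t encaps_signals_q_idx;
		};
	};
	uint32_t flags;
};

int main(void)
{
	struct cs_in in = { .flags = FLAGS_RESERVE_SIGNALS_ONLY };

	in.encaps_signals_count = 20;	/* reserve 20 signals ... */
	in.encaps_signals_q_idx = 2;	/* ... on stream 2 */

	if (in.flags & FLAGS_RESERVE_SIGNALS_ONLY)
		printf("reserve %u signals on queue %u\n",
		       in.encaps_signals_count, in.encaps_signals_q_idx);
	return 0;
}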