From 7f77c28b95345faf1e79bd273f9e493d9da3394c Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Thu, 21 May 2026 10:08:19 -0700 Subject: [PATCH 1/6] lib: linux: preserve device-open errors The Linux bus open path may try more than one backend driver for a device. When a backend finds the device but fails while opening it, the common open loop currently discards that errno and returns -ENODEV after all drivers have been tried. Keep the first useful backend open error, preferring non-ENODEV failures over a plain miss. This preserves the existing not-found result while letting callers see real failures such as UIO map population errors. Signed-off-by: Ben Levinsky --- lib/system/linux/device.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/system/linux/device.c b/lib/system/linux/device.c index fcd423e06..3c9c1b59e 100644 --- a/lib/system/linux/device.c +++ b/lib/system/linux/device.c @@ -428,7 +428,7 @@ static int metal_linux_dev_open(struct metal_bus *bus, struct linux_bus *lbus = to_linux_bus(bus); struct linux_device *ldev = NULL; struct linux_driver *ldrv; - int error; + int error = -ENODEV; ldev = malloc(sizeof(*ldev)); if (!ldev) @@ -450,8 +450,11 @@ static int metal_linux_dev_open(struct metal_bus *bus, /* Try and open the device. */ error = ldrv->dev_open(lbus, ldev); if (error) { - ldrv->dev_close(lbus, ldev); - continue; + /* + * Return the driver's errno while still giving it a + * chance to release any state allocated before failing. + */ + goto close_dev; } *device = &ldev->device; @@ -461,9 +464,15 @@ static int metal_linux_dev_open(struct metal_bus *bus, return 0; } + goto out; + +close_dev: + if (ldrv->dev_close) + ldrv->dev_close(lbus, ldev); +out: free(ldev); - return -ENODEV; + return error; } static void metal_linux_dev_close(struct metal_bus *bus, @@ -668,4 +677,3 @@ int metal_linux_get_device_property(struct metal_device *device, status = close(fd); return status < 0 ? 
-errno : 0; } - From 1ca5a9f95944e2df2ce4fae5b48bd8f2c835c6cb Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Wed, 20 May 2026 14:06:34 -0700 Subject: [PATCH 2/6] lib: linux: fix UIO mmap offset handling UIO map offsets identify the usable resource start inside the page-aligned mapping exposed by sysfs. The Linux backend previously exposed and unmapped the adjusted virtual address directly. Keep the raw mmap base and length for close, expose the usable virtual address as raw mapping plus offset, and derive the libmetal physical base and size from the usable portion of the UIO map. Use the sysfs map size as the mmap length. For an unaligned resource, UIO already reports a page-aligned address and a full mmap length, so adding the offset to that length can over-map the resource and fail. Reject offsets outside the system page size, reject offsets beyond the map size, and report overflow before attempting to mmap the region. Signed-off-by: Ben Levinsky --- lib/system/linux/device.c | 156 ++++++++++++++++++++++++++++++++++---- 1 file changed, 142 insertions(+), 14 deletions(-) diff --git a/lib/system/linux/device.c b/lib/system/linux/device.c index 3c9c1b59e..eb8ed8c9d 100644 --- a/lib/system/linux/device.c +++ b/lib/system/linux/device.c @@ -59,12 +59,29 @@ struct linux_device { char dev_path[PATH_MAX]; char cls_path[PATH_MAX]; metal_phys_addr_t region_phys[METAL_MAX_DEVICE_REGIONS]; + void *region_map_raw[METAL_MAX_DEVICE_REGIONS]; + size_t region_map_len[METAL_MAX_DEVICE_REGIONS]; struct linux_driver *ldrv; struct sysfs_device *sdev; struct sysfs_attribute *override; int fd; }; +/* + * UIO sysfs reports a full mmap() extent plus a separate offset to the + * usable resource. Keep those inputs together while converting them into the + * libmetal physical address, mmap length, and exported region size. 
+ */ +struct metal_uio_map_info { + const char *dev_name; + metal_phys_addr_t map_addr; + unsigned long map_size; + unsigned long offset; + metal_phys_addr_t *phys; + size_t *map_len; + size_t *region_size; +}; + static struct linux_bus *to_linux_bus(struct metal_bus *bus) { return metal_container_of(bus, struct linux_bus, bus); @@ -100,6 +117,81 @@ static int metal_uio_read_map_attr(struct linux_device *ldev, return 0; } +/* + * Validate the sysfs map offset before it is applied to the mmap() base. + * The Linux UIO ABI exposes one mmap slot per page-sized index, so the + * per-map offset must stay inside a single host page. + */ +static int metal_linux_uio_validate_offset(const char *dev_name, + unsigned long offset) +{ + const unsigned long page_size = (unsigned long)getpagesize(); + + /* + * The offset is applied inside one page returned by mmap(). Larger + * offsets cannot be represented by adjusting the returned mapping. + */ + if (offset >= page_size) { + metal_log(METAL_LOG_ERROR, + "device %s has invalid UIO offset 0x%lx (page size 0x%lx)\n", + dev_name ? dev_name : "", offset, page_size); + return -EINVAL; + } + + return 0; +} + +/* + * Translate UIO sysfs map attributes into the values libmetal needs: + * the mmap() length for cleanup, the usable physical start address, and + * the usable I/O region size after skipping the map offset. + */ +static int metal_linux_uio_map_info(struct metal_uio_map_info *info) +{ + int result; + + if (!info || !info->phys || !info->map_len || !info->region_size) + return -EINVAL; + + result = metal_linux_uio_validate_offset(info->dev_name, info->offset); + if (result) + return result; + + if (!info->map_size || info->offset >= info->map_size) { + metal_log(METAL_LOG_ERROR, + "device %s has invalid UIO size 0x%lx for offset 0x%lx\n", + info->dev_name ? 
info->dev_name : "", + info->map_size, info->offset); + return -EINVAL; + } + + if ((unsigned long)(size_t)info->map_size != info->map_size) { + metal_log(METAL_LOG_ERROR, + "device %s UIO size 0x%lx overflows size_t\n", + info->dev_name ? info->dev_name : "", + info->map_size); + return -EOVERFLOW; + } + + if (info->map_addr > (metal_phys_addr_t)-1 - info->offset) { + metal_log(METAL_LOG_ERROR, + "device %s UIO physical address overflow (addr=0x%lx offset=0x%lx)\n", + info->dev_name ? info->dev_name : "", + (unsigned long)info->map_addr, info->offset); + return -EOVERFLOW; + } + + /* + * mmap() uses the full page-aligned map. libmetal clients see only the + * usable resource that starts at offset bytes into that mapping. + */ + *info->phys = info->map_addr + info->offset; + *info->map_len = (size_t)info->map_size; + *info->region_size = (size_t)(info->map_size - info->offset); + + return 0; +} + static int metal_uio_dev_bind(struct linux_device *ldev, struct linux_driver *ldrv) { @@ -155,11 +247,14 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) { char *instance, path[SYSFS_PATH_MAX]; struct linux_driver *ldrv = ldev->ldrv; - unsigned long *phys, offset = 0, size = 0; + unsigned long offset = 0, size = 0; + metal_phys_addr_t addr = 0, *phys; struct metal_io_region *io; + struct metal_uio_map_info map_info; + size_t map_len, region_size; struct dlist *dlist; int result, i; - void *virt; + void *raw, *virt; int irq_info; @@ -231,21 +326,52 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) metal_log(METAL_LOG_DEBUG, "opened %s:%s as %s\n", lbus->bus_name, ldev->dev_name, ldev->dev_path); - for (i = 0, result = 0; !result && i < METAL_MAX_DEVICE_REGIONS; i++) { + for (i = 0; i < METAL_MAX_DEVICE_REGIONS; i++) { phys = &ldev->region_phys[ldev->device.num_regions]; + result = metal_uio_read_map_attr(ldev, i, "offset", &offset); + if (result) + break; result = (result ? 
result : - metal_uio_read_map_attr(ldev, i, "offset", &offset)); - result = (result ? result : - metal_uio_read_map_attr(ldev, i, "addr", phys)); + metal_uio_read_map_attr(ldev, i, "addr", &addr)); result = (result ? result : metal_uio_read_map_attr(ldev, i, "size", &size)); - result = (result ? result : - metal_map(ldev->fd, i * getpagesize(), size, 0, 0, &virt)); - if (!result) { - io = &ldev->device.regions[ldev->device.num_regions]; - metal_io_init(io, virt, phys, size, -1, 0, NULL); - ldev->device.num_regions++; + if (result) + return result; + /* + * UIO sysfs reports addr/size/offset separately. Convert them + * before mmap() so the raw mapping and exposed region stay in + * sync for both normal access and close-time unmap. + */ + map_info.dev_name = ldev->dev_name; + map_info.map_addr = addr; + map_info.map_size = size; + map_info.offset = offset; + map_info.phys = phys; + map_info.map_len = &map_len; + map_info.region_size = &region_size; + result = metal_linux_uio_map_info(&map_info); + if (result) + return result; + result = metal_map(ldev->fd, i * getpagesize(), map_len, 0, 0, + &raw); + if (result) { + metal_log(METAL_LOG_ERROR, + "failed to mmap device %s map%u (len=0x%zx offset=0x%lx): %s\n", + ldev->dev_name, i, map_len, + (unsigned long)i * (unsigned long)getpagesize(), + strerror(-result)); + return result; } + virt = (void *)((char *)raw + offset); + /* + * Keep the raw mapping for munmap(); expose the adjusted + * address as the usable libmetal I/O region. 
+ */ + io = &ldev->device.regions[ldev->device.num_regions]; + metal_io_init(io, virt, phys, region_size, -1, 0, NULL); + ldev->region_map_raw[ldev->device.num_regions] = raw; + ldev->region_map_len[ldev->device.num_regions] = map_len; + ldev->device.num_regions++; } irq_info = 1; @@ -271,8 +397,10 @@ static void metal_uio_dev_close(struct linux_bus *lbus, unsigned int i; for (i = 0; i < ldev->device.num_regions; i++) { - metal_unmap(ldev->device.regions[i].virt, - ldev->device.regions[i].size); + metal_unmap(ldev->region_map_raw[i], + ldev->region_map_len[i]); + ldev->region_map_raw[i] = NULL; + ldev->region_map_len[i] = 0; } if (ldev->override) { sysfs_write_attribute(ldev->override, "", 1); From dbb1a723239da840bac541e92116d06bcbba82fd Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Wed, 20 May 2026 14:07:17 -0700 Subject: [PATCH 3/6] lib: linux: clear UIO IRQ bookkeeping on close A UIO-backed device registers its file descriptor with the Linux IRQ controller so interrupt handling can find the owning metal device. Closing the device must clear that association before closing the fd. Add an internal unregister helper that detaches the device pointer after the IRQ consumer has disabled the IRQ. Keep IRQ handler and enable-state teardown owned by the standard IRQ disable and unregister paths. Signed-off-by: Ben Levinsky --- lib/system/linux/device.c | 7 +++++ lib/system/linux/irq.c | 61 ++++++++++++++++++++++++++++++++++++++- lib/system/linux/irq.h | 27 +++++++++++++++++ 3 files changed, 94 insertions(+), 1 deletion(-) diff --git a/lib/system/linux/device.c b/lib/system/linux/device.c index eb8ed8c9d..ebb01ae31 100644 --- a/lib/system/linux/device.c +++ b/lib/system/linux/device.c @@ -411,7 +411,14 @@ static void metal_uio_dev_close(struct linux_bus *lbus, ldev->sdev = NULL; } if (ldev->fd >= 0) { + /* + * Disable first so unregister only removes device bookkeeping; + * IRQ handler teardown remains in the generic IRQ path. 
+ */ + metal_irq_disable(ldev->fd); + metal_linux_irq_unregister_dev(ldev->fd); close(ldev->fd); + ldev->fd = -1; } } diff --git a/lib/system/linux/irq.c b/lib/system/linux/irq.c index 5d84ee015..1042d8b69 100644 --- a/lib/system/linux/irq.c +++ b/lib/system/linux/irq.c @@ -266,10 +266,69 @@ void metal_linux_irq_shutdown(void) void metal_linux_irq_register_dev(struct metal_device *dev, int irq) { - if (irq > MAX_IRQS) { + if (irq < 0 || irq >= MAX_IRQS) { metal_log(METAL_LOG_ERROR, "Failed to register device to irq %d\n", irq); return; } irqs_devs[irq] = dev; } + +/* + * Drop the device pointer associated with a Linux IRQ fd during device close. + * The caller must disable the IRQ first so the dispatch path cannot observe + * an enabled IRQ whose owning device has already been detached. + */ +int metal_linux_irq_unregister_dev(int irq) +{ + int offset; + + if (irq < linux_irq_cntr.irq_base || + irq >= linux_irq_cntr.irq_base + linux_irq_cntr.irq_num) { + metal_log(METAL_LOG_ERROR, + "Failed to unregister device from irq %d\n", irq); + return -EINVAL; + } + + offset = irq - linux_irq_cntr.irq_base; + metal_mutex_acquire(&irq_lock); + /* + * Unregister only detaches the device association. The IRQ handler and + * enabled state remain owned by metal_irq_disable()/unregister(). + */ + if (metal_bitmap_is_bit_set(irqs_enabled, offset)) { + metal_mutex_release(&irq_lock); + return -EINVAL; + } + irqs_devs[irq] = NULL; + metal_mutex_release(&irq_lock); + + return 0; +} + +/* + * Return the device pointer used by the Linux IRQ dispatch path. Tests use + * this to verify close-time bookkeeping without poking at static arrays. + */ +struct metal_device *metal_linux_irq_get_dev(int irq) +{ + if (irq < linux_irq_cntr.irq_base || + irq >= linux_irq_cntr.irq_base + linux_irq_cntr.irq_num) + return NULL; + + return irqs_devs[irq]; +} + +/* + * Report the Linux IRQ enable bit for callers that need to enforce teardown + * ordering before unregistering a device association. 
+ */ +int metal_linux_irq_is_enabled(int irq) +{ + if (irq < linux_irq_cntr.irq_base || + irq >= linux_irq_cntr.irq_base + linux_irq_cntr.irq_num) + return 0; + + return metal_bitmap_is_bit_set(irqs_enabled, + irq - linux_irq_cntr.irq_base); +} diff --git a/lib/system/linux/irq.h b/lib/system/linux/irq.h index ff02b7e48..b6dedb719 100644 --- a/lib/system/linux/irq.h +++ b/lib/system/linux/irq.h @@ -29,6 +29,33 @@ */ void metal_linux_irq_register_dev(struct metal_device *dev, int irq); +/** + * @brief Unregister the metal device associated with a Linux IRQ. + * + * Metal Linux internal function to clear device bookkeeping for an IRQ. The + * IRQ consumer must disable the IRQ before unregistering the device. + * + * @param[in] irq interrupt id + * @return 0 on success, or -errno on error. + */ +int metal_linux_irq_unregister_dev(int irq); + +/** + * @brief Get the metal device associated with a Linux IRQ. + * + * @param[in] irq interrupt id + * @return Registered metal device, or NULL if none is registered. + */ +struct metal_device *metal_linux_irq_get_dev(int irq); + +/** + * @brief Check whether a Linux IRQ is enabled. + * + * @param[in] irq interrupt id + * @return 1 if the IRQ is enabled, or 0 otherwise. + */ +int metal_linux_irq_is_enabled(int irq); + #endif /* METAL_INTERNAL */ #define __METAL_LINUX_IRQ__H__ From 1e6b5c66ffdcd2a034127d34a6745fbb5b758761 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Mon, 8 Jun 2026 09:47:50 -0700 Subject: [PATCH 4/6] lib: linux: factor common UIO populate path Split the UIO open flow into two stages. The parent-bus path still opens the platform or PCI sysfs device, binds it to the selected UIO driver, finds the child UIO class device, and records the resolved class and /dev paths. Move the common stage into metal_uio_populate(). 
That helper waits for the /dev/uioX node, opens it, reads each UIO map, maps the full mmap extent, exposes the usable region after the sysfs offset, and registers IRQ bookkeeping when the UIO fd supports interrupts. Keep close-time cleanup unchanged by storing the raw mmap address and length alongside the adjusted libmetal I/O region. On populate failure, unmap any regions mapped so far and close the UIO fd locally before the generic open path releases parent sysfs and driver override state. Also make local error paths close the temporary UIO child list before returning. Signed-off-by: Ben Levinsky --- lib/system/linux/device.c | 229 ++++++++++++++++++++++++++++---------- 1 file changed, 172 insertions(+), 57 deletions(-) diff --git a/lib/system/linux/device.c b/lib/system/linux/device.c index ebb01ae31..d0d3b4c36 100644 --- a/lib/system/linux/device.c +++ b/lib/system/linux/device.c @@ -57,7 +57,13 @@ struct linux_device { struct metal_device device; char dev_name[PATH_MAX]; char dev_path[PATH_MAX]; + /* + * UIO sysfs class directory, such as /sys/class/uio/uio0. UIO map + * attributes are read relative to this path. + */ char cls_path[PATH_MAX]; + char uio_name[PATH_MAX]; + char uio_dev_name[PATH_MAX]; metal_phys_addr_t region_phys[METAL_MAX_DEVICE_REGIONS]; void *region_map_raw[METAL_MAX_DEVICE_REGIONS]; size_t region_map_len[METAL_MAX_DEVICE_REGIONS]; @@ -117,6 +123,32 @@ static int metal_uio_read_map_attr(struct linux_device *ldev, return 0; } +/* + * Read string-valued UIO sysfs attributes such as /sys/class/uio/uioX/name. + * The value is explicitly terminated so callers can compare it as a C string. 
+ */ +static int metal_uio_read_str_attr(const char *path, char *value, size_t len) +{ + struct sysfs_attribute *attr; + int result = 0; + + if (!value || !len) + return -EINVAL; + + attr = sysfs_open_attribute(path); + if (!attr || sysfs_read_attribute(attr) != 0) { + result = -errno; + goto close_attr; + } + + strncpy(value, attr->value, len - 1); + value[len - 1] = '\0'; + +close_attr: + sysfs_close_attribute(attr); + return result; +} + /* * Validate the sysfs map offset before it is applied to the mmap() base. * The Linux UIO ABI exposes one mmap slot per page-sized index, so the @@ -243,67 +275,23 @@ static int metal_uio_dev_bind(struct linux_device *ldev, return 0; } -static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) +/* + * Populate the common UIO device state after either open path has resolved + * cls_path and dev_path. Both parent-bus opens and class-name opens share the + * same mmap, IRQ registration, DMA, and close-time cleanup rules. + */ +static int metal_uio_populate(struct linux_bus *lbus, struct linux_device *ldev) { - char *instance, path[SYSFS_PATH_MAX]; - struct linux_driver *ldrv = ldev->ldrv; unsigned long offset = 0, size = 0; metal_phys_addr_t addr = 0, *phys; struct metal_io_region *io; struct metal_uio_map_info map_info; size_t map_len, region_size; - struct dlist *dlist; - int result, i; + int result, i = 0; + unsigned int j; void *raw, *virt; int irq_info; - - ldev->fd = -1; - ldev->device.irq_info = (void *)-1; - - ldev->sdev = sysfs_open_device(lbus->bus_name, ldev->dev_name); - if (!ldev->sdev) { - metal_log(METAL_LOG_ERROR, "device %s:%s not found\n", - lbus->bus_name, ldev->dev_name); - return -ENODEV; - } - metal_log(METAL_LOG_DEBUG, "opened sysfs device %s:%s\n", - lbus->bus_name, ldev->dev_name); - - result = metal_uio_dev_bind(ldev, ldrv); - if (result) - return result; - - result = snprintf(path, sizeof(path), "%s/uio", ldev->sdev->path); - if (result >= (int)sizeof(path)) - return -EOVERFLOW; - 
dlist = sysfs_open_directory_list(path); - if (!dlist) { - metal_log(METAL_LOG_ERROR, "failed to scan class path %s\n", - path); - return -errno; - } - - dlist_for_each_data(dlist, instance, char) { - result = snprintf(ldev->cls_path, sizeof(ldev->cls_path), - "%s/%s", path, instance); - if (result >= (int)sizeof(ldev->cls_path)) - return -EOVERFLOW; - result = snprintf(ldev->dev_path, sizeof(ldev->dev_path), - "/dev/%s", instance); - if (result >= (int)sizeof(ldev->dev_path)) - return -EOVERFLOW; - break; - } - sysfs_close_list(dlist); - - if (sysfs_path_is_dir(ldev->cls_path) != 0) { - metal_log(METAL_LOG_ERROR, "invalid device class path %s\n", - ldev->cls_path); - return -ENODEV; - } - - i = 0; do { if (!access(ldev->dev_path, F_OK)) break; @@ -317,7 +305,7 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) } result = metal_open(ldev->dev_path, 0); if (result < 0) { - metal_log(METAL_LOG_ERROR, "failed to open device %s\n", + metal_log(METAL_LOG_ERROR, "failed to open device %s: %s\n", ldev->dev_path, strerror(-result)); return result; } @@ -329,14 +317,20 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) for (i = 0; i < METAL_MAX_DEVICE_REGIONS; i++) { phys = &ldev->region_phys[ldev->device.num_regions]; result = metal_uio_read_map_attr(ldev, i, "offset", &offset); - if (result) + /* + * A missing offset for the next map marks the end of the UIO + * map list. Other read errors are real open failures. + */ + if (result == -ENOENT) break; + if (result) + goto fail; result = (result ? result : metal_uio_read_map_attr(ldev, i, "addr", &addr)); result = (result ? result : metal_uio_read_map_attr(ldev, i, "size", &size)); if (result) - return result; + goto fail; /* * UIO sysfs reports addr/size/offset separately. 
Convert them * before mmap() so the raw mapping and exposed region stay in @@ -351,7 +345,7 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) map_info.region_size = &region_size; result = metal_linux_uio_map_info(&map_info); if (result) - return result; + goto fail; result = metal_map(ldev->fd, i * getpagesize(), map_len, 0, 0, &raw); if (result) { @@ -360,7 +354,7 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) ldev->dev_name, i, map_len, (unsigned long)i * (unsigned long)getpagesize(), strerror(-result)); - return result; + goto fail; } virt = (void *)((char *)raw + offset); /* @@ -388,6 +382,127 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) } return 0; + +fail: + for (j = 0; j < ldev->device.num_regions; j++) { + metal_unmap(ldev->region_map_raw[j], + ldev->region_map_len[j]); + ldev->region_map_raw[j] = NULL; + ldev->region_map_len[j] = 0; + } + ldev->device.num_regions = 0; + ldev->device.irq_num = 0; + ldev->device.irq_info = (void *)-1; + if (ldev->fd >= 0) { + close(ldev->fd); + ldev->fd = -1; + } + + return result; +} + +/* + * Open a platform or PCI device that has an associated UIO child. This path + * binds the parent device to a UIO driver before using the common UIO populate + * logic. 
+ */ +static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) +{ + char *instance, path[SYSFS_PATH_MAX]; + struct linux_driver *ldrv = ldev->ldrv; + struct dlist *dlist; + int result; + + ldev->fd = -1; + ldev->device.irq_info = (void *)-1; + + ldev->sdev = sysfs_open_device(lbus->bus_name, ldev->dev_name); + if (!ldev->sdev) { + metal_log(METAL_LOG_ERROR, "device %s:%s not found\n", + lbus->bus_name, ldev->dev_name); + return -ENODEV; + } + metal_log(METAL_LOG_DEBUG, "opened sysfs device %s:%s\n", + lbus->bus_name, ldev->dev_name); + /* + * Errors after this point return through metal_linux_dev_open(), which + * calls dev_close() to release parent sysfs and driver override state. + */ + + /* + * Parent-bus opens still need the requested platform or PCI device + * bound to the selected UIO driver before a /dev/uioX node can exist. + */ + result = metal_uio_dev_bind(ldev, ldrv); + if (result) + return result; + + /* + * A bound parent device exposes one UIO child below its sysfs device + * directory. Use that child name to derive both sysfs and /dev paths. + */ + result = snprintf(path, sizeof(path), "%s/uio", ldev->sdev->path); + if (result >= (int)sizeof(path)) + return -EOVERFLOW; + dlist = sysfs_open_directory_list(path); + if (!dlist) { + metal_log(METAL_LOG_ERROR, "failed to scan class path %s\n", + path); + return -errno; + } + + dlist_for_each_data(dlist, instance, char) { + /* + * The first UIO child is the device node this parent-bus open + * will use for mmap, IRQ, and DMA operations. 
+ */ + result = snprintf(ldev->cls_path, sizeof(ldev->cls_path), + "%s/%s", path, instance); + if (result >= (int)sizeof(ldev->cls_path)) { + result = -EOVERFLOW; + goto close_list; + } + result = snprintf(ldev->dev_path, sizeof(ldev->dev_path), + "/dev/%s", instance); + if (result >= (int)sizeof(ldev->dev_path)) { + result = -EOVERFLOW; + goto close_list; + } + result = snprintf(path, sizeof(path), "%s/name", ldev->cls_path); + if (result >= (int)sizeof(path)) { + result = -EOVERFLOW; + goto close_list; + } + ldev->uio_name[0] = '\0'; + metal_uio_read_str_attr(path, ldev->uio_name, + sizeof(ldev->uio_name)); + result = snprintf(ldev->uio_dev_name, + sizeof(ldev->uio_dev_name), "%s", instance); + if (result < 0 || result >= (int)sizeof(ldev->uio_dev_name)) { + result = -EOVERFLOW; + goto close_list; + } + break; + } + result = 0; + +close_list: + sysfs_close_list(dlist); + if (result) + return result; + + /* Refuse to continue if the selected UIO class path disappeared. */ + if (sysfs_path_is_dir(ldev->cls_path) != 0) { + metal_log(METAL_LOG_ERROR, "invalid device class path %s\n", + ldev->cls_path); + return -ENODEV; + } + + /* + * Once cls_path and dev_path are resolved, the rest of the open flow is + * shared with the synthetic UIO class-name path. + */ + return metal_uio_populate(lbus, ldev); } static void metal_uio_dev_close(struct linux_bus *lbus, From e7a81a247af79e717e5cf5ee026caf4794c4cac3 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Mon, 8 Jun 2026 09:48:38 -0700 Subject: [PATCH 5/6] lib: linux: add UIO class-name lookup Add the resolver used by the synthetic uio bus. It scans every /sys/class/uio/uioX/name file, compares the first line against the requested libmetal device name, and rejects duplicate matches because they cannot be opened deterministically. 
When a unique match is found, fill the same linux_device fields that the parent-bus UIO path fills: cls_path points at the UIO sysfs class directory, dev_path points at /dev/uioX, and the UIO name and device node name are saved for diagnostics and future callers. The class-name open callback then reuses metal_uio_populate(), so UIO class opens and parent-bus UIO opens share mmap setup, IRQ registration, DMA handling, and close-time cleanup. Signed-off-by: Ben Levinsky --- lib/system/linux/device.c | 154 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/lib/system/linux/device.c b/lib/system/linux/device.c index d0d3b4c36..619408971 100644 --- a/lib/system/linux/device.c +++ b/lib/system/linux/device.c @@ -14,9 +14,13 @@ #include #include +#include +#include + #include "irq.h" #define MAX_DRIVERS 64 +#define METAL_UIO_CLASS_PATH "/sys/class/uio" struct linux_bus; struct linux_device; @@ -98,6 +102,39 @@ static struct linux_device *to_linux_device(struct metal_device *device) return metal_container_of(device, struct linux_device, device); } +/* + * Read sysfs files whose useful value is the first text line. UIO class-name + * matching needs the newline stripped before comparing against dev_name. + */ +static int metal_linux_read_first_line(const char *path, char *output, + size_t output_len) +{ + FILE *fp; + char *newline; + int result = 0; + + if (!path || !output || output_len < 2) + return -EINVAL; + + fp = fopen(path, "r"); + if (!fp) + return -errno; + + if (!fgets(output, output_len, fp)) { + result = ferror(fp) ? 
-errno : -ENODATA; + goto close_file; + } + + newline = strchr(output, '\n'); + if (newline) + *newline = '\0'; + +close_file: + fclose(fp); + + return result; +} + static int metal_uio_read_map_attr(struct linux_device *ldev, unsigned int index, const char *name, @@ -224,6 +261,101 @@ static int metal_linux_uio_map_info(struct metal_uio_map_info *info) return 0; } +/* + * Open by UIO class name by scanning /sys/class/uio/uioX/name for the + * requested libmetal device name. This is the synthetic "uio" bus path: + * there is no parent platform or PCI sysfs device to bind through first, so + * the UIO class name must uniquely identify the device. + */ +static int metal_uio_find_device_by_name(const char *uio_name, + struct linux_device *ldev) +{ + DIR *dir; + struct dirent *entry; + char path[PATH_MAX]; + char value[PATH_MAX]; + bool found = false; + int result = -ENODEV; + + if (!uio_name || !strlen(uio_name) || !ldev) + return -EINVAL; + + dir = opendir(METAL_UIO_CLASS_PATH); + if (!dir) { + result = errno == ENOENT ? -ENODEV : -errno; + return result; + } + + /* + * Walk every UIO class device and compare its reported name against the + * requested libmetal name. Continue after a match so duplicate names can + * be detected instead of silently choosing a nondeterministic device. + */ + while ((entry = readdir(dir)) != NULL) { + if (strncmp(entry->d_name, "uio", 3) != 0) + continue; + + result = snprintf(path, sizeof(path), "%s/%s/name", + METAL_UIO_CLASS_PATH, entry->d_name); + if (result < 0 || result >= (int)sizeof(path)) { + result = -EOVERFLOW; + goto out; + } + + result = metal_linux_read_first_line(path, value, + sizeof(value)); + if (result) + continue; + + if (strcmp(value, uio_name) != 0) + continue; + + if (found) { + /* Duplicate names cannot be opened deterministically. 
*/ + result = -EEXIST; + goto out; + } + found = true; + + result = snprintf(ldev->cls_path, sizeof(ldev->cls_path), + "%s/%s", METAL_UIO_CLASS_PATH, + entry->d_name); + if (result < 0 || result >= (int)sizeof(ldev->cls_path)) { + result = -EOVERFLOW; + goto out; + } + /* + * Fill the same fields as the parent-bus UIO path so both + * open modes can share metal_uio_populate(). + */ + result = snprintf(ldev->dev_path, sizeof(ldev->dev_path), + "/dev/%s", entry->d_name); + if (result < 0 || result >= (int)sizeof(ldev->dev_path)) { + result = -EOVERFLOW; + goto out; + } + result = snprintf(ldev->uio_name, sizeof(ldev->uio_name), + "%s", value); + if (result < 0 || result >= (int)sizeof(ldev->uio_name)) { + result = -EOVERFLOW; + goto out; + } + result = snprintf(ldev->uio_dev_name, + sizeof(ldev->uio_dev_name), "%s", + entry->d_name); + if (result < 0 || result >= (int)sizeof(ldev->uio_dev_name)) { + result = -EOVERFLOW; + goto out; + } + } + + result = found ? 0 : -ENODEV; + +out: + closedir(dir); + return result; +} + static int metal_uio_dev_bind(struct linux_device *ldev, struct linux_driver *ldrv) { @@ -505,6 +637,28 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) return metal_uio_populate(lbus, ldev); } +/* + * Open through the synthetic "uio" bus by treating dev_name as the value found + * in /sys/class/uio/uioX/name. No parent sysfs device is available here. 
+ */ +static int metal_uio_class_dev_open(struct linux_bus *lbus, + struct linux_device *ldev) +{ + int result; + + ldev->fd = -1; + ldev->device.irq_info = (void *)-1; + + result = metal_uio_find_device_by_name(ldev->dev_name, ldev); + if (result) { + metal_log(METAL_LOG_ERROR, "UIO device %s not found\n", + ldev->dev_name); + return result; + } + + return metal_uio_populate(lbus, ldev); +} + static void metal_uio_dev_close(struct linux_bus *lbus, struct linux_device *ldev) { From 187b01a68f45ce39c78214d6a6e0374b6f9c5968 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Mon, 8 Jun 2026 09:49:20 -0700 Subject: [PATCH 6/6] lib: linux: register synthetic UIO bus Register a synthetic Linux uio bus so callers can use the existing metal_device_open("uio", name, ...) API shape to open UIO devices by the value exported in /sys/class/uio/uioX/name. This bus is not backed by a sysfs bus directory or a probed kernel driver handle. During Linux bus initialization, register it only when /sys/class/uio exists, and skip the normal sysfs bus and driver probing that platform and PCI devices require. During device open, allow the synthetic uio driver to run its class-name open callback without an sdrv handle. The callback resolves the UIO class device and then uses the shared populate path added earlier, so the new bus preserves the same mmap, IRQ, DMA, and close semantics as existing UIO-backed platform and PCI opens. Also make bus close tolerate the missing sysfs bus handle and copy the requested device name with snprintf() so oversized names fail cleanly. 
Signed-off-by: Ben Levinsky --- lib/system/linux/device.c | 52 +++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/lib/system/linux/device.c b/lib/system/linux/device.c index 619408971..71090dbb2 100644 --- a/lib/system/linux/device.c +++ b/lib/system/linux/device.c @@ -102,6 +102,15 @@ static struct linux_device *to_linux_device(struct metal_device *device) return metal_container_of(device, struct linux_device, device); } +/* + * The "uio" bus is synthetic: it has no sysfs bus directory and opens + * devices directly from /sys/class/uio instead of through parent-bus drivers. + */ +static bool metal_linux_is_uio_bus(const struct linux_bus *lbus) +{ + return strcmp(lbus->bus_name, "uio") == 0; +} + /* * Read sysfs files whose useful value is the first text line. UIO class-name * matching needs the newline stripped before comparing against dev_name. @@ -503,8 +512,7 @@ static int metal_uio_populate(struct linux_bus *lbus, struct linux_device *ldev) irq_info = 1; if (write(ldev->fd, &irq_info, sizeof(irq_info)) <= 0) { metal_log(METAL_LOG_INFO, - "%s: No IRQ for device %s.\n", - __func__, ldev->dev_name); + "No IRQ for device %s.\n", ldev->dev_name); ldev->device.irq_num = 0; ldev->device.irq_info = (void *)-1; } else { @@ -767,6 +775,22 @@ static void metal_uio_dev_dma_unmap(struct linux_bus *lbus, } static struct linux_bus linux_bus[] = { + { + .bus_name = "uio", + .drivers = { + { + .drv_name = "uio", + .mod_name = "uio", + .cls_name = "uio", + .dev_open = metal_uio_class_dev_open, + .dev_close = metal_uio_dev_close, + .dev_irq_ack = metal_uio_dev_irq_ack, + .dev_dma_map = metal_uio_dev_dma_map, + .dev_dma_unmap = metal_uio_dev_dma_unmap, + }, + { 0 /* sentinel */ } + } + }, { .bus_name = "platform", .drivers = { @@ -841,12 +865,18 @@ static int metal_linux_dev_open(struct metal_bus *bus, for_each_linux_driver(lbus, ldrv) { /* Check if we have a viable driver. 
*/ - if (!ldrv->sdrv || !ldrv->dev_open) + if (!ldrv->dev_open || + (!metal_linux_is_uio_bus(lbus) && !ldrv->sdrv)) continue; /* Reset device data. */ memset(ldev, 0, sizeof(*ldev)); - strncpy(ldev->dev_name, dev_name, sizeof(ldev->dev_name) - 1); + error = snprintf(ldev->dev_name, sizeof(ldev->dev_name), + "%s", dev_name); + if (error < 0 || error >= (int)sizeof(ldev->dev_name)) { + error = -EOVERFLOW; + goto out; + } ldev->fd = -1; ldev->ldrv = ldrv; ldev->device.bus = bus; @@ -901,7 +931,9 @@ static void metal_linux_bus_close(struct metal_bus *bus) ldrv->sdrv = NULL; } - sysfs_close_bus(lbus->sbus); + /* The synthetic UIO bus does not open a sysfs bus handle. */ + if (lbus->sbus) + sysfs_close_bus(lbus->sbus); lbus->sbus = NULL; } @@ -1005,6 +1037,16 @@ static int metal_linux_probe_bus(struct linux_bus *lbus) struct linux_driver *ldrv; int ret, error = -ENODEV; + /* + * Register the synthetic bus only when the /sys/class/uio class exists + * and skip normal bus/driver probing. + */ + if (metal_linux_is_uio_bus(lbus)) { + if (sysfs_path_is_dir(METAL_UIO_CLASS_PATH) != 0) + return -ENODEV; + return metal_linux_register_bus(lbus); + } + lbus->sbus = sysfs_open_bus(lbus->bus_name); if (!lbus->sbus) return -ENODEV;