/* * raspberrypi_axi_monitor.c * * Author: james.hughes@raspberrypi.org * * Raspberry Pi AXI performance counters. * * Copyright (C) 2017 Raspberry Pi Trading Ltd. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include #include #include #include #include #include #include #include #include #include #define NUM_MONITORS 2 #define NUM_BUS_WATCHERS_PER_MONITOR 3 #define SYSTEM_MONITOR 0 #define VPU_MONITOR 1 #define MAX_BUSES 16 #define DEFAULT_SAMPLE_TIME 100 #define NUM_BUS_WATCHER_RESULTS 9 struct bus_watcher_data { union { u32 results[NUM_BUS_WATCHER_RESULTS]; struct { u32 atrans; u32 atwait; u32 amax; u32 wtrans; u32 wtwait; u32 wmax; u32 rtrans; u32 rtwait; u32 rmax; }; }; }; struct rpi_axiperf { struct platform_device *dev; struct dentry *root_folder; struct task_struct *monitor_thread; struct mutex lock; struct rpi_firmware *firmware; /* Sample time spent on for each bus */ int sample_time; /* Now storage for the per monitor settings and the resulting * performance figures */ struct { /* Bit field of buses we want to monitor */ int bus_enabled; /* Bit field of buses to filter by */ int bus_filter; /* The current buses being monitored on this monitor */ int current_bus[NUM_BUS_WATCHERS_PER_MONITOR]; /* The last bus monitored on this monitor */ int last_monitored; /* Set true if this mailbox must use the mailbox interface * rather than access registers directly. */ int use_mailbox_interface; /* Current result values */ struct bus_watcher_data results[MAX_BUSES]; struct dentry *debugfs_entry; void __iomem *base_address; } monitor[NUM_MONITORS]; }; static struct rpi_axiperf *state; /* Two monitors, System and VPU, each with the following register sets. * Each monitor can only monitor one bus at a time, so we time share them, * giving each bus 100ms (default, settable via debugfs) of time on its * associated monitor * Record results from the three Bus watchers per monitor and push to the sysfs */ /* general registers */ const int GEN_CTRL; const int GEN_CTL_ENABLE_BIT = BIT(0); const int GEN_CTL_RESET_BIT = BIT(1); /* Bus watcher registers */ const int BW_PITCH = 0x40; const int BW0_CTRL = 0x40; const int BW1_CTRL = 0x80; const int BW2_CTRL = 0xc0; const int BW_ATRANS_OFFSET = 0x04; const int BW_ATWAIT_OFFSET = 0x08; const int BW_AMAX_OFFSET = 0x0c; const int BW_WTRANS_OFFSET = 0x10; const int BW_WTWAIT_OFFSET = 0x14; const int BW_WMAX_OFFSET = 0x18; const int BW_RTRANS_OFFSET = 0x1c; const int BW_RTWAIT_OFFSET = 0x20; const int BW_RMAX_OFFSET = 0x24; const int BW_CTRL_RESET_BIT = BIT(31); const int BW_CTRL_ENABLE_BIT = BIT(30); const int BW_CTRL_ENABLE_ID_FILTER_BIT = BIT(29); const int BW_CTRL_LIMIT_HALT_BIT = BIT(28); const int BW_CTRL_SOURCE_SHIFT = 8; const int BW_CTRL_SOURCE_MASK = GENMASK(12, 8); // 5 bits const int BW_CTRL_BUS_WATCH_SHIFT; const int BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); // 6 bits const int BW_CTRL_BUS_FILTER_SHIFT = 8; const static char *bus_filter_strings[] = { "", "CORE0_V", "ICACHE0", "DCACHE0", "CORE1_V", "ICACHE1", "DCACHE1", "L2_MAIN", "HOST_PORT", "HOST_PORT2", "HVS", "ISP", "VIDEO_DCT", "VIDEO_SD2AXI", "CAM0", "CAM1", "DMA0", "DMA1", "DMA2_VPU", "JPEG", "VIDEO_CME", "TRANSPOSER", "VIDEO_FME", "CCP2TX", "USB", "V3D0", "V3D1", "V3D2", "AVE", "DEBUG", "CPU", "M30" }; const int num_bus_filters = ARRAY_SIZE(bus_filter_strings); const static char *system_bus_string[] = { "DMA_L2", "TRANS", "JPEG", "SYSTEM_UC", "DMA_UC", "SYSTEM_L2", "CCP2TX", "MPHI_RX", "MPHI_TX", "HVS", "H264", "ISP", "V3D", "PERIPHERAL", "CPU_UC", "CPU_L2" }; const int num_system_buses = ARRAY_SIZE(system_bus_string); const static char *vpu_bus_string[] = { "VPU1_D_L2", "VPU0_D_L2", "VPU1_I_L2", "VPU0_I_L2", "SYSTEM_L2", "L2_FLUSH", "DMA_L2", "VPU1_D_UC", "VPU0_D_UC", "VPU1_I_UC", "VPU0_I_UC", "SYSTEM_UC", "L2_OUT", "DMA_UC", "SDRAM", "L2_IN" }; const int num_vpu_buses = ARRAY_SIZE(vpu_bus_string); const static char *monitor_name[] = { "System", "VPU" }; static inline void write_reg(int monitor, int reg, u32 value) { writel(value, state->monitor[monitor].base_address + reg); } static inline u32 read_reg(int monitor, u32 reg) { return readl(state->monitor[monitor].base_address + reg); } static void read_bus_watcher(int monitor, int watcher, u32 *results) { if (state->monitor[monitor].use_mailbox_interface) { /* We have 9 results, plus the overheads of start address and * length So 11 u32 to define */ u32 tmp[11]; int err; tmp[0] = (u32)(uintptr_t)(state->monitor[monitor].base_address + watcher + BW_ATRANS_OFFSET); tmp[1] = NUM_BUS_WATCHER_RESULTS; err = rpi_firmware_property(state->firmware, RPI_FIRMWARE_GET_PERIPH_REG, tmp, sizeof(tmp)); if (err < 0 || tmp[1] != NUM_BUS_WATCHER_RESULTS) dev_err_once(&state->dev->dev, "Failed to read bus watcher"); else memcpy(results, &tmp[2], NUM_BUS_WATCHER_RESULTS * sizeof(u32)); } else { int i; void __iomem *addr = state->monitor[monitor].base_address + watcher + BW_ATRANS_OFFSET; for (i = 0; i < NUM_BUS_WATCHER_RESULTS; i++, addr += 4) *results++ = readl(addr); } } static void set_monitor_control(int monitor, u32 set) { if (state->monitor[monitor].use_mailbox_interface) { u32 tmp[3] = {(u32)(uintptr_t)(state->monitor[monitor].base_address + GEN_CTRL), 1, set}; int err = rpi_firmware_property(state->firmware, RPI_FIRMWARE_SET_PERIPH_REG, tmp, sizeof(tmp)); if (err < 0 || tmp[1] != 1) dev_err_once(&state->dev->dev, "Failed to set monitor control"); } else write_reg(monitor, GEN_CTRL, set); } static void set_bus_watcher_control(int monitor, int watcher, u32 set) { if (state->monitor[monitor].use_mailbox_interface) { u32 tmp[3] = {(u32)(uintptr_t)(state->monitor[monitor].base_address + watcher), 1, set}; int err = rpi_firmware_property(state->firmware, RPI_FIRMWARE_SET_PERIPH_REG, tmp, sizeof(tmp)); if (err < 0 || tmp[1] != 1) dev_err_once(&state->dev->dev, "Failed to set bus watcher control"); } else write_reg(monitor, watcher, set); } static void monitor(struct rpi_axiperf *state) { int monitor, num_buses[NUM_MONITORS]; mutex_lock(&state->lock); for (monitor = 0; monitor < NUM_MONITORS; monitor++) { typeof(state->monitor[0]) *mon = &(state->monitor[monitor]); /* Anything enabled? */ if (mon->bus_enabled == 0) { /* No, disable all monitoring for this monitor */ set_monitor_control(monitor, GEN_CTL_RESET_BIT); } else { int i; /* Find out how many busses we want to monitor, and * spread our 3 actual monitors over them */ num_buses[monitor] = hweight32(mon->bus_enabled); num_buses[monitor] = min(num_buses[monitor], NUM_BUS_WATCHERS_PER_MONITOR); for (i = 0; i < num_buses[monitor]; i++) { int bus_control; do { mon->last_monitored++; mon->last_monitored &= 0xf; } while ((mon->bus_enabled & (1 << mon->last_monitored)) == 0); mon->current_bus[i] = mon->last_monitored; /* Reset the counters */ set_bus_watcher_control(monitor, BW0_CTRL + i*BW_PITCH, BW_CTRL_RESET_BIT); bus_control = BW_CTRL_ENABLE_BIT | mon->current_bus[i]; if (mon->bus_filter) { bus_control |= BW_CTRL_ENABLE_ID_FILTER_BIT; bus_control |= ((mon->bus_filter & 0x1f) << BW_CTRL_BUS_FILTER_SHIFT); } // Start capture set_bus_watcher_control(monitor, BW0_CTRL + i*BW_PITCH, bus_control); } } /* start monitoring */ set_monitor_control(monitor, GEN_CTL_ENABLE_BIT); } mutex_unlock(&state->lock); msleep(state->sample_time); /* Now read the results */ mutex_lock(&state->lock); for (monitor = 0; monitor < NUM_MONITORS; monitor++) { typeof(state->monitor[0]) *mon = &(state->monitor[monitor]); /* Anything enabled? */ if (mon->bus_enabled == 0) { /* No, disable all monitoring for this monitor */ set_monitor_control(monitor, 0); } else { int i; for (i = 0; i < num_buses[monitor]; i++) { int bus = mon->current_bus[i]; read_bus_watcher(monitor, BW0_CTRL + i*BW_PITCH, (u32 *)&mon->results[bus].results); } } } mutex_unlock(&state->lock); } static int monitor_thread(void *data) { struct rpi_axiperf *state = data; while (1) { monitor(state); if (kthread_should_stop()) return 0; } return 0; } static ssize_t myreader(struct file *fp, char __user *user_buffer, size_t count, loff_t *position) { #define INIT_BUFF_SIZE 2048 int i; int idx = (int)(uintptr_t)(fp->private_data); int num_buses, cnt; char *string_buffer; int buff_size = INIT_BUFF_SIZE; char *p; typeof(state->monitor[0]) *mon = &(state->monitor[idx]); if (idx < 0 || idx > NUM_MONITORS) idx = 0; num_buses = idx == SYSTEM_MONITOR ? num_system_buses : num_vpu_buses; string_buffer = kmalloc(buff_size, GFP_KERNEL); if (!string_buffer) { dev_err(&state->dev->dev, "Failed temporary string allocation\n"); return 0; } p = string_buffer; mutex_lock(&state->lock); if (mon->bus_filter) { int filt = min(mon->bus_filter & 0x1f, num_bus_filters); cnt = snprintf(p, buff_size, "\nMonitoring transactions from %s only\n", bus_filter_strings[filt]); p += cnt; buff_size -= cnt; } cnt = snprintf(p, buff_size, " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax\n" "======================================================================================================\n"); if (cnt >= buff_size) goto done; p += cnt; buff_size -= cnt; for (i = 0; i < num_buses; i++) { if (mon->bus_enabled & (1 << i)) { #define DIVIDER (1024) typeof(mon->results[0]) *res = &(mon->results[i]); cnt = snprintf(p, buff_size, "%10s | %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK\n", idx == SYSTEM_MONITOR ? system_bus_string[i] : vpu_bus_string[i], res->atrans/DIVIDER, res->atwait/DIVIDER, res->amax/DIVIDER, res->wtrans/DIVIDER, res->wtwait/DIVIDER, res->wmax/DIVIDER, res->rtrans/DIVIDER, res->rtwait/DIVIDER, res->rmax/DIVIDER ); if (cnt >= buff_size) goto done; p += cnt; buff_size -= cnt; } } mutex_unlock(&state->lock); done: /* did the last string entry exceeed our buffer size? ie out of string * buffer space. Null terminate, use what we have. */ if (cnt >= buff_size) { buff_size = 0; string_buffer[INIT_BUFF_SIZE] = 0; } cnt = simple_read_from_buffer(user_buffer, count, position, string_buffer, INIT_BUFF_SIZE - buff_size); kfree(string_buffer); return cnt; } static ssize_t mywriter(struct file *fp, const char __user *user_buffer, size_t count, loff_t *position) { int idx = (int)(uintptr_t)(fp->private_data); if (idx < 0 || idx > NUM_MONITORS) idx = 0; /* At the moment, this does nothing, but in the future it could be * used to reset counters etc */ return count; } static const struct file_operations fops_debug = { .read = myreader, .write = mywriter, .open = simple_open }; static int rpi_axiperf_probe(struct platform_device *pdev) { int ret = 0, i; struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; struct device_node *fw_node; state = kzalloc(sizeof(struct rpi_axiperf), GFP_KERNEL); if (!state) return -ENOMEM; /* Get the firmware handle for future rpi-firmware-xxx calls */ fw_node = of_parse_phandle(np, "firmware", 0); if (!fw_node) { dev_err(dev, "Missing firmware node\n"); return -ENOENT; } state->firmware = rpi_firmware_get(fw_node); if (!state->firmware) return -EPROBE_DEFER; /* Special case for the VPU monitor, we must use the mailbox interface * as it is not accessible from the ARM address space. */ state->monitor[VPU_MONITOR].use_mailbox_interface = 1; state->monitor[SYSTEM_MONITOR].use_mailbox_interface = 0; for (i = 0; i < NUM_MONITORS; i++) { if (state->monitor[i].use_mailbox_interface) { of_property_read_u32_index(np, "reg", i*2, (u32 *)(&state->monitor[i].base_address)); } else { struct resource *resource = platform_get_resource(pdev, IORESOURCE_MEM, i); state->monitor[i].base_address = devm_ioremap_resource(&pdev->dev, resource); } if (IS_ERR(state->monitor[i].base_address)) return PTR_ERR(state->monitor[i].base_address); /* Enable all buses by default */ state->monitor[i].bus_enabled = 0xffff; } state->dev = pdev; platform_set_drvdata(pdev, state); state->sample_time = DEFAULT_SAMPLE_TIME; /* Set up all the debugfs stuff */ state->root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL); for (i = 0; i < NUM_MONITORS; i++) { state->monitor[i].debugfs_entry = debugfs_create_dir(monitor_name[i], state->root_folder); if (IS_ERR(state->monitor[i].debugfs_entry)) state->monitor[i].debugfs_entry = NULL; debugfs_create_file("data", 0444, state->monitor[i].debugfs_entry, (void *)(uintptr_t)i, &fops_debug); debugfs_create_u32("enable", 0644, state->monitor[i].debugfs_entry, &state->monitor[i].bus_enabled); debugfs_create_u32("filter", 0644, state->monitor[i].debugfs_entry, &state->monitor[i].bus_filter); debugfs_create_u32("sample_time", 0644, state->monitor[i].debugfs_entry, &state->sample_time); } mutex_init(&state->lock); state->monitor_thread = kthread_run(monitor_thread, state, "rpi-axiperfmon"); return ret; } static int rpi_axiperf_remove(struct platform_device *dev) { int ret = 0; kthread_stop(state->monitor_thread); debugfs_remove_recursive(state->root_folder); state->root_folder = NULL; return ret; } static const struct of_device_id rpi_axiperf_match[] = { { .compatible = "brcm,bcm2835-axiperf", }, {}, }; MODULE_DEVICE_TABLE(of, rpi_axiperf_match); static struct platform_driver rpi_axiperf_driver = { .probe = rpi_axiperf_probe, .remove = rpi_axiperf_remove, .driver = { .name = "rpi-bcm2835-axiperf", .of_match_table = of_match_ptr(rpi_axiperf_match), }, }; module_platform_driver(rpi_axiperf_driver); /* Module information */ MODULE_AUTHOR("James Hughes "); MODULE_DESCRIPTION("RPI AXI Performance monitor driver"); MODULE_LICENSE("GPL");