aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/perf/raspberrypi_axi_monitor.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/perf/raspberrypi_axi_monitor.c')
-rw-r--r--drivers/perf/raspberrypi_axi_monitor.c637
1 files changed, 637 insertions, 0 deletions
diff --git a/drivers/perf/raspberrypi_axi_monitor.c b/drivers/perf/raspberrypi_axi_monitor.c
new file mode 100644
index 000000000000..5ae2bdaa88b4
--- /dev/null
+++ b/drivers/perf/raspberrypi_axi_monitor.c
@@ -0,0 +1,637 @@
+/*
+ * raspberrypi_axi_monitor.c
+ *
+ * Author: james.hughes@raspberrypi.org
+ *
+ * Raspberry Pi AXI performance counters.
+ *
+ * Copyright (C) 2017 Raspberry Pi Trading Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/devcoredump.h>
+#include <linux/device.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <soc/bcm2835/raspberrypi-firmware.h>
+
+#define NUM_MONITORS 2
+#define NUM_BUS_WATCHERS_PER_MONITOR 3
+
+#define SYSTEM_MONITOR 0
+#define VPU_MONITOR 1
+
+#define MAX_BUSES 16
+#define DEFAULT_SAMPLE_TIME 100
+
+#define NUM_BUS_WATCHER_RESULTS 9
+
+struct bus_watcher_data {
+ union {
+ u32 results[NUM_BUS_WATCHER_RESULTS];
+ struct {
+ u32 atrans;
+ u32 atwait;
+ u32 amax;
+ u32 wtrans;
+ u32 wtwait;
+ u32 wmax;
+ u32 rtrans;
+ u32 rtwait;
+ u32 rmax;
+ };
+ };
+};
+
+
+struct rpi_axiperf {
+ struct platform_device *dev;
+ struct dentry *root_folder;
+
+ struct task_struct *monitor_thread;
+ struct mutex lock;
+
+ struct rpi_firmware *firmware;
+
+ /* Sample time spent on for each bus */
+ int sample_time;
+
+ /* Now storage for the per monitor settings and the resulting
+ * performance figures
+ */
+ struct {
+ /* Bit field of buses we want to monitor */
+ int bus_enabled;
+ /* Bit field of buses to filter by */
+ int bus_filter;
+ /* The current buses being monitored on this monitor */
+ int current_bus[NUM_BUS_WATCHERS_PER_MONITOR];
+ /* The last bus monitored on this monitor */
+ int last_monitored;
+
+ /* Set true if this mailbox must use the mailbox interface
+ * rather than access registers directly.
+ */
+ int use_mailbox_interface;
+
+ /* Current result values */
+ struct bus_watcher_data results[MAX_BUSES];
+
+ struct dentry *debugfs_entry;
+ void __iomem *base_address;
+
+ } monitor[NUM_MONITORS];
+
+};
+
+static struct rpi_axiperf *state;
+
+/* Two monitors, System and VPU, each with the following register sets.
+ * Each monitor can only monitor one bus at a time, so we time share them,
+ * giving each bus 100ms (default, settable via debugfs) of time on its
+ * associated monitor
+ * Record results from the three Bus watchers per monitor and push to the sysfs
+ */
+
+/* general registers */
+const int GEN_CTRL;
+
+const int GEN_CTL_ENABLE_BIT = BIT(0);
+const int GEN_CTL_RESET_BIT = BIT(1);
+
+/* Bus watcher registers */
+const int BW_PITCH = 0x40;
+
+const int BW0_CTRL = 0x40;
+const int BW1_CTRL = 0x80;
+const int BW2_CTRL = 0xc0;
+
+const int BW_ATRANS_OFFSET = 0x04;
+const int BW_ATWAIT_OFFSET = 0x08;
+const int BW_AMAX_OFFSET = 0x0c;
+const int BW_WTRANS_OFFSET = 0x10;
+const int BW_WTWAIT_OFFSET = 0x14;
+const int BW_WMAX_OFFSET = 0x18;
+const int BW_RTRANS_OFFSET = 0x1c;
+const int BW_RTWAIT_OFFSET = 0x20;
+const int BW_RMAX_OFFSET = 0x24;
+
+const int BW_CTRL_RESET_BIT = BIT(31);
+const int BW_CTRL_ENABLE_BIT = BIT(30);
+const int BW_CTRL_ENABLE_ID_FILTER_BIT = BIT(29);
+const int BW_CTRL_LIMIT_HALT_BIT = BIT(28);
+
+const int BW_CTRL_SOURCE_SHIFT = 8;
+const int BW_CTRL_SOURCE_MASK = GENMASK(12, 8); // 5 bits
+const int BW_CTRL_BUS_WATCH_SHIFT;
+const int BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); // 6 bits
+const int BW_CTRL_BUS_FILTER_SHIFT = 8;
+
+const static char *bus_filter_strings[] = {
+ "",
+ "CORE0_V",
+ "ICACHE0",
+ "DCACHE0",
+ "CORE1_V",
+ "ICACHE1",
+ "DCACHE1",
+ "L2_MAIN",
+ "HOST_PORT",
+ "HOST_PORT2",
+ "HVS",
+ "ISP",
+ "VIDEO_DCT",
+ "VIDEO_SD2AXI",
+ "CAM0",
+ "CAM1",
+ "DMA0",
+ "DMA1",
+ "DMA2_VPU",
+ "JPEG",
+ "VIDEO_CME",
+ "TRANSPOSER",
+ "VIDEO_FME",
+ "CCP2TX",
+ "USB",
+ "V3D0",
+ "V3D1",
+ "V3D2",
+ "AVE",
+ "DEBUG",
+ "CPU",
+ "M30"
+};
+
+const int num_bus_filters = ARRAY_SIZE(bus_filter_strings);
+
+const static char *system_bus_string[] = {
+ "DMA_L2",
+ "TRANS",
+ "JPEG",
+ "SYSTEM_UC",
+ "DMA_UC",
+ "SYSTEM_L2",
+ "CCP2TX",
+ "MPHI_RX",
+ "MPHI_TX",
+ "HVS",
+ "H264",
+ "ISP",
+ "V3D",
+ "PERIPHERAL",
+ "CPU_UC",
+ "CPU_L2"
+};
+
+const int num_system_buses = ARRAY_SIZE(system_bus_string);
+
+const static char *vpu_bus_string[] = {
+ "VPU1_D_L2",
+ "VPU0_D_L2",
+ "VPU1_I_L2",
+ "VPU0_I_L2",
+ "SYSTEM_L2",
+ "L2_FLUSH",
+ "DMA_L2",
+ "VPU1_D_UC",
+ "VPU0_D_UC",
+ "VPU1_I_UC",
+ "VPU0_I_UC",
+ "SYSTEM_UC",
+ "L2_OUT",
+ "DMA_UC",
+ "SDRAM",
+ "L2_IN"
+};
+
+const int num_vpu_buses = ARRAY_SIZE(vpu_bus_string);
+
+const static char *monitor_name[] = {
+ "System",
+ "VPU"
+};
+
+static inline void write_reg(int monitor, int reg, u32 value)
+{
+ writel(value, state->monitor[monitor].base_address + reg);
+}
+
+static inline u32 read_reg(int monitor, u32 reg)
+{
+ return readl(state->monitor[monitor].base_address + reg);
+}
+
+static void read_bus_watcher(int monitor, int watcher, u32 *results)
+{
+ if (state->monitor[monitor].use_mailbox_interface) {
+ /* We have 9 results, plus the overheads of start address and
+ * length So 11 u32 to define
+ */
+ u32 tmp[11];
+ int err;
+
+ tmp[0] = (u32)(uintptr_t)(state->monitor[monitor].base_address + watcher
+ + BW_ATRANS_OFFSET);
+ tmp[1] = NUM_BUS_WATCHER_RESULTS;
+
+ err = rpi_firmware_property(state->firmware,
+ RPI_FIRMWARE_GET_PERIPH_REG,
+ tmp, sizeof(tmp));
+
+ if (err < 0 || tmp[1] != NUM_BUS_WATCHER_RESULTS)
+ dev_err_once(&state->dev->dev,
+ "Failed to read bus watcher");
+ else
+ memcpy(results, &tmp[2],
+ NUM_BUS_WATCHER_RESULTS * sizeof(u32));
+ } else {
+ int i;
+ void __iomem *addr = state->monitor[monitor].base_address
+ + watcher + BW_ATRANS_OFFSET;
+ for (i = 0; i < NUM_BUS_WATCHER_RESULTS; i++, addr += 4)
+ *results++ = readl(addr);
+ }
+}
+
+static void set_monitor_control(int monitor, u32 set)
+{
+ if (state->monitor[monitor].use_mailbox_interface) {
+ u32 tmp[3] = {(u32)(uintptr_t)(state->monitor[monitor].base_address +
+ GEN_CTRL), 1, set};
+ int err = rpi_firmware_property(state->firmware,
+ RPI_FIRMWARE_SET_PERIPH_REG,
+ tmp, sizeof(tmp));
+
+ if (err < 0 || tmp[1] != 1)
+ dev_err_once(&state->dev->dev,
+ "Failed to set monitor control");
+ } else
+ write_reg(monitor, GEN_CTRL, set);
+}
+
+static void set_bus_watcher_control(int monitor, int watcher, u32 set)
+{
+ if (state->monitor[monitor].use_mailbox_interface) {
+ u32 tmp[3] = {(u32)(uintptr_t)(state->monitor[monitor].base_address +
+ watcher), 1, set};
+ int err = rpi_firmware_property(state->firmware,
+ RPI_FIRMWARE_SET_PERIPH_REG,
+ tmp, sizeof(tmp));
+ if (err < 0 || tmp[1] != 1)
+ dev_err_once(&state->dev->dev,
+ "Failed to set bus watcher control");
+ } else
+ write_reg(monitor, watcher, set);
+}
+
+static void monitor(struct rpi_axiperf *state)
+{
+ int monitor, num_buses[NUM_MONITORS];
+
+ mutex_lock(&state->lock);
+
+ for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
+ typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
+
+ /* Anything enabled? */
+ if (mon->bus_enabled == 0) {
+ /* No, disable all monitoring for this monitor */
+ set_monitor_control(monitor, GEN_CTL_RESET_BIT);
+ } else {
+ int i;
+
+ /* Find out how many busses we want to monitor, and
+ * spread our 3 actual monitors over them
+ */
+ num_buses[monitor] = hweight32(mon->bus_enabled);
+ num_buses[monitor] = min(num_buses[monitor],
+ NUM_BUS_WATCHERS_PER_MONITOR);
+
+ for (i = 0; i < num_buses[monitor]; i++) {
+ int bus_control;
+
+ do {
+ mon->last_monitored++;
+ mon->last_monitored &= 0xf;
+ } while ((mon->bus_enabled &
+ (1 << mon->last_monitored)) == 0);
+
+ mon->current_bus[i] = mon->last_monitored;
+
+ /* Reset the counters */
+ set_bus_watcher_control(monitor,
+ BW0_CTRL +
+ i*BW_PITCH,
+ BW_CTRL_RESET_BIT);
+
+ bus_control = BW_CTRL_ENABLE_BIT |
+ mon->current_bus[i];
+
+ if (mon->bus_filter) {
+ bus_control |=
+ BW_CTRL_ENABLE_ID_FILTER_BIT;
+ bus_control |=
+ ((mon->bus_filter & 0x1f)
+ << BW_CTRL_BUS_FILTER_SHIFT);
+ }
+
+ // Start capture
+ set_bus_watcher_control(monitor,
+ BW0_CTRL + i*BW_PITCH,
+ bus_control);
+ }
+ }
+
+ /* start monitoring */
+ set_monitor_control(monitor, GEN_CTL_ENABLE_BIT);
+ }
+
+ mutex_unlock(&state->lock);
+
+ msleep(state->sample_time);
+
+ /* Now read the results */
+
+ mutex_lock(&state->lock);
+ for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
+ typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
+
+ /* Anything enabled? */
+ if (mon->bus_enabled == 0) {
+ /* No, disable all monitoring for this monitor */
+ set_monitor_control(monitor, 0);
+ } else {
+ int i;
+
+ for (i = 0; i < num_buses[monitor]; i++) {
+ int bus = mon->current_bus[i];
+
+ read_bus_watcher(monitor,
+ BW0_CTRL + i*BW_PITCH,
+ (u32 *)&mon->results[bus].results);
+ }
+ }
+ }
+ mutex_unlock(&state->lock);
+}
+
+static int monitor_thread(void *data)
+{
+ struct rpi_axiperf *state = data;
+
+ while (1) {
+ monitor(state);
+
+ if (kthread_should_stop())
+ return 0;
+ }
+ return 0;
+}
+
+static ssize_t myreader(struct file *fp, char __user *user_buffer,
+ size_t count, loff_t *position)
+{
+#define INIT_BUFF_SIZE 2048
+
+ int i;
+ int idx = (int)(uintptr_t)(fp->private_data);
+ int num_buses, cnt;
+ char *string_buffer;
+ int buff_size = INIT_BUFF_SIZE;
+ char *p;
+ typeof(state->monitor[0]) *mon = &(state->monitor[idx]);
+
+ if (idx < 0 || idx > NUM_MONITORS)
+ idx = 0;
+
+ num_buses = idx == SYSTEM_MONITOR ? num_system_buses : num_vpu_buses;
+
+ string_buffer = kmalloc(buff_size, GFP_KERNEL);
+
+ if (!string_buffer) {
+ dev_err(&state->dev->dev,
+ "Failed temporary string allocation\n");
+ return 0;
+ }
+
+ p = string_buffer;
+
+ mutex_lock(&state->lock);
+
+ if (mon->bus_filter) {
+ int filt = min(mon->bus_filter & 0x1f, num_bus_filters);
+
+ cnt = snprintf(p, buff_size,
+ "\nMonitoring transactions from %s only\n",
+ bus_filter_strings[filt]);
+ p += cnt;
+ buff_size -= cnt;
+ }
+
+ cnt = snprintf(p, buff_size, " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax\n"
+ "======================================================================================================\n");
+
+ if (cnt >= buff_size)
+ goto done;
+
+ p += cnt;
+ buff_size -= cnt;
+
+ for (i = 0; i < num_buses; i++) {
+ if (mon->bus_enabled & (1 << i)) {
+#define DIVIDER (1024)
+ typeof(mon->results[0]) *res = &(mon->results[i]);
+
+ cnt = snprintf(p, buff_size,
+ "%10s | %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK\n",
+ idx == SYSTEM_MONITOR ?
+ system_bus_string[i] :
+ vpu_bus_string[i],
+ res->atrans/DIVIDER,
+ res->atwait/DIVIDER,
+ res->amax/DIVIDER,
+ res->wtrans/DIVIDER,
+ res->wtwait/DIVIDER,
+ res->wmax/DIVIDER,
+ res->rtrans/DIVIDER,
+ res->rtwait/DIVIDER,
+ res->rmax/DIVIDER
+ );
+ if (cnt >= buff_size)
+ goto done;
+
+ p += cnt;
+ buff_size -= cnt;
+ }
+ }
+
+ mutex_unlock(&state->lock);
+
+done:
+
+ /* did the last string entry exceeed our buffer size? ie out of string
+ * buffer space. Null terminate, use what we have.
+ */
+ if (cnt >= buff_size) {
+ buff_size = 0;
+ string_buffer[INIT_BUFF_SIZE] = 0;
+ }
+
+ cnt = simple_read_from_buffer(user_buffer, count, position,
+ string_buffer,
+ INIT_BUFF_SIZE - buff_size);
+
+ kfree(string_buffer);
+
+ return cnt;
+}
+
+static ssize_t mywriter(struct file *fp, const char __user *user_buffer,
+ size_t count, loff_t *position)
+{
+ int idx = (int)(uintptr_t)(fp->private_data);
+
+ if (idx < 0 || idx > NUM_MONITORS)
+ idx = 0;
+
+ /* At the moment, this does nothing, but in the future it could be
+ * used to reset counters etc
+ */
+ return count;
+}
+
+static const struct file_operations fops_debug = {
+ .read = myreader,
+ .write = mywriter,
+ .open = simple_open
+};
+
+static int rpi_axiperf_probe(struct platform_device *pdev)
+{
+ int ret = 0, i;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ struct device_node *fw_node;
+
+ state = kzalloc(sizeof(struct rpi_axiperf), GFP_KERNEL);
+ if (!state)
+ return -ENOMEM;
+
+ /* Get the firmware handle for future rpi-firmware-xxx calls */
+ fw_node = of_parse_phandle(np, "firmware", 0);
+ if (!fw_node) {
+ dev_err(dev, "Missing firmware node\n");
+ return -ENOENT;
+ }
+
+ state->firmware = rpi_firmware_get(fw_node);
+ if (!state->firmware)
+ return -EPROBE_DEFER;
+
+ /* Special case for the VPU monitor, we must use the mailbox interface
+ * as it is not accessible from the ARM address space.
+ */
+ state->monitor[VPU_MONITOR].use_mailbox_interface = 1;
+ state->monitor[SYSTEM_MONITOR].use_mailbox_interface = 0;
+
+ for (i = 0; i < NUM_MONITORS; i++) {
+ if (state->monitor[i].use_mailbox_interface) {
+ of_property_read_u32_index(np, "reg", i*2,
+ (u32 *)(&state->monitor[i].base_address));
+ } else {
+ struct resource *resource =
+ platform_get_resource(pdev, IORESOURCE_MEM, i);
+
+ state->monitor[i].base_address =
+ devm_ioremap_resource(&pdev->dev, resource);
+ }
+
+ if (IS_ERR(state->monitor[i].base_address))
+ return PTR_ERR(state->monitor[i].base_address);
+
+ /* Enable all buses by default */
+ state->monitor[i].bus_enabled = 0xffff;
+ }
+
+ state->dev = pdev;
+ platform_set_drvdata(pdev, state);
+
+ state->sample_time = DEFAULT_SAMPLE_TIME;
+
+ /* Set up all the debugfs stuff */
+ state->root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+ for (i = 0; i < NUM_MONITORS; i++) {
+ state->monitor[i].debugfs_entry =
+ debugfs_create_dir(monitor_name[i], state->root_folder);
+ if (IS_ERR(state->monitor[i].debugfs_entry))
+ state->monitor[i].debugfs_entry = NULL;
+
+ debugfs_create_file("data", 0444,
+ state->monitor[i].debugfs_entry,
+ (void *)(uintptr_t)i, &fops_debug);
+ debugfs_create_u32("enable", 0644,
+ state->monitor[i].debugfs_entry,
+ &state->monitor[i].bus_enabled);
+ debugfs_create_u32("filter", 0644,
+ state->monitor[i].debugfs_entry,
+ &state->monitor[i].bus_filter);
+ debugfs_create_u32("sample_time", 0644,
+ state->monitor[i].debugfs_entry,
+ &state->sample_time);
+ }
+
+ mutex_init(&state->lock);
+
+ state->monitor_thread = kthread_run(monitor_thread, state,
+ "rpi-axiperfmon");
+
+ return ret;
+
+}
+
+static int rpi_axiperf_remove(struct platform_device *dev)
+{
+ int ret = 0;
+
+ kthread_stop(state->monitor_thread);
+
+ debugfs_remove_recursive(state->root_folder);
+ state->root_folder = NULL;
+
+ return ret;
+}
+
+static const struct of_device_id rpi_axiperf_match[] = {
+ {
+ .compatible = "brcm,bcm2835-axiperf",
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, rpi_axiperf_match);
+
+static struct platform_driver rpi_axiperf_driver = {
+ .probe = rpi_axiperf_probe,
+ .remove = rpi_axiperf_remove,
+ .driver = {
+ .name = "rpi-bcm2835-axiperf",
+ .of_match_table = of_match_ptr(rpi_axiperf_match),
+ },
+};
+
+module_platform_driver(rpi_axiperf_driver);
+
+/* Module information */
+MODULE_AUTHOR("James Hughes <james.hughes@raspberrypi.org>");
+MODULE_DESCRIPTION("RPI AXI Performance monitor driver");
+MODULE_LICENSE("GPL");
+