/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include "adreno.h"
#include "adreno_a4xx.h"
#include "adreno_trace.h"
#include "adreno_pm4types.h"

#define ADRENO_RB_PREEMPT_TOKEN_DWORDS	125

static void a4xx_preemption_timer(unsigned long data)
{
	struct adreno_device *adreno_dev = (struct adreno_device *) data;
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int cur_rptr = adreno_get_rptr(adreno_dev->cur_rb);
	unsigned int next_rptr = adreno_get_rptr(adreno_dev->next_rb);

	KGSL_DRV_ERR(device,
		"Preemption timed out. cur_rb rptr/wptr %x/%x id %d, next_rb rptr/wptr %x/%x id %d, disp_state: %d\n",
		cur_rptr, adreno_dev->cur_rb->wptr, adreno_dev->cur_rb->id,
		next_rptr, adreno_dev->next_rb->wptr, adreno_dev->next_rb->id,
		atomic_read(&adreno_dev->preempt.state));

	adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
	adreno_dispatcher_schedule(device);
}

static unsigned int a4xx_preemption_token(struct adreno_device *adreno_dev,
			unsigned int *cmds, uint64_t gpuaddr)
{
	unsigned int *cmds_orig = cmds;

	/* Turn on preemption flag */
	/* preemption token - fill when pt switch command size is known */
	*cmds++ = cp_type3_packet(CP_PREEMPT_TOKEN, 3);
	*cmds++ = (uint)gpuaddr;
	*cmds++ = 1;
	/* generate interrupt on preemption completion */
	*cmds++ = 1 << CP_PREEMPT_ORDINAL_INTERRUPT;

	return (unsigned int) (cmds - cmds_orig);
}

unsigned int a4xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
			struct adreno_ringbuffer *rb, unsigned int *cmds,
			struct kgsl_context *context)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int *cmds_orig = cmds;
	unsigned int cond_addr = device->memstore.gpuaddr +
		MEMSTORE_ID_GPU_ADDR(device, context->id, preempted);

	cmds += a4xx_preemption_token(adreno_dev, cmds, cond_addr);

	*cmds++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmds++ = cond_addr;
	*cmds++ = cond_addr;
	*cmds++ = 1;
	*cmds++ = 7;

	/* clear preemption flag */
	*cmds++ = cp_type3_packet(CP_MEM_WRITE, 2);
	*cmds++ = cond_addr;
	*cmds++ = 0;
	*cmds++ = cp_type3_packet(CP_WAIT_MEM_WRITES, 1);
	*cmds++ = 0;
	*cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1);
	*cmds++ = 0;

	return (unsigned int) (cmds - cmds_orig);
}
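
/*
 * a4xx_preemption_start() - Stage the incoming ringbuffer's state in the CP
 * scratch registers (RB base, RB_CNTL, rptr address, rptr, wptr, IB state and
 * GPR11) so the GPU can program itself from them and start executing the new
 * ringbuffer when the preemption trigger fires.
 */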
static void a4xx_preemption_start(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	uint32_t val;

	/*
	 * Set up the scratch registers from which the GPU will program the
	 * registers required to start execution of the new ringbuffer;
	 * set the ringbuffer address
	 */
	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG8, rb->buffer_desc.gpuaddr);
	kgsl_regread(device, A4XX_CP_RB_CNTL, &val);
	/* scratch REG9 corresponds to CP_RB_CNTL register */
	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG9, val);
	/* scratch REG10 corresponds to rptr address */
	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG10,
			SCRATCH_RPTR_GPU_ADDR(device, rb->id));
	/* scratch REG11 corresponds to rptr */
	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG11, adreno_get_rptr(rb));
	/* scratch REG12 corresponds to wptr */
	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG12, rb->wptr);
	/*
	 * scratch REG13 corresponds to IB1_BASE,
	 * 0 since we do not do switches in between IBs
	 */
	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG13, 0);
	/* scratch REG14 corresponds to IB1_BUFSZ */
	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG14, 0);
	/* scratch REG15 corresponds to IB2_BASE */
	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG15, 0);
	/* scratch REG16 corresponds to IB2_BUFSZ */
	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG16, 0);
	/* scratch REG17 corresponds to GPR11 */
	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG17, rb->gpr11);
}

static void a4xx_preemption_save(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	kgsl_regread(device, A4XX_CP_SCRATCH_REG23, &rb->gpr11);
}

static int a4xx_submit_preempt_token(struct adreno_ringbuffer *rb,
				struct adreno_ringbuffer *incoming_rb)
{
	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int *ringcmds, *start;
	int ptname;
	struct kgsl_pagetable *pt;
	int pt_switch_sizedwords = 0, total_sizedwords = 20;
	unsigned int link[ADRENO_RB_PREEMPT_TOKEN_DWORDS];
	uint i;

	if (incoming_rb->preempted_midway) {
		kgsl_sharedmem_readl(&incoming_rb->pagetable_desc,
			&ptname, PT_INFO_OFFSET(current_rb_ptname));
		pt = kgsl_mmu_get_pt_from_ptname(&(device->mmu), ptname);
		if (IS_ERR_OR_NULL(pt))
			return (pt == NULL) ? -ENOENT : PTR_ERR(pt);
		/* generate the pagetable switch commands for the incoming RB */
		pt_switch_sizedwords =
			adreno_iommu_set_pt_generate_cmds(incoming_rb,
							&link[0], pt);
		total_sizedwords += pt_switch_sizedwords;
	}

	/*
	 * Allocate total_sizedwords space in the RB; this is the maximum
	 * space required.
	 */
	ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
	if (IS_ERR(ringcmds))
		return PTR_ERR(ringcmds);

	start = ringcmds;

	*ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1);
	*ringcmds++ = 0;

	if (incoming_rb->preempted_midway) {
		for (i = 0; i < pt_switch_sizedwords; i++)
			*ringcmds++ = link[i];
	}

	*ringcmds++ = cp_register(adreno_dev, adreno_getreg(adreno_dev,
			ADRENO_REG_CP_PREEMPT_DISABLE), 1);
	*ringcmds++ = 0;

	*ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1);
	*ringcmds++ = 1;

	ringcmds += a4xx_preemption_token(adreno_dev, ringcmds,
				device->memstore.gpuaddr +
				MEMSTORE_RB_OFFSET(rb, preempted));

	if ((uint)(ringcmds - start) > total_sizedwords)
		KGSL_DRV_ERR(device, "Insufficient rb size allocated\n");

	/*
	 * If fewer commands were written than the space reserved in the RB,
	 * adjust the wptr accordingly
	 */
	rb->wptr = rb->wptr - (total_sizedwords - (uint)(ringcmds - start));

	/* submit just the preempt token */
	mb();
	kgsl_pwrscale_busy(device);
	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, rb->wptr);
	return 0;
}
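
/*
 * a4xx_preempt_trig_state() - Handle the TRIGGERED preemption state when the
 * hardware has gone idle without raising a preemption interrupt. Depending on
 * what the CP registers show, this either records a preemption that completed
 * silently, flags a preemption fault, resubmits commands pending on the
 * current RB, or submits a preempt token to force the switch.
 */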
static void a4xx_preempt_trig_state(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int rbbase, val;
	int ret;

	/*
	 * Hardware not yet idle means that the preemption interrupt may
	 * still occur; nothing to do here until the interrupt signals
	 * completion of preemption, so just return
	 */
	if (!adreno_hw_isidle(adreno_dev))
		return;

	/*
	 * We just changed states, reschedule dispatcher to change
	 * preemption states
	 */
	if (atomic_read(&adreno_dev->preempt.state) !=
		ADRENO_PREEMPT_TRIGGERED) {
		adreno_dispatcher_schedule(device);
		return;
	}

	/*
	 * H/W is idle and we did not get a preemption interrupt: either the
	 * device went idle without encountering any preempt token, or we
	 * already preempted without an interrupt
	 */
	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_BASE, &rbbase);
	/* Did preemption occur? If so, change states and return */
	if (rbbase != adreno_dev->cur_rb->buffer_desc.gpuaddr) {
		adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT_DEBUG, &val);
		if (val &&
			rbbase == adreno_dev->next_rb->buffer_desc.gpuaddr) {
			KGSL_DRV_INFO(device,
				"Preemption completed without interrupt\n");
			trace_adreno_hw_preempt_trig_to_comp(
				adreno_dev->cur_rb, adreno_dev->next_rb,
				adreno_get_rptr(adreno_dev->cur_rb),
				adreno_get_rptr(adreno_dev->next_rb));
			adreno_set_preempt_state(adreno_dev,
				ADRENO_PREEMPT_COMPLETE);
			adreno_dispatcher_schedule(device);
			return;
		}
		adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
		/* reschedule dispatcher to take care of the fault */
		adreno_dispatcher_schedule(device);
		return;
	}
	/*
	 * Check if a preempt token was submitted after the preemption
	 * trigger; if so then preemption should have occurred. Since the
	 * device is already idle, something went wrong - trigger FT
	 */
	if (adreno_dev->preempt.token_submit) {
		adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
		/* reschedule dispatcher to take care of the fault */
		adreno_dispatcher_schedule(device);
		return;
	}
	/*
	 * No preempt token was submitted after the preemption trigger, so
	 * the device may have gone idle before preemption could occur. If
	 * commands were submitted to the current RB after triggering
	 * preemption, submit them now as they may contain a preempt token
	 */
	if (!adreno_rb_empty(adreno_dev->cur_rb)) {
		/*
		 * Memory barrier before informing the
		 * hardware of new commands
		 */
		mb();
		kgsl_pwrscale_busy(device);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR,
			adreno_dev->cur_rb->wptr);
		return;
	}

	/* Submit a preempt token to make preemption happen */
	ret = adreno_drawctxt_switch(adreno_dev, adreno_dev->cur_rb, NULL, 0);
	if (ret)
		KGSL_DRV_ERR(device,
			"Unable to switch context to NULL: %d\n", ret);

	ret = a4xx_submit_preempt_token(adreno_dev->cur_rb,
					adreno_dev->next_rb);
	if (ret)
		KGSL_DRV_ERR(device,
			"Unable to submit preempt token: %d\n", ret);

	adreno_dev->preempt.token_submit = true;
	adreno_dev->cur_rb->wptr_preempt_end = adreno_dev->cur_rb->wptr;
	trace_adreno_hw_preempt_token_submit(adreno_dev->cur_rb,
		adreno_dev->next_rb,
		adreno_get_rptr(adreno_dev->cur_rb),
		adreno_get_rptr(adreno_dev->next_rb));
}

static struct adreno_ringbuffer *a4xx_next_ringbuffer(
		struct adreno_device *adreno_dev)
{
	struct adreno_ringbuffer *rb;
	int i;

	/* Return the highest priority ringbuffer that has commands queued */
	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
		if (!adreno_rb_empty(rb))
			return rb;
	}

	return NULL;
}
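
/*
 * a4xx_preempt_clear_state() - Start a new preemption cycle from the NONE
 * state: pick the highest priority non-empty ringbuffer, stage its state in
 * the scratch registers, arm the preemption timer, issue the CP_PREEMPT
 * trigger and, when switching down to a lower priority RB, also submit a
 * preempt token so the switch is guaranteed to happen.
 */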
static void a4xx_preempt_clear_state(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct adreno_ringbuffer *highest_busy_rb;
	int switch_low_to_high;
	int ret;

	/* Device not awake means there is nothing to do */
	if (!kgsl_state_is_awake(device))
		return;

	highest_busy_rb = a4xx_next_ringbuffer(adreno_dev);
	if (!highest_busy_rb || highest_busy_rb == adreno_dev->cur_rb)
		return;

	switch_low_to_high = adreno_compare_prio_level(
					highest_busy_rb->id,
					adreno_dev->cur_rb->id);

	if (switch_low_to_high < 0) {
		/*
		 * If switching to a lower priority RB, only do so once the
		 * rptr and wptr of the current RB are equal, i.e. the
		 * current (higher priority) RB has drained
		 */
		if (!adreno_rb_empty(adreno_dev->cur_rb))
			return;
		/*
		 * Switch to the default context: when we later switch back
		 * to the higher priority RB it is not known which pagetable
		 * will be current, so by making it default here the next
		 * commands submitted will set the right pagetable
		 */
		ret = adreno_drawctxt_switch(adreno_dev,
			adreno_dev->cur_rb, NULL, 0);
		/*
		 * The lower priority RB has to wait until space opens up in
		 * the higher priority RB
		 */
		if (ret) {
			KGSL_DRV_ERR(device,
				"Unable to switch context to NULL: %d", ret);
			return;
		}

		adreno_writereg(adreno_dev,
			ADRENO_REG_CP_PREEMPT_DISABLE, 1);
	}

	/*
	 * Set up the registers to switch to the highest priority RB,
	 * which is not empty or may be starving away (poor thing)
	 */
	a4xx_preemption_start(adreno_dev, highest_busy_rb);

	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);

	adreno_dev->next_rb = highest_busy_rb;
	mod_timer(&adreno_dev->preempt.timer, jiffies +
		msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));

	trace_adreno_hw_preempt_clear_to_trig(adreno_dev->cur_rb,
		adreno_dev->next_rb,
		adreno_get_rptr(adreno_dev->cur_rb),
		adreno_get_rptr(adreno_dev->next_rb));
	/* issue PREEMPT trigger */
	adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT, 1);

	/* submit a preempt token packet to ensure preemption */
	if (switch_low_to_high < 0) {
		ret = a4xx_submit_preempt_token(
			adreno_dev->cur_rb, adreno_dev->next_rb);
		if (ret)
			KGSL_DRV_ERR(device,
				"Unable to submit preempt token: %d\n", ret);
		adreno_dev->preempt.token_submit = true;
		adreno_dev->cur_rb->wptr_preempt_end = adreno_dev->cur_rb->wptr;
	} else {
		adreno_dev->preempt.token_submit = false;
		adreno_dispatcher_schedule(device);
		adreno_dev->cur_rb->wptr_preempt_end = 0xFFFFFFFF;
	}
}

static void a4xx_preempt_complete_state(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	unsigned int wptr, rbbase;
	unsigned int val, val1;
	unsigned int prevrptr;

	del_timer_sync(&adreno_dev->preempt.timer);

	adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &val);
	adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT_DEBUG, &val1);

	if (val || !val1) {
		KGSL_DRV_ERR(device,
			"Invalid state after preemption CP_PREEMPT: %08x, CP_PREEMPT_DEBUG: %08x\n",
			val, val1);
		adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
		adreno_dispatcher_schedule(device);
		return;
	}

	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_BASE, &rbbase);
	if (rbbase != adreno_dev->next_rb->buffer_desc.gpuaddr) {
		KGSL_DRV_ERR(device,
			"RBBASE incorrect after preemption, got %x expected %016llx\n",
			rbbase, adreno_dev->next_rb->buffer_desc.gpuaddr);
		adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
		adreno_dispatcher_schedule(device);
		return;
	}

	a4xx_preemption_save(adreno_dev, adreno_dev->cur_rb);

	/* new RB is the current RB */
	trace_adreno_hw_preempt_comp_to_clear(adreno_dev->next_rb,
		adreno_dev->cur_rb,
		adreno_get_rptr(adreno_dev->next_rb),
		adreno_get_rptr(adreno_dev->cur_rb));

	adreno_dev->prev_rb = adreno_dev->cur_rb;
	adreno_dev->cur_rb = adreno_dev->next_rb;
	adreno_dev->cur_rb->preempted_midway = 0;
	adreno_dev->cur_rb->wptr_preempt_end = 0xFFFFFFFF;
	adreno_dev->next_rb = NULL;

	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);

	prevrptr = adreno_get_rptr(adreno_dev->prev_rb);

	if (adreno_compare_prio_level(adreno_dev->prev_rb->id,
				adreno_dev->cur_rb->id) < 0) {
		if (adreno_dev->prev_rb->wptr_preempt_end != prevrptr)
			adreno_dev->prev_rb->preempted_midway = 1;
	}

	/* submit wptr if required for the new RB */
	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr);
	if (adreno_dev->cur_rb->wptr != wptr) {
		kgsl_pwrscale_busy(device);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR,
					adreno_dev->cur_rb->wptr);
	}
	/* clear preemption register */
	adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT_DEBUG, 0);
}
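
/*
 * a4xx_preemption_schedule() - Dispatcher entry point for the a4xx preemption
 * state machine. Returns early if preemption is disabled; otherwise takes the
 * device mutex and runs the handler for the current preemption state (NONE,
 * TRIGGERED or COMPLETE), letting TRIGGERED fall through to COMPLETE when the
 * state has already advanced.
 */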
void a4xx_preemption_schedule(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	if (!adreno_is_preemption_enabled(adreno_dev))
		return;

	mutex_lock(&device->mutex);

	switch (atomic_read(&adreno_dev->preempt.state)) {
	case ADRENO_PREEMPT_NONE:
		a4xx_preempt_clear_state(adreno_dev);
		break;
	case ADRENO_PREEMPT_TRIGGERED:
		a4xx_preempt_trig_state(adreno_dev);
		/*
		 * if we transitioned to next state then fall-through
		 * processing to next state
		 */
		if (!adreno_in_preempt_state(adreno_dev,
			ADRENO_PREEMPT_COMPLETE))
			break;
	case ADRENO_PREEMPT_COMPLETE:
		a4xx_preempt_complete_state(adreno_dev);
		break;
	default:
		break;
	}

	mutex_unlock(&device->mutex);
}

int a4xx_preemption_init(struct adreno_device *adreno_dev)
{
	setup_timer(&adreno_dev->preempt.timer, a4xx_preemption_timer,
			(unsigned long) adreno_dev);

	return 0;
}