This support was partially present in the existing code (look for
"__tilegx__" ifdefs), but with this change you can build a working kernel
using the TILE-Gx toolchain and ARCH=tilegx. Most of these files are new,
generally adding a foo_64.c file where previously there was just a
foo_32.c file. The ARCH=tilegx directive redirects to arch/tile, not
arch/tilegx, using the existing SRCARCH mechanism in the top-level
Makefile.

Changes to existing files:

- <asm/bitops.h> and <asm/bitops_32.h> changed to factor the include of
  <asm-generic/bitops/non-atomic.h> into the common header.

- <asm/compat.h> and arch/tile/kernel/compat.c changed to remove the
  "const" markers I had put on compat_sys_execve() when trying to match
  some recent similar changes to the non-compat execve. It turns out the
  compat version wasn't "upgraded" to use const.

- <asm/opcode-tile_64.h> and <asm/opcode_constants_64.h> were previously
  included accidentally, with the 32-bit contents. Now they have the
  proper 64-bit contents.

Finally, I had to hack the existing hacky drivers/input/input-compat.h
to add yet another "#ifdef" for INPUT_COMPAT_TEST (same as x86_64).

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> [drivers/input]
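For reference, a cross-build of this tree looks like any other kernel
cross-build; the CROSS_COMPILE prefix below is illustrative and depends
on how the TILE-Gx toolchain was installed:

    make ARCH=tilegx CROSS_COMPILE=tilegx-linux-gnu- defconfig
    make ARCH=tilegx CROSS_COMPILE=tilegx-linux-gnu- vmlinux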
parent be84cb4383
commit 18aecc2b64
@@ -0,0 +1,258 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 */

/*
 * @file
 * Global header file.
 * This header file specifies defines for TILE-Gx.
 */

#ifndef __ARCH_CHIP_H__
#define __ARCH_CHIP_H__

/** Specify chip version.
 * When possible, prefer the CHIP_xxx symbols below for future-proofing.
 * This is intended for cross-compiling; native compilation should
 * use the predefined __tile_chip__ symbol.
 */
#define TILE_CHIP 10

/** Specify chip revision.
 * This provides for the case of a respin of a particular chip type;
 * the normal value for this symbol is "0".
 * This is intended for cross-compiling; native compilation should
 * use the predefined __tile_chip_rev__ symbol.
 */
#define TILE_CHIP_REV 0

/** The name of this architecture. */
#define CHIP_ARCH_NAME "tilegx"

/** The ELF e_machine type for binaries for this chip. */
#define CHIP_ELF_TYPE() EM_TILEGX

/** The alternate ELF e_machine type for binaries for this chip. */
#define CHIP_COMPAT_ELF_TYPE() 0x2597

/** What is the native word size of the machine? */
#define CHIP_WORD_SIZE() 64

/** How many bits of a virtual address are used. Extra bits must be
 * the sign extension of the low bits.
 */
#define CHIP_VA_WIDTH() 42

/** How many bits are in a physical address? */
#define CHIP_PA_WIDTH() 40

/** Size of the L2 cache, in bytes. */
#define CHIP_L2_CACHE_SIZE() 262144

/** Log size of an L2 cache line in bytes. */
#define CHIP_L2_LOG_LINE_SIZE() 6

/** Size of an L2 cache line, in bytes. */
#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())

/** Associativity of the L2 cache. */
#define CHIP_L2_ASSOC() 8

/** Size of the L1 data cache, in bytes. */
#define CHIP_L1D_CACHE_SIZE() 32768

/** Log size of an L1 data cache line in bytes. */
#define CHIP_L1D_LOG_LINE_SIZE() 6

/** Size of an L1 data cache line, in bytes. */
#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())

/** Associativity of the L1 data cache. */
#define CHIP_L1D_ASSOC() 2

/** Size of the L1 instruction cache, in bytes. */
#define CHIP_L1I_CACHE_SIZE() 32768

/** Log size of an L1 instruction cache line in bytes. */
#define CHIP_L1I_LOG_LINE_SIZE() 6

/** Size of an L1 instruction cache line, in bytes. */
#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())

/** Associativity of the L1 instruction cache. */
#define CHIP_L1I_ASSOC() 2

/** Stride with which flush instructions must be issued. */
#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()

/** Stride with which inv instructions must be issued. */
#define CHIP_INV_STRIDE() CHIP_L2_LINE_SIZE()

/** Stride with which finv instructions must be issued. */
#define CHIP_FINV_STRIDE() CHIP_L2_LINE_SIZE()

/** Can the local cache coherently cache data that is homed elsewhere? */
#define CHIP_HAS_COHERENT_LOCAL_CACHE() 1

/** How many simultaneous outstanding victims can the L2 cache have? */
#define CHIP_MAX_OUTSTANDING_VICTIMS() 128

/** Does the TLB support the NC and NOALLOC bits? */
#define CHIP_HAS_NC_AND_NOALLOC_BITS() 1

/** Does the chip support hash-for-home caching? */
#define CHIP_HAS_CBOX_HOME_MAP() 1

/** Number of entries in the chip's home map tables. */
#define CHIP_CBOX_HOME_MAP_SIZE() 128

/** Do uncacheable requests miss in the cache regardless of whether
 * there is matching data? */
#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 1

/** Does the mf instruction wait for victims? */
#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 0

/** Does the chip have an "inv" instruction that doesn't also flush? */
#define CHIP_HAS_INV() 1

/** Does the chip have a "wh64" instruction? */
#define CHIP_HAS_WH64() 1

/** Does this chip have a 'dword_align' instruction? */
#define CHIP_HAS_DWORD_ALIGN() 0

/** Number of performance counters. */
#define CHIP_PERFORMANCE_COUNTERS() 4

/** Does this chip have auxiliary performance counters? */
#define CHIP_HAS_AUX_PERF_COUNTERS() 1

/** Is the CBOX_MSR1 SPR supported? */
#define CHIP_HAS_CBOX_MSR1() 0

/** Is the TILE_RTF_HWM SPR supported? */
#define CHIP_HAS_TILE_RTF_HWM() 1

/** Is the TILE_WRITE_PENDING SPR supported? */
#define CHIP_HAS_TILE_WRITE_PENDING() 0

/** Is the PROC_STATUS SPR supported? */
#define CHIP_HAS_PROC_STATUS_SPR() 1

/** Is the DSTREAM_PF SPR supported? */
#define CHIP_HAS_DSTREAM_PF() 1

/** Log of the number of mshims we have. */
#define CHIP_LOG_NUM_MSHIMS() 2

/** Are the bases of the interrupt vector areas fixed? */
#define CHIP_HAS_FIXED_INTVEC_BASE() 0

/** Are the interrupt masks split up into 2 SPRs? */
#define CHIP_HAS_SPLIT_INTR_MASK() 0

/** Is the cycle count split up into 2 SPRs? */
#define CHIP_HAS_SPLIT_CYCLE() 0

/** Does the chip have a static network? */
#define CHIP_HAS_SN() 0

/** Does the chip have a static network processor? */
#define CHIP_HAS_SN_PROC() 0

/** Size of the L1 static network processor instruction cache, in bytes. */
/* #define CHIP_L1SNI_CACHE_SIZE() -- does not apply to chip 10 */

/** Does the chip have DMA support in each tile? */
#define CHIP_HAS_TILE_DMA() 0

/** Does the chip have the second revision of the directly accessible
 * dynamic networks? This encapsulates a number of characteristics,
 * including the absence of the catch-all, the absence of inline message
 * tags, the absence of support for network context-switching, and so on.
 */
#define CHIP_HAS_REV1_XDN() 1

/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
#define CHIP_HAS_CMPEXCH() 1

/** Does the chip have memory-mapped I/O support? */
#define CHIP_HAS_MMIO() 1

/** Does the chip have post-completion interrupts? */
#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 1

/** Does the chip have native single step support? */
#define CHIP_HAS_SINGLE_STEP() 1

#ifndef __OPEN_SOURCE__  /* features only relevant to hypervisor-level code */

/** How many entries are present in the instruction TLB? */
#define CHIP_ITLB_ENTRIES() 16

/** How many entries are present in the data TLB? */
#define CHIP_DTLB_ENTRIES() 32

/** How many MAF entries does the XAUI shim have? */
#define CHIP_XAUI_MAF_ENTRIES() 32

/** Does the memory shim have a source-id table? */
#define CHIP_HAS_MSHIM_SRCID_TABLE() 0

/** Does the L1 instruction cache clear on reset? */
#define CHIP_HAS_L1I_CLEAR_ON_RESET() 1

/** Does the chip come out of reset with valid coordinates on all tiles?
 * Note that if defined, this also implies that the upper left is 1,1.
 */
#define CHIP_HAS_VALID_TILE_COORD_RESET() 1

/** Does the chip have unified packet formats? */
#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 1

/** Does the chip support write reordering? */
#define CHIP_HAS_WRITE_REORDERING() 1

/** Does the chip support Y-X routing as well as X-Y? */
#define CHIP_HAS_Y_X_ROUTING() 1

/** Is INTCTRL_3 managed with the correct MPL? */
#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 1

/** Is it possible to configure the chip to be big-endian? */
#define CHIP_HAS_BIG_ENDIAN_CONFIG() 1

/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0

/** Is the DIAG_TRACE_WAY SPR supported? */
#define CHIP_HAS_DIAG_TRACE_WAY() 0

/** Is the MEM_STRIPE_CONFIG SPR supported? */
#define CHIP_HAS_MEM_STRIPE_CONFIG() 1

/** Are the TLB_PERF SPRs supported? */
#define CHIP_HAS_TLB_PERF() 1

/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0

/** Does the chip support rev1 DMA packets? */
#define CHIP_HAS_REV1_DMA_PACKETS() 1

/** Does the chip have an IPI shim? */
#define CHIP_HAS_IPI() 1

#endif /* !__OPEN_SOURCE__ */
#endif /* __ARCH_CHIP_H__ */
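As a hedged illustration (not part of the commit), kernel code consumes
these accessor-style macros directly; the struct name below is
hypothetical, and <arch/chip.h> is assumed to be on the include path:

	#include <arch/chip.h>

	/* Illustrative only: pad a hot structure to an L2 cache line. */
	struct example_percpu {
		long counter;
	} __attribute__((aligned(CHIP_L2_LINE_SIZE())));

	#if !CHIP_HAS_TILE_DMA()
	/* TILE-Gx has no per-tile DMA engine; take the memcpy path. */
	#endif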
@@ -0,0 +1,276 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 */

#ifndef __ARCH_INTERRUPTS_H__
#define __ARCH_INTERRUPTS_H__

/** Mask for an interrupt. */
#ifdef __ASSEMBLER__
/* Note: must handle breaking interrupts into high and low words manually. */
#define INT_MASK(intno) (1 << (intno))
#else
#define INT_MASK(intno) (1ULL << (intno))
#endif


/** Where a given interrupt executes */
#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8))

/** Where to store a vector for a given interrupt. */
#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0)

/** The base address of user-level interrupts. */
#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0)


/** Additional synthetic interrupt. */
#define INT_BREAKPOINT (63)

#define INT_MEM_ERROR       0
#define INT_SINGLE_STEP_3   1
#define INT_SINGLE_STEP_2   2
#define INT_SINGLE_STEP_1   3
#define INT_SINGLE_STEP_0   4
#define INT_IDN_COMPLETE    5
#define INT_UDN_COMPLETE    6
#define INT_ITLB_MISS       7
#define INT_ILL             8
#define INT_GPV             9
#define INT_IDN_ACCESS     10
#define INT_UDN_ACCESS     11
#define INT_SWINT_3        12
#define INT_SWINT_2        13
#define INT_SWINT_1        14
#define INT_SWINT_0        15
#define INT_ILL_TRANS      16
#define INT_UNALIGN_DATA   17
#define INT_DTLB_MISS      18
#define INT_DTLB_ACCESS    19
#define INT_IDN_FIREWALL   20
#define INT_UDN_FIREWALL   21
#define INT_TILE_TIMER     22
#define INT_AUX_TILE_TIMER 23
#define INT_IDN_TIMER      24
#define INT_UDN_TIMER      25
#define INT_IDN_AVAIL      26
#define INT_UDN_AVAIL      27
#define INT_IPI_3          28
#define INT_IPI_2          29
#define INT_IPI_1          30
#define INT_IPI_0          31
#define INT_PERF_COUNT     32
#define INT_AUX_PERF_COUNT 33
#define INT_INTCTRL_3      34
#define INT_INTCTRL_2      35
#define INT_INTCTRL_1      36
#define INT_INTCTRL_0      37
#define INT_BOOT_ACCESS    38
#define INT_WORLD_ACCESS   39
#define INT_I_ASID         40
#define INT_D_ASID         41
#define INT_DOUBLE_FAULT   42

#define NUM_INTERRUPTS 43

#ifndef __ASSEMBLER__
#define QUEUED_INTERRUPTS ( \
	INT_MASK(INT_MEM_ERROR) | \
	INT_MASK(INT_IDN_COMPLETE) | \
	INT_MASK(INT_UDN_COMPLETE) | \
	INT_MASK(INT_IDN_FIREWALL) | \
	INT_MASK(INT_UDN_FIREWALL) | \
	INT_MASK(INT_TILE_TIMER) | \
	INT_MASK(INT_AUX_TILE_TIMER) | \
	INT_MASK(INT_IDN_TIMER) | \
	INT_MASK(INT_UDN_TIMER) | \
	INT_MASK(INT_IDN_AVAIL) | \
	INT_MASK(INT_UDN_AVAIL) | \
	INT_MASK(INT_IPI_3) | \
	INT_MASK(INT_IPI_2) | \
	INT_MASK(INT_IPI_1) | \
	INT_MASK(INT_IPI_0) | \
	INT_MASK(INT_PERF_COUNT) | \
	INT_MASK(INT_AUX_PERF_COUNT) | \
	INT_MASK(INT_INTCTRL_3) | \
	INT_MASK(INT_INTCTRL_2) | \
	INT_MASK(INT_INTCTRL_1) | \
	INT_MASK(INT_INTCTRL_0) | \
	INT_MASK(INT_BOOT_ACCESS) | \
	INT_MASK(INT_WORLD_ACCESS) | \
	INT_MASK(INT_I_ASID) | \
	INT_MASK(INT_D_ASID) | \
	INT_MASK(INT_DOUBLE_FAULT) | \
	0)
#define NONQUEUED_INTERRUPTS ( \
	INT_MASK(INT_SINGLE_STEP_3) | \
	INT_MASK(INT_SINGLE_STEP_2) | \
	INT_MASK(INT_SINGLE_STEP_1) | \
	INT_MASK(INT_SINGLE_STEP_0) | \
	INT_MASK(INT_ITLB_MISS) | \
	INT_MASK(INT_ILL) | \
	INT_MASK(INT_GPV) | \
	INT_MASK(INT_IDN_ACCESS) | \
	INT_MASK(INT_UDN_ACCESS) | \
	INT_MASK(INT_SWINT_3) | \
	INT_MASK(INT_SWINT_2) | \
	INT_MASK(INT_SWINT_1) | \
	INT_MASK(INT_SWINT_0) | \
	INT_MASK(INT_ILL_TRANS) | \
	INT_MASK(INT_UNALIGN_DATA) | \
	INT_MASK(INT_DTLB_MISS) | \
	INT_MASK(INT_DTLB_ACCESS) | \
	0)
#define CRITICAL_MASKED_INTERRUPTS ( \
	INT_MASK(INT_MEM_ERROR) | \
	INT_MASK(INT_SINGLE_STEP_3) | \
	INT_MASK(INT_SINGLE_STEP_2) | \
	INT_MASK(INT_SINGLE_STEP_1) | \
	INT_MASK(INT_SINGLE_STEP_0) | \
	INT_MASK(INT_IDN_COMPLETE) | \
	INT_MASK(INT_UDN_COMPLETE) | \
	INT_MASK(INT_IDN_FIREWALL) | \
	INT_MASK(INT_UDN_FIREWALL) | \
	INT_MASK(INT_TILE_TIMER) | \
	INT_MASK(INT_AUX_TILE_TIMER) | \
	INT_MASK(INT_IDN_TIMER) | \
	INT_MASK(INT_UDN_TIMER) | \
	INT_MASK(INT_IDN_AVAIL) | \
	INT_MASK(INT_UDN_AVAIL) | \
	INT_MASK(INT_IPI_3) | \
	INT_MASK(INT_IPI_2) | \
	INT_MASK(INT_IPI_1) | \
	INT_MASK(INT_IPI_0) | \
	INT_MASK(INT_PERF_COUNT) | \
	INT_MASK(INT_AUX_PERF_COUNT) | \
	INT_MASK(INT_INTCTRL_3) | \
	INT_MASK(INT_INTCTRL_2) | \
	INT_MASK(INT_INTCTRL_1) | \
	INT_MASK(INT_INTCTRL_0) | \
	0)
#define CRITICAL_UNMASKED_INTERRUPTS ( \
	INT_MASK(INT_ITLB_MISS) | \
	INT_MASK(INT_ILL) | \
	INT_MASK(INT_GPV) | \
	INT_MASK(INT_IDN_ACCESS) | \
	INT_MASK(INT_UDN_ACCESS) | \
	INT_MASK(INT_SWINT_3) | \
	INT_MASK(INT_SWINT_2) | \
	INT_MASK(INT_SWINT_1) | \
	INT_MASK(INT_SWINT_0) | \
	INT_MASK(INT_ILL_TRANS) | \
	INT_MASK(INT_UNALIGN_DATA) | \
	INT_MASK(INT_DTLB_MISS) | \
	INT_MASK(INT_DTLB_ACCESS) | \
	INT_MASK(INT_BOOT_ACCESS) | \
	INT_MASK(INT_WORLD_ACCESS) | \
	INT_MASK(INT_I_ASID) | \
	INT_MASK(INT_D_ASID) | \
	INT_MASK(INT_DOUBLE_FAULT) | \
	0)
#define MASKABLE_INTERRUPTS ( \
	INT_MASK(INT_MEM_ERROR) | \
	INT_MASK(INT_SINGLE_STEP_3) | \
	INT_MASK(INT_SINGLE_STEP_2) | \
	INT_MASK(INT_SINGLE_STEP_1) | \
	INT_MASK(INT_SINGLE_STEP_0) | \
	INT_MASK(INT_IDN_COMPLETE) | \
	INT_MASK(INT_UDN_COMPLETE) | \
	INT_MASK(INT_IDN_FIREWALL) | \
	INT_MASK(INT_UDN_FIREWALL) | \
	INT_MASK(INT_TILE_TIMER) | \
	INT_MASK(INT_AUX_TILE_TIMER) | \
	INT_MASK(INT_IDN_TIMER) | \
	INT_MASK(INT_UDN_TIMER) | \
	INT_MASK(INT_IDN_AVAIL) | \
	INT_MASK(INT_UDN_AVAIL) | \
	INT_MASK(INT_IPI_3) | \
	INT_MASK(INT_IPI_2) | \
	INT_MASK(INT_IPI_1) | \
	INT_MASK(INT_IPI_0) | \
	INT_MASK(INT_PERF_COUNT) | \
	INT_MASK(INT_AUX_PERF_COUNT) | \
	INT_MASK(INT_INTCTRL_3) | \
	INT_MASK(INT_INTCTRL_2) | \
	INT_MASK(INT_INTCTRL_1) | \
	INT_MASK(INT_INTCTRL_0) | \
	0)
#define UNMASKABLE_INTERRUPTS ( \
	INT_MASK(INT_ITLB_MISS) | \
	INT_MASK(INT_ILL) | \
	INT_MASK(INT_GPV) | \
	INT_MASK(INT_IDN_ACCESS) | \
	INT_MASK(INT_UDN_ACCESS) | \
	INT_MASK(INT_SWINT_3) | \
	INT_MASK(INT_SWINT_2) | \
	INT_MASK(INT_SWINT_1) | \
	INT_MASK(INT_SWINT_0) | \
	INT_MASK(INT_ILL_TRANS) | \
	INT_MASK(INT_UNALIGN_DATA) | \
	INT_MASK(INT_DTLB_MISS) | \
	INT_MASK(INT_DTLB_ACCESS) | \
	INT_MASK(INT_BOOT_ACCESS) | \
	INT_MASK(INT_WORLD_ACCESS) | \
	INT_MASK(INT_I_ASID) | \
	INT_MASK(INT_D_ASID) | \
	INT_MASK(INT_DOUBLE_FAULT) | \
	0)
#define SYNC_INTERRUPTS ( \
	INT_MASK(INT_SINGLE_STEP_3) | \
	INT_MASK(INT_SINGLE_STEP_2) | \
	INT_MASK(INT_SINGLE_STEP_1) | \
	INT_MASK(INT_SINGLE_STEP_0) | \
	INT_MASK(INT_IDN_COMPLETE) | \
	INT_MASK(INT_UDN_COMPLETE) | \
	INT_MASK(INT_ITLB_MISS) | \
	INT_MASK(INT_ILL) | \
	INT_MASK(INT_GPV) | \
	INT_MASK(INT_IDN_ACCESS) | \
	INT_MASK(INT_UDN_ACCESS) | \
	INT_MASK(INT_SWINT_3) | \
	INT_MASK(INT_SWINT_2) | \
	INT_MASK(INT_SWINT_1) | \
	INT_MASK(INT_SWINT_0) | \
	INT_MASK(INT_ILL_TRANS) | \
	INT_MASK(INT_UNALIGN_DATA) | \
	INT_MASK(INT_DTLB_MISS) | \
	INT_MASK(INT_DTLB_ACCESS) | \
	0)
#define NON_SYNC_INTERRUPTS ( \
	INT_MASK(INT_MEM_ERROR) | \
	INT_MASK(INT_IDN_FIREWALL) | \
	INT_MASK(INT_UDN_FIREWALL) | \
	INT_MASK(INT_TILE_TIMER) | \
	INT_MASK(INT_AUX_TILE_TIMER) | \
	INT_MASK(INT_IDN_TIMER) | \
	INT_MASK(INT_UDN_TIMER) | \
	INT_MASK(INT_IDN_AVAIL) | \
	INT_MASK(INT_UDN_AVAIL) | \
	INT_MASK(INT_IPI_3) | \
	INT_MASK(INT_IPI_2) | \
	INT_MASK(INT_IPI_1) | \
	INT_MASK(INT_IPI_0) | \
	INT_MASK(INT_PERF_COUNT) | \
	INT_MASK(INT_AUX_PERF_COUNT) | \
	INT_MASK(INT_INTCTRL_3) | \
	INT_MASK(INT_INTCTRL_2) | \
	INT_MASK(INT_INTCTRL_1) | \
	INT_MASK(INT_INTCTRL_0) | \
	INT_MASK(INT_BOOT_ACCESS) | \
	INT_MASK(INT_WORLD_ACCESS) | \
	INT_MASK(INT_I_ASID) | \
	INT_MASK(INT_D_ASID) | \
	INT_MASK(INT_DOUBLE_FAULT) | \
	0)
#endif /* !__ASSEMBLER__ */
#endif /* !__ARCH_INTERRUPTS_H__ */
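Because each INT_xxx number is a bit position, membership in any of the
mask macros above reduces to a shift and mask. A hedged sketch (the
helper name is hypothetical, not from the commit):

	/* Illustrative only: is "intno" handled by the queued path? */
	static inline int int_is_queued(int intno)
	{
		return (QUEUED_INTERRUPTS >> intno) & 1;
	}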
@@ -0,0 +1,173 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 */

#ifndef __DOXYGEN__

#ifndef __ARCH_SPR_DEF_H__
#define __ARCH_SPR_DEF_H__

#define SPR_AUX_PERF_COUNT_0 0x2105
#define SPR_AUX_PERF_COUNT_1 0x2106
#define SPR_AUX_PERF_COUNT_CTL 0x2107
#define SPR_AUX_PERF_COUNT_STS 0x2108
#define SPR_CMPEXCH_VALUE 0x2780
#define SPR_CYCLE 0x2781
#define SPR_DONE 0x2705
#define SPR_DSTREAM_PF 0x2706
#define SPR_EVENT_BEGIN 0x2782
#define SPR_EVENT_END 0x2783
#define SPR_EX_CONTEXT_0_0 0x2580
#define SPR_EX_CONTEXT_0_1 0x2581
#define SPR_EX_CONTEXT_0_1__PL_SHIFT 0
#define SPR_EX_CONTEXT_0_1__PL_RMASK 0x3
#define SPR_EX_CONTEXT_0_1__PL_MASK 0x3
#define SPR_EX_CONTEXT_0_1__ICS_SHIFT 2
#define SPR_EX_CONTEXT_0_1__ICS_RMASK 0x1
#define SPR_EX_CONTEXT_0_1__ICS_MASK 0x4
#define SPR_EX_CONTEXT_1_0 0x2480
#define SPR_EX_CONTEXT_1_1 0x2481
#define SPR_EX_CONTEXT_1_1__PL_SHIFT 0
#define SPR_EX_CONTEXT_1_1__PL_RMASK 0x3
#define SPR_EX_CONTEXT_1_1__PL_MASK 0x3
#define SPR_EX_CONTEXT_1_1__ICS_SHIFT 2
#define SPR_EX_CONTEXT_1_1__ICS_RMASK 0x1
#define SPR_EX_CONTEXT_1_1__ICS_MASK 0x4
#define SPR_EX_CONTEXT_2_0 0x2380
#define SPR_EX_CONTEXT_2_1 0x2381
#define SPR_EX_CONTEXT_2_1__PL_SHIFT 0
#define SPR_EX_CONTEXT_2_1__PL_RMASK 0x3
#define SPR_EX_CONTEXT_2_1__PL_MASK 0x3
#define SPR_EX_CONTEXT_2_1__ICS_SHIFT 2
#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1
#define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4
#define SPR_FAIL 0x2707
#define SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK 0x1
#define SPR_INTCTRL_0_STATUS 0x2505
#define SPR_INTCTRL_1_STATUS 0x2405
#define SPR_INTCTRL_2_STATUS 0x2305
#define SPR_INTERRUPT_CRITICAL_SECTION 0x2708
#define SPR_INTERRUPT_MASK_0 0x2506
#define SPR_INTERRUPT_MASK_1 0x2406
#define SPR_INTERRUPT_MASK_2 0x2306
#define SPR_INTERRUPT_MASK_RESET_0 0x2507
#define SPR_INTERRUPT_MASK_RESET_1 0x2407
#define SPR_INTERRUPT_MASK_RESET_2 0x2307
#define SPR_INTERRUPT_MASK_SET_0 0x2508
#define SPR_INTERRUPT_MASK_SET_1 0x2408
#define SPR_INTERRUPT_MASK_SET_2 0x2308
#define SPR_INTERRUPT_VECTOR_BASE_0 0x2509
#define SPR_INTERRUPT_VECTOR_BASE_1 0x2409
#define SPR_INTERRUPT_VECTOR_BASE_2 0x2309
#define SPR_INTERRUPT_VECTOR_BASE_3 0x2209
#define SPR_IPI_EVENT_0 0x1f05
#define SPR_IPI_EVENT_1 0x1e05
#define SPR_IPI_EVENT_2 0x1d05
#define SPR_IPI_EVENT_RESET_0 0x1f06
#define SPR_IPI_EVENT_RESET_1 0x1e06
#define SPR_IPI_EVENT_RESET_2 0x1d06
#define SPR_IPI_EVENT_SET_0 0x1f07
#define SPR_IPI_EVENT_SET_1 0x1e07
#define SPR_IPI_EVENT_SET_2 0x1d07
#define SPR_IPI_MASK_0 0x1f08
#define SPR_IPI_MASK_1 0x1e08
#define SPR_IPI_MASK_2 0x1d08
#define SPR_IPI_MASK_RESET_0 0x1f09
#define SPR_IPI_MASK_RESET_1 0x1e09
#define SPR_IPI_MASK_RESET_2 0x1d09
#define SPR_IPI_MASK_SET_0 0x1f0a
#define SPR_IPI_MASK_SET_1 0x1e0a
#define SPR_IPI_MASK_SET_2 0x1d0a
#define SPR_MPL_AUX_TILE_TIMER_SET_0 0x1700
#define SPR_MPL_AUX_TILE_TIMER_SET_1 0x1701
#define SPR_MPL_AUX_TILE_TIMER_SET_2 0x1702
#define SPR_MPL_INTCTRL_0_SET_0 0x2500
#define SPR_MPL_INTCTRL_0_SET_1 0x2501
#define SPR_MPL_INTCTRL_0_SET_2 0x2502
#define SPR_MPL_INTCTRL_1_SET_0 0x2400
#define SPR_MPL_INTCTRL_1_SET_1 0x2401
#define SPR_MPL_INTCTRL_1_SET_2 0x2402
#define SPR_MPL_INTCTRL_2_SET_0 0x2300
#define SPR_MPL_INTCTRL_2_SET_1 0x2301
#define SPR_MPL_INTCTRL_2_SET_2 0x2302
#define SPR_MPL_UDN_ACCESS_SET_0 0x0b00
#define SPR_MPL_UDN_ACCESS_SET_1 0x0b01
#define SPR_MPL_UDN_ACCESS_SET_2 0x0b02
#define SPR_MPL_UDN_AVAIL_SET_0 0x1b00
#define SPR_MPL_UDN_AVAIL_SET_1 0x1b01
#define SPR_MPL_UDN_AVAIL_SET_2 0x1b02
#define SPR_MPL_UDN_COMPLETE_SET_0 0x0600
#define SPR_MPL_UDN_COMPLETE_SET_1 0x0601
#define SPR_MPL_UDN_COMPLETE_SET_2 0x0602
#define SPR_MPL_UDN_FIREWALL_SET_0 0x1500
#define SPR_MPL_UDN_FIREWALL_SET_1 0x1501
#define SPR_MPL_UDN_FIREWALL_SET_2 0x1502
#define SPR_MPL_UDN_TIMER_SET_0 0x1900
#define SPR_MPL_UDN_TIMER_SET_1 0x1901
#define SPR_MPL_UDN_TIMER_SET_2 0x1902
#define SPR_MPL_WORLD_ACCESS_SET_0 0x2700
#define SPR_MPL_WORLD_ACCESS_SET_1 0x2701
#define SPR_MPL_WORLD_ACCESS_SET_2 0x2702
#define SPR_PASS 0x2709
#define SPR_PERF_COUNT_0 0x2005
#define SPR_PERF_COUNT_1 0x2006
#define SPR_PERF_COUNT_CTL 0x2007
#define SPR_PERF_COUNT_DN_CTL 0x2008
#define SPR_PERF_COUNT_STS 0x2009
#define SPR_PROC_STATUS 0x2784
#define SPR_SIM_CONTROL 0x2785
#define SPR_SINGLE_STEP_CONTROL_0 0x0405
#define SPR_SINGLE_STEP_CONTROL_0__CANCELED_MASK 0x1
#define SPR_SINGLE_STEP_CONTROL_0__INHIBIT_MASK 0x2
#define SPR_SINGLE_STEP_CONTROL_1 0x0305
#define SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK 0x1
#define SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK 0x2
#define SPR_SINGLE_STEP_CONTROL_2 0x0205
#define SPR_SINGLE_STEP_CONTROL_2__CANCELED_MASK 0x1
#define SPR_SINGLE_STEP_CONTROL_2__INHIBIT_MASK 0x2
#define SPR_SINGLE_STEP_EN_0_0 0x250a
#define SPR_SINGLE_STEP_EN_0_1 0x240a
#define SPR_SINGLE_STEP_EN_0_2 0x230a
#define SPR_SINGLE_STEP_EN_1_0 0x250b
#define SPR_SINGLE_STEP_EN_1_1 0x240b
#define SPR_SINGLE_STEP_EN_1_2 0x230b
#define SPR_SINGLE_STEP_EN_2_0 0x250c
#define SPR_SINGLE_STEP_EN_2_1 0x240c
#define SPR_SINGLE_STEP_EN_2_2 0x230c
#define SPR_SYSTEM_SAVE_0_0 0x2582
#define SPR_SYSTEM_SAVE_0_1 0x2583
#define SPR_SYSTEM_SAVE_0_2 0x2584
#define SPR_SYSTEM_SAVE_0_3 0x2585
#define SPR_SYSTEM_SAVE_1_0 0x2482
#define SPR_SYSTEM_SAVE_1_1 0x2483
#define SPR_SYSTEM_SAVE_1_2 0x2484
#define SPR_SYSTEM_SAVE_1_3 0x2485
#define SPR_SYSTEM_SAVE_2_0 0x2382
#define SPR_SYSTEM_SAVE_2_1 0x2383
#define SPR_SYSTEM_SAVE_2_2 0x2384
#define SPR_SYSTEM_SAVE_2_3 0x2385
#define SPR_TILE_COORD 0x270b
#define SPR_TILE_RTF_HWM 0x270c
#define SPR_TILE_TIMER_CONTROL 0x1605
#define SPR_UDN_AVAIL_EN 0x1b05
#define SPR_UDN_DATA_AVAIL 0x0b80
#define SPR_UDN_DEADLOCK_TIMEOUT 0x1906
#define SPR_UDN_DEMUX_COUNT_0 0x0b05
#define SPR_UDN_DEMUX_COUNT_1 0x0b06
#define SPR_UDN_DEMUX_COUNT_2 0x0b07
#define SPR_UDN_DEMUX_COUNT_3 0x0b08
#define SPR_UDN_DIRECTION_PROTECT 0x1505

#endif /* !defined(__ARCH_SPR_DEF_H__) */

#endif /* !defined(__DOXYGEN__) */
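These numbers are consumed by the compiler's SPR intrinsics, as the
atomic and spinlock headers later in this patch do with __insn_mtspr().
A minimal hedged example, assuming the matching __insn_mfspr builtin:

	/* Illustrative only: read the free-running cycle counter.
	 * SPR_CYCLE is a single 64-bit SPR here, since
	 * CHIP_HAS_SPLIT_CYCLE() is 0 on TILE-Gx. */
	unsigned long long cycles = __insn_mfspr(SPR_CYCLE);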
@@ -0,0 +1,169 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 *
 * Do not include directly; use <asm/atomic.h>.
 */

#ifndef _ASM_TILE_ATOMIC_64_H
#define _ASM_TILE_ATOMIC_64_H

#ifndef __ASSEMBLY__

#include <arch/spr_def.h>

/* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */

#define atomic_set(v, i) ((v)->counter = (i))

/*
 * The smp_mb() operations throughout are to support the fact that
 * Linux requires memory barriers before and after the operation,
 * on any routine which updates memory and returns a value.
 */

static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
{
	int val;
	__insn_mtspr(SPR_CMPEXCH_VALUE, o);
	smp_mb();  /* barrier for proper semantics */
	val = __insn_cmpexch4((void *)&v->counter, n);
	smp_mb();  /* barrier for proper semantics */
	return val;
}

static inline int atomic_xchg(atomic_t *v, int n)
{
	int val;
	smp_mb();  /* barrier for proper semantics */
	val = __insn_exch4((void *)&v->counter, n);
	smp_mb();  /* barrier for proper semantics */
	return val;
}

static inline void atomic_add(int i, atomic_t *v)
{
	__insn_fetchadd4((void *)&v->counter, i);
}

static inline int atomic_add_return(int i, atomic_t *v)
{
	int val;
	smp_mb();  /* barrier for proper semantics */
	val = __insn_fetchadd4((void *)&v->counter, i) + i;
	barrier();  /* the "+ i" above will wait on memory */
	return val;
}

static inline int atomic_add_unless(atomic_t *v, int a, int u)
{
	int guess, oldval = v->counter;
	do {
		if (oldval == u)
			break;
		guess = oldval;
		oldval = atomic_cmpxchg(v, guess, guess + a);
	} while (guess != oldval);
	return oldval != u;
}

/* Now the true 64-bit operations. */

#define ATOMIC64_INIT(i)	{ (i) }

#define atomic64_read(v)	((v)->counter)
#define atomic64_set(v, i)	((v)->counter = (i))

static inline long atomic64_cmpxchg(atomic64_t *v, long o, long n)
{
	long val;
	smp_mb();  /* barrier for proper semantics */
	__insn_mtspr(SPR_CMPEXCH_VALUE, o);
	val = __insn_cmpexch((void *)&v->counter, n);
	smp_mb();  /* barrier for proper semantics */
	return val;
}

static inline long atomic64_xchg(atomic64_t *v, long n)
{
	long val;
	smp_mb();  /* barrier for proper semantics */
	val = __insn_exch((void *)&v->counter, n);
	smp_mb();  /* barrier for proper semantics */
	return val;
}

static inline void atomic64_add(long i, atomic64_t *v)
{
	__insn_fetchadd((void *)&v->counter, i);
}

static inline long atomic64_add_return(long i, atomic64_t *v)
{
	long val;  /* must be long, or the 64-bit result is truncated */
	smp_mb();  /* barrier for proper semantics */
	val = __insn_fetchadd((void *)&v->counter, i) + i;
	barrier();  /* the "+ i" above will wait on memory */
	return val;
}

static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
{
	long guess, oldval = v->counter;
	do {
		if (oldval == u)
			break;
		guess = oldval;
		oldval = atomic64_cmpxchg(v, guess, guess + a);
	} while (guess != oldval);
	return oldval != u;
}

#define atomic64_sub_return(i, v)	atomic64_add_return(-(i), (v))
#define atomic64_sub(i, v)		atomic64_add(-(i), (v))
#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
#define atomic64_inc(v)			atomic64_add(1, (v))
#define atomic64_dec(v)			atomic64_sub(1, (v))

#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
#define atomic64_dec_and_test(v)	(atomic64_dec_return(v) == 0)
#define atomic64_sub_and_test(i, v)	(atomic64_sub_return((i), (v)) == 0)
#define atomic64_add_negative(i, v)	(atomic64_add_return((i), (v)) < 0)

#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)

/* Atomic dec and inc don't implement barrier, so provide them if needed. */
#define smp_mb__before_atomic_dec()	smp_mb()
#define smp_mb__after_atomic_dec()	smp_mb()
#define smp_mb__before_atomic_inc()	smp_mb()
#define smp_mb__after_atomic_inc()	smp_mb()

#define xchg(ptr, x)							\
	((typeof(*(ptr)))						\
	 ((sizeof(*(ptr)) == sizeof(atomic_t)) ?			\
	  atomic_xchg((atomic_t *)(ptr), (long)(x)) :			\
	  (sizeof(*(ptr)) == sizeof(atomic_long_t)) ?			\
	  atomic_long_xchg((atomic_long_t *)(ptr), (long)(x)) :		\
	  __xchg_called_with_bad_pointer()))

#define cmpxchg(ptr, o, n)						\
	((typeof(*(ptr)))						\
	 ((sizeof(*(ptr)) == sizeof(atomic_t)) ?			\
	  atomic_cmpxchg((atomic_t *)(ptr), (long)(o), (long)(n)) :	\
	  (sizeof(*(ptr)) == sizeof(atomic_long_t)) ?			\
	  atomic_long_cmpxchg((atomic_long_t *)(ptr), (long)(o), (long)(n)) : \
	  __cmpxchg_called_with_bad_pointer()))

#endif /* !__ASSEMBLY__ */

#endif /* _ASM_TILE_ATOMIC_64_H */
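The practical upshot of the barrier comments above: value-returning ops
act as full barriers, while plain atomic64_add() does not. A minimal
hedged usage sketch (the variable and function names are hypothetical):

	static atomic64_t seq = ATOMIC64_INIT(0);

	static long next_seq(void)
	{
		/* Full barriers before and after, per the rules above. */
		return atomic64_add_return(1, &seq);
	}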
@@ -0,0 +1,105 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 */

#ifndef _ASM_TILE_BITOPS_64_H
#define _ASM_TILE_BITOPS_64_H

#include <linux/compiler.h>
#include <asm/atomic.h>
#include <asm/system.h>

/* See <asm/bitops.h> for API comments. */

static inline void set_bit(unsigned nr, volatile unsigned long *addr)
{
	unsigned long mask = (1UL << (nr % BITS_PER_LONG));
	__insn_fetchor((void *)(addr + nr / BITS_PER_LONG), mask);
}

static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
{
	unsigned long mask = (1UL << (nr % BITS_PER_LONG));
	__insn_fetchand((void *)(addr + nr / BITS_PER_LONG), ~mask);
}

#define smp_mb__before_clear_bit()	smp_mb()
#define smp_mb__after_clear_bit()	smp_mb()

static inline void change_bit(unsigned nr, volatile unsigned long *addr)
{
	unsigned long mask = (1UL << (nr % BITS_PER_LONG));
	long guess, oldval;
	addr += nr / BITS_PER_LONG;
	oldval = *addr;  /* initialize before first loop iteration */
	do {
		guess = oldval;
		oldval = atomic64_cmpxchg((atomic64_t *)addr,
					  guess, guess ^ mask);
	} while (guess != oldval);
}

/*
 * The test_and_xxx_bit() routines require a memory fence before we
 * start the operation, and after the operation completes. We use
 * smp_mb() before, and rely on the "!= 0" comparison, plus a compiler
 * barrier(), to block until the atomic op is complete.
 */

static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr)
{
	int val;
	unsigned long mask = (1UL << (nr % BITS_PER_LONG));
	smp_mb();  /* barrier for proper semantics */
	val = (__insn_fetchor((void *)(addr + nr / BITS_PER_LONG), mask)
	       & mask) != 0;
	barrier();
	return val;
}

static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr)
{
	int val;
	unsigned long mask = (1UL << (nr % BITS_PER_LONG));
	smp_mb();  /* barrier for proper semantics */
	val = (__insn_fetchand((void *)(addr + nr / BITS_PER_LONG), ~mask)
	       & mask) != 0;
	barrier();
	return val;
}

static inline int test_and_change_bit(unsigned nr,
				      volatile unsigned long *addr)
{
	unsigned long mask = (1UL << (nr % BITS_PER_LONG));
	long guess, oldval;
	addr += nr / BITS_PER_LONG;
	oldval = *addr;  /* read only after adjusting to the right word */
	do {
		guess = oldval;
		oldval = atomic64_cmpxchg((atomic64_t *)addr,
					  guess, guess ^ mask);
	} while (guess != oldval);
	return (oldval & mask) != 0;
}

#define ext2_set_bit_atomic(lock, nr, addr)			\
	test_and_set_bit((nr), (unsigned long *)(addr))
#define ext2_clear_bit_atomic(lock, nr, addr)			\
	test_and_clear_bit((nr), (unsigned long *)(addr))

#endif /* _ASM_TILE_BITOPS_64_H */
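A hedged usage sketch (the flag word below is hypothetical): because
test_and_set_bit() returns the old value with full barrier semantics
via fetchor, it works as a one-shot latch:

	static unsigned long init_done[1];

	static void maybe_init(void)
	{
		if (!test_and_set_bit(0, init_done)) {
			/* First caller wins; later callers see the bit set. */
		}
	}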
@@ -0,0 +1,175 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 *
 */

#ifndef _ASM_TILE_PGTABLE_64_H
#define _ASM_TILE_PGTABLE_64_H

/* The level-0 page table breaks the address space into 32-bit chunks. */
#define PGDIR_SHIFT	HV_LOG2_L1_SPAN
#define PGDIR_SIZE	HV_L1_SPAN
#define PGDIR_MASK	(~(PGDIR_SIZE-1))
#define PTRS_PER_PGD	HV_L0_ENTRIES
#define SIZEOF_PGD	(PTRS_PER_PGD * sizeof(pgd_t))

/*
 * The level-1 index is defined by the huge page size. A PMD is composed
 * of PTRS_PER_PMD pgd_t's and is the middle level of the page table.
 */
#define PMD_SHIFT	HV_LOG2_PAGE_SIZE_LARGE
#define PMD_SIZE	HV_PAGE_SIZE_LARGE
#define PMD_MASK	(~(PMD_SIZE-1))
#define PTRS_PER_PMD	(1 << (PGDIR_SHIFT - PMD_SHIFT))
#define SIZEOF_PMD	(PTRS_PER_PMD * sizeof(pmd_t))

/*
 * The level-2 index is defined by the difference between the huge
 * page size and the normal page size. A PTE is composed of
 * PTRS_PER_PTE pte_t's and is the bottom level of the page table.
 * Note that the hypervisor docs use PTE for what we call pte_t, so
 * this nomenclature is somewhat confusing.
 */
#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t))

/*
 * Align the vmalloc area to an L2 page table, and leave a guard page
 * at the beginning and end. The vmalloc code also puts in an internal
 * guard page between each allocation.
 */
#define _VMALLOC_END	HUGE_VMAP_BASE
#define VMALLOC_END	(_VMALLOC_END - PAGE_SIZE)
#define VMALLOC_START	(_VMALLOC_START + PAGE_SIZE)

#define HUGE_VMAP_END	(HUGE_VMAP_BASE + PGDIR_SIZE)

#ifndef __ASSEMBLY__

/* We have no pud since we are a three-level page table. */
#include <asm-generic/pgtable-nopud.h>

static inline int pud_none(pud_t pud)
{
	return pud_val(pud) == 0;
}

static inline int pud_present(pud_t pud)
{
	return pud_val(pud) & _PAGE_PRESENT;
}

#define pmd_ERROR(e) \
	pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e))

static inline void pud_clear(pud_t *pudp)
{
	__pte_clear(&pudp->pgd);
}

static inline int pud_bad(pud_t pud)
{
	return ((pud_val(pud) & _PAGE_ALL) != _PAGE_TABLE);
}

/* Return the page-table frame number (ptfn) that a pud_t points at. */
#define pud_ptfn(pud) hv_pte_get_ptfn((pud).pgd)

/*
 * A given kernel pud_t maps to a kernel pmd_t table at a specific
 * virtual address. Since kernel pmd_t tables can be aligned at
 * sub-page granularity, this macro can return non-page-aligned
 * pointers, despite its name.
 */
#define pud_page_vaddr(pud) \
	(__va((phys_addr_t)pud_ptfn(pud) << HV_LOG2_PAGE_TABLE_ALIGN))

/*
 * A pud_t points to a pmd_t array. Since we can have multiple per
 * page, we don't have a one-to-one mapping of pud_t's to pages.
 */
#define pud_page(pud) pfn_to_page(HV_PTFN_TO_PFN(pud_ptfn(pud)))

static inline unsigned long pud_index(unsigned long address)
{
	return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
}

#define pmd_offset(pud, address) \
	((pmd_t *)pud_page_vaddr(*(pud)) + pmd_index(address))

static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	set_pte(pmdp, pmdval);
}

/* Create a pmd from a PTFN and pgprot. */
static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
{
	return hv_pte_set_ptfn(prot, ptfn);
}

/* Return the page-table frame number (ptfn) that a pmd_t points at. */
static inline unsigned long pmd_ptfn(pmd_t pmd)
{
	return hv_pte_get_ptfn(pmd);
}

static inline void pmd_clear(pmd_t *pmdp)
{
	__pte_clear(pmdp);
}

/* Normalize an address to having the correct high bits set. */
#define pgd_addr_normalize pgd_addr_normalize
static inline unsigned long pgd_addr_normalize(unsigned long addr)
{
	return ((long)addr << (CHIP_WORD_SIZE() - CHIP_VA_WIDTH())) >>
		(CHIP_WORD_SIZE() - CHIP_VA_WIDTH());
}

/* We don't define any pgds for these addresses. */
static inline int pgd_addr_invalid(unsigned long addr)
{
	return addr >= MEM_HV_START ||
		(addr > MEM_LOW_END && addr < MEM_HIGH_START);
}

/*
 * Use atomic instructions to provide atomicity against the hypervisor.
 */
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long addr, pte_t *ptep)
{
	return (__insn_fetchand(&ptep->val, ~HV_PTE_ACCESSED) >>
		HV_PTE_INDEX_ACCESSED) & 0x1;
}

#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	__insn_fetchand(&ptep->val, ~HV_PTE_WRITABLE);
}

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long addr, pte_t *ptep)
{
	return hv_pte(__insn_exch(&ptep->val, 0UL));
}

#endif /* __ASSEMBLY__ */

#endif /* _ASM_TILE_PGTABLE_64_H */
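To make the three-level split concrete, here is a hedged sketch of how
an address decomposes under the constants above; the EXAMPLE_ macros
are hypothetical and simply mirror the pud_index() pattern:

	/* Illustrative only: index of "addr" within each level's table. */
	#define EXAMPLE_PMD_INDEX(addr) \
		(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
	#define EXAMPLE_PTE_INDEX(addr) \
		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))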
@@ -0,0 +1,161 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 *
 * 64-bit SMP ticket spinlocks, allowing only a single CPU anywhere
 * (the type definitions are in asm/spinlock_types.h)
 */

#ifndef _ASM_TILE_SPINLOCK_64_H
#define _ASM_TILE_SPINLOCK_64_H

/* Shifts and masks for the various fields in "lock". */
#define __ARCH_SPIN_CURRENT_SHIFT	17
#define __ARCH_SPIN_NEXT_MASK		0x7fff
#define __ARCH_SPIN_NEXT_OVERFLOW	0x8000

/*
 * Return the "current" portion of a ticket lock value,
 * i.e. the number that currently owns the lock.
 */
static inline int arch_spin_current(u32 val)
{
	return val >> __ARCH_SPIN_CURRENT_SHIFT;
}

/*
 * Return the "next" portion of a ticket lock value,
 * i.e. the number that the next task to try to acquire the lock will get.
 */
static inline int arch_spin_next(u32 val)
{
	return val & __ARCH_SPIN_NEXT_MASK;
}

/* The lock is locked if a task would have to wait to get it. */
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
	u32 val = lock->lock;
	return arch_spin_current(val) != arch_spin_next(val);
}

/* Bump the current ticket so the next task owns the lock. */
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	wmb();  /* guarantee anything modified under the lock is visible */
	__insn_fetchadd4(&lock->lock, 1U << __ARCH_SPIN_CURRENT_SHIFT);
}

void arch_spin_unlock_wait(arch_spinlock_t *lock);

void arch_spin_lock_slow(arch_spinlock_t *lock, u32 val);

/* Grab the "next" ticket number and bump it atomically.
 * If the current ticket is not ours, go to the slow path.
 * We also take the slow path if the "next" value overflows.
 */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
	u32 val = __insn_fetchadd4(&lock->lock, 1);
	u32 ticket = val & (__ARCH_SPIN_NEXT_MASK | __ARCH_SPIN_NEXT_OVERFLOW);
	if (unlikely(arch_spin_current(val) != ticket))
		arch_spin_lock_slow(lock, ticket);
}

/* Try to get the lock, and return whether we succeeded. */
int arch_spin_trylock(arch_spinlock_t *lock);

/* We cannot take an interrupt after getting a ticket, so don't enable them. */
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)

/*
 * Read-write spinlocks, allowing multiple readers
 * but only one writer.
 *
 * We use fetchadd() for readers, and fetchor() with the sign bit
 * for writers.
 */

#define __WRITE_LOCK_BIT (1 << 31)

static inline int arch_write_val_locked(int val)
{
	return val < 0;  /* Optimize "val & __WRITE_LOCK_BIT". */
}

/**
 * read_can_lock - would read_trylock() succeed?
 * @lock: the rwlock in question.
 */
static inline int arch_read_can_lock(arch_rwlock_t *rw)
{
	return !arch_write_val_locked(rw->lock);
}

/**
 * write_can_lock - would write_trylock() succeed?
 * @lock: the rwlock in question.
 */
static inline int arch_write_can_lock(arch_rwlock_t *rw)
{
	return rw->lock == 0;
}

extern void __read_lock_failed(arch_rwlock_t *rw);

static inline void arch_read_lock(arch_rwlock_t *rw)
{
	u32 val = __insn_fetchaddgez4(&rw->lock, 1);
	if (unlikely(arch_write_val_locked(val)))
		__read_lock_failed(rw);
}

extern void __write_lock_failed(arch_rwlock_t *rw, u32 val);

static inline void arch_write_lock(arch_rwlock_t *rw)
{
	u32 val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);
	if (unlikely(val != 0))
		__write_lock_failed(rw, val);
}

static inline void arch_read_unlock(arch_rwlock_t *rw)
{
	__insn_mf();
	__insn_fetchadd4(&rw->lock, -1);
}

static inline void arch_write_unlock(arch_rwlock_t *rw)
{
	__insn_mf();
	rw->lock = 0;
}

static inline int arch_read_trylock(arch_rwlock_t *rw)
{
	return !arch_write_val_locked(__insn_fetchaddgez4(&rw->lock, 1));
}

static inline int arch_write_trylock(arch_rwlock_t *rw)
{
	u32 val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);
	if (likely(val == 0))
		return 1;
	if (!arch_write_val_locked(val))
		__insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT);
	return 0;
}

#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)

#endif /* _ASM_TILE_SPINLOCK_64_H */
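To visualize the ticket layout implied by the shifts and masks above:
bits [31:17] hold the "current" owner, bits [14:0] the "next" ticket,
and bit 15 is the next-overflow flag. A hedged decoding sketch (the
helper is hypothetical):

	/* Illustrative only: decode a raw lock word into ticket fields. */
	static inline void example_decode(u32 val, int *owner, int *next)
	{
		*owner = arch_spin_current(val);  /* bits 31..17 */
		*next  = arch_spin_next(val);     /* bits 14..0  */
	}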
@@ -0,0 +1,55 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 *
 * Atomically access user memory, but use MMU to avoid propagating
 * kernel exceptions.
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/futex.h>
#include <asm/page.h>
#include <asm/processor.h>

/*
 * Provide a set of atomic memory operations supporting <asm/futex.h>.
 *
 * r0: user address to manipulate
 * r1: new value to write, or for cmpxchg, old value to compare against
 * r2: (cmpxchg only) new value to write
 *
 * Return __get_user struct, r0 with value, r1 with error.
 */
#define FUTEX_OP(name, ...) \
STD_ENTRY(futex_##name) \
	__VA_ARGS__; \
	{ \
	 move   r1, zero; \
	 jrp    lr \
	}; \
	STD_ENDPROC(futex_##name); \
	.pushsection __ex_table,"a"; \
	.quad 1b, get_user_fault; \
	.popsection

	.pushsection .fixup,"ax"
get_user_fault:
	{ movei r1, -EFAULT; jrp lr }
	ENDPROC(get_user_fault)
	.popsection

FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2)
FUTEX_OP(set, 1: exch4 r0, r0, r1)
FUTEX_OP(add, 1: fetchadd4 r0, r0, r1)
FUTEX_OP(or, 1: fetchor4 r0, r0, r1)
FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1)
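Note how the "andn" variant works: there is no fetch-and-not
instruction, so the macro complements the operand first ("nor r1, r1,
zero" computes ~r1) and then applies fetchand4. A rough C analogue,
hedged and assuming the tile fetch-and intrinsic seen in the headers
above:

	/* Illustrative only: and-not as complement-then-fetchand. */
	static inline int example_futex_andn(int *uaddr, int oparg)
	{
		return __insn_fetchand4(uaddr, ~oparg);
	}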
@ -0,0 +1,269 @@ |
||||
/* |
||||
* Copyright 2011 Tilera Corporation. All Rights Reserved. |
||||
* |
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License |
||||
* as published by the Free Software Foundation, version 2. |
||||
* |
||||
* This program is distributed in the hope that it will be useful, but |
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or |
||||
* NON INFRINGEMENT. See the GNU General Public License for |
||||
* more details. |
||||
* |
||||
* TILE startup code. |
||||
*/ |
||||
|
||||
#include <linux/linkage.h> |
||||
#include <linux/init.h> |
||||
#include <asm/page.h> |
||||
#include <asm/pgtable.h> |
||||
#include <asm/thread_info.h> |
||||
#include <asm/processor.h> |
||||
#include <asm/asm-offsets.h> |
||||
#include <hv/hypervisor.h> |
||||
#include <arch/chip.h> |
||||
#include <arch/spr_def.h> |
||||
|
||||
/* |
||||
* This module contains the entry code for kernel images. It performs the |
||||
* minimal setup needed to call the generic C routines. |
||||
*/ |
||||
|
||||
__HEAD |
||||
ENTRY(_start) |
||||
/* Notify the hypervisor of what version of the API we want */ |
||||
{ |
||||
movei r1, TILE_CHIP |
||||
movei r2, TILE_CHIP_REV |
||||
} |
||||
{ |
||||
moveli r0, _HV_VERSION |
||||
jal hv_init |
||||
} |
||||
/* Get a reasonable default ASID in r0 */ |
||||
{ |
||||
move r0, zero |
||||
jal hv_inquire_asid |
||||
} |
||||
|
||||
/* |
||||
 * Install the default page table.  The relocation required to
 * statically define the table is a bit too complex, so we have
 * to plug in the pointer from the L0 to the L1 table by hand.
 * We only do this on the first cpu to boot, though, since the
 * other CPUs should see a properly-constructed page table.
 */
        {
          v4int_l r2, zero, r0          /* ASID for hv_install_context */
          moveli r4, hw1_last(swapper_pgprot - PAGE_OFFSET)
        }
        {
          shl16insli r4, r4, hw0(swapper_pgprot - PAGE_OFFSET)
        }
        {
          ld r1, r4                     /* access_pte for hv_install_context */
        }
        {
          moveli r0, hw1_last(.Lsv_data_pmd - PAGE_OFFSET)
          moveli r6, hw1_last(temp_data_pmd - PAGE_OFFSET)
        }
        {
          /* After initializing swapper_pgprot, HV_PTE_GLOBAL is set. */
          bfextu r7, r1, HV_PTE_INDEX_GLOBAL, HV_PTE_INDEX_GLOBAL
          inv r4
        }
        bnez r7, .Lno_write
        {
          shl16insli r0, r0, hw0(.Lsv_data_pmd - PAGE_OFFSET)
          shl16insli r6, r6, hw0(temp_data_pmd - PAGE_OFFSET)
        }
        {
          /* Cut off the low bits of the PT address. */
          shrui r6, r6, HV_LOG2_PAGE_TABLE_ALIGN
          /* Start with our access pte. */
          move r5, r1
        }
        {
          /* Stuff the address into the page table pointer slot of the PTE. */
          bfins r5, r6, HV_PTE_INDEX_PTFN, \
                        HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1
        }
        {
          /* Store the L0 data PTE. */
          st r0, r5
          addli r6, r6, (temp_code_pmd - temp_data_pmd) >> \
                        HV_LOG2_PAGE_TABLE_ALIGN
        }
        {
          addli r0, r0, .Lsv_code_pmd - .Lsv_data_pmd
          bfins r5, r6, HV_PTE_INDEX_PTFN, \
                        HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1
        }
        /* Store the L0 code PTE. */
        st r0, r5

.Lno_write:
        moveli lr, hw2_last(1f)
        {
          shl16insli lr, lr, hw1(1f)
          moveli r0, hw1_last(swapper_pg_dir - PAGE_OFFSET)
        }
        {
          shl16insli lr, lr, hw0(1f)
          shl16insli r0, r0, hw0(swapper_pg_dir - PAGE_OFFSET)
        }
        {
          move r3, zero
          j hv_install_context
        }
1:

        /* Install the interrupt base. */
        moveli r0, hw2_last(MEM_SV_START)
        shl16insli r0, r0, hw1(MEM_SV_START)
        shl16insli r0, r0, hw0(MEM_SV_START)
        mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0

        /*
         * Get our processor number and save it away in SAVE_K_0.
         * Extract stuff from the topology structure: r4 = y, r6 = x,
         * r5 = width.  FIXME: consider whether we want to just make these
         * 64-bit values (and if so fix smp_topology write below, too).
         */
        jal hv_inquire_topology
        {
          v4int_l r5, zero, r1          /* r5 = width */
          shrui r4, r0, 32              /* r4 = y */
        }
        {
          v4int_l r6, zero, r0          /* r6 = x */
          mul_lu_lu r4, r4, r5
        }
        {
          add r4, r4, r6                /* r4 == cpu == y*width + x */
        }

#ifdef CONFIG_SMP
        /*
         * Load up our per-cpu offset.  When the first (master) tile
         * boots, this value is still zero, so we will load boot_pc
         * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
         * The master tile initializes the per-cpu offset array, so that
         * when subsequent (secondary) tiles boot, they will instead load
         * from their per-cpu versions of boot_sp and boot_pc.
         */
        moveli r5, hw2_last(__per_cpu_offset)
        shl16insli r5, r5, hw1(__per_cpu_offset)
        shl16insli r5, r5, hw0(__per_cpu_offset)
        shl3add r5, r4, r5
        ld r5, r5
        bnez r5, 1f

        /*
         * Save the width and height to the smp_topology variable
         * for later use.
         */
        moveli r0, hw2_last(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
        shl16insli r0, r0, hw1(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
        shl16insli r0, r0, hw0(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
        st r0, r1
1:
#else
        move r5, zero
#endif

        /* Load and go with the correct pc and sp. */
        {
          moveli r1, hw2_last(boot_sp)
          moveli r0, hw2_last(boot_pc)
        }
        {
          shl16insli r1, r1, hw1(boot_sp)
          shl16insli r0, r0, hw1(boot_pc)
        }
        {
          shl16insli r1, r1, hw0(boot_sp)
          shl16insli r0, r0, hw0(boot_pc)
        }
        {
          add r1, r1, r5
          add r0, r0, r5
        }
        ld r0, r0
        ld sp, r1
        or r4, sp, r4
        mtspr SPR_SYSTEM_SAVE_K_0, r4   /* save ksp0 + cpu */
        addi sp, sp, -STACK_TOP_DELTA
        {
          move lr, zero                 /* stop backtraces in the called function */
          jr r0
        }
        ENDPROC(_start)

__PAGE_ALIGNED_BSS
        .align PAGE_SIZE
ENTRY(empty_zero_page)
        .fill PAGE_SIZE,1,0
        END(empty_zero_page)

        .macro PTE cpa, bits1
        .quad HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |\
              HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |\
              (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN)
        .endm

__PAGE_ALIGNED_DATA
        .align PAGE_SIZE
ENTRY(swapper_pg_dir)
        .org swapper_pg_dir + HV_L0_INDEX(PAGE_OFFSET) * HV_PTE_SIZE
.Lsv_data_pmd:
        .quad 0                         /* PTE temp_data_pmd - PAGE_OFFSET, 0 */
        .org swapper_pg_dir + HV_L0_INDEX(MEM_SV_START) * HV_PTE_SIZE
.Lsv_code_pmd:
        .quad 0                         /* PTE temp_code_pmd - PAGE_OFFSET, 0 */
        .org swapper_pg_dir + HV_L0_SIZE
        END(swapper_pg_dir)

        .align HV_PAGE_TABLE_ALIGN
ENTRY(temp_data_pmd)
        /*
         * We fill the PAGE_OFFSET pmd with huge pages with
         * VA = PA + PAGE_OFFSET.  We remap things with more precise access
         * permissions later.
         */
        .set addr, 0
        .rept HV_L1_ENTRIES
        PTE addr, HV_PTE_READABLE | HV_PTE_WRITABLE
        .set addr, addr + HV_PAGE_SIZE_LARGE
        .endr
        .org temp_data_pmd + HV_L1_SIZE
        END(temp_data_pmd)

        .align HV_PAGE_TABLE_ALIGN
ENTRY(temp_code_pmd)
        /*
         * We fill the MEM_SV_START pmd with huge pages with
         * VA = PA + PAGE_OFFSET.  We remap things with more precise access
         * permissions later.
         */
        .set addr, 0
        .rept HV_L1_ENTRIES
        PTE addr, HV_PTE_READABLE | HV_PTE_EXECUTABLE
        .set addr, addr + HV_PAGE_SIZE_LARGE
        .endr
        .org temp_code_pmd + HV_L1_SIZE
        END(temp_code_pmd)

        /*
         * Isolate swapper_pgprot to its own cache line, since each cpu
         * starting up will read it using VA-is-PA and local homing.
         * This would otherwise likely conflict with other data on the cache
         * line, once we have set its permanent home in the page tables.
         */
        __INITDATA
        .align CHIP_L2_LINE_SIZE()
ENTRY(swapper_pgprot)
        .quad HV_PTE_PRESENT | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE)
        .align CHIP_L2_LINE_SIZE()
        END(swapper_pgprot)
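
As a cross-check on the table layout above, here is a small C sketch of the bit-packing the PTE macro performs. The bit positions and the HV_CPA_TO_PFN() shift are illustrative stand-ins, not the hypervisor's real encodings (those come from <hv/hypervisor.h>):

#include <stdint.h>

/* Illustrative stand-ins; the real encodings come from <hv/hypervisor.h>. */
#define HV_PTE_PRESENT          (1ULL << 0)
#define HV_PTE_PAGE             (1ULL << 1)
#define HV_PTE_DIRTY            (1ULL << 2)
#define HV_PTE_ACCESSED         (1ULL << 3)
#define HV_PTE_GLOBAL           (1ULL << 4)
#define HV_PTE_INDEX_MODE       16
#define HV_PTE_INDEX_PFN        32
#define HV_PTE_MODE_CACHE_NO_L3 2ULL
#define HV_CPA_TO_PFN(cpa)      ((uint64_t)(cpa) >> 16)  /* assumed shift */

/* The same OR-together the .macro performs for each huge-page entry. */
static uint64_t make_pte(uint64_t cpa, uint64_t bits1)
{
        return HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |
               HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |
               bits1 | (HV_CPA_TO_PFN(cpa) << HV_PTE_INDEX_PFN);
}

int main(void)
{
        return make_pte(0, 0) != 0 ? 0 : 1;     /* trivial smoke test */
}
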
@@ -0,0 +1,145 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/linkage.h>
#include <asm/system.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <arch/spr_def.h>
#include <asm/processor.h>

/*
 * See <asm/system.h>; called with prev and next task_struct pointers.
 * "prev" is returned in r0 for _switch_to and also for ret_from_fork.
 *
 * We want to save pc/sp in "prev", and get the new pc/sp from "next".
 * We also need to save all the callee-saved registers on the stack.
 *
 * Intel enables/disables access to the hardware cycle counter in
 * seccomp (secure computing) environments if necessary, based on
 * has_secure_computing().  We might want to do this at some point,
 * though it would require virtualizing the other SPRs under WORLD_ACCESS.
 *
 * Since we're saving to the stack, we omit sp from this list.
 * And for parallelism with other architectures, we save lr separately,
 * in the thread_struct itself (as the "pc" field).
 *
 * This code also needs to be kept aligned with process.c copy_thread().
 */

#if CALLEE_SAVED_REGS_COUNT != 24
# error Mismatch between <asm/system.h> and kernel/entry.S
#endif
#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 8)

#define SAVE_REG(r) { st r12, r; addi r12, r12, 8 }
#define LOAD_REG(r) { ld r, r12; addi r12, r12, 8 }
#define FOR_EACH_CALLEE_SAVED_REG(f)                                    \
        f(r30); f(r31);                                                 \
        f(r32); f(r33); f(r34); f(r35); f(r36); f(r37); f(r38); f(r39); \
        f(r40); f(r41); f(r42); f(r43); f(r44); f(r45); f(r46); f(r47); \
        f(r48); f(r49); f(r50); f(r51); f(r52);

STD_ENTRY_SECTION(__switch_to, .sched.text)
        {
          move r10, sp
          st sp, lr
        }
        {
          addli r11, sp, -FRAME_SIZE + 8
          addli sp, sp, -FRAME_SIZE
        }
        {
          st r11, r10
          addli r4, r1, TASK_STRUCT_THREAD_KSP_OFFSET
        }
        {
          ld r13, r4            /* Load new sp to a temp register early. */
          addi r12, sp, 16
        }
        FOR_EACH_CALLEE_SAVED_REG(SAVE_REG)
        addli r3, r0, TASK_STRUCT_THREAD_KSP_OFFSET
        {
          st r3, sp
          addli r3, r0, TASK_STRUCT_THREAD_PC_OFFSET
        }
        {
          st r3, lr
          addli r4, r1, TASK_STRUCT_THREAD_PC_OFFSET
        }
        {
          ld lr, r4
          addi r12, r13, 16
        }
        {
          /* Update sp and ksp0 simultaneously to avoid backtracer warnings. */
          move sp, r13
          mtspr SPR_SYSTEM_SAVE_K_0, r2
        }
        FOR_EACH_CALLEE_SAVED_REG(LOAD_REG)
.L__switch_to_pc:
        {
          addli sp, sp, FRAME_SIZE
          jrp lr                /* r0 is still valid here, so return it */
        }
        STD_ENDPROC(__switch_to)
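
A hedged C model of the __switch_to contract implemented above; the struct and helper names are invented for illustration, but the save/restore order mirrors the asm (stash prev's sp/lr, load next's, return prev):

#include <stdio.h>

/* Toy model; field names are invented, standing in for the
 * TASK_STRUCT_THREAD_{KSP,PC}_OFFSET constants and live sp/lr state. */
struct thread_model { unsigned long ksp, pc; };
struct task_model { struct thread_model thread; };

static struct task_model *model_switch_to(struct task_model *prev,
                                          struct task_model *next,
                                          unsigned long *sp, unsigned long *lr)
{
        prev->thread.ksp = *sp;         /* st r3, sp */
        prev->thread.pc = *lr;          /* st r3, lr */
        *sp = next->thread.ksp;         /* ld r13, r4 ... move sp, r13 */
        *lr = next->thread.pc;          /* ld lr, r4 */
        return prev;                    /* r0 still holds "prev" */
}

int main(void)
{
        struct task_model a = { { 0x1000, 0x2000 } }, b = { { 0x3000, 0x4000 } };
        unsigned long sp = a.thread.ksp, lr = a.thread.pc;

        model_switch_to(&a, &b, &sp, &lr);
        printf("now running with sp=%#lx pc=%#lx\n", sp, lr);
        return 0;
}
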

/* Return a suitable address for the backtracer for suspended threads */
STD_ENTRY_SECTION(get_switch_to_pc, .sched.text)
        lnk r0
        {
          addli r0, r0, .L__switch_to_pc - .
          jrp lr
        }
        STD_ENDPROC(get_switch_to_pc)

STD_ENTRY(get_pt_regs)
        .irp reg, r0, r1, r2, r3, r4, r5, r6, r7, \
                  r8, r9, r10, r11, r12, r13, r14, r15, \
                  r16, r17, r18, r19, r20, r21, r22, r23, \
                  r24, r25, r26, r27, r28, r29, r30, r31, \
                  r32, r33, r34, r35, r36, r37, r38, r39, \
                  r40, r41, r42, r43, r44, r45, r46, r47, \
                  r48, r49, r50, r51, r52, tp, sp
        {
          st r0, \reg
          addi r0, r0, 8
        }
        .endr
        {
          st r0, lr
          addi r0, r0, PTREGS_OFFSET_PC - PTREGS_OFFSET_LR
        }
        lnk r1
        {
          st r0, r1
          addi r0, r0, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
        }
        mfspr r1, INTERRUPT_CRITICAL_SECTION
        shli r1, r1, SPR_EX_CONTEXT_1_1__ICS_SHIFT
        ori r1, r1, KERNEL_PL
        {
          st r0, r1
          addi r0, r0, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1
        }
        {
          st r0, zero           /* clear faultnum */
          addi r0, r0, PTREGS_OFFSET_ORIG_R0 - PTREGS_OFFSET_FAULTNUM
        }
        {
          st r0, zero           /* clear orig_r0 */
          addli r0, r0, -PTREGS_OFFSET_ORIG_R0  /* restore r0 to base */
        }
        jrp lr
        STD_ENDPROC(get_pt_regs)
@@ -0,0 +1,71 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>

void *memchr(const void *s, int c, size_t n)
{
        const uint64_t *last_word_ptr;
        const uint64_t *p;
        const char *last_byte_ptr;
        uintptr_t s_int;
        uint64_t goal, before_mask, v, bits;
        char *ret;

        if (__builtin_expect(n == 0, 0)) {
                /* Don't dereference any memory if the array is empty. */
                return NULL;
        }

        /* Get an aligned pointer. */
        s_int = (uintptr_t) s;
        p = (const uint64_t *)(s_int & -8);

        /* Create eight copies of the byte for which we are looking. */
        goal = 0x0101010101010101ULL * (uint8_t) c;

        /* Read the first word, but munge it so that bytes before the array
         * will not match goal.
         *
         * Note that this shift count expression works because we know
         * shift counts are taken mod 64.
         */
        before_mask = (1ULL << (s_int << 3)) - 1;
        v = (*p | before_mask) ^ (goal & before_mask);

        /* Compute the address of the last byte. */
        last_byte_ptr = (const char *)s + n - 1;

        /* Compute the address of the word containing the last byte. */
        last_word_ptr = (const uint64_t *)((uintptr_t) last_byte_ptr & -8);

        while ((bits = __insn_v1cmpeq(v, goal)) == 0) {
                if (__builtin_expect(p == last_word_ptr, 0)) {
                        /* We already read the last word in the array,
                         * so give up.
                         */
                        return NULL;
                }
                v = *++p;
        }

        /* We found a match, but it might be in a byte past the end
         * of the array.
         */
        ret = ((char *)p) + (__insn_ctz(bits) >> 3);
        return (ret <= last_byte_ptr) ? ret : NULL;
}
EXPORT_SYMBOL(memchr);
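
__insn_v1cmpeq and __insn_ctz are TILE-Gx intrinsics; this portable sketch shows the same word-at-a-time idea with the classic zero-in-word trick. The emulation is only exact for the first (lowest) matching lane, which is all memchr needs, and it assumes little-endian byte order as on TILE-Gx:

#include <stdint.h>
#include <string.h>

static uint64_t v1cmpeq_model(uint64_t v, uint64_t goal)
{
        uint64_t x = v ^ goal;  /* lanes where v matches goal become 0 */

        /* Classic zero-byte test: exact for the lowest match; lanes
         * above a true zero may false-positive, which memchr tolerates. */
        return (x - 0x0101010101010101ULL) & ~x & 0x8080808080808080ULL;
}

static unsigned first_match_byte(uint64_t bits)
{
        return (unsigned)__builtin_ctzll(bits) >> 3;
}

int main(void)
{
        uint64_t w;

        memcpy(&w, "abcdefgh", 8);      /* little-endian lanes, as on TILE-Gx */
        return first_match_byte(v1cmpeq_model(w,
                0x0101010101010101ULL * (uint8_t)'d')) == 3 ? 0 : 1;
}
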
@@ -0,0 +1,220 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
#define __memcpy memcpy
/* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */

/* Must be 8 bytes in size. */
#define word_t uint64_t

#if CHIP_L2_LINE_SIZE() != 64 && CHIP_L2_LINE_SIZE() != 128
#error "Assumes 64 or 128 byte line size"
#endif

/* How many cache lines ahead should we prefetch? */
#define PREFETCH_LINES_AHEAD 3

/*
 * Provide "base versions" of load and store for the normal code path.
 * The kernel provides other versions for userspace copies.
 */
#define ST(p, v) (*(p) = (v))
#define LD(p) (*(p))

#ifndef USERCOPY_FUNC
#define ST1 ST
#define ST2 ST
#define ST4 ST
#define ST8 ST
#define LD1 LD
#define LD2 LD
#define LD4 LD
#define LD8 LD
#define RETVAL dstv
void *memcpy(void *__restrict dstv, const void *__restrict srcv, size_t n)
#else
/*
 * Special kernel version will provide implementation of the LDn/STn
 * macros to return a count of uncopied bytes due to mm fault.
 */
#define RETVAL 0
int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
#endif
{
        char *__restrict dst1 = (char *)dstv;
        const char *__restrict src1 = (const char *)srcv;
        const char *__restrict src1_end;
        const char *__restrict prefetch;
        word_t *__restrict dst8;  /* 8-byte pointer to destination memory. */
        word_t final;             /* Final bytes to write to trailing word, if any */
        long i;

        if (n < 16) {
                for (; n; n--)
                        ST1(dst1++, LD1(src1++));
                return RETVAL;
        }

        /*
         * Locate the end of source memory we will copy.  Don't
         * prefetch past this.
         */
        src1_end = src1 + n - 1;

        /* Prefetch ahead a few cache lines, but not past the end. */
        prefetch = src1;
        for (i = 0; i < PREFETCH_LINES_AHEAD; i++) {
                __insn_prefetch(prefetch);
                prefetch += CHIP_L2_LINE_SIZE();
                prefetch = (prefetch > src1_end) ? prefetch : src1;
        }

        /* Copy bytes until dst is word-aligned. */
        for (; (uintptr_t)dst1 & (sizeof(word_t) - 1); n--)
                ST1(dst1++, LD1(src1++));

        /* 8-byte pointer to destination memory. */
        dst8 = (word_t *)dst1;

        if (__builtin_expect((uintptr_t)src1 & (sizeof(word_t) - 1), 0)) {
                /*
                 * Misaligned copy.  Copy 8 bytes at a time, but don't
                 * bother with other fanciness.
                 *
                 * TODO: Consider prefetching and using wh64 as well.
                 */

                /* Create an aligned src8. */
                const word_t *__restrict src8 =
                        (const word_t *)((uintptr_t)src1 & -sizeof(word_t));
                word_t b;

                word_t a = LD8(src8++);
                for (; n >= sizeof(word_t); n -= sizeof(word_t)) {
                        b = LD8(src8++);
                        a = __insn_dblalign(a, b, src1);
                        ST8(dst8++, a);
                        a = b;
                }

                if (n == 0)
                        return RETVAL;

                b = ((const char *)src8 <= src1_end) ? *src8 : 0;

                /*
                 * Final source bytes to write to trailing partial
                 * word, if any.
                 */
                final = __insn_dblalign(a, b, src1);
        } else {
                /* Aligned copy. */

                const word_t* __restrict src8 = (const word_t *)src1;

                /* src8 and dst8 are both word-aligned. */
                if (n >= CHIP_L2_LINE_SIZE()) {
                        /* Copy until 'dst' is cache-line-aligned. */
                        for (; (uintptr_t)dst8 & (CHIP_L2_LINE_SIZE() - 1);
                             n -= sizeof(word_t))
                                ST8(dst8++, LD8(src8++));

                        for (; n >= CHIP_L2_LINE_SIZE(); ) {
                                __insn_wh64(dst8);

                                /*
                                 * Prefetch and advance to next line
                                 * to prefetch, but don't go past the end
                                 */
                                __insn_prefetch(prefetch);
                                prefetch += CHIP_L2_LINE_SIZE();
                                prefetch = (prefetch > src1_end) ? prefetch :
                                        (const char *)src8;

                                /*
                                 * Copy an entire cache line.  Manually
                                 * unrolled to avoid idiosyncrasies of
                                 * compiler unrolling.
                                 */
#define COPY_WORD(offset) ({ ST8(dst8+offset, LD8(src8+offset)); n -= 8; })
                                COPY_WORD(0);
                                COPY_WORD(1);
                                COPY_WORD(2);
                                COPY_WORD(3);
                                COPY_WORD(4);
                                COPY_WORD(5);
                                COPY_WORD(6);
                                COPY_WORD(7);
#if CHIP_L2_LINE_SIZE() == 128
                                COPY_WORD(8);
                                COPY_WORD(9);
                                COPY_WORD(10);
                                COPY_WORD(11);
                                COPY_WORD(12);
                                COPY_WORD(13);
                                COPY_WORD(14);
                                COPY_WORD(15);
#elif CHIP_L2_LINE_SIZE() != 64
# error Fix code that assumes particular L2 cache line sizes
#endif

                                dst8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
                                src8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
                        }
                }

                for (; n >= sizeof(word_t); n -= sizeof(word_t))
                        ST8(dst8++, LD8(src8++));

                if (__builtin_expect(n == 0, 1))
                        return RETVAL;

                final = LD8(src8);
        }

        /* n != 0 if we get here.  Write out any trailing bytes. */
        dst1 = (char *)dst8;
        if (n & 4) {
                ST4((uint32_t *)dst1, final);
                dst1 += 4;
                final >>= 32;
                n &= 3;
        }
        if (n & 2) {
                ST2((uint16_t *)dst1, final);
                dst1 += 2;
                final >>= 16;
                n &= 1;
        }
        if (n)
                ST1((uint8_t *)dst1, final);

        return RETVAL;
}

#ifdef USERCOPY_FUNC
#undef ST1
#undef ST2
#undef ST4
#undef ST8
#undef LD1
#undef LD2
#undef LD4
#undef LD8
#undef USERCOPY_FUNC
#endif
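
The next file re-includes this body three times with different LDn/STn bindings. A minimal standalone sketch of that pattern, with invented names:

#include <stdio.h>

/* One shared "body", parameterized by its load/store macros; the real
 * code keeps the body in memcpy_64.c and re-includes it per variant. */
#define DEFINE_COPY(name, LOADX, STOREX)                        \
static void name(char *dst, const char *src, int n)             \
{                                                               \
        while (n--)                                             \
                STOREX(dst++, LOADX(src++));                    \
}

#define LD_PLAIN(p)     (*(p))
#define ST_PLAIN(p, v)  (*(p) = (v))
#define LD_TRACE(p)     (printf("ld %p\n", (void *)(p)), *(p))

DEFINE_COPY(copy_plain, LD_PLAIN, ST_PLAIN)     /* plays the memcpy role */
DEFINE_COPY(copy_traced, LD_TRACE, ST_PLAIN)    /* plays the usercopy role */

int main(void)
{
        char a[8] = {0}, b[8] = {0};

        copy_plain(a, "tile", 5);
        copy_traced(b, a, 5);
        return (a[0] == 't' && b[3] == 'e') ? 0 : 1;
}
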
@@ -0,0 +1,86 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 *
 * Do memcpy(), but trap and return "n" when a load or store faults.
 *
 * Note: this idiom only works when memcpy() compiles to a leaf function.
 * If "sp" is updated during memcpy, the "jrp lr" will be incorrect.
 *
 * Also note that we are capturing "n" from the containing scope here.
 */

#define _ST(p, inst, v)                                         \
        ({                                                      \
                asm("1: " #inst " %0, %1;"                      \
                    ".pushsection .coldtext.memcpy,\"ax\";"     \
                    "2: { move r0, %2; jrp lr };"               \
                    ".section __ex_table,\"a\";"                \
                    ".quad 1b, 2b;"                             \
                    ".popsection"                               \
                    : "=m" (*(p)) : "r" (v), "r" (n));          \
        })

#define _LD(p, inst)                                            \
        ({                                                      \
                unsigned long __v;                              \
                asm("1: " #inst " %0, %1;"                      \
                    ".pushsection .coldtext.memcpy,\"ax\";"     \
                    "2: { move r0, %2; jrp lr };"               \
                    ".section __ex_table,\"a\";"                \
                    ".quad 1b, 2b;"                             \
                    ".popsection"                               \
                    : "=r" (__v) : "m" (*(p)), "r" (n));        \
                __v;                                            \
        })

#define USERCOPY_FUNC __copy_to_user_inatomic
#define ST1(p, v) _ST((p), st1, (v))
#define ST2(p, v) _ST((p), st2, (v))
#define ST4(p, v) _ST((p), st4, (v))
#define ST8(p, v) _ST((p), st, (v))
#define LD1 LD
#define LD2 LD
#define LD4 LD
#define LD8 LD
#include "memcpy_64.c"

#define USERCOPY_FUNC __copy_from_user_inatomic
#define ST1 ST
#define ST2 ST
#define ST4 ST
#define ST8 ST
#define LD1(p) _LD((p), ld1u)
#define LD2(p) _LD((p), ld2u)
#define LD4(p) _LD((p), ld4u)
#define LD8(p) _LD((p), ld)
#include "memcpy_64.c"

#define USERCOPY_FUNC __copy_in_user_inatomic
#define ST1(p, v) _ST((p), st1, (v))
#define ST2(p, v) _ST((p), st2, (v))
#define ST4(p, v) _ST((p), st4, (v))
#define ST8(p, v) _ST((p), st, (v))
#define LD1(p) _LD((p), ld1u)
#define LD2(p) _LD((p), ld2u)
#define LD4(p) _LD((p), ld4u)
#define LD8(p) _LD((p), ld)
#include "memcpy_64.c"

unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
                                       unsigned long n)
{
        unsigned long rc = __copy_from_user_inatomic(to, from, n);
        if (unlikely(rc))
                memset(to + n - rc, 0, rc);
        return rc;
}
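
A hypothetical kernel-style caller, to illustrate the contract above: whatever tail could not be copied has already been zero-filled, so the buffer is fully initialized either way (the function and names here are invented, not part of this commit):

/* Hypothetical caller; illustrative only. */
static int read_user_record(const void __user *uptr, char *buf,
                            unsigned long len)
{
        unsigned long left = __copy_from_user_zeroing(buf, uptr, len);

        return left ? -EFAULT : 0;      /* buf[len-left .. len) was zeroed */
}
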
@@ -0,0 +1,145 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <arch/chip.h>

#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>

#undef memset

void *memset(void *s, int c, size_t n)
{
        uint64_t *out64;
        int n64, to_align64;
        uint64_t v64;
        uint8_t *out8 = s;

        /* Experimentation shows that a trivial tight loop is a win up until
         * around a size of 20, where writing a word at a time starts to win.
         */
#define BYTE_CUTOFF 20

#if BYTE_CUTOFF < 7
        /* This must be at least this big, or some code later
         * on doesn't work.
         */
#error "BYTE_CUTOFF is too small"
#endif

        if (n < BYTE_CUTOFF) {
                /* Strangely, this turns out to be the tightest way to
                 * write this loop.
                 */
                if (n != 0) {
                        do {
                                /* Strangely, combining these into one line
                                 * performs worse.
                                 */
                                *out8 = c;
                                out8++;
                        } while (--n != 0);
                }

                return s;
        }

        /* Align 'out8'.  We know n >= 7 so this won't write past the end. */
        while (((uintptr_t) out8 & 7) != 0) {
                *out8++ = c;
                --n;
        }

        /* Align 'n'. */
        while (n & 7)
                out8[--n] = c;

        out64 = (uint64_t *) out8;
        n64 = n >> 3;

        /* Tile input byte out to 64 bits. */
        /* KLUDGE */
        v64 = 0x0101010101010101ULL * (uint8_t)c;

        /* This must be at least 8 or the following loop doesn't work. */
#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)

        /* Determine how many words we need to emit before the 'out64'
         * pointer becomes aligned modulo the cache line size.
         */
        to_align64 = (-((uintptr_t)out64 >> 3)) &
                (CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1);

        /* Only bother aligning and using wh64 if there is at least
         * one full cache line to process.  This check also prevents
         * overrunning the end of the buffer with alignment words.
         */
        if (to_align64 <= n64 - CACHE_LINE_SIZE_IN_DOUBLEWORDS) {
                int lines_left;

                /* Align out64 mod the cache line size so we can use wh64. */
                n64 -= to_align64;
                for (; to_align64 != 0; to_align64--) {
                        *out64 = v64;
                        out64++;
                }

                /* Use unsigned divide to turn this into a right shift. */
                lines_left = (unsigned)n64 / CACHE_LINE_SIZE_IN_DOUBLEWORDS;

                do {
                        /* Only wh64 a few lines at a time, so we don't
                         * exceed the maximum number of victim lines.
                         */
                        int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS())
                                 ? lines_left
                                 : CHIP_MAX_OUTSTANDING_VICTIMS());
                        uint64_t *wh = out64;
                        int i = x;
                        int j;

                        lines_left -= x;

                        do {
                                __insn_wh64(wh);
                                wh += CACHE_LINE_SIZE_IN_DOUBLEWORDS;
                        } while (--i);

                        for (j = x * (CACHE_LINE_SIZE_IN_DOUBLEWORDS / 4);
                             j != 0; j--) {
                                *out64++ = v64;
                                *out64++ = v64;
                                *out64++ = v64;
                                *out64++ = v64;
                        }
                } while (lines_left != 0);

                /* We processed all full lines above, so only this many
                 * words remain to be processed.
                 */
                n64 &= CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1;
        }

        /* Now handle any leftover values. */
        if (n64 != 0) {
                do {
                        *out64 = v64;
                        out64++;
                } while (--n64 != 0);
        }

        return s;
}
EXPORT_SYMBOL(memset);
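
A standalone check of the words-to-alignment arithmetic used above, assuming a 64-byte line (8 doublewords):

#include <assert.h>
#include <stdint.h>

int main(void)
{
        const unsigned line_words = 8;  /* 64-byte line / 8-byte words */
        uintptr_t addr;

        for (addr = 0; addr < 1024; addr += 8) {
                unsigned to_align =
                        (unsigned)(-(addr >> 3)) & (line_words - 1);
                /* Emitting to_align words reaches a line boundary. */
                assert(((addr >> 3) + to_align) % line_words == 0);
        }
        return 0;
}
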
@@ -0,0 +1,104 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/spinlock.h>
#include <linux/module.h>
#include <asm/processor.h>

#include "spinlock_common.h"

/*
 * Read the spinlock value without allocating in our cache and without
 * causing an invalidation to another cpu with a copy of the cacheline.
 * This is important when we are spinning waiting for the lock.
 */
static inline u32 arch_spin_read_noalloc(void *lock)
{
        return atomic_cmpxchg((atomic_t *)lock, -1, -1);
}

/*
 * Wait until the high bits (current) match my ticket.
 * If we notice the overflow bit set on entry, we clear it.
 */
void arch_spin_lock_slow(arch_spinlock_t *lock, u32 my_ticket)
{
        if (unlikely(my_ticket & __ARCH_SPIN_NEXT_OVERFLOW)) {
                __insn_fetchand4(&lock->lock, ~__ARCH_SPIN_NEXT_OVERFLOW);
                my_ticket &= ~__ARCH_SPIN_NEXT_OVERFLOW;
        }

        for (;;) {
                u32 val = arch_spin_read_noalloc(lock);
                u32 delta = my_ticket - arch_spin_current(val);
                if (delta == 0)
                        return;
                relax((128 / CYCLES_PER_RELAX_LOOP) * delta);
        }
}
EXPORT_SYMBOL(arch_spin_lock_slow);

/*
 * Check the lock to see if it is plausible, and try to get it with cmpxchg().
 */
int arch_spin_trylock(arch_spinlock_t *lock)
{
        u32 val = arch_spin_read_noalloc(lock);
        if (unlikely(arch_spin_current(val) != arch_spin_next(val)))
                return 0;
        return cmpxchg(&lock->lock, val, (val + 1) & ~__ARCH_SPIN_NEXT_OVERFLOW)
                == val;
}
EXPORT_SYMBOL(arch_spin_trylock);

void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
        u32 iterations = 0;
        while (arch_spin_is_locked(lock))
                delay_backoff(iterations++);
}
EXPORT_SYMBOL(arch_spin_unlock_wait);

/*
 * If the read lock fails due to a writer, we retry periodically
 * until the value is positive and we write our incremented reader count.
 */
void __read_lock_failed(arch_rwlock_t *rw)
{
        u32 val;
        int iterations = 0;
        do {
                delay_backoff(iterations++);
                val = __insn_fetchaddgez4(&rw->lock, 1);
        } while (unlikely(arch_write_val_locked(val)));
}
EXPORT_SYMBOL(__read_lock_failed);

/*
 * If we failed because there were readers, clear the "writer" bit
 * so we don't block additional readers.  Otherwise, there was another
 * writer anyway, so our "fetchor" made no difference.  Then wait,
 * issuing periodic fetchor instructions, till we get the lock.
 */
void __write_lock_failed(arch_rwlock_t *rw, u32 val)
{
        int iterations = 0;
        do {
                if (!arch_write_val_locked(val))
                        val = __insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT);
                delay_backoff(iterations++);
                val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);
        } while (val != 0);
}
EXPORT_SYMBOL(__write_lock_failed);
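
A toy single-threaded model of the ticket discipline these routines implement; the two plain fields stand in for the packed current/next halves of lock->lock and are not the kernel's encoding:

#include <assert.h>
#include <stdint.h>

struct ticket_model { uint32_t current, next; };

static int model_trylock(struct ticket_model *l)
{
        if (l->current != l->next)
                return 0;       /* a ticket ahead of us is still unserved */
        l->next++;              /* claim the next ticket; we now hold the lock */
        return 1;
}

static void model_unlock(struct ticket_model *l)
{
        l->current++;           /* serve the next waiting ticket */
}

int main(void)
{
        struct ticket_model l = { 0, 0 };

        assert(model_trylock(&l));      /* uncontended: current == next */
        assert(!model_trylock(&l));     /* held: next is one ahead */
        model_unlock(&l);
        assert(model_trylock(&l));
        return 0;
}
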
@@ -0,0 +1,67 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>

#undef strchr

char *strchr(const char *s, int c)
{
        int z, g;

        /* Get an aligned pointer. */
        const uintptr_t s_int = (uintptr_t) s;
        const uint64_t *p = (const uint64_t *)(s_int & -8);

        /* Create eight copies of the byte for which we are looking. */
        const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;

        /* Read the first aligned word, but force bytes before the string to
         * match neither zero nor goal (we make sure the high bit of each
         * byte is 1, and the low 7 bits are all the opposite of the goal
         * byte).
         *
         * Note that this shift count expression works because we know shift
         * counts are taken mod 64.
         */
        const uint64_t before_mask = (1ULL << (s_int << 3)) - 1;
        uint64_t v = (*p | before_mask) ^
                (goal & __insn_v1shrsi(before_mask, 1));

        uint64_t zero_matches, goal_matches;
        while (1) {
                /* Look for a terminating '\0'. */
                zero_matches = __insn_v1cmpeqi(v, 0);

                /* Look for the goal byte. */
                goal_matches = __insn_v1cmpeq(v, goal);

                if (__builtin_expect((zero_matches | goal_matches) != 0, 0))
                        break;

                v = *++p;
        }

        z = __insn_ctz(zero_matches);
        g = __insn_ctz(goal_matches);

        /* If we found c before '\0' we got a match.  Note that if c == '\0'
         * then g == z, and we correctly return the address of the '\0'
         * rather than NULL.
         */
        return (g <= z) ? ((char *)p) + (g >> 3) : NULL;
}
EXPORT_SYMBOL(strchr);
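
The c == '\0' corner case called out above is just standard strchr semantics; a tiny host-side check:

#include <assert.h>
#include <string.h>

int main(void)
{
        const char s[] = "tile";

        assert(strchr(s, 'l') == s + 2);
        assert(strchr(s, '\0') == s + 4);       /* address of the terminator */
        assert(strchr(s, 'x') == NULL);
        return 0;
}
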
@@ -0,0 +1,38 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>

#undef strlen

size_t strlen(const char *s)
{
        /* Get an aligned pointer. */
        const uintptr_t s_int = (uintptr_t) s;
        const uint64_t *p = (const uint64_t *)(s_int & -8);

        /* Read the first word, but force bytes before the string to be nonzero.
         * This expression works because we know shift counts are taken mod 64.
         */
        uint64_t v = *p | ((1ULL << (s_int << 3)) - 1);

        uint64_t bits;
        while ((bits = __insn_v1cmpeqi(v, 0)) == 0)
                v = *++p;

        return ((const char *)p) + (__insn_ctz(bits) >> 3) - s;
}
EXPORT_SYMBOL(strlen);
@@ -0,0 +1,196 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cache.h>
#include <arch/chip.h>

/* Access user memory, but use MMU to avoid propagating kernel exceptions. */

        .pushsection .fixup,"ax"

get_user_fault:
        { movei r1, -EFAULT; move r0, zero }
        jrp lr
        ENDPROC(get_user_fault)

put_user_fault:
        { movei r0, -EFAULT; jrp lr }
        ENDPROC(put_user_fault)

        .popsection

/*
 * __get_user_N functions take a pointer in r0, and return 0 in r1
 * on success, with the value in r0; or else -EFAULT in r1.
 */
#define __get_user_N(bytes, LOAD)                                       \
        STD_ENTRY(__get_user_##bytes);                                  \
1:      { LOAD r0, r0; move r1, zero };                                 \
        jrp lr;                                                         \
        STD_ENDPROC(__get_user_##bytes);                                \
        .pushsection __ex_table,"a";                                    \
        .quad 1b, get_user_fault;                                       \
        .popsection

__get_user_N(1, ld1u)
__get_user_N(2, ld2u)
__get_user_N(4, ld4u)
__get_user_N(8, ld)

/*
 * __put_user_N functions take a value in r0 and a pointer in r1,
 * and return 0 in r0 on success or -EFAULT on failure.
 */
#define __put_user_N(bytes, STORE)                                      \
        STD_ENTRY(__put_user_##bytes);                                  \
1:      { STORE r1, r0; move r0, zero };                                \
        jrp lr;                                                         \
        STD_ENDPROC(__put_user_##bytes);                                \
        .pushsection __ex_table,"a";                                    \
        .quad 1b, put_user_fault;                                       \
        .popsection

__put_user_N(1, st1)
__put_user_N(2, st2)
__put_user_N(4, st4)
__put_user_N(8, st)

/*
 * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
 * It returns the length, including the terminating NUL, or zero on exception.
 * If length is greater than the bound, returns one plus the bound.
 */
STD_ENTRY(strnlen_user_asm)
        { beqz r1, 2f; addi r3, r0, -1 }        /* bias down to include NUL */
1:      { ld1u r4, r0; addi r1, r1, -1 }
        beqz r4, 2f
        { bnezt r1, 1b; addi r0, r0, 1 }
2:      { sub r0, r0, r3; jrp lr }
        STD_ENDPROC(strnlen_user_asm)
        .pushsection .fixup,"ax"
strnlen_user_fault:
        { move r0, zero; jrp lr }
        ENDPROC(strnlen_user_fault)
        .section __ex_table,"a"
        .quad 1b, strnlen_user_fault
        .popsection
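
A C model of the strnlen_user_asm contract just described (illustrative only; the real routine additionally returns 0 if the user load faults):

/* Illustrative model, not part of this commit. */
static unsigned long strnlen_user_model(const char *s, unsigned long bound)
{
        unsigned long i;

        for (i = 0; i < bound; i++)
                if (s[i] == '\0')
                        return i + 1;           /* length including the NUL */
        return bound + 1;                       /* longer than the bound */
}
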

/*
 * strncpy_from_user_asm takes the kernel target pointer in r0,
 * the userspace source pointer in r1, and the length bound (including
 * the trailing NUL) in r2.  On success, it returns the string length
 * (not including the trailing NUL), or -EFAULT on failure.
 */
STD_ENTRY(strncpy_from_user_asm)
        { beqz r2, 2f; move r3, r0 }
1:      { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
        { st1 r0, r4; addi r0, r0, 1 }
        beqz r2, 2f
        bnezt r4, 1b
        addi r0, r0, -1         /* don't count the trailing NUL */
2:      { sub r0, r0, r3; jrp lr }
        STD_ENDPROC(strncpy_from_user_asm)
        .pushsection .fixup,"ax"
strncpy_from_user_fault:
        { movei r0, -EFAULT; jrp lr }
        ENDPROC(strncpy_from_user_fault)
        .section __ex_table,"a"
        .quad 1b, strncpy_from_user_fault
        .popsection

/*
 * clear_user_asm takes the user target address in r0 and the
 * number of bytes to zero in r1.
 * It returns the number of uncopiable bytes (hopefully zero) in r0.
 * Note that we don't use a separate .fixup section here since we fall
 * through into the "fixup" code as the last straight-line bundle anyway.
 */
STD_ENTRY(clear_user_asm)
        { beqz r1, 2f; or r2, r0, r1 }
        andi r2, r2, 7
        beqzt r2, .Lclear_aligned_user_asm
1:      { st1 r0, zero; addi r0, r0, 1; addi r1, r1, -1 }
        bnezt r1, 1b
2:      { move r0, r1; jrp lr }
        .pushsection __ex_table,"a"
        .quad 1b, 2b
        .popsection

.Lclear_aligned_user_asm:
1:      { st r0, zero; addi r0, r0, 8; addi r1, r1, -8 }
        bnezt r1, 1b
2:      { move r0, r1; jrp lr }
        STD_ENDPROC(clear_user_asm)
        .pushsection __ex_table,"a"
        .quad 1b, 2b
        .popsection

/*
 * flush_user_asm takes the user target address in r0 and the
 * number of bytes to flush in r1.
 * It returns the number of unflushable bytes (hopefully zero) in r0.
 */
STD_ENTRY(flush_user_asm)
        beqz r1, 2f
        { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
        { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
        { and r0, r0, r2; and r1, r1, r2 }
        { sub r1, r1, r0 }
1:      { flush r0; addi r1, r1, -CHIP_FLUSH_STRIDE() }
        { addi r0, r0, CHIP_FLUSH_STRIDE(); bnezt r1, 1b }
2:      { move r0, r1; jrp lr }
        STD_ENDPROC(flush_user_asm)
        .pushsection __ex_table,"a"
        .quad 1b, 2b
        .popsection

/*
 * inv_user_asm takes the user target address in r0 and the
 * number of bytes to invalidate in r1.
 * It returns the number of not inv'able bytes (hopefully zero) in r0.
 */
STD_ENTRY(inv_user_asm)
        beqz r1, 2f
        { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
        { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
        { and r0, r0, r2; and r1, r1, r2 }
        { sub r1, r1, r0 }
1:      { inv r0; addi r1, r1, -CHIP_INV_STRIDE() }
        { addi r0, r0, CHIP_INV_STRIDE(); bnezt r1, 1b }
2:      { move r0, r1; jrp lr }
        STD_ENDPROC(inv_user_asm)
        .pushsection __ex_table,"a"
        .quad 1b, 2b
        .popsection

/*
 * finv_user_asm takes the user target address in r0 and the
 * number of bytes to flush-invalidate in r1.
 * It returns the number of not finv'able bytes (hopefully zero) in r0.
 */
STD_ENTRY(finv_user_asm)
        beqz r1, 2f
        { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
        { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
        { and r0, r0, r2; and r1, r1, r2 }
        { sub r1, r1, r0 }
1:      { finv r0; addi r1, r1, -CHIP_FINV_STRIDE() }
        { addi r0, r0, CHIP_FINV_STRIDE(); bnezt r1, 1b }
2:      { move r0, r1; jrp lr }
        STD_ENDPROC(finv_user_asm)
        .pushsection __ex_table,"a"
        .quad 1b, 2b
        .popsection
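
flush_user_asm and friends round the byte range outward to whole cache lines before striding through it; a standalone check of that mask arithmetic, with an assumed 64-byte line:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        const uintptr_t line = 64;
        uintptr_t addr = 0x12345, len = 200;
        uintptr_t start = addr & -line;                   /* round down */
        uintptr_t end = (addr + len + line - 1) & -line;  /* round up */

        assert(start % line == 0 && end % line == 0);
        assert(start <= addr && addr + len <= end);
        return 0;
}
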
@@ -0,0 +1,187 @@
/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 *
 * This routine is a helper for migrating the home of a set of pages to
 * a new cpu.  See the documentation in homecache.c for more information.
 */

#include <linux/linkage.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/thread_info.h>
#include <asm/types.h>
#include <asm/asm-offsets.h>
#include <hv/hypervisor.h>

        .text

/*
 * First, some definitions that apply to all the code in the file.
 */

/* Locals (caller-save) */
#define r_tmp           r10
#define r_save_sp       r11

/* What we save where in the stack frame; must include all callee-saves. */
#define FRAME_SP        8
#define FRAME_R30       16
#define FRAME_R31       24
#define FRAME_R32       32
#define FRAME_R33       40
#define FRAME_SIZE      48

/*
 * On entry:
 *
 *   r0 the new context PA to install (moved to r_context)
 *   r1 PTE to use for context access (moved to r_access)
 *   r2 ASID to use for new context (moved to r_asid)
 *   r3 pointer to cpumask with just this cpu set in it (r_my_cpumask)
 */

/* Arguments (caller-save) */
#define r_context_in    r0
#define r_access_in     r1
#define r_asid_in       r2
#define r_my_cpumask    r3

/* Locals (callee-save); must not be more than FRAME_xxx above. */
#define r_save_ics      r30
#define r_context       r31
#define r_access        r32
#define r_asid          r33

/*
 * Caller-save locals and frame constants are the same as
 * for homecache_migrate_stack_and_flush.
 */

STD_ENTRY(flush_and_install_context)
        /*
         * Create a stack frame; we can't touch it once we flush the
         * cache until we install the new page table and flush the TLB.
         */
        {
          move r_save_sp, sp
          st sp, lr
          addi sp, sp, -FRAME_SIZE
        }
        addi r_tmp, sp, FRAME_SP
        {
          st r_tmp, r_save_sp
          addi r_tmp, sp, FRAME_R30
        }
        {
          st r_tmp, r30
          addi r_tmp, sp, FRAME_R31
        }
        {
          st r_tmp, r31
          addi r_tmp, sp, FRAME_R32
        }
        {
          st r_tmp, r32
          addi r_tmp, sp, FRAME_R33
        }
        st r_tmp, r33

        /* Move some arguments to callee-save registers. */
        {
          move r_context, r_context_in
          move r_access, r_access_in
        }
        move r_asid, r_asid_in

        /* Disable interrupts, since we can't use our stack. */
        {
          mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION
          movei r_tmp, 1
        }
        mtspr INTERRUPT_CRITICAL_SECTION, r_tmp

        /* First, flush our L2 cache. */
        {
          move r0, zero         /* cache_pa */
          moveli r1, hw2_last(HV_FLUSH_EVICT_L2)        /* cache_control */
        }
        {
          shl16insli r1, r1, hw1(HV_FLUSH_EVICT_L2)
          move r2, r_my_cpumask /* cache_cpumask */
        }
        {
          shl16insli r1, r1, hw0(HV_FLUSH_EVICT_L2)
          move r3, zero         /* tlb_va */
        }
        {
          move r4, zero         /* tlb_length */
          move r5, zero         /* tlb_pgsize */
        }
        {
          move r6, zero         /* tlb_cpumask */
          move r7, zero         /* asids */
        }
        {
          move r8, zero         /* asidcount */
          jal hv_flush_remote
        }
        bnez r0, 1f

        /* Now install the new page table. */
        {
          move r0, r_context
          move r1, r_access
        }
        {
          move r2, r_asid
          movei r3, HV_CTX_DIRECTIO
        }
        jal hv_install_context
        bnez r0, 1f

        /* Finally, flush the TLB. */
        {
          movei r0, 0           /* preserve_global */
          jal hv_flush_all
        }

1:      /* Reset interrupts back to how they were before. */
        mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics

        /* Restore the callee-saved registers and return. */
        addli lr, sp, FRAME_SIZE
        {
          ld lr, lr
          addli r_tmp, sp, FRAME_R30
        }
        {
          ld r30, r_tmp
          addli r_tmp, sp, FRAME_R31
        }
        {
          ld r31, r_tmp
          addli r_tmp, sp, FRAME_R32
        }
        {
          ld r32, r_tmp
          addli r_tmp, sp, FRAME_R33
        }
        {
          ld r33, r_tmp
          addi sp, sp, FRAME_SIZE
        }
        jrp lr
        STD_ENDPROC(flush_and_install_context)
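
For reference, an assumed C-side view of the entry contract above; the exact prototype and typedefs live in the tile headers, so treat this as a sketch:

/* Sketch only: argument types are stand-ins for the HV_* typedefs. */
int flush_and_install_context(unsigned long context_pa,   /* r0: new page table PA */
                              unsigned long access_pte,   /* r1: PTE for table access */
                              unsigned long asid,         /* r2: ASID for new context */
                              const unsigned long *my_cpumask); /* r3: just this cpu */
/* Returns 0 on success; a nonzero hv_flush_remote() or hv_install_context()
 * result is passed straight back (the asm bails to label 1 on error). */
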