/* * Blackfin On-Chip Sport Emulated UART Driver * * Copyright 2006-2008 Analog Devices Inc. * * Enter bugs at http://blackfin.uclinux.org/ * * Licensed under the GPL-2 or later. */ /* * This driver and the hardware supported are in term of EE-191 of ADI. * http://www.analog.com/static/imported-files/application_notes/EE191.pdf * This application note describe how to implement a UART on a Sharc DSP, * but this driver is implemented on Blackfin Processor. * Transmit Frame Sync is not used by this driver to transfer data out. */ #ifndef _BFIN_SPORT_UART_H #define _BFIN_SPORT_UART_H #define OFFSET_TCR1 0x00 /* Transmit Configuration 1 Register */ #define OFFSET_TCR2 0x04 /* Transmit Configuration 2 Register */ #define OFFSET_TCLKDIV 0x08 /* Transmit Serial Clock Divider Register */ #define OFFSET_TFSDIV 0x0C /* Transmit Frame Sync Divider Register */ #define OFFSET_TX 0x10 /* Transmit Data Register */ #define OFFSET_RX 0x18 /* Receive Data Register */ #define OFFSET_RCR1 0x20 /* Receive Configuration 1 Register */ #define OFFSET_RCR2 0x24 /* Receive Configuration 2 Register */ #define OFFSET_RCLKDIV 0x28 /* Receive Serial Clock Divider Register */ #define OFFSET_RFSDIV 0x2c /* Receive Frame Sync Divider Register */ #define OFFSET_STAT 0x30 /* Status Register */ #define SPORT_GET_TCR1(sport) bfin_read16(((sport)->port.membase + OFFSET_TCR1)) #define SPORT_GET_TCR2(sport) bfin_read16(((sport)->port.membase + OFFSET_TCR2)) #define SPORT_GET_TCLKDIV(sport) bfin_read16(((sport)->port.membase + OFFSET_TCLKDIV)) #define SPORT_GET_TFSDIV(sport) bfin_read16(((sport)->port.membase + OFFSET_TFSDIV)) #define SPORT_GET_TX(sport) bfin_read16(((sport)->port.membase + OFFSET_TX)) #define SPORT_GET_RX(sport) bfin_read16(((sport)->port.membase + OFFSET_RX)) /* * If another interrupt fires while doing a 32-bit read from RX FIFO, * a fake RX underflow error will be generated. So disable interrupts * to prevent interruption while reading the FIFO. */ #define SPORT_GET_RX32(sport) \ ({ \ unsigned int __ret; \ unsigned long flags; \ if (ANOMALY_05000473) \ local_irq_save(flags); \ __ret = bfin_read32((sport)->port.membase + OFFSET_RX); \ if (ANOMALY_05000473) \ local_irq_restore(flags); \ __ret; \ }) #define SPORT_GET_RCR1(sport) bfin_read16(((sport)->port.membase + OFFSET_RCR1)) #define SPORT_GET_RCR2(sport) bfin_read16(((sport)->port.membase + OFFSET_RCR2)) #define SPORT_GET_RCLKDIV(sport) bfin_read16(((sport)->port.membase + OFFSET_RCLKDIV)) #define SPORT_GET_RFSDIV(sport) bfin_read16(((sport)->port.membase + OFFSET_RFSDIV)) #define SPORT_GET_STAT(sport) bfin_read16(((sport)->port.membase + OFFSET_STAT)) #define SPORT_PUT_TCR1(sport, v) bfin_write16(((sport)->port.membase + OFFSET_TCR1), v) #define SPORT_PUT_TCR2(sport, v) bfin_write16(((sport)->port.membase + OFFSET_TCR2), v) #define SPORT_PUT_TCLKDIV(sport, v) bfin_write16(((sport)->port.membase + OFFSET_TCLKDIV), v) #define SPORT_PUT_TFSDIV(sport, v) bfin_write16(((sport)->port.membase + OFFSET_TFSDIV), v) #define SPORT_PUT_TX(sport, v) bfin_write16(((sport)->port.membase + OFFSET_TX), v) #define SPORT_PUT_RX(sport, v) bfin_write16(((sport)->port.membase + OFFSET_RX), v) #define SPORT_PUT_RCR1(sport, v) bfin_write16(((sport)->port.membase + OFFSET_RCR1), v) #define SPORT_PUT_RCR2(sport, v) bfin_write16(((sport)->port.membase + OFFSET_RCR2), v) #define SPORT_PUT_RCLKDIV(sport, v) bfin_write16(((sport)->port.membase + OFFSET_RCLKDIV), v) #define SPORT_PUT_RFSDIV(sport, v) bfin_write16(((sport)->port.membase + OFFSET_RFSDIV), v) #define SPORT_PUT_STAT(sport, v) bfin_write16(((sport)->port.membase + OFFSET_STAT), v) #define SPORT_TX_FIFO_SIZE 8 #define SPORT_UART_GET_CTS(x) gpio_get_value(x->cts_pin) #define SPORT_UART_DISABLE_RTS(x) gpio_set_value(x->rts_pin, 1) #define SPORT_UART_ENABLE_RTS(x) gpio_set_value(x->rts_pin, 0) #if defined(CONFIG_SERIAL_BFIN_SPORT0_UART_CTSRTS) \ || defined(CONFIG_SERIAL_BFIN_SPORT1_UART_CTSRTS) \ || defined(CONFIG_SERIAL_BFIN_SPORT2_UART_CTSRTS) \ || defined(CONFIG_SERIAL_BFIN_SPORT3_UART_CTSRTS) # define CONFIG_SERIAL_BFIN_SPORT_CTSRTS #endif #endif /* _BFIN_SPORT_UART_H */ orm.submit();'>
authorLinus Torvalds <torvalds@linux-foundation.org>2016-12-27 11:40:38 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-29 11:03:15 -0800
commitb91e1302ad9b80c174a4855533f7e3aa2873355e (patch)
treedacf84fd1ec749b64dd8245195e277d5f804ca12 /drivers/usb/gadget
parent2d706e790f0508dff4fb72eca9b4892b79757feb (diff)
mm: optimize PageWaiters bit use for unlock_page()
In commit 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Nick Piggin made our page locking no longer unconditionally touch the hashed page waitqueue, which not only helps performance in general, but is particularly helpful on NUMA machines where the hashed wait queues can bounce around a lot. However, the "clear lock bit atomically and then test the waiters bit" sequence turns out to be much more expensive than it needs to be, because you get a nasty stall when trying to access the same word that just got updated atomically. On architectures where locking is done with LL/SC, this would be trivial to fix with a new primitive that clears one bit and tests another atomically, but that ends up not working on x86, where the only atomic operations that return the result end up being cmpxchg and xadd. The atomic bit operations return the old value of the same bit we changed, not the value of an unrelated bit. On x86, we could put the lock bit in the high bit of the byte, and use "xadd" with that bit (where the overflow ends up not touching other bits), and look at the other bits of the result. However, an even simpler model is to just use a regular atomic "and" to clear the lock bit, and then the sign bit in eflags will indicate the resulting state of the unrelated bit #7. So by moving the PageWaiters bit up to bit #7, we can atomically clear the lock bit and test the waiters bit on x86 too. And architectures with LL/SC (which is all the usual RISC suspects), the particular bit doesn't matter, so they are fine with this approach too. This avoids the extra access to the same atomic word, and thus avoids the costly stall at page unlock time. The only downside is that the interface ends up being a bit odd and specialized: clear a bit in a byte, and test the sign bit. Nick doesn't love the resulting name of the new primitive, but I'd rather make the name be descriptive and very clear about the limitation imposed by trying to work across all relevant architectures than make it be some generic thing that doesn't make the odd semantics explicit. So this introduces the new architecture primitive clear_bit_unlock_is_negative_byte(); and adds the trivial implementation for x86. We have a generic non-optimized fallback (that just does a "clear_bit()"+"test_bit(7)" combination) which can be overridden by any architecture that can do better. According to Nick, Power has the same hickup x86 has, for example, but some other architectures may not even care. All these optimizations mean that my page locking stress-test (which is just executing a lot of small short-lived shell scripts: "make test" in the git source tree) no longer makes our page locking look horribly bad. Before all these optimizations, just the unlock_page() costs were just over 3% of all CPU overhead on "make test". After this, it's down to 0.66%, so just a quarter of the cost it used to be. (The difference on NUMA is bigger, but there this micro-optimization is likely less noticeable, since the big issue on NUMA was not the accesses to 'struct page', but the waitqueue accesses that were already removed by Nick's earlier commit). Acked-by: Nick Piggin <npiggin@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Bob Peterson <rpeterso@redhat.com> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Andrew Lutomirski <luto@kernel.org> Cc: Andreas Gruenbacher <agruenba@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/usb/gadget')