/* * linux/sound/soc-topology.h -- ALSA SoC Firmware Controls and DAPM * * Copyright (C) 2012 Texas Instruments Inc. * Copyright (C) 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * Simple file API to load FW that includes mixers, coefficients, DAPM graphs, * algorithms, equalisers, DAIs, widgets, FE caps, BE caps, codec link caps etc. */ #ifndef __LINUX_SND_SOC_TPLG_H #define __LINUX_SND_SOC_TPLG_H #include #include struct firmware; struct snd_kcontrol; struct snd_soc_tplg_pcm_be; struct snd_ctl_elem_value; struct snd_ctl_elem_info; struct snd_soc_dapm_widget; struct snd_soc_component; struct snd_soc_tplg_pcm_fe; struct snd_soc_dapm_context; struct snd_soc_card; /* object scan be loaded and unloaded in groups with identfying indexes */ #define SND_SOC_TPLG_INDEX_ALL 0 /* ID that matches all FW objects */ /* dynamic object type */ enum snd_soc_dobj_type { SND_SOC_DOBJ_NONE = 0, /* object is not dynamic */ SND_SOC_DOBJ_MIXER, SND_SOC_DOBJ_ENUM, SND_SOC_DOBJ_BYTES, SND_SOC_DOBJ_PCM, SND_SOC_DOBJ_DAI_LINK, SND_SOC_DOBJ_CODEC_LINK, SND_SOC_DOBJ_WIDGET, }; /* dynamic control object */ struct snd_soc_dobj_control { struct snd_kcontrol *kcontrol; char **dtexts; unsigned long *dvalues; }; /* dynamic widget object */ struct snd_soc_dobj_widget { unsigned int kcontrol_type; /* kcontrol type: mixer, enum, bytes */ }; /* generic dynamic object - all dynamic objects belong to this struct */ struct snd_soc_dobj { enum snd_soc_dobj_type type; unsigned int index; /* objects can belong in different groups */ struct list_head list; struct snd_soc_tplg_ops *ops; union { struct snd_soc_dobj_control control; struct snd_soc_dobj_widget widget; }; void *private; /* core does not touch this */ }; /* * Kcontrol operations - used to map handlers onto firmware based controls. 
*/ struct snd_soc_tplg_kcontrol_ops { u32 id; int (*get)(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int (*put)(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int (*info)(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo); }; /* Bytes ext operations, for TLV byte controls */ struct snd_soc_tplg_bytes_ext_ops { u32 id; int (*get)(struct snd_kcontrol *kcontrol, unsigned int __user *bytes, unsigned int size); int (*put)(struct snd_kcontrol *kcontrol, const unsigned int __user *bytes, unsigned int size); }; /* * DAPM widget event handlers - used to map handlers onto widgets. */ struct snd_soc_tplg_widget_events { u16 type; int (*event_handler)(struct snd_soc_dapm_widget *w, struct snd_kcontrol *k, int event); }; /* * Public API - Used by component drivers to load and unload dynamic objects * and their resources. */ struct snd_soc_tplg_ops { /* external kcontrol init - used for any driver specific init */ int (*control_load)(struct snd_soc_component *, struct snd_kcontrol_new *, struct snd_soc_tplg_ctl_hdr *); int (*control_unload)(struct snd_soc_component *, struct snd_soc_dobj *); /* external widget init - used for any driver specific init */ int (*widget_load)(struct snd_soc_component *, struct snd_soc_dapm_widget *, struct snd_soc_tplg_dapm_widget *); int (*widget_unload)(struct snd_soc_component *, struct snd_soc_dobj *); /* FE DAI - used for any driver specific init */ int (*dai_load)(struct snd_soc_component *, struct snd_soc_dai_driver *dai_drv); int (*dai_unload)(struct snd_soc_component *, struct snd_soc_dobj *); /* DAI link - used for any driver specific init */ int (*link_load)(struct snd_soc_component *, struct snd_soc_dai_link *link); int (*link_unload)(struct snd_soc_component *, struct snd_soc_dobj *); /* callback to handle vendor bespoke data */ int (*vendor_load)(struct snd_soc_component *, struct snd_soc_tplg_hdr *); int (*vendor_unload)(struct snd_soc_component *, struct snd_soc_tplg_hdr *); 
/* completion - called at completion of firmware loading */ void (*complete)(struct snd_soc_component *); /* manifest - optional to inform component of manifest */ int (*manifest)(struct snd_soc_component *, struct snd_soc_tplg_manifest *); /* vendor specific kcontrol handlers available for binding */ const struct snd_soc_tplg_kcontrol_ops *io_ops; int io_ops_count; /* vendor specific bytes ext handlers available for binding */ const struct snd_soc_tplg_bytes_ext_ops *bytes_ext_ops; int bytes_ext_ops_count; }; #ifdef CONFIG_SND_SOC_TOPOLOGY /* gets a pointer to data from the firmware block header */ static inline const void *snd_soc_tplg_get_data(struct snd_soc_tplg_hdr *hdr) { const void *ptr = hdr; return ptr + sizeof(*hdr); } /* Dynamic Object loading and removal for component drivers */ int snd_soc_tplg_component_load(struct snd_soc_component *comp, struct snd_soc_tplg_ops *ops, const struct firmware *fw, u32 index); int snd_soc_tplg_component_remove(struct snd_soc_component *comp, u32 index); /* Widget removal - widgets also removed wth component API */ void snd_soc_tplg_widget_remove(struct snd_soc_dapm_widget *w); void snd_soc_tplg_widget_remove_all(struct snd_soc_dapm_context *dapm, u32 index); /* Binds event handlers to dynamic widgets */ int snd_soc_tplg_widget_bind_event(struct snd_soc_dapm_widget *w, const struct snd_soc_tplg_widget_events *events, int num_events, u16 event_type); #else static inline int snd_soc_tplg_component_remove(struct snd_soc_component *comp, u32 index) { return 0; } #endif #endif ing parent2d706e790f0508dff4fb72eca9b4892b79757feb (diff)
mm: optimize PageWaiters bit use for unlock_page()
In commit 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Nick Piggin made our page locking no longer unconditionally touch the hashed page waitqueue, which not only helps performance in general, but is particularly helpful on NUMA machines where the hashed wait queues can bounce around a lot. However, the "clear lock bit atomically and then test the waiters bit" sequence turns out to be much more expensive than it needs to be, because you get a nasty stall when trying to access the same word that just got updated atomically. On architectures where locking is done with LL/SC, this would be trivial to fix with a new primitive that clears one bit and tests another atomically, but that ends up not working on x86, where the only atomic operations that return the result end up being cmpxchg and xadd. The atomic bit operations return the old value of the same bit we changed, not the value of an unrelated bit. On x86, we could put the lock bit in the high bit of the byte, and use "xadd" with that bit (where the overflow ends up not touching other bits), and look at the other bits of the result. However, an even simpler model is to just use a regular atomic "and" to clear the lock bit, and then the sign bit in eflags will indicate the resulting state of the unrelated bit #7. So by moving the PageWaiters bit up to bit #7, we can atomically clear the lock bit and test the waiters bit on x86 too. And on architectures with LL/SC (which are all the usual RISC suspects), the particular bit doesn't matter, so they are fine with this approach too. This avoids the extra access to the same atomic word, and thus avoids the costly stall at page unlock time. The only downside is that the interface ends up being a bit odd and specialized: clear a bit in a byte, and test the sign bit.
Nick doesn't love the resulting name of the new primitive, but I'd rather make the name be descriptive and very clear about the limitation imposed by trying to work across all relevant architectures than make it be some generic thing that doesn't make the odd semantics explicit. So this introduces the new architecture primitive clear_bit_unlock_is_negative_byte(); and adds the trivial implementation for x86. We have a generic non-optimized fallback (that just does a "clear_bit()"+"test_bit(7)" combination) which can be overridden by any architecture that can do better. According to Nick, Power has the same hiccup x86 has, for example, but some other architectures may not even care. All these optimizations mean that my page locking stress-test (which is just executing a lot of small short-lived shell scripts: "make test" in the git source tree) no longer makes our page locking look horribly bad. Before all these optimizations, just the unlock_page() costs were just over 3% of all CPU overhead on "make test". After this, it's down to 0.66%, so just a quarter of the cost it used to be. (The difference on NUMA is bigger, but there this micro-optimization is likely less noticeable, since the big issue on NUMA was not the accesses to 'struct page', but the waitqueue accesses that were already removed by Nick's earlier commit). Acked-by: Nick Piggin <npiggin@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Bob Peterson <rpeterso@redhat.com> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Andrew Lutomirski <luto@kernel.org> Cc: Andreas Gruenbacher <agruenba@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'tools/testing')