/* * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions * of the GNU General Public License version 2. */ #include #include #include #include #include #include #include "dlm_internal.h" #include "lockspace.h" static spinlock_t ops_lock; static struct list_head send_list; static struct list_head recv_list; static wait_queue_head_t send_wq; static wait_queue_head_t recv_wq; struct plock_op { struct list_head list; int done; struct dlm_plock_info info; }; struct plock_xop { struct plock_op xop; int (*callback)(struct file_lock *fl, int result); void *fl; void *file; struct file_lock flc; }; static inline void set_version(struct dlm_plock_info *info) { info->version[0] = DLM_PLOCK_VERSION_MAJOR; info->version[1] = DLM_PLOCK_VERSION_MINOR; info->version[2] = DLM_PLOCK_VERSION_PATCH; } static int check_version(struct dlm_plock_info *info) { if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || (DLM_PLOCK_VERSION_MINOR < info->version[1])) { log_print("plock device version mismatch: " "kernel (%u.%u.%u), user (%u.%u.%u)", DLM_PLOCK_VERSION_MAJOR, DLM_PLOCK_VERSION_MINOR, DLM_PLOCK_VERSION_PATCH, info->version[0], info->version[1], info->version[2]); return -EINVAL; } return 0; } static void send_op(struct plock_op *op) { set_version(&op->info); INIT_LIST_HEAD(&op->list); spin_lock(&ops_lock); list_add_tail(&op->list, &send_list); spin_unlock(&ops_lock); wake_up(&send_wq); } /* If a process was killed while waiting for the only plock on a file, locks_remove_posix will not see any lock on the file so it won't send an unlock-close to us to pass on to userspace to clean up the abandoned waiter. So, we have to insert the unlock-close when the lock call is interrupted. */ static void do_unlock_close(struct dlm_ls *ls, u64 number, struct file *file, struct file_lock *fl) { struct plock_op *op; op = kzalloc(sizeof(*op), GFP_NOFS); if (!op) return; op->info.optype = DLM_PLOCK_OP_UNLOCK; op->info.pid = fl->fl_pid; op->info.fsid = ls->ls_global_id; op->info.number = number; op->info.start = 0; op->info.end = OFFSET_MAX; if (fl->fl_lmops && fl->fl_lmops->lm_grant) op->info.owner = (__u64) fl->fl_pid; else op->info.owner = (__u64)(long) fl->fl_owner; op->info.flags |= DLM_PLOCK_FL_CLOSE; send_op(op); } int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, int cmd, struct file_lock *fl) { struct dlm_ls *ls; struct plock_op *op; struct plock_xop *xop; int rv; ls = dlm_find_lockspace_local(lockspace); if (!ls) return -EINVAL; xop = kzalloc(sizeof(*xop), GFP_NOFS); if (!xop) { rv = -ENOMEM; goto out; } op = &xop->xop; op->info.optype = DLM_PLOCK_OP_LOCK; op->info.pid = fl->fl_pid; op->info.ex = (fl->fl_type == F_WRLCK); op->info.wait = IS_SETLKW(cmd); op->info.fsid = ls->ls_global_id; op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; if (fl->fl_lmops && fl->fl_lmops->lm_grant) { /* fl_owner is lockd which doesn't distinguish processes on the nfs client */ op->info.owner = (__u64) fl->fl_pid; xop->callback = fl->fl_lmops->lm_grant; locks_init_lock(&xop->flc); locks_copy_lock(&xop->flc, fl); xop->fl = fl; xop->file = file; } else { op->info.owner = (__u64)(long) fl->fl_owner; xop->callback = NULL; } send_op(op); if (xop->callback == NULL) { rv = wait_event_interruptible(recv_wq, (op->done != 0)); if (rv == -ERESTARTSYS) { log_debug(ls, "dlm_posix_lock: wait killed %llx", (unsigned long long)number); spin_lock(&ops_lock); list_del(&op->list); spin_unlock(&ops_lock); kfree(xop); do_unlock_close(ls, number, file, fl); goto out; } } else { rv = FILE_LOCK_DEFERRED; goto out; } spin_lock(&ops_lock); if (!list_empty(&op->list)) { log_error(ls, "dlm_posix_lock: op on list %llx", (unsigned long long)number); list_del(&op->list); } spin_unlock(&ops_lock); rv = op->info.rv; if (!rv) { if (locks_lock_file_wait(file, fl) < 0) log_error(ls, "dlm_posix_lock: vfs lock error %llx", (unsigned long long)number); } kfree(xop); out: dlm_put_lockspace(ls); return rv; } EXPORT_SYMBOL_GPL(dlm_posix_lock); /* Returns failure iff a successful lock operation should be canceled */ static int dlm_plock_callback(struct plock_op *op) { struct file *file; struct file_lock *fl; struct file_lock *flc; int (*notify)(struct file_lock *fl, int result) = NULL; struct plock_xop *xop = (struct plock_xop *)op; int rv = 0; spin_lock(&ops_lock); if (!list_empty(&op->list)) { log_print("dlm_plock_callback: op on list %llx", (unsigned long long)op->info.number); list_del(&op->list); } spin_unlock(&ops_lock); /* check if the following 2 are still valid or make a copy */ file = xop->file; flc = &xop->flc; fl = xop->fl; notify = xop->callback; if (op->info.rv) { notify(fl, op->info.rv); goto out; } /* got fs lock; bookkeep locally as well: */ flc->fl_flags &= ~FL_SLEEP; if (posix_lock_file(file, flc, NULL)) { /* * This can only happen in the case of kmalloc() failure. * The filesystem's own lock is the authoritative lock, * so a failure to get the lock locally is not a disaster. * As long as the fs cannot reliably cancel locks (especially * in a low-memory situation), we're better off ignoring * this failure than trying to recover. */ log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p", (unsigned long long)op->info.number, file, fl); } rv = notify(fl, 0); if (rv) { /* XXX: We need to cancel the fs lock here: */ log_print("dlm_plock_callback: lock granted after lock request " "failed; dangling lock!\n"); goto out; } out: kfree(xop); return rv; } int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, struct file_lock *fl) { struct dlm_ls *ls; struct plock_op *op; int rv; unsigned char fl_flags = fl->fl_flags; ls = dlm_find_lockspace_local(lockspace); if (!ls) return -EINVAL; op = kzalloc(sizeof(*op), GFP_NOFS); if (!op) { rv = -ENOMEM; goto out; } /* cause the vfs unlock to return ENOENT if lock is not found */ fl->fl_flags |= FL_EXISTS; rv = locks_lock_file_wait(file, fl); if (rv == -ENOENT) { rv = 0; goto out_free; } if (rv < 0) { log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx", rv, (unsigned long long)number); } op->info.optype = DLM_PLOCK_OP_UNLOCK; op->info.pid = fl->fl_pid; op->info.fsid = ls->ls_global_id; op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; if (fl->fl_lmops && fl->fl_lmops->lm_grant) op->info.owner = (__u64) fl->fl_pid; else op->info.owner = (__u64)(long) fl->fl_owner; if (fl->fl_flags & FL_CLOSE) { op->info.flags |= DLM_PLOCK_FL_CLOSE; send_op(op); rv = 0; goto out; } send_op(op); wait_event(recv_wq, (op->done != 0)); spin_lock(&ops_lock); if (!list_empty(&op->list)) { log_error(ls, "dlm_posix_unlock: op on list %llx", (unsigned long long)number); list_del(&op->list); } spin_unlock(&ops_lock); rv = op->info.rv; if (rv == -ENOENT) rv = 0; out_free: kfree(op); out: dlm_put_lockspace(ls); fl->fl_flags = fl_flags; return rv; } EXPORT_SYMBOL_GPL(dlm_posix_unlock); int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, struct file_lock *fl) { struct dlm_ls *ls; struct plock_op *op; int rv; ls = dlm_find_lockspace_local(lockspace); if (!ls) return -EINVAL; op = kzalloc(sizeof(*op), GFP_NOFS); if (!op) { rv = -ENOMEM; goto out; } op->info.optype = DLM_PLOCK_OP_GET; op->info.pid = fl->fl_pid; op->info.ex = (fl->fl_type == F_WRLCK); op->info.fsid = ls->ls_global_id; op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; if (fl->fl_lmops && fl->fl_lmops->lm_grant) op->info.owner = (__u64) fl->fl_pid; else op->info.owner = (__u64)(long) fl->fl_owner; send_op(op); wait_event(recv_wq, (op->done != 0)); spin_lock(&ops_lock); if (!list_empty(&op->list)) { log_error(ls, "dlm_posix_get: op on list %llx", (unsigned long long)number); list_del(&op->list); } spin_unlock(&ops_lock); /* info.rv from userspace is 1 for conflict, 0 for no-conflict, -ENOENT if there are no locks on the file */ rv = op->info.rv; fl->fl_type = F_UNLCK; if (rv == -ENOENT) rv = 0; else if (rv > 0) { locks_init_lock(fl); fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; fl->fl_flags = FL_POSIX; fl->fl_pid = op->info.pid; fl->fl_start = op->info.start; fl->fl_end = op->info.end; rv = 0; } kfree(op); out: dlm_put_lockspace(ls); return rv; } EXPORT_SYMBOL_GPL(dlm_posix_get); /* a read copies out one plock request from the send list */ static ssize_t dev_read(struct file *file, char __user *u, size_t count, loff_t *ppos) { struct dlm_plock_info info; struct plock_op *op = NULL; if (count < sizeof(info)) return -EINVAL; spin_lock(&ops_lock); if (!list_empty(&send_list)) { op = list_entry(send_list.next, struct plock_op, list); if (op->info.flags & DLM_PLOCK_FL_CLOSE) list_del(&op->list); else list_move(&op->list, &recv_list); memcpy(&info, &op->info, sizeof(info)); } spin_unlock(&ops_lock); if (!op) return -EAGAIN; /* there is no need to get a reply from userspace for unlocks that were generated by the vfs cleaning up for a close (the process did not make an unlock call). */ if (op->info.flags & DLM_PLOCK_FL_CLOSE) kfree(op); if (copy_to_user(u, &info, sizeof(info))) return -EFAULT; return sizeof(info); } /* a write copies in one plock result that should match a plock_op on the recv list */ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, loff_t *ppos) { struct dlm_plock_info info; struct plock_op *op; int found = 0, do_callback = 0; if (count != sizeof(info)) return -EINVAL; if (copy_from_user(&info, u, sizeof(info))) return -EFAULT; if (check_version(&info)) return -EINVAL; spin_lock(&ops_lock); list_for_each_entry(op, &recv_list, list) { if (op->info.fsid == info.fsid && op->info.number == info.number && op->info.owner == info.owner) { struct plock_xop *xop = (struct plock_xop *)op; list_del_init(&op->list); memcpy(&op->info, &info, sizeof(info)); if (xop->callback) do_callback = 1; else op->done = 1; found = 1; break; } } spin_unlock(&ops_lock); if (found) { if (do_callback) dlm_plock_callback(op); else wake_up(&recv_wq); } else log_print("dev_write no op %x %llx", info.fsid, (unsigned long long)info.number); return count; } static unsigned int dev_poll(struct file *file, poll_table *wait) { unsigned int mask = 0; poll_wait(file, &send_wq, wait); spin_lock(&ops_lock); if (!list_empty(&send_list)) mask = POLLIN | POLLRDNORM; spin_unlock(&ops_lock); return mask; } static const struct file_operations dev_fops = { .read = dev_read, .write = dev_write, .poll = dev_poll, .owner = THIS_MODULE, .llseek = noop_llseek, }; static struct miscdevice plock_dev_misc = { .minor = MISC_DYNAMIC_MINOR, .name = DLM_PLOCK_MISC_NAME, .fops = &dev_fops }; int dlm_plock_init(void) { int rv; spin_lock_init(&ops_lock); INIT_LIST_HEAD(&send_list); INIT_LIST_HEAD(&recv_list); init_waitqueue_head(&send_wq); init_waitqueue_head(&recv_wq); rv = misc_register(&plock_dev_misc); if (rv) log_print("dlm_plock_init: misc_register failed %d", rv); return rv; } void dlm_plock_exit(void) { misc_deregister(&plock_dev_misc); } clude?id=79112c26f14c38ddbac3b2739469e373ef424fe6'>sched: rename tcf_destroy to tcf_destroy_protoJiri Pirko1-1/+1 This function destroys TC filter protocol, not TC filter. So name it accordingly. Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-10ipv4: fib: Add events for FIB replace and appendIdo Schimmel1-1/+2 The FIB notification chain currently uses the NLM_F_{REPLACE,APPEND} flags to signal routes being replaced or appended. Instead of using netlink flags for in-kernel notifications we can simply introduce two new events in the FIB notification chain. This has the added advantage of making the API cleaner, thereby making it clear that these events should be supported by listeners of the notification chain. Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> CC: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-09openvswitch: Add force commit.Jarno Rajahalme1-0/+5 Stateful network admission policy may allow connections to one direction and reject connections initiated in the other direction. After policy change it is possible that for a new connection an overlapping conntrack entry already exists, where the original direction of the existing connection is opposed to the new connection's initial packet. Most importantly, conntrack state relating to the current packet gets the "reply" designation based on whether the original direction tuple or the reply direction tuple matched. If this "directionality" is wrong w.r.t. to the stateful network admission policy it may happen that packets in neither direction are correctly admitted. This patch adds a new "force commit" option to the OVS conntrack action that checks the original direction of an existing conntrack entry. If that direction is opposed to the current packet, the existing conntrack entry is deleted and a new one is subsequently created in the correct direction. Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org> Acked-by: Joe Stringer <joe@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-09openvswitch: Add original direction conntrack tuple to sw_flow_key.Jarno Rajahalme1-1/+19 Add the fields of the conntrack original direction 5-tuple to struct sw_flow_key. The new fields are initially marked as non-existent, and are populated whenever a conntrack action is executed and either finds or generates a conntrack entry. This means that these fields exist for all packets that were not rejected by conntrack as untrackable. The original tuple fields in the sw_flow_key are filled from the original direction tuple of the conntrack entry relating to the current packet, or from the original direction tuple of the master conntrack entry, if the current conntrack entry has a master. Generally, expected connections of connections having an assigned helper (e.g., FTP), have a master conntrack entry. The main purpose of the new conntrack original tuple fields is to allow matching on them for policy decision purposes, with the premise that the admissibility of tracked connections reply packets (as well as original direction packets), and both direction packets of any related connections may be based on ACL rules applying to the master connection's original direction 5-tuple. This also makes it easier to make policy decisions when the actual packet headers might have been transformed by NAT, as the original direction 5-tuple represents the packet headers before any such transformation. When using the original direction 5-tuple the admissibility of return and/or related packets need not be based on the mere existence of a conntrack entry, allowing separation of admission policy from the established conntrack state. While existence of a conntrack entry is required for admission of the return or related packets, policy changes can render connections that were initially admitted to be rejected or dropped afterwards. If the admission of the return and related packets was based on mere conntrack state (e.g., connection being in an established state), a policy change that would make the connection rejected or dropped would need to find and delete all conntrack entries affected by such a change. When using the original direction 5-tuple matching the affected conntrack entries can be allowed to time out instead, as the established state of the connection would not need to be the basis for packet admission any more. It should be noted that the directionality of related connections may be the same or different than that of the master connection, and neither the original direction 5-tuple nor the conntrack state bits carry this information. If needed, the directionality of the master connection can be stored in master's conntrack mark or labels, which are automatically inherited by the expected related connections. The fact that neither ARP nor ND packets are trackable by conntrack allows mutual exclusion between ARP/ND and the new conntrack original tuple fields. Hence, the IP addresses are overlaid in union with ARP and ND fields. This allows the sw_flow_key to not grow much due to this patch, but it also means that we must be careful to never use the new key fields with ARP or ND packets. ARP is easy to distinguish and keep mutually exclusive based on the ethernet type, but ND being an ICMPv6 protocol requires a bit more attention. Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Joe Stringer <joe@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-09openvswitch: Unionize ovs_key_ct_label with a u32 array.Jarno Rajahalme1-2/+6 Make the array of labels in struct ovs_key_ct_label an union, adding a u32 array of the same byte size as the existing u8 array. It is faster to loop through the labels 32 bits at the time, which is also the alignment of netlink attributes. Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Joe Stringer <joe@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-09sctp: implement sender-side procedures for Add Incoming/Outgoing Streams ↵Xin Long2-0/+9 Request Parameter This patch is to implement Sender-Side Procedures for the Add Outgoing and Incoming Streams Request Parameter described in rfc6525 section 5.1.5-5.1.6. It is also to add sockopt SCTP_ADD_STREAMS in rfc6525 section 6.3.4 for users. Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-09sctp: add support for generating stream reconf add incoming/outgoing streams ↵Xin Long2-0/+10 request chunk This patch is to define Add Incoming/Outgoing Streams Request Parameter described in rfc6525 section 4.5 and 4.6. They can be in one same chunk trunk as rfc6525 section 3.1-7 describes, so make them in one function. Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-09sctp: implement sender-side procedures for SSN/TSN Reset Request ParameterXin Long2-0/+2 This patch is to implement Sender-Side Procedures for the SSN/TSN Reset Request Parameter descibed in rfc6525 section 5.1.4. It is also to add sockopt SCTP_RESET_ASSOC in rfc6525 section 6.3.3 for users. Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-09sctp: add support for generating stream reconf ssn/tsn reset request chunkXin Long2-0/+7 This patch is to define SSN/TSN Reset Request Parameter described in rfc6525 section 4.3. Signed-off-by: Xin Long <lucien.xin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-09sctp: drop unnecessary __packed from some stream reconf structuresXin Long1-3/+3 commit 85c727b59483 ("sctp: drop __packed from almost all SCTP structures") has removed __packed from almost all SCTP structures. But there still are three structures where it should be dropped. This patch is to remove it from some stream reconf structures. Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-09cfg80211: fix NAN bands definitionLuca Coelho2-36/+39 The nl80211_nan_dual_band_conf enumeration doesn't make much sense. The default value is assigned to a bit, which makes it weird if the default bit and other bits are set at the same time. To improve this, get rid of NL80211_NAN_BAND_DEFAULT and add a wiphy configuration to let the drivers define which bands are supported. This is exposed to the userspace, which then can make a decision on which band(s) to use. Additionally, rename all "dual_band" elements to "bands", to make things clearer. Signed-off-by: Luca Coelho <luciano.coelho@intel.com> Signed-off-by: Johannes Berg <johannes.berg@intel.com> 2017-02-08ipv4: fib: Notify about nexthop status changesIdo Schimmel1-0/+7 When a multipath route is hit the kernel doesn't consider nexthops that are DEAD or LINKDOWN when IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN is set. Devices that offload multipath routes need to be made aware of nexthop status changes. Otherwise, the device will keep forwarding packets to non-functional nexthops. Add the FIB_EVENT_NH_{ADD,DEL} events to the fib notification chain, which notify capable devices when they should add or delete a nexthop from their tables. Cc: Roopa Prabhu <roopa@cumulusnetworks.com> Cc: David Ahern <dsa@cumulusnetworks.com> Cc: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Reviewed-by: Andy Gospodarek <gospo@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-08net: stmmac: Remove the bus_setup function pointerLABBE Corentin1-1/+0 The bus_setup function pointer is not used at all, this patch remove it. Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com> Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-08gro_cells: move to net/core/gro_cells.cEric Dumazet1-82/+4 We have many gro cells users, so lets move the code to avoid duplication. This creates a CONFIG_GRO_CELLS option. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> 2017-02-08net: phy: Add LED mode driver for Microsemi PHYs.Raju Lakkaraju