From 52df7837560e51a47903b49243291fb8039653d2 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 15 Oct 2018 13:31:39 +0200 Subject: [PATCH 1/7] s390/cio: make vfio_ccw_io_region static Fix the following sparse warning: drivers/s390/cio/vfio_ccw_drv.c:25:19: warning: symbol 'vfio_ccw_io_region' was not declared. Should it be static? Signed-off-by: Sebastian Ott Message-Id: Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index f47d16b5810b..edbf542d82af 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -22,7 +22,7 @@ #include "vfio_ccw_private.h" struct workqueue_struct *vfio_ccw_work_q; -struct kmem_cache *vfio_ccw_io_region; +static struct kmem_cache *vfio_ccw_io_region; /* * Helpers From 55e93ecdc1ef9256279e0a0b08edf72cc47fc2f6 Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Thu, 25 Oct 2018 19:15:20 +0200 Subject: [PATCH 2/7] vfio: ccw: Register mediated device once all structures are initialized Let's register the mediated device when all the data structures which could be used are initialized. Signed-off-by: Pierre Morel Reviewed-by: Eric Farman Message-Id: <1540487720-11634-3-git-send-email-pmorel@linux.ibm.com> Acked-by: Halil Pasic Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index edbf542d82af..a10cec0e86eb 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -134,14 +134,14 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) if (ret) goto out_free; - ret = vfio_ccw_mdev_reg(sch); - if (ret) - goto out_disable; - INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo); atomic_set(&private->avail, 1); private->state = VFIO_CCW_STATE_STANDBY; + ret = vfio_ccw_mdev_reg(sch); + if (ret) + goto out_disable; + return 0; out_disable: From 806212f91c874b24cf9eb4a9f180323671b6c5ed Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 9 Nov 2018 03:39:28 +0100 Subject: [PATCH 3/7] s390/cio: Fix cleanup of pfn_array alloc failure If pfn_array_alloc fails somehow, we need to release the pfn_array_table that was malloc'd earlier. Signed-off-by: Eric Farman Message-Id: <20181109023937.96105-2-farman@linux.ibm.com> Acked-by: Halil Pasic Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index fd77e46eb3b2..ef5ab45d94b3 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -528,7 +528,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, ret = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, ccw->cda, ccw->count); if (ret < 0) - goto out_init; + goto out_unpin; /* Translate this direct ccw to a idal ccw. */ idaws = kcalloc(ret, sizeof(*idaws), GFP_DMA | GFP_KERNEL); From b89e242eee8d4cd8261d8d821c62c5d1efc454d0 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 9 Nov 2018 03:39:29 +0100 Subject: [PATCH 4/7] s390/cio: Fix cleanup when unsupported IDA format is used Direct returns from within a loop are rude, but it doesn't mean it gets to avoid releasing the memory acquired beforehand. Signed-off-by: Eric Farman Message-Id: <20181109023937.96105-3-farman@linux.ibm.com> Reviewed-by: Farhan Ali Reviewed-by: Pierre Morel Acked-by: Halil Pasic Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index ef5ab45d94b3..70a006ba4d05 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -387,8 +387,10 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp) * orb specified one of the unsupported formats, we defer * checking for IDAWs in unsupported formats to here. */ - if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw)) + if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw)) { + kfree(p); return -EOPNOTSUPP; + } if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw))) break; From 613a41b0d16e617f46776a93b975a1eeea96417c Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 13 Nov 2018 15:38:22 +0000 Subject: [PATCH 5/7] s390/cpum_cf: Reject request for sampling in event initialization On s390 command perf top fails [root@s35lp76 perf] # ./perf top -F100000 --stdio Error: cycles: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' [root@s35lp76 perf] # Using event -e rb0000 works as designed. Event rb0000 is the event number of the sampling facility for basic sampling. During system start up the following PMUs are installed in the kernel's PMU list (from head to tail): cpum_cf --> s390 PMU counter facility device driver cpum_sf --> s390 PMU sampling facility device driver uprobe kprobe tracepoint task_clock cpu_clock Perf top executes following functions and calls perf_event_open(2) system call with different parameters many times: cmd_top --> __cmd_top --> perf_evlist__add_default --> __perf_evlist__add_default --> perf_evlist__new_cycles (creates event type:0 (HW) config 0 (CPU_CYCLES) --> perf_event_attr__set_max_precise_ip Uses perf_event_open(2) to detect correct precise_ip level. Fails 3 times on s390 which is ok. Then functions cmd_top --> __cmd_top --> perf_top__start_counters -->perf_evlist__config --> perf_can_comm_exec --> perf_probe_api This functions test support for the following events: "cycles:u", "instructions:u", "cpu-clock:u" using --> perf_do_probe_api --> perf_event_open_cloexec Test the close on exec flag support with perf_event_open(2). perf_do_probe_api returns true if the event is supported. The function returns true because event cpu-clock is supported by the PMU cpu_clock. This is achieved by many calls to perf_event_open(2). Function perf_top__start_counters now calls perf_evsel__open() for every event, which is the default event cpu_cycles (config:0) and type HARDWARE (type:0) which a predfined frequence of 4000. Given the above order of the PMU list, the PMU cpum_cf gets called first and returns 0, which indicates support for this sampling. The event is fully allocated in the function perf_event_open (file kernel/event/core.c near line 10521 and the following check fails: event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL, NULL, cgroup_fd); if (IS_ERR(event)) { err = PTR_ERR(event); goto err_cred; } if (is_sampling_event(event)) { if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { err = -EOPNOTSUPP; goto err_alloc; } } The check for the interrupt capabilities fails and the system call perf_event_open() returns -EOPNOTSUPP (-95). Add a check to return -ENODEV when sampling is requested in PMU cpum_cf. This allows common kernel code in the perf_event_open() system call to test the next PMU in above list. Fixes: 97b1198fece0 (" "s390, perf: Use common PMU interrupt disabled code") Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 74091fd3101e..d5523adeddbf 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -346,6 +346,8 @@ static int __hw_perf_event_init(struct perf_event *event) break; case PERF_TYPE_HARDWARE: + if (is_sampling_event(event)) /* No sampling support */ + return -ENOENT; ev = attr->config; /* Count user space (problem-state) only */ if (!attr->exclude_user && attr->exclude_kernel) { From 104f708fd1241b22f808bdf066ab67dc5a051de5 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 9 Nov 2018 14:59:24 +0100 Subject: [PATCH 6/7] s390/zcrypt: reinit ap queue state machine during device probe Until the vfio-ap driver came into live there was a well known agreement about the way how ap devices are initialized and their states when the driver's probe function is called. However, the vfio device driver when receiving an ap queue device does additional resets thereby removing the registration for interrupts for the ap device done by the ap bus core code. So when later the vfio driver releases the device and one of the default zcrypt drivers takes care of the device the interrupt registration needs to get renewed. The current code does no renew and result is that requests send into such a queue will never see a reply processed - the application hangs. This patch adds a function which resets the aq queue state machine for the ap queue device and triggers the walk through the initial states (which are reset and registration for interrupts). This function is now called before the driver's probe function is invoked. When the association between driver and device is released, the driver's remove function is called. The current implementation calls a ap queue function ap_queue_remove(). This invokation has been moved to the ap bus function to make the probe / remove pair for ap bus and drivers more symmetric. Fixes: 7e0bdbe5c21c ("s390/zcrypt: AP bus support for alternate driver(s)") Cc: stable@vger.kernel.org # 4.19+ Signed-off-by: Harald Freudenberger Reviewd-by: Tony Krowiak Reviewd-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 8 ++++---- drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/ap_queue.c | 15 +++++++++++++++ drivers/s390/crypto/zcrypt_cex2a.c | 1 - drivers/s390/crypto/zcrypt_cex2c.c | 1 - drivers/s390/crypto/zcrypt_cex4.c | 1 - 6 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 048665e4f13d..9f5a201c4c87 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -775,6 +775,8 @@ static int ap_device_probe(struct device *dev) drvres = ap_drv->flags & AP_DRIVER_FLAG_DEFAULT; if (!!devres != !!drvres) return -ENODEV; + /* (re-)init queue's state machine */ + ap_queue_reinit_state(to_ap_queue(dev)); } /* Add queue/card to list of active queues/cards */ @@ -807,6 +809,8 @@ static int ap_device_remove(struct device *dev) struct ap_device *ap_dev = to_ap_dev(dev); struct ap_driver *ap_drv = ap_dev->drv; + if (is_queue_dev(dev)) + ap_queue_remove(to_ap_queue(dev)); if (ap_drv->remove) ap_drv->remove(ap_dev); @@ -1444,10 +1448,6 @@ static void ap_scan_bus(struct work_struct *unused) aq->ap_dev.device.parent = &ac->ap_dev.device; dev_set_name(&aq->ap_dev.device, "%02x.%04x", id, dom); - /* Start with a device reset */ - spin_lock_bh(&aq->lock); - ap_wait(ap_sm_event(aq, AP_EVENT_POLL)); - spin_unlock_bh(&aq->lock); /* Register device */ rc = device_register(&aq->ap_dev.device); if (rc) { diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 3eed1b36c876..bfc66e4a9de1 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -254,6 +254,7 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type); void ap_queue_remove(struct ap_queue *aq); void ap_queue_suspend(struct ap_device *ap_dev); void ap_queue_resume(struct ap_device *ap_dev); +void ap_queue_reinit_state(struct ap_queue *aq); struct ap_card *ap_card_create(int id, int queue_depth, int raw_device_type, int comp_device_type, unsigned int functions); diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 66f7334bcb03..0aa4b3ccc948 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -718,5 +718,20 @@ void ap_queue_remove(struct ap_queue *aq) { ap_flush_queue(aq); del_timer_sync(&aq->timeout); + + /* reset with zero, also clears irq registration */ + spin_lock_bh(&aq->lock); + ap_zapq(aq->qid); + aq->state = AP_STATE_BORKED; + spin_unlock_bh(&aq->lock); } EXPORT_SYMBOL(ap_queue_remove); + +void ap_queue_reinit_state(struct ap_queue *aq) +{ + spin_lock_bh(&aq->lock); + aq->state = AP_STATE_RESET_START; + ap_wait(ap_sm_event(aq, AP_EVENT_POLL)); + spin_unlock_bh(&aq->lock); +} +EXPORT_SYMBOL(ap_queue_reinit_state); diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index 146f54f5cbb8..c50f3e86cc74 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -196,7 +196,6 @@ static void zcrypt_cex2a_queue_remove(struct ap_device *ap_dev) struct ap_queue *aq = to_ap_queue(&ap_dev->device); struct zcrypt_queue *zq = aq->private; - ap_queue_remove(aq); if (zq) zcrypt_queue_unregister(zq); } diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c index 546f67676734..35c7c6672713 100644 --- a/drivers/s390/crypto/zcrypt_cex2c.c +++ b/drivers/s390/crypto/zcrypt_cex2c.c @@ -251,7 +251,6 @@ static void zcrypt_cex2c_queue_remove(struct ap_device *ap_dev) struct ap_queue *aq = to_ap_queue(&ap_dev->device); struct zcrypt_queue *zq = aq->private; - ap_queue_remove(aq); if (zq) zcrypt_queue_unregister(zq); } diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index f9d4c6c7521d..582ffa7e0f18 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -275,7 +275,6 @@ static void zcrypt_cex4_queue_remove(struct ap_device *ap_dev) struct ap_queue *aq = to_ap_queue(&ap_dev->device); struct zcrypt_queue *zq = aq->private; - ap_queue_remove(aq); if (zq) zcrypt_queue_unregister(zq); } From 814cedbc0b78d75e335c96da9b9391142eab5600 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 27 Nov 2018 14:04:04 +0100 Subject: [PATCH 7/7] s390/mm: correct pgtable_bytes on page table downgrade The downgrade of a page table from 3 levels to 2 levels for a 31-bit compat process removes a pmd table which has to be counted against pgtable_bytes. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 814f26520aa2..6791562779ee 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -131,6 +131,7 @@ void crst_table_downgrade(struct mm_struct *mm) } pgd = mm->pgd; + mm_dec_nr_pmds(mm); mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); mm->context.asce_limit = _REGION3_SIZE; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |