Merge branch 'for-4.12/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:

 - Add BFQ IO scheduler under the new blk-mq scheduling framework. BFQ
   was initially a fork of CFQ, but subsequently changed to implement
   fairness based on B-WF2Q+, a modified variant of WF2Q. BFQ is meant
   to be used on desktop type single drives, providing good fairness.
   From Paolo.

 - Add Kyber IO scheduler. This is a full multiqueue aware scheduler,
   using a scalable token based algorithm that throttles IO based on
   live completion IO stats, similarly to blk-wbt. From Omar.

 - A series from Jan, moving users to separately allocated backing
   devices. This continues the work of separating backing device life
   times, solving various problems with hot removal.

 - A series of updates for lightnvm, mostly from Javier. Includes a
   'pblk' target that exposes an open channel SSD as a physical block
   device.

 - A series of fixes and improvements for nbd from Josef.

 - A series from Omar, removing queue sharing between devices on mostly
   legacy drivers. This helps us clean up other bits, if we know that a
   queue only has a single device backing. This has been overdue for
   more than a decade.

 - Fixes for the blk-stats, and improvements to unify the stats and user
   windows. This both improves blk-wbt, and enables other users to
   register a need to receive IO stats for a device. From Omar.

 - blk-throttle improvements from Shaohua. This provides a scalable
   framework for implementing scalable prioritization - particularly for
   blk-mq, but applicable to any type of block device. The interface is
   marked experimental for now.

 - Bucketized IO stats for IO polling from Stephen Bates. This improves
   efficiency of polled workloads in the presence of mixed block size
   IO.

 - A few fixes for opal, from Scott.

 - A few pulls for NVMe, including a lot of fixes for NVMe-over-fabrics.
   From a variety of folks, mostly Sagi and James Smart.

 - A series from Bart, improving our exposed info and capabilities from
   the blk-mq debugfs support.

 - A series from Christoph, cleaning up how we handle WRITE_ZEROES.

 - A series from Christoph, cleaning up the block layer handling of how
   we track errors in a request. On top of being a nice cleanup, it also
   shrinks the size of struct request a bit.

 - Removal of mg_disk and hd (sorry Linus) by Christoph. The former was
   never used by platforms, and the latter has outlived its usefulness.

 - Various little bug fixes and cleanups from a wide variety of folks.
* 'for-4.12/block' of git://git.kernel.dk/linux-block: (329 commits)
  block: hide badblocks attribute by default
  blk-mq: unify hctx delay_work and run_work
  block: add kblock_mod_delayed_work_on()
  blk-mq: unify hctx delayed_run_work and run_work
  nbd: fix use after free on module unload
  MAINTAINERS: bfq: Add Paolo as maintainer for the BFQ I/O scheduler
  blk-mq-sched: alloate reserved tags out of normal pool
  mtip32xx: use runtime tag to initialize command header
  scsi: Implement blk_mq_ops.show_rq()
  blk-mq: Add blk_mq_ops.show_rq()
  blk-mq: Show operation, cmd_flags and rq_flags names
  blk-mq: Make blk_flags_show() callers append a newline character
  blk-mq: Move the "state" debugfs attribute one level down
  blk-mq: Unregister debugfs attributes earlier
  blk-mq: Only unregister hctxs for which registration succeeded
  blk-mq-debugfs: Rename functions for registering and unregistering the mq directory
  blk-mq: Let blk_mq_debugfs_register() look up the queue name
  blk-mq: Register <dev>/queue/mq after having registered <dev>/queue
  ide-pm: always pass 0 error to ide_complete_rq in ide_do_devset
  ide-pm: always pass 0 error to __blk_end_request_all
  ...
This commit is contained in commit 694752922b.

255 changed files with 24703 additions and 6212 deletions
@@ -213,14 +213,8 @@ What: /sys/block/<disk>/queue/discard_zeroes_data
 Date:		May 2011
 Contact:	Martin K. Petersen <martin.petersen@oracle.com>
 Description:
-		Devices that support discard functionality may return
-		stale or random data when a previously discarded block
-		is read back. This can cause problems if the filesystem
-		expects discarded blocks to be explicitly cleared. If a
-		device reports that it deterministically returns zeroes
-		when a discarded area is read the discard_zeroes_data
-		parameter will be set to one. Otherwise it will be 0 and
-		the result of reading a discarded area is undefined.
+		Will always return 0. Don't rely on any specific behavior
+		for discards, and don't read this file.

 What:		/sys/block/<disk>/queue/write_same_max_bytes
 Date:		January 2012
@@ -1,5 +1,7 @@
 00-INDEX
 	- This file
+bfq-iosched.txt
+	- BFQ IO scheduler and its tunables
 biodoc.txt
 	- Notes on the Generic Block Layer Rewrite in Linux 2.5
 biovecs.txt
Documentation/block/bfq-iosched.txt (new file, 531 lines)
@@ -0,0 +1,531 @@
BFQ (Budget Fair Queueing)
==========================

BFQ is a proportional-share I/O scheduler, with some extra
low-latency capabilities. In addition to cgroups support (blkio or io
controllers), BFQ's main features are:
- BFQ guarantees a high system and application responsiveness, and a
  low latency for time-sensitive applications, such as audio or video
  players;
- BFQ distributes bandwidth, and not just time, among processes or
  groups (switching back to time distribution when needed to keep
  throughput high).

On average CPUs, the current version of BFQ can handle devices
performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a
reference, 30-50 KIOPS correspond to very high bandwidths with
sequential I/O (e.g., 8-12 GB/s if I/O requests are 256 KB large), and
to 120-200 MB/s with 4KB random I/O. BFQ has not yet been tested on
multi-queue devices.

The table of contents follows. The impatient can jump straight to
Section 3.

CONTENTS

1. When may BFQ be useful?
 1-1 Personal systems
 1-2 Server systems
2. How does BFQ work?
3. What are BFQ's tunables?
4. BFQ group scheduling
 4-1 Service guarantees provided
 4-2 Interface

1. When may BFQ be useful?
==========================

BFQ provides the following benefits on personal and server systems.

1-1 Personal systems
--------------------

Low latency for interactive applications

Regardless of the actual background workload, BFQ guarantees that, for
interactive tasks, the storage device is virtually as responsive as if
it was idle. For example, even if one or more of the following
background workloads are being executed:
- one or more large files are being read, written or copied,
- a tree of source files is being compiled,
- one or more virtual machines are performing I/O,
- a software update is in progress,
- indexing daemons are scanning filesystems and updating their
  databases,
starting an application or loading a file from within an application
takes about the same time as if the storage device was idle. As a
comparison, with CFQ, NOOP or DEADLINE, and in the same conditions,
applications experience high latencies, or even become unresponsive
until the background workload terminates (also on SSDs).

Low latency for soft real-time applications

Also soft real-time applications, such as audio and video
players/streamers, enjoy a low latency and a low drop rate, regardless
of the background I/O workload. As a consequence, these applications
do not suffer from almost any glitch due to the background workload.

Higher speed for code-development tasks

If some additional workload happens to be executed in parallel, then
BFQ executes the I/O-related components of typical code-development
tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
NOOP or DEADLINE.

High throughput

On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and
up to 150% higher throughput than DEADLINE and NOOP, with all the
sequential workloads considered in our tests. With random workloads,
and with all the workloads on flash-based devices, BFQ achieves,
instead, about the same throughput as the other schedulers.

Strong fairness, bandwidth and delay guarantees

BFQ distributes the device throughput, and not just the device time,
among I/O-bound applications in proportion to their weights, with any
workload and regardless of the device parameters. From these bandwidth
guarantees, it is possible to compute tight per-I/O-request delay
guarantees by a simple formula. If not configured for strict service
guarantees, BFQ switches to time-based resource sharing (only) for
applications that would otherwise cause a throughput loss.

1-2 Server systems
------------------

Most benefits for server systems follow from the same service
properties as above. In particular, regardless of whether additional,
possibly heavy workloads are being served, BFQ guarantees:

. audio and video-streaming with zero or very low jitter and drop
  rate;

. fast retrieval of WEB pages and embedded objects;

. real-time recording of data in live-dumping applications (e.g.,
  packet logging);

. responsiveness in local and remote access to a server.


2. How does BFQ work?
=====================

BFQ is a proportional-share I/O scheduler, whose general structure,
plus a lot of code, are borrowed from CFQ.

- Each process doing I/O on a device is associated with a weight and a
  (bfq_)queue.

- BFQ grants exclusive access to the device, for a while, to one queue
  (process) at a time, and implements this service model by
  associating every queue with a budget, measured in number of
  sectors.

- After a queue is granted access to the device, the budget of the
  queue is decremented, on each request dispatch, by the size of the
  request.

- The in-service queue is expired, i.e., its service is suspended,
  only if one of the following events occurs: 1) the queue finishes
  its budget, 2) the queue empties, 3) a "budget timeout" fires. (A
  simplified sketch of this budget bookkeeping is given at the end of
  this section.)

- The budget timeout prevents processes doing random I/O from
  holding the device for too long and dramatically reducing
  throughput.

- Actually, as in CFQ, a queue associated with a process issuing
  sync requests may not be expired immediately when it empties. In
  contrast, BFQ may idle the device for a short time interval,
  giving the process the chance to go on being served if it issues
  a new request in time. Device idling typically boosts the
  throughput on rotational devices, if processes do synchronous
  and sequential I/O. In addition, under BFQ, device idling is
  also instrumental in guaranteeing the desired throughput
  fraction to processes issuing sync requests (see the description
  of the slice_idle tunable in this document, or [1, 2], for more
  details).

- With respect to idling for service guarantees, if several
  processes are competing for the device at the same time, but
  all processes (and groups, after the following commit) have
  the same weight, then BFQ guarantees the expected throughput
  distribution without ever idling the device. Throughput is
  thus as high as possible in this common scenario.

- If low-latency mode is enabled (default configuration), BFQ
  executes some special heuristics to detect interactive and soft
  real-time applications (e.g., video or audio players/streamers),
  and to reduce their latency. The most important action taken to
  achieve this goal is to give to the queues associated with these
  applications more than their fair share of the device
  throughput. For brevity, we call just "weight-raising" the whole
  set of actions taken by BFQ to privilege these queues. In
  particular, BFQ provides a milder form of weight-raising for
  interactive applications, and a stronger form for soft real-time
  applications.

- BFQ automatically deactivates idling for queues born in a burst of
  queue creations. In fact, these queues are usually associated with
  the processes of applications and services that benefit mostly
  from a high throughput. Examples are systemd during boot, or git
  grep.

- As in CFQ, BFQ merges queues performing interleaved I/O, i.e.,
  performing random I/O that becomes mostly sequential if
  merged. Differently from CFQ, BFQ achieves this goal with a more
  reactive mechanism, called Early Queue Merge (EQM). EQM is so
  responsive in detecting interleaved I/O (cooperating processes),
  that it enables BFQ to achieve a high throughput, by queue
  merging, even for queues for which CFQ needs a different
  mechanism, preemption, to get a high throughput. As such, EQM is a
  unified mechanism to achieve a high throughput with interleaved
  I/O.

- Queues are scheduled according to a variant of WF2Q+, named
  B-WF2Q+, and implemented using an augmented rb-tree to preserve an
  O(log N) overall complexity. See [2] for more details. B-WF2Q+ is
  also ready for hierarchical scheduling. However, for a cleaner
  logical breakdown, the code that enables and completes
  hierarchical support is provided in the next commit, which focuses
  exactly on this feature.

- B-WF2Q+ guarantees a tight deviation with respect to an ideal,
  perfectly fair, and smooth service. In particular, B-WF2Q+
  guarantees that each queue receives a fraction of the device
  throughput proportional to its weight, even if the throughput
  fluctuates, and regardless of: the device parameters, the current
  workload and the budgets assigned to the queue.

- The last, budget-independence, property (although probably
  counterintuitive in the first place) is definitely beneficial, for
  the following reasons:

  - First, with any proportional-share scheduler, the maximum
    deviation with respect to an ideal service is proportional to
    the maximum budget (slice) assigned to queues. As a consequence,
    BFQ can keep this deviation tight not only because of the
    accurate service of B-WF2Q+, but also because BFQ *does not*
    need to assign a larger budget to a queue to let the queue
    receive a higher fraction of the device throughput.

  - Second, BFQ is free to choose, for every process (queue), the
    budget that best fits the needs of the process, or best
    leverages the I/O pattern of the process. In particular, BFQ
    updates queue budgets with a simple feedback-loop algorithm that
    allows a high throughput to be achieved, while still providing
    tight latency guarantees to time-sensitive applications. When
    the in-service queue expires, this algorithm computes the next
    budget of the queue so as to:

    - Let large budgets be eventually assigned to the queues
      associated with I/O-bound applications performing sequential
      I/O: in fact, the longer these applications are served once
      they get access to the device, the higher the throughput is.

    - Let small budgets be eventually assigned to the queues
      associated with time-sensitive applications (which typically
      perform sporadic and short I/O), because, the smaller the
      budget assigned to a queue waiting for service is, the sooner
      B-WF2Q+ will serve that queue (Subsec 3.3 in [2]).

- If several processes are competing for the device at the same time,
  but all processes and groups have the same weight, then BFQ
  guarantees the expected throughput distribution without ever idling
  the device. It uses preemption instead. Throughput is then much
  higher in this common scenario.

- ioprio classes are served in strict priority order, i.e.,
  lower-priority queues are not served as long as there are
  higher-priority queues. Among queues in the same class, the
  bandwidth is distributed in proportion to the weight of each
  queue. A very thin extra bandwidth is however guaranteed to
  the Idle class, to prevent it from starving.
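
The budget rules in the bullets above can be condensed into a few lines
of code. The following is a minimal illustrative sketch only, under
simplifying assumptions (a single queue, sizes in sectors, time in
jiffies); the names toy_queue, charge_dispatch() and should_expire()
are invented for this example, and BFQ's real implementation lives in
block/bfq-iosched.c of this commit.

/* Illustrative sketch only, not BFQ code. */
#include <stdbool.h>
#include <stdint.h>

struct toy_queue {
        int64_t budget;          /* remaining budget, in sectors */
        int64_t queued;          /* number of queued requests */
        uint64_t budget_timeout; /* expiration time, in jiffies */
};

/* On each dispatch, charge the request size (in sectors) to the budget. */
static void charge_dispatch(struct toy_queue *q, int64_t sectors)
{
        q->budget -= sectors;
        q->queued--;
}

/*
 * The in-service queue is expired when it exhausts its budget, empties,
 * or overstays its budget timeout; otherwise it keeps the device.
 */
static bool should_expire(const struct toy_queue *q, uint64_t now)
{
        return q->budget <= 0 || q->queued == 0 || now >= q->budget_timeout;
}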

3. What are BFQ's tunables?
===========================

The tunables back_seek_max, back_seek_penalty, fifo_expire_async and
fifo_expire_sync below are the same as in CFQ. Their description is
just copied from that for CFQ. Some considerations in the description
of slice_idle are copied from CFQ too.

per-process ioprio and weight
-----------------------------

Unless the cgroups interface is used (see "4. BFQ group scheduling"),
weights can be assigned to processes only indirectly, through I/O
priorities, and according to the relation:
weight = (IOPRIO_BE_NR - ioprio) * 10.

Beware that, if low-latency is set, then BFQ automatically raises the
weight of the queues associated with interactive and soft real-time
applications. Unset this tunable if you need/want to control weights.
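
A small stand-alone sketch of the mapping above (not a kernel function;
IOPRIO_BE_NR is 8 in current kernels, and BFQ_WEIGHT_CONVERSION_COEFF
is the constant 10 defined in bfq-iosched.h later in this commit):
ioprio 0, the highest best-effort priority, maps to weight 80, and
ioprio 7 maps to weight 10.

/* Illustrative sketch only. */
#define IOPRIO_BE_NR                    8
#define BFQ_WEIGHT_CONVERSION_COEFF     10

static int ioprio_to_weight(int ioprio)
{
        return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
}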

slice_idle
----------

This parameter specifies how long BFQ should idle for the next I/O
request, when certain sync BFQ queues become empty. By default
slice_idle is a non-zero value. Idling has a double purpose: boosting
throughput and making sure that the desired throughput distribution is
respected (see the description of how BFQ works, and, if needed, the
papers referred to there).

As for throughput, idling can be very helpful on highly seeky media
like single spindle SATA/SAS disks where we can cut down on overall
number of seeks and see improved throughput.

Setting slice_idle to 0 will remove all the idling on queues and one
should see an overall improved throughput on faster storage devices
like multiple SATA/SAS disks in hardware RAID configuration.

So depending on storage and workload, it might be useful to set
slice_idle=0. In general for SATA/SAS disks and software RAID of
SATA/SAS disks keeping slice_idle enabled should be useful. For any
configurations where there are multiple spindles behind a single LUN
(host based hardware RAID controller or storage arrays), setting
slice_idle=0 might end up in better throughput and acceptable
latencies.

Idling is however necessary to have service guarantees enforced in
case of differentiated weights or differentiated I/O-request lengths.
To see why, suppose that a given BFQ queue A must get several I/O
requests served for each request served for another queue B. Idling
ensures that, if A makes a new I/O request slightly after becoming
empty, then no request of B is dispatched in the middle, and thus A
does not lose the possibility to get more than one request dispatched
before the next request of B is dispatched. Note that idling
guarantees the desired differentiated treatment of queues only in
terms of I/O-request dispatches. To guarantee that the actual service
order then corresponds to the dispatch order, the strict_guarantees
tunable must be set too.

There is an important flipside for idling: apart from the above cases
where it is beneficial also for throughput, idling can severely impact
throughput. One important case is a random workload. Because of this
issue, BFQ tends to avoid idling as much as possible, when it is not
beneficial also for throughput. As a consequence of this behavior, and
of further issues described for the strict_guarantees tunable,
short-term service guarantees may be occasionally violated. And, in
some cases, these guarantees may be more important than guaranteeing
maximum throughput. For example, in video playing/streaming, a very
low drop rate may be more important than maximum throughput. In these
cases, consider setting the strict_guarantees parameter.

strict_guarantees
-----------------

If this parameter is set (default: unset), then BFQ

- always performs idling when the in-service queue becomes empty;

- forces the device to serve one I/O request at a time, by dispatching a
  new request only if there is no outstanding request.

In the presence of differentiated weights or I/O-request sizes, both
the above conditions are needed to guarantee that every BFQ queue
receives its allotted share of the bandwidth. The first condition is
needed for the reasons explained in the description of the slice_idle
tunable. The second condition is needed because all modern storage
devices reorder internally-queued requests, which may trivially break
the service guarantees enforced by the I/O scheduler.

Setting strict_guarantees may evidently affect throughput.

back_seek_max
-------------

This specifies, given in Kbytes, the maximum "distance" for backward seeking.
The distance is the amount of space from the current head location to the
sectors that are backward in terms of distance.

This parameter allows the scheduler to anticipate requests in the "backward"
direction and consider them as being the "next" if they are within this
distance from the current head location.

back_seek_penalty
-----------------

This parameter is used to compute the cost of backward seeking. If the
backward distance of a request is just 1/back_seek_penalty of the distance
of a "front" request, then the seek cost of the two requests is considered
equivalent.

In that case the scheduler will not bias toward one or the other request
(otherwise it will bias toward the front request). Default value of
back_seek_penalty is 2.
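
As an illustration of the rule above, the comparison can be written in a
couple of lines of C. This is a sketch only, with invented names,
assuming the backward candidate sits before the head and the forward
candidate after it; with back_seek_penalty = 2, a backward request wins
only when it is less than half as far away as the forward one.

/* Illustrative sketch only. */
#include <stdint.h>

typedef uint64_t sector_t;

/* Return 1 to prefer the backward candidate, 0 to prefer the forward one. */
static int prefer_backward(sector_t head, sector_t back_rq, sector_t front_rq,
                           unsigned int back_seek_penalty)
{
        sector_t back_dist = head - back_rq;    /* assumes back_rq <= head */
        sector_t front_dist = front_rq - head;  /* assumes front_rq >= head */

        /* A backward request wins only if its penalized distance is smaller. */
        return back_dist * back_seek_penalty < front_dist;
}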

fifo_expire_async
-----------------

This parameter is used to set the timeout of asynchronous requests. Default
value of this is 248ms.

fifo_expire_sync
----------------

This parameter is used to set the timeout of synchronous requests. Default
value of this is 124ms. To favor synchronous requests over asynchronous
ones, decrease this value relative to fifo_expire_async.

low_latency
-----------

This parameter is used to enable/disable BFQ's low latency mode. By
default, low latency mode is enabled. If enabled, interactive and soft
real-time applications are privileged and experience a lower latency,
as explained in more detail in the description of how BFQ works.

DO NOT enable this mode if you need full control over bandwidth
distribution. In fact, if it is enabled, then BFQ automatically
increases the bandwidth share of privileged applications, as the main
means to guarantee a lower latency to them.

timeout_sync
------------

Maximum amount of device time that can be given to a task (queue) once
it has been selected for service. On devices with costly seeks,
increasing this time usually increases maximum throughput. On the
opposite end, increasing this time coarsens the granularity of the
short-term bandwidth and latency guarantees, especially if the
following parameter is set to zero.

max_budget
----------

Maximum amount of service, measured in sectors, that can be provided
to a BFQ queue once it is set in service (of course within the limits
of the above timeout). As explained in the description of the
algorithm, larger values increase the throughput in proportion to
the percentage of sequential I/O requests issued. The price of larger
values is that they coarsen the granularity of short-term bandwidth
and latency guarantees.

The default value is 0, which enables auto-tuning: BFQ sets max_budget
to the maximum number of sectors that can be served during
timeout_sync, according to the estimated peak rate.
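
Put as code, the auto-tuning rule amounts to roughly the following. This
is an approximation for illustration, not BFQ's exact computation, and
the units are assumptions of the example.

/* Illustrative sketch only. */
#include <stdint.h>

/* peak_rate in sectors per millisecond, timeout_sync in milliseconds */
static uint64_t auto_max_budget(uint64_t peak_rate_sectors_per_ms,
                                uint64_t timeout_sync_ms)
{
        return peak_rate_sectors_per_ms * timeout_sync_ms;
}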

weights
-------

Read-only parameter, used to show the weights of the currently active
BFQ queues.


wr_ tunables
------------

BFQ exports a few parameters to control/tune the behavior of
low-latency heuristics.

wr_coeff

Factor by which the weight of a weight-raised queue is multiplied. If
the queue is deemed soft real-time, then the weight is further
multiplied by an additional, constant factor.

wr_max_time

Maximum duration of a weight-raising period for an interactive task
(ms). If set to zero (default value), then this value is computed
automatically, as a function of the peak rate of the device. In any
case, when the value of this parameter is read, it always reports the
current duration, regardless of whether it has been set manually or
computed automatically.

wr_max_softrt_rate

Maximum service rate below which a queue is deemed to be associated
with a soft real-time application, and is then weight-raised
accordingly (sectors/sec).

wr_min_idle_time

Minimum idle period after which interactive weight-raising may be
reactivated for a queue (in ms).

wr_rt_max_time

Maximum weight-raising duration for soft real-time queues (in ms). The
start time from which this duration is considered is automatically
moved forward if the queue is detected to be still soft real-time
before the current soft real-time weight-raising period finishes.

wr_min_inter_arr_async

Minimum period between I/O request arrivals after which weight-raising
may be reactivated for an already busy async queue (in ms).


4. Group scheduling with BFQ
============================

BFQ supports both cgroups-v1 and cgroups-v2 io controllers, namely
blkio and io. In particular, BFQ supports weight-based proportional
share. To activate cgroups support, set BFQ_GROUP_IOSCHED.

4-1 Service guarantees provided
-------------------------------

With BFQ, proportional share means true proportional share of the
device bandwidth, according to group weights. For example, a group
with weight 200 gets twice the bandwidth, and not just twice the time,
of a group with weight 100.
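
Written as a formula (added here for clarity, using the standard
proportional-share relation rather than text from the original
document), a group with weight w_i receives under contention

    B_i = \frac{w_i}{\sum_{j} w_j} \, B_{\mathrm{device}}

where B_{\mathrm{device}} is the total device bandwidth and the sum runs
over all groups competing for the device.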

BFQ supports hierarchies (group trees) of any depth. Bandwidth is
distributed among groups and processes in the expected way: for each
group, the children of the group share the whole bandwidth of the
group in proportion to their weights. In particular, this implies
that, for each leaf group, every process of the group receives the
same share of the whole group bandwidth, unless the ioprio of the
process is modified.

The resource-sharing guarantee for a group may partially or totally
switch from bandwidth to time, if providing bandwidth guarantees to
the group lowers the throughput too much. This switch occurs on a
per-process basis: if a process of a leaf group causes throughput loss
if served in such a way as to receive its share of the bandwidth, then
BFQ switches back to just time-based proportional share for that
process.

4-2 Interface
-------------

To get proportional sharing of bandwidth with BFQ for a given device,
BFQ must of course be the active scheduler for that device.

Within each group directory, the names of the files associated with
BFQ-specific cgroup parameters and stats begin with the "bfq."
prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for
BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group
parameter to set the weight of a group with BFQ is blkio.bfq.weight
or io.bfq.weight.

Parameters to set
-----------------

For each group, there is only the following parameter to set.

weight (namely blkio.bfq.weight or io.bfq.weight): the weight of the
group inside its parent. Available values: 1..10000 (default 100). The
linear mapping between ioprio and weights, described at the beginning
of the tunables section, is still valid, but all weights higher than
IOPRIO_BE_NR*10 are mapped to ioprio 0.

Recall that, if low-latency is set, then BFQ automatically raises the
weight of the queues associated with interactive and soft real-time
applications. Unset this tunable if you need/want to control weights.
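
A minimal user-space sketch of the interface just described, assuming a
cgroups-v2 hierarchy mounted at /sys/fs/cgroup and two hypothetical
groups named fast and slow (both the mount point and the group names are
assumptions of the example, not part of the original document):

/* Illustrative sketch only. */
#include <stdio.h>

static int set_bfq_weight(const char *cgroup_dir, int weight)
{
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path), "%s/io.bfq.weight", cgroup_dir);
        f = fopen(path, "w");
        if (!f)
                return -1;
        fprintf(f, "%d\n", weight);     /* valid range: 1..10000 */
        return fclose(f);
}

int main(void)
{
        /* "fast" gets twice the device bandwidth of "slow" under contention. */
        set_bfq_weight("/sys/fs/cgroup/fast", 200);
        set_bfq_weight("/sys/fs/cgroup/slow", 100);
        return 0;
}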

[1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
    Scheduler", Proceedings of the First Workshop on Mobile System
    Technologies (MST-2015), May 2015.
    http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf

[2] P. Valente and M. Andreolini, "Improving Application
    Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of
    the 5th Annual International Systems and Storage Conference
    (SYSTOR '12), June 2012.
    Slightly extended version:
    http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-results.pdf

Documentation/block/kyber-iosched.txt (new file, 14 lines)
@@ -0,0 +1,14 @@

Kyber I/O scheduler tunables
============================

The only two tunables for the Kyber scheduler are the target latencies for
reads and synchronous writes. Kyber will throttle requests in order to meet
these target latencies.

read_lat_nsec
-------------
Target latency for reads (in nanoseconds).

write_lat_nsec
--------------
Target latency for synchronous writes (in nanoseconds).
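
A minimal user-space sketch of setting one of these tunables, assuming
the scheduler tunables are exposed under /sys/block/<dev>/queue/iosched/
as is usual for I/O schedulers (the device name sda and the exact path
are assumptions of the example; check your system):

/* Illustrative sketch only. */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/block/sda/queue/iosched/read_lat_nsec", "w");

        if (!f) {
                perror("read_lat_nsec");
                return 1;
        }
        fprintf(f, "%llu\n", 1000000ULL);       /* 1 ms, in nanoseconds */
        return fclose(f) ? 1 : 0;
}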

@@ -43,11 +43,6 @@ large discards are issued, setting this value lower will make Linux issue
 smaller discards and potentially help reduce latencies induced by large
 discard operations.

-discard_zeroes_data (RO)
-------------------------
-When read, this file will show if the discarded block are zeroed by the
-device or not. If its value is '1' the blocks are zeroed otherwise not.
-
 hw_sector_size (RO)
 -------------------
 This is the hardware sector size of the device, in bytes.

@@ -192,5 +187,11 @@ scaling back writes. Writing a value of '0' to this file disables the
 feature. Writing a value of '-1' to this file resets the value to the
 default setting.

+throttle_sample_time (RW)
+-------------------------
+This is the time window that blk-throttle samples data, in milliseconds.
+blk-throttle makes decisions based on the samples. A lower time means cgroups
+have smoother throughput, but higher CPU overhead. This exists only when
+CONFIG_BLK_DEV_THROTTLING_LOW is enabled.

 Jens Axboe <jens.axboe@oracle.com>, February 2009

(deleted file)
@@ -1,84 +0,0 @@
This document describes m[g]flash support in linux.

Contents
1. Overview
2. Reserved area configuration
3. Example of mflash platform driver registration

1. Overview

Mflash and gflash are embedded flash drives. The only difference is that
mflash is an MCP (Multi Chip Package) device. The two devices operate in
exactly the same way, so in the rest of this document "mflash" refers to
both mflash and gflash.

Internally, mflash has NAND flash and other hardware logic and supports
2 different operation (ATA, IO) modes. ATA mode doesn't need any new
driver and currently works well under the standard IDE subsystem.
Actually it's a one chip SSD. IO mode is an ATA-like custom mode for
hosts that don't have an IDE interface.

Following are brief descriptions of the IO mode.
A. IO mode is based on the ATA protocol and uses some custom commands
   (read confirm, write confirm).
B. IO mode uses the SRAM bus interface.
C. IO mode supports a 4kB boot area, so the host can boot from mflash.

2. Reserved area configuration
If the host boots from mflash, it usually needs a raw area for the boot
loader image. All of mflash's block device operations take this value as
the start offset. Note that the boot loader's reserved-area size and the
kernel configuration value must be the same.

3. Example of mflash platform driver registration
Getting mflash working is very straightforward: all that is needed is to
add the platform device definitions to the board configuration file. Here
is a pseudo example.

static struct mg_drv_data mflash_drv_data = {
        /* set to 1 if you want the driver to poll */
        .use_polling = 0,
        /* device attribute */
        .dev_attr = MG_BOOT_DEV
};

static struct resource mg_mflash_rsc[] = {
        /* Base address of mflash */
        [0] = {
                .start = 0x08000000,
                .end = 0x08000000 + SZ_64K - 1,
                .flags = IORESOURCE_MEM
        },
        /* mflash interrupt pin */
        [1] = {
                .start = IRQ_GPIO(84),
                .end = IRQ_GPIO(84),
                .flags = IORESOURCE_IRQ
        },
        /* mflash reset pin */
        [2] = {
                .start = 43,
                .end = 43,
                .name = MG_RST_PIN,
                .flags = IORESOURCE_IO
        },
        /* mflash reset-out pin
         * If you use mflash as a storage device (i.e. other than MG_BOOT_DEV),
         * you should assign this */
        [3] = {
                .start = 51,
                .end = 51,
                .name = MG_RSTOUT_PIN,
                .flags = IORESOURCE_IO
        }
};

static struct platform_device mflash_dev = {
        .name = MG_DEV_NAME,
        .id = -1,
        .dev = {
                .platform_data = &mflash_drv_data,
        },
        .num_resources = ARRAY_SIZE(mg_mflash_rsc),
        .resource = mg_mflash_rsc
};

platform_device_register(&mflash_dev);

Documentation/lightnvm/pblk.txt (new file, 21 lines)
@@ -0,0 +1,21 @@

pblk: Physical Block Device Target
==================================

pblk implements a fully associative, host-based FTL that exposes a traditional
block I/O interface. Its primary responsibilities are:

  - Map logical addresses onto physical addresses (4KB granularity) in a
    logical-to-physical (L2P) table (see the sketch after this list).
  - Maintain the integrity and consistency of the L2P table as well as its
    recovery from normal tear down and power outage.
  - Deal with controller- and media-specific constraints.
  - Handle I/O errors.
  - Implement garbage collection.
  - Maintain consistency across the I/O stack during synchronization points.
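
As a rough illustration of the first responsibility, an L2P table at 4KB
granularity is essentially a flat array indexed by logical block
address. The sketch below is not pblk code; all names are invented for
the example, and persistence, locking and garbage collection are
omitted.

/* Illustrative sketch only, not pblk code. */
#include <stdint.h>
#include <stdlib.h>

#define PPA_EMPTY       ((uint64_t)-1)  /* LBA not mapped yet */

struct toy_l2p {
        uint64_t *map;          /* indexed by LBA, one entry per 4KB block */
        uint64_t nr_lbas;
};

static struct toy_l2p *toy_l2p_create(uint64_t nr_lbas)
{
        struct toy_l2p *t = malloc(sizeof(*t));

        if (!t)
                return NULL;
        t->map = malloc(nr_lbas * sizeof(uint64_t));
        if (!t->map) {
                free(t);
                return NULL;
        }
        for (uint64_t i = 0; i < nr_lbas; i++)
                t->map[i] = PPA_EMPTY;
        t->nr_lbas = nr_lbas;
        return t;
}

/* On write: record where the 4KB block now lives on the media. */
static void toy_l2p_update(struct toy_l2p *t, uint64_t lba, uint64_t ppa)
{
        t->map[lba] = ppa;
}

/* On read: translate the logical address; PPA_EMPTY means "never written". */
static uint64_t toy_l2p_lookup(const struct toy_l2p *t, uint64_t lba)
{
        return t->map[lba];
}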

For more information please refer to:

  http://lightnvm.io

which maintains updated FAQs, manual pages, technical documentation, tools,
contacts, etc.

@@ -2544,6 +2544,14 @@ F: block/
 F:	kernel/trace/blktrace.c
 F:	lib/sbitmap.c

+BFQ I/O SCHEDULER
+M:	Paolo Valente <paolo.valente@linaro.org>
+M:	Jens Axboe <axboe@kernel.dk>
+L:	linux-block@vger.kernel.org
+S:	Maintained
+F:	block/bfq-*
+F:	Documentation/block/bfq-iosched.txt
+
 BLOCK2MTD DRIVER
 M:	Joern Engel <joern@lazybastard.org>
 L:	linux-mtd@lists.infradead.org

@@ -115,6 +115,18 @@ config BLK_DEV_THROTTLING
 	See Documentation/cgroups/blkio-controller.txt for more information.

+config BLK_DEV_THROTTLING_LOW
+	bool "Block throttling .low limit interface support (EXPERIMENTAL)"
+	depends on BLK_DEV_THROTTLING
+	default n
+	---help---
+	Add .low limit interface for block throttling. The low limit is a best
+	effort limit to prioritize cgroups. Depending on the setting, the limit
+	can be used to protect cgroups in terms of bandwidth/iops and better
+	utilize disk resource.
+
+	Note, this is an experimental interface and could be changed someday.
+
 config BLK_CMDLINE_PARSER
 	bool "Block device command line partition parser"
 	default n

@@ -40,6 +40,7 @@ config CFQ_GROUP_IOSCHED
 	  Enable group IO scheduling in CFQ.

 choice
 	prompt "Default I/O scheduler"
 	default DEFAULT_CFQ
 	help

@@ -69,6 +70,35 @@ config MQ_IOSCHED_DEADLINE
 	---help---
 	  MQ version of the deadline IO scheduler.

+config MQ_IOSCHED_KYBER
+	tristate "Kyber I/O scheduler"
+	default y
+	---help---
+	  The Kyber I/O scheduler is a low-overhead scheduler suitable for
+	  multiqueue and other fast devices. Given target latencies for reads and
+	  synchronous writes, it will self-tune queue depths to achieve that
+	  goal.
+
+config IOSCHED_BFQ
+	tristate "BFQ I/O scheduler"
+	default n
+	---help---
+	  BFQ I/O scheduler for BLK-MQ. BFQ distributes the bandwidth of
+	  the device among all processes according to their weights,
+	  regardless of the device parameters and with any workload. It
+	  also guarantees a low latency to interactive and soft
+	  real-time applications. Details in
+	  Documentation/block/bfq-iosched.txt
+
+config BFQ_GROUP_IOSCHED
+	bool "BFQ hierarchical scheduling support"
+	depends on IOSCHED_BFQ && BLK_CGROUP
+	default n
+	---help---
+
+	  Enable hierarchical scheduling in BFQ, using the blkio
+	  (cgroups-v1) or io (cgroups-v2) controller.
+
 endmenu

 endif
@@ -20,6 +20,9 @@ obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
 obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadline-iosched.o
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
 obj-$(CONFIG_MQ_IOSCHED_DEADLINE)	+= mq-deadline.o
+obj-$(CONFIG_MQ_IOSCHED_KYBER)	+= kyber-iosched.o
+bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
+obj-$(CONFIG_IOSCHED_BFQ)	+= bfq.o

 obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
 obj-$(CONFIG_BLK_CMDLINE_PARSER)	+= cmdline-parser.o
block/bfq-cgroup.c (new file, 1139 lines): file diff suppressed because it is too large
block/bfq-iosched.c (new file, 5047 lines): file diff suppressed because it is too large

block/bfq-iosched.h (new file, 941 lines)
@@ -0,0 +1,941 @@
/*
|
||||||
|
* Header file for the BFQ I/O scheduler: data structures and
|
||||||
|
* prototypes of interface functions among BFQ components.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License as
|
||||||
|
* published by the Free Software Foundation; either version 2 of the
|
||||||
|
* License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*/
|
||||||
|
#ifndef _BFQ_H
|
||||||
|
#define _BFQ_H
|
||||||
|
|
||||||
|
#include <linux/blktrace_api.h>
|
||||||
|
#include <linux/hrtimer.h>
|
||||||
|
#include <linux/blk-cgroup.h>
|
||||||
|
|
||||||
|
#define BFQ_IOPRIO_CLASSES 3
|
||||||
|
#define BFQ_CL_IDLE_TIMEOUT (HZ/5)
|
||||||
|
|
||||||
|
#define BFQ_MIN_WEIGHT 1
|
||||||
|
#define BFQ_MAX_WEIGHT 1000
|
||||||
|
#define BFQ_WEIGHT_CONVERSION_COEFF 10
|
||||||
|
|
||||||
|
#define BFQ_DEFAULT_QUEUE_IOPRIO 4
|
||||||
|
|
||||||
|
#define BFQ_WEIGHT_LEGACY_DFL 100
|
||||||
|
#define BFQ_DEFAULT_GRP_IOPRIO 0
|
||||||
|
#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Soft real-time applications are extremely more latency sensitive
|
||||||
|
* than interactive ones. Over-raise the weight of the former to
|
||||||
|
* privilege them against the latter.
|
||||||
|
*/
|
||||||
|
#define BFQ_SOFTRT_WEIGHT_FACTOR 100
|
||||||
|
|
||||||
|
struct bfq_entity;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct bfq_service_tree - per ioprio_class service tree.
|
||||||
|
*
|
||||||
|
* Each service tree represents a B-WF2Q+ scheduler on its own. Each
|
||||||
|
* ioprio_class has its own independent scheduler, and so its own
|
||||||
|
* bfq_service_tree. All the fields are protected by the queue lock
|
||||||
|
* of the containing bfqd.
|
||||||
|
*/
|
||||||
|
struct bfq_service_tree {
|
||||||
|
/* tree for active entities (i.e., those backlogged) */
|
||||||
|
struct rb_root active;
|
||||||
|
/* tree for idle entities (i.e., not backlogged, with V <= F_i)*/
|
||||||
|
struct rb_root idle;
|
||||||
|
|
||||||
|
/* idle entity with minimum F_i */
|
||||||
|
struct bfq_entity *first_idle;
|
||||||
|
/* idle entity with maximum F_i */
|
||||||
|
struct bfq_entity *last_idle;
|
||||||
|
|
||||||
|
/* scheduler virtual time */
|
||||||
|
u64 vtime;
|
||||||
|
/* scheduler weight sum; active and idle entities contribute to it */
|
||||||
|
unsigned long wsum;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct bfq_sched_data - multi-class scheduler.
|
||||||
|
*
|
||||||
|
* bfq_sched_data is the basic scheduler queue. It supports three
|
||||||
|
* ioprio_classes, and can be used either as a toplevel queue or as an
|
||||||
|
* intermediate queue on a hierarchical setup. @next_in_service
|
||||||
|
* points to the active entity of the sched_data service trees that
|
||||||
|
* will be scheduled next. It is used to reduce the number of steps
|
||||||
|
* needed for each hierarchical-schedule update.
|
||||||
|
*
|
||||||
|
* The supported ioprio_classes are the same as in CFQ, in descending
|
||||||
|
* priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
|
||||||
|
* Requests from higher priority queues are served before all the
|
||||||
|
* requests from lower priority queues; among requests of the same
|
||||||
|
* queue requests are served according to B-WF2Q+.
|
||||||
|
* All the fields are protected by the queue lock of the containing bfqd.
|
||||||
|
*/
|
||||||
|
struct bfq_sched_data {
|
||||||
|
/* entity in service */
|
||||||
|
struct bfq_entity *in_service_entity;
|
||||||
|
/* head-of-line entity (see comments above) */
|
||||||
|
struct bfq_entity *next_in_service;
|
||||||
|
/* array of service trees, one per ioprio_class */
|
||||||
|
struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
|
||||||
|
/* last time CLASS_IDLE was served */
|
||||||
|
unsigned long bfq_class_idle_last_service;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct bfq_weight_counter - counter of the number of all active entities
|
||||||
|
* with a given weight.
|
||||||
|
*/
|
||||||
|
struct bfq_weight_counter {
|
||||||
|
unsigned int weight; /* weight of the entities this counter refers to */
|
||||||
|
unsigned int num_active; /* nr of active entities with this weight */
|
||||||
|
/*
|
||||||
|
* Weights tree member (see bfq_data's @queue_weights_tree and
|
||||||
|
* @group_weights_tree)
|
||||||
|
*/
|
||||||
|
struct rb_node weights_node;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct bfq_entity - schedulable entity.
|
||||||
|
*
|
||||||
|
* A bfq_entity is used to represent either a bfq_queue (leaf node in the
|
||||||
|
* cgroup hierarchy) or a bfq_group into the upper level scheduler. Each
|
||||||
|
* entity belongs to the sched_data of the parent group in the cgroup
|
||||||
|
* hierarchy. Non-leaf entities have also their own sched_data, stored
|
||||||
|
* in @my_sched_data.
|
||||||
|
*
|
||||||
|
* Each entity stores independently its priority values; this would
|
||||||
|
* allow different weights on different devices, but this
|
||||||
|
* functionality is not exported to userspace by now. Priorities and
|
||||||
|
* weights are updated lazily, first storing the new values into the
|
||||||
|
* new_* fields, then setting the @prio_changed flag. As soon as
|
||||||
|
* there is a transition in the entity state that allows the priority
|
||||||
|
* update to take place the effective and the requested priority
|
||||||
|
* values are synchronized.
|
||||||
|
*
|
||||||
|
* Unless cgroups are used, the weight value is calculated from the
|
||||||
|
* ioprio to export the same interface as CFQ. When dealing with
|
||||||
|
* ``well-behaved'' queues (i.e., queues that do not spend too much
|
||||||
|
* time to consume their budget and have true sequential behavior, and
|
||||||
|
* when there are no external factors breaking anticipation) the
|
||||||
|
* relative weights at each level of the cgroups hierarchy should be
|
||||||
|
* guaranteed. All the fields are protected by the queue lock of the
|
||||||
|
* containing bfqd.
|
||||||
|
*/
|
||||||
|
struct bfq_entity {
|
||||||
|
/* service_tree member */
|
||||||
|
struct rb_node rb_node;
|
||||||
|
/* pointer to the weight counter associated with this entity */
|
||||||
|
struct bfq_weight_counter *weight_counter;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Flag, true if the entity is on a tree (either the active or
|
||||||
|
* the idle one of its service_tree) or is in service.
|
||||||
|
*/
|
||||||
|
bool on_st;
|
||||||
|
|
||||||
|
/* B-WF2Q+ start and finish timestamps [sectors/weight] */
|
||||||
|
u64 start, finish;
|
||||||
|
|
||||||
|
/* tree the entity is enqueued into; %NULL if not on a tree */
|
||||||
|
struct rb_root *tree;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* minimum start time of the (active) subtree rooted at this
|
||||||
|
* entity; used for O(log N) lookups into active trees
|
||||||
|
*/
|
||||||
|
u64 min_start;
|
||||||
|
|
||||||
|
/* amount of service received during the last service slot */
|
||||||
|
int service;
|
||||||
|
|
||||||
|
/* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
|
||||||
|
int budget;
|
||||||
|
|
||||||
|
/* weight of the queue */
|
||||||
|
int weight;
|
||||||
|
/* next weight if a change is in progress */
|
||||||
|
int new_weight;
|
||||||
|
|
||||||
|
/* original weight, used to implement weight boosting */
|
||||||
|
int orig_weight;
|
||||||
|
|
||||||
|
/* parent entity, for hierarchical scheduling */
|
||||||
|
struct bfq_entity *parent;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For non-leaf nodes in the hierarchy, the associated
|
||||||
|
* scheduler queue, %NULL on leaf nodes.
|
||||||
|
*/
|
||||||
|
struct bfq_sched_data *my_sched_data;
|
||||||
|
/* the scheduler queue this entity belongs to */
|
||||||
|
struct bfq_sched_data *sched_data;
|
||||||
|
|
||||||
|
/* flag, set to request a weight, ioprio or ioprio_class change */
|
||||||
|
int prio_changed;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct bfq_group;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct bfq_ttime - per process thinktime stats.
|
||||||
|
*/
|
||||||
|
struct bfq_ttime {
|
||||||
|
/* completion time of the last request */
|
||||||
|
u64 last_end_request;
|
||||||
|
|
||||||
|
/* total process thinktime */
|
||||||
|
u64 ttime_total;
|
||||||
|
	/* number of thinktime samples */
	unsigned long ttime_samples;
	/* average process thinktime */
	u64 ttime_mean;
};

/**
 * struct bfq_queue - leaf schedulable entity.
 *
 * A bfq_queue is a leaf request queue; it can be associated with an
 * io_context or more, if it is async or shared between cooperating
 * processes. @cgroup holds a reference to the cgroup, to be sure that it
 * does not disappear while a bfqq still references it (mostly to avoid
 * races between request issuing and task migration followed by cgroup
 * destruction).
 * All the fields are protected by the queue lock of the containing bfqd.
 */
struct bfq_queue {
	/* reference counter */
	int ref;
	/* parent bfq_data */
	struct bfq_data *bfqd;

	/* current ioprio and ioprio class */
	unsigned short ioprio, ioprio_class;
	/* next ioprio and ioprio class if a change is in progress */
	unsigned short new_ioprio, new_ioprio_class;

	/*
	 * Shared bfq_queue if queue is cooperating with one or more
	 * other queues.
	 */
	struct bfq_queue *new_bfqq;
	/* request-position tree member (see bfq_group's @rq_pos_tree) */
	struct rb_node pos_node;
	/* request-position tree root (see bfq_group's @rq_pos_tree) */
	struct rb_root *pos_root;

	/* sorted list of pending requests */
	struct rb_root sort_list;
	/* if fifo isn't expired, next request to serve */
	struct request *next_rq;
	/* number of sync and async requests queued */
	int queued[2];
	/* number of requests currently allocated */
	int allocated;
	/* number of pending metadata requests */
	int meta_pending;
	/* fifo list of requests in sort_list */
	struct list_head fifo;

	/* entity representing this queue in the scheduler */
	struct bfq_entity entity;

	/* maximum budget allowed from the feedback mechanism */
	int max_budget;
	/* budget expiration (in jiffies) */
	unsigned long budget_timeout;

	/* number of requests on the dispatch list or inside driver */
	int dispatched;

	/* status flags */
	unsigned long flags;

	/* node for active/idle bfqq list inside parent bfqd */
	struct list_head bfqq_list;

	/* associated @bfq_ttime struct */
	struct bfq_ttime ttime;

	/* bit vector: a 1 for each seeky request in history */
	u32 seek_history;

	/* node for the device's burst list */
	struct hlist_node burst_list_node;

	/* position of the last request enqueued */
	sector_t last_request_pos;

	/* Number of consecutive pairs of request completion and
	 * arrival, such that the queue becomes idle after the
	 * completion, but the next request arrives within an idle
	 * time slice; used only if the queue's IO_bound flag has been
	 * cleared.
	 */
	unsigned int requests_within_timer;

	/* pid of the process owning the queue, used for logging purposes */
	pid_t pid;

	/*
	 * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL
	 * if the queue is shared.
	 */
	struct bfq_io_cq *bic;

	/* current maximum weight-raising time for this queue */
	unsigned long wr_cur_max_time;
	/*
	 * Minimum time instant such that, only if a new request is
	 * enqueued after this time instant in an idle @bfq_queue with
	 * no outstanding requests, then the task associated with the
	 * queue is deemed as soft real-time (see the comments on
	 * the function bfq_bfqq_softrt_next_start())
	 */
	unsigned long soft_rt_next_start;
	/*
	 * Start time of the current weight-raising period if
	 * the @bfq-queue is being weight-raised, otherwise
	 * finish time of the last weight-raising period.
	 */
	unsigned long last_wr_start_finish;
	/* factor by which the weight of this queue is multiplied */
	unsigned int wr_coeff;
	/*
	 * Time of the last transition of the @bfq_queue from idle to
	 * backlogged.
	 */
	unsigned long last_idle_bklogged;
	/*
	 * Cumulative service received from the @bfq_queue since the
	 * last transition from idle to backlogged.
	 */
	unsigned long service_from_backlogged;

	/*
	 * Value of wr start time when switching to soft rt
	 */
	unsigned long wr_start_at_switch_to_srt;

	unsigned long split_time; /* time of last split */
};

/**
 * struct bfq_io_cq - per (request_queue, io_context) structure.
 */
struct bfq_io_cq {
	/* associated io_cq structure */
	struct io_cq icq; /* must be the first member */
	/* array of two process queues, the sync and the async */
	struct bfq_queue *bfqq[2];
	/* per (request_queue, blkcg) ioprio */
	int ioprio;
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	uint64_t blkcg_serial_nr; /* the current blkcg serial */
#endif
	/*
	 * Snapshot of the idle window before merging; taken to
	 * remember this value while the queue is merged, so as to be
	 * able to restore it in case of split.
	 */
	bool saved_idle_window;
	/*
	 * Same purpose as the previous two fields for the I/O bound
	 * classification of a queue.
	 */
	bool saved_IO_bound;

	/*
	 * Same purpose as the previous fields for the value of the
	 * field keeping the queue's belonging to a large burst
	 */
	bool saved_in_large_burst;
	/*
	 * True if the queue belonged to a burst list before its merge
	 * with another cooperating queue.
	 */
	bool was_in_burst_list;

	/*
	 * Similar to previous fields: save wr information.
	 */
	unsigned long saved_wr_coeff;
	unsigned long saved_last_wr_start_finish;
	unsigned long saved_wr_start_at_switch_to_srt;
	unsigned int saved_wr_cur_max_time;
	struct bfq_ttime saved_ttime;
};

enum bfq_device_speed {
	BFQ_BFQD_FAST,
	BFQ_BFQD_SLOW,
};

/**
 * struct bfq_data - per-device data structure.
 *
 * All the fields are protected by @lock.
 */
struct bfq_data {
	/* device request queue */
	struct request_queue *queue;
	/* dispatch queue */
	struct list_head dispatch;

	/* root bfq_group for the device */
	struct bfq_group *root_group;

	/*
	 * rbtree of weight counters of @bfq_queues, sorted by
	 * weight. Used to keep track of whether all @bfq_queues have
	 * the same weight. The tree contains one counter for each
	 * distinct weight associated to some active and not
	 * weight-raised @bfq_queue (see the comments to the functions
	 * bfq_weights_tree_[add|remove] for further details).
	 */
	struct rb_root queue_weights_tree;
	/*
	 * rbtree of non-queue @bfq_entity weight counters, sorted by
	 * weight. Used to keep track of whether all @bfq_groups have
	 * the same weight. The tree contains one counter for each
	 * distinct weight associated to some active @bfq_group (see
	 * the comments to the functions bfq_weights_tree_[add|remove]
	 * for further details).
	 */
	struct rb_root group_weights_tree;

	/*
	 * Number of bfq_queues containing requests (including the
	 * queue in service, even if it is idling).
	 */
	int busy_queues;
	/* number of weight-raised busy @bfq_queues */
	int wr_busy_queues;
	/* number of queued requests */
	int queued;
	/* number of requests dispatched and waiting for completion */
	int rq_in_driver;

	/*
	 * Maximum number of requests in driver in the last
	 * @hw_tag_samples completed requests.
	 */
	int max_rq_in_driver;
	/* number of samples used to calculate hw_tag */
	int hw_tag_samples;
	/* flag set to one if the driver is showing a queueing behavior */
	int hw_tag;

	/* number of budgets assigned */
	int budgets_assigned;

	/*
	 * Timer set when idling (waiting) for the next request from
	 * the queue in service.
	 */
	struct hrtimer idle_slice_timer;

	/* bfq_queue in service */
	struct bfq_queue *in_service_queue;

	/* on-disk position of the last served request */
	sector_t last_position;

	/* time of last request completion (ns) */
	u64 last_completion;

	/* time of first rq dispatch in current observation interval (ns) */
	u64 first_dispatch;
	/* time of last rq dispatch in current observation interval (ns) */
	u64 last_dispatch;

	/* beginning of the last budget */
	ktime_t last_budget_start;
	/* beginning of the last idle slice */
	ktime_t last_idling_start;

	/* number of samples in current observation interval */
	int peak_rate_samples;
	/* num of samples of seq dispatches in current observation interval */
	u32 sequential_samples;
	/* total num of sectors transferred in current observation interval */
	u64 tot_sectors_dispatched;
	/* max rq size seen during current observation interval (sectors) */
	u32 last_rq_max_size;
	/* time elapsed from first dispatch in current observ. interval (us) */
	u64 delta_from_first;
	/*
	 * Current estimate of the device peak rate, measured in
	 * [BFQ_RATE_SHIFT * sectors/usec]. The left-shift by
	 * BFQ_RATE_SHIFT is performed to increase precision in
	 * fixed-point calculations.
	 */
	u32 peak_rate;

	/* maximum budget allotted to a bfq_queue before rescheduling */
	int bfq_max_budget;

	/* list of all the bfq_queues active on the device */
	struct list_head active_list;
	/* list of all the bfq_queues idle on the device */
	struct list_head idle_list;

	/*
	 * Timeout for async/sync requests; when it fires, requests
	 * are served in fifo order.
	 */
	u64 bfq_fifo_expire[2];
	/* weight of backward seeks wrt forward ones */
	unsigned int bfq_back_penalty;
	/* maximum allowed backward seek */
	unsigned int bfq_back_max;
	/* maximum idling time */
	u32 bfq_slice_idle;

	/* user-configured max budget value (0 for auto-tuning) */
	int bfq_user_max_budget;
	/*
	 * Timeout for bfq_queues to consume their budget; used to
	 * prevent seeky queues from imposing long latencies to
	 * sequential or quasi-sequential ones (this also implies that
	 * seeky queues cannot receive guarantees in the service
	 * domain; after a timeout they are charged for the time they
	 * have been in service, to preserve fairness among them, but
	 * without service-domain guarantees).
	 */
	unsigned int bfq_timeout;

	/*
	 * Number of consecutive requests that must be issued within
	 * the idle time slice to set again idling to a queue which
	 * was marked as non-I/O-bound (see the definition of the
	 * IO_bound flag for further details).
	 */
	unsigned int bfq_requests_within_timer;

	/*
	 * Force device idling whenever needed to provide accurate
	 * service guarantees, without caring about throughput
	 * issues. CAVEAT: this may even increase latencies, in case
	 * of useless idling for processes that did stop doing I/O.
	 */
	bool strict_guarantees;

	/*
	 * Last time at which a queue entered the current burst of
	 * queues being activated shortly after each other; for more
	 * details about this and the following parameters related to
	 * a burst of activations, see the comments on the function
	 * bfq_handle_burst.
	 */
	unsigned long last_ins_in_burst;
	/*
	 * Reference time interval used to decide whether a queue has
	 * been activated shortly after @last_ins_in_burst.
	 */
	unsigned long bfq_burst_interval;
	/* number of queues in the current burst of queue activations */
	int burst_size;

	/* common parent entity for the queues in the burst */
	struct bfq_entity *burst_parent_entity;
	/* Maximum burst size above which the current queue-activation
	 * burst is deemed as 'large'.
	 */
	unsigned long bfq_large_burst_thresh;
	/* true if a large queue-activation burst is in progress */
	bool large_burst;
	/*
	 * Head of the burst list (as for the above fields, more
	 * details in the comments on the function bfq_handle_burst).
	 */
	struct hlist_head burst_list;

	/* if set to true, low-latency heuristics are enabled */
	bool low_latency;
	/*
	 * Maximum factor by which the weight of a weight-raised queue
	 * is multiplied.
	 */
	unsigned int bfq_wr_coeff;
	/* maximum duration of a weight-raising period (jiffies) */
	unsigned int bfq_wr_max_time;

	/* Maximum weight-raising duration for soft real-time processes */
	unsigned int bfq_wr_rt_max_time;
	/*
	 * Minimum idle period after which weight-raising may be
	 * reactivated for a queue (in jiffies).
	 */
	unsigned int bfq_wr_min_idle_time;
	/*
	 * Minimum period between request arrivals after which
	 * weight-raising may be reactivated for an already busy async
	 * queue (in jiffies).
	 */
	unsigned long bfq_wr_min_inter_arr_async;

	/* Max service-rate for a soft real-time queue, in sectors/sec */
	unsigned int bfq_wr_max_softrt_rate;
	/*
	 * Cached value of the product R*T, used for computing the
	 * maximum duration of weight raising automatically.
	 */
	u64 RT_prod;
	/* device-speed class for the low-latency heuristic */
	enum bfq_device_speed device_speed;

	/* fallback dummy bfqq for extreme OOM conditions */
	struct bfq_queue oom_bfqq;

	spinlock_t lock;

	/*
	 * bic associated with the task issuing current bio for
	 * merging. This and the next field are used as a support to
	 * be able to perform the bic lookup, needed by bio-merge
	 * functions, before the scheduler lock is taken, and thus
	 * avoid taking the request-queue lock while the scheduler
	 * lock is being held.
	 */
	struct bfq_io_cq *bio_bic;
	/* bfqq associated with the task issuing current bio for merging */
	struct bfq_queue *bio_bfqq;
};

enum bfqq_state_flags {
	BFQQF_just_created = 0,	/* queue just allocated */
	BFQQF_busy,		/* has requests or is in service */
	BFQQF_wait_request,	/* waiting for a request */
	BFQQF_non_blocking_wait_rq, /*
				     * waiting for a request
				     * without idling the device
				     */
	BFQQF_fifo_expire,	/* FIFO checked in this slice */
	BFQQF_idle_window,	/* slice idling enabled */
	BFQQF_sync,		/* synchronous queue */
	BFQQF_IO_bound,		/*
				 * bfqq has timed-out at least once
				 * having consumed at most 2/10 of
				 * its budget
				 */
	BFQQF_in_large_burst,	/*
				 * bfqq activated in a large burst,
				 * see comments to bfq_handle_burst.
				 */
	BFQQF_softrt_update,	/*
				 * may need softrt-next-start
				 * update
				 */
	BFQQF_coop,		/* bfqq is shared */
	BFQQF_split_coop	/* shared bfqq will be split */
};

#define BFQ_BFQQ_FNS(name)						\
void bfq_mark_bfqq_##name(struct bfq_queue *bfqq);			\
void bfq_clear_bfqq_##name(struct bfq_queue *bfqq);			\
int bfq_bfqq_##name(const struct bfq_queue *bfqq);

BFQ_BFQQ_FNS(just_created);
BFQ_BFQQ_FNS(busy);
BFQ_BFQQ_FNS(wait_request);
BFQ_BFQQ_FNS(non_blocking_wait_rq);
BFQ_BFQQ_FNS(fifo_expire);
BFQ_BFQQ_FNS(idle_window);
BFQ_BFQQ_FNS(sync);
BFQ_BFQQ_FNS(IO_bound);
BFQ_BFQQ_FNS(in_large_burst);
BFQ_BFQQ_FNS(coop);
BFQ_BFQQ_FNS(split_coop);
BFQ_BFQQ_FNS(softrt_update);
#undef BFQ_BFQQ_FNS
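
As an illustrative aside (not taken from the patch itself): expanding one invocation of the macro above by hand gives roughly the prototypes below; the matching definitions are generated the same way in the scheduler's C file.

/* Rough hand-expansion of BFQ_BFQQ_FNS(busy), shown for illustration only. */
void bfq_mark_bfqq_busy(struct bfq_queue *bfqq);
void bfq_clear_bfqq_busy(struct bfq_queue *bfqq);
int bfq_bfqq_busy(const struct bfq_queue *bfqq);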

/* Expiration reasons. */
enum bfqq_expiration {
	BFQQE_TOO_IDLE = 0,		/*
					 * queue has been idling for
					 * too long
					 */
	BFQQE_BUDGET_TIMEOUT,	/* budget took too long to be used */
	BFQQE_BUDGET_EXHAUSTED,	/* budget consumed */
	BFQQE_NO_MORE_REQUESTS,	/* the queue has no more requests */
	BFQQE_PREEMPTED		/* preemption in progress */
};

struct bfqg_stats {
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	/* number of ios merged */
	struct blkg_rwstat merged;
	/* total time spent on device in ns, may not be accurate w/ queueing */
	struct blkg_rwstat service_time;
	/* total time spent waiting in scheduler queue in ns */
	struct blkg_rwstat wait_time;
	/* number of IOs queued up */
	struct blkg_rwstat queued;
	/* total disk time and nr sectors dispatched by this group */
	struct blkg_stat time;
	/* sum of number of ios queued across all samples */
	struct blkg_stat avg_queue_size_sum;
	/* count of samples taken for average */
	struct blkg_stat avg_queue_size_samples;
	/* how many times this group has been removed from service tree */
	struct blkg_stat dequeue;
	/* total time spent waiting for it to be assigned a timeslice. */
	struct blkg_stat group_wait_time;
	/* time spent idling for this blkcg_gq */
	struct blkg_stat idle_time;
	/* total time with empty current active q with other requests queued */
	struct blkg_stat empty_time;
	/* fields after this shouldn't be cleared on stat reset */
	uint64_t start_group_wait_time;
	uint64_t start_idle_time;
	uint64_t start_empty_time;
	uint16_t flags;
#endif	/* CONFIG_BFQ_GROUP_IOSCHED */
};

#ifdef CONFIG_BFQ_GROUP_IOSCHED

/*
 * struct bfq_group_data - per-blkcg storage for the blkio subsystem.
 *
 * @ps: @blkcg_policy_storage that this structure inherits
 * @weight: weight of the bfq_group
 */
struct bfq_group_data {
	/* must be the first member */
	struct blkcg_policy_data pd;

	unsigned int weight;
};

/**
 * struct bfq_group - per (device, cgroup) data structure.
 * @entity: schedulable entity to insert into the parent group sched_data.
 * @sched_data: own sched_data, to contain child entities (they may be
 *              both bfq_queues and bfq_groups).
 * @bfqd: the bfq_data for the device this group acts upon.
 * @async_bfqq: array of async queues for all the tasks belonging to
 *              the group, one queue per ioprio value per ioprio_class,
 *              except for the idle class that has only one queue.
 * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
 * @my_entity: pointer to @entity, %NULL for the toplevel group; used
 *             to avoid too many special cases during group creation/
 *             migration.
 * @stats: stats for this bfqg.
 * @active_entities: number of active entities belonging to the group;
 *                   unused for the root group. Used to know whether there
 *                   are groups with more than one active @bfq_entity
 *                   (see the comments to the function
 *                   bfq_bfqq_may_idle()).
 * @rq_pos_tree: rbtree sorted by next_request position, used when
 *               determining if two or more queues have interleaving
 *               requests (see bfq_find_close_cooperator()).
 *
 * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
 * there is a set of bfq_groups, each one collecting the lower-level
 * entities belonging to the group that are acting on the same device.
 *
 * Locking works as follows:
 *    o @bfqd is protected by the queue lock, RCU is used to access it
 *      from the readers.
 *    o All the other fields are protected by the @bfqd queue lock.
 */
struct bfq_group {
	/* must be the first member */
	struct blkg_policy_data pd;

	struct bfq_entity entity;
	struct bfq_sched_data sched_data;

	void *bfqd;

	struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
	struct bfq_queue *async_idle_bfqq;

	struct bfq_entity *my_entity;

	int active_entities;

	struct rb_root rq_pos_tree;

	struct bfqg_stats stats;
};

#else
struct bfq_group {
	struct bfq_sched_data sched_data;

	struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
	struct bfq_queue *async_idle_bfqq;

	struct rb_root rq_pos_tree;
};
#endif

struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);

/* --------------- main algorithm interface ----------------- */

#define BFQ_SERVICE_TREE_INIT	((struct bfq_service_tree)		\
				{ RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })

extern const int bfq_timeout;

struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync);
void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync);
struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic);
void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
			  struct rb_root *root);
void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_entity *entity,
			     struct rb_root *root);
void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		     bool compensate, enum bfqq_expiration reason);
void bfq_put_queue(struct bfq_queue *bfqq);
void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
void bfq_schedule_dispatch(struct bfq_data *bfqd);
void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);

/* ------------ end of main algorithm interface -------------- */

/* ---------------- cgroups-support interface ---------------- */

void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
			      unsigned int op);
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
				  uint64_t io_start_time, unsigned int op);
void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
void bfqg_stats_update_idle_time(struct bfq_group *bfqg);
void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg);
void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg);
void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		   struct bfq_group *bfqg);

void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg);
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio);
void bfq_end_wr_async(struct bfq_data *bfqd);
struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
				     struct blkcg *blkcg);
struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
void bfqg_put(struct bfq_group *bfqg);

#ifdef CONFIG_BFQ_GROUP_IOSCHED
extern struct cftype bfq_blkcg_legacy_files[];
extern struct cftype bfq_blkg_files[];
extern struct blkcg_policy blkcg_policy_bfq;
#endif

/* ------------- end of cgroups-support interface ------------- */

/* - interface of the internal hierarchical B-WF2Q+ scheduler - */

#ifdef CONFIG_BFQ_GROUP_IOSCHED
/* both next loops stop at one of the child entities of the root group */
#define for_each_entity(entity)	\
	for (; entity ; entity = entity->parent)

/*
 * For each iteration, compute parent in advance, so as to be safe if
 * entity is deallocated during the iteration. Such a deallocation may
 * happen as a consequence of a bfq_put_queue that frees the bfq_queue
 * containing entity.
 */
#define for_each_entity_safe(entity, parent) \
	for (; entity && ({ parent = entity->parent; 1; }); entity = parent)

#else /* CONFIG_BFQ_GROUP_IOSCHED */
/*
 * Next two macros are fake loops when cgroups support is not
 * enabled. In fact, in such a case, there is only one level to go up
 * (to reach the root group).
 */
#define for_each_entity(entity)	\
	for (; entity ; entity = NULL)

#define for_each_entity_safe(entity, parent) \
	for (parent = NULL; entity ; entity = parent)
#endif /* CONFIG_BFQ_GROUP_IOSCHED */
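
A hedged sketch of how these iterators are meant to be used: the helper below is invented for illustration, but similar loops drive the scheduler's service-accounting paths, walking from a queue's entity up through its parent groups.

/*
 * Hypothetical helper, illustration only: charge @served service units
 * to a queue's entity and to every ancestor group entity.
 */
static void example_charge_ancestors(struct bfq_queue *bfqq, int served)
{
	struct bfq_entity *entity = &bfqq->entity;

	for_each_entity(entity) {
		/* first the queue's own entity, then its parent groups */
		entity->service += served;
	}
}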

struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq);
struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity);
struct bfq_entity *bfq_entity_of(struct rb_node *node);
unsigned short bfq_ioprio_to_weight(int ioprio);
void bfq_put_idle_entity(struct bfq_service_tree *st,
			 struct bfq_entity *entity);
struct bfq_service_tree *
__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
				struct bfq_entity *entity);
void bfq_bfqq_served(struct bfq_queue *bfqq, int served);
void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
			  unsigned long time_ms);
bool __bfq_deactivate_entity(struct bfq_entity *entity,
			     bool ins_into_idle_tree);
bool next_queue_may_preempt(struct bfq_data *bfqd);
struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd);
void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
			 bool ins_into_idle_tree, bool expiration);
void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		       bool expiration);
void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq);

/* --------------- end of interface of B-WF2Q+ ---------------- */

/* Logging facilities. */
#ifdef CONFIG_BFQ_GROUP_IOSCHED
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);

#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	do {			\
	char __pbuf[128];						\
									\
	blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \
	blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, (bfqq)->pid, \
			  bfq_bfqq_sync((bfqq)) ? 'S' : 'A',		\
			  __pbuf, ##args);				\
} while (0)

#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)	do {			\
	char __pbuf[128];						\
									\
	blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf));		\
	blk_add_trace_msg((bfqd)->queue, "%s " fmt, __pbuf, ##args);	\
} while (0)

#else /* CONFIG_BFQ_GROUP_IOSCHED */

#define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	\
	blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid,	\
			  bfq_bfqq_sync((bfqq)) ? 'S' : 'A',		\
				##args)
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)		do {} while (0)

#endif /* CONFIG_BFQ_GROUP_IOSCHED */

#define bfq_log(bfqd, fmt, args...) \
	blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
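
A minimal usage sketch for the logging helpers (the function and the logged values are invented for the example); the messages end up in the blktrace stream of the device's request queue.

/* Sketch only: how the tracing helpers above are typically invoked. */
static void example_log_state(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
	bfq_log(bfqd, "%d busy queues", bfqd->busy_queues);
	bfq_log_bfqq(bfqd, bfqq, "entity budget %d", bfqq->entity.budget);
}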

#endif /* _BFQ_H */

block/bfq-wf2q.c (new file, 1616 lines): diff suppressed because it is too large.

block/bio.c (19 changes):
@@ -30,6 +30,7 @@
 #include <linux/cgroup.h>
 
 #include <trace/events/block.h>
+#include "blk.h"
 
 /*
  * Test patch to inline a certain number of bi_io_vec's inside the bio
@@ -427,7 +428,8 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
  * RETURNS:
  *   Pointer to new bio on success, NULL on failure.
  */
-struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
+struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
+			     struct bio_set *bs)
 {
 	gfp_t saved_gfp = gfp_mask;
 	unsigned front_pad;
@@ -1824,6 +1826,11 @@ static inline bool bio_remaining_done(struct bio *bio)
  * bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
  * way to end I/O on a bio. No one should call bi_end_io() directly on a
  * bio unless they own it and thus know that it has an end_io function.
+ *
+ * bio_endio() can be called several times on a bio that has been chained
+ * using bio_chain(). The ->bi_end_io() function will only be called the
+ * last time. At this point the BLK_TA_COMPLETE tracing event will be
+ * generated if BIO_TRACE_COMPLETION is set.
  **/
 void bio_endio(struct bio *bio)
 {
@@ -1844,6 +1851,13 @@ again:
 		goto again;
 	}
 
+	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
+		trace_block_bio_complete(bdev_get_queue(bio->bi_bdev),
+					 bio, bio->bi_error);
+		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
+	}
+
+	blk_throtl_bio_endio(bio);
 	if (bio->bi_end_io)
 		bio->bi_end_io(bio);
 }
@@ -1882,6 +1896,9 @@ struct bio *bio_split(struct bio *bio, int sectors,
 
 	bio_advance(bio, split->bi_iter.bi_size);
 
+	if (bio_flagged(bio, BIO_TRACE_COMPLETION))
+		bio_set_flag(split, BIO_TRACE_COMPLETION);
+
 	return split;
 }
 EXPORT_SYMBOL(bio_split);

@@ -772,6 +772,27 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
 }
 EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
 
+/* Performs queue bypass and policy enabled checks then looks up blkg. */
+static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
+					  const struct blkcg_policy *pol,
+					  struct request_queue *q)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	lockdep_assert_held(q->queue_lock);
+
+	if (!blkcg_policy_enabled(q, pol))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	/*
+	 * This could be the first entry point of blkcg implementation and
+	 * we shouldn't allow anything to go through for a bypassing queue.
+	 */
+	if (unlikely(blk_queue_bypass(q)))
+		return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
+
+	return __blkg_lookup(blkcg, q, true /* update_hint */);
+}
+
 /**
  * blkg_conf_prep - parse and prepare for per-blkg config update
  * @blkcg: target block cgroup
@@ -789,6 +810,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 	__acquires(rcu) __acquires(disk->queue->queue_lock)
 {
 	struct gendisk *disk;
+	struct request_queue *q;
 	struct blkcg_gq *blkg;
 	struct module *owner;
 	unsigned int major, minor;
@@ -807,44 +829,95 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 	if (!disk)
 		return -ENODEV;
 	if (part) {
-		owner = disk->fops->owner;
-		put_disk(disk);
-		module_put(owner);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto fail;
 	}
 
+	q = disk->queue;
+
 	rcu_read_lock();
-	spin_lock_irq(disk->queue->queue_lock);
+	spin_lock_irq(q->queue_lock);
 
-	if (blkcg_policy_enabled(disk->queue, pol))
-		blkg = blkg_lookup_create(blkcg, disk->queue);
-	else
-		blkg = ERR_PTR(-EOPNOTSUPP);
+	blkg = blkg_lookup_check(blkcg, pol, q);
 	if (IS_ERR(blkg)) {
 		ret = PTR_ERR(blkg);
-		rcu_read_unlock();
-		spin_unlock_irq(disk->queue->queue_lock);
-		owner = disk->fops->owner;
-		put_disk(disk);
-		module_put(owner);
-		/*
-		 * If queue was bypassing, we should retry. Do so after a
-		 * short msleep(). It isn't strictly necessary but queue
-		 * can be bypassing for some time and it's always nice to
-		 * avoid busy looping.
-		 */
-		if (ret == -EBUSY) {
-			msleep(10);
-			ret = restart_syscall();
-		}
-		return ret;
+		goto fail_unlock;
 	}
 
+	if (blkg)
+		goto success;
+
+	/*
+	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
+	 * non-root blkgs have access to their parents.
+	 */
+	while (true) {
+		struct blkcg *pos = blkcg;
+		struct blkcg *parent;
+		struct blkcg_gq *new_blkg;
+
+		parent = blkcg_parent(blkcg);
+		while (parent && !__blkg_lookup(parent, q, false)) {
+			pos = parent;
+			parent = blkcg_parent(parent);
+		}
+
+		/* Drop locks to do new blkg allocation with GFP_KERNEL. */
+		spin_unlock_irq(q->queue_lock);
+		rcu_read_unlock();
+
+		new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
+		if (unlikely(!new_blkg)) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		rcu_read_lock();
+		spin_lock_irq(q->queue_lock);
+
+		blkg = blkg_lookup_check(pos, pol, q);
+		if (IS_ERR(blkg)) {
+			ret = PTR_ERR(blkg);
+			goto fail_unlock;
+		}
+
+		if (blkg) {
+			blkg_free(new_blkg);
+		} else {
+			blkg = blkg_create(pos, q, new_blkg);
+			if (unlikely(IS_ERR(blkg))) {
+				ret = PTR_ERR(blkg);
+				goto fail_unlock;
+			}
+		}
+
+		if (pos == blkcg)
+			goto success;
+	}
+success:
 	ctx->disk = disk;
 	ctx->blkg = blkg;
 	ctx->body = body;
 	return 0;
+
+fail_unlock:
+	spin_unlock_irq(q->queue_lock);
+	rcu_read_unlock();
+fail:
+	owner = disk->fops->owner;
+	put_disk(disk);
+	module_put(owner);
+	/*
+	 * If queue was bypassing, we should retry. Do so after a
+	 * short msleep(). It isn't strictly necessary but queue
+	 * can be bypassing for some time and it's always nice to
+	 * avoid busy looping.
+	 */
+	if (ret == -EBUSY) {
+		msleep(10);
+		ret = restart_syscall();
+	}
+	return ret;
 }
 EXPORT_SYMBOL_GPL(blkg_conf_prep);
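
For context, a hedged sketch of how a blkcg policy typically drives the function patched above: blkg_conf_prep() parses the "MAJ:MIN ..." input, looks up or creates the blkg, and leaves the queue lock held until blkg_conf_finish(). The write handler below is invented for illustration and only shows the prep/finish pairing around a policy-specific update.

/* Hypothetical cgroup-file write handler; shown only to illustrate the API. */
static ssize_t example_set_weight(struct kernfs_open_file *of, char *buf,
				  size_t nbytes, loff_t off)
{
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct blkg_conf_ctx ctx;
	int ret;

	/* Parses "MAJ:MIN <value>", takes the queue lock, finds/creates blkg. */
	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
	if (ret)
		return ret;

	/* ctx.blkg and ctx.body are valid here; apply the per-blkg setting. */

	blkg_conf_finish(&ctx);	/* drops the locks taken by blkg_conf_prep() */
	return nbytes;
}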

block/blk-core.c (143 changes):
@@ -268,10 +268,8 @@ void blk_sync_queue(struct request_queue *q)
 		struct blk_mq_hw_ctx *hctx;
 		int i;
 
-		queue_for_each_hw_ctx(q, hctx, i) {
-			cancel_work_sync(&hctx->run_work);
-			cancel_delayed_work_sync(&hctx->delay_work);
-		}
+		queue_for_each_hw_ctx(q, hctx, i)
+			cancel_delayed_work_sync(&hctx->run_work);
 	} else {
 		cancel_delayed_work_sync(&q->delay_work);
 	}
@@ -500,6 +498,13 @@ void blk_set_queue_dying(struct request_queue *q)
 	queue_flag_set(QUEUE_FLAG_DYING, q);
 	spin_unlock_irq(q->queue_lock);
 
+	/*
+	 * When queue DYING flag is set, we need to block new req
+	 * entering queue, so we call blk_freeze_queue_start() to
+	 * prevent I/O from crossing blk_queue_enter().
+	 */
+	blk_freeze_queue_start(q);
+
 	if (q->mq_ops)
 		blk_mq_wake_waiters(q);
 	else {
@@ -556,9 +561,13 @@ void blk_cleanup_queue(struct request_queue *q)
 	 * prevent that q->request_fn() gets invoked after draining finished.
 	 */
 	blk_freeze_queue(q);
-	spin_lock_irq(lock);
-	if (!q->mq_ops)
+	if (!q->mq_ops) {
+		spin_lock_irq(lock);
 		__blk_drain_queue(q, true);
+	} else {
+		blk_mq_debugfs_unregister_mq(q);
+		spin_lock_irq(lock);
+	}
 	queue_flag_set(QUEUE_FLAG_DEAD, q);
 	spin_unlock_irq(lock);
 
@@ -669,6 +678,15 @@ int blk_queue_enter(struct request_queue *q, bool nowait)
 		if (nowait)
 			return -EBUSY;
 
+		/*
+		 * read pair of barrier in blk_freeze_queue_start(),
+		 * we need to order reading __PERCPU_REF_DEAD flag of
+		 * .q_usage_counter and reading .mq_freeze_depth or
+		 * queue dying flag, otherwise the following wait may
+		 * never return if the two reads are reordered.
+		 */
+		smp_rmb();
+
 		ret = wait_event_interruptible(q->mq_freeze_wq,
 				!atomic_read(&q->mq_freeze_depth) ||
 				blk_queue_dying(q));
@@ -720,6 +738,10 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q->backing_dev_info)
 		goto fail_split;
 
+	q->stats = blk_alloc_queue_stats();
+	if (!q->stats)
+		goto fail_stats;
+
 	q->backing_dev_info->ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
 	q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
@@ -776,6 +798,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 fail_ref:
 	percpu_ref_exit(&q->q_usage_counter);
 fail_bdi:
+	blk_free_queue_stats(q->stats);
+fail_stats:
 	bdi_put(q->backing_dev_info);
 fail_split:
 	bioset_free(q->bio_split);
@@ -889,7 +913,6 @@ out_exit_flush_rq:
 		q->exit_rq_fn(q, q->fq->flush_rq);
 out_free_flush_queue:
 	blk_free_flush_queue(q->fq);
-	wbt_exit(q);
 	return -ENOMEM;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1128,7 +1151,6 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
 
 	blk_rq_init(q, rq);
 	blk_rq_set_rl(rq, rl);
-	blk_rq_set_prio(rq, ioc);
 	rq->cmd_flags = op;
 	rq->rq_flags = rq_flags;
 
@@ -1608,17 +1630,23 @@ out:
 	return ret;
 }
 
-void init_request_from_bio(struct request *req, struct bio *bio)
+void blk_init_request_from_bio(struct request *req, struct bio *bio)
 {
+	struct io_context *ioc = rq_ioc(bio);
+
 	if (bio->bi_opf & REQ_RAHEAD)
 		req->cmd_flags |= REQ_FAILFAST_MASK;
 
-	req->errors = 0;
 	req->__sector = bio->bi_iter.bi_sector;
 	if (ioprio_valid(bio_prio(bio)))
 		req->ioprio = bio_prio(bio);
+	else if (ioc)
+		req->ioprio = ioc->ioprio;
+	else
+		req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
 	blk_rq_bio_prep(req->q, req, bio);
 }
+EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
 
 static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 {
@@ -1709,7 +1737,7 @@ get_rq:
 	 * We don't worry about that case for efficiency. It won't happen
 	 * often, and the elevators are able to handle it.
 	 */
-	init_request_from_bio(req, bio);
+	blk_init_request_from_bio(req, bio);
 
 	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
 		req->cpu = raw_smp_processor_id();
@@ -1936,7 +1964,13 @@ generic_make_request_checks(struct bio *bio)
 	if (!blkcg_bio_issue_check(q, bio))
 		return false;
 
-	trace_block_bio_queue(q, bio);
+	if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
+		trace_block_bio_queue(q, bio);
+		/* Now that enqueuing has been traced, we need to trace
+		 * completion as well.
+		 */
+		bio_set_flag(bio, BIO_TRACE_COMPLETION);
+	}
 	return true;
 
 not_supported:
@@ -2478,7 +2512,7 @@ void blk_start_request(struct request *req)
 	blk_dequeue_request(req);
 
 	if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
-		blk_stat_set_issue_time(&req->issue_stat);
+		blk_stat_set_issue(&req->issue_stat, blk_rq_sectors(req));
 		req->rq_flags |= RQF_STATS;
 		wbt_issue(req->q->rq_wb, &req->issue_stat);
 	}
@@ -2540,22 +2574,11 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 {
 	int total_bytes;
 
-	trace_block_rq_complete(req->q, req, nr_bytes);
+	trace_block_rq_complete(req, error, nr_bytes);
 
 	if (!req->bio)
 		return false;
 
-	/*
-	 * For fs requests, rq is just carrier of independent bio's
-	 * and each partial completion should be handled separately.
-	 * Reset per-request error on each partial completion.
-	 *
-	 * TODO: tj: This is too subtle. It would be better to let
-	 * low level drivers do what they see fit.
-	 */
-	if (!blk_rq_is_passthrough(req))
-		req->errors = 0;
-
 	if (error && !blk_rq_is_passthrough(req) &&
 	    !(req->rq_flags & RQF_QUIET)) {
 		char *error_type;
@@ -2601,6 +2624,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		if (bio_bytes == bio->bi_iter.bi_size)
 			req->bio = bio->bi_next;
 
+		/* Completion has already been traced */
+		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
 		req_bio_endio(req, bio, bio_bytes, error);
 
 		total_bytes += bio_bytes;
@@ -2699,7 +2724,7 @@ void blk_finish_request(struct request *req, int error)
 	struct request_queue *q = req->q;
 
 	if (req->rq_flags & RQF_STATS)
-		blk_stat_add(&q->rq_stats[rq_data_dir(req)], req);
+		blk_stat_add(req);
 
 	if (req->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(q, req);
@@ -2776,7 +2801,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool __blk_end_bidi_request(struct request *rq, int error,
+static bool __blk_end_bidi_request(struct request *rq, int error,
 				   unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
@@ -2828,43 +2853,6 @@ void blk_end_request_all(struct request *rq, int error)
 }
 EXPORT_SYMBOL(blk_end_request_all);
 
-/**
- * blk_end_request_cur - Helper function to finish the current request chunk.
- * @rq: the request to finish the current chunk for
- * @error: %0 for success, < %0 for error
- *
- * Description:
- *     Complete the current consecutively mapped chunk from @rq.
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- */
-bool blk_end_request_cur(struct request *rq, int error)
-{
-	return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
-}
-EXPORT_SYMBOL(blk_end_request_cur);
-
-/**
- * blk_end_request_err - Finish a request till the next failure boundary.
- * @rq: the request to finish till the next failure boundary for
- * @error: must be negative errno
- *
- * Description:
- *     Complete @rq till the next failure boundary.
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- */
-bool blk_end_request_err(struct request *rq, int error)
-{
-	WARN_ON(error >= 0);
-	return blk_end_request(rq, error, blk_rq_err_bytes(rq));
-}
-EXPORT_SYMBOL_GPL(blk_end_request_err);
-
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
@@ -2924,26 +2912,6 @@ bool __blk_end_request_cur(struct request *rq, int error)
 }
 EXPORT_SYMBOL(__blk_end_request_cur);
 
-/**
- * __blk_end_request_err - Finish a request till the next failure boundary.
- * @rq: the request to finish till the next failure boundary for
- * @error: must be negative errno
- *
- * Description:
- *     Complete @rq till the next failure boundary.  Must be called
- *     with queue lock held.
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- */
-bool __blk_end_request_err(struct request *rq, int error)
-{
-	WARN_ON(error >= 0);
-	return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
-}
-EXPORT_SYMBOL_GPL(__blk_end_request_err);
-
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
 {
@@ -3106,6 +3074,13 @@ int kblockd_schedule_work_on(int cpu, struct work_struct *work)
 }
 EXPORT_SYMBOL(kblockd_schedule_work_on);
 
+int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
+				unsigned long delay)
+{
+	return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
+}
+EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
+
 int kblockd_schedule_delayed_work(struct delayed_work *dwork,
 				  unsigned long delay)
 {

@@ -69,8 +69,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 
 	if (unlikely(blk_queue_dying(q))) {
 		rq->rq_flags |= RQF_QUIET;
-		rq->errors = -ENXIO;
-		__blk_end_request_all(rq, rq->errors);
+		__blk_end_request_all(rq, -ENXIO);
 		spin_unlock_irq(q->queue_lock);
 		return;
 	}
@@ -92,11 +91,10 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
  * Insert a fully prepared request at the back of the I/O scheduler queue
  * for execution and wait for completion.
  */
-int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
+void blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
 		   struct request *rq, int at_head)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
-	int err = 0;
 	unsigned long hang_check;
 
 	rq->end_io_data = &wait;
@@ -108,10 +106,5 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
 		while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2)));
 	else
 		wait_for_completion_io(&wait);
-
-	if (rq->errors)
-		err = -EIO;
-
-	return err;
 }
 EXPORT_SYMBOL(blk_execute_rq);

@@ -447,7 +447,7 @@ void blk_insert_flush(struct request *rq)
 		if (q->mq_ops)
 			blk_mq_end_request(rq, 0);
 		else
-			__blk_end_bidi_request(rq, 0, 0, 0);
+			__blk_end_request(rq, 0, 0);
 		return;
 	}
 
@@ -497,8 +497,7 @@ void blk_insert_flush(struct request *rq)
  * Description:
  *    Issue a flush for the block device in question. Caller can supply
  *    room for storing the error offset in case of a flush error, if they
- *    wish to. If WAIT flag is not passed then caller may check only what
- *    request was pushed in some internal queue for later handling.
+ *    wish to.
  */
 int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 		sector_t *error_sector)

@@ -389,7 +389,7 @@ static int blk_integrity_nop_fn(struct blk_integrity_iter *iter)
 	return 0;
 }
 
-static struct blk_integrity_profile nop_profile = {
+static const struct blk_integrity_profile nop_profile = {
 	.name = "nop",
 	.generate_fn = blk_integrity_nop_fn,
 	.verify_fn = blk_integrity_nop_fn,
@@ -412,12 +412,13 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
 
 	bi->flags = BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE |
 		template->flags;
-	bi->interval_exp = ilog2(queue_logical_block_size(disk->queue));
+	bi->interval_exp = template->interval_exp ? :
+		ilog2(queue_logical_block_size(disk->queue));
 	bi->profile = template->profile ? template->profile : &nop_profile;
 	bi->tuple_size = template->tuple_size;
 	bi->tag_size = template->tag_size;
 
-	blk_integrity_revalidate(disk);
+	disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
 }
 EXPORT_SYMBOL(blk_integrity_register);
 
@@ -430,26 +431,11 @@ EXPORT_SYMBOL(blk_integrity_register);
  */
 void blk_integrity_unregister(struct gendisk *disk)
 {
-	blk_integrity_revalidate(disk);
+	disk->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES;
 	memset(&disk->queue->integrity, 0, sizeof(struct blk_integrity));
 }
 EXPORT_SYMBOL(blk_integrity_unregister);
 
-void blk_integrity_revalidate(struct gendisk *disk)
-{
-	struct blk_integrity *bi = &disk->queue->integrity;
-
-	if (!(disk->flags & GENHD_FL_UP))
-		return;
-
-	if (bi->profile)
-		disk->queue->backing_dev_info->capabilities |=
-			BDI_CAP_STABLE_WRITES;
-	else
-		disk->queue->backing_dev_info->capabilities &=
-			~BDI_CAP_STABLE_WRITES;
-}
-
 void blk_integrity_add(struct gendisk *disk)
 {
 	if (kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype,
@@ -37,17 +37,12 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 return -ENXIO;
 
 if (flags & BLKDEV_DISCARD_SECURE) {
-if (flags & BLKDEV_DISCARD_ZERO)
-	return -EOPNOTSUPP;
 if (!blk_queue_secure_erase(q))
 	return -EOPNOTSUPP;
 op = REQ_OP_SECURE_ERASE;
 } else {
 if (!blk_queue_discard(q))
 	return -EOPNOTSUPP;
-if ((flags & BLKDEV_DISCARD_ZERO) &&
-    !q->limits.discard_zeroes_data)
-	return -EOPNOTSUPP;
 op = REQ_OP_DISCARD;
 }
 

@@ -109,7 +104,7 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
 * @sector: start sector
 * @nr_sects: number of sectors to discard
 * @gfp_mask: memory allocation flags (for bio_alloc)
-* @flags: BLKDEV_IFL_* flags to control behaviour
+* @flags: BLKDEV_DISCARD_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.

@@ -126,7 +121,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 &bio);
 if (!ret && bio) {
 ret = submit_bio_wait(bio);
-if (ret == -EOPNOTSUPP && !(flags & BLKDEV_DISCARD_ZERO))
+if (ret == -EOPNOTSUPP)
 	ret = 0;
 bio_put(bio);
 }
@@ -226,20 +221,9 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL(blkdev_issue_write_same);
 
-/**
- * __blkdev_issue_write_zeroes - generate number of bios with WRITE ZEROES
- * @bdev: blockdev to issue
- * @sector: start sector
- * @nr_sects: number of sectors to write
- * @gfp_mask: memory allocation flags (for bio_alloc)
- * @biop: pointer to anchor bio
- *
- * Description:
- *  Generate and issue number of bios(REQ_OP_WRITE_ZEROES) with zerofiled pages.
- */
 static int __blkdev_issue_write_zeroes(struct block_device *bdev,
 sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
-struct bio **biop)
+struct bio **biop, unsigned flags)
 {
 struct bio *bio = *biop;
 unsigned int max_write_zeroes_sectors;

@@ -258,7 +242,9 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
 bio = next_bio(bio, 0, gfp_mask);
 bio->bi_iter.bi_sector = sector;
 bio->bi_bdev = bdev;
-bio_set_op_attrs(bio, REQ_OP_WRITE_ZEROES, 0);
+bio->bi_opf = REQ_OP_WRITE_ZEROES;
+if (flags & BLKDEV_ZERO_NOUNMAP)
+	bio->bi_opf |= REQ_NOUNMAP;
 
 if (nr_sects > max_write_zeroes_sectors) {
 bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;

@@ -282,14 +268,27 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
 * @nr_sects: number of sectors to write
 * @gfp_mask: memory allocation flags (for bio_alloc)
 * @biop: pointer to anchor bio
-* @discard: discard flag
+* @flags: controls detailed behavior
 *
 * Description:
-*  Generate and issue number of bios with zerofiled pages.
+*  Zero-fill a block range, either using hardware offload or by explicitly
+*  writing zeroes to the device.
+*
+*  Note that this function may fail with -EOPNOTSUPP if the driver signals
+*  zeroing offload support, but the device fails to process the command (for
+*  some devices there is no non-destructive way to verify whether this
+*  operation is actually supported). In this case the caller should retry
+*  the call to blkdev_issue_zeroout() and the fallback path will be used.
+*
+*  If a device is using logical block provisioning, the underlying space will
+*  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
+*
+*  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
+*  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
 */
 int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
-bool discard)
+unsigned flags)
 {
 int ret;
 int bi_size = 0;

@@ -302,8 +301,8 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 return -EINVAL;
 
 ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
-		biop);
-if (ret == 0 || (ret && ret != -EOPNOTSUPP))
+		biop, flags);
+if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
 	goto out;
 
 ret = 0;
@@ -337,40 +336,23 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
 * @sector: start sector
 * @nr_sects: number of sectors to write
 * @gfp_mask: memory allocation flags (for bio_alloc)
-* @discard: whether to discard the block range
+* @flags: controls detailed behavior
 *
 * Description:
-*  Zero-fill a block range. If the discard flag is set and the block
-*  device guarantees that subsequent READ operations to the block range
-*  in question will return zeroes, the blocks will be discarded. Should
-*  the discard request fail, if the discard flag is not set, or if
-*  discard_zeroes_data is not supported, this function will resort to
-*  zeroing the blocks manually, thus provisioning (allocating,
-*  anchoring) them. If the block device supports WRITE ZEROES or WRITE SAME
-*  command(s), blkdev_issue_zeroout() will use it to optimize the process of
-*  clearing the block range. Otherwise the zeroing will be performed
-*  using regular WRITE calls.
+*  Zero-fill a block range, either using hardware offload or by explicitly
+*  writing zeroes to the device. See __blkdev_issue_zeroout() for the
+*  valid values for %flags.
 */
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-sector_t nr_sects, gfp_t gfp_mask, bool discard)
+sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
 {
 int ret;
 struct bio *bio = NULL;
 struct blk_plug plug;
 
-if (discard) {
-	if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
-			BLKDEV_DISCARD_ZERO))
-		return 0;
-}
-
-if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
-		ZERO_PAGE(0)))
-	return 0;
-
 blk_start_plug(&plug);
 ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
-		&bio, discard);
+		&bio, flags);
 if (ret == 0 && bio) {
 ret = submit_bio_wait(bio);
 bio_put(bio);
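
For illustration, a caller of the reworked interface might ask for zeroing without deprovisioning and handle the no-offload case itself (a sketch; the surrounding variables and the fallback policy are assumptions, not part of this patch):

    int err = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
                                   BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_NOFALLBACK);
    if (err == -EOPNOTSUPP)
            /* no zeroing offload: let the explicit-write fallback run */
            err = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
                                       BLKDEV_ZERO_NOUNMAP);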
@@ -54,6 +54,20 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
 return bio_split(bio, split_sectors, GFP_NOIO, bs);
 }
 
+static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
+		struct bio *bio, struct bio_set *bs, unsigned *nsegs)
+{
+	*nsegs = 1;
+
+	if (!q->limits.max_write_zeroes_sectors)
+		return NULL;
+
+	if (bio_sectors(bio) <= q->limits.max_write_zeroes_sectors)
+		return NULL;
+
+	return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
+}
+
 static struct bio *blk_bio_write_same_split(struct request_queue *q,
 struct bio *bio,
 struct bio_set *bs,

@@ -200,8 +214,7 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
 split = blk_bio_discard_split(q, *bio, bs, &nsegs);
 break;
 case REQ_OP_WRITE_ZEROES:
-split = NULL;
-nsegs = (*bio)->bi_phys_segments;
+split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs);
 break;
 case REQ_OP_WRITE_SAME:
 split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
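
As a rough illustration of what the new split helper does, a REQ_OP_WRITE_ZEROES bio larger than the queue limit is now carved into limit-sized pieces instead of being passed through whole; the helper below (illustrative, not from this patch) computes how many bios result:

    /* Sketch: number of bios a zeroing request is split into, mirroring
     * blk_bio_write_zeroes_split(); 'limit' is max_write_zeroes_sectors. */
    static unsigned int nr_write_zeroes_bios(sector_t nr_sects, unsigned int limit)
    {
            return (nr_sects + limit - 1) / limit;
    }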
@@ -43,11 +43,157 @@ static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file,
 return ret;
 }
 
+static int blk_flags_show(struct seq_file *m, const unsigned long flags,
+		const char *const *flag_name, int flag_name_count)
+{
+	bool sep = false;
+	int i;
+
+	for (i = 0; i < sizeof(flags) * BITS_PER_BYTE; i++) {
+		if (!(flags & BIT(i)))
+			continue;
+		if (sep)
+			seq_puts(m, " ");
+		sep = true;
+		if (i < flag_name_count && flag_name[i])
+			seq_puts(m, flag_name[i]);
+		else
+			seq_printf(m, "%d", i);
+	}
+	return 0;
+}
+
+static const char *const blk_queue_flag_name[] = {
+	[QUEUE_FLAG_QUEUED]	 = "QUEUED",
+	[QUEUE_FLAG_STOPPED]	 = "STOPPED",
+	[QUEUE_FLAG_SYNCFULL]	 = "SYNCFULL",
+	[QUEUE_FLAG_ASYNCFULL]	 = "ASYNCFULL",
+	[QUEUE_FLAG_DYING]	 = "DYING",
+	[QUEUE_FLAG_BYPASS]	 = "BYPASS",
+	[QUEUE_FLAG_BIDI]	 = "BIDI",
+	[QUEUE_FLAG_NOMERGES]	 = "NOMERGES",
+	[QUEUE_FLAG_SAME_COMP]	 = "SAME_COMP",
+	[QUEUE_FLAG_FAIL_IO]	 = "FAIL_IO",
+	[QUEUE_FLAG_STACKABLE]	 = "STACKABLE",
+	[QUEUE_FLAG_NONROT]	 = "NONROT",
+	[QUEUE_FLAG_IO_STAT]	 = "IO_STAT",
+	[QUEUE_FLAG_DISCARD]	 = "DISCARD",
+	[QUEUE_FLAG_NOXMERGES]	 = "NOXMERGES",
+	[QUEUE_FLAG_ADD_RANDOM]	 = "ADD_RANDOM",
+	[QUEUE_FLAG_SECERASE]	 = "SECERASE",
+	[QUEUE_FLAG_SAME_FORCE]	 = "SAME_FORCE",
+	[QUEUE_FLAG_DEAD]	 = "DEAD",
+	[QUEUE_FLAG_INIT_DONE]	 = "INIT_DONE",
+	[QUEUE_FLAG_NO_SG_MERGE] = "NO_SG_MERGE",
+	[QUEUE_FLAG_POLL]	 = "POLL",
+	[QUEUE_FLAG_WC]		 = "WC",
+	[QUEUE_FLAG_FUA]	 = "FUA",
+	[QUEUE_FLAG_FLUSH_NQ]	 = "FLUSH_NQ",
+	[QUEUE_FLAG_DAX]	 = "DAX",
+	[QUEUE_FLAG_STATS]	 = "STATS",
+	[QUEUE_FLAG_POLL_STATS]	 = "POLL_STATS",
+	[QUEUE_FLAG_REGISTERED]	 = "REGISTERED",
+};
+
+static int blk_queue_flags_show(struct seq_file *m, void *v)
+{
+	struct request_queue *q = m->private;
+
+	blk_flags_show(m, q->queue_flags, blk_queue_flag_name,
+		       ARRAY_SIZE(blk_queue_flag_name));
+	seq_puts(m, "\n");
+	return 0;
+}
+
+static ssize_t blk_queue_flags_store(struct file *file, const char __user *ubuf,
+				     size_t len, loff_t *offp)
+{
+	struct request_queue *q = file_inode(file)->i_private;
+	char op[16] = { }, *s;
+
+	len = min(len, sizeof(op) - 1);
+	if (copy_from_user(op, ubuf, len))
+		return -EFAULT;
+	s = op;
+	strsep(&s, " \t\n"); /* strip trailing whitespace */
+	if (strcmp(op, "run") == 0) {
+		blk_mq_run_hw_queues(q, true);
+	} else if (strcmp(op, "start") == 0) {
+		blk_mq_start_stopped_hw_queues(q, true);
+	} else {
+		pr_err("%s: unsupported operation %s. Use either 'run' or 'start'\n",
+		       __func__, op);
+		return -EINVAL;
+	}
+	return len;
+}
+
+static int blk_queue_flags_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, blk_queue_flags_show, inode->i_private);
+}
+
+static const struct file_operations blk_queue_flags_fops = {
+	.open		= blk_queue_flags_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= blk_queue_flags_store,
+};
+
+static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
+{
+	if (stat->nr_samples) {
+		seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
+			   stat->nr_samples, stat->mean, stat->min, stat->max);
+	} else {
+		seq_puts(m, "samples=0");
+	}
+}
+
+static int queue_poll_stat_show(struct seq_file *m, void *v)
+{
+	struct request_queue *q = m->private;
+	int bucket;
+
+	for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) {
+		seq_printf(m, "read (%d Bytes): ", 1 << (9+bucket));
+		print_stat(m, &q->poll_stat[2*bucket]);
+		seq_puts(m, "\n");
+
+		seq_printf(m, "write (%d Bytes): ", 1 << (9+bucket));
+		print_stat(m, &q->poll_stat[2*bucket+1]);
+		seq_puts(m, "\n");
+	}
+	return 0;
+}
+
+static int queue_poll_stat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, queue_poll_stat_show, inode->i_private);
+}
+
+static const struct file_operations queue_poll_stat_fops = {
+	.open		= queue_poll_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const char *const hctx_state_name[] = {
+	[BLK_MQ_S_STOPPED]	 = "STOPPED",
+	[BLK_MQ_S_TAG_ACTIVE]	 = "TAG_ACTIVE",
+	[BLK_MQ_S_SCHED_RESTART] = "SCHED_RESTART",
+	[BLK_MQ_S_TAG_WAITING]	 = "TAG_WAITING",
+
+};
 static int hctx_state_show(struct seq_file *m, void *v)
 {
 struct blk_mq_hw_ctx *hctx = m->private;
 
-seq_printf(m, "0x%lx\n", hctx->state);
+blk_flags_show(m, hctx->state, hctx_state_name,
+	       ARRAY_SIZE(hctx_state_name));
+seq_puts(m, "\n");
 return 0;
 }
 
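
The new "state" debugfs attribute prints the queue flags by name and accepts "run"/"start" commands; a small userspace-style mirror of the decoding idea used by blk_flags_show() is sketched below (illustrative only, the flag-name table contents are assumed):

    #include <stdio.h>

    static void show_flags(unsigned long flags, const char *const *names, int n)
    {
            int i;

            for (i = 0; i < 8 * (int)sizeof(flags); i++) {
                    if (!(flags & (1UL << i)))
                            continue;
                    if (i < n && names[i])
                            printf("%s ", names[i]);   /* named bit */
                    else
                            printf("%d ", i);          /* unnamed bit: print its number */
            }
    }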
@@ -63,11 +209,35 @@ static const struct file_operations hctx_state_fops = {
 .release = single_release,
 };
 
+static const char *const alloc_policy_name[] = {
+	[BLK_TAG_ALLOC_FIFO]	= "fifo",
+	[BLK_TAG_ALLOC_RR]	= "rr",
+};
+
+static const char *const hctx_flag_name[] = {
+	[ilog2(BLK_MQ_F_SHOULD_MERGE)]	= "SHOULD_MERGE",
+	[ilog2(BLK_MQ_F_TAG_SHARED)]	= "TAG_SHARED",
+	[ilog2(BLK_MQ_F_SG_MERGE)]	= "SG_MERGE",
+	[ilog2(BLK_MQ_F_BLOCKING)]	= "BLOCKING",
+	[ilog2(BLK_MQ_F_NO_SCHED)]	= "NO_SCHED",
+};
+
 static int hctx_flags_show(struct seq_file *m, void *v)
 {
 struct blk_mq_hw_ctx *hctx = m->private;
+const int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(hctx->flags);
 
-seq_printf(m, "0x%lx\n", hctx->flags);
+seq_puts(m, "alloc_policy=");
+if (alloc_policy < ARRAY_SIZE(alloc_policy_name) &&
+    alloc_policy_name[alloc_policy])
+	seq_puts(m, alloc_policy_name[alloc_policy]);
+else
+	seq_printf(m, "%d", alloc_policy);
+seq_puts(m, " ");
+blk_flags_show(m,
+	       hctx->flags ^ BLK_ALLOC_POLICY_TO_MQ_FLAG(alloc_policy),
+	       hctx_flag_name, ARRAY_SIZE(hctx_flag_name));
+seq_puts(m, "\n");
 return 0;
 }
 
@@ -83,13 +253,83 @@ static const struct file_operations hctx_flags_fops = {
 .release = single_release,
 };
 
+static const char *const op_name[] = {
+	[REQ_OP_READ]		= "READ",
+	[REQ_OP_WRITE]		= "WRITE",
+	[REQ_OP_FLUSH]		= "FLUSH",
+	[REQ_OP_DISCARD]	= "DISCARD",
+	[REQ_OP_ZONE_REPORT]	= "ZONE_REPORT",
+	[REQ_OP_SECURE_ERASE]	= "SECURE_ERASE",
+	[REQ_OP_ZONE_RESET]	= "ZONE_RESET",
+	[REQ_OP_WRITE_SAME]	= "WRITE_SAME",
+	[REQ_OP_WRITE_ZEROES]	= "WRITE_ZEROES",
+	[REQ_OP_SCSI_IN]	= "SCSI_IN",
+	[REQ_OP_SCSI_OUT]	= "SCSI_OUT",
+	[REQ_OP_DRV_IN]		= "DRV_IN",
+	[REQ_OP_DRV_OUT]	= "DRV_OUT",
+};
+
+static const char *const cmd_flag_name[] = {
+	[__REQ_FAILFAST_DEV]		= "FAILFAST_DEV",
+	[__REQ_FAILFAST_TRANSPORT]	= "FAILFAST_TRANSPORT",
+	[__REQ_FAILFAST_DRIVER]		= "FAILFAST_DRIVER",
+	[__REQ_SYNC]			= "SYNC",
+	[__REQ_META]			= "META",
+	[__REQ_PRIO]			= "PRIO",
+	[__REQ_NOMERGE]			= "NOMERGE",
+	[__REQ_IDLE]			= "IDLE",
+	[__REQ_INTEGRITY]		= "INTEGRITY",
+	[__REQ_FUA]			= "FUA",
+	[__REQ_PREFLUSH]		= "PREFLUSH",
+	[__REQ_RAHEAD]			= "RAHEAD",
+	[__REQ_BACKGROUND]		= "BACKGROUND",
+	[__REQ_NR_BITS]			= "NR_BITS",
+};
+
+static const char *const rqf_name[] = {
+	[ilog2((__force u32)RQF_SORTED)]		= "SORTED",
+	[ilog2((__force u32)RQF_STARTED)]		= "STARTED",
+	[ilog2((__force u32)RQF_QUEUED)]		= "QUEUED",
+	[ilog2((__force u32)RQF_SOFTBARRIER)]		= "SOFTBARRIER",
+	[ilog2((__force u32)RQF_FLUSH_SEQ)]		= "FLUSH_SEQ",
+	[ilog2((__force u32)RQF_MIXED_MERGE)]		= "MIXED_MERGE",
+	[ilog2((__force u32)RQF_MQ_INFLIGHT)]		= "MQ_INFLIGHT",
+	[ilog2((__force u32)RQF_DONTPREP)]		= "DONTPREP",
+	[ilog2((__force u32)RQF_PREEMPT)]		= "PREEMPT",
+	[ilog2((__force u32)RQF_COPY_USER)]		= "COPY_USER",
+	[ilog2((__force u32)RQF_FAILED)]		= "FAILED",
+	[ilog2((__force u32)RQF_QUIET)]			= "QUIET",
+	[ilog2((__force u32)RQF_ELVPRIV)]		= "ELVPRIV",
+	[ilog2((__force u32)RQF_IO_STAT)]		= "IO_STAT",
+	[ilog2((__force u32)RQF_ALLOCED)]		= "ALLOCED",
+	[ilog2((__force u32)RQF_PM)]			= "PM",
+	[ilog2((__force u32)RQF_HASHED)]		= "HASHED",
+	[ilog2((__force u32)RQF_STATS)]			= "STATS",
+	[ilog2((__force u32)RQF_SPECIAL_PAYLOAD)]	= "SPECIAL_PAYLOAD",
+};
+
 static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
 {
 struct request *rq = list_entry_rq(v);
+const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
+const unsigned int op = rq->cmd_flags & REQ_OP_MASK;
 
-seq_printf(m, "%p {.cmd_flags=0x%x, .rq_flags=0x%x, .tag=%d, .internal_tag=%d}\n",
-	   rq, rq->cmd_flags, (__force unsigned int)rq->rq_flags,
-	   rq->tag, rq->internal_tag);
+seq_printf(m, "%p {.op=", rq);
+if (op < ARRAY_SIZE(op_name) && op_name[op])
+	seq_printf(m, "%s", op_name[op]);
+else
+	seq_printf(m, "%d", op);
+seq_puts(m, ", .cmd_flags=");
+blk_flags_show(m, rq->cmd_flags & ~REQ_OP_MASK, cmd_flag_name,
+	       ARRAY_SIZE(cmd_flag_name));
+seq_puts(m, ", .rq_flags=");
+blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
+	       ARRAY_SIZE(rqf_name));
+seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
+	   rq->internal_tag);
+if (mq_ops->show_rq)
+	mq_ops->show_rq(m, rq);
+seq_puts(m, "}\n");
 return 0;
 }
 
@@ -322,60 +562,6 @@ static const struct file_operations hctx_io_poll_fops = {
 .release = single_release,
 };
 
-static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
-{
-	seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
-		   stat->nr_samples, stat->mean, stat->min, stat->max);
-}
-
-static int hctx_stats_show(struct seq_file *m, void *v)
-{
-	struct blk_mq_hw_ctx *hctx = m->private;
-	struct blk_rq_stat stat[2];
-
-	blk_stat_init(&stat[BLK_STAT_READ]);
-	blk_stat_init(&stat[BLK_STAT_WRITE]);
-
-	blk_hctx_stat_get(hctx, stat);
-
-	seq_puts(m, "read: ");
-	print_stat(m, &stat[BLK_STAT_READ]);
-	seq_puts(m, "\n");
-
-	seq_puts(m, "write: ");
-	print_stat(m, &stat[BLK_STAT_WRITE]);
-	seq_puts(m, "\n");
-	return 0;
-}
-
-static int hctx_stats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, hctx_stats_show, inode->i_private);
-}
-
-static ssize_t hctx_stats_write(struct file *file, const char __user *buf,
-				size_t count, loff_t *ppos)
-{
-	struct seq_file *m = file->private_data;
-	struct blk_mq_hw_ctx *hctx = m->private;
-	struct blk_mq_ctx *ctx;
-	int i;
-
-	hctx_for_each_ctx(hctx, ctx, i) {
-		blk_stat_init(&ctx->stat[BLK_STAT_READ]);
-		blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
-	}
-	return count;
-}
-
-static const struct file_operations hctx_stats_fops = {
-	.open		= hctx_stats_open,
-	.read		= seq_read,
-	.write		= hctx_stats_write,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int hctx_dispatched_show(struct seq_file *m, void *v)
 {
 struct blk_mq_hw_ctx *hctx = m->private;
@@ -636,6 +822,12 @@ static const struct file_operations ctx_completed_fops = {
 .release = single_release,
 };
 
+static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
+	{"poll_stat", 0400, &queue_poll_stat_fops},
+	{"state", 0600, &blk_queue_flags_fops},
+	{},
+};
+
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 {"state", 0400, &hctx_state_fops},
 {"flags", 0400, &hctx_flags_fops},

@@ -646,7 +838,6 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 {"sched_tags", 0400, &hctx_sched_tags_fops},
 {"sched_tags_bitmap", 0400, &hctx_sched_tags_bitmap_fops},
 {"io_poll", 0600, &hctx_io_poll_fops},
-{"stats", 0600, &hctx_stats_fops},
 {"dispatched", 0600, &hctx_dispatched_fops},
 {"queued", 0600, &hctx_queued_fops},
 {"run", 0600, &hctx_run_fops},

@@ -662,16 +853,17 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
 {},
 };
 
-int blk_mq_debugfs_register(struct request_queue *q, const char *name)
+int blk_mq_debugfs_register(struct request_queue *q)
 {
 if (!blk_debugfs_root)
 	return -ENOENT;
 
-q->debugfs_dir = debugfs_create_dir(name, blk_debugfs_root);
+q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
+				    blk_debugfs_root);
 if (!q->debugfs_dir)
 	goto err;
 
-if (blk_mq_debugfs_register_hctxs(q))
+if (blk_mq_debugfs_register_mq(q))
 	goto err;
 
 return 0;

@@ -741,7 +933,7 @@ static int blk_mq_debugfs_register_hctx(struct request_queue *q,
 return 0;
 }
 
-int blk_mq_debugfs_register_hctxs(struct request_queue *q)
+int blk_mq_debugfs_register_mq(struct request_queue *q)
 {
 struct blk_mq_hw_ctx *hctx;
 int i;

@@ -753,6 +945,9 @@ int blk_mq_debugfs_register_mq(struct request_queue *q)
 if (!q->mq_debugfs_dir)
 	goto err;
 
+if (!debugfs_create_files(q->mq_debugfs_dir, q, blk_mq_debugfs_queue_attrs))
+	goto err;
+
 queue_for_each_hw_ctx(q, hctx, i) {
 if (blk_mq_debugfs_register_hctx(q, hctx))
 	goto err;

@@ -761,11 +956,11 @@ int blk_mq_debugfs_register_mq(struct request_queue *q)
 return 0;
 
 err:
-blk_mq_debugfs_unregister_hctxs(q);
+blk_mq_debugfs_unregister_mq(q);
 return -ENOMEM;
 }
 
-void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
+void blk_mq_debugfs_unregister_mq(struct request_queue *q)
 {
 debugfs_remove_recursive(q->mq_debugfs_dir);
 q->mq_debugfs_dir = NULL;
@@ -23,7 +23,7 @@
 * @pdev: PCI device associated with @set.
 *
 * This function assumes the PCI device @pdev has at least as many available
-* interrupt vetors as @set has queues. It will then queuery the vector
+* interrupt vectors as @set has queues. It will then query the vector
 * corresponding to each queue for it's affinity mask and built queue mapping
 * that maps a queue to the CPUs that have irq affinity for the corresponding
 * vector.
@@ -30,43 +30,6 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
 
-int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
-				int (*init)(struct blk_mq_hw_ctx *),
-				void (*exit)(struct blk_mq_hw_ctx *))
-{
-	struct blk_mq_hw_ctx *hctx;
-	int ret;
-	int i;
-
-	queue_for_each_hw_ctx(q, hctx, i) {
-		hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node);
-		if (!hctx->sched_data) {
-			ret = -ENOMEM;
-			goto error;
-		}
-
-		if (init) {
-			ret = init(hctx);
-			if (ret) {
-				/*
-				 * We don't want to give exit() a partially
-				 * initialized sched_data. init() must clean up
-				 * if it fails.
-				 */
-				kfree(hctx->sched_data);
-				hctx->sched_data = NULL;
-				goto error;
-			}
-		}
-	}
-
-	return 0;
-error:
-	blk_mq_sched_free_hctx_data(q, exit);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data);
-
 static void __blk_mq_sched_assign_ioc(struct request_queue *q,
 struct request *rq,
 struct bio *bio,

@@ -119,7 +82,11 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 if (likely(!data->hctx))
 	data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
 
-if (e) {
+/*
+ * For a reserved tag, allocate a normal request since we might
+ * have driver dependencies on the value of the internal tag.
+ */
+if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) {
 data->flags |= BLK_MQ_REQ_INTERNAL;
 
 /*

@@ -227,22 +194,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 }
 }
 
-void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
-				   struct list_head *rq_list,
-				   struct request *(*get_rq)(struct blk_mq_hw_ctx *))
-{
-	do {
-		struct request *rq;
-
-		rq = get_rq(hctx);
-		if (!rq)
-			break;
-
-		list_add_tail(&rq->queuelist, rq_list);
-	} while (1);
-}
-EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch);
-
 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 struct request **merged_request)
 {
@@ -508,11 +459,24 @@ int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
 unsigned int hctx_idx)
 {
 struct elevator_queue *e = q->elevator;
+int ret;
 
 if (!e)
 	return 0;
 
-return blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
+ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
+if (ret)
+	return ret;
+
+if (e->type->ops.mq.init_hctx) {
+	ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
+	if (ret) {
+		blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
+		return ret;
+	}
+}
+
+return 0;
 }
 
 void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,

@@ -523,12 +487,18 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
 if (!e)
 	return;
 
+if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
+	e->type->ops.mq.exit_hctx(hctx, hctx_idx);
+	hctx->sched_data = NULL;
+}
+
 blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
 }
 
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 {
 struct blk_mq_hw_ctx *hctx;
+struct elevator_queue *eq;
 unsigned int i;
 int ret;
 

@@ -553,6 +523,18 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 if (ret)
 	goto err;
 
+if (e->ops.mq.init_hctx) {
+	queue_for_each_hw_ctx(q, hctx, i) {
+		ret = e->ops.mq.init_hctx(hctx, i);
+		if (ret) {
+			eq = q->elevator;
+			blk_mq_exit_sched(q, eq);
+			kobject_put(&eq->kobj);
+			return ret;
+		}
+	}
+}
+
 return 0;
 
 err:

@@ -563,6 +545,17 @@ err:
 
 void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
 {
+struct blk_mq_hw_ctx *hctx;
+unsigned int i;
+
+if (e->type->ops.mq.exit_hctx) {
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (hctx->sched_data) {
+			e->type->ops.mq.exit_hctx(hctx, i);
+			hctx->sched_data = NULL;
+		}
+	}
+}
 if (e->type->ops.mq.exit_sched)
 	e->type->ops.mq.exit_sched(e);
 blk_mq_sched_tags_teardown(q);
@@ -4,10 +4,6 @@
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
-int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
-				int (*init)(struct blk_mq_hw_ctx *),
-				void (*exit)(struct blk_mq_hw_ctx *));
-
 void blk_mq_sched_free_hctx_data(struct request_queue *q,
 void (*exit)(struct blk_mq_hw_ctx *));
 

@@ -28,9 +24,6 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
 struct list_head *list, bool run_queue_async);
 
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
-void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
-				   struct list_head *rq_list,
-				   struct request *(*get_rq)(struct blk_mq_hw_ctx *));
 
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
 void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);

@@ -86,17 +79,12 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
 return true;
 }
 
-static inline void
-blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static inline void blk_mq_sched_completed_request(struct request *rq)
 {
-struct elevator_queue *e = hctx->queue->elevator;
+struct elevator_queue *e = rq->q->elevator;
 
 if (e && e->type->ops.mq.completed_request)
-	e->type->ops.mq.completed_request(hctx, rq);
-
-BUG_ON(rq->internal_tag == -1);
-
-blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->internal_tag);
+	e->type->ops.mq.completed_request(rq);
 }
 
 static inline void blk_mq_sched_started_request(struct request *rq)
@@ -253,10 +253,12 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 struct blk_mq_hw_ctx *hctx;
 int i;
 
+lockdep_assert_held(&q->sysfs_lock);
+
 queue_for_each_hw_ctx(q, hctx, i)
 	blk_mq_unregister_hctx(hctx);
 
-blk_mq_debugfs_unregister_hctxs(q);
+blk_mq_debugfs_unregister_mq(q);
 
 kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
 kobject_del(&q->mq_kobj);

@@ -267,9 +269,9 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 
 void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 {
-blk_mq_disable_hotplug();
+mutex_lock(&q->sysfs_lock);
 __blk_mq_unregister_dev(dev, q);
-blk_mq_enable_hotplug();
+mutex_unlock(&q->sysfs_lock);
 }
 
 void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)

@@ -302,12 +304,13 @@ void blk_mq_sysfs_init(struct request_queue *q)
 }
 }
 
-int blk_mq_register_dev(struct device *dev, struct request_queue *q)
+int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
 {
 struct blk_mq_hw_ctx *hctx;
 int ret, i;
 
-blk_mq_disable_hotplug();
+WARN_ON_ONCE(!q->kobj.parent);
+lockdep_assert_held(&q->sysfs_lock);
 
 ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
 if (ret < 0)

@@ -315,20 +318,38 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
 
 kobject_uevent(&q->mq_kobj, KOBJ_ADD);
 
-blk_mq_debugfs_register(q, kobject_name(&dev->kobj));
+blk_mq_debugfs_register(q);
 
 queue_for_each_hw_ctx(q, hctx, i) {
 ret = blk_mq_register_hctx(hctx);
 if (ret)
-	break;
+	goto unreg;
 }
 
-if (ret)
-	__blk_mq_unregister_dev(dev, q);
-else
-	q->mq_sysfs_init_done = true;
+q->mq_sysfs_init_done = true;
+
 out:
-blk_mq_enable_hotplug();
+return ret;
+
+unreg:
+while (--i >= 0)
+	blk_mq_unregister_hctx(q->queue_hw_ctx[i]);
+
+blk_mq_debugfs_unregister_mq(q);
+
+kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
+kobject_del(&q->mq_kobj);
+kobject_put(&dev->kobj);
+return ret;
+}
+
+int blk_mq_register_dev(struct device *dev, struct request_queue *q)
+{
+int ret;
+
+mutex_lock(&q->sysfs_lock);
+ret = __blk_mq_register_dev(dev, q);
+mutex_unlock(&q->sysfs_lock);
 
 return ret;
 }

@@ -339,13 +360,17 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
 struct blk_mq_hw_ctx *hctx;
 int i;
 
+mutex_lock(&q->sysfs_lock);
 if (!q->mq_sysfs_init_done)
-	return;
+	goto unlock;
 
-blk_mq_debugfs_unregister_hctxs(q);
+blk_mq_debugfs_unregister_mq(q);
 
 queue_for_each_hw_ctx(q, hctx, i)
 	blk_mq_unregister_hctx(hctx);
 
+unlock:
+mutex_unlock(&q->sysfs_lock);
 }
 
 int blk_mq_sysfs_register(struct request_queue *q)

@@ -353,10 +378,11 @@ int blk_mq_sysfs_register(struct request_queue *q)
 struct blk_mq_hw_ctx *hctx;
 int i, ret = 0;
 
+mutex_lock(&q->sysfs_lock);
 if (!q->mq_sysfs_init_done)
-	return ret;
+	goto unlock;
 
-blk_mq_debugfs_register_hctxs(q);
+blk_mq_debugfs_register_mq(q);
 
 queue_for_each_hw_ctx(q, hctx, i) {
 ret = blk_mq_register_hctx(hctx);

@@ -364,5 +390,8 @@ int blk_mq_sysfs_register(struct request_queue *q)
 break;
 }
 
+unlock:
+mutex_unlock(&q->sysfs_lock);
+
 return ret;
 }
@@ -96,7 +96,10 @@ static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
 if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
     !hctx_may_queue(data->hctx, bt))
 	return -1;
-return __sbitmap_queue_get(bt);
+if (data->shallow_depth)
+	return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
+else
+	return __sbitmap_queue_get(bt);
 }
 
 unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
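
The new shallow_depth field lets an I/O scheduler restrict how much of the tag space one class of requests may consume; a hedged sketch of the intent (the names total_depth and data are illustrative, not from this patch):

    /* Illustrative only: cap this class of allocations to a quarter of the
     * tag space; shallow_depth == 0 keeps the default, unrestricted path. */
    data->shallow_depth = max(1U, total_depth / 4);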
block/blk-mq.c (569 lines changed)
@@ -39,6 +39,26 @@
 static DEFINE_MUTEX(all_q_mutex);
 static LIST_HEAD(all_q_list);
 
+static void blk_mq_poll_stats_start(struct request_queue *q);
+static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
+
+static int blk_mq_poll_stats_bkt(const struct request *rq)
+{
+	int ddir, bytes, bucket;
+
+	ddir = rq_data_dir(rq);
+	bytes = blk_rq_bytes(rq);
+
+	bucket = ddir + 2*(ilog2(bytes) - 9);
+
+	if (bucket < 0)
+		return -1;
+	else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
+		return ddir + BLK_MQ_POLL_STATS_BKTS - 2;
+
+	return bucket;
+}
+
 /*
 * Check if any of the ctx's have pending work in this hardware queue
 */
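
A worked example of the bucketing above: for a 4 KiB read, ddir = 0 and ilog2(4096) = 12, so the request lands in bucket 0 + 2 * (12 - 9) = 6, while the matching 4 KiB write lands in bucket 7 (same arithmetic as blk_mq_poll_stats_bkt(), on example values):

    int ddir = 0;                               /* READ */
    int bytes = 4096;                           /* 4 KiB request */
    int bucket = ddir + 2 * (ilog2(bytes) - 9); /* = 6 */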
@@ -65,7 +85,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
 sbitmap_clear_bit(&hctx->ctx_map, ctx->index_hw);
 }
 
-void blk_mq_freeze_queue_start(struct request_queue *q)
+void blk_freeze_queue_start(struct request_queue *q)
 {
 int freeze_depth;
 

@@ -75,7 +95,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q)
 blk_mq_run_hw_queues(q, false);
 }
 }
-EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
+EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
 
 void blk_mq_freeze_queue_wait(struct request_queue *q)
 {

@@ -105,7 +125,7 @@ void blk_freeze_queue(struct request_queue *q)
 * no blk_unfreeze_queue(), and blk_freeze_queue() is not
 * exported to drivers as the only user for unfreeze is blk_mq.
 */
-blk_mq_freeze_queue_start(q);
+blk_freeze_queue_start(q);
 blk_mq_freeze_queue_wait(q);
 }
 
@@ -210,7 +230,6 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 #endif
 rq->special = NULL;
 /* tag was already set */
-rq->errors = 0;
 rq->extra_len = 0;
 
 INIT_LIST_HEAD(&rq->timeout_list);

@@ -347,7 +366,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 if (rq->tag != -1)
 	blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
 if (sched_tag != -1)
-	blk_mq_sched_completed_request(hctx, rq);
+	blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
 blk_mq_sched_restart(hctx);
 blk_queue_exit(q);
 }

@@ -365,6 +384,7 @@ void blk_mq_finish_request(struct request *rq)
 {
 blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
 }
+EXPORT_SYMBOL_GPL(blk_mq_finish_request);
 
 void blk_mq_free_request(struct request *rq)
 {
@@ -402,12 +422,19 @@ static void __blk_mq_complete_request_remote(void *data)
 rq->q->softirq_done_fn(rq);
 }
 
-static void blk_mq_ipi_complete_request(struct request *rq)
+static void __blk_mq_complete_request(struct request *rq)
 {
 struct blk_mq_ctx *ctx = rq->mq_ctx;
 bool shared = false;
 int cpu;
 
+if (rq->internal_tag != -1)
+	blk_mq_sched_completed_request(rq);
+if (rq->rq_flags & RQF_STATS) {
+	blk_mq_poll_stats_start(rq->q);
+	blk_stat_add(rq);
+}
+
 if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
 rq->q->softirq_done_fn(rq);
 return;

@@ -428,33 +455,6 @@ static void blk_mq_ipi_complete_request(struct request *rq)
 put_cpu();
 }
 
-static void blk_mq_stat_add(struct request *rq)
-{
-	if (rq->rq_flags & RQF_STATS) {
-		/*
-		 * We could rq->mq_ctx here, but there's less of a risk
-		 * of races if we have the completion event add the stats
-		 * to the local software queue.
-		 */
-		struct blk_mq_ctx *ctx;
-
-		ctx = __blk_mq_get_ctx(rq->q, raw_smp_processor_id());
-		blk_stat_add(&ctx->stat[rq_data_dir(rq)], rq);
-	}
-}
-
-static void __blk_mq_complete_request(struct request *rq)
-{
-	struct request_queue *q = rq->q;
-
-	blk_mq_stat_add(rq);
-
-	if (!q->softirq_done_fn)
-		blk_mq_end_request(rq, rq->errors);
-	else
-		blk_mq_ipi_complete_request(rq);
-}
-
 /**
 * blk_mq_complete_request - end I/O on a request
 * @rq: the request being processed
@@ -463,16 +463,14 @@ static void __blk_mq_complete_request(struct request *rq)
 * Ends all I/O on a request. It does not handle partial completions.
 * The actual completion happens out-of-order, through a IPI handler.
 **/
-void blk_mq_complete_request(struct request *rq, int error)
+void blk_mq_complete_request(struct request *rq)
 {
 struct request_queue *q = rq->q;
 
 if (unlikely(blk_should_fake_timeout(q)))
 	return;
-if (!blk_mark_rq_complete(rq)) {
-	rq->errors = error;
+if (!blk_mark_rq_complete(rq))
 	__blk_mq_complete_request(rq);
-}
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 

@@ -491,7 +489,7 @@ void blk_mq_start_request(struct request *rq)
 trace_block_rq_issue(q, rq);
 
 if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
-	blk_stat_set_issue_time(&rq->issue_stat);
+	blk_stat_set_issue(&rq->issue_stat, blk_rq_sectors(rq));
 	rq->rq_flags |= RQF_STATS;
 	wbt_issue(q->rq_wb, &rq->issue_stat);
 }

@@ -526,6 +524,15 @@ void blk_mq_start_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_start_request);
 
+/*
+ * When we reach here because queue is busy, REQ_ATOM_COMPLETE
+ * flag isn't set yet, so there may be race with timeout handler,
+ * but given rq->deadline is just set in .queue_rq() under
+ * this situation, the race won't be possible in reality because
+ * rq->timeout should be set as big enough to cover the window
+ * between blk_mq_start_request() called from .queue_rq() and
+ * clearing REQ_ATOM_STARTED here.
+ */
 static void __blk_mq_requeue_request(struct request *rq)
 {
 struct request_queue *q = rq->q;
@@ -633,8 +640,7 @@ void blk_mq_abort_requeue_list(struct request_queue *q)
 
 rq = list_first_entry(&rq_list, struct request, queuelist);
 list_del_init(&rq->queuelist);
-rq->errors = -EIO;
-blk_mq_end_request(rq, rq->errors);
+blk_mq_end_request(rq, -EIO);
 }
 }
 EXPORT_SYMBOL(blk_mq_abort_requeue_list);

@@ -666,7 +672,7 @@ void blk_mq_rq_timed_out(struct request *req, bool reserved)
 * just be ignored. This can happen due to the bitflag ordering.
 * Timeout first checks if STARTED is set, and if it is, assumes
 * the request is active. But if we race with completion, then
-* we both flags will get cleared. So check here again, and ignore
+* both flags will get cleared. So check here again, and ignore
 * a timeout event with a request that isn't active.
 */
 if (!test_bit(REQ_ATOM_STARTED, &req->atomic_flags))

@@ -699,6 +705,19 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
 	return;
 
+/*
+ * The rq being checked may have been freed and reallocated
+ * out already here, we avoid this race by checking rq->deadline
+ * and REQ_ATOM_COMPLETE flag together:
+ *
+ * - if rq->deadline is observed as new value because of
+ *   reusing, the rq won't be timed out because of timing.
+ * - if rq->deadline is observed as previous value,
+ *   REQ_ATOM_COMPLETE flag won't be cleared in reuse path
+ *   because we put a barrier between setting rq->deadline
+ *   and clearing the flag in blk_mq_start_request(), so
+ *   this rq won't be timed out too.
+ */
 if (time_after_eq(jiffies, rq->deadline)) {
 if (!blk_mark_rq_complete(rq))
 	blk_mq_rq_timed_out(rq, reserved);
@@ -727,7 +746,7 @@ static void blk_mq_timeout_work(struct work_struct *work)
 * percpu_ref_tryget directly, because we need to be able to
 * obtain a reference even in the short window between the queue
 * starting to freeze, by dropping the first reference in
-* blk_mq_freeze_queue_start, and the moment the last request is
+* blk_freeze_queue_start, and the moment the last request is
 * consumed, marked by the instant q_usage_counter reaches
 * zero.
 */

@@ -845,6 +864,8 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
 .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
 };
 
+might_sleep_if(wait);
+
 if (rq->tag != -1)
 	goto done;
 

@@ -964,19 +985,11 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 {
 struct blk_mq_hw_ctx *hctx;
 struct request *rq;
-LIST_HEAD(driver_list);
-struct list_head *dptr;
 int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK;
 
 if (list_empty(list))
 	return false;
 
-/*
- * Start off with dptr being NULL, so we start the first request
- * immediately, even if we have more pending.
- */
-dptr = NULL;
-
 /*
 * Now process all the entries, sending them to the driver.
 */
@@ -993,23 +1006,21 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 * The initial allocation attempt failed, so we need to
 * rerun the hardware queue when a tag is freed.
 */
-if (blk_mq_dispatch_wait_add(hctx)) {
-	/*
-	 * It's possible that a tag was freed in the
-	 * window between the allocation failure and
-	 * adding the hardware queue to the wait queue.
-	 */
-	if (!blk_mq_get_driver_tag(rq, &hctx, false))
-		break;
-} else {
+if (!blk_mq_dispatch_wait_add(hctx))
 	break;
-}
+
+/*
+ * It's possible that a tag was freed in the window
+ * between the allocation failure and adding the
+ * hardware queue to the wait queue.
+ */
+if (!blk_mq_get_driver_tag(rq, &hctx, false))
+	break;
 }
 
 list_del_init(&rq->queuelist);
 
 bd.rq = rq;
-bd.list = dptr;
 
 /*
 * Flag last if we have no more requests, or if we have more

@@ -1038,20 +1049,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 pr_err("blk-mq: bad return on queue: %d\n", ret);
 case BLK_MQ_RQ_QUEUE_ERROR:
 errors++;
-rq->errors = -EIO;
-blk_mq_end_request(rq, rq->errors);
+blk_mq_end_request(rq, -EIO);
 break;
 }
 
 if (ret == BLK_MQ_RQ_QUEUE_BUSY)
 	break;
-
-/*
- * We've done the first request. If we have more than 1
- * left in the list, set dptr to defer issue.
- */
-if (!dptr && list->next != list->prev)
-	dptr = &driver_list;
 } while (!list_empty(list));
 
 hctx->dispatched[queued_to_index(queued)]++;

@@ -1062,8 +1065,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 */
 if (!list_empty(list)) {
 /*
- * If we got a driver tag for the next request already,
- * free it again.
+ * If an I/O scheduler has been configured and we got a driver
+ * tag for the next request already, free it again.
 */
 rq = list_first_entry(list, struct request, queuelist);
 blk_mq_put_driver_tag(rq);

@@ -1073,16 +1076,24 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 spin_unlock(&hctx->lock);
 
 /*
- * the queue is expected stopped with BLK_MQ_RQ_QUEUE_BUSY, but
- * it's possible the queue is stopped and restarted again
- * before this. Queue restart will dispatch requests. And since
- * requests in rq_list aren't added into hctx->dispatch yet,
- * the requests in rq_list might get lost.
+ * If SCHED_RESTART was set by the caller of this function and
+ * it is no longer set that means that it was cleared by another
+ * thread and hence that a queue rerun is needed.
 *
- * blk_mq_run_hw_queue() already checks the STOPPED bit
+ * If TAG_WAITING is set that means that an I/O scheduler has
+ * been configured and another thread is waiting for a driver
+ * tag. To guarantee fairness, do not rerun this hardware queue
+ * but let the other thread grab the driver tag.
|
||||||
*
|
*
|
||||||
* If RESTART or TAG_WAITING is set, then let completion restart
|
* If no I/O scheduler has been configured it is possible that
|
||||||
* the queue instead of potentially looping here.
|
* the hardware queue got stopped and restarted before requests
|
||||||
|
* were pushed back onto the dispatch list. Rerun the queue to
|
||||||
|
* avoid starvation. Notes:
|
||||||
|
* - blk_mq_run_hw_queue() checks whether or not a queue has
|
||||||
|
* been stopped before rerunning a queue.
|
||||||
|
* - Some but not all block drivers stop a queue before
|
||||||
|
* returning BLK_MQ_RQ_QUEUE_BUSY. Two exceptions are scsi-mq
|
||||||
|
* and dm-rq.
|
||||||
*/
|
*/
|
||||||
if (!blk_mq_sched_needs_restart(hctx) &&
|
if (!blk_mq_sched_needs_restart(hctx) &&
|
||||||
!test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))
|
!test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))
|
||||||
|
@ -1104,6 +1115,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
|
||||||
blk_mq_sched_dispatch_requests(hctx);
|
blk_mq_sched_dispatch_requests(hctx);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
} else {
|
} else {
|
||||||
|
might_sleep();
|
||||||
|
|
||||||
srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
|
srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
|
||||||
blk_mq_sched_dispatch_requests(hctx);
|
blk_mq_sched_dispatch_requests(hctx);
|
||||||
srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
|
srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
|
||||||
|
@ -1153,13 +1166,9 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
|
||||||
put_cpu();
|
put_cpu();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (msecs == 0)
|
kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
|
||||||
kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx),
|
&hctx->run_work,
|
||||||
&hctx->run_work);
|
msecs_to_jiffies(msecs));
|
||||||
else
|
|
||||||
kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
|
|
||||||
&hctx->delayed_run_work,
|
|
||||||
msecs_to_jiffies(msecs));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
|
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
|
||||||
|
@ -1172,6 +1181,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||||
{
|
{
|
||||||
__blk_mq_delay_run_hw_queue(hctx, async, 0);
|
__blk_mq_delay_run_hw_queue(hctx, async, 0);
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL(blk_mq_run_hw_queue);
|
||||||
|
|
||||||
void blk_mq_run_hw_queues(struct request_queue *q, bool async)
|
void blk_mq_run_hw_queues(struct request_queue *q, bool async)
|
||||||
{
|
{
|
||||||
|
@ -1210,8 +1220,7 @@ EXPORT_SYMBOL(blk_mq_queue_stopped);
|
||||||
|
|
||||||
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
|
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
|
||||||
{
|
{
|
||||||
cancel_work(&hctx->run_work);
|
cancel_delayed_work_sync(&hctx->run_work);
|
||||||
cancel_delayed_work(&hctx->delay_work);
|
|
||||||
set_bit(BLK_MQ_S_STOPPED, &hctx->state);
|
set_bit(BLK_MQ_S_STOPPED, &hctx->state);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(blk_mq_stop_hw_queue);
|
EXPORT_SYMBOL(blk_mq_stop_hw_queue);
|
||||||
|
@ -1268,38 +1277,40 @@ static void blk_mq_run_work_fn(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct blk_mq_hw_ctx *hctx;
|
struct blk_mq_hw_ctx *hctx;
|
||||||
|
|
||||||
hctx = container_of(work, struct blk_mq_hw_ctx, run_work);
|
hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we are stopped, don't run the queue. The exception is if
|
||||||
|
* BLK_MQ_S_START_ON_RUN is set. For that case, we auto-clear
|
||||||
|
* the STOPPED bit and run it.
|
||||||
|
*/
|
||||||
|
if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) {
|
||||||
|
if (!test_bit(BLK_MQ_S_START_ON_RUN, &hctx->state))
|
||||||
|
return;
|
||||||
|
|
||||||
|
clear_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
|
||||||
|
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
|
||||||
|
}
|
||||||
|
|
||||||
__blk_mq_run_hw_queue(hctx);
|
__blk_mq_run_hw_queue(hctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void blk_mq_delayed_run_work_fn(struct work_struct *work)
|
|
||||||
{
|
|
||||||
struct blk_mq_hw_ctx *hctx;
|
|
||||||
|
|
||||||
hctx = container_of(work, struct blk_mq_hw_ctx, delayed_run_work.work);
|
|
||||||
|
|
||||||
__blk_mq_run_hw_queue(hctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void blk_mq_delay_work_fn(struct work_struct *work)
|
|
||||||
{
|
|
||||||
struct blk_mq_hw_ctx *hctx;
|
|
||||||
|
|
||||||
hctx = container_of(work, struct blk_mq_hw_ctx, delay_work.work);
|
|
||||||
|
|
||||||
if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state))
|
|
||||||
__blk_mq_run_hw_queue(hctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
|
void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
|
||||||
{
|
{
|
||||||
if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
|
if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Stop the hw queue, then modify currently delayed work.
|
||||||
|
* This should prevent us from running the queue prematurely.
|
||||||
|
* Mark the queue as auto-clearing STOPPED when it runs.
|
||||||
|
*/
|
||||||
blk_mq_stop_hw_queue(hctx);
|
blk_mq_stop_hw_queue(hctx);
|
||||||
kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
|
set_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
|
||||||
&hctx->delay_work, msecs_to_jiffies(msecs));
|
kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
|
||||||
|
&hctx->run_work,
|
||||||
|
msecs_to_jiffies(msecs));
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(blk_mq_delay_queue);
|
EXPORT_SYMBOL(blk_mq_delay_queue);
|
||||||
|
|
||||||
|
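The three work items collapsed above (a plain run_work plus two delayed works) can all be expressed with a single delayed_work, because a zero delay behaves like an immediate schedule and mod_delayed_work() re-arms a pending timer, which is what blk_mq_delay_queue() now does through kblockd_mod_delayed_work_on(). The module below is only a hedged, stand-alone sketch of that workqueue idiom; the names demo_* are invented and it is not part of the patch.

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct delayed_work demo_work;

static void demo_fn(struct work_struct *work)
{
	pr_info("demo_work ran\n");
}

static int __init demo_init(void)
{
	INIT_DELAYED_WORK(&demo_work, demo_fn);
	schedule_delayed_work(&demo_work, 0);		/* "run now" case */
	mod_delayed_work(system_wq, &demo_work,
			 msecs_to_jiffies(100));	/* re-arm with a delay */
	return 0;
}

static void __exit demo_exit(void)
{
	cancel_delayed_work_sync(&demo_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");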
@@ -1408,7 +1419,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 
 static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
 {
-	init_request_from_bio(rq, bio);
+	blk_init_request_from_bio(rq, bio);
 
 	blk_account_io_start(rq, true);
 }

@@ -1453,14 +1464,13 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
 }
 
-static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
+static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
 				      bool may_sleep)
 {
 	struct request_queue *q = rq->q;
 	struct blk_mq_queue_data bd = {
 		.rq = rq,
-		.list = NULL,
-		.last = 1
+		.last = true,
 	};
 	struct blk_mq_hw_ctx *hctx;
 	blk_qc_t new_cookie;

@@ -1485,31 +1495,42 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
 		return;
 	}
 
-	__blk_mq_requeue_request(rq);
-
 	if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
 		*cookie = BLK_QC_T_NONE;
-		rq->errors = -EIO;
-		blk_mq_end_request(rq, rq->errors);
+		blk_mq_end_request(rq, -EIO);
 		return;
 	}
 
+	__blk_mq_requeue_request(rq);
 insert:
 	blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
 }
 
-/*
- * Multiple hardware queue variant. This will not use per-process plugs,
- * but will attempt to bypass the hctx queueing if we can go straight to
- * hardware for SYNC IO.
- */
+static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+		struct request *rq, blk_qc_t *cookie)
+{
+	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
+		rcu_read_lock();
+		__blk_mq_try_issue_directly(rq, cookie, false);
+		rcu_read_unlock();
+	} else {
+		unsigned int srcu_idx;
+
+		might_sleep();
+
+		srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+		__blk_mq_try_issue_directly(rq, cookie, true);
+		srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+	}
+}
+
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
 	const int is_flush_fua = op_is_flush(bio->bi_opf);
 	struct blk_mq_alloc_data data = { .flags = 0 };
 	struct request *rq;
-	unsigned int request_count = 0, srcu_idx;
+	unsigned int request_count = 0;
 	struct blk_plug *plug;
 	struct request *same_queue_rq = NULL;
 	blk_qc_t cookie;
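The new blk_mq_try_issue_directly() wrapper shown above centralizes a pattern that is easy to get wrong: non-blocking hardware contexts issue inside a plain RCU read-side section, while BLK_MQ_F_BLOCKING contexts use SRCU so ->queue_rq() may sleep. The fragment below is only an illustrative sketch of that guard pattern under assumed names (my_ctx, do_dispatch are invented; the RCU/SRCU calls are the real kernel primitives).

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/srcu.h>

struct my_ctx {
	bool blocking;
	struct srcu_struct srcu;	/* init_srcu_struct() at setup time */
};

static void do_dispatch(struct my_ctx *ctx, bool may_sleep);

static void dispatch_guarded(struct my_ctx *ctx)
{
	if (!ctx->blocking) {
		rcu_read_lock();
		do_dispatch(ctx, false);
		rcu_read_unlock();
	} else {
		int idx;

		might_sleep();
		idx = srcu_read_lock(&ctx->srcu);
		do_dispatch(ctx, true);
		srcu_read_unlock(&ctx->srcu, idx);
	}
}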
@@ -1545,147 +1566,21 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	cookie = request_to_qc_t(data.hctx, rq);
 
-	if (unlikely(is_flush_fua)) {
-		if (q->elevator)
-			goto elv_insert;
-		blk_mq_bio_to_request(rq, bio);
-		blk_insert_flush(rq);
-		goto run_queue;
-	}
-
 	plug = current->plug;
-	/*
-	 * If the driver supports defer issued based on 'last', then
-	 * queue it up like normal since we can potentially save some
-	 * CPU this way.
-	 */
-	if (((plug && !blk_queue_nomerges(q)) || is_sync) &&
-	    !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
-		struct request *old_rq = NULL;
-
-		blk_mq_bio_to_request(rq, bio);
-
-		/*
-		 * We do limited plugging. If the bio can be merged, do that.
-		 * Otherwise the existing request in the plug list will be
-		 * issued. So the plug list will have one request at most
-		 */
-		if (plug) {
-			/*
-			 * The plug list might get flushed before this. If that
-			 * happens, same_queue_rq is invalid and plug list is
-			 * empty
-			 */
-			if (same_queue_rq && !list_empty(&plug->mq_list)) {
-				old_rq = same_queue_rq;
-				list_del_init(&old_rq->queuelist);
-			}
-			list_add_tail(&rq->queuelist, &plug->mq_list);
-		} else /* is_sync */
-			old_rq = rq;
-		blk_mq_put_ctx(data.ctx);
-		if (!old_rq)
-			goto done;
-
-		if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
-			rcu_read_lock();
-			blk_mq_try_issue_directly(old_rq, &cookie, false);
-			rcu_read_unlock();
-		} else {
-			srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
-			blk_mq_try_issue_directly(old_rq, &cookie, true);
-			srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
-		}
-		goto done;
-	}
-
-	if (q->elevator) {
-elv_insert:
-		blk_mq_put_ctx(data.ctx);
-		blk_mq_bio_to_request(rq, bio);
-		blk_mq_sched_insert_request(rq, false, true,
-					!is_sync || is_flush_fua, true);
-		goto done;
-	}
-	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
-		/*
-		 * For a SYNC request, send it to the hardware immediately. For
-		 * an ASYNC request, just ensure that we run it later on. The
-		 * latter allows for merging opportunities and more efficient
-		 * dispatching.
-		 */
-run_queue:
-		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
-	}
-	blk_mq_put_ctx(data.ctx);
-done:
-	return cookie;
-}
-
-/*
- * Single hardware queue variant. This will attempt to use any per-process
- * plug for merging and IO deferral.
- */
-static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
-{
-	[... the old single-queue prologue is elided here: it duplicated
-	 blk_mq_make_request() -- bio bounce/split, plug and scheduler merge
-	 attempts, wbt accounting, request allocation and cookie setup ...]
-	if (unlikely(is_flush_fua)) {
-		if (q->elevator)
-			goto elv_insert;
-		blk_mq_bio_to_request(rq, bio);
-		blk_insert_flush(rq);
-		goto run_queue;
-	}
-
-	/*
-	 * A task plug currently exists. Since this is completely lockless,
-	 * utilize that to temporarily store requests until the task is
-	 * either done or scheduled away.
-	 */
-	plug = current->plug;
-	if (plug) {
+	if (unlikely(is_flush_fua)) {
+		blk_mq_put_ctx(data.ctx);
+		blk_mq_bio_to_request(rq, bio);
+		if (q->elevator) {
+			blk_mq_sched_insert_request(rq, false, true, true,
+					true);
+		} else {
+			blk_insert_flush(rq);
+			blk_mq_run_hw_queue(data.hctx, true);
+		}
+	} else if (plug && q->nr_hw_queues == 1) {
 		struct request *last = NULL;
 
+		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 
 		/*

@@ -1694,13 +1589,14 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 		 */
 		if (list_empty(&plug->mq_list))
 			request_count = 0;
+		else if (blk_queue_nomerges(q))
+			request_count = blk_plug_queued_count(q);
+
 		if (!request_count)
 			trace_block_plug(q);
 		else
 			last = list_entry_rq(plug->mq_list.prev);
 
-		blk_mq_put_ctx(data.ctx);
-
 		if (request_count >= BLK_MAX_REQUEST_COUNT || (last &&
 		    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
 			blk_flush_plug_list(plug, false);

@@ -1708,30 +1604,41 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 		}
 
 		list_add_tail(&rq->queuelist, &plug->mq_list);
-		return cookie;
-	}
-
-	if (q->elevator) {
-elv_insert:
-		blk_mq_put_ctx(data.ctx);
-		blk_mq_bio_to_request(rq, bio);
-		blk_mq_sched_insert_request(rq, false, true,
-					!is_sync || is_flush_fua, true);
-		goto done;
-	}
-	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
-		/*
-		 * For a SYNC request, send it to the hardware immediately. For
-		 * an ASYNC request, just ensure that we run it later on. The
-		 * latter allows for merging opportunities and more efficient
-		 * dispatching.
-		 */
-run_queue:
-		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
-	}
-
-	blk_mq_put_ctx(data.ctx);
-done:
+	} else if (plug && !blk_queue_nomerges(q)) {
+		blk_mq_bio_to_request(rq, bio);
+
+		/*
+		 * We do limited plugging. If the bio can be merged, do that.
+		 * Otherwise the existing request in the plug list will be
+		 * issued. So the plug list will have one request at most
+		 * The plug list might get flushed before this. If that happens,
+		 * the plug list is empty, and same_queue_rq is invalid.
+		 */
+		if (list_empty(&plug->mq_list))
+			same_queue_rq = NULL;
+		if (same_queue_rq)
+			list_del_init(&same_queue_rq->queuelist);
+		list_add_tail(&rq->queuelist, &plug->mq_list);
+
+		blk_mq_put_ctx(data.ctx);
+
+		if (same_queue_rq)
+			blk_mq_try_issue_directly(data.hctx, same_queue_rq,
+					&cookie);
+	} else if (q->nr_hw_queues > 1 && is_sync) {
+		blk_mq_put_ctx(data.ctx);
+		blk_mq_bio_to_request(rq, bio);
+		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
+	} else if (q->elevator) {
+		blk_mq_put_ctx(data.ctx);
+		blk_mq_bio_to_request(rq, bio);
+		blk_mq_sched_insert_request(rq, false, true, true, true);
+	} else if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
+		blk_mq_put_ctx(data.ctx);
+		blk_mq_run_hw_queue(data.hctx, true);
+	} else
+		blk_mq_put_ctx(data.ctx);
+
 	return cookie;
 }

@@ -1988,9 +1895,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	if (node == NUMA_NO_NODE)
 		node = hctx->numa_node = set->numa_node;
 
-	INIT_WORK(&hctx->run_work, blk_mq_run_work_fn);
-	INIT_DELAYED_WORK(&hctx->delayed_run_work, blk_mq_delayed_run_work_fn);
-	INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
+	INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
 	spin_lock_init(&hctx->lock);
 	INIT_LIST_HEAD(&hctx->dispatch);
 	hctx->queue = q;

@@ -2067,8 +1972,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		spin_lock_init(&__ctx->lock);
 		INIT_LIST_HEAD(&__ctx->rq_list);
 		__ctx->queue = q;
-		blk_stat_init(&__ctx->stat[BLK_STAT_READ]);
-		blk_stat_init(&__ctx->stat[BLK_STAT_WRITE]);
 
 		/* If the cpu isn't online, the cpu is mapped to first hctx */
 		if (!cpu_online(i))

@@ -2215,6 +2118,8 @@ static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
 {
 	struct request_queue *q;
 
+	lockdep_assert_held(&set->tag_list_lock);
+
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_freeze_queue(q);
 		queue_set_hctx_shared(q, shared);

@@ -2227,7 +2132,8 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 	struct blk_mq_tag_set *set = q->tag_set;
 
 	mutex_lock(&set->tag_list_lock);
-	list_del_init(&q->tag_set_list);
+	list_del_rcu(&q->tag_set_list);
+	INIT_LIST_HEAD(&q->tag_set_list);
 	if (list_is_singular(&set->tag_list)) {
 		/* just transitioned to unshared */
 		set->flags &= ~BLK_MQ_F_TAG_SHARED;

@@ -2235,6 +2141,8 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 		blk_mq_update_tag_set_depth(set, false);
 	}
 	mutex_unlock(&set->tag_list_lock);
+
+	synchronize_rcu();
 }
 
 static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,

@@ -2252,7 +2160,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
 	}
 	if (set->flags & BLK_MQ_F_TAG_SHARED)
 		queue_set_hctx_shared(q, true);
-	list_add_tail(&q->tag_set_list, &set->tag_list);
+	list_add_tail_rcu(&q->tag_set_list, &set->tag_list);
 
 	mutex_unlock(&set->tag_list_lock);
 }
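The tag_set_list hunks above move the list onto the standard RCU list discipline: writers still serialize on tag_list_lock, but removal is followed by a grace period so lockless readers never see a freed queue. The sketch below only illustrates that generic pattern with made-up types (my_set, my_queue); the list and RCU calls are the real kernel primitives.

#include <linux/rculist.h>
#include <linux/spinlock.h>

struct my_queue {
	struct list_head node;
};

struct my_set {
	spinlock_t lock;
	struct list_head queues;
};

static void my_set_add(struct my_set *set, struct my_queue *q)
{
	spin_lock(&set->lock);
	list_add_tail_rcu(&q->node, &set->queues);
	spin_unlock(&set->lock);
}

static void my_set_del(struct my_set *set, struct my_queue *q)
{
	spin_lock(&set->lock);
	list_del_rcu(&q->node);
	spin_unlock(&set->lock);

	synchronize_rcu();	/* no reader can still see @q after this */
}

static void my_set_for_each(struct my_set *set, void (*fn)(struct my_queue *))
{
	struct my_queue *q;

	rcu_read_lock();
	list_for_each_entry_rcu(q, &set->queues, node)
		fn(q);
	rcu_read_unlock();
}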
@@ -2364,6 +2272,12 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	/* mark the queue as mq asap */
 	q->mq_ops = set->ops;
 
+	q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
+					     blk_mq_poll_stats_bkt,
+					     BLK_MQ_POLL_STATS_BKTS, q);
+	if (!q->poll_cb)
+		goto err_exit;
+
 	q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
 	if (!q->queue_ctx)
 		goto err_exit;

@@ -2398,10 +2312,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	INIT_LIST_HEAD(&q->requeue_list);
 	spin_lock_init(&q->requeue_lock);
 
-	if (q->nr_hw_queues > 1)
-		blk_queue_make_request(q, blk_mq_make_request);
-	else
-		blk_queue_make_request(q, blk_sq_make_request);
+	blk_queue_make_request(q, blk_mq_make_request);
 
 	/*
 	 * Do this after blk_queue_make_request() overrides it...

@@ -2456,8 +2367,6 @@ void blk_mq_free_queue(struct request_queue *q)
 	list_del_init(&q->all_q_node);
 	mutex_unlock(&all_q_mutex);
 
-	wbt_exit(q);
-
 	blk_mq_del_queue_tag_set(q);
 
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);

@@ -2502,7 +2411,7 @@ static void blk_mq_queue_reinit_work(void)
 	 * take place in parallel.
 	 */
 	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_freeze_queue_start(q);
+		blk_freeze_queue_start(q);
 	list_for_each_entry(q, &all_q_list, all_q_node)
 		blk_mq_freeze_queue_wait(q);
 

@@ -2743,6 +2652,8 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 {
 	struct request_queue *q;
 
+	lockdep_assert_held(&set->tag_list_lock);
+
 	if (nr_hw_queues > nr_cpu_ids)
 		nr_hw_queues = nr_cpu_ids;
 	if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues)

@@ -2755,16 +2666,6 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 	blk_mq_update_queue_map(set);
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_realloc_hw_ctxs(set, q);
-
-		/*
-		 * Manually set the make_request_fn as blk_queue_make_request
-		 * resets a lot of the queue settings.
-		 */
-		if (q->nr_hw_queues > 1)
-			q->make_request_fn = blk_mq_make_request;
-		else
-			q->make_request_fn = blk_sq_make_request;
-
 		blk_mq_queue_reinit(q, cpu_online_mask);
 	}
 

@@ -2773,39 +2674,69 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 }
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 
+/* Enable polling stats and return whether they were already enabled. */
+static bool blk_poll_stats_enable(struct request_queue *q)
+{
+	if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
+	    test_and_set_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
+		return true;
+	blk_stat_add_callback(q, q->poll_cb);
+	return false;
+}
+
+static void blk_mq_poll_stats_start(struct request_queue *q)
+{
+	/*
+	 * We don't arm the callback if polling stats are not enabled or the
+	 * callback is already active.
+	 */
+	if (!test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
+	    blk_stat_is_active(q->poll_cb))
+		return;
+
+	blk_stat_activate_msecs(q->poll_cb, 100);
+}
+
+static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
+{
+	struct request_queue *q = cb->data;
+	int bucket;
+
+	for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
+		if (cb->stat[bucket].nr_samples)
+			q->poll_stat[bucket] = cb->stat[bucket];
+	}
+}
+
 static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
 				       struct blk_mq_hw_ctx *hctx,
 				       struct request *rq)
 {
-	struct blk_rq_stat stat[2];
 	unsigned long ret = 0;
+	int bucket;
 
 	/*
 	 * If stats collection isn't on, don't sleep but turn it on for
 	 * future users
 	 */
-	if (!blk_stat_enable(q))
+	if (!blk_poll_stats_enable(q))
 		return 0;
 
-	/*
-	 * We don't have to do this once per IO, should optimize this
-	 * to just use the current window of stats until it changes
-	 */
-	memset(&stat, 0, sizeof(stat));
-	blk_hctx_stat_get(hctx, stat);
-
 	/*
 	 * As an optimistic guess, use half of the mean service time
 	 * for this type of request. We can (and should) make this smarter.
 	 * For instance, if the completion latencies are tight, we can
 	 * get closer than just half the mean. This is especially
 	 * important on devices where the completion latencies are longer
-	 * than ~10 usec.
+	 * than ~10 usec. We do use the stats for the relevant IO size
+	 * if available which does lead to better estimates.
 	 */
-	if (req_op(rq) == REQ_OP_READ && stat[BLK_STAT_READ].nr_samples)
-		ret = (stat[BLK_STAT_READ].mean + 1) / 2;
-	else if (req_op(rq) == REQ_OP_WRITE && stat[BLK_STAT_WRITE].nr_samples)
-		ret = (stat[BLK_STAT_WRITE].mean + 1) / 2;
+	bucket = blk_mq_poll_stats_bkt(rq);
+	if (bucket < 0)
+		return ret;
+
+	if (q->poll_stat[bucket].nr_samples)
+		ret = (q->poll_stat[bucket].mean + 1) / 2;
 
 	return ret;
 }
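The polling hunks above rely on a bucket function (blk_mq_poll_stats_bkt() is referenced but not shown in this excerpt) that maps a request to one of BLK_MQ_POLL_STATS_BKTS latency buckets, or returns -1 to skip accounting, per the bucket_fn contract documented in blk-stat.h. The function below is a purely hypothetical example of such a classifier, splitting by direction and a few size classes; the name and constants are invented for illustration only.

#include <linux/blkdev.h>

#define DEMO_POLL_STATS_BKTS	8

static int demo_poll_stats_bkt(const struct request *rq)
{
	unsigned int bytes = blk_rq_bytes(rq);
	int dir = (req_op(rq) == REQ_OP_WRITE) ? 1 : 0;
	int size_class = 0;

	if (!bytes)
		return -1;		/* nothing to account for this request */
	if (bytes > 4096)
		size_class = 1;
	if (bytes > 65536)
		size_class = 2;
	if (bytes > 1048576)
		size_class = 3;

	return dir * 4 + size_class;	/* bucket index in [0, DEMO_POLL_STATS_BKTS) */
}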
block/blk-mq.h

@@ -20,7 +20,6 @@ struct blk_mq_ctx {
 
 	/* incremented at completion time */
 	unsigned long		____cacheline_aligned_in_smp rq_completed[2];
-	struct blk_rq_stat	stat[2];
 
 	struct request_queue	*queue;
 	struct kobject		kobj;

@@ -79,6 +78,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
  */
 extern void blk_mq_sysfs_init(struct request_queue *q);
 extern void blk_mq_sysfs_deinit(struct request_queue *q);
+extern int __blk_mq_register_dev(struct device *dev, struct request_queue *q);
 extern int blk_mq_sysfs_register(struct request_queue *q);
 extern void blk_mq_sysfs_unregister(struct request_queue *q);
 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);

@@ -87,13 +87,12 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
  * debugfs helpers
  */
 #ifdef CONFIG_BLK_DEBUG_FS
-int blk_mq_debugfs_register(struct request_queue *q, const char *name);
+int blk_mq_debugfs_register(struct request_queue *q);
 void blk_mq_debugfs_unregister(struct request_queue *q);
-int blk_mq_debugfs_register_hctxs(struct request_queue *q);
-void blk_mq_debugfs_unregister_hctxs(struct request_queue *q);
+int blk_mq_debugfs_register_mq(struct request_queue *q);
+void blk_mq_debugfs_unregister_mq(struct request_queue *q);
 #else
-static inline int blk_mq_debugfs_register(struct request_queue *q,
-					  const char *name)
+static inline int blk_mq_debugfs_register(struct request_queue *q)
 {
 	return 0;
 }

@@ -102,12 +101,12 @@ static inline void blk_mq_debugfs_unregister(struct request_queue *q)
 {
 }
 
-static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q)
+static inline int blk_mq_debugfs_register_mq(struct request_queue *q)
 {
 	return 0;
 }
 
-static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
+static inline void blk_mq_debugfs_unregister_mq(struct request_queue *q)
 {
 }
 #endif

@@ -142,6 +141,7 @@ struct blk_mq_alloc_data {
 	/* input parameter */
 	struct request_queue *q;
 	unsigned int flags;
+	unsigned int shallow_depth;
 
 	/* input & output parameter */
 	struct blk_mq_ctx *ctx;

block/blk-settings.c

@@ -103,7 +103,6 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->discard_granularity = 0;
 	lim->discard_alignment = 0;
 	lim->discard_misaligned = 0;
-	lim->discard_zeroes_data = 0;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
 	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;

@@ -127,7 +126,6 @@ void blk_set_stacking_limits(struct queue_limits *lim)
 	blk_set_default_limits(lim);
 
 	/* Inherit limits from component devices */
-	lim->discard_zeroes_data = 1;
 	lim->max_segments = USHRT_MAX;
 	lim->max_discard_segments = 1;
 	lim->max_hw_sectors = UINT_MAX;

@@ -609,7 +607,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->io_opt = lcm_not_zero(t->io_opt, b->io_opt);
 
 	t->cluster &= b->cluster;
-	t->discard_zeroes_data &= b->discard_zeroes_data;
 
 	/* Physical block size a multiple of the logical block size? */
 	if (t->physical_block_size & (t->logical_block_size - 1)) {
block/blk-stat.c (363 lines changed)

@@ -4,10 +4,27 @@
  * Copyright (C) 2016 Jens Axboe
  */
 #include <linux/kernel.h>
+#include <linux/rculist.h>
 #include <linux/blk-mq.h>
 
 #include "blk-stat.h"
 #include "blk-mq.h"
+#include "blk.h"
+
+#define BLK_RQ_STAT_BATCH	64
+
+struct blk_queue_stats {
+	struct list_head callbacks;
+	spinlock_t lock;
+	bool enable_accounting;
+};
+
+static void blk_stat_init(struct blk_rq_stat *stat)
+{
+	stat->min = -1ULL;
+	stat->max = stat->nr_samples = stat->mean = 0;
+	stat->batch = stat->nr_batch = 0;
+}
+
 static void blk_stat_flush_batch(struct blk_rq_stat *stat)
 {

@@ -48,166 +65,10 @@ static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
 	dst->nr_samples += src->nr_samples;
 }
 
-static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
-{
-	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx;
-	uint64_t latest = 0;
-	int i, j, nr;
-	[... loop elided: summed the newest per-ctx READ/WRITE windows into dst,
-	 retrying if a window changed underneath it ...]
-}
-
-void blk_queue_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
-{
-	if (q->mq_ops)
-		blk_mq_stat_get(q, dst);
-	else {
-		blk_stat_flush_batch(&q->rq_stats[BLK_STAT_READ]);
-		blk_stat_flush_batch(&q->rq_stats[BLK_STAT_WRITE]);
-		memcpy(&dst[BLK_STAT_READ], &q->rq_stats[BLK_STAT_READ],
-				sizeof(struct blk_rq_stat));
-		memcpy(&dst[BLK_STAT_WRITE], &q->rq_stats[BLK_STAT_WRITE],
-				sizeof(struct blk_rq_stat));
-	}
-}
-
-void blk_hctx_stat_get(struct blk_mq_hw_ctx *hctx, struct blk_rq_stat *dst)
-{
-	[... loop elided: same newest-window aggregation, per hardware queue ...]
-}
-
-static void __blk_stat_init(struct blk_rq_stat *stat, s64 time_now)
-{
-	stat->min = -1ULL;
-	stat->max = stat->nr_samples = stat->mean = 0;
-	stat->batch = stat->nr_batch = 0;
-	stat->time = time_now & BLK_STAT_NSEC_MASK;
-}
-
-void blk_stat_init(struct blk_rq_stat *stat)
-{
-	__blk_stat_init(stat, ktime_to_ns(ktime_get()));
-}
-
-static bool __blk_stat_is_current(struct blk_rq_stat *stat, s64 now)
-{
-	return (now & BLK_STAT_NSEC_MASK) == (stat->time & BLK_STAT_NSEC_MASK);
-}
-
-bool blk_stat_is_current(struct blk_rq_stat *stat)
-{
-	return __blk_stat_is_current(stat, ktime_to_ns(ktime_get()));
-}
-
-void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
-{
-	s64 now, value;
-
-	now = __blk_stat_time(ktime_to_ns(ktime_get()));
-	if (now < blk_stat_time(&rq->issue_stat))
-		return;
-
-	if (!__blk_stat_is_current(stat, now))
-		__blk_stat_init(stat, now);
-
-	value = now - blk_stat_time(&rq->issue_stat);
-	if (value > stat->max)
-		stat->max = value;
-	if (value < stat->min)
-		stat->min = value;
+static void __blk_stat_add(struct blk_rq_stat *stat, u64 value)
+{
+	stat->min = min(stat->min, value);
+	stat->max = max(stat->max, value);
 
 	if (stat->batch + value < stat->batch ||
 	    stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)

@@ -217,40 +78,172 @@ void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
 	stat->nr_batch++;
 }
 
-void blk_stat_clear(struct request_queue *q)
-{
-	if (q->mq_ops) {
-		struct blk_mq_hw_ctx *hctx;
-		struct blk_mq_ctx *ctx;
-		int i, j;
-
-		queue_for_each_hw_ctx(q, hctx, i) {
-			hctx_for_each_ctx(hctx, ctx, j) {
-				blk_stat_init(&ctx->stat[BLK_STAT_READ]);
-				blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
-			}
-		}
-	} else {
-		blk_stat_init(&q->rq_stats[BLK_STAT_READ]);
-		blk_stat_init(&q->rq_stats[BLK_STAT_WRITE]);
-	}
-}
-
-void blk_stat_set_issue_time(struct blk_issue_stat *stat)
-{
-	stat->time = (stat->time & BLK_STAT_MASK) |
-		(ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK);
-}
-
-/*
- * Enable stat tracking, return whether it was enabled
- */
-bool blk_stat_enable(struct request_queue *q)
-{
-	if (!test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
-		set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
-		return false;
-	}
-
-	return true;
-}
+void blk_stat_add(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+	struct blk_stat_callback *cb;
+	struct blk_rq_stat *stat;
+	int bucket;
+	s64 now, value;
+
+	now = __blk_stat_time(ktime_to_ns(ktime_get()));
+	if (now < blk_stat_time(&rq->issue_stat))
+		return;
+
+	value = now - blk_stat_time(&rq->issue_stat);
+
+	blk_throtl_stat_add(rq, value);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
+		if (blk_stat_is_active(cb)) {
+			bucket = cb->bucket_fn(rq);
+			if (bucket < 0)
+				continue;
+			stat = &this_cpu_ptr(cb->cpu_stat)[bucket];
+			__blk_stat_add(stat, value);
+		}
+	}
+	rcu_read_unlock();
+}
+
+static void blk_stat_timer_fn(unsigned long data)
+{
+	struct blk_stat_callback *cb = (void *)data;
+	unsigned int bucket;
+	int cpu;
+
+	for (bucket = 0; bucket < cb->buckets; bucket++)
+		blk_stat_init(&cb->stat[bucket]);
+
+	for_each_online_cpu(cpu) {
+		struct blk_rq_stat *cpu_stat;
+
+		cpu_stat = per_cpu_ptr(cb->cpu_stat, cpu);
+		for (bucket = 0; bucket < cb->buckets; bucket++) {
+			blk_stat_sum(&cb->stat[bucket], &cpu_stat[bucket]);
+			blk_stat_init(&cpu_stat[bucket]);
+		}
+	}
+
+	cb->timer_fn(cb);
+}
+
+struct blk_stat_callback *
+blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),
+			int (*bucket_fn)(const struct request *),
+			unsigned int buckets, void *data)
+{
+	struct blk_stat_callback *cb;
+
+	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+	if (!cb)
+		return NULL;
+
+	cb->stat = kmalloc_array(buckets, sizeof(struct blk_rq_stat),
+				 GFP_KERNEL);
+	if (!cb->stat) {
+		kfree(cb);
+		return NULL;
+	}
+	cb->cpu_stat = __alloc_percpu(buckets * sizeof(struct blk_rq_stat),
+				      __alignof__(struct blk_rq_stat));
+	if (!cb->cpu_stat) {
+		kfree(cb->stat);
+		kfree(cb);
+		return NULL;
+	}
+
+	cb->timer_fn = timer_fn;
+	cb->bucket_fn = bucket_fn;
+	cb->data = data;
+	cb->buckets = buckets;
+	setup_timer(&cb->timer, blk_stat_timer_fn, (unsigned long)cb);
+
+	return cb;
+}
+EXPORT_SYMBOL_GPL(blk_stat_alloc_callback);
+
+void blk_stat_add_callback(struct request_queue *q,
+			   struct blk_stat_callback *cb)
+{
+	unsigned int bucket;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct blk_rq_stat *cpu_stat;
+
+		cpu_stat = per_cpu_ptr(cb->cpu_stat, cpu);
+		for (bucket = 0; bucket < cb->buckets; bucket++)
+			blk_stat_init(&cpu_stat[bucket]);
+	}
+
+	spin_lock(&q->stats->lock);
+	list_add_tail_rcu(&cb->list, &q->stats->callbacks);
+	set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+	spin_unlock(&q->stats->lock);
+}
+EXPORT_SYMBOL_GPL(blk_stat_add_callback);
+
+void blk_stat_remove_callback(struct request_queue *q,
+			      struct blk_stat_callback *cb)
+{
+	spin_lock(&q->stats->lock);
+	list_del_rcu(&cb->list);
+	if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting)
+		clear_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+	spin_unlock(&q->stats->lock);
+
+	del_timer_sync(&cb->timer);
+}
+EXPORT_SYMBOL_GPL(blk_stat_remove_callback);
+
+static void blk_stat_free_callback_rcu(struct rcu_head *head)
+{
+	struct blk_stat_callback *cb;
+
+	cb = container_of(head, struct blk_stat_callback, rcu);
+	free_percpu(cb->cpu_stat);
+	kfree(cb->stat);
+	kfree(cb);
+}
+
+void blk_stat_free_callback(struct blk_stat_callback *cb)
+{
+	if (cb)
+		call_rcu(&cb->rcu, blk_stat_free_callback_rcu);
+}
+EXPORT_SYMBOL_GPL(blk_stat_free_callback);
+
+void blk_stat_enable_accounting(struct request_queue *q)
+{
+	spin_lock(&q->stats->lock);
+	q->stats->enable_accounting = true;
+	set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+	spin_unlock(&q->stats->lock);
+}
+
+struct blk_queue_stats *blk_alloc_queue_stats(void)
+{
+	struct blk_queue_stats *stats;
+
+	stats = kmalloc(sizeof(*stats), GFP_KERNEL);
+	if (!stats)
+		return NULL;
+
+	INIT_LIST_HEAD(&stats->callbacks);
+	spin_lock_init(&stats->lock);
+	stats->enable_accounting = false;
+
+	return stats;
+}
+
+void blk_free_queue_stats(struct blk_queue_stats *stats)
+{
+	if (!stats)
+		return;
+
+	WARN_ON(!list_empty(&stats->callbacks));
+
+	kfree(stats);
 }
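The callback API added above is consumed by code such as the blk-mq polling stats in this same series. The sketch below shows one plausible lifecycle for a consumer, using only the functions defined in this file; my_timer_fn, my_bucket_fn and my_setup are invented names, and the single-bucket split is arbitrary.

#include <linux/blkdev.h>
#include <linux/errno.h>
#include "blk-stat.h"

static int my_bucket_fn(const struct request *rq)
{
	return 0;	/* everything goes into one bucket in this example */
}

static void my_timer_fn(struct blk_stat_callback *cb)
{
	unsigned int i;

	/* cb->stat[] holds the flushed per-cpu buckets for the last window */
	for (i = 0; i < cb->buckets; i++)
		pr_info("bucket %u: %llu samples, mean %llu ns\n", i,
			(unsigned long long)cb->stat[i].nr_samples,
			(unsigned long long)cb->stat[i].mean);
}

static int my_setup(struct request_queue *q)
{
	struct blk_stat_callback *cb;

	cb = blk_stat_alloc_callback(my_timer_fn, my_bucket_fn, 1, q);
	if (!cb)
		return -ENOMEM;

	blk_stat_add_callback(q, cb);		/* start receiving completions */
	blk_stat_activate_msecs(cb, 100);	/* gather one 100 ms window */

	/* ... later, once no more windows are wanted ... */
	blk_stat_remove_callback(q, cb);	/* must precede the free */
	blk_stat_free_callback(cb);
	return 0;
}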
block/blk-stat.h (204 lines changed)

@@ -1,33 +1,85 @@
 #ifndef BLK_STAT_H
 #define BLK_STAT_H
 
-/*
- * ~0.13s window as a power-of-2 (2^27 nsecs)
- */
-#define BLK_STAT_NSEC		134217728ULL
-#define BLK_STAT_NSEC_MASK	~(BLK_STAT_NSEC - 1)
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/ktime.h>
+#include <linux/rcupdate.h>
+#include <linux/timer.h>
 
 /*
- * Upper 3 bits can be used elsewhere
+ * from upper:
+ * 3 bits: reserved for other usage
+ * 12 bits: size
+ * 49 bits: time
  */
 #define BLK_STAT_RES_BITS	3
-#define BLK_STAT_SHIFT		(64 - BLK_STAT_RES_BITS)
-#define BLK_STAT_TIME_MASK	((1ULL << BLK_STAT_SHIFT) - 1)
-#define BLK_STAT_MASK		~BLK_STAT_TIME_MASK
+#define BLK_STAT_SIZE_BITS	12
+#define BLK_STAT_RES_SHIFT	(64 - BLK_STAT_RES_BITS)
+#define BLK_STAT_SIZE_SHIFT	(BLK_STAT_RES_SHIFT - BLK_STAT_SIZE_BITS)
+#define BLK_STAT_TIME_MASK	((1ULL << BLK_STAT_SIZE_SHIFT) - 1)
+#define BLK_STAT_SIZE_MASK	\
+	(((1ULL << BLK_STAT_SIZE_BITS) - 1) << BLK_STAT_SIZE_SHIFT)
+#define BLK_STAT_RES_MASK	(~((1ULL << BLK_STAT_RES_SHIFT) - 1))
 
-enum {
-	BLK_STAT_READ	= 0,
-	BLK_STAT_WRITE,
-};
+/**
+ * struct blk_stat_callback - Block statistics callback.
+ *
+ * A &struct blk_stat_callback is associated with a &struct request_queue. While
+ * @timer is active, that queue's request completion latencies are sorted into
+ * buckets by @bucket_fn and added to a per-cpu buffer, @cpu_stat. When the
+ * timer fires, @cpu_stat is flushed to @stat and @timer_fn is invoked.
+ */
+struct blk_stat_callback {
+	/*
+	 * @list: RCU list of callbacks for a &struct request_queue.
+	 */
+	struct list_head list;
+
+	/**
+	 * @timer: Timer for the next callback invocation.
+	 */
+	struct timer_list timer;
+
+	/**
+	 * @cpu_stat: Per-cpu statistics buckets.
+	 */
+	struct blk_rq_stat __percpu *cpu_stat;
+
+	/**
+	 * @bucket_fn: Given a request, returns which statistics bucket it
+	 * should be accounted under. Return -1 for no bucket for this
+	 * request.
+	 */
+	int (*bucket_fn)(const struct request *);
+
+	/**
+	 * @buckets: Number of statistics buckets.
+	 */
+	unsigned int buckets;
+
+	/**
+	 * @stat: Array of statistics buckets.
+	 */
+	struct blk_rq_stat *stat;
+
+	/**
+	 * @fn: Callback function.
+	 */
+	void (*timer_fn)(struct blk_stat_callback *);
+
+	/**
+	 * @data: Private pointer for the user.
+	 */
+	void *data;
+
+	struct rcu_head rcu;
+};
 
-void blk_stat_add(struct blk_rq_stat *, struct request *);
-void blk_hctx_stat_get(struct blk_mq_hw_ctx *, struct blk_rq_stat *);
-void blk_queue_stat_get(struct request_queue *, struct blk_rq_stat *);
-void blk_stat_clear(struct request_queue *);
-void blk_stat_init(struct blk_rq_stat *);
-bool blk_stat_is_current(struct blk_rq_stat *);
-void blk_stat_set_issue_time(struct blk_issue_stat *);
-bool blk_stat_enable(struct request_queue *);
+struct blk_queue_stats *blk_alloc_queue_stats(void);
+void blk_free_queue_stats(struct blk_queue_stats *);
+
+void blk_stat_add(struct request *);
 
 static inline u64 __blk_stat_time(u64 time)
 {
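The new field layout above packs three values into one 64-bit word: 3 reserved bits on top, then 12 size bits, then 49 time bits, so BLK_STAT_RES_SHIFT is 61 and BLK_STAT_SIZE_SHIFT is 49. The stand-alone program below just mirrors those masks arithmetically to show how a time and a (capped) size round-trip through the packed word; it is a worked example, not kernel code.

#include <stdint.h>
#include <stdio.h>

#define RES_BITS   3
#define SIZE_BITS  12
#define RES_SHIFT  (64 - RES_BITS)		/* 61 */
#define SIZE_SHIFT (RES_SHIFT - SIZE_BITS)	/* 49 */
#define TIME_MASK  ((1ULL << SIZE_SHIFT) - 1)
#define SIZE_MASK  (((1ULL << SIZE_BITS) - 1) << SIZE_SHIFT)

int main(void)
{
	uint64_t time_ns = 123456789ULL;	/* issue time, truncated to 49 bits */
	uint64_t sectors = 2048;		/* request size, capped to 12 bits */
	uint64_t packed = (time_ns & TIME_MASK) |
			  ((sectors & ((1ULL << SIZE_BITS) - 1)) << SIZE_SHIFT);

	printf("time=%llu size=%llu\n",
	       (unsigned long long)(packed & TIME_MASK),
	       (unsigned long long)((packed & SIZE_MASK) >> SIZE_SHIFT));
	return 0;
}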
@@ -36,7 +88,117 @@ static inline u64 __blk_stat_time(u64 time)
 
 static inline u64 blk_stat_time(struct blk_issue_stat *stat)
 {
-	return __blk_stat_time(stat->time);
+	return __blk_stat_time(stat->stat);
+}
+
+static inline sector_t blk_capped_size(sector_t size)
+{
+	return size & ((1ULL << BLK_STAT_SIZE_BITS) - 1);
+}
+
+static inline sector_t blk_stat_size(struct blk_issue_stat *stat)
+{
+	return (stat->stat & BLK_STAT_SIZE_MASK) >> BLK_STAT_SIZE_SHIFT;
+}
+
+static inline void blk_stat_set_issue(struct blk_issue_stat *stat,
+				      sector_t size)
+{
+	stat->stat = (stat->stat & BLK_STAT_RES_MASK) |
+		(ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK) |
+		(((u64)blk_capped_size(size)) << BLK_STAT_SIZE_SHIFT);
+}
+
+/* record time/size info in request but not add a callback */
+void blk_stat_enable_accounting(struct request_queue *q);
+
+/**
+ * blk_stat_alloc_callback() - Allocate a block statistics callback.
+ * @timer_fn: Timer callback function.
+ * @bucket_fn: Bucket callback function.
+ * @buckets: Number of statistics buckets.
+ * @data: Value for the @data field of the &struct blk_stat_callback.
+ *
+ * See &struct blk_stat_callback for details on the callback functions.
+ *
+ * Return: &struct blk_stat_callback on success or NULL on ENOMEM.
+ */
+struct blk_stat_callback *
+blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),
+			int (*bucket_fn)(const struct request *),
+			unsigned int buckets, void *data);
+
+/**
+ * blk_stat_add_callback() - Add a block statistics callback to be run on a
+ * request queue.
+ * @q: The request queue.
+ * @cb: The callback.
+ *
+ * Note that a single &struct blk_stat_callback can only be added to a single
+ * &struct request_queue.
+ */
+void blk_stat_add_callback(struct request_queue *q,
+			   struct blk_stat_callback *cb);
+
+/**
+ * blk_stat_remove_callback() - Remove a block statistics callback from a
+ * request queue.
+ * @q: The request queue.
+ * @cb: The callback.
+ *
+ * When this returns, the callback is not running on any CPUs and will not be
+ * called again unless readded.
+ */
+void blk_stat_remove_callback(struct request_queue *q,
+			      struct blk_stat_callback *cb);
+
+/**
+ * blk_stat_free_callback() - Free a block statistics callback.
+ * @cb: The callback.
+ *
+ * @cb may be NULL, in which case this does nothing. If it is not NULL, @cb must
+ * not be associated with a request queue. I.e., if it was previously added with
+ * blk_stat_add_callback(), it must also have been removed since then with
+ * blk_stat_remove_callback().
+ */
+void blk_stat_free_callback(struct blk_stat_callback *cb);
+
+/**
+ * blk_stat_is_active() - Check if a block statistics callback is currently
+ * gathering statistics.
+ * @cb: The callback.
+ */
+static inline bool blk_stat_is_active(struct blk_stat_callback *cb)
+{
+	return timer_pending(&cb->timer);
+}
+
+/**
+ * blk_stat_activate_nsecs() - Gather block statistics during a time window in
+ * nanoseconds.
+ * @cb: The callback.
+ * @nsecs: Number of nanoseconds to gather statistics for.
+ *
+ * The timer callback will be called when the window expires.
+ */
+static inline void blk_stat_activate_nsecs(struct blk_stat_callback *cb,
+					   u64 nsecs)
+{
+	mod_timer(&cb->timer, jiffies + nsecs_to_jiffies(nsecs));
+}
+
+/**
+ * blk_stat_activate_msecs() - Gather block statistics during a time window in
+ * milliseconds.
+ * @cb: The callback.
+ * @msecs: Number of milliseconds to gather statistics for.
+ *
+ * The timer callback will be called when the window expires.
+ */
+static inline void blk_stat_activate_msecs(struct blk_stat_callback *cb,
+					   unsigned int msecs)
|
||||||
|
{
|
||||||
|
mod_timer(&cb->timer, jiffies + msecs_to_jiffies(msecs));
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
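The callback interface above is what the rest of this series builds on; blk-wbt and the Kyber scheduler further down both use it. As a rough sketch of the intended usage pattern, a hypothetical consumer could be wired up as below. The my_* names and struct are illustrative assumptions, not part of this patch; the blk_stat_* calls and the nr_samples/mean fields of struct blk_rq_stat are taken from the declarations above.

/* Hypothetical consumer of the blk_stat_callback API declared above. */
struct my_stats {
	struct blk_stat_callback *cb;
	u64 last_read_mean;
};

static int my_bucket_fn(const struct request *rq)
{
	/* Two buckets, 0 for reads and 1 for writes (the scheme blk-wbt uses). */
	return rq_data_dir(rq);
}

static void my_timer_fn(struct blk_stat_callback *cb)
{
	struct my_stats *ms = cb->data;	/* private pointer passed at allocation */

	/* When the window expires, cb->stat[] holds the flushed per-bucket stats. */
	if (cb->stat[0].nr_samples)
		ms->last_read_mean = cb->stat[0].mean;

	/* Re-arm for another 100 ms window if we want to keep sampling. */
	if (!blk_stat_is_active(cb))
		blk_stat_activate_msecs(cb, 100);
}

static int my_setup(struct request_queue *q, struct my_stats *ms)
{
	ms->cb = blk_stat_alloc_callback(my_timer_fn, my_bucket_fn, 2, ms);
	if (!ms->cb)
		return -ENOMEM;
	blk_stat_add_callback(q, ms->cb);	/* start bucketing completions */
	blk_stat_activate_msecs(ms->cb, 100);	/* open the first window */
	return 0;
}

static void my_teardown(struct request_queue *q, struct my_stats *ms)
{
	blk_stat_remove_callback(q, ms->cb);	/* callback no longer runs after this */
	blk_stat_free_callback(ms->cb);
}

Teardown mirrors what blk_release_queue() and wbt_exit() do later in this series: the callback is removed from the queue before it is freed.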
@@ -208,7 +208,7 @@ static ssize_t queue_discard_max_store(struct request_queue *q,
 
 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(queue_discard_zeroes_data(q), page);
+	return queue_var_show(0, page);
 }
 
 static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
@@ -503,26 +503,6 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
 	return queue_var_show(blk_queue_dax(q), page);
 }
 
-static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
-{
-	return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
-			pre, (long long) stat->nr_samples,
-			(long long) stat->mean, (long long) stat->min,
-			(long long) stat->max);
-}
-
-static ssize_t queue_stats_show(struct request_queue *q, char *page)
-{
-	struct blk_rq_stat stat[2];
-	ssize_t ret;
-
-	blk_queue_stat_get(q, stat);
-
-	ret = print_stat(page, &stat[BLK_STAT_READ], "read :");
-	ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:");
-	return ret;
-}
-
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -691,17 +671,20 @@ static struct queue_sysfs_entry queue_dax_entry = {
 	.show = queue_dax_show,
 };
 
-static struct queue_sysfs_entry queue_stats_entry = {
-	.attr = {.name = "stats", .mode = S_IRUGO },
-	.show = queue_stats_show,
-};
-
 static struct queue_sysfs_entry queue_wb_lat_entry = {
 	.attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_wb_lat_show,
 	.store = queue_wb_lat_store,
 };
 
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+static struct queue_sysfs_entry throtl_sample_time_entry = {
+	.attr = {.name = "throttle_sample_time", .mode = S_IRUGO | S_IWUSR },
+	.show = blk_throtl_sample_time_show,
+	.store = blk_throtl_sample_time_store,
+};
+#endif
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -733,9 +716,11 @@ static struct attribute *default_attrs[] = {
 	&queue_poll_entry.attr,
 	&queue_wc_entry.attr,
 	&queue_dax_entry.attr,
-	&queue_stats_entry.attr,
 	&queue_wb_lat_entry.attr,
 	&queue_poll_delay_entry.attr,
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+	&throtl_sample_time_entry.attr,
+#endif
 	NULL,
 };
 
@@ -810,7 +795,9 @@ static void blk_release_queue(struct kobject *kobj)
 	struct request_queue *q =
 		container_of(kobj, struct request_queue, kobj);
 
-	wbt_exit(q);
+	if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
+		blk_stat_remove_callback(q, q->poll_cb);
+	blk_stat_free_callback(q->poll_cb);
 	bdi_put(q->backing_dev_info);
 	blkcg_exit_queue(q);
 
@@ -819,6 +806,8 @@ static void blk_release_queue(struct kobject *kobj)
 		elevator_exit(q, q->elevator);
 	}
 
+	blk_free_queue_stats(q->stats);
+
 	blk_exit_rl(&q->root_rl);
 
 	if (q->queue_tags)
@@ -855,23 +844,6 @@ struct kobj_type blk_queue_ktype = {
 	.release = blk_release_queue,
 };
 
-static void blk_wb_init(struct request_queue *q)
-{
-#ifndef CONFIG_BLK_WBT_MQ
-	if (q->mq_ops)
-		return;
-#endif
-#ifndef CONFIG_BLK_WBT_SQ
-	if (q->request_fn)
-		return;
-#endif
-
-	/*
-	 * If this fails, we don't get throttling
-	 */
-	wbt_init(q);
-}
-
 int blk_register_queue(struct gendisk *disk)
 {
 	int ret;
@@ -881,6 +853,11 @@ int blk_register_queue(struct gendisk *disk)
 	if (WARN_ON(!q))
 		return -ENXIO;
 
+	WARN_ONCE(test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags),
+		  "%s is registering an already registered queue\n",
+		  kobject_name(&dev->kobj));
+	queue_flag_set_unlocked(QUEUE_FLAG_REGISTERED, q);
+
 	/*
 	 * SCSI probing may synchronously create and destroy a lot of
 	 * request_queues for non-existent devices. Shutting down a fully
@@ -900,9 +877,6 @@ int blk_register_queue(struct gendisk *disk)
 	if (ret)
 		return ret;
 
-	if (q->mq_ops)
-		blk_mq_register_dev(dev, q);
-
 	/* Prevent changes through sysfs until registration is completed. */
 	mutex_lock(&q->sysfs_lock);
 
@@ -912,9 +886,14 @@ int blk_register_queue(struct gendisk *disk)
 		goto unlock;
 	}
 
+	if (q->mq_ops)
+		__blk_mq_register_dev(dev, q);
+
 	kobject_uevent(&q->kobj, KOBJ_ADD);
 
-	blk_wb_init(q);
+	wbt_enable_default(q);
+
+	blk_throtl_register_queue(q);
 
 	if (q->request_fn || (q->mq_ops && q->elevator)) {
 		ret = elv_register_queue(q);
@@ -939,6 +918,11 @@ void blk_unregister_queue(struct gendisk *disk)
 	if (WARN_ON(!q))
 		return;
 
+	queue_flag_clear_unlocked(QUEUE_FLAG_REGISTERED, q);
+
+	wbt_exit(q);
+
 	if (q->mq_ops)
 		blk_mq_unregister_dev(disk_to_dev(disk), q);
File diff suppressed because it is too large
@@ -89,7 +89,6 @@ static void blk_rq_timed_out(struct request *req)
 	ret = q->rq_timed_out_fn(req);
 	switch (ret) {
 	case BLK_EH_HANDLED:
-		/* Can we use req->errors here? */
 		__blk_complete_request(req);
 		break;
 	case BLK_EH_RESET_TIMER:
@@ -255,8 +255,8 @@ static inline bool stat_sample_valid(struct blk_rq_stat *stat)
 	 * that it's writes impacting us, and not just some sole read on
 	 * a device that is in a lower power state.
 	 */
-	return stat[BLK_STAT_READ].nr_samples >= 1 &&
-		stat[BLK_STAT_WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES;
+	return (stat[READ].nr_samples >= 1 &&
+		stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES);
 }
 
 static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
@@ -277,7 +277,7 @@ enum {
 	LAT_EXCEEDED,
 };
 
-static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
+static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 {
 	struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
 	u64 thislat;
@@ -293,7 +293,7 @@ static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 	 */
 	thislat = rwb_sync_issue_lat(rwb);
 	if (thislat > rwb->cur_win_nsec ||
-	    (thislat > rwb->min_lat_nsec && !stat[BLK_STAT_READ].nr_samples)) {
+	    (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) {
 		trace_wbt_lat(bdi, thislat);
 		return LAT_EXCEEDED;
 	}
@@ -308,8 +308,8 @@ static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 	 * waited or still has writes in flights, consider us doing
 	 * just writes as well.
 	 */
-	if ((stat[BLK_STAT_WRITE].nr_samples && blk_stat_is_current(stat)) ||
-	    wb_recent_wait(rwb) || wbt_inflight(rwb))
+	if (stat[WRITE].nr_samples || wb_recent_wait(rwb) ||
+	    wbt_inflight(rwb))
 		return LAT_UNKNOWN_WRITES;
 	return LAT_UNKNOWN;
 }
@@ -317,8 +317,8 @@ static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 	/*
 	 * If the 'min' latency exceeds our target, step down.
 	 */
-	if (stat[BLK_STAT_READ].min > rwb->min_lat_nsec) {
-		trace_wbt_lat(bdi, stat[BLK_STAT_READ].min);
+	if (stat[READ].min > rwb->min_lat_nsec) {
+		trace_wbt_lat(bdi, stat[READ].min);
 		trace_wbt_stat(bdi, stat);
 		return LAT_EXCEEDED;
 	}
@@ -329,14 +329,6 @@ static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 	return LAT_OK;
 }
 
-static int latency_exceeded(struct rq_wb *rwb)
-{
-	struct blk_rq_stat stat[2];
-
-	blk_queue_stat_get(rwb->queue, stat);
-	return __latency_exceeded(rwb, stat);
-}
-
 static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
 {
 	struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
@@ -355,7 +347,6 @@ static void scale_up(struct rq_wb *rwb)
 
 	rwb->scale_step--;
 	rwb->unknown_cnt = 0;
-	blk_stat_clear(rwb->queue);
 
 	rwb->scaled_max = calc_wb_limits(rwb);
 
@@ -385,15 +376,12 @@ static void scale_down(struct rq_wb *rwb, bool hard_throttle)
 
 	rwb->scaled_max = false;
 	rwb->unknown_cnt = 0;
-	blk_stat_clear(rwb->queue);
 	calc_wb_limits(rwb);
 	rwb_trace_step(rwb, "step down");
 }
 
 static void rwb_arm_timer(struct rq_wb *rwb)
 {
-	unsigned long expires;
-
 	if (rwb->scale_step > 0) {
 		/*
 		 * We should speed this up, using some variant of a fast
@@ -411,17 +399,16 @@ static void rwb_arm_timer(struct rq_wb *rwb)
 		rwb->cur_win_nsec = rwb->win_nsec;
 	}
 
-	expires = jiffies + nsecs_to_jiffies(rwb->cur_win_nsec);
-	mod_timer(&rwb->window_timer, expires);
+	blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec);
 }
 
-static void wb_timer_fn(unsigned long data)
+static void wb_timer_fn(struct blk_stat_callback *cb)
 {
-	struct rq_wb *rwb = (struct rq_wb *) data;
+	struct rq_wb *rwb = cb->data;
 	unsigned int inflight = wbt_inflight(rwb);
 	int status;
 
-	status = latency_exceeded(rwb);
+	status = latency_exceeded(rwb, cb->stat);
 
 	trace_wbt_timer(rwb->queue->backing_dev_info, status, rwb->scale_step,
 			inflight);
@@ -614,7 +601,7 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
 
 	__wbt_wait(rwb, bio->bi_opf, lock);
 
-	if (!timer_pending(&rwb->window_timer))
+	if (!blk_stat_is_active(rwb->cb))
 		rwb_arm_timer(rwb);
 
 	if (current_is_kswapd())
@@ -666,22 +653,37 @@ void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on)
 	rwb->wc = write_cache_on;
 }
 
 /*
- * Disable wbt, if enabled by default. Only called from CFQ, if we have
- * cgroups enabled
+ * Disable wbt, if enabled by default. Only called from CFQ.
  */
 void wbt_disable_default(struct request_queue *q)
 {
 	struct rq_wb *rwb = q->rq_wb;
 
-	if (rwb && rwb->enable_state == WBT_STATE_ON_DEFAULT) {
-		del_timer_sync(&rwb->window_timer);
-		rwb->win_nsec = rwb->min_lat_nsec = 0;
-		wbt_update_limits(rwb);
-	}
+	if (rwb && rwb->enable_state == WBT_STATE_ON_DEFAULT)
+		wbt_exit(q);
 }
 EXPORT_SYMBOL_GPL(wbt_disable_default);
 
+/*
+ * Enable wbt if defaults are configured that way
+ */
+void wbt_enable_default(struct request_queue *q)
+{
+	/* Throttling already enabled? */
+	if (q->rq_wb)
+		return;
+
+	/* Queue not registered? Maybe shutting down... */
+	if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
+		return;
+
+	if ((q->mq_ops && IS_ENABLED(CONFIG_BLK_WBT_MQ)) ||
+	    (q->request_fn && IS_ENABLED(CONFIG_BLK_WBT_SQ)))
+		wbt_init(q);
+}
+EXPORT_SYMBOL_GPL(wbt_enable_default);
+
 u64 wbt_default_latency_nsec(struct request_queue *q)
 {
 	/*
@@ -694,29 +696,33 @@ u64 wbt_default_latency_nsec(struct request_queue *q)
 	return 75000000ULL;
 }
 
+static int wbt_data_dir(const struct request *rq)
+{
+	return rq_data_dir(rq);
+}
+
 int wbt_init(struct request_queue *q)
 {
 	struct rq_wb *rwb;
 	int i;
 
-	/*
-	 * For now, we depend on the stats window being larger than
-	 * our monitoring window. Ensure that this isn't inadvertently
-	 * violated.
-	 */
-	BUILD_BUG_ON(RWB_WINDOW_NSEC > BLK_STAT_NSEC);
 	BUILD_BUG_ON(WBT_NR_BITS > BLK_STAT_RES_BITS);
 
 	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
 	if (!rwb)
 		return -ENOMEM;
 
+	rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
+	if (!rwb->cb) {
+		kfree(rwb);
+		return -ENOMEM;
+	}
+
 	for (i = 0; i < WBT_NUM_RWQ; i++) {
 		atomic_set(&rwb->rq_wait[i].inflight, 0);
 		init_waitqueue_head(&rwb->rq_wait[i].wait);
 	}
 
-	setup_timer(&rwb->window_timer, wb_timer_fn, (unsigned long) rwb);
 	rwb->wc = 1;
 	rwb->queue_depth = RWB_DEF_DEPTH;
 	rwb->last_comp = rwb->last_issue = jiffies;
@@ -726,10 +732,10 @@ int wbt_init(struct request_queue *q)
 	wbt_update_limits(rwb);
 
 	/*
-	 * Assign rwb, and turn on stats tracking for this queue
+	 * Assign rwb and add the stats callback.
 	 */
 	q->rq_wb = rwb;
-	blk_stat_enable(q);
+	blk_stat_add_callback(q, rwb->cb);
 
 	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
 
@@ -744,7 +750,8 @@ void wbt_exit(struct request_queue *q)
 	struct rq_wb *rwb = q->rq_wb;
 
 	if (rwb) {
-		del_timer_sync(&rwb->window_timer);
+		blk_stat_remove_callback(q, rwb->cb);
+		blk_stat_free_callback(rwb->cb);
 		q->rq_wb = NULL;
 		kfree(rwb);
 	}
@@ -32,27 +32,27 @@ enum {
 
 static inline void wbt_clear_state(struct blk_issue_stat *stat)
 {
-	stat->time &= BLK_STAT_TIME_MASK;
+	stat->stat &= ~BLK_STAT_RES_MASK;
 }
 
 static inline enum wbt_flags wbt_stat_to_mask(struct blk_issue_stat *stat)
 {
-	return (stat->time & BLK_STAT_MASK) >> BLK_STAT_SHIFT;
+	return (stat->stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT;
 }
 
 static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags wb_acct)
 {
-	stat->time |= ((u64) wb_acct) << BLK_STAT_SHIFT;
+	stat->stat |= ((u64) wb_acct) << BLK_STAT_RES_SHIFT;
 }
 
 static inline bool wbt_is_tracked(struct blk_issue_stat *stat)
 {
-	return (stat->time >> BLK_STAT_SHIFT) & WBT_TRACKED;
+	return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_TRACKED;
 }
 
 static inline bool wbt_is_read(struct blk_issue_stat *stat)
 {
-	return (stat->time >> BLK_STAT_SHIFT) & WBT_READ;
+	return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_READ;
 }
 
 struct rq_wait {
@@ -81,7 +81,7 @@ struct rq_wb {
 	u64 win_nsec;		/* default window size */
 	u64 cur_win_nsec;	/* current window size */
 
-	struct timer_list window_timer;
+	struct blk_stat_callback *cb;
 
 	s64 sync_issue;
 	void *sync_cookie;
@@ -117,6 +117,7 @@ void wbt_update_limits(struct rq_wb *);
 void wbt_requeue(struct rq_wb *, struct blk_issue_stat *);
 void wbt_issue(struct rq_wb *, struct blk_issue_stat *);
 void wbt_disable_default(struct request_queue *);
+void wbt_enable_default(struct request_queue *);
 
 void wbt_set_queue_depth(struct rq_wb *, unsigned int);
 void wbt_set_write_cache(struct rq_wb *, bool);
@@ -155,6 +156,9 @@ static inline void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
 static inline void wbt_disable_default(struct request_queue *q)
 {
 }
+static inline void wbt_enable_default(struct request_queue *q)
+{
+}
 static inline void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth)
 {
 }
block/blk.h
@@ -60,15 +60,12 @@ void blk_free_flush_queue(struct blk_flush_queue *q);
 int blk_init_rl(struct request_list *rl, struct request_queue *q,
 		gfp_t gfp_mask);
 void blk_exit_rl(struct request_list *rl);
-void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);
 void blk_queue_bypass_start(struct request_queue *q);
 void blk_queue_bypass_end(struct request_queue *q);
 void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
-bool __blk_end_bidi_request(struct request *rq, int error,
-			    unsigned int nr_bytes, unsigned int bidi_bytes);
 void blk_freeze_queue(struct request_queue *q);
 
 static inline void blk_queue_enter_live(struct request_queue *q)
@@ -319,10 +316,22 @@ static inline struct io_context *create_io_context(gfp_t gfp_mask, int node)
 extern void blk_throtl_drain(struct request_queue *q);
 extern int blk_throtl_init(struct request_queue *q);
 extern void blk_throtl_exit(struct request_queue *q);
+extern void blk_throtl_register_queue(struct request_queue *q);
 #else /* CONFIG_BLK_DEV_THROTTLING */
 static inline void blk_throtl_drain(struct request_queue *q) { }
 static inline int blk_throtl_init(struct request_queue *q) { return 0; }
 static inline void blk_throtl_exit(struct request_queue *q) { }
+static inline void blk_throtl_register_queue(struct request_queue *q) { }
 #endif /* CONFIG_BLK_DEV_THROTTLING */
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
+extern ssize_t blk_throtl_sample_time_store(struct request_queue *q,
+	const char *page, size_t count);
+extern void blk_throtl_bio_endio(struct bio *bio);
+extern void blk_throtl_stat_add(struct request *rq, u64 time);
+#else
+static inline void blk_throtl_bio_endio(struct bio *bio) { }
+static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
+#endif
 
 #endif /* BLK_INTERNAL_H */
@@ -37,7 +37,7 @@ static void bsg_destroy_job(struct kref *kref)
 	struct bsg_job *job = container_of(kref, struct bsg_job, kref);
 	struct request *rq = job->req;
 
-	blk_end_request_all(rq, rq->errors);
+	blk_end_request_all(rq, scsi_req(rq)->result);
 
 	put_device(job->dev);	/* release reference for the request */
 
@@ -74,7 +74,7 @@ void bsg_job_done(struct bsg_job *job, int result,
 	struct scsi_request *rq = scsi_req(req);
 	int err;
 
-	err = job->req->errors = result;
+	err = scsi_req(job->req)->result = result;
 	if (err < 0)
 		/* we're only returning the result field in the reply */
 		rq->sense_len = sizeof(u32);
@@ -177,7 +177,7 @@ failjob_rls_job:
  * @q: request queue to manage
  *
  * On error the create_bsg_job function should return a -Exyz error value
- * that will be set to the req->errors.
+ * that will be set to ->result.
 *
 * Drivers/subsys should pass this to the queue init function.
 */
@@ -201,7 +201,7 @@ static void bsg_request_fn(struct request_queue *q)
 
 		ret = bsg_create_job(dev, req);
 		if (ret) {
-			req->errors = ret;
+			scsi_req(req)->result = ret;
 			blk_end_request_all(req, ret);
 			spin_lock_irq(q->queue_lock);
 			continue;
block/bsg.c
@@ -391,13 +391,13 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 	struct scsi_request *req = scsi_req(rq);
 	int ret = 0;
 
-	dprintk("rq %p bio %p 0x%x\n", rq, bio, rq->errors);
+	dprintk("rq %p bio %p 0x%x\n", rq, bio, req->result);
 	/*
 	 * fill in all the output members
 	 */
-	hdr->device_status = rq->errors & 0xff;
-	hdr->transport_status = host_byte(rq->errors);
-	hdr->driver_status = driver_byte(rq->errors);
+	hdr->device_status = req->result & 0xff;
+	hdr->transport_status = host_byte(req->result);
+	hdr->driver_status = driver_byte(req->result);
 	hdr->info = 0;
 	if (hdr->device_status || hdr->transport_status || hdr->driver_status)
 		hdr->info |= SG_INFO_CHECK;
@@ -431,8 +431,8 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 	 * just a protocol response (i.e. non negative), that gets
 	 * processed above.
 	 */
-	if (!ret && rq->errors < 0)
-		ret = rq->errors;
+	if (!ret && req->result < 0)
+		ret = req->result;
 
 	blk_rq_unmap_user(bio);
 	scsi_req_free_cmd(req);
@@ -3761,16 +3761,14 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 }
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-static bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
+static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 {
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	struct cfq_queue *cfqq;
 	uint64_t serial_nr;
-	bool nonroot_cg;
 
 	rcu_read_lock();
 	serial_nr = bio_blkcg(bio)->css.serial_nr;
-	nonroot_cg = bio_blkcg(bio) != &blkcg_root;
 	rcu_read_unlock();
 
 	/*
@@ -3778,7 +3776,7 @@ static bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 	 * spuriously on a newly created cic but there's no harm.
 	 */
 	if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr))
-		return nonroot_cg;
+		return;
 
 	/*
 	 * Drop reference to queues. New queues will be assigned in new
@@ -3799,12 +3797,10 @@ static bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 	}
 
 	cic->blkcg_serial_nr = serial_nr;
-	return nonroot_cg;
 }
 #else
-static inline bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
+static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 {
-	return false;
 }
 #endif /* CONFIG_CFQ_GROUP_IOSCHED */
 
@@ -4449,12 +4445,11 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
 	const int rw = rq_data_dir(rq);
 	const bool is_sync = rq_is_sync(rq);
 	struct cfq_queue *cfqq;
-	bool disable_wbt;
 
 	spin_lock_irq(q->queue_lock);
 
 	check_ioprio_changed(cic, bio);
-	disable_wbt = check_blkcg_changed(cic, bio);
+	check_blkcg_changed(cic, bio);
 new_queue:
 	cfqq = cic_to_cfqq(cic, is_sync);
 	if (!cfqq || cfqq == &cfqd->oom_cfqq) {
@@ -4491,9 +4486,6 @@ new_queue:
 	rq->elv.priv[1] = cfqq->cfqg;
 	spin_unlock_irq(q->queue_lock);
 
-	if (disable_wbt)
-		wbt_disable_default(q);
-
 	return 0;
 }
 
@@ -4706,6 +4698,7 @@ static void cfq_registered_queue(struct request_queue *q)
 	 */
 	if (blk_queue_nonrot(q))
 		cfqd->cfq_slice_idle = 0;
+	wbt_disable_default(q);
 }
 
 /*
@@ -685,7 +685,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	case BLKALIGNOFF:
 		return compat_put_int(arg, bdev_alignment_offset(bdev));
 	case BLKDISCARDZEROES:
-		return compat_put_uint(arg, bdev_discard_zeroes_data(bdev));
+		return compat_put_uint(arg, 0);
 	case BLKFLSBUF:
 	case BLKROSET:
 	case BLKDISCARD:
@@ -41,6 +41,7 @@
 
 #include "blk.h"
 #include "blk-mq-sched.h"
+#include "blk-wbt.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
@@ -877,6 +878,8 @@ void elv_unregister_queue(struct request_queue *q)
 		kobject_uevent(&e->kobj, KOBJ_REMOVE);
 		kobject_del(&e->kobj);
 		e->registered = 0;
+		/* Re-enable throttling in case elevator disabled it */
+		wbt_enable_default(q);
 	}
 }
 EXPORT_SYMBOL(elv_unregister_queue);
@@ -1060,8 +1060,19 @@ static struct attribute *disk_attrs[] = {
 	NULL
 };
 
+static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, typeof(*dev), kobj);
+	struct gendisk *disk = dev_to_disk(dev);
+
+	if (a == &dev_attr_badblocks.attr && !disk->bb)
+		return 0;
+	return a->mode;
+}
+
 static struct attribute_group disk_attr_group = {
 	.attrs = disk_attrs,
+	.is_visible = disk_visible,
 };
 
 static const struct attribute_group *disk_attr_groups[] = {
@@ -1352,7 +1363,7 @@ struct kobject *get_disk(struct gendisk *disk)
 	owner = disk->fops->owner;
 	if (owner && !try_module_get(owner))
 		return NULL;
-	kobj = kobject_get(&disk_to_dev(disk)->kobj);
+	kobj = kobject_get_unless_zero(&disk_to_dev(disk)->kobj);
 	if (kobj == NULL) {
 		module_put(owner);
 		return NULL;
@@ -255,7 +255,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
 	truncate_inode_pages_range(mapping, start, end);
 
 	return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
-				    false);
+				    BLKDEV_ZERO_NOUNMAP);
 }
 
 static int put_ushort(unsigned long arg, unsigned short val)
@@ -547,7 +547,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case BLKALIGNOFF:
 		return put_int(arg, bdev_alignment_offset(bdev));
 	case BLKDISCARDZEROES:
-		return put_uint(arg, bdev_discard_zeroes_data(bdev));
+		return put_uint(arg, 0);
 	case BLKSECTGET:
 		max_sectors = min_t(unsigned int, USHRT_MAX,
 				    queue_max_sectors(bdev_get_queue(bdev)));
@@ -163,22 +163,12 @@ out:
 
 int ioprio_best(unsigned short aprio, unsigned short bprio)
 {
-	unsigned short aclass;
-	unsigned short bclass;
-
 	if (!ioprio_valid(aprio))
 		aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
 	if (!ioprio_valid(bprio))
 		bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
 
-	aclass = IOPRIO_PRIO_CLASS(aprio);
-	bclass = IOPRIO_PRIO_CLASS(bprio);
-	if (aclass == bclass)
-		return min(aprio, bprio);
-	if (aclass > bclass)
-		return bprio;
-	else
-		return aprio;
+	return min(aprio, bprio);
 }
 
 SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
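Once both priorities are normalized to valid values, a plain min() is sufficient because IOPRIO_PRIO_VALUE() packs the class into the high bits of the value, so a numerically smaller priority always belongs to the more (or equally) important class. A quick illustrative check, assuming the IOPRIO_CLASS_SHIFT of 13 from include/linux/ioprio.h and example priority levels:

/* IOPRIO_PRIO_VALUE(class, data) == (class << IOPRIO_CLASS_SHIFT) | data   */
/* RT (class 1), level 4: (1 << 13) | 4 = 8196                              */
/* BE (class 2), level 0: (2 << 13) | 0 = 16384                             */
/* min(8196, 16384) == 8196, so the RT priority wins, exactly as the removed
   explicit class comparison decided.                                       */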
block/kyber-iosched.c (new file)
@@ -0,0 +1,719 @@
/*
 * The Kyber I/O scheduler. Controls latency by throttling queue depths using
 * scalable techniques.
 *
 * Copyright (C) 2017 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/elevator.h>
#include <linux/module.h>
#include <linux/sbitmap.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-stat.h"

/* Scheduling domains. */
enum {
	KYBER_READ,
	KYBER_SYNC_WRITE,
	KYBER_OTHER, /* Async writes, discard, etc. */
	KYBER_NUM_DOMAINS,
};

enum {
	KYBER_MIN_DEPTH = 256,

	/*
	 * In order to prevent starvation of synchronous requests by a flood of
	 * asynchronous requests, we reserve 25% of requests for synchronous
	 * operations.
	 */
	KYBER_ASYNC_PERCENT = 75,
};

/*
 * Initial device-wide depths for each scheduling domain.
 *
 * Even for fast devices with lots of tags like NVMe, you can saturate
 * the device with only a fraction of the maximum possible queue depth.
 * So, we cap these to a reasonable value.
 */
static const unsigned int kyber_depth[] = {
	[KYBER_READ] = 256,
	[KYBER_SYNC_WRITE] = 128,
	[KYBER_OTHER] = 64,
};

/*
 * Scheduling domain batch sizes. We favor reads.
 */
static const unsigned int kyber_batch_size[] = {
	[KYBER_READ] = 16,
	[KYBER_SYNC_WRITE] = 8,
	[KYBER_OTHER] = 8,
};

struct kyber_queue_data {
	struct request_queue *q;

	struct blk_stat_callback *cb;

	/*
	 * The device is divided into multiple scheduling domains based on the
	 * request type. Each domain has a fixed number of in-flight requests of
	 * that type device-wide, limited by these tokens.
	 */
	struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];

	/*
	 * Async request percentage, converted to per-word depth for
	 * sbitmap_get_shallow().
	 */
	unsigned int async_depth;

	/* Target latencies in nanoseconds. */
	u64 read_lat_nsec, write_lat_nsec;
};

struct kyber_hctx_data {
	spinlock_t lock;
	struct list_head rqs[KYBER_NUM_DOMAINS];
	unsigned int cur_domain;
	unsigned int batching;
	wait_queue_t domain_wait[KYBER_NUM_DOMAINS];
	atomic_t wait_index[KYBER_NUM_DOMAINS];
};

static int rq_sched_domain(const struct request *rq)
{
	unsigned int op = rq->cmd_flags;

	if ((op & REQ_OP_MASK) == REQ_OP_READ)
		return KYBER_READ;
	else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))
		return KYBER_SYNC_WRITE;
	else
		return KYBER_OTHER;
}

enum {
	NONE = 0,
	GOOD = 1,
	GREAT = 2,
	BAD = -1,
	AWFUL = -2,
};

#define IS_GOOD(status) ((status) > 0)
#define IS_BAD(status) ((status) < 0)

static int kyber_lat_status(struct blk_stat_callback *cb,
			    unsigned int sched_domain, u64 target)
{
	u64 latency;

	if (!cb->stat[sched_domain].nr_samples)
		return NONE;

	latency = cb->stat[sched_domain].mean;
	if (latency >= 2 * target)
		return AWFUL;
	else if (latency > target)
		return BAD;
	else if (latency <= target / 2)
		return GREAT;
	else /* (latency <= target) */
		return GOOD;
}
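/*
 * Illustrative example (not part of the patch): with a 2 ms read target, a
 * domain whose mean completion latency over the window is 0.9 ms rates GREAT
 * (<= target / 2), 1.5 ms rates GOOD, 3 ms rates BAD (> target), and anything
 * from 4 ms up rates AWFUL (>= 2 * target).
 */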
/*
 * Adjust the read or synchronous write depth given the status of reads and
 * writes. The goal is that the latencies of the two domains are fair (i.e., if
 * one is good, then the other is good).
 */
static void kyber_adjust_rw_depth(struct kyber_queue_data *kqd,
				  unsigned int sched_domain, int this_status,
				  int other_status)
{
	unsigned int orig_depth, depth;

	/*
	 * If this domain had no samples, or reads and writes are both good or
	 * both bad, don't adjust the depth.
	 */
	if (this_status == NONE ||
	    (IS_GOOD(this_status) && IS_GOOD(other_status)) ||
	    (IS_BAD(this_status) && IS_BAD(other_status)))
		return;

	orig_depth = depth = kqd->domain_tokens[sched_domain].sb.depth;

	if (other_status == NONE) {
		depth++;
	} else {
		switch (this_status) {
		case GOOD:
			if (other_status == AWFUL)
				depth -= max(depth / 4, 1U);
			else
				depth -= max(depth / 8, 1U);
			break;
		case GREAT:
			if (other_status == AWFUL)
				depth /= 2;
			else
				depth -= max(depth / 4, 1U);
			break;
		case BAD:
			depth++;
			break;
		case AWFUL:
			if (other_status == GREAT)
				depth += 2;
			else
				depth++;
			break;
		}
	}

	depth = clamp(depth, 1U, kyber_depth[sched_domain]);
	if (depth != orig_depth)
		sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
}

/*
 * Adjust the depth of other requests given the status of reads and synchronous
 * writes. As long as either domain is doing fine, we don't throttle, but if
 * both domains are doing badly, we throttle heavily.
 */
static void kyber_adjust_other_depth(struct kyber_queue_data *kqd,
				     int read_status, int write_status,
				     bool have_samples)
{
	unsigned int orig_depth, depth;
	int status;

	orig_depth = depth = kqd->domain_tokens[KYBER_OTHER].sb.depth;

	if (read_status == NONE && write_status == NONE) {
		depth += 2;
	} else if (have_samples) {
		if (read_status == NONE)
			status = write_status;
		else if (write_status == NONE)
			status = read_status;
		else
			status = max(read_status, write_status);
		switch (status) {
		case GREAT:
			depth += 2;
			break;
		case GOOD:
			depth++;
			break;
		case BAD:
			depth -= max(depth / 4, 1U);
			break;
		case AWFUL:
			depth /= 2;
			break;
		}
	}

	depth = clamp(depth, 1U, kyber_depth[KYBER_OTHER]);
	if (depth != orig_depth)
		sbitmap_queue_resize(&kqd->domain_tokens[KYBER_OTHER], depth);
}

/*
 * Apply heuristics for limiting queue depths based on gathered latency
 * statistics.
 */
static void kyber_stat_timer_fn(struct blk_stat_callback *cb)
{
	struct kyber_queue_data *kqd = cb->data;
	int read_status, write_status;

	read_status = kyber_lat_status(cb, KYBER_READ, kqd->read_lat_nsec);
	write_status = kyber_lat_status(cb, KYBER_SYNC_WRITE, kqd->write_lat_nsec);

	kyber_adjust_rw_depth(kqd, KYBER_READ, read_status, write_status);
	kyber_adjust_rw_depth(kqd, KYBER_SYNC_WRITE, write_status, read_status);
	kyber_adjust_other_depth(kqd, read_status, write_status,
				 cb->stat[KYBER_OTHER].nr_samples != 0);

	/*
	 * Continue monitoring latencies if we aren't hitting the targets or
	 * we're still throttling other requests.
	 */
	if (!blk_stat_is_active(kqd->cb) &&
	    ((IS_BAD(read_status) || IS_BAD(write_status) ||
	      kqd->domain_tokens[KYBER_OTHER].sb.depth < kyber_depth[KYBER_OTHER])))
		blk_stat_activate_msecs(kqd->cb, 100);
}

static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
{
	/*
	 * All of the hardware queues have the same depth, so we can just grab
	 * the shift of the first one.
	 */
	return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
}

static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
{
	struct kyber_queue_data *kqd;
	unsigned int max_tokens;
	unsigned int shift;
	int ret = -ENOMEM;
	int i;

	kqd = kmalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
	if (!kqd)
		goto err;
	kqd->q = q;

	kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, rq_sched_domain,
					  KYBER_NUM_DOMAINS, kqd);
	if (!kqd->cb)
		goto err_kqd;

	/*
	 * The maximum number of tokens for any scheduling domain is at least
	 * the queue depth of a single hardware queue. If the hardware doesn't
	 * have many tags, still provide a reasonable number.
	 */
	max_tokens = max_t(unsigned int, q->tag_set->queue_depth,
			   KYBER_MIN_DEPTH);
	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		WARN_ON(!kyber_depth[i]);
		WARN_ON(!kyber_batch_size[i]);
		ret = sbitmap_queue_init_node(&kqd->domain_tokens[i],
					      max_tokens, -1, false, GFP_KERNEL,
					      q->node);
		if (ret) {
			while (--i >= 0)
				sbitmap_queue_free(&kqd->domain_tokens[i]);
			goto err_cb;
		}
		sbitmap_queue_resize(&kqd->domain_tokens[i], kyber_depth[i]);
	}

	shift = kyber_sched_tags_shift(kqd);
	kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
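	/*
	 * Illustrative example (not part of the patch): with 64 tags per
	 * sbitmap word (shift == 6), async_depth = 64 * 75 / 100 = 48, so
	 * asynchronous requests may take at most 48 of every 64 scheduler
	 * tags, leaving the rest for synchronous I/O.
	 */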
|
||||||
|
kqd->read_lat_nsec = 2000000ULL;
|
||||||
|
kqd->write_lat_nsec = 10000000ULL;
|
||||||
|
|
||||||
|
return kqd;
|
||||||
|
|
||||||
|
err_cb:
|
||||||
|
blk_stat_free_callback(kqd->cb);
|
||||||
|
err_kqd:
|
||||||
|
kfree(kqd);
|
||||||
|
err:
|
||||||
|
return ERR_PTR(ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||||
|
{
|
||||||
|
struct kyber_queue_data *kqd;
|
||||||
|
struct elevator_queue *eq;
|
||||||
|
|
||||||
|
eq = elevator_alloc(q, e);
|
||||||
|
if (!eq)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
kqd = kyber_queue_data_alloc(q);
|
||||||
|
if (IS_ERR(kqd)) {
|
||||||
|
kobject_put(&eq->kobj);
|
||||||
|
return PTR_ERR(kqd);
|
||||||
|
}
|
||||||
|
|
||||||
|
eq->elevator_data = kqd;
|
||||||
|
q->elevator = eq;
|
||||||
|
|
||||||
|
blk_stat_add_callback(q, kqd->cb);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void kyber_exit_sched(struct elevator_queue *e)
|
||||||
|
{
|
||||||
|
struct kyber_queue_data *kqd = e->elevator_data;
|
||||||
|
struct request_queue *q = kqd->q;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
blk_stat_remove_callback(q, kqd->cb);
|
||||||
|
|
||||||
|
for (i = 0; i < KYBER_NUM_DOMAINS; i++)
|
||||||
|
sbitmap_queue_free(&kqd->domain_tokens[i]);
|
||||||
|
blk_stat_free_callback(kqd->cb);
|
||||||
|
kfree(kqd);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
|
||||||
|
{
|
||||||
|
struct kyber_hctx_data *khd;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node);
|
||||||
|
if (!khd)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
spin_lock_init(&khd->lock);
|
||||||
|
|
||||||
|
for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
|
||||||
|
INIT_LIST_HEAD(&khd->rqs[i]);
|
||||||
|
INIT_LIST_HEAD(&khd->domain_wait[i].task_list);
|
||||||
|
atomic_set(&khd->wait_index[i], 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
khd->cur_domain = 0;
|
||||||
|
khd->batching = 0;
|
||||||
|
|
||||||
|
hctx->sched_data = khd;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
|
||||||
|
{
|
||||||
|
kfree(hctx->sched_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int rq_get_domain_token(struct request *rq)
|
||||||
|
{
|
||||||
|
return (long)rq->elv.priv[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rq_set_domain_token(struct request *rq, int token)
|
||||||
|
{
|
||||||
|
rq->elv.priv[0] = (void *)(long)token;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rq_clear_domain_token(struct kyber_queue_data *kqd,
|
||||||
|
struct request *rq)
|
||||||
|
{
|
||||||
|
unsigned int sched_domain;
|
||||||
|
int nr;
|
||||||
|
|
||||||
|
nr = rq_get_domain_token(rq);
|
||||||
|
if (nr != -1) {
|
||||||
|
sched_domain = rq_sched_domain(rq);
|
||||||
|
sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
|
||||||
|
rq->mq_ctx->cpu);
|
||||||
|
}
|
||||||
|
}
|
||||||
|

static struct request *kyber_get_request(struct request_queue *q,
					 unsigned int op,
					 struct blk_mq_alloc_data *data)
{
	struct kyber_queue_data *kqd = q->elevator->elevator_data;
	struct request *rq;

	/*
	 * We use the scheduler tags as per-hardware queue queueing tokens.
	 * Async requests can be limited at this stage.
	 */
	if (!op_is_sync(op))
		data->shallow_depth = kqd->async_depth;

	rq = __blk_mq_alloc_request(data, op);
	if (rq)
		rq_set_domain_token(rq, -1);
	return rq;
}

static void kyber_put_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct kyber_queue_data *kqd = q->elevator->elevator_data;

	rq_clear_domain_token(kqd, rq);
	blk_mq_finish_request(rq);
}

static void kyber_completed_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct kyber_queue_data *kqd = q->elevator->elevator_data;
	unsigned int sched_domain;
	u64 now, latency, target;

	/*
	 * Check if this request met our latency goal. If not, quickly gather
	 * some statistics and start throttling.
	 */
	sched_domain = rq_sched_domain(rq);
	switch (sched_domain) {
	case KYBER_READ:
		target = kqd->read_lat_nsec;
		break;
	case KYBER_SYNC_WRITE:
		target = kqd->write_lat_nsec;
		break;
	default:
		return;
	}

	/* If we are already monitoring latencies, don't check again. */
	if (blk_stat_is_active(kqd->cb))
		return;

	now = __blk_stat_time(ktime_to_ns(ktime_get()));
	if (now < blk_stat_time(&rq->issue_stat))
		return;

	latency = now - blk_stat_time(&rq->issue_stat);

	if (latency > target)
		blk_stat_activate_msecs(kqd->cb, 10);
}

static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
				  struct blk_mq_hw_ctx *hctx)
{
	LIST_HEAD(rq_list);
	struct request *rq, *next;

	blk_mq_flush_busy_ctxs(hctx, &rq_list);
	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
		unsigned int sched_domain;

		sched_domain = rq_sched_domain(rq);
		list_move_tail(&rq->queuelist, &khd->rqs[sched_domain]);
	}
}

static int kyber_domain_wake(wait_queue_t *wait, unsigned mode, int flags,
			     void *key)
{
	struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private);

	list_del_init(&wait->task_list);
	blk_mq_run_hw_queue(hctx, true);
	return 1;
}

static int kyber_get_domain_token(struct kyber_queue_data *kqd,
				  struct kyber_hctx_data *khd,
				  struct blk_mq_hw_ctx *hctx)
{
	unsigned int sched_domain = khd->cur_domain;
	struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
	wait_queue_t *wait = &khd->domain_wait[sched_domain];
	struct sbq_wait_state *ws;
	int nr;

	nr = __sbitmap_queue_get(domain_tokens);
	if (nr >= 0)
		return nr;

	/*
	 * If we failed to get a domain token, make sure the hardware queue is
	 * run when one becomes available. Note that this is serialized on
	 * khd->lock, but we still need to be careful about the waker.
	 */
	if (list_empty_careful(&wait->task_list)) {
		init_waitqueue_func_entry(wait, kyber_domain_wake);
		wait->private = hctx;
		ws = sbq_wait_ptr(domain_tokens,
				  &khd->wait_index[sched_domain]);
		add_wait_queue(&ws->wait, wait);

		/*
		 * Try again in case a token was freed before we got on the wait
		 * queue.
		 */
		nr = __sbitmap_queue_get(domain_tokens);
	}
	return nr;
}

static struct request *
kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
			  struct kyber_hctx_data *khd,
			  struct blk_mq_hw_ctx *hctx,
			  bool *flushed)
{
	struct list_head *rqs;
	struct request *rq;
	int nr;

	rqs = &khd->rqs[khd->cur_domain];
	rq = list_first_entry_or_null(rqs, struct request, queuelist);

	/*
	 * If there wasn't already a pending request and we haven't flushed the
	 * software queues yet, flush the software queues and check again.
	 */
	if (!rq && !*flushed) {
		kyber_flush_busy_ctxs(khd, hctx);
		*flushed = true;
		rq = list_first_entry_or_null(rqs, struct request, queuelist);
	}

	if (rq) {
		nr = kyber_get_domain_token(kqd, khd, hctx);
		if (nr >= 0) {
			khd->batching++;
			rq_set_domain_token(rq, nr);
			list_del_init(&rq->queuelist);
			return rq;
		}
	}

	/* There were either no pending requests or no tokens. */
	return NULL;
}

static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
	struct kyber_hctx_data *khd = hctx->sched_data;
	bool flushed = false;
	struct request *rq;
	int i;

	spin_lock(&khd->lock);

	/*
	 * First, if we are still entitled to batch, try to dispatch a request
	 * from the batch.
	 */
	if (khd->batching < kyber_batch_size[khd->cur_domain]) {
		rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
		if (rq)
			goto out;
	}

	/*
	 * Either,
	 * 1. We were no longer entitled to a batch.
	 * 2. The domain we were batching didn't have any requests.
	 * 3. The domain we were batching was out of tokens.
	 *
	 * Start another batch. Note that this wraps back around to the original
	 * domain if no other domains have requests or tokens.
	 */
	khd->batching = 0;
	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		if (khd->cur_domain == KYBER_NUM_DOMAINS - 1)
			khd->cur_domain = 0;
		else
			khd->cur_domain++;

		rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
		if (rq)
			goto out;
	}

	rq = NULL;
out:
	spin_unlock(&khd->lock);
	return rq;
}

static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
{
	struct kyber_hctx_data *khd = hctx->sched_data;
	int i;

	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
		if (!list_empty_careful(&khd->rqs[i]))
			return true;
	}
	return false;
}

#define KYBER_LAT_SHOW_STORE(op)					\
static ssize_t kyber_##op##_lat_show(struct elevator_queue *e,		\
				     char *page)			\
{									\
	struct kyber_queue_data *kqd = e->elevator_data;		\
									\
	return sprintf(page, "%llu\n", kqd->op##_lat_nsec);		\
}									\
									\
static ssize_t kyber_##op##_lat_store(struct elevator_queue *e,		\
				      const char *page, size_t count)	\
{									\
	struct kyber_queue_data *kqd = e->elevator_data;		\
	unsigned long long nsec;					\
	int ret;							\
									\
	ret = kstrtoull(page, 10, &nsec);				\
	if (ret)							\
		return ret;						\
									\
	kqd->op##_lat_nsec = nsec;					\
									\
	return count;							\
}
KYBER_LAT_SHOW_STORE(read);
KYBER_LAT_SHOW_STORE(write);
#undef KYBER_LAT_SHOW_STORE
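For reference, hand-expanded for the read case (an illustration only, not an extra definition in the file; the write variant is identical with write_lat_nsec), KYBER_LAT_SHOW_STORE(read) produces roughly:

static ssize_t kyber_read_lat_show(struct elevator_queue *e, char *page)
{
	struct kyber_queue_data *kqd = e->elevator_data;

	return sprintf(page, "%llu\n", kqd->read_lat_nsec);
}

static ssize_t kyber_read_lat_store(struct elevator_queue *e,
				    const char *page, size_t count)
{
	struct kyber_queue_data *kqd = e->elevator_data;
	unsigned long long nsec;
	int ret;

	ret = kstrtoull(page, 10, &nsec);
	if (ret)
		return ret;

	kqd->read_lat_nsec = nsec;

	return count;
}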

#define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
static struct elv_fs_entry kyber_sched_attrs[] = {
	KYBER_LAT_ATTR(read),
	KYBER_LAT_ATTR(write),
	__ATTR_NULL
};
#undef KYBER_LAT_ATTR

static struct elevator_type kyber_sched = {
	.ops.mq = {
		.init_sched = kyber_init_sched,
		.exit_sched = kyber_exit_sched,
		.init_hctx = kyber_init_hctx,
		.exit_hctx = kyber_exit_hctx,
		.get_request = kyber_get_request,
		.put_request = kyber_put_request,
		.completed_request = kyber_completed_request,
		.dispatch_request = kyber_dispatch_request,
		.has_work = kyber_has_work,
	},
	.uses_mq = true,
	.elevator_attrs = kyber_sched_attrs,
	.elevator_name = "kyber",
	.elevator_owner = THIS_MODULE,
};

static int __init kyber_init(void)
{
	return elv_register(&kyber_sched);
}

static void __exit kyber_exit(void)
{
	elv_unregister(&kyber_sched);
}

module_init(kyber_init);
module_exit(kyber_exit);

MODULE_AUTHOR("Omar Sandoval");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Kyber I/O scheduler");
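As with other elevators, once this module is registered the scheduler is selected per queue through the block layer's usual scheduler attribute, and the two tunables declared in kyber_sched_attrs then appear as read_lat_nsec and write_lat_nsec in that queue's iosched sysfs directory, accepting plain nanosecond values per the kstrtoull parsing above.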
@@ -497,7 +497,6 @@ rescan:
 
 	if (disk->fops->revalidate_disk)
 		disk->fops->revalidate_disk(disk);
-	blk_integrity_revalidate(disk);
 	check_disk_size_change(disk, bdev);
 	bdev->bd_invalidated = 0;
 	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
@@ -262,11 +262,11 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 	/*
 	 * fill in all the output members
 	 */
-	hdr->status = rq->errors & 0xff;
-	hdr->masked_status = status_byte(rq->errors);
-	hdr->msg_status = msg_byte(rq->errors);
-	hdr->host_status = host_byte(rq->errors);
-	hdr->driver_status = driver_byte(rq->errors);
+	hdr->status = req->result & 0xff;
+	hdr->masked_status = status_byte(req->result);
+	hdr->msg_status = msg_byte(req->result);
+	hdr->host_status = host_byte(req->result);
+	hdr->driver_status = driver_byte(req->result);
 	hdr->info = 0;
 	if (hdr->masked_status || hdr->host_status || hdr->driver_status)
 		hdr->info |= SG_INFO_CHECK;
@@ -362,7 +362,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 		goto out_free_cdb;
 
 	bio = rq->bio;
-	rq->retries = 0;
+	req->retries = 0;
 
 	start_time = jiffies;
 
@@ -476,13 +476,13 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 		goto error;
 
 	/* default. possible overriden later */
-	rq->retries = 5;
+	req->retries = 5;
 
 	switch (opcode) {
 	case SEND_DIAGNOSTIC:
 	case FORMAT_UNIT:
 		rq->timeout = FORMAT_UNIT_TIMEOUT;
-		rq->retries = 1;
+		req->retries = 1;
 		break;
 	case START_STOP:
 		rq->timeout = START_STOP_TIMEOUT;
@@ -495,7 +495,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 		break;
 	case READ_DEFECT_DATA:
 		rq->timeout = READ_DEFECT_DATA_TIMEOUT;
-		rq->retries = 1;
+		req->retries = 1;
 		break;
 	default:
 		rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
@@ -509,7 +509,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 
 	blk_execute_rq(q, disk, rq, 0);
 
-	err = rq->errors & 0xff;	/* only 8 bit SCSI status */
+	err = req->result & 0xff;	/* only 8 bit SCSI status */
 	if (err) {
 		if (req->sense_len && req->sense) {
 			bytes = (OMAX_SB_LEN > req->sense_len) ?
@@ -547,7 +547,8 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 	scsi_req(rq)->cmd[0] = cmd;
 	scsi_req(rq)->cmd[4] = data;
 	scsi_req(rq)->cmd_len = 6;
-	err = blk_execute_rq(q, bd_disk, rq, 0);
+	blk_execute_rq(q, bd_disk, rq, 0);
+	err = scsi_req(rq)->result ? -EIO : 0;
 	blk_put_request(rq);
 
 	return err;
block/sed-opal.c (153 changed lines)

@@ -275,8 +275,8 @@ static bool check_tper(const void *data)
 	u8 flags = tper->supported_features;
 
 	if (!(flags & TPER_SYNC_SUPPORTED)) {
-		pr_err("TPer sync not supported. flags = %d\n",
+		pr_debug("TPer sync not supported. flags = %d\n",
 			tper->supported_features);
 		return false;
 	}
 
@@ -289,7 +289,7 @@ static bool check_sum(const void *data)
 	u32 nlo = be32_to_cpu(sum->num_locking_objects);
 
 	if (nlo == 0) {
-		pr_err("Need at least one locking object.\n");
+		pr_debug("Need at least one locking object.\n");
 		return false;
 	}
 
@@ -385,9 +385,9 @@ static int next(struct opal_dev *dev)
 
 		error = step->fn(dev, step->data);
 		if (error) {
-			pr_err("Error on step function: %d with error %d: %s\n",
+			pr_debug("Error on step function: %d with error %d: %s\n",
 				state, error,
 				opal_error_to_human(error));
 
 			/* For each OPAL command we do a discovery0 then we
 			 * start some sort of session.
@@ -419,8 +419,8 @@ static int opal_discovery0_end(struct opal_dev *dev)
 	print_buffer(dev->resp, hlen);
 
 	if (hlen > IO_BUFFER_LENGTH - sizeof(*hdr)) {
-		pr_warn("Discovery length overflows buffer (%zu+%u)/%u\n",
+		pr_debug("Discovery length overflows buffer (%zu+%u)/%u\n",
 			sizeof(*hdr), hlen, IO_BUFFER_LENGTH);
 		return -EFAULT;
 	}
 
@@ -503,7 +503,7 @@ static void add_token_u8(int *err, struct opal_dev *cmd, u8 tok)
 	if (*err)
 		return;
 	if (cmd->pos >= IO_BUFFER_LENGTH - 1) {
-		pr_err("Error adding u8: end of buffer.\n");
+		pr_debug("Error adding u8: end of buffer.\n");
 		*err = -ERANGE;
 		return;
 	}
@@ -553,7 +553,7 @@ static void add_token_u64(int *err, struct opal_dev *cmd, u64 number)
 	len = DIV_ROUND_UP(msb, 4);
 
 	if (cmd->pos >= IO_BUFFER_LENGTH - len - 1) {
-		pr_err("Error adding u64: end of buffer.\n");
+		pr_debug("Error adding u64: end of buffer.\n");
 		*err = -ERANGE;
 		return;
 	}
@@ -579,7 +579,7 @@ static void add_token_bytestring(int *err, struct opal_dev *cmd,
 	}
 
 	if (len >= IO_BUFFER_LENGTH - cmd->pos - header_len) {
-		pr_err("Error adding bytestring: end of buffer.\n");
+		pr_debug("Error adding bytestring: end of buffer.\n");
 		*err = -ERANGE;
 		return;
 	}
@@ -597,7 +597,7 @@ static void add_token_bytestring(int *err, struct opal_dev *cmd,
 static int build_locking_range(u8 *buffer, size_t length, u8 lr)
 {
 	if (length > OPAL_UID_LENGTH) {
-		pr_err("Can't build locking range. Length OOB\n");
+		pr_debug("Can't build locking range. Length OOB\n");
 		return -ERANGE;
 	}
 
@@ -614,7 +614,7 @@ static int build_locking_range(u8 *buffer, size_t length, u8 lr)
 static int build_locking_user(u8 *buffer, size_t length, u8 lr)
 {
 	if (length > OPAL_UID_LENGTH) {
-		pr_err("Can't build locking range user, Length OOB\n");
+		pr_debug("Can't build locking range user, Length OOB\n");
 		return -ERANGE;
 	}
 
@@ -648,7 +648,7 @@ static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn)
 	add_token_u8(&err, cmd, OPAL_ENDLIST);
 
 	if (err) {
-		pr_err("Error finalizing command.\n");
+		pr_debug("Error finalizing command.\n");
 		return -EFAULT;
 	}
 
@@ -660,7 +660,7 @@ static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn)
 	hdr->subpkt.length = cpu_to_be32(cmd->pos - sizeof(*hdr));
 	while (cmd->pos % 4) {
 		if (cmd->pos >= IO_BUFFER_LENGTH) {
-			pr_err("Error: Buffer overrun\n");
+			pr_debug("Error: Buffer overrun\n");
 			return -ERANGE;
 		}
 		cmd->cmd[cmd->pos++] = 0;
@@ -679,14 +679,14 @@ static const struct opal_resp_tok *response_get_token(
 	const struct opal_resp_tok *tok;
 
 	if (n >= resp->num) {
-		pr_err("Token number doesn't exist: %d, resp: %d\n",
+		pr_debug("Token number doesn't exist: %d, resp: %d\n",
 			n, resp->num);
 		return ERR_PTR(-EINVAL);
 	}
 
 	tok = &resp->toks[n];
 	if (tok->len == 0) {
-		pr_err("Token length must be non-zero\n");
+		pr_debug("Token length must be non-zero\n");
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -727,7 +727,7 @@ static ssize_t response_parse_short(struct opal_resp_tok *tok,
 
 	tok->type = OPAL_DTA_TOKENID_UINT;
 	if (tok->len > 9) {
-		pr_warn("uint64 with more than 8 bytes\n");
+		pr_debug("uint64 with more than 8 bytes\n");
 		return -EINVAL;
 	}
 	for (i = tok->len - 1; i > 0; i--) {
@@ -814,8 +814,8 @@ static int response_parse(const u8 *buf, size_t length,
 
 	if (clen == 0 || plen == 0 || slen == 0 ||
 	    slen > IO_BUFFER_LENGTH - sizeof(*hdr)) {
-		pr_err("Bad header length. cp: %u, pkt: %u, subpkt: %u\n",
+		pr_debug("Bad header length. cp: %u, pkt: %u, subpkt: %u\n",
 			clen, plen, slen);
 		print_buffer(pos, sizeof(*hdr));
 		return -EINVAL;
 	}
@@ -848,7 +848,7 @@ static int response_parse(const u8 *buf, size_t length,
 	}
 
 	if (num_entries == 0) {
-		pr_err("Couldn't parse response.\n");
+		pr_debug("Couldn't parse response.\n");
 		return -EINVAL;
 	}
 	resp->num = num_entries;
@@ -861,18 +861,18 @@ static size_t response_get_string(const struct parsed_resp *resp, int n,
 {
 	*store = NULL;
 	if (!resp) {
-		pr_err("Response is NULL\n");
+		pr_debug("Response is NULL\n");
 		return 0;
 	}
 
 	if (n > resp->num) {
-		pr_err("Response has %d tokens. Can't access %d\n",
+		pr_debug("Response has %d tokens. Can't access %d\n",
 			resp->num, n);
 		return 0;
 	}
 
 	if (resp->toks[n].type != OPAL_DTA_TOKENID_BYTESTRING) {
-		pr_err("Token is not a byte string!\n");
+		pr_debug("Token is not a byte string!\n");
 		return 0;
 	}
 
@@ -883,26 +883,26 @@ static size_t response_get_string(const struct parsed_resp *resp, int n,
 static u64 response_get_u64(const struct parsed_resp *resp, int n)
 {
 	if (!resp) {
-		pr_err("Response is NULL\n");
+		pr_debug("Response is NULL\n");
 		return 0;
 	}
 
 	if (n > resp->num) {
-		pr_err("Response has %d tokens. Can't access %d\n",
+		pr_debug("Response has %d tokens. Can't access %d\n",
 			resp->num, n);
 		return 0;
 	}
 
 	if (resp->toks[n].type != OPAL_DTA_TOKENID_UINT) {
-		pr_err("Token is not unsigned it: %d\n",
+		pr_debug("Token is not unsigned it: %d\n",
 			resp->toks[n].type);
 		return 0;
 	}
 
 	if (!(resp->toks[n].width == OPAL_WIDTH_TINY ||
 	      resp->toks[n].width == OPAL_WIDTH_SHORT)) {
-		pr_err("Atom is not short or tiny: %d\n",
+		pr_debug("Atom is not short or tiny: %d\n",
 			resp->toks[n].width);
 		return 0;
 	}
 
@@ -949,7 +949,7 @@ static int parse_and_check_status(struct opal_dev *dev)
 
 	error = response_parse(dev->resp, IO_BUFFER_LENGTH, &dev->parsed);
 	if (error) {
-		pr_err("Couldn't parse response.\n");
+		pr_debug("Couldn't parse response.\n");
 		return error;
 	}
 
@@ -975,7 +975,7 @@ static int start_opal_session_cont(struct opal_dev *dev)
 	tsn = response_get_u64(&dev->parsed, 5);
 
 	if (hsn == 0 && tsn == 0) {
-		pr_err("Couldn't authenticate session\n");
+		pr_debug("Couldn't authenticate session\n");
 		return -EPERM;
 	}
 
@@ -1012,7 +1012,7 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont)
 
 	ret = cmd_finalize(dev, dev->hsn, dev->tsn);
 	if (ret) {
-		pr_err("Error finalizing command buffer: %d\n", ret);
+		pr_debug("Error finalizing command buffer: %d\n", ret);
 		return ret;
 	}
 
@@ -1041,7 +1041,7 @@ static int gen_key(struct opal_dev *dev, void *data)
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 
 	if (err) {
-		pr_err("Error building gen key command\n");
+		pr_debug("Error building gen key command\n");
 		return err;
 
 	}
@@ -1059,8 +1059,8 @@ static int get_active_key_cont(struct opal_dev *dev)
 		return error;
 	keylen = response_get_string(&dev->parsed, 4, &activekey);
 	if (!activekey) {
-		pr_err("%s: Couldn't extract the Activekey from the response\n",
+		pr_debug("%s: Couldn't extract the Activekey from the response\n",
 			__func__);
 		return OPAL_INVAL_PARAM;
 	}
 	dev->prev_data = kmemdup(activekey, keylen, GFP_KERNEL);
@@ -1103,7 +1103,7 @@ static int get_active_key(struct opal_dev *dev, void *data)
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 	if (err) {
-		pr_err("Error building get active key command\n");
+		pr_debug("Error building get active key command\n");
 		return err;
 	}
 
@@ -1159,7 +1159,7 @@ static inline int enable_global_lr(struct opal_dev *dev, u8 *uid,
 	err = generic_lr_enable_disable(dev, uid, !!setup->RLE, !!setup->WLE,
 					0, 0);
 	if (err)
-		pr_err("Failed to create enable global lr command\n");
+		pr_debug("Failed to create enable global lr command\n");
 	return err;
 }
 
@@ -1217,7 +1217,7 @@ static int setup_locking_range(struct opal_dev *dev, void *data)
 
 	}
 	if (err) {
-		pr_err("Error building Setup Locking range command.\n");
+		pr_debug("Error building Setup Locking range command.\n");
 		return err;
 
 	}
@@ -1234,11 +1234,8 @@ static int start_generic_opal_session(struct opal_dev *dev,
 	u32 hsn;
 	int err = 0;
 
-	if (key == NULL && auth != OPAL_ANYBODY_UID) {
-		pr_err("%s: Attempted to open ADMIN_SP Session without a Host" \
-			"Challenge, and not as the Anybody UID\n", __func__);
+	if (key == NULL && auth != OPAL_ANYBODY_UID)
 		return OPAL_INVAL_PARAM;
-	}
 
 	clear_opal_cmd(dev);
 
@@ -1273,12 +1270,12 @@ static int start_generic_opal_session(struct opal_dev *dev,
 		add_token_u8(&err, dev, OPAL_ENDLIST);
 		break;
 	default:
-		pr_err("Cannot start Admin SP session with auth %d\n", auth);
+		pr_debug("Cannot start Admin SP session with auth %d\n", auth);
 		return OPAL_INVAL_PARAM;
 	}
 
 	if (err) {
-		pr_err("Error building start adminsp session command.\n");
+		pr_debug("Error building start adminsp session command.\n");
 		return err;
 	}
 
@@ -1369,7 +1366,7 @@ static int start_auth_opal_session(struct opal_dev *dev, void *data)
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 
 	if (err) {
-		pr_err("Error building STARTSESSION command.\n");
+		pr_debug("Error building STARTSESSION command.\n");
 		return err;
 	}
 
@@ -1391,7 +1388,7 @@ static int revert_tper(struct opal_dev *dev, void *data)
 	add_token_u8(&err, dev, OPAL_STARTLIST);
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 	if (err) {
-		pr_err("Error building REVERT TPER command.\n");
+		pr_debug("Error building REVERT TPER command.\n");
 		return err;
 	}
 
@@ -1426,7 +1423,7 @@ static int internal_activate_user(struct opal_dev *dev, void *data)
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 
 	if (err) {
-		pr_err("Error building Activate UserN command.\n");
+		pr_debug("Error building Activate UserN command.\n");
 		return err;
 	}
 
@@ -1453,7 +1450,7 @@ static int erase_locking_range(struct opal_dev *dev, void *data)
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 
 	if (err) {
-		pr_err("Error building Erase Locking Range Command.\n");
+		pr_debug("Error building Erase Locking Range Command.\n");
 		return err;
 	}
 	return finalize_and_send(dev, parse_and_check_status);
@@ -1484,7 +1481,7 @@ static int set_mbr_done(struct opal_dev *dev, void *data)
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 
 	if (err) {
-		pr_err("Error Building set MBR Done command\n");
+		pr_debug("Error Building set MBR Done command\n");
 		return err;
 	}
 
@@ -1516,7 +1513,7 @@ static int set_mbr_enable_disable(struct opal_dev *dev, void *data)
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 
 	if (err) {
-		pr_err("Error Building set MBR done command\n");
+		pr_debug("Error Building set MBR done command\n");
 		return err;
 	}
 
@@ -1567,7 +1564,7 @@ static int set_new_pw(struct opal_dev *dev, void *data)
 
 	if (generic_pw_cmd(usr->opal_key.key, usr->opal_key.key_len,
 			   cpin_uid, dev)) {
-		pr_err("Error building set password command.\n");
+		pr_debug("Error building set password command.\n");
 		return -ERANGE;
 	}
 
@@ -1582,7 +1579,7 @@ static int set_sid_cpin_pin(struct opal_dev *dev, void *data)
 	memcpy(cpin_uid, opaluid[OPAL_C_PIN_SID], OPAL_UID_LENGTH);
 
 	if (generic_pw_cmd(key->key, key->key_len, cpin_uid, dev)) {
-		pr_err("Error building Set SID cpin\n");
+		pr_debug("Error building Set SID cpin\n");
 		return -ERANGE;
 	}
 	return finalize_and_send(dev, parse_and_check_status);
@@ -1657,7 +1654,7 @@ static int add_user_to_lr(struct opal_dev *dev, void *data)
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 
 	if (err) {
-		pr_err("Error building add user to locking range command.\n");
+		pr_debug("Error building add user to locking range command.\n");
 		return err;
 	}
 
@@ -1691,7 +1688,7 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
 		/* vars are initalized to locked */
 		break;
 	default:
-		pr_err("Tried to set an invalid locking state... returning to uland\n");
+		pr_debug("Tried to set an invalid locking state... returning to uland\n");
 		return OPAL_INVAL_PARAM;
 	}
 
@@ -1718,7 +1715,7 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 
 	if (err) {
-		pr_err("Error building SET command.\n");
+		pr_debug("Error building SET command.\n");
 		return err;
 	}
 	return finalize_and_send(dev, parse_and_check_status);
@@ -1752,14 +1749,14 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data)
 		/* vars are initalized to locked */
 		break;
 	default:
-		pr_err("Tried to set an invalid locking state.\n");
+		pr_debug("Tried to set an invalid locking state.\n");
 		return OPAL_INVAL_PARAM;
 	}
 	ret = generic_lr_enable_disable(dev, lr_buffer, 1, 1,
 					read_locked, write_locked);
 
 	if (ret < 0) {
-		pr_err("Error building SET command.\n");
+		pr_debug("Error building SET command.\n");
 		return ret;
 	}
 	return finalize_and_send(dev, parse_and_check_status);
@@ -1811,7 +1808,7 @@ static int activate_lsp(struct opal_dev *dev, void *data)
 	}
 
 	if (err) {
-		pr_err("Error building Activate LockingSP command.\n");
+		pr_debug("Error building Activate LockingSP command.\n");
 		return err;
 	}
 
@@ -1831,7 +1828,7 @@ static int get_lsp_lifecycle_cont(struct opal_dev *dev)
 	/* 0x08 is Manufacured Inactive */
 	/* 0x09 is Manufactured */
 	if (lc_status != OPAL_MANUFACTURED_INACTIVE) {
-		pr_err("Couldn't determine the status of the Lifcycle state\n");
+		pr_debug("Couldn't determine the status of the Lifecycle state\n");
 		return -ENODEV;
 	}
 
@@ -1868,7 +1865,7 @@ static int get_lsp_lifecycle(struct opal_dev *dev, void *data)
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 
 	if (err) {
-		pr_err("Error Building GET Lifecycle Status command\n");
+		pr_debug("Error Building GET Lifecycle Status command\n");
 		return err;
 	}
 
@@ -1887,7 +1884,7 @@ static int get_msid_cpin_pin_cont(struct opal_dev *dev)
 
 	strlen = response_get_string(&dev->parsed, 4, &msid_pin);
 	if (!msid_pin) {
-		pr_err("%s: Couldn't extract PIN from response\n", __func__);
+		pr_debug("%s: Couldn't extract PIN from response\n", __func__);
 		return OPAL_INVAL_PARAM;
 	}
 
@@ -1929,7 +1926,7 @@ static int get_msid_cpin_pin(struct opal_dev *dev, void *data)
 	add_token_u8(&err, dev, OPAL_ENDLIST);
 
 	if (err) {
-		pr_err("Error building Get MSID CPIN PIN command.\n");
+		pr_debug("Error building Get MSID CPIN PIN command.\n");
 		return err;
 	}
 
@@ -2124,18 +2121,18 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
 
 	if (lk_unlk->l_state != OPAL_RO &&
 	    lk_unlk->l_state != OPAL_RW) {
-		pr_err("Locking state was not RO or RW\n");
+		pr_debug("Locking state was not RO or RW\n");
 		return -EINVAL;
 	}
 	if (lk_unlk->session.who < OPAL_USER1 ||
 	    lk_unlk->session.who > OPAL_USER9) {
-		pr_err("Authority was not within the range of users: %d\n",
+		pr_debug("Authority was not within the range of users: %d\n",
 			lk_unlk->session.who);
 		return -EINVAL;
 	}
 	if (lk_unlk->session.sum) {
-		pr_err("%s not supported in sum. Use setup locking range\n",
+		pr_debug("%s not supported in sum. Use setup locking range\n",
 			__func__);
 		return -EINVAL;
 	}
 
@@ -2312,7 +2309,7 @@ static int opal_activate_user(struct opal_dev *dev,
 	/* We can't activate Admin1 it's active as manufactured */
 	if (opal_session->who < OPAL_USER1 ||
 	    opal_session->who > OPAL_USER9) {
-		pr_err("Who was not a valid user: %d\n", opal_session->who);
+		pr_debug("Who was not a valid user: %d\n", opal_session->who);
 		return -EINVAL;
 	}
 
@@ -2343,9 +2340,9 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
 
 		ret = __opal_lock_unlock(dev, &suspend->unlk);
 		if (ret) {
-			pr_warn("Failed to unlock LR %hhu with sum %d\n",
+			pr_debug("Failed to unlock LR %hhu with sum %d\n",
 				suspend->unlk.session.opal_key.lr,
 				suspend->unlk.session.sum);
 			was_failure = true;
 		}
 	}
@@ -2363,10 +2360,8 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
 		return -EACCES;
 	if (!dev)
 		return -ENOTSUPP;
-	if (!dev->supported) {
-		pr_err("Not supported\n");
+	if (!dev->supported)
 		return -ENOTSUPP;
-	}
 
 	p = memdup_user(arg, _IOC_SIZE(cmd));
 	if (IS_ERR(p))
@@ -2410,7 +2405,7 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
 		ret = opal_secure_erase_locking_range(dev, p);
 		break;
 	default:
-		pr_warn("No such Opal Ioctl %u\n", cmd);
+		break;
 	}
 
 	kfree(p);
@@ -160,28 +160,28 @@ static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
 	return t10_pi_verify(iter, t10_pi_ip_fn, 3);
 }
 
-struct blk_integrity_profile t10_pi_type1_crc = {
+const struct blk_integrity_profile t10_pi_type1_crc = {
 	.name = "T10-DIF-TYPE1-CRC",
 	.generate_fn = t10_pi_type1_generate_crc,
 	.verify_fn = t10_pi_type1_verify_crc,
 };
 EXPORT_SYMBOL(t10_pi_type1_crc);
 
-struct blk_integrity_profile t10_pi_type1_ip = {
+const struct blk_integrity_profile t10_pi_type1_ip = {
 	.name = "T10-DIF-TYPE1-IP",
 	.generate_fn = t10_pi_type1_generate_ip,
 	.verify_fn = t10_pi_type1_verify_ip,
 };
 EXPORT_SYMBOL(t10_pi_type1_ip);
 
-struct blk_integrity_profile t10_pi_type3_crc = {
+const struct blk_integrity_profile t10_pi_type3_crc = {
 	.name = "T10-DIF-TYPE3-CRC",
 	.generate_fn = t10_pi_type3_generate_crc,
 	.verify_fn = t10_pi_type3_verify_crc,
 };
 EXPORT_SYMBOL(t10_pi_type3_crc);
 
-struct blk_integrity_profile t10_pi_type3_ip = {
+const struct blk_integrity_profile t10_pi_type3_ip = {
 	.name = "T10-DIF-TYPE3-IP",
 	.generate_fn = t10_pi_type3_generate_ip,
 	.verify_fn = t10_pi_type3_verify_ip,
@@ -312,22 +312,6 @@ config BLK_DEV_SKD
 
 	  Use device /dev/skd$N amd /dev/skd$Np$M.
 
-config BLK_DEV_OSD
-	tristate "OSD object-as-blkdev support"
-	depends on SCSI_OSD_ULD
-	---help---
-	  Saying Y or M here will allow the exporting of a single SCSI
-	  OSD (object-based storage) object as a Linux block device.
-
-	  For example, if you create a 2G object on an OSD device,
-	  you can then use this module to present that 2G object as
-	  a Linux block device.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called osdblk.
-
-	  If unsure, say N.
-
 config BLK_DEV_SX8
 	tristate "Promise SATA SX8 support"
 	depends on PCI
@@ -434,23 +418,6 @@ config ATA_OVER_ETH
 	  This driver provides Support for ATA over Ethernet block
 	  devices like the Coraid EtherDrive (R) Storage Blade.
 
-config MG_DISK
-	tristate "mGine mflash, gflash support"
-	depends on ARM && GPIOLIB
-	help
-	  mGine mFlash(gFlash) block device driver
-
-config MG_DISK_RES
-	int "Size of reserved area before MBR"
-	depends on MG_DISK
-	default 0
-	help
-	  Define size of reserved area that usually used for boot. Unit is KB.
-	  All of the block device operation will be taken this value as start
-	  offset
-	  Examples:
-			1024 => 1 MB
-
 config SUNVDC
 	tristate "Sun Virtual Disk Client support"
 	depends on SUN_LDOMS
@@ -512,19 +479,7 @@ config VIRTIO_BLK_SCSI
 	  Enable support for SCSI passthrough (e.g. the SG_IO ioctl) on
 	  virtio-blk devices. This is only supported for the legacy
 	  virtio protocol and not enabled by default by any hypervisor.
-	  Your probably want to virtio-scsi instead.
+	  You probably want to use virtio-scsi instead.
 
-config BLK_DEV_HD
-	bool "Very old hard disk (MFM/RLL/IDE) driver"
-	depends on HAVE_IDE
-	depends on !ARM || ARCH_RPC || BROKEN
-	help
-	  This is a very old hard disk driver that lacks the enhanced
-	  functionality of the newer ones.
-
-	  It is required for systems with ancient MFM/RLL/ESDI drives.
-
-	  If unsure, say N.
-
 config BLK_DEV_RBD
 	tristate "Rados block device (RBD)"
@@ -19,10 +19,8 @@ obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
 obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
 obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
 obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
-obj-$(CONFIG_MG_DISK) += mg_disk.o
 obj-$(CONFIG_SUNVDC) += sunvdc.o
 obj-$(CONFIG_BLK_DEV_SKD) += skd.o
-obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o
 
 obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
 obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
@@ -30,7 +28,6 @@ obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
 obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
 
 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
-obj-$(CONFIG_BLK_DEV_HD) += hd.o
 
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
@@ -617,12 +617,12 @@ static void fd_error( void )
 	if (!fd_request)
 		return;
 
-	fd_request->errors++;
-	if (fd_request->errors >= MAX_ERRORS) {
+	fd_request->error_count++;
+	if (fd_request->error_count >= MAX_ERRORS) {
 		printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
 		fd_end_request_cur(-EIO);
 	}
-	else if (fd_request->errors == RECALIBRATE_ERRORS) {
+	else if (fd_request->error_count == RECALIBRATE_ERRORS) {
 		printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
 		if (SelectedDrive != -1)
 			SUD.track = -1;
@@ -1386,7 +1386,7 @@ static void setup_req_params( int drive )
 	ReqData = ReqBuffer + 512 * ReqCnt;
 
 	if (UseTrackbuffer)
-		read_track = (ReqCmd == READ && fd_request->errors == 0);
+		read_track = (ReqCmd == READ && fd_request->error_count == 0);
 	else
 		read_track = 0;
 
@@ -1409,8 +1409,10 @@ static struct request *set_next_request(void)
 			fdc_queue = 0;
 		if (q) {
 			rq = blk_fetch_request(q);
-			if (rq)
+			if (rq) {
+				rq->error_count = 0;
 				break;
+			}
 		}
 	} while (fdc_queue != old_pos);
 
@@ -134,28 +134,6 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
 	return page;
 }
 
-static void brd_free_page(struct brd_device *brd, sector_t sector)
-{
-	struct page *page;
-	pgoff_t idx;
-
-	spin_lock(&brd->brd_lock);
-	idx = sector >> PAGE_SECTORS_SHIFT;
-	page = radix_tree_delete(&brd->brd_pages, idx);
-	spin_unlock(&brd->brd_lock);
-	if (page)
-		__free_page(page);
-}
-
-static void brd_zero_page(struct brd_device *brd, sector_t sector)
-{
-	struct page *page;
-
-	page = brd_lookup_page(brd, sector);
-	if (page)
-		clear_highpage(page);
-}
-
 /*
  * Free all backing store pages and radix tree. This must only be called when
  * there are no other users of the device.
@@ -212,24 +190,6 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
 	return 0;
 }
 
-static void discard_from_brd(struct brd_device *brd,
-			sector_t sector, size_t n)
-{
-	while (n >= PAGE_SIZE) {
-		/*
-		 * Don't want to actually discard pages here because
-		 * re-allocating the pages can result in writeback
-		 * deadlocks under heavy load.
-		 */
-		if (0)
-			brd_free_page(brd, sector);
-		else
-			brd_zero_page(brd, sector);
-		sector += PAGE_SIZE >> SECTOR_SHIFT;
-		n -= PAGE_SIZE;
-	}
-}
-
 /*
  * Copy n bytes from src to the brd starting at sector. Does not sleep.
  */
@@ -338,14 +298,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
 	if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
 		goto io_error;
 
-	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
-		if (sector & ((PAGE_SIZE >> SECTOR_SHIFT) - 1) ||
-		    bio->bi_iter.bi_size & ~PAGE_MASK)
-			goto io_error;
-		discard_from_brd(brd, sector, bio->bi_iter.bi_size);
-		goto out;
-	}
-
 	bio_for_each_segment(bvec, bio, iter) {
 		unsigned int len = bvec.bv_len;
 		int err;
@@ -357,7 +309,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
 		sector += len >> SECTOR_SHIFT;
 	}
 
-out:
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 io_error:
@@ -464,11 +415,6 @@ static struct brd_device *brd_alloc(int i)
 	 * is harmless)
 	 */
 	blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
-
-	brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
-	blk_queue_max_discard_sectors(brd->brd_queue, UINT_MAX);
-	brd->brd_queue->limits.discard_zeroes_data = 1;
-	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 	queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
 #endif
@@ -1864,8 +1864,7 @@ static void cciss_softirq_done(struct request *rq)
 	/* set the residual count for pc requests */
 	if (blk_rq_is_passthrough(rq))
 		scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
-
-	blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
+	blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0);
 
 	spin_lock_irqsave(&h->lock, flags);
 	cmd_free(h, c);
@@ -3140,18 +3139,19 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 {
 	int retry_cmd = 0;
 	struct request *rq = cmd->rq;
+	struct scsi_request *sreq = scsi_req(rq);
 
-	rq->errors = 0;
+	sreq->result = 0;
 
 	if (timeout)
-		rq->errors = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT);
+		sreq->result = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT);
 
 	if (cmd->err_info->CommandStatus == 0) /* no error has occurred */
 		goto after_error_processing;
 
 	switch (cmd->err_info->CommandStatus) {
 	case CMD_TARGET_STATUS:
-		rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
+		sreq->result = evaluate_target_status(h, cmd, &retry_cmd);
 		break;
 	case CMD_DATA_UNDERRUN:
 		if (!blk_rq_is_passthrough(cmd->rq)) {
@@ -3169,7 +3169,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 	case CMD_INVALID:
 		dev_warn(&h->pdev->dev, "cciss: cmd %p is "
 			"reported invalid\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3177,7 +3177,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 	case CMD_PROTOCOL_ERR:
 		dev_warn(&h->pdev->dev, "cciss: cmd %p has "
 			"protocol error\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3185,7 +3185,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 	case CMD_HARDWARE_ERR:
 		dev_warn(&h->pdev->dev, "cciss: cmd %p had "
 			" hardware error\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3193,7 +3193,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 	case CMD_CONNECTION_LOST:
 		dev_warn(&h->pdev->dev, "cciss: cmd %p had "
 			"connection lost\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3201,7 +3201,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 	case CMD_ABORTED:
 		dev_warn(&h->pdev->dev, "cciss: cmd %p was "
 			"aborted\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ABORT);
@@ -3209,7 +3209,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 	case CMD_ABORT_FAILED:
 		dev_warn(&h->pdev->dev, "cciss: cmd %p reports "
 			"abort failed\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3224,21 +3224,21 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 		} else
 			dev_warn(&h->pdev->dev,
 				"%p retried too many times\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ABORT);
 		break;
 	case CMD_TIMEOUT:
 		dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
 		break;
 	case CMD_UNABORTABLE:
 		dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3247,7 +3247,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 		dev_warn(&h->pdev->dev, "cmd %p returned "
 			"unknown status %x\n", cmd,
 				cmd->err_info->CommandStatus);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3380,9 +3380,9 @@ static void do_cciss_request(struct request_queue *q)
 	if (dma_mapping_error(&h->pdev->dev, temp64.val)) {
 		dev_warn(&h->pdev->dev,
|
||||||
"%s: error mapping page for DMA\n", __func__);
|
"%s: error mapping page for DMA\n", __func__);
|
||||||
creq->errors = make_status_bytes(SAM_STAT_GOOD,
|
scsi_req(creq)->result =
|
||||||
0, DRIVER_OK,
|
make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK,
|
||||||
DID_SOFT_ERROR);
|
DID_SOFT_ERROR);
|
||||||
cmd_free(h, c);
|
cmd_free(h, c);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -3395,9 +3395,9 @@ static void do_cciss_request(struct request_queue *q)
|
||||||
if (cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex],
|
if (cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex],
|
||||||
(seg - (h->max_cmd_sgentries - 1)) *
|
(seg - (h->max_cmd_sgentries - 1)) *
|
||||||
sizeof(SGDescriptor_struct))) {
|
sizeof(SGDescriptor_struct))) {
|
||||||
creq->errors = make_status_bytes(SAM_STAT_GOOD,
|
scsi_req(creq)->result =
|
||||||
0, DRIVER_OK,
|
make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK,
|
||||||
DID_SOFT_ERROR);
|
DID_SOFT_ERROR);
|
||||||
cmd_free(h, c);
|
cmd_free(h, c);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
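A note on the cciss hunks above (a hedged sketch, not the driver's actual code): the per-request error value moves out of the removed rq->errors field and into the scsi_request attached to the request. The helper name report_result and the DID_ERROR-only packing below are illustrative assumptions, not code from this series.

#include <linux/blkdev.h>
#include <scsi/scsi.h>
#include <scsi/scsi_request.h>

/* Illustrative only: complete a legacy (non-mq) request and report the
 * SCSI result through scsi_req() instead of the old rq->errors field. */
static void report_result(struct request *rq, bool failed)
{
	/* the host byte occupies bits 16..23 of the SCSI result word */
	scsi_req(rq)->result = failed ? (DID_ERROR << 16) : 0;
	__blk_end_request_all(rq, failed ? -EIO : 0);
}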
@@ -236,9 +236,6 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re
 	seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
 	seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
 	seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
-
-	if (f & EE_IS_TRIM)
-		__seq_print_rq_state_bit(m, f & EE_IS_TRIM_USE_ZEROOUT, &sep, "zero-out", "trim");
 	seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same");
 	seq_putc(m, '\n');
 }

@@ -437,9 +437,6 @@ enum {

 	/* is this a TRIM aka REQ_DISCARD? */
 	__EE_IS_TRIM,
-	/* our lower level cannot handle trim,
-	 * and we want to fall back to zeroout instead */
-	__EE_IS_TRIM_USE_ZEROOUT,

 	/* In case a barrier failed,
 	 * we need to resubmit without the barrier flag. */
@@ -482,7 +479,6 @@ enum {
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
 #define EE_IS_TRIM (1<<__EE_IS_TRIM)
-#define EE_IS_TRIM_USE_ZEROOUT (1<<__EE_IS_TRIM_USE_ZEROOUT)
 #define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
 #define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
 #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
@@ -1561,8 +1557,6 @@ extern void start_resync_timer_fn(unsigned long data);
 extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);

 /* drbd_receiver.c */
-extern int drbd_issue_discard_or_zero_out(struct drbd_device *device,
-		sector_t start, unsigned int nr_sectors, bool discard);
 extern int drbd_receiver(struct drbd_thread *thi);
 extern int drbd_ack_receiver(struct drbd_thread *thi);
 extern void drbd_send_ping_wf(struct work_struct *ws);

@@ -931,7 +931,6 @@ void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct r
 		p->qlim->io_min = cpu_to_be32(queue_io_min(q));
 		p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
 		p->qlim->discard_enabled = blk_queue_discard(q);
-		p->qlim->discard_zeroes_data = queue_discard_zeroes_data(q);
 		p->qlim->write_same_capable = !!q->limits.max_write_same_sectors;
 	} else {
 		q = device->rq_queue;
@@ -941,7 +940,6 @@ void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct r
 		p->qlim->io_min = cpu_to_be32(queue_io_min(q));
 		p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
 		p->qlim->discard_enabled = 0;
-		p->qlim->discard_zeroes_data = 0;
 		p->qlim->write_same_capable = 0;
 	}
 }
@@ -1668,7 +1666,8 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
 			(bio->bi_opf & REQ_FUA ? DP_FUA : 0) |
 			(bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) |
 			(bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
-			(bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0);
+			(bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) |
+			(bio_op(bio) == REQ_OP_WRITE_ZEROES ? DP_DISCARD : 0);
 	else
 		return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0;
 }

@@ -1199,10 +1199,6 @@ static void decide_on_discard_support(struct drbd_device *device,
 	struct drbd_connection *connection = first_peer_device(device)->connection;
 	bool can_do = b ? blk_queue_discard(b) : true;

-	if (can_do && b && !b->limits.discard_zeroes_data && !discard_zeroes_if_aligned) {
-		can_do = false;
-		drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n");
-	}
 	if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
 		can_do = false;
 		drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");
@@ -1217,10 +1213,12 @@ static void decide_on_discard_support(struct drbd_device *device,
 		blk_queue_discard_granularity(q, 512);
 		q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
 		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+		q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
 	} else {
 		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
 		blk_queue_discard_granularity(q, 0);
 		q->limits.max_discard_sectors = 0;
+		q->limits.max_write_zeroes_sectors = 0;
 	}
 }

@@ -1482,8 +1480,7 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
 	if (disk_conf->al_extents > drbd_al_extents_max(nbc))
 		disk_conf->al_extents = drbd_al_extents_max(nbc);

-	if (!blk_queue_discard(q)
-	    || (!q->limits.discard_zeroes_data && !disk_conf->discard_zeroes_if_aligned)) {
+	if (!blk_queue_discard(q)) {
 		if (disk_conf->rs_discard_granularity) {
 			disk_conf->rs_discard_granularity = 0; /* disable feature */
 			drbd_info(device, "rs_discard_granularity feature disabled\n");

@@ -1448,105 +1448,14 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
 	drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
 }

-/*
- * We *may* ignore the discard-zeroes-data setting, if so configured.
- *
- * Assumption is that it "discard_zeroes_data=0" is only because the backend
- * may ignore partial unaligned discards.
- *
- * LVM/DM thin as of at least
- * LVM version: 2.02.115(2)-RHEL7 (2015-01-28)
- * Library version: 1.02.93-RHEL7 (2015-01-28)
- * Driver version: 4.29.0
- * still behaves this way.
- *
- * For unaligned (wrt. alignment and granularity) or too small discards,
- * we zero-out the initial (and/or) trailing unaligned partial chunks,
- * but discard all the aligned full chunks.
- *
- * At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1".
- */
-int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard)
-{
-	struct block_device *bdev = device->ldev->backing_bdev;
-	struct request_queue *q = bdev_get_queue(bdev);
-	sector_t tmp, nr;
-	unsigned int max_discard_sectors, granularity;
-	int alignment;
-	int err = 0;
-
-	if (!discard)
-		goto zero_out;
-
-	/* Zero-sector (unknown) and one-sector granularities are the same. */
-	granularity = max(q->limits.discard_granularity >> 9, 1U);
-	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
-
-	max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
-	max_discard_sectors -= max_discard_sectors % granularity;
-	if (unlikely(!max_discard_sectors))
-		goto zero_out;
-
-	if (nr_sectors < granularity)
-		goto zero_out;
-
-	tmp = start;
-	if (sector_div(tmp, granularity) != alignment) {
-		if (nr_sectors < 2*granularity)
-			goto zero_out;
-		/* start + gran - (start + gran - align) % gran */
-		tmp = start + granularity - alignment;
-		tmp = start + granularity - sector_div(tmp, granularity);
-
-		nr = tmp - start;
-		err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
-		nr_sectors -= nr;
-		start = tmp;
-	}
-	while (nr_sectors >= granularity) {
-		nr = min_t(sector_t, nr_sectors, max_discard_sectors);
-		err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
-		nr_sectors -= nr;
-		start += nr;
-	}
- zero_out:
-	if (nr_sectors) {
-		err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0);
-	}
-	return err != 0;
-}
-
-static bool can_do_reliable_discards(struct drbd_device *device)
-{
-	struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
-	struct disk_conf *dc;
-	bool can_do;
-
-	if (!blk_queue_discard(q))
-		return false;
-
-	if (q->limits.discard_zeroes_data)
-		return true;
-
-	rcu_read_lock();
-	dc = rcu_dereference(device->ldev->disk_conf);
-	can_do = dc->discard_zeroes_if_aligned;
-	rcu_read_unlock();
-	return can_do;
-}
-
 static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req)
 {
-	/* If the backend cannot discard, or does not guarantee
-	 * read-back zeroes in discarded ranges, we fall back to
-	 * zero-out. Unless configuration specifically requested
-	 * otherwise. */
-	if (!can_do_reliable_discards(device))
-		peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
+	struct block_device *bdev = device->ldev->backing_bdev;

-	if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
-		peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT)))
+	if (blkdev_issue_zeroout(bdev, peer_req->i.sector, peer_req->i.size >> 9,
+			GFP_NOIO, 0))
 		peer_req->flags |= EE_WAS_ERROR;

 	drbd_endio_write_sec_final(peer_req);
 }

@@ -2376,7 +2285,7 @@ static unsigned long wire_flags_to_bio_flags(u32 dpf)
 static unsigned long wire_flags_to_bio_op(u32 dpf)
 {
 	if (dpf & DP_DISCARD)
-		return REQ_OP_DISCARD;
+		return REQ_OP_WRITE_ZEROES;
 	else
 		return REQ_OP_WRITE;
 }
@@ -2567,7 +2476,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
 	op_flags = wire_flags_to_bio_flags(dp_flags);
 	if (pi->cmd == P_TRIM) {
 		D_ASSERT(peer_device, peer_req->i.size > 0);
-		D_ASSERT(peer_device, op == REQ_OP_DISCARD);
+		D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
 		D_ASSERT(peer_device, peer_req->pages == NULL);
 	} else if (peer_req->pages == NULL) {
 		D_ASSERT(device, peer_req->i.size == 0);
@@ -4880,7 +4789,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac

 	if (get_ldev(device)) {
 		struct drbd_peer_request *peer_req;
-		const int op = REQ_OP_DISCARD;
+		const int op = REQ_OP_WRITE_ZEROES;

 		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
 				size, 0, GFP_NOIO);

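A hedged sketch of the replacement pattern used in the drbd hunks above: with WRITE ZEROES support in the block layer, the open-coded "discard the aligned middle, zero the ragged edges" helper can be dropped and the backing range zeroed through blkdev_issue_zeroout(), which picks a suitable offload itself. zero_backing_range is an illustrative name, not an existing kernel symbol.

#include <linux/blkdev.h>

/* Zero nr_sectors starting at 'start' on the backing device.  Passing 0 as
 * the flags argument lets the block layer use discard or WRITE ZEROES
 * offload when the device advertises it, falling back to writing zero
 * pages otherwise. */
static int zero_backing_range(struct block_device *bdev, sector_t start,
			      sector_t nr_sectors)
{
	return blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0);
}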
@@ -59,6 +59,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
 	drbd_req_make_private_bio(req, bio_src);
 	req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
 		      | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0)
+		      | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_UNMAP : 0)
 		      | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
 	req->device = device;
 	req->master_bio = bio_src;
@@ -1148,10 +1149,10 @@ static int drbd_process_write_request(struct drbd_request *req)

 static void drbd_process_discard_req(struct drbd_request *req)
 {
-	int err = drbd_issue_discard_or_zero_out(req->device,
-			req->i.sector, req->i.size >> 9, true);
+	struct block_device *bdev = req->device->ldev->backing_bdev;

-	if (err)
+	if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
+			GFP_NOIO, 0))
 		req->private_bio->bi_error = -EIO;
 	bio_endio(req->private_bio);
 }
@@ -1180,7 +1181,8 @@ drbd_submit_req_private_bio(struct drbd_request *req)
 	if (get_ldev(device)) {
 		if (drbd_insert_fault(device, type))
 			bio_io_error(bio);
-		else if (bio_op(bio) == REQ_OP_DISCARD)
+		else if (bio_op(bio) == REQ_OP_WRITE_ZEROES ||
+			 bio_op(bio) == REQ_OP_DISCARD)
 			drbd_process_discard_req(req);
 		else
 			generic_make_request(bio);
@@ -1234,7 +1236,8 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
 	_drbd_start_io_acct(device, req);

 	/* process discards always from our submitter thread */
-	if (bio_op(bio) & REQ_OP_DISCARD)
+	if ((bio_op(bio) & REQ_OP_WRITE_ZEROES) ||
+	    (bio_op(bio) & REQ_OP_DISCARD))
 		goto queue_for_submitter_thread;

 	if (rw == WRITE && req->private_bio && req->i.size

@@ -174,7 +174,8 @@ void drbd_peer_request_endio(struct bio *bio)
 	struct drbd_peer_request *peer_req = bio->bi_private;
 	struct drbd_device *device = peer_req->peer_device->device;
 	bool is_write = bio_data_dir(bio) == WRITE;
-	bool is_discard = !!(bio_op(bio) == REQ_OP_DISCARD);
+	bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
+			  bio_op(bio) == REQ_OP_DISCARD;

 	if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
 		drbd_warn(device, "%s: error=%d s=%llus\n",
@@ -249,6 +250,7 @@ void drbd_request_endio(struct bio *bio)
 	/* to avoid recursion in __req_mod */
 	if (unlikely(bio->bi_error)) {
 		switch (bio_op(bio)) {
+		case REQ_OP_WRITE_ZEROES:
 		case REQ_OP_DISCARD:
 			if (bio->bi_error == -EOPNOTSUPP)
 				what = DISCARD_COMPLETED_NOTSUPP;

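Hedged sketch of the pattern these drbd endio hunks apply: wherever the old code tested bio_op() against REQ_OP_DISCARD alone, REQ_OP_WRITE_ZEROES now has to be treated the same way. op_discards_data is an illustrative helper, not an existing kernel function.

#include <linux/bio.h>

/* True for operations that unmap or zero the addressed range rather than
 * transferring payload data. */
static inline bool op_discards_data(struct bio *bio)
{
	return bio_op(bio) == REQ_OP_DISCARD ||
	       bio_op(bio) == REQ_OP_WRITE_ZEROES;
}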
@@ -2805,8 +2805,10 @@ static int set_next_request(void)
 		fdc_queue = 0;
 		if (q) {
 			current_req = blk_fetch_request(q);
-			if (current_req)
+			if (current_req) {
+				current_req->error_count = 0;
 				break;
+			}
 		}
 	} while (fdc_queue != old_pos);

@@ -2866,7 +2868,7 @@ do_request:
 		_floppy = floppy_type + DP->autodetect[DRS->probed_format];
 	} else
 		probing = 0;
 	errors = &(current_req->error_count);
-	errors = &(current_req->errors);
+	errors = &(current_req->error_count);
 	tmp = make_raw_rw_request();
 	if (tmp < 2) {
 		request_done(tmp);
@@ -4207,9 +4209,7 @@ static int __init do_floppy_init(void)
 		disks[drive]->fops = &floppy_fops;
 		sprintf(disks[drive]->disk_name, "fd%d", drive);

-		init_timer(&motor_off_timer[drive]);
-		motor_off_timer[drive].data = drive;
-		motor_off_timer[drive].function = motor_off_callback;
+		setup_timer(&motor_off_timer[drive], motor_off_callback, drive);
 	}

 	err = register_blkdev(FLOPPY_MAJOR, "fd");

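The floppy hunk above also folds the open-coded timer setup into setup_timer(). A hedged, self-contained sketch of that API; the timer and callback names are made up for illustration.

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/timer.h>

static void example_timeout(unsigned long data)
{
	pr_info("timer for drive %lu fired\n", data);
}

static struct timer_list example_timer;

static void example_timer_start(unsigned long drive)
{
	/* before: init_timer(&t); t.function = cb; t.data = drive; */
	setup_timer(&example_timer, example_timeout, drive);
	mod_timer(&example_timer, jiffies + HZ);
}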
@@ -1,803 +0,0 @@
[drivers/block/hd.c deleted in its entirety (803 lines): the legacy ST-506 "hd" driver, whose full source was rendered here line by line, is removed by this series. The per-line deletion is collapsed here, in the same way this page collapses the next oversized file diff below.]

@@ -445,32 +445,27 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq)
 	return ret;
 }

-static inline void handle_partial_read(struct loop_cmd *cmd, long bytes)
+static void lo_complete_rq(struct request *rq)
 {
-	if (bytes < 0 || op_is_write(req_op(cmd->rq)))
-		return;
+	struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);

-	if (unlikely(bytes < blk_rq_bytes(cmd->rq))) {
+	if (unlikely(req_op(cmd->rq) == REQ_OP_READ && cmd->use_aio &&
+		     cmd->ret >= 0 && cmd->ret < blk_rq_bytes(cmd->rq))) {
 		struct bio *bio = cmd->rq->bio;

-		bio_advance(bio, bytes);
+		bio_advance(bio, cmd->ret);
 		zero_fill_bio(bio);
 	}

+	blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0);
 }

 static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
 {
 	struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
-	struct request *rq = cmd->rq;

-	handle_partial_read(cmd, ret);
-	if (ret > 0)
-		ret = 0;
-	else if (ret < 0)
-		ret = -EIO;
-
-	blk_mq_complete_request(rq, ret);
+	cmd->ret = ret;
+	blk_mq_complete_request(cmd->rq);
 }

 static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
@@ -528,6 +523,7 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
 	case REQ_OP_FLUSH:
 		return lo_req_flush(lo, rq);
 	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
 		return lo_discard(lo, rq, pos);
 	case REQ_OP_WRITE:
 		if (lo->transfer)
@@ -826,7 +822,7 @@ static void loop_config_discard(struct loop_device *lo)
 		q->limits.discard_granularity = 0;
 		q->limits.discard_alignment = 0;
 		blk_queue_max_discard_sectors(q, 0);
-		q->limits.discard_zeroes_data = 0;
+		blk_queue_max_write_zeroes_sectors(q, 0);
 		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
 		return;
 	}
@@ -834,7 +830,7 @@ static void loop_config_discard(struct loop_device *lo)
 	q->limits.discard_granularity = inode->i_sb->s_blocksize;
 	q->limits.discard_alignment = 0;
 	blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
-	q->limits.discard_zeroes_data = 1;
+	blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 }

@@ -1660,6 +1656,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	switch (req_op(cmd->rq)) {
 	case REQ_OP_FLUSH:
 	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
 		cmd->use_aio = false;
 		break;
 	default:
@@ -1686,8 +1683,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
 	ret = do_req_filebacked(lo, cmd->rq);
 failed:
 	/* complete non-aio request */
-	if (!cmd->use_aio || ret)
-		blk_mq_complete_request(cmd->rq, ret ? -EIO : 0);
+	if (!cmd->use_aio || ret) {
+		cmd->ret = ret ? -EIO : 0;
+		blk_mq_complete_request(cmd->rq);
+	}
 }

 static void loop_queue_work(struct kthread_work *work)
@@ -1710,9 +1709,10 @@ static int loop_init_request(void *data, struct request *rq,
 	return 0;
 }

-static struct blk_mq_ops loop_mq_ops = {
+static const struct blk_mq_ops loop_mq_ops = {
 	.queue_rq = loop_queue_rq,
 	.init_request = loop_init_request,
+	.complete = lo_complete_rq,
 };

 static int loop_add(struct loop_device **l, int i)

@ -70,6 +70,7 @@ struct loop_cmd {
|
||||||
struct request *rq;
|
struct request *rq;
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
bool use_aio; /* use AIO interface to handle I/O */
|
bool use_aio; /* use AIO interface to handle I/O */
|
||||||
|
long ret;
|
||||||
struct kiocb iocb;
|
struct kiocb iocb;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
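
Taken together, the loop hunks above show the new blk-mq completion convention: the driver keeps the outcome in its own per-command data (here the new ret field in struct loop_cmd), calls blk_mq_complete_request() without an error argument, and reports the error from a .complete handler (lo_complete_rq in this driver). Below is a minimal sketch of that pattern, assuming the 4.12-era int-error signature of blk_mq_end_request() seen in these hunks; my_cmd, my_complete_rq and my_finish_io are hypothetical names, not loop code.

	struct my_cmd {
		struct request *rq;
		long ret;	/* completion status now lives in the driver pdu */
	};

	/* .complete callback: runs after blk_mq_complete_request() */
	static void my_complete_rq(struct request *rq)
	{
		struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);

		/* the error is read back from the pdu, not from the request */
		blk_mq_end_request(rq, cmd->ret ? -EIO : 0);
	}

	static void my_finish_io(struct my_cmd *cmd, int ret)
	{
		cmd->ret = ret;				/* stash the outcome */
		blk_mq_complete_request(cmd->rq);	/* no error argument anymore */
	}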
(one file's diff was suppressed by the viewer because it is too large)
@@ -169,6 +169,25 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
 return false; /* device present */
 }

+/* we have to use runtime tag to setup command header */
+static void mtip_init_cmd_header(struct request *rq)
+{
+struct driver_data *dd = rq->q->queuedata;
+struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
+u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
+
+/* Point the command headers at the command tables. */
+cmd->command_header = dd->port->command_list +
+(sizeof(struct mtip_cmd_hdr) * rq->tag);
+cmd->command_header_dma = dd->port->command_list_dma +
+(sizeof(struct mtip_cmd_hdr) * rq->tag);
+
+if (host_cap_64)
+cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
+
+cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
+}
+
 static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
 {
 struct request *rq;
@@ -180,6 +199,9 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
 if (IS_ERR(rq))
 return NULL;

+/* Internal cmd isn't submitted via .queue_rq */
+mtip_init_cmd_header(rq);
+
 return blk_mq_rq_to_pdu(rq);
 }

@@ -241,7 +263,8 @@ static void mtip_async_complete(struct mtip_port *port,

 rq = mtip_rq_from_tag(dd, tag);

-blk_mq_complete_request(rq, status);
+cmd->status = status;
+blk_mq_complete_request(rq);
 }

 /*
@@ -2910,18 +2933,19 @@ static void mtip_softirq_done_fn(struct request *rq)
 if (unlikely(cmd->unaligned))
 up(&dd->port->cmd_slot_unal);

-blk_mq_end_request(rq, rq->errors);
+blk_mq_end_request(rq, cmd->status);
 }

 static void mtip_abort_cmd(struct request *req, void *data,
 bool reserved)
 {
+struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 struct driver_data *dd = data;

 dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);

 clear_bit(req->tag, dd->port->cmds_to_issue);
-req->errors = -EIO;
+cmd->status = -EIO;
 mtip_softirq_done_fn(req);
 }

@@ -3807,6 +3831,8 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
 struct request *rq = bd->rq;
 int ret;

+mtip_init_cmd_header(rq);
+
 if (unlikely(mtip_check_unal_depth(hctx, rq)))
 return BLK_MQ_RQ_QUEUE_BUSY;

@@ -3816,7 +3842,6 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
 if (likely(!ret))
 return BLK_MQ_RQ_QUEUE_OK;

-rq->errors = ret;
 return BLK_MQ_RQ_QUEUE_ERROR;
 }

@@ -3838,7 +3863,6 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx,
 {
 struct driver_data *dd = data;
 struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
-u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;

 /*
 * For flush requests, request_idx starts at the end of the
@@ -3855,17 +3879,6 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx,

 memset(cmd->command, 0, CMD_DMA_ALLOC_SZ);

-/* Point the command headers at the command tables. */
-cmd->command_header = dd->port->command_list +
-(sizeof(struct mtip_cmd_hdr) * request_idx);
-cmd->command_header_dma = dd->port->command_list_dma +
-(sizeof(struct mtip_cmd_hdr) * request_idx);
-
-if (host_cap_64)
-cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
-
-cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
-
 sg_init_table(cmd->sg, MTIP_MAX_SG);
 return 0;
 }
@@ -3889,7 +3902,7 @@ exit_handler:
 return BLK_EH_RESET_TIMER;
 }

-static struct blk_mq_ops mtip_mq_ops = {
+static const struct blk_mq_ops mtip_mq_ops = {
 .queue_rq = mtip_queue_rq,
 .init_request = mtip_init_cmd,
 .exit_request = mtip_free_cmd,
@@ -4025,7 +4038,6 @@ skip_create_disk:
 dd->queue->limits.discard_granularity = 4096;
 blk_queue_max_discard_sectors(dd->queue,
 MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES);
-dd->queue->limits.discard_zeroes_data = 0;
 }

 /* Set the capacity of the device in 512 byte sectors. */
@@ -4107,9 +4119,11 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
 struct driver_data *dd = (struct driver_data *)data;
 struct mtip_cmd *cmd;

-if (likely(!reserv))
-blk_mq_complete_request(rq, -ENODEV);
-else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) {
+if (likely(!reserv)) {
+cmd = blk_mq_rq_to_pdu(rq);
+cmd->status = -ENODEV;
+blk_mq_complete_request(rq);
+} else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) {

 cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
 if (cmd->comp_func)
@@ -4162,7 +4176,7 @@ static int mtip_block_remove(struct driver_data *dd)
 dev_info(&dd->pdev->dev, "device %s surprise removal\n",
 dd->disk->disk_name);

-blk_mq_freeze_queue_start(dd->queue);
+blk_freeze_queue_start(dd->queue);
 blk_mq_stop_hw_queues(dd->queue);
 blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd);

@@ -352,6 +352,7 @@ struct mtip_cmd {
 int retries; /* The number of retries left for this command. */

 int direction; /* Data transfer direction */
+int status;
 };

 /* Structure used to describe a port. */
drivers/block/nbd.c: 1429 lines changed (diff suppressed because it is too large)
@@ -117,6 +117,10 @@ static bool use_lightnvm;
 module_param(use_lightnvm, bool, S_IRUGO);
 MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");

+static bool blocking;
+module_param(blocking, bool, S_IRUGO);
+MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
+
 static int irqmode = NULL_IRQ_SOFTIRQ;

 static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -277,7 +281,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd)
 case NULL_IRQ_SOFTIRQ:
 switch (queue_mode) {
 case NULL_Q_MQ:
-blk_mq_complete_request(cmd->rq, cmd->rq->errors);
+blk_mq_complete_request(cmd->rq);
 break;
 case NULL_Q_RQ:
 blk_complete_request(cmd->rq);
@@ -357,6 +361,8 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
 {
 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);

+might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
+
 if (irqmode == NULL_IRQ_TIMER) {
 hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 cmd->timer.function = null_cmd_timer_expired;
@@ -392,7 +398,7 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 return 0;
 }

-static struct blk_mq_ops null_mq_ops = {
+static const struct blk_mq_ops null_mq_ops = {
 .queue_rq = null_queue_rq,
 .init_hctx = null_init_hctx,
 .complete = null_softirq_done_fn,
@@ -437,14 +443,7 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 if (IS_ERR(rq))
 return -ENOMEM;

-rq->__sector = bio->bi_iter.bi_sector;
-rq->ioprio = bio_prio(bio);
-
-if (bio_has_data(bio))
-rq->nr_phys_segments = bio_phys_segments(q, bio);
-
-rq->__data_len = bio->bi_iter.bi_size;
-rq->bio = rq->biotail = bio;
+blk_init_request_from_bio(rq, bio);

 rq->end_io_data = rqd;

@@ -724,6 +723,9 @@ static int null_add_dev(void)
 nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 nullb->tag_set.driver_data = nullb;

+if (blocking)
+nullb->tag_set.flags |= BLK_MQ_F_BLOCKING;
+
 rv = blk_mq_alloc_tag_set(&nullb->tag_set);
 if (rv)
 goto out_cleanup_queues;
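
The null_blk hunks above add an opt-in blocking mode: a new blocking module parameter sets BLK_MQ_F_BLOCKING on the tag set, and ->queue_rq() documents the contract with might_sleep_if(). A hedged sketch of how a driver might wire this up follows; everything except the blk-mq symbols and flags (my_queue_rq, my_setup_tag_set) is an illustrative placeholder, not code from this merge.

	static bool blocking;
	module_param(blocking, bool, S_IRUGO);

	static int my_queue_rq(struct blk_mq_hw_ctx *hctx,
			       const struct blk_mq_queue_data *bd)
	{
		/* sleeping here is only legal when BLK_MQ_F_BLOCKING was set */
		might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);

		/* ... issue bd->rq, possibly blocking ... */
		return BLK_MQ_RQ_QUEUE_OK;
	}

	static int my_setup_tag_set(struct blk_mq_tag_set *set)
	{
		set->flags = BLK_MQ_F_SHOULD_MERGE;
		if (blocking)
			set->flags |= BLK_MQ_F_BLOCKING;	/* queue_rq may sleep */
		return blk_mq_alloc_tag_set(set);
	}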
@ -1,693 +0,0 @@
|
||||||
|
|
||||||
/*
|
|
||||||
osdblk.c -- Export a single SCSI OSD object as a Linux block device
|
|
||||||
|
|
||||||
|
|
||||||
Copyright 2009 Red Hat, Inc.
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program; see the file COPYING. If not, write to
|
|
||||||
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
||||||
|
|
||||||
|
|
||||||
Instructions for use
|
|
||||||
--------------------
|
|
||||||
|
|
||||||
1) Map a Linux block device to an existing OSD object.
|
|
||||||
|
|
||||||
In this example, we will use partition id 1234, object id 5678,
|
|
||||||
OSD device /dev/osd1.
|
|
||||||
|
|
||||||
$ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add
|
|
||||||
|
|
||||||
|
|
||||||
2) List all active blkdev<->object mappings.
|
|
||||||
|
|
||||||
In this example, we have performed step #1 twice, creating two blkdevs,
|
|
||||||
mapped to two separate OSD objects.
|
|
||||||
|
|
||||||
$ cat /sys/class/osdblk/list
|
|
||||||
0 174 1234 5678 /dev/osd1
|
|
||||||
1 179 1994 897123 /dev/osd0
|
|
||||||
|
|
||||||
The columns, in order, are:
|
|
||||||
- blkdev unique id
|
|
||||||
- blkdev assigned major
|
|
||||||
- OSD object partition id
|
|
||||||
- OSD object id
|
|
||||||
- OSD device
|
|
||||||
|
|
||||||
|
|
||||||
3) Remove an active blkdev<->object mapping.
|
|
||||||
|
|
||||||
In this example, we remove the mapping with blkdev unique id 1.
|
|
||||||
|
|
||||||
$ echo 1 > /sys/class/osdblk/remove
|
|
||||||
|
|
||||||
|
|
||||||
NOTE: The actual creation and deletion of OSD objects is outside the scope
|
|
||||||
of this driver.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <linux/kernel.h>
|
|
||||||
#include <linux/device.h>
|
|
||||||
#include <linux/module.h>
|
|
||||||
#include <linux/fs.h>
|
|
||||||
#include <linux/slab.h>
|
|
||||||
#include <scsi/osd_initiator.h>
|
|
||||||
#include <scsi/osd_attributes.h>
|
|
||||||
#include <scsi/osd_sec.h>
|
|
||||||
#include <scsi/scsi_device.h>
|
|
||||||
|
|
||||||
#define DRV_NAME "osdblk"
|
|
||||||
#define PFX DRV_NAME ": "
|
|
||||||
|
|
||||||
/* #define _OSDBLK_DEBUG */
|
|
||||||
#ifdef _OSDBLK_DEBUG
|
|
||||||
#define OSDBLK_DEBUG(fmt, a...) \
|
|
||||||
printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a)
|
|
||||||
#else
|
|
||||||
#define OSDBLK_DEBUG(fmt, a...) \
|
|
||||||
do { if (0) printk(fmt, ##a); } while (0)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
|
|
||||||
MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko");
|
|
||||||
MODULE_LICENSE("GPL");
|
|
||||||
|
|
||||||
struct osdblk_device;
|
|
||||||
|
|
||||||
enum {
|
|
||||||
OSDBLK_MINORS_PER_MAJOR = 256, /* max minors per blkdev */
|
|
||||||
OSDBLK_MAX_REQ = 32, /* max parallel requests */
|
|
||||||
OSDBLK_OP_TIMEOUT = 4 * 60, /* sync OSD req timeout */
|
|
||||||
};
|
|
||||||
|
|
||||||
struct osdblk_request {
|
|
||||||
struct request *rq; /* blk layer request */
|
|
||||||
struct bio *bio; /* cloned bio */
|
|
||||||
struct osdblk_device *osdev; /* associated blkdev */
|
|
||||||
};
|
|
||||||
|
|
||||||
struct osdblk_device {
|
|
||||||
int id; /* blkdev unique id */
|
|
||||||
|
|
||||||
int major; /* blkdev assigned major */
|
|
||||||
struct gendisk *disk; /* blkdev's gendisk and rq */
|
|
||||||
struct request_queue *q;
|
|
||||||
|
|
||||||
struct osd_dev *osd; /* associated OSD */
|
|
||||||
|
|
||||||
char name[32]; /* blkdev name, e.g. osdblk34 */
|
|
||||||
|
|
||||||
spinlock_t lock; /* queue lock */
|
|
||||||
|
|
||||||
struct osd_obj_id obj; /* OSD partition, obj id */
|
|
||||||
uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */
|
|
||||||
|
|
||||||
struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */
|
|
||||||
|
|
||||||
struct list_head node;
|
|
||||||
|
|
||||||
char osd_path[0]; /* OSD device path */
|
|
||||||
};
|
|
||||||
|
|
||||||
static struct class *class_osdblk; /* /sys/class/osdblk */
|
|
||||||
static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
|
|
||||||
static LIST_HEAD(osdblkdev_list);
|
|
||||||
|
|
||||||
static const struct block_device_operations osdblk_bd_ops = {
|
|
||||||
.owner = THIS_MODULE,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const struct osd_attr g_attr_logical_length = ATTR_DEF(
|
|
||||||
OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
|
|
||||||
|
|
||||||
static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN],
|
|
||||||
const struct osd_obj_id *obj)
|
|
||||||
{
|
|
||||||
osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* copied from exofs; move to libosd? */
|
|
||||||
/*
|
|
||||||
* Perform a synchronous OSD operation. copied from exofs; move to libosd?
|
|
||||||
*/
|
|
||||||
static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
or->timeout = timeout;
|
|
||||||
ret = osd_finalize_request(or, 0, credential, NULL);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
ret = osd_execute_request(or);
|
|
||||||
|
|
||||||
/* osd_req_decode_sense(or, ret); */
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Perform an asynchronous OSD operation. copied from exofs; move to libosd?
|
|
||||||
*/
|
|
||||||
static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done,
|
|
||||||
void *caller_context, u8 *cred)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = osd_finalize_request(or, 0, cred, NULL);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
ret = osd_execute_request_async(or, async_done, caller_context);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* copied from exofs; move to libosd? */
|
|
||||||
static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
|
|
||||||
{
|
|
||||||
struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
|
|
||||||
void *iter = NULL;
|
|
||||||
int nelem;
|
|
||||||
|
|
||||||
do {
|
|
||||||
nelem = 1;
|
|
||||||
osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
|
|
||||||
if ((cur_attr.attr_page == attr->attr_page) &&
|
|
||||||
(cur_attr.attr_id == attr->attr_id)) {
|
|
||||||
attr->len = cur_attr.len;
|
|
||||||
attr->val_ptr = cur_attr.val_ptr;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
} while (iter);
|
|
||||||
|
|
||||||
return -EIO;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out)
|
|
||||||
{
|
|
||||||
struct osd_request *or;
|
|
||||||
struct osd_attr attr;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
/* start request */
|
|
||||||
or = osd_start_request(osdev->osd, GFP_KERNEL);
|
|
||||||
if (!or)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
/* create a get-attributes(length) request */
|
|
||||||
osd_req_get_attributes(or, &osdev->obj);
|
|
||||||
|
|
||||||
osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
|
|
||||||
|
|
||||||
/* execute op synchronously */
|
|
||||||
ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred);
|
|
||||||
if (ret)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
/* extract length from returned attribute info */
|
|
||||||
attr = g_attr_logical_length;
|
|
||||||
ret = extract_attr_from_req(or, &attr);
|
|
||||||
if (ret)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
*size_out = get_unaligned_be64(attr.val_ptr);
|
|
||||||
|
|
||||||
out:
|
|
||||||
osd_end_request(or);
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
static void osdblk_osd_complete(struct osd_request *or, void *private)
|
|
||||||
{
|
|
||||||
struct osdblk_request *orq = private;
|
|
||||||
struct osd_sense_info osi;
|
|
||||||
int ret = osd_req_decode_sense(or, &osi);
|
|
||||||
|
|
||||||
if (ret) {
|
|
||||||
ret = -EIO;
|
|
||||||
OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* complete OSD request */
|
|
||||||
osd_end_request(or);
|
|
||||||
|
|
||||||
/* complete request passed to osdblk by block layer */
|
|
||||||
__blk_end_request_all(orq->rq, ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void bio_chain_put(struct bio *chain)
|
|
||||||
{
|
|
||||||
struct bio *tmp;
|
|
||||||
|
|
||||||
while (chain) {
|
|
||||||
tmp = chain;
|
|
||||||
chain = chain->bi_next;
|
|
||||||
|
|
||||||
bio_put(tmp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
|
|
||||||
{
|
|
||||||
struct bio *tmp, *new_chain = NULL, *tail = NULL;
|
|
||||||
|
|
||||||
while (old_chain) {
|
|
||||||
tmp = bio_clone_kmalloc(old_chain, gfpmask);
|
|
||||||
if (!tmp)
|
|
||||||
goto err_out;
|
|
||||||
|
|
||||||
tmp->bi_bdev = NULL;
|
|
||||||
gfpmask &= ~__GFP_DIRECT_RECLAIM;
|
|
||||||
tmp->bi_next = NULL;
|
|
||||||
|
|
||||||
if (!new_chain)
|
|
||||||
new_chain = tail = tmp;
|
|
||||||
else {
|
|
||||||
tail->bi_next = tmp;
|
|
||||||
tail = tmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
old_chain = old_chain->bi_next;
|
|
||||||
}
|
|
||||||
|
|
||||||
return new_chain;
|
|
||||||
|
|
||||||
err_out:
|
|
||||||
OSDBLK_DEBUG("bio_chain_clone with err\n");
|
|
||||||
bio_chain_put(new_chain);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void osdblk_rq_fn(struct request_queue *q)
|
|
||||||
{
|
|
||||||
struct osdblk_device *osdev = q->queuedata;
|
|
||||||
|
|
||||||
while (1) {
|
|
||||||
struct request *rq;
|
|
||||||
struct osdblk_request *orq;
|
|
||||||
struct osd_request *or;
|
|
||||||
struct bio *bio;
|
|
||||||
bool do_write, do_flush;
|
|
||||||
|
|
||||||
/* peek at request from block layer */
|
|
||||||
rq = blk_fetch_request(q);
|
|
||||||
if (!rq)
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* deduce our operation (read, write, flush) */
|
|
||||||
/* I wish the block layer simplified cmd_type/cmd_flags/cmd[]
|
|
||||||
* into a clearly defined set of RPC commands:
|
|
||||||
* read, write, flush, scsi command, power mgmt req,
|
|
||||||
* driver-specific, etc.
|
|
||||||
*/
|
|
||||||
|
|
||||||
do_flush = (req_op(rq) == REQ_OP_FLUSH);
|
|
||||||
do_write = (rq_data_dir(rq) == WRITE);
|
|
||||||
|
|
||||||
if (!do_flush) { /* osd_flush does not use a bio */
|
|
||||||
/* a bio clone to be passed down to OSD request */
|
|
||||||
bio = bio_chain_clone(rq->bio, GFP_ATOMIC);
|
|
||||||
if (!bio)
|
|
||||||
break;
|
|
||||||
} else
|
|
||||||
bio = NULL;
|
|
||||||
|
|
||||||
/* alloc internal OSD request, for OSD command execution */
|
|
||||||
or = osd_start_request(osdev->osd, GFP_ATOMIC);
|
|
||||||
if (!or) {
|
|
||||||
bio_chain_put(bio);
|
|
||||||
OSDBLK_DEBUG("osd_start_request with err\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
orq = &osdev->req[rq->tag];
|
|
||||||
orq->rq = rq;
|
|
||||||
orq->bio = bio;
|
|
||||||
orq->osdev = osdev;
|
|
||||||
|
|
||||||
/* init OSD command: flush, write or read */
|
|
||||||
if (do_flush)
|
|
||||||
osd_req_flush_object(or, &osdev->obj,
|
|
||||||
OSD_CDB_FLUSH_ALL, 0, 0);
|
|
||||||
else if (do_write)
|
|
||||||
osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
|
|
||||||
bio, blk_rq_bytes(rq));
|
|
||||||
else
|
|
||||||
osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
|
|
||||||
bio, blk_rq_bytes(rq));
|
|
||||||
|
|
||||||
OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n",
|
|
||||||
do_flush ? "flush" : do_write ?
|
|
||||||
"write" : "read", blk_rq_bytes(rq),
|
|
||||||
blk_rq_pos(rq) * 512ULL);
|
|
||||||
|
|
||||||
/* begin OSD command execution */
|
|
||||||
if (osd_async_op(or, osdblk_osd_complete, orq,
|
|
||||||
osdev->obj_cred)) {
|
|
||||||
osd_end_request(or);
|
|
||||||
blk_requeue_request(q, rq);
|
|
||||||
bio_chain_put(bio);
|
|
||||||
OSDBLK_DEBUG("osd_execute_request_async with err\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* remove the special 'flush' marker, now that the command
|
|
||||||
* is executing
|
|
||||||
*/
|
|
||||||
rq->special = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void osdblk_free_disk(struct osdblk_device *osdev)
|
|
||||||
{
|
|
||||||
struct gendisk *disk = osdev->disk;
|
|
||||||
|
|
||||||
if (!disk)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (disk->flags & GENHD_FL_UP)
|
|
||||||
del_gendisk(disk);
|
|
||||||
if (disk->queue)
|
|
||||||
blk_cleanup_queue(disk->queue);
|
|
||||||
put_disk(disk);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int osdblk_init_disk(struct osdblk_device *osdev)
|
|
||||||
{
|
|
||||||
struct gendisk *disk;
|
|
||||||
struct request_queue *q;
|
|
||||||
int rc;
|
|
||||||
u64 obj_size = 0;
|
|
||||||
|
|
||||||
/* contact OSD, request size info about the object being mapped */
|
|
||||||
rc = osdblk_get_obj_size(osdev, &obj_size);
|
|
||||||
if (rc)
|
|
||||||
return rc;
|
|
||||||
|
|
||||||
/* create gendisk info */
|
|
||||||
disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR);
|
|
||||||
if (!disk)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
sprintf(disk->disk_name, DRV_NAME "%d", osdev->id);
|
|
||||||
disk->major = osdev->major;
|
|
||||||
disk->first_minor = 0;
|
|
||||||
disk->fops = &osdblk_bd_ops;
|
|
||||||
disk->private_data = osdev;
|
|
||||||
|
|
||||||
/* init rq */
|
|
||||||
q = blk_init_queue(osdblk_rq_fn, &osdev->lock);
|
|
||||||
if (!q) {
|
|
||||||
put_disk(disk);
|
|
||||||
return -ENOMEM;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* switch queue to TCQ mode; allocate tag map */
|
|
||||||
rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO);
|
|
||||||
if (rc) {
|
|
||||||
blk_cleanup_queue(q);
|
|
||||||
put_disk(disk);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Set our limits to the lower device limits, because osdblk cannot
|
|
||||||
* sleep when allocating a lower-request and therefore cannot be
|
|
||||||
* bouncing.
|
|
||||||
*/
|
|
||||||
blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
|
|
||||||
|
|
||||||
blk_queue_prep_rq(q, blk_queue_start_tag);
|
|
||||||
blk_queue_write_cache(q, true, false);
|
|
||||||
|
|
||||||
disk->queue = q;
|
|
||||||
|
|
||||||
q->queuedata = osdev;
|
|
||||||
|
|
||||||
osdev->disk = disk;
|
|
||||||
osdev->q = q;
|
|
||||||
|
|
||||||
/* finally, announce the disk to the world */
|
|
||||||
set_capacity(disk, obj_size / 512ULL);
|
|
||||||
add_disk(disk);
|
|
||||||
|
|
||||||
printk(KERN_INFO "%s: Added of size 0x%llx\n",
|
|
||||||
disk->disk_name, (unsigned long long)obj_size);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/********************************************************************
|
|
||||||
* /sys/class/osdblk/
|
|
||||||
* add map OSD object to blkdev
|
|
||||||
* remove unmap OSD object
|
|
||||||
* list show mappings
|
|
||||||
*******************************************************************/
|
|
||||||
|
|
||||||
static void class_osdblk_release(struct class *cls)
|
|
||||||
{
|
|
||||||
kfree(cls);
|
|
||||||
}
|
|
||||||
|
|
||||||
static ssize_t class_osdblk_list(struct class *c,
|
|
||||||
struct class_attribute *attr,
|
|
||||||
char *data)
|
|
||||||
{
|
|
||||||
int n = 0;
|
|
||||||
struct list_head *tmp;
|
|
||||||
|
|
||||||
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
|
|
||||||
|
|
||||||
list_for_each(tmp, &osdblkdev_list) {
|
|
||||||
struct osdblk_device *osdev;
|
|
||||||
|
|
||||||
osdev = list_entry(tmp, struct osdblk_device, node);
|
|
||||||
|
|
||||||
n += sprintf(data+n, "%d %d %llu %llu %s\n",
|
|
||||||
osdev->id,
|
|
||||||
osdev->major,
|
|
||||||
osdev->obj.partition,
|
|
||||||
osdev->obj.id,
|
|
||||||
osdev->osd_path);
|
|
||||||
}
|
|
||||||
|
|
||||||
mutex_unlock(&ctl_mutex);
|
|
||||||
return n;
|
|
||||||
}
|
|
||||||
|
|
||||||
static ssize_t class_osdblk_add(struct class *c,
|
|
||||||
struct class_attribute *attr,
|
|
||||||
const char *buf, size_t count)
|
|
||||||
{
|
|
||||||
struct osdblk_device *osdev;
|
|
||||||
ssize_t rc;
|
|
||||||
int irc, new_id = 0;
|
|
||||||
struct list_head *tmp;
|
|
||||||
|
|
||||||
if (!try_module_get(THIS_MODULE))
|
|
||||||
return -ENODEV;
|
|
||||||
|
|
||||||
/* new osdblk_device object */
|
|
||||||
osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL);
|
|
||||||
if (!osdev) {
|
|
||||||
rc = -ENOMEM;
|
|
||||||
goto err_out_mod;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* static osdblk_device initialization */
|
|
||||||
spin_lock_init(&osdev->lock);
|
|
||||||
INIT_LIST_HEAD(&osdev->node);
|
|
||||||
|
|
||||||
/* generate unique id: find highest unique id, add one */
|
|
||||||
|
|
||||||
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
|
|
||||||
|
|
||||||
list_for_each(tmp, &osdblkdev_list) {
|
|
||||||
struct osdblk_device *osdev;
|
|
||||||
|
|
||||||
osdev = list_entry(tmp, struct osdblk_device, node);
|
|
||||||
if (osdev->id > new_id)
|
|
||||||
new_id = osdev->id + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
osdev->id = new_id;
|
|
||||||
|
|
||||||
/* add to global list */
|
|
||||||
list_add_tail(&osdev->node, &osdblkdev_list);
|
|
||||||
|
|
||||||
mutex_unlock(&ctl_mutex);
|
|
||||||
|
|
||||||
/* parse add command */
|
|
||||||
if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id,
|
|
||||||
osdev->osd_path) != 3) {
|
|
||||||
rc = -EINVAL;
|
|
||||||
goto err_out_slot;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* initialize rest of new object */
|
|
||||||
sprintf(osdev->name, DRV_NAME "%d", osdev->id);
|
|
||||||
|
|
||||||
/* contact requested OSD */
|
|
||||||
osdev->osd = osduld_path_lookup(osdev->osd_path);
|
|
||||||
if (IS_ERR(osdev->osd)) {
|
|
||||||
rc = PTR_ERR(osdev->osd);
|
|
||||||
goto err_out_slot;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* build OSD credential */
|
|
||||||
osdblk_make_credential(osdev->obj_cred, &osdev->obj);
|
|
||||||
|
|
||||||
/* register our block device */
|
|
||||||
irc = register_blkdev(0, osdev->name);
|
|
||||||
if (irc < 0) {
|
|
||||||
rc = irc;
|
|
||||||
goto err_out_osd;
|
|
||||||
}
|
|
||||||
|
|
||||||
osdev->major = irc;
|
|
||||||
|
|
||||||
/* set up and announce blkdev mapping */
|
|
||||||
rc = osdblk_init_disk(osdev);
|
|
||||||
if (rc)
|
|
||||||
goto err_out_blkdev;
|
|
||||||
|
|
||||||
return count;
|
|
||||||
|
|
||||||
err_out_blkdev:
|
|
||||||
unregister_blkdev(osdev->major, osdev->name);
|
|
||||||
err_out_osd:
|
|
||||||
osduld_put_device(osdev->osd);
|
|
||||||
err_out_slot:
|
|
||||||
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
|
|
||||||
list_del_init(&osdev->node);
|
|
||||||
mutex_unlock(&ctl_mutex);
|
|
||||||
|
|
||||||
kfree(osdev);
|
|
||||||
err_out_mod:
|
|
||||||
OSDBLK_DEBUG("Error adding device %s\n", buf);
|
|
||||||
module_put(THIS_MODULE);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
static ssize_t class_osdblk_remove(struct class *c,
|
|
||||||
struct class_attribute *attr,
|
|
||||||
const char *buf,
|
|
||||||
size_t count)
|
|
||||||
{
|
|
||||||
struct osdblk_device *osdev = NULL;
|
|
||||||
int target_id, rc;
|
|
||||||
unsigned long ul;
|
|
||||||
struct list_head *tmp;
|
|
||||||
|
|
||||||
rc = kstrtoul(buf, 10, &ul);
|
|
||||||
if (rc)
|
|
||||||
return rc;
|
|
||||||
|
|
||||||
/* convert to int; abort if we lost anything in the conversion */
|
|
||||||
target_id = (int) ul;
|
|
||||||
if (target_id != ul)
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
/* remove object from list immediately */
|
|
||||||
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
|
|
||||||
|
|
||||||
list_for_each(tmp, &osdblkdev_list) {
|
|
||||||
osdev = list_entry(tmp, struct osdblk_device, node);
|
|
||||||
if (osdev->id == target_id) {
|
|
||||||
list_del_init(&osdev->node);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
osdev = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
mutex_unlock(&ctl_mutex);
|
|
||||||
|
|
||||||
if (!osdev)
|
|
||||||
return -ENOENT;
|
|
||||||
|
|
||||||
/* clean up and free blkdev and associated OSD connection */
|
|
||||||
osdblk_free_disk(osdev);
|
|
||||||
unregister_blkdev(osdev->major, osdev->name);
|
|
||||||
osduld_put_device(osdev->osd);
|
|
||||||
kfree(osdev);
|
|
||||||
|
|
||||||
/* release module ref */
|
|
||||||
module_put(THIS_MODULE);
|
|
||||||
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct class_attribute class_osdblk_attrs[] = {
|
|
||||||
__ATTR(add, 0200, NULL, class_osdblk_add),
|
|
||||||
__ATTR(remove, 0200, NULL, class_osdblk_remove),
|
|
||||||
__ATTR(list, 0444, class_osdblk_list, NULL),
|
|
||||||
__ATTR_NULL
|
|
||||||
};
|
|
||||||
|
|
||||||
static int osdblk_sysfs_init(void)
|
|
||||||
{
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* create control files in sysfs
|
|
||||||
* /sys/class/osdblk/...
|
|
||||||
*/
|
|
||||||
class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL);
|
|
||||||
if (!class_osdblk)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
class_osdblk->name = DRV_NAME;
|
|
||||||
class_osdblk->owner = THIS_MODULE;
|
|
||||||
class_osdblk->class_release = class_osdblk_release;
|
|
||||||
class_osdblk->class_attrs = class_osdblk_attrs;
|
|
||||||
|
|
||||||
ret = class_register(class_osdblk);
|
|
||||||
if (ret) {
|
|
||||||
kfree(class_osdblk);
|
|
||||||
class_osdblk = NULL;
|
|
||||||
printk(PFX "failed to create class osdblk\n");
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void osdblk_sysfs_cleanup(void)
|
|
||||||
{
|
|
||||||
if (class_osdblk)
|
|
||||||
class_destroy(class_osdblk);
|
|
||||||
class_osdblk = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int __init osdblk_init(void)
|
|
||||||
{
|
|
||||||
int rc;
|
|
||||||
|
|
||||||
rc = osdblk_sysfs_init();
|
|
||||||
if (rc)
|
|
||||||
return rc;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void __exit osdblk_exit(void)
|
|
||||||
{
|
|
||||||
osdblk_sysfs_cleanup();
|
|
||||||
}
|
|
||||||
|
|
||||||
module_init(osdblk_init);
|
|
||||||
module_exit(osdblk_exit);
|
|
||||||
|
|
|
@@ -300,6 +300,11 @@ static void pcd_init_units(void)
 struct gendisk *disk = alloc_disk(1);
 if (!disk)
 continue;
+disk->queue = blk_init_queue(do_pcd_request, &pcd_lock);
+if (!disk->queue) {
+put_disk(disk);
+continue;
+}
 cd->disk = disk;
 cd->pi = &cd->pia;
 cd->present = 0;
@@ -735,18 +740,36 @@ static int pcd_detect(void)
 }

 /* I/O request processing */
-static struct request_queue *pcd_queue;
+static int pcd_queue;

-static void do_pcd_request(struct request_queue * q)
+static int set_next_request(void)
+{
+struct pcd_unit *cd;
+struct request_queue *q;
+int old_pos = pcd_queue;
+
+do {
+cd = &pcd[pcd_queue];
+q = cd->present ? cd->disk->queue : NULL;
+if (++pcd_queue == PCD_UNITS)
+pcd_queue = 0;
+if (q) {
+pcd_req = blk_fetch_request(q);
+if (pcd_req)
+break;
+}
+} while (pcd_queue != old_pos);
+
+return pcd_req != NULL;
+}
+
+static void pcd_request(void)
 {
 if (pcd_busy)
 return;
 while (1) {
-if (!pcd_req) {
-pcd_req = blk_fetch_request(q);
-if (!pcd_req)
-return;
-}
+if (!pcd_req && !set_next_request())
+return;

 if (rq_data_dir(pcd_req) == READ) {
 struct pcd_unit *cd = pcd_req->rq_disk->private_data;
@@ -766,6 +789,11 @@ static void do_pcd_request(struct request_queue * q)
 }
 }

+static void do_pcd_request(struct request_queue *q)
+{
+pcd_request();
+}
+
 static inline void next_request(int err)
 {
 unsigned long saved_flags;
@@ -774,7 +802,7 @@ static inline void next_request(int err)
 if (!__blk_end_request_cur(pcd_req, err))
 pcd_req = NULL;
 pcd_busy = 0;
-do_pcd_request(pcd_queue);
+pcd_request();
 spin_unlock_irqrestore(&pcd_lock, saved_flags);
 }

@@ -849,7 +877,7 @@ static void do_pcd_read_drq(void)

 do_pcd_read();
 spin_lock_irqsave(&pcd_lock, saved_flags);
-do_pcd_request(pcd_queue);
+pcd_request();
 spin_unlock_irqrestore(&pcd_lock, saved_flags);
 }

@@ -957,19 +985,10 @@ static int __init pcd_init(void)
 return -EBUSY;
 }

-pcd_queue = blk_init_queue(do_pcd_request, &pcd_lock);
-if (!pcd_queue) {
-unregister_blkdev(major, name);
-for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++)
-put_disk(cd->disk);
-return -ENOMEM;
-}
-
 for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
 if (cd->present) {
 register_cdrom(&cd->info);
 cd->disk->private_data = cd;
-cd->disk->queue = pcd_queue;
 add_disk(cd->disk);
 }
 }
@@ -988,9 +1007,9 @@ static void __exit pcd_exit(void)
 pi_release(cd->pi);
 unregister_cdrom(&cd->info);
 }
+blk_cleanup_queue(cd->disk->queue);
 put_disk(cd->disk);
 }
-blk_cleanup_queue(pcd_queue);
 unregister_blkdev(major, name);
 pi_unregister_driver(par_drv);
 }
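
The pcd hunks above (and the pd, pf and swim hunks that follow the same pattern) replace one driver-wide request_queue with a queue per gendisk plus a small round-robin helper that pulls the next request from whichever unit has work pending. A generic sketch of that helper is shown below; units, NR_UNITS, cur and cur_req are placeholders for illustration, not names from these drivers.

	static int cur;			/* round-robin cursor over the units */
	static struct request *cur_req;	/* request currently being served */

	static int set_next_request(void)
	{
		struct request_queue *q;
		int old_pos = cur;

		do {
			q = units[cur].present ? units[cur].disk->queue : NULL;
			if (++cur == NR_UNITS)
				cur = 0;
			if (q) {
				/* legacy (non-mq) path: pop the next request */
				cur_req = blk_fetch_request(q);
				if (cur_req)
					break;
			}
		} while (cur != old_pos);	/* stop after one full sweep */

		return cur_req != NULL;
	}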
@@ -381,12 +381,33 @@ static enum action do_pd_write_start(void);
 static enum action do_pd_read_drq(void);
 static enum action do_pd_write_done(void);

-static struct request_queue *pd_queue;
+static int pd_queue;
 static int pd_claimed;

 static struct pd_unit *pd_current; /* current request's drive */
 static PIA *pi_current; /* current request's PIA */

+static int set_next_request(void)
+{
+struct gendisk *disk;
+struct request_queue *q;
+int old_pos = pd_queue;
+
+do {
+disk = pd[pd_queue].gd;
+q = disk ? disk->queue : NULL;
+if (++pd_queue == PD_UNITS)
+pd_queue = 0;
+if (q) {
+pd_req = blk_fetch_request(q);
+if (pd_req)
+break;
+}
+} while (pd_queue != old_pos);
+
+return pd_req != NULL;
+}
+
 static void run_fsm(void)
 {
 while (1) {
@@ -418,8 +439,7 @@ static void run_fsm(void)
 spin_lock_irqsave(&pd_lock, saved_flags);
 if (!__blk_end_request_cur(pd_req,
 res == Ok ? 0 : -EIO)) {
-pd_req = blk_fetch_request(pd_queue);
-if (!pd_req)
+if (!set_next_request())
 stop = 1;
 }
 spin_unlock_irqrestore(&pd_lock, saved_flags);
@@ -719,18 +739,15 @@ static int pd_special_command(struct pd_unit *disk,
 enum action (*func)(struct pd_unit *disk))
 {
 struct request *rq;
-int err = 0;

 rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
 if (IS_ERR(rq))
 return PTR_ERR(rq);

 rq->special = func;
-err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
+blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);

 blk_put_request(rq);
-return err;
+return 0;
 }

 /* kernel glue structures */
@@ -839,7 +856,13 @@ static void pd_probe_drive(struct pd_unit *disk)
 p->first_minor = (disk - pd) << PD_BITS;
 disk->gd = p;
 p->private_data = disk;
-p->queue = pd_queue;
+p->queue = blk_init_queue(do_pd_request, &pd_lock);
+if (!p->queue) {
+disk->gd = NULL;
+put_disk(p);
+return;
+}
+blk_queue_max_hw_sectors(p->queue, cluster);

 if (disk->drive == -1) {
 for (disk->drive = 0; disk->drive <= 1; disk->drive++)
@@ -919,26 +942,18 @@ static int __init pd_init(void)
 if (disable)
 goto out1;

-pd_queue = blk_init_queue(do_pd_request, &pd_lock);
-if (!pd_queue)
-goto out1;
-
-blk_queue_max_hw_sectors(pd_queue, cluster);
-
 if (register_blkdev(major, name))
-goto out2;
+goto out1;

 printk("%s: %s version %s, major %d, cluster %d, nice %d\n",
 name, name, PD_VERSION, major, cluster, nice);
 if (!pd_detect())
-goto out3;
+goto out2;

 return 0;

-out3:
-unregister_blkdev(major, name);
 out2:
-blk_cleanup_queue(pd_queue);
+unregister_blkdev(major, name);
 out1:
 return -ENODEV;
 }
@@ -953,11 +968,11 @@ static void __exit pd_exit(void)
 if (p) {
 disk->gd = NULL;
 del_gendisk(p);
+blk_cleanup_queue(p->queue);
 put_disk(p);
 pi_release(disk->pi);
 }
 }
-blk_cleanup_queue(pd_queue);
 }

 MODULE_LICENSE("GPL");
@@ -287,6 +287,12 @@ static void __init pf_init_units(void)
 struct gendisk *disk = alloc_disk(1);
 if (!disk)
 continue;
+disk->queue = blk_init_queue(do_pf_request, &pf_spin_lock);
+if (!disk->queue) {
+put_disk(disk);
+return;
+}
+blk_queue_max_segments(disk->queue, cluster);
 pf->disk = disk;
 pf->pi = &pf->pia;
 pf->media_status = PF_NM;
@@ -772,7 +778,28 @@ static int pf_ready(void)
 return (((status_reg(pf_current) & (STAT_BUSY | pf_mask)) == pf_mask));
 }

-static struct request_queue *pf_queue;
+static int pf_queue;
+
+static int set_next_request(void)
+{
+struct pf_unit *pf;
+struct request_queue *q;
+int old_pos = pf_queue;
+
+do {
+pf = &units[pf_queue];
+q = pf->present ? pf->disk->queue : NULL;
+if (++pf_queue == PF_UNITS)
+pf_queue = 0;
+if (q) {
+pf_req = blk_fetch_request(q);
+if (pf_req)
+break;
+}
+} while (pf_queue != old_pos);
+
+return pf_req != NULL;
+}

 static void pf_end_request(int err)
 {
@@ -780,16 +807,13 @@ static void pf_end_request(int err)
 pf_req = NULL;
 }

-static void do_pf_request(struct request_queue * q)
+static void pf_request(void)
 {
 if (pf_busy)
 return;
 repeat:
-if (!pf_req) {
-pf_req = blk_fetch_request(q);
-if (!pf_req)
-return;
-}
+if (!pf_req && !set_next_request())
+return;

 pf_current = pf_req->rq_disk->private_data;
 pf_block = blk_rq_pos(pf_req);
@@ -817,6 +841,11 @@ repeat:
 }
 }

+static void do_pf_request(struct request_queue *q)
+{
+pf_request();
+}
+
 static int pf_next_buf(void)
 {
 unsigned long saved_flags;
@@ -846,7 +875,7 @@ static inline void next_request(int err)
 spin_lock_irqsave(&pf_spin_lock, saved_flags);
 pf_end_request(err);
 pf_busy = 0;
-do_pf_request(pf_queue);
+pf_request();
 spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
 }

@@ -972,15 +1001,6 @@ static int __init pf_init(void)
 put_disk(pf->disk);
 return -EBUSY;
 }
-pf_queue = blk_init_queue(do_pf_request, &pf_spin_lock);
-if (!pf_queue) {
-unregister_blkdev(major, name);
-for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
-put_disk(pf->disk);
-return -ENOMEM;
-}
-
-blk_queue_max_segments(pf_queue, cluster);

 for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
 struct gendisk *disk = pf->disk;
@@ -988,7 +1008,6 @@ static int __init pf_init(void)
 if (!pf->present)
 continue;
 disk->private_data = pf;
-disk->queue = pf_queue;
 add_disk(disk);
 }
 return 0;
@@ -1003,10 +1022,10 @@ static void __exit pf_exit(void)
 if (!pf->present)
 continue;
 del_gendisk(pf->disk);
+blk_cleanup_queue(pf->disk->queue);
 put_disk(pf->disk);
 pi_release(pf->pi);
 }
-blk_cleanup_queue(pf_queue);
 }

 MODULE_LICENSE("GPL");
@@ -724,7 +724,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 rq->rq_flags |= RQF_QUIET;

 blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0);
-if (rq->errors)
+if (scsi_req(rq)->result)
 ret = -EIO;
 out:
 blk_put_request(rq);
@@ -4317,7 +4317,7 @@ static int rbd_init_request(void *data, struct request *rq,
 return 0;
 }

-static struct blk_mq_ops rbd_mq_ops = {
+static const struct blk_mq_ops rbd_mq_ops = {
 .queue_rq = rbd_queue_rq,
 .init_request = rbd_init_request,
 };
@@ -4380,7 +4380,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 q->limits.discard_granularity = segment_size;
 q->limits.discard_alignment = segment_size;
 blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
-q->limits.discard_zeroes_data = 1;

 if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
 q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
@@ -300,7 +300,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
 RSXX_HW_BLK_SIZE >> 9);
 card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE;
 card->queue->limits.discard_alignment = RSXX_HW_BLK_SIZE;
-card->queue->limits.discard_zeroes_data = 1;
 }

 card->queue->queuedata = card;
@@ -211,7 +211,7 @@ enum head {
 struct swim_priv {
 struct swim __iomem *base;
 spinlock_t lock;
-struct request_queue *queue;
+int fdc_queue;
 int floppy_count;
 struct floppy_state unit[FD_MAX_UNIT];
 };
@@ -525,12 +525,33 @@ static int floppy_read_sectors(struct floppy_state *fs,
 return 0;
 }

-static void redo_fd_request(struct request_queue *q)
+static struct request *swim_next_request(struct swim_priv *swd)
 {
+struct request_queue *q;
+struct request *rq;
+int old_pos = swd->fdc_queue;
+
+do {
+q = swd->unit[swd->fdc_queue].disk->queue;
+if (++swd->fdc_queue == swd->floppy_count)
+swd->fdc_queue = 0;
+if (q) {
+rq = blk_fetch_request(q);
+if (rq)
+return rq;
+}
+} while (swd->fdc_queue != old_pos);
+
+return NULL;
+}
+
+static void do_fd_request(struct request_queue *q)
+{
+struct swim_priv *swd = q->queuedata;
 struct request *req;
 struct floppy_state *fs;

-req = blk_fetch_request(q);
+req = swim_next_request(swd);
 while (req) {
 int err = -EIO;

@@ -554,15 +575,10 @@ static void redo_fd_request(struct request_queue *q)
 }
 done:
 if (!__blk_end_request_cur(req, err))
-req = blk_fetch_request(q);
+req = swim_next_request(swd);
 }
 }

-static void do_fd_request(struct request_queue *q)
-{
-redo_fd_request(q);
-}
-
 static struct floppy_struct floppy_type[4] = {
 { 0, 0, 0, 0, 0, 0x00, 0x00, 0x00, 0x00, NULL }, /* no testing */
 { 720, 9, 1, 80, 0, 0x2A, 0x02, 0xDF, 0x50, NULL }, /* 360KB SS 3.5"*/
@@ -833,22 +849,25 @@ static int swim_floppy_init(struct swim_priv *swd)
 return -EBUSY;
 }

+spin_lock_init(&swd->lock);
+
 for (drive = 0; drive < swd->floppy_count; drive++) {
 swd->unit[drive].disk = alloc_disk(1);
 if (swd->unit[drive].disk == NULL) {
 err = -ENOMEM;
 goto exit_put_disks;
 }
+swd->unit[drive].disk->queue = blk_init_queue(do_fd_request,
+&swd->lock);
+if (!swd->unit[drive].disk->queue) {
+err = -ENOMEM;
+put_disk(swd->unit[drive].disk);
+goto exit_put_disks;
+}
+swd->unit[drive].disk->queue->queuedata = swd;
 swd->unit[drive].swd = swd;
 }

-spin_lock_init(&swd->lock);
-swd->queue = blk_init_queue(do_fd_request, &swd->lock);
-if (!swd->queue) {
-err = -ENOMEM;
-goto exit_put_disks;
-}
-
 for (drive = 0; drive < swd->floppy_count; drive++) {
 swd->unit[drive].disk->flags = GENHD_FL_REMOVABLE;
 swd->unit[drive].disk->major = FLOPPY_MAJOR;
@@ -856,7 +875,6 @@ static int swim_floppy_init(struct swim_priv *swd)
 sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive);
 swd->unit[drive].disk->fops = &floppy_fops;
 swd->unit[drive].disk->private_data = &swd->unit[drive];
-swd->unit[drive].disk->queue = swd->queue;
 set_capacity(swd->unit[drive].disk, 2880);
 add_disk(swd->unit[drive].disk);
 }
@@ -943,13 +961,12 @@ static int swim_remove(struct platform_device *dev)

 for (drive = 0; drive < swd->floppy_count; drive++) {
 del_gendisk(swd->unit[drive].disk);
+blk_cleanup_queue(swd->unit[drive].disk->queue);
 put_disk(swd->unit[drive].disk);
 }

 unregister_blkdev(FLOPPY_MAJOR, "fd");

-blk_cleanup_queue(swd->queue);
-
 /* eject floppies */

 for (drive = 0; drive < swd->floppy_count; drive++)
@@ -343,8 +343,8 @@ static void start_request(struct floppy_state *fs)
 req->rq_disk->disk_name, req->cmd,
 (long)blk_rq_pos(req), blk_rq_sectors(req),
 bio_data(req->bio));
-swim3_dbg(" errors=%d current_nr_sectors=%u\n",
-req->errors, blk_rq_cur_sectors(req));
+swim3_dbg(" current_nr_sectors=%u\n",
+blk_rq_cur_sectors(req));
 #endif

 if (blk_rq_pos(req) >= fs->total_secs) {
@@ -111,7 +111,7 @@ static int virtblk_add_req_scsi(struct virtqueue *vq, struct virtblk_req *vbr,
         return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
-static inline void virtblk_scsi_reques_done(struct request *req)
+static inline void virtblk_scsi_request_done(struct request *req)
 {
         struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
         struct virtio_blk *vblk = req->q->queuedata;
@@ -119,7 +119,7 @@ static inline void virtblk_scsi_reques_done(struct request *req)
 
         sreq->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
         sreq->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
-        req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
+        sreq->result = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
 }
 
 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
@@ -144,7 +144,7 @@ static inline int virtblk_add_req_scsi(struct virtqueue *vq,
 {
         return -EIO;
 }
-static inline void virtblk_scsi_reques_done(struct request *req)
+static inline void virtblk_scsi_request_done(struct request *req)
 {
 }
 #define virtblk_ioctl NULL
@@ -175,19 +175,15 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
 static inline void virtblk_request_done(struct request *req)
 {
         struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
-        int error = virtblk_result(vbr);
 
         switch (req_op(req)) {
         case REQ_OP_SCSI_IN:
         case REQ_OP_SCSI_OUT:
-                virtblk_scsi_reques_done(req);
-                break;
-        case REQ_OP_DRV_IN:
-                req->errors = (error != 0);
+                virtblk_scsi_request_done(req);
                 break;
         }
 
-        blk_mq_end_request(req, error);
+        blk_mq_end_request(req, virtblk_result(vbr));
 }
 
 static void virtblk_done(struct virtqueue *vq)
@@ -205,7 +201,7 @@ static void virtblk_done(struct virtqueue *vq)
         while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
                 struct request *req = blk_mq_rq_from_pdu(vbr);
 
-                blk_mq_complete_request(req, req->errors);
+                blk_mq_complete_request(req);
                 req_done = true;
         }
         if (unlikely(virtqueue_is_broken(vq)))
@@ -310,7 +306,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
         if (err)
                 goto out;
 
-        err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
+        blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
+        err = virtblk_result(blk_mq_rq_to_pdu(req));
 out:
         blk_put_request(req);
         return err;
@@ -597,7 +594,7 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set)
         return blk_mq_virtio_map_queues(set, vblk->vdev, 0);
 }
 
-static struct blk_mq_ops virtio_mq_ops = {
+static const struct blk_mq_ops virtio_mq_ops = {
         .queue_rq = virtio_queue_rq,
         .complete = virtblk_request_done,
         .init_request = virtblk_init_request,
@@ -115,6 +115,15 @@ struct split_bio {
         atomic_t pending;
 };
 
+struct blkif_req {
+        int error;
+};
+
+static inline struct blkif_req *blkif_req(struct request *rq)
+{
+        return blk_mq_rq_to_pdu(rq);
+}
+
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;
 
@@ -907,8 +916,14 @@ out_busy:
         return BLK_MQ_RQ_QUEUE_BUSY;
 }
 
-static struct blk_mq_ops blkfront_mq_ops = {
+static void blkif_complete_rq(struct request *rq)
+{
+        blk_mq_end_request(rq, blkif_req(rq)->error);
+}
+
+static const struct blk_mq_ops blkfront_mq_ops = {
         .queue_rq = blkif_queue_rq,
+        .complete = blkif_complete_rq,
 };
 
 static void blkif_set_queue_limits(struct blkfront_info *info)
@@ -969,7 +984,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
         info->tag_set.queue_depth = BLK_RING_SIZE(info);
         info->tag_set.numa_node = NUMA_NO_NODE;
         info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
-        info->tag_set.cmd_size = 0;
+        info->tag_set.cmd_size = sizeof(struct blkif_req);
         info->tag_set.driver_data = info;
 
         if (blk_mq_alloc_tag_set(&info->tag_set))
@@ -1543,7 +1558,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
         unsigned long flags;
         struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
         struct blkfront_info *info = rinfo->dev_info;
-        int error;
 
         if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
                 return IRQ_HANDLED;
@@ -1587,37 +1601,36 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                         continue;
                 }
 
-                error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+                blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
                 switch (bret->operation) {
                 case BLKIF_OP_DISCARD:
                         if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
                                 struct request_queue *rq = info->rq;
                                 printk(KERN_WARNING "blkfront: %s: %s op failed\n",
                                        info->gd->disk_name, op_name(bret->operation));
-                                error = -EOPNOTSUPP;
+                                blkif_req(req)->error = -EOPNOTSUPP;
                                 info->feature_discard = 0;
                                 info->feature_secdiscard = 0;
                                 queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
                                 queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
                         }
-                        blk_mq_complete_request(req, error);
                         break;
                 case BLKIF_OP_FLUSH_DISKCACHE:
                 case BLKIF_OP_WRITE_BARRIER:
                         if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
                                 printk(KERN_WARNING "blkfront: %s: %s op failed\n",
                                        info->gd->disk_name, op_name(bret->operation));
-                                error = -EOPNOTSUPP;
+                                blkif_req(req)->error = -EOPNOTSUPP;
                         }
                         if (unlikely(bret->status == BLKIF_RSP_ERROR &&
                                      rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
                                 printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
                                        info->gd->disk_name, op_name(bret->operation));
-                                error = -EOPNOTSUPP;
+                                blkif_req(req)->error = -EOPNOTSUPP;
                         }
-                        if (unlikely(error)) {
-                                if (error == -EOPNOTSUPP)
-                                        error = 0;
+                        if (unlikely(blkif_req(req)->error)) {
+                                if (blkif_req(req)->error == -EOPNOTSUPP)
+                                        blkif_req(req)->error = 0;
                                 info->feature_fua = 0;
                                 info->feature_flush = 0;
                                 xlvbd_flush(info);
@@ -1629,11 +1642,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                         dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
                                 "request: %x\n", bret->status);
 
-                        blk_mq_complete_request(req, error);
                         break;
                 default:
                         BUG();
                 }
+
+                blk_mq_complete_request(req);
         }
 
         rinfo->ring.rsp_cons = i;
@@ -2345,6 +2359,7 @@ static void blkfront_connect(struct blkfront_info *info)
         unsigned long sector_size;
         unsigned int physical_sector_size;
         unsigned int binfo;
+        char *envp[] = { "RESIZE=1", NULL };
         int err, i;
 
         switch (info->connected) {
@@ -2361,6 +2376,8 @@ static void blkfront_connect(struct blkfront_info *info)
                            sectors);
                 set_capacity(info->gd, sectors);
                 revalidate_disk(info->gd);
+                kobject_uevent_env(&disk_to_dev(info->gd)->kobj,
+                                   KOBJ_CHANGE, envp);
 
                 return;
         case BLKIF_STATE_SUSPENDED:
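The blkfront hunks above show the completion-path rework that runs through this series: per-request status moves out of struct request and into the driver's blk-mq PDU (tag_set.cmd_size = sizeof(struct blkif_req)), the interrupt handler only records the outcome, and a .complete callback ends the request with the stored value, which is why blk_mq_complete_request() no longer takes an error argument. A minimal sketch of that shape, using only interfaces that appear in the diff; the mydrv_* names are hypothetical stand-ins, not code from the patches:

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Per-request state carried in the blk-mq PDU (sized via tag_set.cmd_size). */
struct mydrv_req {
        int error;
};

static inline struct mydrv_req *mydrv_req(struct request *rq)
{
        return blk_mq_rq_to_pdu(rq);
}

/* Completion-notification path: record the outcome, defer the real completion. */
static void mydrv_hw_done(struct request *rq, bool ok)
{
        mydrv_req(rq)->error = ok ? 0 : -EIO;
        blk_mq_complete_request(rq);    /* no error argument any more */
}

/* blk_mq_ops.complete: apply the stored error when ending the request. */
static void mydrv_complete_rq(struct request *rq)
{
        blk_mq_end_request(rq, mydrv_req(rq)->error);
}

A driver wires this up by pointing .complete at mydrv_complete_rq in its blk_mq_ops and setting tag_set.cmd_size = sizeof(struct mydrv_req), exactly as the blkfront hunks do with blkif_complete_rq and struct blkif_req.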
@@ -829,10 +829,14 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
         offset = (bio->bi_iter.bi_sector &
                   (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
 
-        if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+        switch (bio_op(bio)) {
+        case REQ_OP_DISCARD:
+        case REQ_OP_WRITE_ZEROES:
                 zram_bio_discard(zram, index, offset, bio);
                 bio_endio(bio);
                 return;
+        default:
+                break;
         }
 
         bio_for_each_segment(bvec, bio, iter) {
@@ -1192,6 +1196,8 @@ static int zram_add(void)
         zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE;
         zram->disk->queue->limits.chunk_sectors = 0;
         blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
+        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
+
         /*
          * zram_bio_discard() will clear all logical blocks if logical block
          * size is identical with physical block size(PAGE_SIZE). But if it is
@@ -1201,10 +1207,7 @@ static int zram_add(void)
          * zeroed.
          */
         if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
-                zram->disk->queue->limits.discard_zeroes_data = 1;
-        else
-                zram->disk->queue->limits.discard_zeroes_data = 0;
-        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
+                blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
 
         add_disk(zram->disk);
 
@@ -2218,7 +2218,8 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
         rq->timeout = 60 * HZ;
         bio = rq->bio;
 
-        if (blk_execute_rq(q, cdi->disk, rq, 0)) {
+        blk_execute_rq(q, cdi->disk, rq, 0);
+        if (scsi_req(rq)->result) {
                 struct request_sense *s = req->sense;
                 ret = -EIO;
                 cdi->last_sense = s->sense_key;
@@ -107,7 +107,8 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
         memcpy(scsi_req(rq)->cmd, pc->c, 12);
         if (drive->media == ide_tape)
                 scsi_req(rq)->cmd[13] = REQ_IDETAPE_PC1;
-        error = blk_execute_rq(drive->queue, disk, rq, 0);
+        blk_execute_rq(drive->queue, disk, rq, 0);
+        error = scsi_req(rq)->result ? -EIO : 0;
 put_req:
         blk_put_request(rq);
         return error;
@@ -454,7 +455,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
                 debug_log("%s: I/O error\n", drive->name);
 
                 if (drive->media != ide_tape)
-                        pc->rq->errors++;
+                        scsi_req(pc->rq)->result++;
 
                 if (scsi_req(rq)->cmd[0] == REQUEST_SENSE) {
                         printk(KERN_ERR PFX "%s: I/O error in request "
@@ -488,13 +489,13 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
                 drive->failed_pc = NULL;
 
                 if (ata_misc_request(rq)) {
-                        rq->errors = 0;
+                        scsi_req(rq)->result = 0;
                         error = 0;
                 } else {
 
                         if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
-                                if (rq->errors == 0)
-                                        rq->errors = -EIO;
+                                if (scsi_req(rq)->result == 0)
+                                        scsi_req(rq)->result = -EIO;
                         }
 
                         error = uptodate ? 0 : -EIO;
@@ -247,10 +247,10 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq)
 
         struct cdrom_info *info = drive->driver_data;
 
-        if (!rq->errors)
+        if (!scsi_req(rq)->result)
                 info->write_timeout = jiffies + ATAPI_WAIT_WRITE_BUSY;
 
-        rq->errors = 1;
+        scsi_req(rq)->result = 1;
 
         if (time_after(jiffies, info->write_timeout))
                 return 0;
@@ -294,8 +294,8 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
         }
 
         /* if we have an error, pass CHECK_CONDITION as the SCSI status byte */
-        if (blk_rq_is_scsi(rq) && !rq->errors)
-                rq->errors = SAM_STAT_CHECK_CONDITION;
+        if (blk_rq_is_scsi(rq) && !scsi_req(rq)->result)
+                scsi_req(rq)->result = SAM_STAT_CHECK_CONDITION;
 
         if (blk_noretry_request(rq))
                 do_end_request = 1;
@@ -325,7 +325,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
                  * Arrange to retry the request but be sure to give up if we've
                  * retried too many times.
                  */
-                if (++rq->errors > ERROR_MAX)
+                if (++scsi_req(rq)->result > ERROR_MAX)
                         do_end_request = 1;
                 break;
         case ILLEGAL_REQUEST:
@@ -372,7 +372,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
                         /* go to the default handler for other errors */
                         ide_error(drive, "cdrom_decode_status", stat);
                         return 1;
-                } else if (++rq->errors > ERROR_MAX)
+                } else if (++scsi_req(rq)->result > ERROR_MAX)
                         /* we've racked up too many retries, abort */
                         do_end_request = 1;
         }
@@ -452,7 +452,8 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
                         }
                 }
 
-                error = blk_execute_rq(drive->queue, info->disk, rq, 0);
+                blk_execute_rq(drive->queue, info->disk, rq, 0);
+                error = scsi_req(rq)->result ? -EIO : 0;
 
                 if (buffer)
                         *bufflen = scsi_req(rq)->resid_len;
@@ -683,8 +684,8 @@ out_end:
                 if (cmd->nleft == 0)
                         uptodate = 1;
         } else {
-                if (uptodate <= 0 && rq->errors == 0)
-                        rq->errors = -EIO;
+                if (uptodate <= 0 && scsi_req(rq)->result == 0)
+                        scsi_req(rq)->result = -EIO;
         }
 
         if (uptodate == 0 && rq->bio)
@@ -1379,7 +1380,7 @@ static int ide_cdrom_prep_pc(struct request *rq)
          * appropriate action
          */
         if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) {
-                rq->errors = ILLEGAL_REQUEST;
+                scsi_req(rq)->result = ILLEGAL_REQUEST;
                 return BLKPREP_KILL;
         }
 
@@ -307,7 +307,8 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
         scsi_req_init(rq);
         ide_req(rq)->type = ATA_PRIV_MISC;
         rq->rq_flags = RQF_QUIET;
-        ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
+        blk_execute_rq(drive->queue, cd->disk, rq, 0);
+        ret = scsi_req(rq)->result ? -EIO : 0;
         blk_put_request(rq);
         /*
          * A reset will unlock the door. If it was previously locked,
@@ -173,8 +173,8 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
         *(int *)&scsi_req(rq)->cmd[1] = arg;
         rq->special = setting->set;
 
-        if (blk_execute_rq(q, NULL, rq, 0))
-                ret = rq->errors;
+        blk_execute_rq(q, NULL, rq, 0);
+        ret = scsi_req(rq)->result;
         blk_put_request(rq);
 
         return ret;
@@ -186,7 +186,7 @@ ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq)
 
         err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]);
         if (err)
-                rq->errors = err;
-        ide_complete_rq(drive, err, blk_rq_bytes(rq));
+                scsi_req(rq)->result = err;
+        ide_complete_rq(drive, 0, blk_rq_bytes(rq));
         return ide_stopped;
 }
@@ -470,7 +470,6 @@ ide_devset_get(multcount, mult_count);
 static int set_multcount(ide_drive_t *drive, int arg)
 {
         struct request *rq;
-        int error;
 
         if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff))
                 return -EINVAL;
@@ -484,7 +483,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
 
         drive->mult_req = arg;
         drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
-        error = blk_execute_rq(drive->queue, NULL, rq, 0);
+        blk_execute_rq(drive->queue, NULL, rq, 0);
         blk_put_request(rq);
 
         return (drive->mult_count == arg) ? 0 : -EIO;
@@ -490,7 +490,7 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
          * make sure request is sane
          */
         if (hwif->rq)
-                hwif->rq->errors = 0;
+                scsi_req(hwif->rq)->result = 0;
         return ret;
 }
 
@@ -12,7 +12,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
         if ((stat & ATA_BUSY) ||
             ((stat & ATA_DF) && (drive->dev_flags & IDE_DFLAG_NOWERR) == 0)) {
                 /* other bits are useless when BUSY */
-                rq->errors |= ERROR_RESET;
+                scsi_req(rq)->result |= ERROR_RESET;
         } else if (stat & ATA_ERR) {
                 /* err has different meaning on cdrom and tape */
                 if (err == ATA_ABORTED) {
@@ -25,10 +25,10 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
                         drive->crc_count++;
                 } else if (err & (ATA_BBK | ATA_UNC)) {
                         /* retries won't help these */
-                        rq->errors = ERROR_MAX;
+                        scsi_req(rq)->result = ERROR_MAX;
                 } else if (err & ATA_TRK0NF) {
                         /* help it find track zero */
-                        rq->errors |= ERROR_RECAL;
+                        scsi_req(rq)->result |= ERROR_RECAL;
                 }
         }
 
@@ -39,23 +39,23 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
                 ide_pad_transfer(drive, READ, nsect * SECTOR_SIZE);
         }
 
-        if (rq->errors >= ERROR_MAX || blk_noretry_request(rq)) {
+        if (scsi_req(rq)->result >= ERROR_MAX || blk_noretry_request(rq)) {
                 ide_kill_rq(drive, rq);
                 return ide_stopped;
         }
 
         if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ))
-                rq->errors |= ERROR_RESET;
+                scsi_req(rq)->result |= ERROR_RESET;
 
-        if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
-                ++rq->errors;
+        if ((scsi_req(rq)->result & ERROR_RESET) == ERROR_RESET) {
+                ++scsi_req(rq)->result;
                 return ide_do_reset(drive);
         }
 
-        if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
+        if ((scsi_req(rq)->result & ERROR_RECAL) == ERROR_RECAL)
                 drive->special_flags |= IDE_SFLAG_RECALIBRATE;
 
-        ++rq->errors;
+        ++scsi_req(rq)->result;
 
         return ide_stopped;
 }
@@ -68,7 +68,7 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq,
         if ((stat & ATA_BUSY) ||
             ((stat & ATA_DF) && (drive->dev_flags & IDE_DFLAG_NOWERR) == 0)) {
                 /* other bits are useless when BUSY */
-                rq->errors |= ERROR_RESET;
+                scsi_req(rq)->result |= ERROR_RESET;
         } else {
                 /* add decoding error stuff */
         }
@@ -77,14 +77,14 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq,
                 /* force an abort */
                 hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE);
 
-        if (rq->errors >= ERROR_MAX) {
+        if (scsi_req(rq)->result >= ERROR_MAX) {
                 ide_kill_rq(drive, rq);
         } else {
-                if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
-                        ++rq->errors;
+                if ((scsi_req(rq)->result & ERROR_RESET) == ERROR_RESET) {
+                        ++scsi_req(rq)->result;
                         return ide_do_reset(drive);
                 }
-                ++rq->errors;
+                ++scsi_req(rq)->result;
         }
 
         return ide_stopped;
@@ -130,11 +130,11 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
                 if (cmd)
                         ide_complete_cmd(drive, cmd, stat, err);
         } else if (ata_pm_request(rq)) {
-                rq->errors = 1;
+                scsi_req(rq)->result = 1;
                 ide_complete_pm_rq(drive, rq);
                 return ide_stopped;
         }
-        rq->errors = err;
+        scsi_req(rq)->result = err;
         ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
         return ide_stopped;
 }
@@ -149,8 +149,8 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
 
         if (rq && ata_misc_request(rq) &&
             scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
-                if (err <= 0 && rq->errors == 0)
-                        rq->errors = -EIO;
+                if (err <= 0 && scsi_req(rq)->result == 0)
+                        scsi_req(rq)->result = -EIO;
                 ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
         }
 }
@@ -98,7 +98,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
         }
 
         if (ata_misc_request(rq))
-                rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
+                scsi_req(rq)->result = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
 
         return uptodate;
 }
@@ -239,7 +239,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
                   ? rq->rq_disk->disk_name
                   : "dev?"));
 
-        if (rq->errors >= ERROR_MAX) {
+        if (scsi_req(rq)->result >= ERROR_MAX) {
                 if (drive->failed_pc) {
                         ide_floppy_report_error(floppy, drive->failed_pc);
                         drive->failed_pc = NULL;
@@ -247,7 +247,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
                         printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
 
                 if (ata_misc_request(rq)) {
-                        rq->errors = 0;
+                        scsi_req(rq)->result = 0;
                         ide_complete_rq(drive, 0, blk_rq_bytes(rq));
                         return ide_stopped;
                 } else
@@ -301,8 +301,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
         return ide_floppy_issue_pc(drive, &cmd, pc);
 out_end:
         drive->failed_pc = NULL;
-        if (blk_rq_is_passthrough(rq) && rq->errors == 0)
-                rq->errors = -EIO;
+        if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
+                scsi_req(rq)->result = -EIO;
         ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
         return ide_stopped;
 }
@@ -141,12 +141,12 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
                 drive->failed_pc = NULL;
 
         if ((media == ide_floppy || media == ide_tape) && drv_req) {
-                rq->errors = 0;
+                scsi_req(rq)->result = 0;
         } else {
                 if (media == ide_tape)
-                        rq->errors = IDE_DRV_ERROR_GENERAL;
-                else if (blk_rq_is_passthrough(rq) && rq->errors == 0)
-                        rq->errors = -EIO;
+                        scsi_req(rq)->result = IDE_DRV_ERROR_GENERAL;
+                else if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
+                        scsi_req(rq)->result = -EIO;
         }
 
         ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
@@ -271,7 +271,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
 #ifdef DEBUG
         printk("%s: DRIVE_CMD (null)\n", drive->name);
 #endif
-        rq->errors = 0;
+        scsi_req(rq)->result = 0;
         ide_complete_rq(drive, 0, blk_rq_bytes(rq));
 
         return ide_stopped;
@@ -128,7 +128,8 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
         rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
         scsi_req_init(rq);
         ide_req(rq)->type = ATA_PRIV_TASKFILE;
-        err = blk_execute_rq(drive->queue, NULL, rq, 0);
+        blk_execute_rq(drive->queue, NULL, rq, 0);
+        err = scsi_req(rq)->result ? -EIO : 0;
         blk_put_request(rq);
 
         return err;
@@ -227,8 +228,8 @@ static int generic_drive_reset(ide_drive_t *drive)
         ide_req(rq)->type = ATA_PRIV_MISC;
         scsi_req(rq)->cmd_len = 1;
         scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
-        if (blk_execute_rq(drive->queue, NULL, rq, 1))
-                ret = rq->errors;
+        blk_execute_rq(drive->queue, NULL, rq, 1);
+        ret = scsi_req(rq)->result;
         blk_put_request(rq);
         return ret;
 }
@@ -37,7 +37,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
         scsi_req(rq)->cmd_len = 1;
         ide_req(rq)->type = ATA_PRIV_MISC;
         rq->special = &timeout;
-        rc = blk_execute_rq(q, NULL, rq, 1);
+        blk_execute_rq(q, NULL, rq, 1);
+        rc = scsi_req(rq)->result ? -EIO : 0;
         blk_put_request(rq);
         if (rc)
                 goto out;
@@ -27,7 +27,8 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
         mesg.event = PM_EVENT_FREEZE;
         rqpm.pm_state = mesg.event;
 
-        ret = blk_execute_rq(drive->queue, NULL, rq, 0);
+        blk_execute_rq(drive->queue, NULL, rq, 0);
+        ret = scsi_req(rq)->result ? -EIO : 0;
         blk_put_request(rq);
 
         if (ret == 0 && ide_port_acpi(hwif)) {
@@ -55,8 +56,8 @@ static int ide_pm_execute_rq(struct request *rq)
         spin_lock_irq(q->queue_lock);
         if (unlikely(blk_queue_dying(q))) {
                 rq->rq_flags |= RQF_QUIET;
-                rq->errors = -ENXIO;
-                __blk_end_request_all(rq, rq->errors);
+                scsi_req(rq)->result = -ENXIO;
+                __blk_end_request_all(rq, 0);
                 spin_unlock_irq(q->queue_lock);
                 return -ENXIO;
         }
@@ -66,7 +67,7 @@ static int ide_pm_execute_rq(struct request *rq)
 
         wait_for_completion_io(&wait);
 
-        return rq->errors ? -EIO : 0;
+        return scsi_req(rq)->result ? -EIO : 0;
 }
 
 int generic_ide_resume(struct device *dev)
@@ -366,7 +366,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
                         err = pc->error;
                 }
         }
-        rq->errors = err;
+        scsi_req(rq)->result = err;
 
         return uptodate;
 }
@@ -879,7 +879,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
                 tape->valid = 0;
 
         ret = size;
-        if (rq->errors == IDE_DRV_ERROR_GENERAL)
+        if (scsi_req(rq)->result == IDE_DRV_ERROR_GENERAL)
                 ret = -EIO;
 out_put:
         blk_put_request(rq);
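The cdrom and ide hunks above all apply the same two-part conversion: the per-request error value moves from rq->errors to scsi_req(rq)->result, and callers stop consuming a return value from blk_execute_rq(), reading the result field afterwards instead. A sketch of the resulting caller pattern under those assumptions; mydrv_execute() is a hypothetical wrapper, while blk_execute_rq(), blk_put_request() and scsi_req(rq)->result are the interfaces shown in the diff:

#include <linux/blkdev.h>
#include <scsi/scsi_request.h>

/* Hypothetical wrapper showing the caller pattern used by the hunks above. */
static int mydrv_execute(struct request_queue *q, struct gendisk *disk,
                         struct request *rq)
{
        int error;

        blk_execute_rq(q, disk, rq, 0);                 /* outcome no longer returned here */
        error = scsi_req(rq)->result ? -EIO : 0;        /* it lives in struct scsi_request */
        blk_put_request(rq);
        return error;
}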
Some files were not shown because too many files have changed in this diff.