mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-06-20 05:31:15 +00:00
pnfs-obj: osd raid engine read/write implementation
With the use of the in-kernel osd library. Implement read/write of data from/to osd-objects according to information specified in the objects-layout. Support for stripping over mirrors with a received stripe_unit. There are however a few constrains which are not supported: 1. Stripe Unit must be a multiple of PAGE_SIZE 2. stripe length (stripe_unit * number_of_stripes) can not be bigger then 32bit. Also support raid-groups and partial-layout. Partial-layout is when not all the groups are received on the line, addressing only a partial range of the file. TODO: Only raid0! raid 4/5/6 support will come at later stage A none supported layout will send IO through the MDS [Important fallout from the last rebase] Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> [gfp_flags] Signed-off-by: Benny Halevy <bhalevy@panasas.com>
This commit is contained in:
parent
d20581aa4b
commit
04f8345038
3 changed files with 901 additions and 0 deletions
|
@ -128,6 +128,260 @@ objlayout_free_lseg(struct pnfs_layout_segment *lseg)
|
|||
objio_free_lseg(lseg);
|
||||
}
|
||||
|
||||
/*
|
||||
* I/O Operations
|
||||
*/
|
||||
static inline u64
|
||||
end_offset(u64 start, u64 len)
|
||||
{
|
||||
u64 end;
|
||||
|
||||
end = start + len;
|
||||
return end >= start ? end : NFS4_MAX_UINT64;
|
||||
}
|
||||
|
||||
/* last octet in a range */
|
||||
static inline u64
|
||||
last_byte_offset(u64 start, u64 len)
|
||||
{
|
||||
u64 end;
|
||||
|
||||
BUG_ON(!len);
|
||||
end = start + len;
|
||||
return end > start ? end - 1 : NFS4_MAX_UINT64;
|
||||
}
|
||||
|
||||
static struct objlayout_io_state *
|
||||
objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
|
||||
struct page **pages,
|
||||
unsigned pgbase,
|
||||
loff_t offset,
|
||||
size_t count,
|
||||
struct pnfs_layout_segment *lseg,
|
||||
void *rpcdata,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
struct objlayout_io_state *state;
|
||||
u64 lseg_end_offset;
|
||||
|
||||
dprintk("%s: allocating io_state\n", __func__);
|
||||
if (objio_alloc_io_state(lseg, &state, gfp_flags))
|
||||
return NULL;
|
||||
|
||||
BUG_ON(offset < lseg->pls_range.offset);
|
||||
lseg_end_offset = end_offset(lseg->pls_range.offset,
|
||||
lseg->pls_range.length);
|
||||
BUG_ON(offset >= lseg_end_offset);
|
||||
if (offset + count > lseg_end_offset) {
|
||||
count = lseg->pls_range.length -
|
||||
(offset - lseg->pls_range.offset);
|
||||
dprintk("%s: truncated count %Zd\n", __func__, count);
|
||||
}
|
||||
|
||||
if (pgbase > PAGE_SIZE) {
|
||||
pages += pgbase >> PAGE_SHIFT;
|
||||
pgbase &= ~PAGE_MASK;
|
||||
}
|
||||
|
||||
state->lseg = lseg;
|
||||
state->rpcdata = rpcdata;
|
||||
state->pages = pages;
|
||||
state->pgbase = pgbase;
|
||||
state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
state->offset = offset;
|
||||
state->count = count;
|
||||
state->sync = 0;
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
static void
|
||||
objlayout_free_io_state(struct objlayout_io_state *state)
|
||||
{
|
||||
dprintk("%s: freeing io_state\n", __func__);
|
||||
if (unlikely(!state))
|
||||
return;
|
||||
|
||||
objio_free_io_state(state);
|
||||
}
|
||||
|
||||
/*
|
||||
* I/O done common code
|
||||
*/
|
||||
static void
|
||||
objlayout_iodone(struct objlayout_io_state *state)
|
||||
{
|
||||
dprintk("%s: state %p status\n", __func__, state);
|
||||
|
||||
objlayout_free_io_state(state);
|
||||
}
|
||||
|
||||
/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
|
||||
* This is because the osd completion is called with ints-off from
|
||||
* the block layer
|
||||
*/
|
||||
static void _rpc_read_complete(struct work_struct *work)
|
||||
{
|
||||
struct rpc_task *task;
|
||||
struct nfs_read_data *rdata;
|
||||
|
||||
dprintk("%s enter\n", __func__);
|
||||
task = container_of(work, struct rpc_task, u.tk_work);
|
||||
rdata = container_of(task, struct nfs_read_data, task);
|
||||
|
||||
pnfs_ld_read_done(rdata);
|
||||
}
|
||||
|
||||
void
|
||||
objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
|
||||
{
|
||||
int eof = state->eof;
|
||||
struct nfs_read_data *rdata;
|
||||
|
||||
state->status = status;
|
||||
dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof);
|
||||
rdata = state->rpcdata;
|
||||
rdata->task.tk_status = status;
|
||||
if (status >= 0) {
|
||||
rdata->res.count = status;
|
||||
rdata->res.eof = eof;
|
||||
}
|
||||
objlayout_iodone(state);
|
||||
/* must not use state after this point */
|
||||
|
||||
if (sync)
|
||||
pnfs_ld_read_done(rdata);
|
||||
else {
|
||||
INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
|
||||
schedule_work(&rdata->task.u.tk_work);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform sync or async reads.
|
||||
*/
|
||||
enum pnfs_try_status
|
||||
objlayout_read_pagelist(struct nfs_read_data *rdata)
|
||||
{
|
||||
loff_t offset = rdata->args.offset;
|
||||
size_t count = rdata->args.count;
|
||||
struct objlayout_io_state *state;
|
||||
ssize_t status = 0;
|
||||
loff_t eof;
|
||||
|
||||
dprintk("%s: Begin inode %p offset %llu count %d\n",
|
||||
__func__, rdata->inode, offset, (int)count);
|
||||
|
||||
eof = i_size_read(rdata->inode);
|
||||
if (unlikely(offset + count > eof)) {
|
||||
if (offset >= eof) {
|
||||
status = 0;
|
||||
rdata->res.count = 0;
|
||||
rdata->res.eof = 1;
|
||||
goto out;
|
||||
}
|
||||
count = eof - offset;
|
||||
}
|
||||
|
||||
state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
|
||||
rdata->args.pages, rdata->args.pgbase,
|
||||
offset, count,
|
||||
rdata->lseg, rdata,
|
||||
GFP_KERNEL);
|
||||
if (unlikely(!state)) {
|
||||
status = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
state->eof = state->offset + state->count >= eof;
|
||||
|
||||
status = objio_read_pagelist(state);
|
||||
out:
|
||||
dprintk("%s: Return status %Zd\n", __func__, status);
|
||||
rdata->pnfs_error = status;
|
||||
return PNFS_ATTEMPTED;
|
||||
}
|
||||
|
||||
/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
|
||||
* This is because the osd completion is called with ints-off from
|
||||
* the block layer
|
||||
*/
|
||||
static void _rpc_write_complete(struct work_struct *work)
|
||||
{
|
||||
struct rpc_task *task;
|
||||
struct nfs_write_data *wdata;
|
||||
|
||||
dprintk("%s enter\n", __func__);
|
||||
task = container_of(work, struct rpc_task, u.tk_work);
|
||||
wdata = container_of(task, struct nfs_write_data, task);
|
||||
|
||||
pnfs_ld_write_done(wdata);
|
||||
}
|
||||
|
||||
void
|
||||
objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
|
||||
bool sync)
|
||||
{
|
||||
struct nfs_write_data *wdata;
|
||||
|
||||
dprintk("%s: Begin\n", __func__);
|
||||
wdata = state->rpcdata;
|
||||
state->status = status;
|
||||
wdata->task.tk_status = status;
|
||||
if (status >= 0) {
|
||||
wdata->res.count = status;
|
||||
wdata->verf.committed = state->committed;
|
||||
dprintk("%s: Return status %d committed %d\n",
|
||||
__func__, wdata->task.tk_status,
|
||||
wdata->verf.committed);
|
||||
} else
|
||||
dprintk("%s: Return status %d\n",
|
||||
__func__, wdata->task.tk_status);
|
||||
objlayout_iodone(state);
|
||||
/* must not use state after this point */
|
||||
|
||||
if (sync)
|
||||
pnfs_ld_write_done(wdata);
|
||||
else {
|
||||
INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
|
||||
schedule_work(&wdata->task.u.tk_work);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform sync or async writes.
|
||||
*/
|
||||
enum pnfs_try_status
|
||||
objlayout_write_pagelist(struct nfs_write_data *wdata,
|
||||
int how)
|
||||
{
|
||||
struct objlayout_io_state *state;
|
||||
ssize_t status;
|
||||
|
||||
dprintk("%s: Begin inode %p offset %llu count %u\n",
|
||||
__func__, wdata->inode, wdata->args.offset, wdata->args.count);
|
||||
|
||||
state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
|
||||
wdata->args.pages,
|
||||
wdata->args.pgbase,
|
||||
wdata->args.offset,
|
||||
wdata->args.count,
|
||||
wdata->lseg, wdata,
|
||||
GFP_NOFS);
|
||||
if (unlikely(!state)) {
|
||||
status = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
state->sync = how & FLUSH_SYNC;
|
||||
|
||||
status = objio_write_pagelist(state, how & FLUSH_STABLE);
|
||||
out:
|
||||
dprintk("%s: Return status %Zd\n", __func__, status);
|
||||
wdata->pnfs_error = status;
|
||||
return PNFS_ATTEMPTED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get Device Info API for io engines
|
||||
*/
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue