staging/lustre/clio: cl_lock simplification
This patch eliminates the cl_lock cache. cl_lock becomes a cacheless data container that describes the lock requirements needed to complete an I/O: it is created before the I/O starts and destroyed once the I/O completes. cl_lock relies on an LDLM lock to provide the actual lock semantics; the LDLM lock is attached to the cl_lock at the OSC layer and remains cacheable.

Two major methods are supported for cl_lock: clo_enqueue and clo_cancel. A cl_lock is enqueued by cl_lock_request(), which calls the clo_enqueue() method of each layer. At the LOV layer, if a cl_lock consists of multiple sub-locks, each sub-lock is enqueued correspondingly. At the OSC layer, the enqueue request tries to reuse a cached LDLM lock; otherwise a new LDLM lock has to be requested from the OST.

cl_lock_cancel() must be called to release a cl_lock after use. The clo_cancel() method is called for each layer to release the resources held by the lock. At the OSC layer, the LDLM lock reference taken at clo_enqueue time is dropped. An LDLM lock can only be canceled when no cl_lock is using it.

Signed-off-by: Bobi Jam <bobijam.xu@intel.com>
Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-on: http://review.whamcloud.com/10858
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3259
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Signed-off-by: Oleg Drokin <green@linuxhacker.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent e5c4e635c3
commit 06563b5606
33 changed files with 1192 additions and 5549 deletions
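For illustration only (not part of the patch text): a minimal caller-side sketch of the cacheless cl_lock lifecycle described above, modeled on the callers this commit converts (such as cl_glimpse_lock() and cl_local_size()); the wrapper function name is hypothetical.

/* Sketch: request a cl_lock around an IO, then drop it when the IO is done. */
static int demo_lock_around_io(const struct lu_env *env, struct cl_io *io,
                               struct cl_object *clob)
{
        struct cl_lock *lock = ccc_env_lock(env);       /* per-thread, zeroed cl_lock */
        int rc;

        /* Describe what this IO needs; nothing is looked up in a cl_lock cache. */
        lock->cll_descr.cld_obj       = clob;
        lock->cll_descr.cld_mode      = CLM_READ;
        lock->cll_descr.cld_start     = 0;
        lock->cll_descr.cld_end       = CL_PAGE_EOF;
        lock->cll_descr.cld_enq_flags = CEF_MUST;

        /* Enqueue through all layers; OSC reuses or requests an LDLM lock. */
        rc = cl_lock_request(env, io, lock);
        if (rc < 0)
                return rc;

        /* ... do the IO covered by this lock ... */

        /* clo_cancel() at each layer drops the LDLM reference taken at enqueue. */
        cl_lock_release(env, lock);
        return 0;
}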
@@ -82,7 +82,6 @@
* - i_mutex
* - PG_locked
* - cl_object_header::coh_page_guard
* - cl_object_header::coh_lock_guard
* - lu_site::ls_guard
*
* See the top comment in cl_object.c for the description of overall locking and
@@ -404,16 +403,6 @@ struct cl_object_header {
* here.
*/
struct lu_object_header coh_lu;
/** \name locks
* \todo XXX move locks below to the separate cache-lines, they are
* mostly useless otherwise.
*/
/** @{ */
/** Lock protecting lock list. */
spinlock_t coh_lock_guard;
/** @} locks */
/** List of cl_lock's granted for this object. */
struct list_head coh_locks;

/**
* Parent object. It is assumed that an object has a well-defined
@@ -795,16 +784,9 @@ struct cl_page_slice {
/**
* Lock mode. For the client extent locks.
*
* \warning: cl_lock_mode_match() assumes particular ordering here.
* \ingroup cl_lock
*/
enum cl_lock_mode {
/**
* Mode of a lock that protects no data, and exists only as a
* placeholder. This is used for `glimpse' requests. A phantom lock
* might get promoted to real lock at some point.
*/
CLM_PHANTOM,
CLM_READ,
CLM_WRITE,
CLM_GROUP
@@ -1114,12 +1096,6 @@ static inline struct page *cl_page_vmpage(struct cl_page *page)
* (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to
* cl_lock::cll_layers list through cl_lock_slice::cls_linkage.
*
* All locks for a given object are linked into cl_object_header::coh_locks
* list (protected by cl_object_header::coh_lock_guard spin-lock) through
* cl_lock::cll_linkage. Currently this list is not sorted in any way. We can
* sort it in starting lock offset, or use altogether different data structure
* like a tree.
*
* Typical cl_lock consists of the two layers:
*
* - vvp_lock (vvp specific data), and
@ -1320,289 +1296,21 @@ struct cl_lock_descr {
|
|||
__u32 cld_enq_flags;
|
||||
};
|
||||
|
||||
#define DDESCR "%s(%d):[%lu, %lu]"
|
||||
#define DDESCR "%s(%d):[%lu, %lu]:%x"
|
||||
#define PDESCR(descr) \
|
||||
cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode, \
|
||||
(descr)->cld_start, (descr)->cld_end
|
||||
(descr)->cld_start, (descr)->cld_end, (descr)->cld_enq_flags
|
||||
|
||||
const char *cl_lock_mode_name(const enum cl_lock_mode mode);
|
||||
|
||||
/**
|
||||
* Lock state-machine states.
|
||||
*
|
||||
* \htmlonly
|
||||
* <pre>
|
||||
*
|
||||
* Possible state transitions:
|
||||
*
|
||||
* +------------------>NEW
|
||||
* | |
|
||||
* | | cl_enqueue_try()
|
||||
* | |
|
||||
* | cl_unuse_try() V
|
||||
* | +--------------QUEUING (*)
|
||||
* | | |
|
||||
* | | | cl_enqueue_try()
|
||||
* | | |
|
||||
* | | cl_unuse_try() V
|
||||
* sub-lock | +-------------ENQUEUED (*)
|
||||
* canceled | | |
|
||||
* | | | cl_wait_try()
|
||||
* | | |
|
||||
* | | (R)
|
||||
* | | |
|
||||
* | | V
|
||||
* | | HELD<---------+
|
||||
* | | | |
|
||||
* | | | | cl_use_try()
|
||||
* | | cl_unuse_try() | |
|
||||
* | | | |
|
||||
* | | V ---+
|
||||
* | +------------>INTRANSIT (D) <--+
|
||||
* | | |
|
||||
* | cl_unuse_try() | | cached lock found
|
||||
* | | | cl_use_try()
|
||||
* | | |
|
||||
* | V |
|
||||
* +------------------CACHED---------+
|
||||
* |
|
||||
* (C)
|
||||
* |
|
||||
* V
|
||||
* FREEING
|
||||
*
|
||||
* Legend:
|
||||
*
|
||||
* In states marked with (*) transition to the same state (i.e., a loop
|
||||
* in the diagram) is possible.
|
||||
*
|
||||
* (R) is the point where Receive call-back is invoked: it allows layers
|
||||
* to handle arrival of lock reply.
|
||||
*
|
||||
* (C) is the point where Cancellation call-back is invoked.
|
||||
*
|
||||
* (D) is the transit state which means the lock is changing.
|
||||
*
|
||||
* Transition to FREEING state is possible from any other state in the
|
||||
* diagram in case of unrecoverable error.
|
||||
* </pre>
|
||||
* \endhtmlonly
|
||||
*
|
||||
* These states are for individual cl_lock object. Top-lock and its sub-locks
|
||||
* can be in the different states. Another way to say this is that we have
|
||||
* nested state-machines.
|
||||
*
|
||||
* Separate QUEUING and ENQUEUED states are needed to support non-blocking
|
||||
* operation for locks with multiple sub-locks. Imagine lock on a file F, that
|
||||
* intersects 3 stripes S0, S1, and S2. To enqueue F client has to send
|
||||
* enqueue to S0, wait for its completion, then send enqueue for S1, wait for
|
||||
* its completion and at last enqueue lock for S2, and wait for its
|
||||
* completion. In that case, top-lock is in QUEUING state while S0, S1 are
|
||||
* handled, and is in ENQUEUED state after enqueue to S2 has been sent (note
|
||||
* that in this case, sub-locks move from state to state, and top-lock remains
|
||||
* in the same state).
|
||||
*/
|
||||
enum cl_lock_state {
|
||||
/**
|
||||
* Lock that wasn't yet enqueued
|
||||
*/
|
||||
CLS_NEW,
|
||||
/**
|
||||
* Enqueue is in progress, blocking for some intermediate interaction
|
||||
* with the other side.
|
||||
*/
|
||||
CLS_QUEUING,
|
||||
/**
|
||||
* Lock is fully enqueued, waiting for server to reply when it is
|
||||
* granted.
|
||||
*/
|
||||
CLS_ENQUEUED,
|
||||
/**
|
||||
* Lock granted, actively used by some IO.
|
||||
*/
|
||||
CLS_HELD,
|
||||
/**
|
||||
* This state is used to mark the lock is being used, or unused.
|
||||
* We need this state because the lock may have several sublocks,
|
||||
* so it's impossible to have an atomic way to bring all sublocks
|
||||
* into CLS_HELD state at use case, or all sublocks to CLS_CACHED
|
||||
* at unuse case.
|
||||
* If a thread is referring to a lock, and it sees the lock is in this
|
||||
* state, it must wait for the lock.
|
||||
* See state diagram for details.
|
||||
*/
|
||||
CLS_INTRANSIT,
|
||||
/**
|
||||
* Lock granted, not used.
|
||||
*/
|
||||
CLS_CACHED,
|
||||
/**
|
||||
* Lock is being destroyed.
|
||||
*/
|
||||
CLS_FREEING,
|
||||
CLS_NR
|
||||
};
|
||||
|
||||
enum cl_lock_flags {
|
||||
/**
|
||||
* lock has been cancelled. This flag is never cleared once set (by
|
||||
* cl_lock_cancel0()).
|
||||
*/
|
||||
CLF_CANCELLED = 1 << 0,
|
||||
/** cancellation is pending for this lock. */
|
||||
CLF_CANCELPEND = 1 << 1,
|
||||
/** destruction is pending for this lock. */
|
||||
CLF_DOOMED = 1 << 2,
|
||||
/** from enqueue RPC reply upcall. */
|
||||
CLF_FROM_UPCALL = 1 << 3,
|
||||
};
|
||||
|
||||
/**
|
||||
* Lock closure.
|
||||
*
|
||||
* Lock closure is a collection of locks (both top-locks and sub-locks) that
|
||||
* might be updated in a result of an operation on a certain lock (which lock
|
||||
* this is a closure of).
|
||||
*
|
||||
* Closures are needed to guarantee dead-lock freedom in the presence of
|
||||
*
|
||||
* - nested state-machines (top-lock state-machine composed of sub-lock
|
||||
* state-machines), and
|
||||
*
|
||||
* - shared sub-locks.
|
||||
*
|
||||
* Specifically, many operations, such as lock enqueue, wait, unlock,
|
||||
* etc. start from a top-lock, and then operate on a sub-locks of this
|
||||
* top-lock, holding a top-lock mutex. When sub-lock state changes as a result
|
||||
* of such operation, this change has to be propagated to all top-locks that
|
||||
* share this sub-lock. Obviously, no natural lock ordering (e.g.,
|
||||
* top-to-bottom or bottom-to-top) captures this scenario, so try-locking has
|
||||
* to be used. Lock closure systematizes this try-and-repeat logic.
|
||||
*/
|
||||
struct cl_lock_closure {
|
||||
/**
|
||||
* Lock that is mutexed when closure construction is started. When
|
||||
* closure is in `wait' mode (cl_lock_closure::clc_wait), mutex on
|
||||
* origin is released before waiting.
|
||||
*/
|
||||
struct cl_lock *clc_origin;
|
||||
/**
|
||||
* List of enclosed locks, so far. Locks are linked here through
|
||||
* cl_lock::cll_inclosure.
|
||||
*/
|
||||
struct list_head clc_list;
|
||||
/**
|
||||
* True iff closure is in a `wait' mode. This determines what
|
||||
* cl_lock_enclosure() does when a lock L to be added to the closure
|
||||
* is currently mutexed by some other thread.
|
||||
*
|
||||
* If cl_lock_closure::clc_wait is not set, then closure construction
|
||||
* fails with CLO_REPEAT immediately.
|
||||
*
|
||||
* In wait mode, cl_lock_enclosure() waits until next attempt to build
|
||||
* a closure might succeed. To this end it releases an origin mutex
|
||||
* (cl_lock_closure::clc_origin), that has to be the only lock mutex
|
||||
* owned by the current thread, and then waits on L mutex (by grabbing
|
||||
* it and immediately releasing), before returning CLO_REPEAT to the
|
||||
* caller.
|
||||
*/
|
||||
int clc_wait;
|
||||
/** Number of locks in the closure. */
|
||||
int clc_nr;
|
||||
};
|
||||
|
||||
/**
|
||||
* Layered client lock.
|
||||
*/
|
||||
struct cl_lock {
|
||||
/** Reference counter. */
|
||||
atomic_t cll_ref;
|
||||
/** List of slices. Immutable after creation. */
|
||||
struct list_head cll_layers;
|
||||
/**
|
||||
* Linkage into cl_lock::cll_descr::cld_obj::coh_locks list. Protected
|
||||
* by cl_lock::cll_descr::cld_obj::coh_lock_guard.
|
||||
*/
|
||||
struct list_head cll_linkage;
|
||||
/**
|
||||
* Parameters of this lock. Protected by
|
||||
* cl_lock::cll_descr::cld_obj::coh_lock_guard nested within
|
||||
* cl_lock::cll_guard. Modified only on lock creation and in
|
||||
* cl_lock_modify().
|
||||
*/
|
||||
/** lock attribute, extent, cl_object, etc. */
|
||||
struct cl_lock_descr cll_descr;
|
||||
/** Protected by cl_lock::cll_guard. */
|
||||
enum cl_lock_state cll_state;
|
||||
/** signals state changes. */
|
||||
wait_queue_head_t cll_wq;
|
||||
/**
|
||||
* Recursive lock, most fields in cl_lock{} are protected by this.
|
||||
*
|
||||
* Locking rules: this mutex is never held across network
|
||||
* communication, except when lock is being canceled.
|
||||
*
|
||||
* Lock ordering: a mutex of a sub-lock is taken first, then a mutex
|
||||
* on a top-lock. Other direction is implemented through a
|
||||
* try-lock-repeat loop. Mutices of unrelated locks can be taken only
|
||||
* by try-locking.
|
||||
*
|
||||
* \see osc_lock_enqueue_wait(), lov_lock_cancel(), lov_sublock_wait().
|
||||
*/
|
||||
struct mutex cll_guard;
|
||||
struct task_struct *cll_guarder;
|
||||
int cll_depth;
|
||||
|
||||
/**
|
||||
* the owner for INTRANSIT state
|
||||
*/
|
||||
struct task_struct *cll_intransit_owner;
|
||||
int cll_error;
|
||||
/**
|
||||
* Number of holds on a lock. A hold prevents a lock from being
|
||||
* canceled and destroyed. Protected by cl_lock::cll_guard.
|
||||
*
|
||||
* \see cl_lock_hold(), cl_lock_unhold(), cl_lock_release()
|
||||
*/
|
||||
int cll_holds;
|
||||
/**
|
||||
* Number of lock users. Valid in cl_lock_state::CLS_HELD state
|
||||
* only. Lock user pins lock in CLS_HELD state. Protected by
|
||||
* cl_lock::cll_guard.
|
||||
*
|
||||
* \see cl_wait(), cl_unuse().
|
||||
*/
|
||||
int cll_users;
|
||||
/**
|
||||
* Flag bit-mask. Values from enum cl_lock_flags. Updates are
|
||||
* protected by cl_lock::cll_guard.
|
||||
*/
|
||||
unsigned long cll_flags;
|
||||
/**
|
||||
* A linkage into a list of locks in a closure.
|
||||
*
|
||||
* \see cl_lock_closure
|
||||
*/
|
||||
struct list_head cll_inclosure;
|
||||
/**
|
||||
* Conflict lock at queuing time.
|
||||
*/
|
||||
struct cl_lock *cll_conflict;
|
||||
/**
|
||||
* A list of references to this lock, for debugging.
|
||||
*/
|
||||
struct lu_ref cll_reference;
|
||||
/**
|
||||
* A list of holds on this lock, for debugging.
|
||||
*/
|
||||
struct lu_ref cll_holders;
|
||||
/**
|
||||
* A reference for cl_lock::cll_descr::cld_obj. For debugging.
|
||||
*/
|
||||
struct lu_ref_link cll_obj_ref;
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
/* "dep_map" name is assumed by lockdep.h macros. */
|
||||
struct lockdep_map dep_map;
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -1621,171 +1329,33 @@ struct cl_lock_slice {
|
|||
struct list_head cls_linkage;
|
||||
};
|
||||
|
||||
/**
|
||||
* Possible (non-error) return values of ->clo_{enqueue,wait,unlock}().
|
||||
*
|
||||
* NOTE: lov_subresult() depends on ordering here.
|
||||
*/
|
||||
enum cl_lock_transition {
|
||||
/** operation cannot be completed immediately. Wait for state change. */
|
||||
CLO_WAIT = 1,
|
||||
/** operation had to release lock mutex, restart. */
|
||||
CLO_REPEAT = 2,
|
||||
/** lower layer re-enqueued. */
|
||||
CLO_REENQUEUED = 3,
|
||||
};
|
||||
|
||||
/**
|
||||
*
|
||||
* \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops
|
||||
*/
|
||||
struct cl_lock_operations {
|
||||
/**
|
||||
* \name statemachine
|
||||
*
|
||||
* State machine transitions. These 3 methods are called to transfer
|
||||
* lock from one state to another, as described in the commentary
|
||||
* above enum #cl_lock_state.
|
||||
*
|
||||
* \retval 0 this layer has nothing more to do before
|
||||
* transition to the target state happens;
|
||||
*
|
||||
* \retval CLO_REPEAT method had to release and re-acquire cl_lock
|
||||
* mutex, repeat invocation of transition method
|
||||
* across all layers;
|
||||
*
|
||||
* \retval CLO_WAIT this layer cannot move to the target state
|
||||
* immediately, as it has to wait for certain event
|
||||
* (e.g., the communication with the server). It
|
||||
* is guaranteed, that when the state transfer
|
||||
* becomes possible, cl_lock::cll_wq wait-queue
|
||||
* is signaled. Caller can wait for this event by
|
||||
* calling cl_lock_state_wait();
|
||||
*
|
||||
* \retval -ve failure, abort state transition, move the lock
|
||||
* into cl_lock_state::CLS_FREEING state, and set
|
||||
* cl_lock::cll_error.
|
||||
*
|
||||
* Once all layers voted to agree to transition (by returning 0), lock
|
||||
* is moved into corresponding target state. All state transition
|
||||
* methods are optional.
|
||||
*/
|
||||
/** @{ */
|
||||
/**
|
||||
* Attempts to enqueue the lock. Called top-to-bottom.
|
||||
*
|
||||
* \retval 0 this layer has enqueued the lock successfully
|
||||
* \retval >0 this layer has enqueued the lock, but need to wait on
|
||||
* @anchor for resources
|
||||
* \retval -ve failure
|
||||
*
|
||||
* \see ccc_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
|
||||
* \see osc_lock_enqueue()
|
||||
*/
|
||||
int (*clo_enqueue)(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice,
|
||||
struct cl_io *io, __u32 enqflags);
|
||||
struct cl_io *io, struct cl_sync_io *anchor);
|
||||
/**
|
||||
* Attempts to wait for enqueue result. Called top-to-bottom.
|
||||
*
|
||||
* \see ccc_lock_wait(), lov_lock_wait(), osc_lock_wait()
|
||||
*/
|
||||
int (*clo_wait)(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice);
|
||||
/**
|
||||
* Attempts to unlock the lock. Called bottom-to-top. In addition to
|
||||
* usual return values of lock state-machine methods, this can return
|
||||
* -ESTALE to indicate that lock cannot be returned to the cache, and
|
||||
* has to be re-initialized.
|
||||
* unuse is a one-shot operation, so it must NOT return CLO_WAIT.
|
||||
*
|
||||
* \see ccc_lock_unuse(), lov_lock_unuse(), osc_lock_unuse()
|
||||
*/
|
||||
int (*clo_unuse)(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice);
|
||||
/**
|
||||
* Notifies layer that cached lock is started being used.
|
||||
*
|
||||
* \pre lock->cll_state == CLS_CACHED
|
||||
*
|
||||
* \see lov_lock_use(), osc_lock_use()
|
||||
*/
|
||||
int (*clo_use)(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice);
|
||||
/** @} statemachine */
|
||||
/**
|
||||
* A method invoked when lock state is changed (as a result of state
|
||||
* transition). This is used, for example, to track when the state of
|
||||
* a sub-lock changes, to propagate this change to the corresponding
|
||||
* top-lock. Optional
|
||||
*
|
||||
* \see lovsub_lock_state()
|
||||
*/
|
||||
void (*clo_state)(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice,
|
||||
enum cl_lock_state st);
|
||||
/**
|
||||
* Returns true, iff given lock is suitable for the given io, idea
|
||||
* being, that there are certain "unsafe" locks, e.g., ones acquired
|
||||
* for O_APPEND writes, that we don't want to re-use for a normal
|
||||
* write, to avoid the danger of cascading evictions. Optional. Runs
|
||||
* under cl_object_header::coh_lock_guard.
|
||||
*
|
||||
* XXX this should take more information about lock needed by
|
||||
* io. Probably lock description or something similar.
|
||||
*
|
||||
* \see lov_fits_into()
|
||||
*/
|
||||
int (*clo_fits_into)(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice,
|
||||
const struct cl_lock_descr *need,
|
||||
const struct cl_io *io);
|
||||
/**
|
||||
* \name ast
|
||||
* Asynchronous System Traps. All of them are optional, all are
|
||||
* executed bottom-to-top.
|
||||
*/
|
||||
/** @{ */
|
||||
|
||||
/**
|
||||
* Cancellation callback. Cancel a lock voluntarily, or under
|
||||
* the request of server.
|
||||
* Cancel a lock and release its DLM lock reference, but do not cancel
* the DLM lock itself.
|
||||
*/
|
||||
void (*clo_cancel)(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice);
|
||||
/**
|
||||
* Lock weighting ast. Executed to estimate how precious this lock
|
||||
* is. The sum of results across all layers is used to determine
|
||||
* whether lock worth keeping in cache given present memory usage.
|
||||
*
|
||||
* \see osc_lock_weigh(), vvp_lock_weigh(), lovsub_lock_weigh().
|
||||
*/
|
||||
unsigned long (*clo_weigh)(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice);
|
||||
/** @} ast */
|
||||
|
||||
/**
|
||||
* \see lovsub_lock_closure()
|
||||
*/
|
||||
int (*clo_closure)(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice,
|
||||
struct cl_lock_closure *closure);
|
||||
/**
|
||||
* Executed bottom-to-top when lock description changes (e.g., as a
|
||||
* result of server granting more generous lock than was requested).
|
||||
*
|
||||
* \see lovsub_lock_modify()
|
||||
*/
|
||||
int (*clo_modify)(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice,
|
||||
const struct cl_lock_descr *updated);
|
||||
/**
|
||||
* Notifies layers (bottom-to-top) that lock is going to be
|
||||
* destroyed. Responsibility of layers is to prevent new references on
|
||||
* this lock from being acquired once this method returns.
|
||||
*
|
||||
* This can be called multiple times due to the races.
|
||||
*
|
||||
* \see cl_lock_delete()
|
||||
* \see osc_lock_delete(), lovsub_lock_delete()
|
||||
*/
|
||||
void (*clo_delete)(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice);
|
||||
/** @} */
|
||||
/**
|
||||
* Destructor. Frees resources and the slice.
|
||||
*
|
||||
|
@@ -2164,10 +1734,14 @@ enum cl_enq_flags {
* for async glimpse lock.
*/
CEF_AGL = 0x00000020,
/**
* enqueue a lock to test DLM lock existence.
*/
CEF_PEEK = 0x00000040,
/**
* mask of enq_flags.
*/
CEF_MASK = 0x0000003f,
CEF_MASK = 0x0000007f,
};

/**
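A hedged companion sketch for the new CEF_PEEK flag added above: it follows the same pattern as the earlier lifecycle sketch, but is patterned on the cl_local_size() change later in this commit, where the descriptor's cld_enq_flags selects a peek that only tests for an existing DLM lock (the function name is hypothetical).

/* Sketch: test for an existing DLM lock; fail instead of enqueuing a new one. */
static int demo_peek_attr(const struct lu_env *env, struct cl_io *io,
                          struct cl_object *clob)
{
        struct cl_lock *lock = ccc_env_lock(env);
        int rc;

        lock->cll_descr.cld_obj       = clob;
        lock->cll_descr.cld_mode      = CLM_READ;
        lock->cll_descr.cld_start     = 0;
        lock->cll_descr.cld_end       = CL_PAGE_EOF;
        lock->cll_descr.cld_enq_flags = CEF_PEEK;

        rc = cl_lock_request(env, io, lock);
        if (rc < 0)
                return rc;      /* no cached DLM lock matched */
        /* ... use the state protected by the matched lock ... */
        cl_lock_release(env, lock);
        return 0;
}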
@ -2177,12 +1751,12 @@ enum cl_enq_flags {
|
|||
struct cl_io_lock_link {
|
||||
/** linkage into one of cl_lockset lists. */
|
||||
struct list_head cill_linkage;
|
||||
struct cl_lock_descr cill_descr;
|
||||
struct cl_lock *cill_lock;
|
||||
struct cl_lock cill_lock;
|
||||
/** optional destructor */
|
||||
void (*cill_fini)(const struct lu_env *env,
|
||||
struct cl_io_lock_link *link);
|
||||
};
|
||||
#define cill_descr cill_lock.cll_descr
|
||||
|
||||
/**
|
||||
* Lock-set represents a collection of locks, that io needs at a
|
||||
|
@ -2216,8 +1790,6 @@ struct cl_io_lock_link {
|
|||
struct cl_lockset {
|
||||
/** locks to be acquired. */
|
||||
struct list_head cls_todo;
|
||||
/** locks currently being processed. */
|
||||
struct list_head cls_curr;
|
||||
/** locks acquired. */
|
||||
struct list_head cls_done;
|
||||
};
|
||||
|
@ -2581,9 +2153,7 @@ struct cl_site {
|
|||
* and top-locks (and top-pages) are accounted here.
|
||||
*/
|
||||
struct cache_stats cs_pages;
|
||||
struct cache_stats cs_locks;
|
||||
atomic_t cs_pages_state[CPS_NR];
|
||||
atomic_t cs_locks_state[CLS_NR];
|
||||
};
|
||||
|
||||
int cl_site_init(struct cl_site *s, struct cl_device *top);
|
||||
|
@ -2707,7 +2277,7 @@ int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj,
|
|||
struct ost_lvb *lvb);
|
||||
int cl_conf_set(const struct lu_env *env, struct cl_object *obj,
|
||||
const struct cl_object_conf *conf);
|
||||
void cl_object_prune(const struct lu_env *env, struct cl_object *obj);
|
||||
int cl_object_prune(const struct lu_env *env, struct cl_object *obj);
|
||||
void cl_object_kill(const struct lu_env *env, struct cl_object *obj);
|
||||
|
||||
/**
|
||||
|
@ -2845,121 +2415,17 @@ void cl_lock_descr_print(const struct lu_env *env, void *cookie,
|
|||
* @{
|
||||
*/
|
||||
|
||||
struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
|
||||
const struct cl_lock_descr *need,
|
||||
const char *scope, const void *source);
|
||||
struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
|
||||
const struct cl_lock_descr *need,
|
||||
const char *scope, const void *source);
|
||||
struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
|
||||
const struct cl_lock_descr *need,
|
||||
const char *scope, const void *source);
|
||||
struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
|
||||
struct cl_object *obj, pgoff_t index,
|
||||
struct cl_lock *except, int pending,
|
||||
int canceld);
|
||||
int cl_lock_request(const struct lu_env *env, struct cl_io *io,
|
||||
struct cl_lock *lock);
|
||||
int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
|
||||
const struct cl_io *io);
|
||||
void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock);
|
||||
const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
|
||||
const struct lu_device_type *dtype);
|
||||
|
||||
void cl_lock_get(struct cl_lock *lock);
|
||||
void cl_lock_get_trust(struct cl_lock *lock);
|
||||
void cl_lock_put(const struct lu_env *env, struct cl_lock *lock);
|
||||
void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock,
|
||||
const char *scope, const void *source);
|
||||
void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
|
||||
const char *scope, const void *source);
|
||||
void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock,
|
||||
const char *scope, const void *source);
|
||||
void cl_lock_release(const struct lu_env *env, struct cl_lock *lock,
|
||||
const char *scope, const void *source);
|
||||
void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock);
|
||||
void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock);
|
||||
|
||||
int cl_lock_is_intransit(struct cl_lock *lock);
|
||||
|
||||
int cl_lock_enqueue_wait(const struct lu_env *env, struct cl_lock *lock,
|
||||
int keep_mutex);
|
||||
|
||||
/** \name statemachine statemachine
|
||||
* Interface to lock state machine consists of 3 parts:
|
||||
*
|
||||
* - "try" functions that attempt to effect a state transition. If state
|
||||
* transition is not possible right now (e.g., if it has to wait for some
|
||||
* asynchronous event to occur), these functions return
|
||||
* cl_lock_transition::CLO_WAIT.
|
||||
*
|
||||
* - "non-try" functions that implement synchronous blocking interface on
|
||||
* top of non-blocking "try" functions. These functions repeatedly call
|
||||
* corresponding "try" versions, and if state transition is not possible
|
||||
* immediately, wait for lock state change.
|
||||
*
|
||||
* - methods from cl_lock_operations, called by "try" functions. Lock can
|
||||
* be advanced to the target state only when all layers voted that they
|
||||
* are ready for this transition. "Try" functions call methods under lock
|
||||
* mutex. If a layer had to release a mutex, it re-acquires it and returns
|
||||
* cl_lock_transition::CLO_REPEAT, causing "try" function to call all
|
||||
* layers again.
|
||||
*
|
||||
* TRY NON-TRY METHOD FINAL STATE
|
||||
*
|
||||
* cl_enqueue_try() cl_enqueue() cl_lock_operations::clo_enqueue() CLS_ENQUEUED
|
||||
*
|
||||
* cl_wait_try() cl_wait() cl_lock_operations::clo_wait() CLS_HELD
|
||||
*
|
||||
* cl_unuse_try() cl_unuse() cl_lock_operations::clo_unuse() CLS_CACHED
|
||||
*
|
||||
* cl_use_try() NONE cl_lock_operations::clo_use() CLS_HELD
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
int cl_wait(const struct lu_env *env, struct cl_lock *lock);
|
||||
void cl_unuse(const struct lu_env *env, struct cl_lock *lock);
|
||||
int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
|
||||
struct cl_io *io, __u32 flags);
|
||||
int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock);
|
||||
int cl_wait_try(const struct lu_env *env, struct cl_lock *lock);
|
||||
int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic);
|
||||
|
||||
/** @} statemachine */
|
||||
|
||||
void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock);
|
||||
int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock);
|
||||
void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock,
|
||||
enum cl_lock_state state);
|
||||
int cl_queue_match(const struct list_head *queue,
|
||||
const struct cl_lock_descr *need);
|
||||
|
||||
void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock);
|
||||
void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock);
|
||||
int cl_lock_is_mutexed(struct cl_lock *lock);
|
||||
int cl_lock_nr_mutexed(const struct lu_env *env);
|
||||
int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock);
|
||||
int cl_lock_ext_match(const struct cl_lock_descr *has,
|
||||
const struct cl_lock_descr *need);
|
||||
int cl_lock_descr_match(const struct cl_lock_descr *has,
|
||||
const struct cl_lock_descr *need);
|
||||
int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need);
|
||||
int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock,
|
||||
const struct cl_lock_descr *desc);
|
||||
|
||||
void cl_lock_closure_init(const struct lu_env *env,
|
||||
struct cl_lock_closure *closure,
|
||||
struct cl_lock *origin, int wait);
|
||||
void cl_lock_closure_fini(struct cl_lock_closure *closure);
|
||||
int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
|
||||
struct cl_lock_closure *closure);
|
||||
void cl_lock_disclosure(const struct lu_env *env,
|
||||
struct cl_lock_closure *closure);
|
||||
int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock,
|
||||
struct cl_lock_closure *closure);
|
||||
|
||||
void cl_lock_release(const struct lu_env *env, struct cl_lock *lock);
|
||||
int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
|
||||
struct cl_lock *lock, struct cl_sync_io *anchor);
|
||||
void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock);
|
||||
void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock);
|
||||
void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error);
|
||||
void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int wait);
|
||||
|
||||
unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock);
|
||||
|
||||
/** @} cl_lock */
|
||||
|
||||
|
|
|
@ -98,10 +98,6 @@ struct ccc_io {
|
|||
int cui_to;
|
||||
} write;
|
||||
} u;
|
||||
/**
|
||||
* True iff io is processing glimpse right now.
|
||||
*/
|
||||
int cui_glimpse;
|
||||
/**
|
||||
* Layout version when this IO is initialized
|
||||
*/
|
||||
|
@ -123,6 +119,7 @@ extern struct lu_context_key ccc_key;
|
|||
extern struct lu_context_key ccc_session_key;
|
||||
|
||||
struct ccc_thread_info {
|
||||
struct cl_lock cti_lock;
|
||||
struct cl_lock_descr cti_descr;
|
||||
struct cl_io cti_io;
|
||||
struct cl_attr cti_attr;
|
||||
|
@ -137,6 +134,14 @@ static inline struct ccc_thread_info *ccc_env_info(const struct lu_env *env)
|
|||
return info;
|
||||
}
|
||||
|
||||
static inline struct cl_lock *ccc_env_lock(const struct lu_env *env)
|
||||
{
|
||||
struct cl_lock *lock = &ccc_env_info(env)->cti_lock;
|
||||
|
||||
memset(lock, 0, sizeof(*lock));
|
||||
return lock;
|
||||
}
|
||||
|
||||
static inline struct cl_attr *ccc_env_thread_attr(const struct lu_env *env)
|
||||
{
|
||||
struct cl_attr *attr = &ccc_env_info(env)->cti_attr;
|
||||
|
@ -308,18 +313,7 @@ void ccc_lock_delete(const struct lu_env *env,
|
|||
void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice);
|
||||
int ccc_lock_enqueue(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice,
|
||||
struct cl_io *io, __u32 enqflags);
|
||||
int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice);
|
||||
int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice);
|
||||
int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice);
|
||||
int ccc_lock_fits_into(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice,
|
||||
const struct cl_lock_descr *need,
|
||||
const struct cl_io *io);
|
||||
void ccc_lock_state(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice,
|
||||
enum cl_lock_state state);
|
||||
|
||||
struct cl_io *io, struct cl_sync_io *anchor);
|
||||
int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
|
||||
__u32 enqflags, enum cl_lock_mode mode,
|
||||
pgoff_t start, pgoff_t end);
|
||||
|
|
|
@ -2582,6 +2582,8 @@ struct ldlm_extent {
|
|||
__u64 gid;
|
||||
};
|
||||
|
||||
#define LDLM_GID_ANY ((__u64)-1)
|
||||
|
||||
static inline int ldlm_extent_overlap(struct ldlm_extent *ex1,
|
||||
struct ldlm_extent *ex2)
|
||||
{
|
||||
|
|
|
@ -71,6 +71,7 @@ struct obd_device;
|
|||
*/
|
||||
enum ldlm_error {
|
||||
ELDLM_OK = 0,
|
||||
ELDLM_LOCK_MATCHED = 1,
|
||||
|
||||
ELDLM_LOCK_CHANGED = 300,
|
||||
ELDLM_LOCK_ABORTED = 301,
|
||||
|
|
|
@ -748,6 +748,7 @@ int ldlm_error2errno(enum ldlm_error error)
|
|||
|
||||
switch (error) {
|
||||
case ELDLM_OK:
|
||||
case ELDLM_LOCK_MATCHED:
|
||||
result = 0;
|
||||
break;
|
||||
case ELDLM_LOCK_CHANGED:
|
||||
|
|
|
@ -657,7 +657,7 @@ void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
|
|||
struct ldlm_lock *lock;
|
||||
|
||||
lock = ldlm_handle2lock(lockh);
|
||||
LASSERT(lock);
|
||||
LASSERTF(lock, "Non-existing lock: %llx\n", lockh->cookie);
|
||||
ldlm_lock_addref_internal(lock, mode);
|
||||
LDLM_LOCK_PUT(lock);
|
||||
}
|
||||
|
@ -1092,6 +1092,7 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
|
|||
|
||||
if (unlikely(match == LCK_GROUP) &&
|
||||
lock->l_resource->lr_type == LDLM_EXTENT &&
|
||||
policy->l_extent.gid != LDLM_GID_ANY &&
|
||||
lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
|
||||
continue;
|
||||
|
||||
|
|
|
@ -347,7 +347,6 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
|
|||
struct ldlm_lock *lock;
|
||||
struct ldlm_reply *reply;
|
||||
int cleanup_phase = 1;
|
||||
int size = 0;
|
||||
|
||||
lock = ldlm_handle2lock(lockh);
|
||||
/* ldlm_cli_enqueue is holding a reference on this lock. */
|
||||
|
@ -375,8 +374,8 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
|
|||
goto cleanup;
|
||||
}
|
||||
|
||||
if (lvb_len != 0) {
|
||||
LASSERT(lvb);
|
||||
if (lvb_len > 0) {
|
||||
int size = 0;
|
||||
|
||||
size = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB,
|
||||
RCL_SERVER);
|
||||
|
@ -390,12 +389,13 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
|
|||
rc = -EINVAL;
|
||||
goto cleanup;
|
||||
}
|
||||
lvb_len = size;
|
||||
}
|
||||
|
||||
if (rc == ELDLM_LOCK_ABORTED) {
|
||||
if (lvb_len != 0)
|
||||
if (lvb_len > 0 && lvb)
|
||||
rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
|
||||
lvb, size);
|
||||
lvb, lvb_len);
|
||||
if (rc == 0)
|
||||
rc = ELDLM_LOCK_ABORTED;
|
||||
goto cleanup;
|
||||
|
@ -489,7 +489,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
|
|||
/* If the lock has already been granted by a completion AST, don't
|
||||
* clobber the LVB with an older one.
|
||||
*/
|
||||
if (lvb_len != 0) {
|
||||
if (lvb_len > 0) {
|
||||
/* We must lock or a racing completion might update lvb without
|
||||
* letting us know and we'll clobber the correct value.
|
||||
* Cannot unlock after the check either, as that still leaves
|
||||
|
@ -498,7 +498,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
|
|||
lock_res_and_lock(lock);
|
||||
if (lock->l_req_mode != lock->l_granted_mode)
|
||||
rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
|
||||
lock->l_lvb_data, size);
|
||||
lock->l_lvb_data, lvb_len);
|
||||
unlock_res_and_lock(lock);
|
||||
if (rc < 0) {
|
||||
cleanup_phase = 1;
|
||||
|
@ -518,7 +518,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
|
|||
}
|
||||
}
|
||||
|
||||
if (lvb_len && lvb) {
|
||||
if (lvb_len > 0 && lvb) {
|
||||
/* Copy the LVB here, and not earlier, because the completion
|
||||
* AST (if any) can override what we got in the reply
|
||||
*/
|
||||
|
|
|
@ -1400,3 +1400,4 @@ void ldlm_resource_dump(int level, struct ldlm_resource *res)
|
|||
LDLM_DEBUG_LIMIT(level, lock, "###");
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(ldlm_resource_dump);
|
||||
|
|
|
@ -86,17 +86,17 @@ blkcnt_t dirty_cnt(struct inode *inode)
|
|||
int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
|
||||
struct inode *inode, struct cl_object *clob, int agl)
|
||||
{
|
||||
struct cl_lock_descr *descr = &ccc_env_info(env)->cti_descr;
|
||||
struct ll_inode_info *lli = ll_i2info(inode);
|
||||
const struct lu_fid *fid = lu_object_fid(&clob->co_lu);
|
||||
struct ccc_io *cio = ccc_env_io(env);
|
||||
struct cl_lock *lock;
|
||||
int result;
|
||||
|
||||
result = 0;
|
||||
if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) {
|
||||
CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid));
|
||||
if (lli->lli_has_smd) {
|
||||
struct cl_lock *lock = ccc_env_lock(env);
|
||||
struct cl_lock_descr *descr = &lock->cll_descr;
|
||||
|
||||
/* NOTE: this looks like DLM lock request, but it may
|
||||
* not be one. Due to CEF_ASYNC flag (translated
|
||||
* to LDLM_FL_HAS_INTENT by osc), this is
|
||||
|
@ -113,11 +113,10 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
|
|||
*/
|
||||
*descr = whole_file;
|
||||
descr->cld_obj = clob;
|
||||
descr->cld_mode = CLM_PHANTOM;
|
||||
descr->cld_mode = CLM_READ;
|
||||
descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
|
||||
if (agl)
|
||||
descr->cld_enq_flags |= CEF_AGL;
|
||||
cio->cui_glimpse = 1;
|
||||
/*
|
||||
* CEF_ASYNC is used because glimpse sub-locks cannot
|
||||
* deadlock (because they never conflict with other
|
||||
|
@ -126,19 +125,11 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
|
|||
* CEF_MUST protects glimpse lock from conversion into
|
||||
* a lockless mode.
|
||||
*/
|
||||
lock = cl_lock_request(env, io, descr, "glimpse",
|
||||
current);
|
||||
cio->cui_glimpse = 0;
|
||||
result = cl_lock_request(env, io, lock);
|
||||
if (result < 0)
|
||||
return result;
|
||||
|
||||
if (!lock)
|
||||
return 0;
|
||||
|
||||
if (IS_ERR(lock))
|
||||
return PTR_ERR(lock);
|
||||
|
||||
LASSERT(agl == 0);
|
||||
result = cl_wait(env, lock);
|
||||
if (result == 0) {
|
||||
if (!agl) {
|
||||
ll_merge_attr(env, inode);
|
||||
if (i_size_read(inode) > 0 &&
|
||||
inode->i_blocks == 0) {
|
||||
|
@ -150,9 +141,8 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
|
|||
*/
|
||||
inode->i_blocks = dirty_cnt(inode);
|
||||
}
|
||||
cl_unuse(env, lock);
|
||||
}
|
||||
cl_lock_release(env, lock, "glimpse", current);
|
||||
cl_lock_release(env, lock);
|
||||
} else {
|
||||
CDEBUG(D_DLMTRACE, "No objects for inode\n");
|
||||
ll_merge_attr(env, inode);
|
||||
|
@ -233,10 +223,7 @@ int cl_local_size(struct inode *inode)
|
|||
{
|
||||
struct lu_env *env = NULL;
|
||||
struct cl_io *io = NULL;
|
||||
struct ccc_thread_info *cti;
|
||||
struct cl_object *clob;
|
||||
struct cl_lock_descr *descr;
|
||||
struct cl_lock *lock;
|
||||
int result;
|
||||
int refcheck;
|
||||
|
||||
|
@ -252,19 +239,15 @@ int cl_local_size(struct inode *inode)
|
|||
if (result > 0) {
|
||||
result = io->ci_result;
|
||||
} else if (result == 0) {
|
||||
cti = ccc_env_info(env);
|
||||
descr = &cti->cti_descr;
|
||||
struct cl_lock *lock = ccc_env_lock(env);
|
||||
|
||||
*descr = whole_file;
|
||||
descr->cld_obj = clob;
|
||||
lock = cl_lock_peek(env, io, descr, "localsize", current);
|
||||
if (lock) {
|
||||
lock->cll_descr = whole_file;
|
||||
lock->cll_descr.cld_enq_flags = CEF_PEEK;
|
||||
lock->cll_descr.cld_obj = clob;
|
||||
result = cl_lock_request(env, io, lock);
|
||||
if (result == 0) {
|
||||
ll_merge_attr(env, inode);
|
||||
cl_unuse(env, lock);
|
||||
cl_lock_release(env, lock, "localsize", current);
|
||||
result = 0;
|
||||
} else {
|
||||
result = -ENODATA;
|
||||
cl_lock_release(env, lock);
|
||||
}
|
||||
}
|
||||
cl_io_fini(env, io);
|
||||
|
|
|
@ -475,12 +475,6 @@ int ccc_transient_page_prep(const struct lu_env *env,
|
|||
*
|
||||
*/
|
||||
|
||||
void ccc_lock_delete(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice)
|
||||
{
|
||||
CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
|
||||
}
|
||||
|
||||
void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
|
||||
{
|
||||
struct ccc_lock *clk = cl2ccc_lock(slice);
|
||||
|
@ -490,111 +484,12 @@ void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
|
|||
|
||||
int ccc_lock_enqueue(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice,
|
||||
struct cl_io *unused, __u32 enqflags)
|
||||
struct cl_io *unused, struct cl_sync_io *anchor)
|
||||
{
|
||||
CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice)
|
||||
{
|
||||
CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice)
|
||||
{
|
||||
CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice)
|
||||
{
|
||||
CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of cl_lock_operations::clo_fits_into() methods for ccc
|
||||
* layer. This function is executed every time io finds an existing lock in
|
||||
* the lock cache while creating new lock. This function has to decide whether
|
||||
* cached lock "fits" into io.
|
||||
*
|
||||
* \param slice lock to be checked
|
||||
* \param io IO that wants a lock.
|
||||
*
|
||||
* \see lov_lock_fits_into().
|
||||
*/
|
||||
int ccc_lock_fits_into(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice,
|
||||
const struct cl_lock_descr *need,
|
||||
const struct cl_io *io)
|
||||
{
|
||||
const struct cl_lock *lock = slice->cls_lock;
|
||||
const struct cl_lock_descr *descr = &lock->cll_descr;
|
||||
const struct ccc_io *cio = ccc_env_io(env);
|
||||
int result;
|
||||
|
||||
/*
|
||||
* Work around DLM peculiarity: it assumes that glimpse
|
||||
* (LDLM_FL_HAS_INTENT) lock is always LCK_PR, and returns reads lock
|
||||
* when asked for LCK_PW lock with LDLM_FL_HAS_INTENT flag set. Make
|
||||
* sure that glimpse doesn't get CLM_WRITE top-lock, so that it
|
||||
* doesn't enqueue CLM_WRITE sub-locks.
|
||||
*/
|
||||
if (cio->cui_glimpse)
|
||||
result = descr->cld_mode != CLM_WRITE;
|
||||
|
||||
/*
|
||||
* Also, don't match incomplete write locks for read, otherwise read
|
||||
* would enqueue missing sub-locks in the write mode.
|
||||
*/
|
||||
else if (need->cld_mode != descr->cld_mode)
|
||||
result = lock->cll_state >= CLS_ENQUEUED;
|
||||
else
|
||||
result = 1;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements cl_lock_operations::clo_state() method for ccc layer, invoked
|
||||
* whenever lock state changes. Transfers object attributes, that might be
|
||||
* updated as a result of lock acquiring into inode.
|
||||
*/
|
||||
void ccc_lock_state(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice,
|
||||
enum cl_lock_state state)
|
||||
{
|
||||
struct cl_lock *lock = slice->cls_lock;
|
||||
|
||||
/*
|
||||
* Refresh inode attributes when the lock is moving into CLS_HELD
|
||||
* state, and only when this is a result of real enqueue, rather than
|
||||
* of finding lock in the cache.
|
||||
*/
|
||||
if (state == CLS_HELD && lock->cll_state < CLS_HELD) {
|
||||
struct cl_object *obj;
|
||||
struct inode *inode;
|
||||
|
||||
obj = slice->cls_obj;
|
||||
inode = ccc_object_inode(obj);
|
||||
|
||||
/* vmtruncate() sets the i_size
|
||||
* under both a DLM lock and the
|
||||
* ll_inode_size_lock(). If we don't get the
|
||||
* ll_inode_size_lock() here we can match the DLM lock and
|
||||
* reset i_size. generic_file_write can then trust the
|
||||
* stale i_size when doing appending writes and effectively
|
||||
* cancel the result of the truncate. Getting the
|
||||
* ll_inode_size_lock() after the enqueue maintains the DLM
|
||||
* -> ll_inode_size_lock() acquiring order.
|
||||
*/
|
||||
if (lock->cll_descr.cld_start == 0 &&
|
||||
lock->cll_descr.cld_end == CL_PAGE_EOF)
|
||||
ll_merge_attr(env, inode);
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* io operations.
|
||||
|
|
|
@ -145,7 +145,7 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
|
|||
io->ci_ignore_layout = 1;
|
||||
|
||||
rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
|
||||
if (rc) {
|
||||
if (rc != 0) {
|
||||
cl_io_fini(env, io);
|
||||
cl_env_put(env, &refcheck);
|
||||
/* Does not make sense to take GL for released layout */
|
||||
|
@ -154,7 +154,8 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
|
|||
return rc;
|
||||
}
|
||||
|
||||
descr = &ccc_env_info(env)->cti_descr;
|
||||
lock = ccc_env_lock(env);
|
||||
descr = &lock->cll_descr;
|
||||
descr->cld_obj = obj;
|
||||
descr->cld_start = 0;
|
||||
descr->cld_end = CL_PAGE_EOF;
|
||||
|
@ -164,11 +165,11 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
|
|||
enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
|
||||
descr->cld_enq_flags = enqflags;
|
||||
|
||||
lock = cl_lock_request(env, io, descr, GROUPLOCK_SCOPE, current);
|
||||
if (IS_ERR(lock)) {
|
||||
rc = cl_lock_request(env, io, lock);
|
||||
if (rc < 0) {
|
||||
cl_io_fini(env, io);
|
||||
cl_env_put(env, &refcheck);
|
||||
return PTR_ERR(lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
cg->cg_env = cl_env_get(&refcheck);
|
||||
|
@ -194,8 +195,7 @@ void cl_put_grouplock(struct ccc_grouplock *cg)
|
|||
cl_env_implant(env, &refcheck);
|
||||
cl_env_put(env, &refcheck);
|
||||
|
||||
cl_unuse(env, lock);
|
||||
cl_lock_release(env, lock, GROUPLOCK_SCOPE, current);
|
||||
cl_lock_release(env, lock);
|
||||
cl_io_fini(env, io);
|
||||
cl_env_put(env, NULL);
|
||||
}
|
||||
|
|
|
@ -150,8 +150,7 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
|
|||
* If this page holds the last refc of cl_object, the following
|
||||
* call path may cause reschedule:
|
||||
* cl_page_put -> cl_page_free -> cl_object_put ->
|
||||
* lu_object_put -> lu_object_free -> lov_delete_raid0 ->
|
||||
* cl_locks_prune.
|
||||
* lu_object_put -> lu_object_free -> lov_delete_raid0.
|
||||
*
|
||||
* However, the kernel can't get rid of this inode until all pages have
|
||||
* been cleaned up. Now that we hold page lock here, it's pretty safe
|
||||
|
|
|
@ -233,7 +233,7 @@ static int vvp_mmap_locks(const struct lu_env *env,
|
|||
ldlm_policy_data_t policy;
|
||||
unsigned long addr;
|
||||
ssize_t count;
|
||||
int result;
|
||||
int result = 0;
|
||||
struct iov_iter i;
|
||||
struct iovec iov;
|
||||
|
||||
|
@ -265,10 +265,10 @@ static int vvp_mmap_locks(const struct lu_env *env,
|
|||
|
||||
if (ll_file_nolock(vma->vm_file)) {
|
||||
/*
|
||||
* For no lock case, a lockless lock will be
|
||||
* generated.
|
||||
* The no-lock case is not allowed for mmap
|
||||
*/
|
||||
flags = CEF_NEVER;
|
||||
result = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -290,10 +290,8 @@ static int vvp_mmap_locks(const struct lu_env *env,
|
|||
descr->cld_mode, descr->cld_start,
|
||||
descr->cld_end);
|
||||
|
||||
if (result < 0) {
|
||||
up_read(&mm->mmap_sem);
|
||||
return result;
|
||||
}
|
||||
if (result < 0)
|
||||
break;
|
||||
|
||||
if (vma->vm_end - addr >= count)
|
||||
break;
|
||||
|
@ -302,8 +300,10 @@ static int vvp_mmap_locks(const struct lu_env *env,
|
|||
addr = vma->vm_end;
|
||||
}
|
||||
up_read(&mm->mmap_sem);
|
||||
if (result < 0)
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
|
||||
|
@ -781,6 +781,7 @@ static int vvp_io_write_start(const struct lu_env *env,
|
|||
* PARALLEL IO This has to be changed for parallel IO doing
|
||||
* out-of-order writes.
|
||||
*/
|
||||
ll_merge_attr(env, inode);
|
||||
pos = io->u.ci_wr.wr.crw_pos = i_size_read(inode);
|
||||
cio->cui_iocb->ki_pos = pos;
|
||||
} else {
|
||||
|
|
|
@ -51,32 +51,9 @@
|
|||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* Estimates lock value for the purpose of managing the lock cache during
|
||||
* memory shortages.
|
||||
*
|
||||
* Locks for memory mapped files are almost infinitely precious, others are
|
||||
* junk. "Mapped locks" are heavy, but not infinitely heavy, so that they are
|
||||
* ordered within themselves by weights assigned from other layers.
|
||||
*/
|
||||
static unsigned long vvp_lock_weigh(const struct lu_env *env,
|
||||
const struct cl_lock_slice *slice)
|
||||
{
|
||||
struct ccc_object *cob = cl2ccc(slice->cls_obj);
|
||||
|
||||
return atomic_read(&cob->cob_mmap_cnt) > 0 ? ~0UL >> 2 : 0;
|
||||
}
|
||||
|
||||
static const struct cl_lock_operations vvp_lock_ops = {
|
||||
.clo_delete = ccc_lock_delete,
|
||||
.clo_fini = ccc_lock_fini,
|
||||
.clo_enqueue = ccc_lock_enqueue,
|
||||
.clo_wait = ccc_lock_wait,
|
||||
.clo_use = ccc_lock_use,
|
||||
.clo_unuse = ccc_lock_unuse,
|
||||
.clo_fits_into = ccc_lock_fits_into,
|
||||
.clo_state = ccc_lock_state,
|
||||
.clo_weigh = vvp_lock_weigh
|
||||
.clo_enqueue = ccc_lock_enqueue
|
||||
};
|
||||
|
||||
int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
|
||||
|
|
|
@ -170,11 +170,15 @@ static int vvp_prune(const struct lu_env *env, struct cl_object *obj)
|
|||
struct inode *inode = ccc_object_inode(obj);
|
||||
int rc;
|
||||
|
||||
rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_ALL, 1);
|
||||
if (rc == 0)
|
||||
truncate_inode_pages(inode->i_mapping, 0);
|
||||
rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1);
|
||||
if (rc < 0) {
|
||||
CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n",
|
||||
PFID(lu_object_fid(&obj->co_lu)), rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
return rc;
|
||||
truncate_inode_pages(inode->i_mapping, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct cl_object_operations vvp_ops = {
|
||||
|
|
|
@ -280,25 +280,18 @@ struct lov_object {
|
|||
struct task_struct *lo_owner;
|
||||
};
|
||||
|
||||
/**
|
||||
* Flags that top-lock can set on each of its sub-locks.
|
||||
*/
|
||||
enum lov_sub_flags {
|
||||
/** Top-lock acquired a hold (cl_lock_hold()) on a sub-lock. */
|
||||
LSF_HELD = 1 << 0
|
||||
};
|
||||
|
||||
/**
|
||||
* State lov_lock keeps for each sub-lock.
|
||||
*/
|
||||
struct lov_lock_sub {
|
||||
/** sub-lock itself */
|
||||
struct lovsub_lock *sub_lock;
|
||||
/** An array of per-sub-lock flags, taken from enum lov_sub_flags */
|
||||
unsigned sub_flags;
|
||||
struct cl_lock sub_lock;
|
||||
/** Set if the sublock has ever been enqueued, meaning it may
|
||||
* hold resources of underlying layers
|
||||
*/
|
||||
unsigned int sub_is_enqueued:1,
|
||||
sub_initialized:1;
|
||||
int sub_stripe;
|
||||
struct cl_lock_descr sub_descr;
|
||||
struct cl_lock_descr sub_got;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -308,59 +301,8 @@ struct lov_lock {
|
|||
struct cl_lock_slice lls_cl;
|
||||
/** Number of sub-locks in this lock */
|
||||
int lls_nr;
|
||||
/**
|
||||
* Number of existing sub-locks.
|
||||
*/
|
||||
unsigned lls_nr_filled;
|
||||
/**
|
||||
* Set when sub-lock was canceled, while top-lock was being
|
||||
* used, or unused.
|
||||
*/
|
||||
unsigned int lls_cancel_race:1;
|
||||
/**
|
||||
* An array of sub-locks
|
||||
*
|
||||
* There are two issues with managing sub-locks:
|
||||
*
|
||||
* - sub-locks are concurrently canceled, and
|
||||
*
|
||||
* - sub-locks are shared with other top-locks.
|
||||
*
|
||||
* To manage cancellation, top-lock acquires a hold on a sublock
|
||||
* (lov_sublock_adopt()) when the latter is inserted into
|
||||
* lov_lock::lls_sub[]. This hold is released (lov_sublock_release())
|
||||
* when top-lock is going into CLS_CACHED state or destroyed. Hold
|
||||
* prevents sub-lock from cancellation.
|
||||
*
|
||||
* Sub-lock sharing means, among other things, that top-lock that is
|
||||
* in the process of creation (i.e., not yet inserted into lock list)
|
||||
* is already accessible to other threads once at least one of its
|
||||
* sub-locks is created, see lov_lock_sub_init().
|
||||
*
|
||||
* Sub-lock can be in one of the following states:
|
||||
*
|
||||
* - doesn't exist, lov_lock::lls_sub[]::sub_lock == NULL. Such
|
||||
* sub-lock was either never created (top-lock is in CLS_NEW
|
||||
* state), or it was created, then canceled, then destroyed
|
||||
* (lov_lock_unlink() cleared sub-lock pointer in the top-lock).
|
||||
*
|
||||
* - sub-lock exists and is on
|
||||
* hold. (lov_lock::lls_sub[]::sub_flags & LSF_HELD). This is a
|
||||
* normal state of a sub-lock in CLS_HELD and CLS_CACHED states
|
||||
* of a top-lock.
|
||||
*
|
||||
* - sub-lock exists, but is not held by the top-lock. This
|
||||
* happens after top-lock released a hold on sub-locks before
|
||||
* going into cache (lov_lock_unuse()).
|
||||
*
|
||||
* \todo To support wide-striping, array has to be replaced with a set
|
||||
* of queues to avoid scanning.
|
||||
*/
|
||||
struct lov_lock_sub *lls_sub;
|
||||
/**
|
||||
* Original description with which lock was enqueued.
|
||||
*/
|
||||
struct cl_lock_descr lls_orig;
|
||||
/** sublock array */
|
||||
struct lov_lock_sub lls_sub[0];
|
||||
};
|
||||
|
||||
struct lov_page {
|
||||
|
@ -445,7 +387,6 @@ struct lov_thread_info {
|
|||
struct ost_lvb lti_lvb;
|
||||
struct cl_2queue lti_cl2q;
|
||||
struct cl_page_list lti_plist;
|
||||
struct cl_lock_closure lti_closure;
|
||||
wait_queue_t lti_waiter;
|
||||
struct cl_attr lti_attr;
|
||||
};
|
||||
|
|
|
@ -143,9 +143,7 @@ static void *lov_key_init(const struct lu_context *ctx,
|
|||
struct lov_thread_info *info;
|
||||
|
||||
info = kmem_cache_zalloc(lov_thread_kmem, GFP_NOFS);
|
||||
if (info)
|
||||
INIT_LIST_HEAD(&info->lti_closure.clc_list);
|
||||
else
|
||||
if (!info)
|
||||
info = ERR_PTR(-ENOMEM);
|
||||
return info;
|
||||
}
|
||||
|
@ -155,7 +153,6 @@ static void lov_key_fini(const struct lu_context *ctx,
|
|||
{
|
||||
struct lov_thread_info *info = data;
|
||||
|
||||
LINVRNT(list_empty(&info->lti_closure.clc_list));
|
||||
kmem_cache_free(lov_thread_kmem, info);
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
|
@ -310,8 +310,6 @@ static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov,
|
|||
LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED);
|
||||
|
||||
lov_layout_wait(env, lov);
|
||||
|
||||
cl_locks_prune(env, &lov->lo_cl, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -379,7 +377,7 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
|
|||
struct lovsub_object *los = r0->lo_sub[i];
|
||||
|
||||
if (los) {
|
||||
cl_locks_prune(env, &los->lso_cl, 1);
|
||||
cl_object_prune(env, &los->lso_cl);
|
||||
/*
|
||||
* If top-level object is to be evicted from
|
||||
* the cache, so are its sub-objects.
|
||||
|
@ -388,7 +386,6 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
|
|||
}
|
||||
}
|
||||
}
|
||||
cl_locks_prune(env, &lov->lo_cl, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -714,7 +711,9 @@ static int lov_layout_change(const struct lu_env *unused,
|
|||
old_ops = &lov_dispatch[lov->lo_type];
|
||||
new_ops = &lov_dispatch[llt];
|
||||
|
||||
cl_object_prune(env, &lov->lo_cl);
|
||||
result = cl_object_prune(env, &lov->lo_cl);
|
||||
if (result != 0)
|
||||
goto out;
|
||||
|
||||
result = old_ops->llo_delete(env, lov, &lov->u);
|
||||
if (result == 0) {
|
||||
|
@ -736,6 +735,7 @@ static int lov_layout_change(const struct lu_env *unused,
|
|||
}
|
||||
}
|
||||
|
||||
out:
|
||||
cl_env_put(env, &refcheck);
|
||||
cl_env_reexit(cookie);
|
||||
return result;
|
||||
|
@ -816,7 +816,8 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
|
|||
goto out;
|
||||
}
|
||||
|
||||
lov->lo_layout_invalid = lov_layout_change(env, lov, conf);
|
||||
result = lov_layout_change(env, lov, conf);
|
||||
lov->lo_layout_invalid = result != 0;
|
||||
|
||||
out:
|
||||
lov_conf_unlock(lov);
|
||||
|
|
|
@@ -62,391 +62,8 @@ static void lovsub_lock_fini(const struct lu_env *env,
	kmem_cache_free(lovsub_lock_kmem, lsl);
}

static void lovsub_parent_lock(const struct lu_env *env, struct lov_lock *lov)
{
	struct cl_lock *parent;

	parent = lov->lls_cl.cls_lock;
	cl_lock_get(parent);
	lu_ref_add(&parent->cll_reference, "lovsub-parent", current);
	cl_lock_mutex_get(env, parent);
}

static void lovsub_parent_unlock(const struct lu_env *env, struct lov_lock *lov)
{
	struct cl_lock *parent;

	parent = lov->lls_cl.cls_lock;
	cl_lock_mutex_put(env, lov->lls_cl.cls_lock);
	lu_ref_del(&parent->cll_reference, "lovsub-parent", current);
	cl_lock_put(env, parent);
}

/**
 * Implements cl_lock_operations::clo_state() method for lovsub layer, which
 * method is called whenever sub-lock state changes. Propagates state change
 * to the top-locks.
 */
static void lovsub_lock_state(const struct lu_env *env,
			      const struct cl_lock_slice *slice,
			      enum cl_lock_state state)
{
	struct lovsub_lock *sub = cl2lovsub_lock(slice);
	struct lov_lock_link *scan;

	LASSERT(cl_lock_is_mutexed(slice->cls_lock));

	list_for_each_entry(scan, &sub->lss_parents, lll_list) {
		struct lov_lock *lov = scan->lll_super;
		struct cl_lock *parent = lov->lls_cl.cls_lock;

		if (sub->lss_active != parent) {
			lovsub_parent_lock(env, lov);
			cl_lock_signal(env, parent);
			lovsub_parent_unlock(env, lov);
		}
	}
}

/**
 * Implementation of cl_lock_operation::clo_weigh() estimating lock weight by
 * asking parent lock.
 */
static unsigned long lovsub_lock_weigh(const struct lu_env *env,
				       const struct cl_lock_slice *slice)
{
	struct lovsub_lock *lock = cl2lovsub_lock(slice);
	struct lov_lock *lov;
	unsigned long dumbbell;

	LASSERT(cl_lock_is_mutexed(slice->cls_lock));

	if (!list_empty(&lock->lss_parents)) {
		/*
		 * It is not clear whether all parents have to be asked and
		 * their estimations summed, or it is enough to ask one. For
		 * the current usages, one is always enough.
		 */
		lov = container_of(lock->lss_parents.next,
				   struct lov_lock_link, lll_list)->lll_super;

		lovsub_parent_lock(env, lov);
		dumbbell = cl_lock_weigh(env, lov->lls_cl.cls_lock);
		lovsub_parent_unlock(env, lov);
	} else
		dumbbell = 0;

	return dumbbell;
}

/**
 * Maps start/end offsets within a stripe, to offsets within a file.
 */
static void lovsub_lock_descr_map(const struct cl_lock_descr *in,
				  struct lov_object *lov,
				  int stripe, struct cl_lock_descr *out)
{
	pgoff_t size; /* stripe size in pages */
	pgoff_t skip; /* how many pages in every stripe are occupied by
		       * "other" stripes
		       */
	pgoff_t start;
	pgoff_t end;

	start = in->cld_start;
	end = in->cld_end;

	if (lov->lo_lsm->lsm_stripe_count > 1) {
		size = cl_index(lov2cl(lov), lov->lo_lsm->lsm_stripe_size);
		skip = (lov->lo_lsm->lsm_stripe_count - 1) * size;

		/* XXX overflow check here? */
		start += start/size * skip + stripe * size;

		if (end != CL_PAGE_EOF) {
			end += end/size * skip + stripe * size;
			/*
			 * And check for overflow...
			 */
			if (end < in->cld_end)
				end = CL_PAGE_EOF;
		}
	}
	out->cld_start = start;
	out->cld_end = end;
}

/**
 * Adjusts parent lock extent when a sub-lock is attached to a parent. This is
 * called in two ways:
 *
 * - as part of receive call-back, when server returns granted extent to
 *   the client, and
 *
 * - when top-lock finds existing sub-lock in the cache.
 *
 * Note, that lock mode is not propagated to the parent: i.e., if CLM_READ
 * top-lock matches CLM_WRITE sub-lock, top-lock is still CLM_READ.
 */
int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov,
		       struct lovsub_lock *sublock,
		       const struct cl_lock_descr *d, int idx)
{
	struct cl_lock *parent;
	struct lovsub_object *subobj;
	struct cl_lock_descr *pd;
	struct cl_lock_descr *parent_descr;
	int result;

	parent = lov->lls_cl.cls_lock;
	parent_descr = &parent->cll_descr;
	LASSERT(cl_lock_mode_match(d->cld_mode, parent_descr->cld_mode));

	subobj = cl2lovsub(sublock->lss_cl.cls_obj);
	pd = &lov_env_info(env)->lti_ldescr;

	pd->cld_obj = parent_descr->cld_obj;
	pd->cld_mode = parent_descr->cld_mode;
	pd->cld_gid = parent_descr->cld_gid;
	lovsub_lock_descr_map(d, subobj->lso_super, subobj->lso_index, pd);
	lov->lls_sub[idx].sub_got = *d;
	/*
	 * Notify top-lock about modification, if lock description changes
	 * materially.
	 */
	if (!cl_lock_ext_match(parent_descr, pd))
		result = cl_lock_modify(env, parent, pd);
	else
		result = 0;
	return result;
}

static int lovsub_lock_modify(const struct lu_env *env,
			      const struct cl_lock_slice *s,
			      const struct cl_lock_descr *d)
{
	struct lovsub_lock *lock = cl2lovsub_lock(s);
	struct lov_lock_link *scan;
	struct lov_lock *lov;
	int result = 0;

	LASSERT(cl_lock_mode_match(d->cld_mode,
				   s->cls_lock->cll_descr.cld_mode));
	list_for_each_entry(scan, &lock->lss_parents, lll_list) {
		int rc;

		lov = scan->lll_super;
		lovsub_parent_lock(env, lov);
		rc = lov_sublock_modify(env, lov, lock, d, scan->lll_idx);
		lovsub_parent_unlock(env, lov);
		result = result ?: rc;
	}
	return result;
}

static int lovsub_lock_closure(const struct lu_env *env,
			       const struct cl_lock_slice *slice,
			       struct cl_lock_closure *closure)
{
	struct lovsub_lock *sub;
	struct cl_lock *parent;
	struct lov_lock_link *scan;
	int result;

	LASSERT(cl_lock_is_mutexed(slice->cls_lock));

	sub = cl2lovsub_lock(slice);
	result = 0;

	list_for_each_entry(scan, &sub->lss_parents, lll_list) {
		parent = scan->lll_super->lls_cl.cls_lock;
		result = cl_lock_closure_build(env, parent, closure);
		if (result != 0)
			break;
	}
	return result;
}

/**
 * A helper function for lovsub_lock_delete() that deals with a given parent
 * top-lock.
 */
static int lovsub_lock_delete_one(const struct lu_env *env,
				  struct cl_lock *child, struct lov_lock *lov)
{
	struct cl_lock *parent;
	int result;

	parent = lov->lls_cl.cls_lock;
	if (parent->cll_error)
		return 0;

	result = 0;
	switch (parent->cll_state) {
	case CLS_ENQUEUED:
		/* See LU-1355 for the case that a glimpse lock is
		 * interrupted by signal
		 */
		LASSERT(parent->cll_flags & CLF_CANCELLED);
		break;
	case CLS_QUEUING:
	case CLS_FREEING:
		cl_lock_signal(env, parent);
		break;
	case CLS_INTRANSIT:
		/*
		 * Here lies a problem: a sub-lock is canceled while top-lock
		 * is being unlocked. Top-lock cannot be moved into CLS_NEW
		 * state, because unlocking has to succeed eventually by
		 * placing lock into CLS_CACHED (or failing it), see
		 * cl_unuse_try(). Nor can top-lock be left in CLS_CACHED
		 * state, because lov maintains an invariant that all
		 * sub-locks exist in CLS_CACHED (this allows cached top-lock
		 * to be reused immediately). Nor can we wait for top-lock
		 * state to change, because this can be synchronous to the
		 * current thread.
		 *
		 * We know for sure that lov_lock_unuse() will be called at
		 * least one more time to finish un-using, so leave a mark on
		 * the top-lock, that will be seen by the next call to
		 * lov_lock_unuse().
		 */
		if (cl_lock_is_intransit(parent))
			lov->lls_cancel_race = 1;
		break;
	case CLS_CACHED:
		/*
		 * if a sub-lock is canceled move its top-lock into CLS_NEW
		 * state to preserve an invariant that a top-lock in
		 * CLS_CACHED is immediately ready for re-use (i.e., has all
		 * sub-locks), and so that next attempt to re-use the top-lock
		 * enqueues missing sub-lock.
		 */
		cl_lock_state_set(env, parent, CLS_NEW);
		/* fall through */
	case CLS_NEW:
		/*
		 * if last sub-lock is canceled, destroy the top-lock (which
		 * is now `empty') proactively.
		 */
		if (lov->lls_nr_filled == 0) {
			/* ... but unfortunately, this cannot be done easily,
			 * as cancellation of a top-lock might acquire mutices
			 * of its other sub-locks, violating lock ordering,
			 * see cl_lock_{cancel,delete}() preconditions.
			 *
			 * To work around this, the mutex of this sub-lock is
			 * released, top-lock is destroyed, and sub-lock mutex
			 * acquired again. The list of parents has to be
			 * re-scanned from the beginning after this.
			 *
			 * Only do this if no mutices other than on @child and
			 * @parent are held by the current thread.
			 *
			 * TODO: The lock modal here is too complex, because
			 * the lock may be canceled and deleted by voluntarily:
			 * cl_lock_request
			 * -> osc_lock_enqueue_wait
			 * -> osc_lock_cancel_wait
			 * -> cl_lock_delete
			 * -> lovsub_lock_delete
			 * -> cl_lock_cancel/delete
			 * -> ...
			 *
			 * The better choice is to spawn a kernel thread for
			 * this purpose. -jay
			 */
			if (cl_lock_nr_mutexed(env) == 2) {
				cl_lock_mutex_put(env, child);
				cl_lock_cancel(env, parent);
				cl_lock_delete(env, parent);
				result = 1;
			}
		}
		break;
	case CLS_HELD:
		CL_LOCK_DEBUG(D_ERROR, env, parent, "Delete CLS_HELD lock\n");
	default:
		CERROR("Impossible state: %d\n", parent->cll_state);
		LBUG();
		break;
	}

	return result;
}

/**
 * An implementation of cl_lock_operations::clo_delete() method. This is
 * invoked in "bottom-to-top" delete, when lock destruction starts from the
 * sub-lock (e.g, as a result of ldlm lock LRU policy).
 */
static void lovsub_lock_delete(const struct lu_env *env,
			       const struct cl_lock_slice *slice)
{
	struct cl_lock *child = slice->cls_lock;
	struct lovsub_lock *sub = cl2lovsub_lock(slice);
	int restart;

	LASSERT(cl_lock_is_mutexed(child));

	/*
	 * Destruction of a sub-lock might take multiple iterations, because
	 * when the last sub-lock of a given top-lock is deleted, top-lock is
	 * canceled proactively, and this requires to release sub-lock
	 * mutex. Once sub-lock mutex has been released, list of its parents
	 * has to be re-scanned from the beginning.
	 */
	do {
		struct lov_lock *lov;
		struct lov_lock_link *scan;
		struct lov_lock_link *temp;
		struct lov_lock_sub *subdata;

		restart = 0;
		list_for_each_entry_safe(scan, temp,
					 &sub->lss_parents, lll_list) {
			lov = scan->lll_super;
			subdata = &lov->lls_sub[scan->lll_idx];
			lovsub_parent_lock(env, lov);
			subdata->sub_got = subdata->sub_descr;
			lov_lock_unlink(env, scan, sub);
			restart = lovsub_lock_delete_one(env, child, lov);
			lovsub_parent_unlock(env, lov);

			if (restart) {
				cl_lock_mutex_get(env, child);
				break;
			}
		}
	} while (restart);
}

static int lovsub_lock_print(const struct lu_env *env, void *cookie,
			     lu_printer_t p, const struct cl_lock_slice *slice)
{
	struct lovsub_lock *sub = cl2lovsub_lock(slice);
	struct lov_lock *lov;
	struct lov_lock_link *scan;

	list_for_each_entry(scan, &sub->lss_parents, lll_list) {
		lov = scan->lll_super;
		(*p)(env, cookie, "[%d %p ", scan->lll_idx, lov);
		if (lov)
			cl_lock_descr_print(env, cookie, p,
					    &lov->lls_cl.cls_lock->cll_descr);
		(*p)(env, cookie, "] ");
	}
	return 0;
}

static const struct cl_lock_operations lovsub_lock_ops = {
	.clo_fini = lovsub_lock_fini,
	.clo_state = lovsub_lock_state,
	.clo_delete = lovsub_lock_delete,
	.clo_modify = lovsub_lock_modify,
	.clo_closure = lovsub_lock_closure,
	.clo_weigh = lovsub_lock_weigh,
	.clo_print = lovsub_lock_print
};

int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
@@ -160,7 +160,6 @@ static int cl_io_init0(const struct lu_env *env, struct cl_io *io,

	io->ci_type = iot;
	INIT_LIST_HEAD(&io->ci_lockset.cls_todo);
	INIT_LIST_HEAD(&io->ci_lockset.cls_curr);
	INIT_LIST_HEAD(&io->ci_lockset.cls_done);
	INIT_LIST_HEAD(&io->ci_layers);

@@ -242,37 +241,7 @@ static int cl_lock_descr_sort(const struct cl_lock_descr *d0,
			      const struct cl_lock_descr *d1)
{
	return lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu),
			  lu_object_fid(&d1->cld_obj->co_lu)) ?:
		__diff_normalize(d0->cld_start, d1->cld_start);
}

static int cl_lock_descr_cmp(const struct cl_lock_descr *d0,
			     const struct cl_lock_descr *d1)
{
	int ret;

	ret = lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu),
			 lu_object_fid(&d1->cld_obj->co_lu));
	if (ret)
		return ret;
	if (d0->cld_end < d1->cld_start)
		return -1;
	if (d0->cld_start > d0->cld_end)
		return 1;
	return 0;
}

static void cl_lock_descr_merge(struct cl_lock_descr *d0,
				const struct cl_lock_descr *d1)
{
	d0->cld_start = min(d0->cld_start, d1->cld_start);
	d0->cld_end = max(d0->cld_end, d1->cld_end);

	if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
		d0->cld_mode = CLM_WRITE;

	if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
		d0->cld_mode = CLM_GROUP;
	lu_object_fid(&d1->cld_obj->co_lu));
}

/*

@@ -321,33 +290,35 @@ static void cl_io_locks_sort(struct cl_io *io)
	} while (!done);
}

/**
 * Check whether \a queue contains locks matching \a need.
 *
 * \retval +ve there is a matching lock in the \a queue
 * \retval 0 there are no matching locks in the \a queue
 */
int cl_queue_match(const struct list_head *queue,
		   const struct cl_lock_descr *need)
static void cl_lock_descr_merge(struct cl_lock_descr *d0,
				const struct cl_lock_descr *d1)
{
	struct cl_io_lock_link *scan;
	d0->cld_start = min(d0->cld_start, d1->cld_start);
	d0->cld_end = max(d0->cld_end, d1->cld_end);

	list_for_each_entry(scan, queue, cill_linkage) {
		if (cl_lock_descr_match(&scan->cill_descr, need))
			return 1;
	}
	return 0;
	if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
		d0->cld_mode = CLM_WRITE;

	if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
		d0->cld_mode = CLM_GROUP;
}
EXPORT_SYMBOL(cl_queue_match);

static int cl_queue_merge(const struct list_head *queue,
			  const struct cl_lock_descr *need)
static int cl_lockset_merge(const struct cl_lockset *set,
			    const struct cl_lock_descr *need)
{
	struct cl_io_lock_link *scan;

	list_for_each_entry(scan, queue, cill_linkage) {
		if (cl_lock_descr_cmp(&scan->cill_descr, need))
	list_for_each_entry(scan, &set->cls_todo, cill_linkage) {
		if (!cl_object_same(scan->cill_descr.cld_obj, need->cld_obj))
			continue;

		/* Merge locks for the same object because ldlm lock server
		 * may expand the lock extent, otherwise there is a deadlock
		 * case if two conflicted locks are queueud for the same object
		 * and lock server expands one lock to overlap the another.
		 * The side effect is that it can generate a multi-stripe lock
		 * that may cause casacading problem
		 */
		cl_lock_descr_merge(&scan->cill_descr, need);
		CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
		       scan->cill_descr.cld_mode, scan->cill_descr.cld_start,
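The merge rule applied above by cl_lock_descr_merge() (take the union of the two extents and promote the mode to the strongest one requested) can be seen in isolation in the following standalone sketch. The struct and the numbers are simplified stand-ins for illustration, not the real cl_lock_descr:

```c
/* Illustrative only: simplified stand-in for the descriptor merge rule. */
#include <stdio.h>

enum mode { M_READ, M_WRITE, M_GROUP };

struct descr {
	unsigned long start;
	unsigned long end;
	enum mode     mode;
};

static void descr_merge(struct descr *d0, const struct descr *d1)
{
	/* extent becomes the union of the two extents */
	if (d1->start < d0->start)
		d0->start = d1->start;
	if (d1->end > d0->end)
		d0->end = d1->end;
	/* mode is promoted if the other side needs WRITE or GROUP */
	if (d1->mode == M_WRITE && d0->mode != M_WRITE)
		d0->mode = M_WRITE;
	if (d1->mode == M_GROUP && d0->mode != M_GROUP)
		d0->mode = M_GROUP;
}

int main(void)
{
	struct descr queued = { .start = 0,   .end = 255, .mode = M_READ  };
	struct descr need   = { .start = 128, .end = 511, .mode = M_WRITE };

	descr_merge(&queued, &need);
	/* expected output: "merged [0, 511] mode 1" (1 == M_WRITE) */
	printf("merged [%lu, %lu] mode %d\n",
	       queued.start, queued.end, queued.mode);
	return 0;
}
```

This is the deadlock-avoidance idea the comment in the hunk describes: queued lock requests for the same object are merged instead of enqueued separately, so a server-side extent expansion cannot make two of the client's own requests conflict.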
@@ -357,87 +328,20 @@ static int cl_queue_merge(const struct list_head *queue,
	return 0;
}

static int cl_lockset_match(const struct cl_lockset *set,
			    const struct cl_lock_descr *need)
{
	return cl_queue_match(&set->cls_curr, need) ||
	       cl_queue_match(&set->cls_done, need);
}

static int cl_lockset_merge(const struct cl_lockset *set,
			    const struct cl_lock_descr *need)
{
	return cl_queue_merge(&set->cls_todo, need) ||
	       cl_lockset_match(set, need);
}

static int cl_lockset_lock_one(const struct lu_env *env,
			       struct cl_io *io, struct cl_lockset *set,
			       struct cl_io_lock_link *link)
{
	struct cl_lock *lock;
	int result;

	lock = cl_lock_request(env, io, &link->cill_descr, "io", io);

	if (!IS_ERR(lock)) {
		link->cill_lock = lock;
		list_move(&link->cill_linkage, &set->cls_curr);
		if (!(link->cill_descr.cld_enq_flags & CEF_ASYNC)) {
			result = cl_wait(env, lock);
			if (result == 0)
				list_move(&link->cill_linkage, &set->cls_done);
		} else
			result = 0;
	} else
		result = PTR_ERR(lock);
	return result;
}

static void cl_lock_link_fini(const struct lu_env *env, struct cl_io *io,
			      struct cl_io_lock_link *link)
{
	struct cl_lock *lock = link->cill_lock;

	list_del_init(&link->cill_linkage);
	if (lock) {
		cl_lock_release(env, lock, "io", io);
		link->cill_lock = NULL;
	}
	if (link->cill_fini)
		link->cill_fini(env, link);
}

static int cl_lockset_lock(const struct lu_env *env, struct cl_io *io,
			   struct cl_lockset *set)
{
	struct cl_io_lock_link *link;
	struct cl_io_lock_link *temp;
	struct cl_lock *lock;
	int result;

	result = 0;
	list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
		if (!cl_lockset_match(set, &link->cill_descr)) {
			/* XXX some locking to guarantee that locks aren't
			 * expanded in between.
			 */
			result = cl_lockset_lock_one(env, io, set, link);
			if (result != 0)
				break;
		} else
			cl_lock_link_fini(env, io, link);
	}
	if (result == 0) {
		list_for_each_entry_safe(link, temp,
					 &set->cls_curr, cill_linkage) {
			lock = link->cill_lock;
			result = cl_wait(env, lock);
			if (result == 0)
				list_move(&link->cill_linkage, &set->cls_done);
			else
				break;
		}
		result = cl_lock_request(env, io, &link->cill_lock);
		if (result < 0)
			break;

		list_move(&link->cill_linkage, &set->cls_done);
	}
	return result;
}

@@ -493,16 +397,19 @@ void cl_io_unlock(const struct lu_env *env, struct cl_io *io)

	set = &io->ci_lockset;

	list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage)
		cl_lock_link_fini(env, io, link);

	list_for_each_entry_safe(link, temp, &set->cls_curr, cill_linkage)
		cl_lock_link_fini(env, io, link);
	list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
		list_del_init(&link->cill_linkage);
		if (link->cill_fini)
			link->cill_fini(env, link);
	}

	list_for_each_entry_safe(link, temp, &set->cls_done, cill_linkage) {
		cl_unuse(env, link->cill_lock);
		cl_lock_link_fini(env, io, link);
		list_del_init(&link->cill_linkage);
		cl_lock_release(env, &link->cill_lock);
		if (link->cill_fini)
			link->cill_fini(env, link);
	}

	cl_io_for_each_reverse(scan, io) {
		if (scan->cis_iop->op[io->ci_type].cio_unlock)
			scan->cis_iop->op[io->ci_type].cio_unlock(env, scan);

@@ -1435,6 +1342,7 @@ EXPORT_SYMBOL(cl_sync_io_end);
void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
		     void (*end)(const struct lu_env *, struct cl_sync_io *))
{
	memset(anchor, 0, sizeof(*anchor));
	init_waitqueue_head(&anchor->csi_waitq);
	atomic_set(&anchor->csi_sync_nr, nr);
	atomic_set(&anchor->csi_barrier, nr > 0);
File diff suppressed because it is too large
@@ -44,7 +44,6 @@
 *
 * i_mutex
 * PG_locked
 * ->coh_lock_guard
 * ->coh_attr_guard
 * ->ls_guard
 */

@@ -63,8 +62,6 @@

static struct kmem_cache *cl_env_kmem;

/** Lock class of cl_object_header::coh_lock_guard */
static struct lock_class_key cl_lock_guard_class;
/** Lock class of cl_object_header::coh_attr_guard */
static struct lock_class_key cl_attr_guard_class;


@@ -79,11 +76,8 @@ int cl_object_header_init(struct cl_object_header *h)

	result = lu_object_header_init(&h->coh_lu);
	if (result == 0) {
		spin_lock_init(&h->coh_lock_guard);
		spin_lock_init(&h->coh_attr_guard);
		lockdep_set_class(&h->coh_lock_guard, &cl_lock_guard_class);
		lockdep_set_class(&h->coh_attr_guard, &cl_attr_guard_class);
		INIT_LIST_HEAD(&h->coh_locks);
		h->coh_page_bufsize = 0;
	}
	return result;

@@ -310,7 +304,7 @@ EXPORT_SYMBOL(cl_conf_set);
/**
 * Prunes caches of pages and locks for this object.
 */
void cl_object_prune(const struct lu_env *env, struct cl_object *obj)
int cl_object_prune(const struct lu_env *env, struct cl_object *obj)
{
	struct lu_object_header *top;
	struct cl_object *o;

@@ -326,10 +320,7 @@ void cl_object_prune(const struct lu_env *env, struct cl_object *obj)
		}
	}

	/* TODO: pruning locks will be moved into layers after cl_lock
	 * simplification is done
	 */
	cl_locks_prune(env, obj, 1);
	return result;
}
EXPORT_SYMBOL(cl_object_prune);

@@ -342,19 +333,9 @@ EXPORT_SYMBOL(cl_object_prune);
 */
void cl_object_kill(const struct lu_env *env, struct cl_object *obj)
{
	struct cl_object_header *hdr;

	hdr = cl_object_header(obj);
	struct cl_object_header *hdr = cl_object_header(obj);

	set_bit(LU_OBJECT_HEARD_BANSHEE, &hdr->coh_lu.loh_flags);
	/*
	 * Destroy all locks. Object destruction (including cl_inode_fini())
	 * cannot cancel the locks, because in the case of a local client,
	 * where client and server share the same thread running
	 * prune_icache(), this can dead-lock with ldlm_cancel_handler()
	 * waiting on __wait_on_freeing_inode().
	 */
	cl_locks_prune(env, obj, 0);
}
EXPORT_SYMBOL(cl_object_kill);

@@ -406,11 +387,8 @@ int cl_site_init(struct cl_site *s, struct cl_device *d)
	result = lu_site_init(&s->cs_lu, &d->cd_lu_dev);
	if (result == 0) {
		cache_stats_init(&s->cs_pages, "pages");
		cache_stats_init(&s->cs_locks, "locks");
		for (i = 0; i < ARRAY_SIZE(s->cs_pages_state); ++i)
			atomic_set(&s->cs_pages_state[0], 0);
		for (i = 0; i < ARRAY_SIZE(s->cs_locks_state); ++i)
			atomic_set(&s->cs_locks_state[i], 0);
		cl_env_percpu_refill();
	}
	return result;

@@ -445,15 +423,6 @@ int cl_site_stats_print(const struct cl_site *site, struct seq_file *m)
		[CPS_PAGEIN] = "r",
		[CPS_FREEING] = "f"
	};
	static const char *lstate[] = {
		[CLS_NEW] = "n",
		[CLS_QUEUING] = "q",
		[CLS_ENQUEUED] = "e",
		[CLS_HELD] = "h",
		[CLS_INTRANSIT] = "t",
		[CLS_CACHED] = "c",
		[CLS_FREEING] = "f"
	};
/*
 lookup hit total busy create
pages: ...... ...... ...... ...... ...... [...... ...... ...... ......]

@@ -467,12 +436,6 @@ locks: ...... ...... ...... ...... ...... [...... ...... ...... ...... ......]
		seq_printf(m, "%s: %u ", pstate[i],
			   atomic_read(&site->cs_pages_state[i]));
	seq_printf(m, "]\n");
	cache_stats_print(&site->cs_locks, m, 0);
	seq_printf(m, " [");
	for (i = 0; i < ARRAY_SIZE(site->cs_locks_state); ++i)
		seq_printf(m, "%s: %u ", lstate[i],
			   atomic_read(&site->cs_locks_state[i]));
	seq_printf(m, "]\n");
	cache_stats_print(&cl_env_stats, m, 0);
	seq_printf(m, "\n");
	return 0;

@@ -1147,12 +1110,6 @@ void cl_stack_fini(const struct lu_env *env, struct cl_device *cl)
}
EXPORT_SYMBOL(cl_stack_fini);

int cl_lock_init(void);
void cl_lock_fini(void);

int cl_page_init(void);
void cl_page_fini(void);

static struct lu_context_key cl_key;

struct cl_thread_info *cl_env_info(const struct lu_env *env)

@@ -1247,22 +1204,13 @@ int cl_global_init(void)
	if (result)
		goto out_kmem;

	result = cl_lock_init();
	if (result)
		goto out_context;

	result = cl_page_init();
	if (result)
		goto out_lock;

	result = cl_env_percpu_init();
	if (result)
		/* no cl_env_percpu_fini on error */
		goto out_lock;
		goto out_context;

	return 0;
out_lock:
	cl_lock_fini();

out_context:
	lu_context_key_degister(&cl_key);
out_kmem:

@@ -1278,8 +1226,6 @@ out_store:
void cl_global_fini(void)
{
	cl_env_percpu_fini();
	cl_lock_fini();
	cl_page_fini();
	lu_context_key_degister(&cl_key);
	lu_kmem_fini(cl_object_caches);
	cl_env_store_fini();
@@ -1075,12 +1075,3 @@ void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
	slice->cpl_page = page;
}
EXPORT_SYMBOL(cl_page_slice_add);

int cl_page_init(void)
{
	return 0;
}

void cl_page_fini(void)
{
}
@@ -171,7 +171,7 @@ struct echo_thread_info {

	struct cl_2queue eti_queue;
	struct cl_io eti_io;
	struct cl_lock_descr eti_descr;
	struct cl_lock eti_lock;
	struct lu_fid eti_fid;
	struct lu_fid eti_fid2;
};

@@ -327,26 +327,8 @@ static void echo_lock_fini(const struct lu_env *env,
	kmem_cache_free(echo_lock_kmem, ecl);
}

static void echo_lock_delete(const struct lu_env *env,
			     const struct cl_lock_slice *slice)
{
	struct echo_lock *ecl = cl2echo_lock(slice);

	LASSERT(list_empty(&ecl->el_chain));
}

static int echo_lock_fits_into(const struct lu_env *env,
			       const struct cl_lock_slice *slice,
			       const struct cl_lock_descr *need,
			       const struct cl_io *unused)
{
	return 1;
}

static struct cl_lock_operations echo_lock_ops = {
	.clo_fini = echo_lock_fini,
	.clo_delete = echo_lock_delete,
	.clo_fits_into = echo_lock_fits_into
};

/** @} echo_lock */

@@ -811,16 +793,7 @@ static void echo_lock_release(const struct lu_env *env,
{
	struct cl_lock *clk = echo_lock2cl(ecl);

	cl_lock_get(clk);
	cl_unuse(env, clk);
	cl_lock_release(env, clk, "ec enqueue", ecl->el_object);
	if (!still_used) {
		cl_lock_mutex_get(env, clk);
		cl_lock_cancel(env, clk);
		cl_lock_delete(env, clk);
		cl_lock_mutex_put(env, clk);
	}
	cl_lock_put(env, clk);
	cl_lock_release(env, clk);
}

static struct lu_device *echo_device_free(const struct lu_env *env,

@@ -1014,9 +987,11 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,

	info = echo_env_info(env);
	io = &info->eti_io;
	descr = &info->eti_descr;
	lck = &info->eti_lock;
	obj = echo_obj2cl(eco);

	memset(lck, 0, sizeof(*lck));
	descr = &lck->cll_descr;
	descr->cld_obj = obj;
	descr->cld_start = cl_index(obj, start);
	descr->cld_end = cl_index(obj, end);

@@ -1024,25 +999,20 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
	descr->cld_enq_flags = enqflags;
	io->ci_obj = obj;

	lck = cl_lock_request(env, io, descr, "ec enqueue", eco);
	if (lck) {
	rc = cl_lock_request(env, io, lck);
	if (rc == 0) {
		struct echo_client_obd *ec = eco->eo_dev->ed_ec;
		struct echo_lock *el;

		rc = cl_wait(env, lck);
		if (rc == 0) {
			el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
			spin_lock(&ec->ec_lock);
			if (list_empty(&el->el_chain)) {
				list_add(&el->el_chain, &ec->ec_locks);
				el->el_cookie = ++ec->ec_unique;
			}
			atomic_inc(&el->el_refcount);
			*cookie = el->el_cookie;
			spin_unlock(&ec->ec_lock);
		} else {
			cl_lock_release(env, lck, "ec enqueue", current);
		el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
		spin_lock(&ec->ec_lock);
		if (list_empty(&el->el_chain)) {
			list_add(&el->el_chain, &ec->ec_locks);
			el->el_cookie = ++ec->ec_unique;
		}
		atomic_inc(&el->el_refcount);
		*cookie = el->el_cookie;
		spin_unlock(&ec->ec_lock);
	}
	return rc;
}
@@ -76,6 +76,8 @@ static inline char *ext_flags(struct osc_extent *ext, char *flags)
	*buf++ = ext->oe_rw ? 'r' : 'w';
	if (ext->oe_intree)
		*buf++ = 'i';
	if (ext->oe_sync)
		*buf++ = 'S';
	if (ext->oe_srvlock)
		*buf++ = 's';
	if (ext->oe_hp)

@@ -121,9 +123,13 @@ static const char *oes_strings[] = {
		__ext->oe_grants, __ext->oe_nr_pages, \
		list_empty_marker(&__ext->oe_pages), \
		waitqueue_active(&__ext->oe_waitq) ? '+' : '-', \
		__ext->oe_osclock, __ext->oe_mppr, __ext->oe_owner, \
		__ext->oe_dlmlock, __ext->oe_mppr, __ext->oe_owner, \
		/* ----- part 4 ----- */ \
		## __VA_ARGS__); \
	if (lvl == D_ERROR && __ext->oe_dlmlock) \
		LDLM_ERROR(__ext->oe_dlmlock, "extent: %p\n", __ext); \
	else \
		LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p\n", __ext); \
} while (0)

#undef EASSERTF

@@ -240,20 +246,25 @@ static int osc_extent_sanity_check0(struct osc_extent *ext,
		goto out;
	}

	if (!ext->oe_osclock && ext->oe_grants > 0) {
	if (ext->oe_sync && ext->oe_grants > 0) {
		rc = 90;
		goto out;
	}

	if (ext->oe_osclock) {
		struct cl_lock_descr *descr;
	if (ext->oe_dlmlock) {
		struct ldlm_extent *extent;

		descr = &ext->oe_osclock->cll_descr;
		if (!(descr->cld_start <= ext->oe_start &&
		      descr->cld_end >= ext->oe_max_end)) {
		extent = &ext->oe_dlmlock->l_policy_data.l_extent;
		if (!(extent->start <= cl_offset(osc2cl(obj), ext->oe_start) &&
		      extent->end >= cl_offset(osc2cl(obj), ext->oe_max_end))) {
			rc = 100;
			goto out;
		}

		if (!(ext->oe_dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))) {
			rc = 102;
			goto out;
		}
	}

	if (ext->oe_nr_pages > ext->oe_mppr) {

@@ -359,7 +370,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
	ext->oe_state = OES_INV;
	INIT_LIST_HEAD(&ext->oe_pages);
	init_waitqueue_head(&ext->oe_waitq);
	ext->oe_osclock = NULL;
	ext->oe_dlmlock = NULL;

	return ext;
}

@@ -385,9 +396,11 @@ static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext)
	LASSERT(ext->oe_state == OES_INV);
	LASSERT(!ext->oe_intree);

	if (ext->oe_osclock) {
		cl_lock_put(env, ext->oe_osclock);
		ext->oe_osclock = NULL;
	if (ext->oe_dlmlock) {
		lu_ref_add(&ext->oe_dlmlock->l_reference,
			   "osc_extent", ext);
		LDLM_LOCK_PUT(ext->oe_dlmlock);
		ext->oe_dlmlock = NULL;
	}
	osc_extent_free(ext);
}

@@ -543,7 +556,7 @@ static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur,
	if (cur->oe_max_end != victim->oe_max_end)
		return -ERANGE;

	LASSERT(cur->oe_osclock == victim->oe_osclock);
	LASSERT(cur->oe_dlmlock == victim->oe_dlmlock);
	ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_CACHE_SHIFT;
	chunk_start = cur->oe_start >> ppc_bits;
	chunk_end = cur->oe_end >> ppc_bits;

@@ -624,10 +637,10 @@ static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2)
static struct osc_extent *osc_extent_find(const struct lu_env *env,
					  struct osc_object *obj, pgoff_t index,
					  int *grants)

{
	struct client_obd *cli = osc_cli(obj);
	struct cl_lock *lock;
	struct osc_lock *olck;
	struct cl_lock_descr *descr;
	struct osc_extent *cur;
	struct osc_extent *ext;
	struct osc_extent *conflict = NULL;

@@ -644,8 +657,12 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env,
	if (!cur)
		return ERR_PTR(-ENOMEM);

	lock = cl_lock_at_pgoff(env, osc2cl(obj), index, NULL, 1, 0);
	LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
	olck = osc_env_io(env)->oi_write_osclock;
	LASSERTF(olck, "page %lu is not covered by lock\n", index);
	LASSERT(olck->ols_state == OLS_GRANTED);

	descr = &olck->ols_cl.cls_lock->cll_descr;
	LASSERT(descr->cld_mode >= CLM_WRITE);

	LASSERT(cli->cl_chunkbits >= PAGE_CACHE_SHIFT);
	ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT;

@@ -657,19 +674,23 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env,
	max_pages = cli->cl_max_pages_per_rpc;
	LASSERT((max_pages & ~chunk_mask) == 0);
	max_end = index - (index % max_pages) + max_pages - 1;
	max_end = min_t(pgoff_t, max_end, lock->cll_descr.cld_end);
	max_end = min_t(pgoff_t, max_end, descr->cld_end);

	/* initialize new extent by parameters so far */
	cur->oe_max_end = max_end;
	cur->oe_start = index & chunk_mask;
	cur->oe_end = ((index + ~chunk_mask + 1) & chunk_mask) - 1;
	if (cur->oe_start < lock->cll_descr.cld_start)
		cur->oe_start = lock->cll_descr.cld_start;
	if (cur->oe_start < descr->cld_start)
		cur->oe_start = descr->cld_start;
	if (cur->oe_end > max_end)
		cur->oe_end = max_end;
	cur->oe_osclock = lock;
	cur->oe_grants = 0;
	cur->oe_mppr = max_pages;
	if (olck->ols_dlmlock) {
		LASSERT(olck->ols_hold);
		cur->oe_dlmlock = LDLM_LOCK_GET(olck->ols_dlmlock);
		lu_ref_add(&olck->ols_dlmlock->l_reference, "osc_extent", cur);
	}

	/* grants has been allocated by caller */
	LASSERTF(*grants >= chunksize + cli->cl_extent_tax,
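As a side note for readers following osc_extent_find() above: the new extent is chunk-aligned around the faulting page index and then clipped to at most one RPC worth of pages and to the covering lock's extent. A minimal sketch of that arithmetic with plain numbers follows; the chunk size, RPC size and lock extent are invented for the example, only the masking and clipping mirror the code.

```c
/* Illustrative only: chunk alignment and clipping with made-up values. */
#include <stdio.h>

int main(void)
{
	unsigned long index = 1000;      /* page index being written        */
	unsigned int chunk_bits = 4;     /* 16 pages per chunk (example)    */
	unsigned long chunk_mask = ~((1UL << chunk_bits) - 1);
	unsigned long max_pages = 256;   /* cl_max_pages_per_rpc (example)  */
	unsigned long lock_start = 512;  /* covering lock extent, in pages  */
	unsigned long lock_end = 2047;

	/* at most one RPC worth of pages, clipped to the covering lock */
	unsigned long max_end = index - (index % max_pages) + max_pages - 1;
	if (max_end > lock_end)
		max_end = lock_end;

	/* chunk-aligned extent around the page, clipped as above */
	unsigned long oe_start = index & chunk_mask;
	unsigned long oe_end = ((index + ~chunk_mask + 1) & chunk_mask) - 1;
	if (oe_start < lock_start)
		oe_start = lock_start;
	if (oe_end > max_end)
		oe_end = max_end;

	/* expected output: "extent [992, 1007], max_end 1023" */
	printf("extent [%lu, %lu], max_end %lu\n", oe_start, oe_end, max_end);
	return 0;
}
```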
@@ -691,7 +712,7 @@ restart:
			break;

		/* if covering by different locks, no chance to match */
		if (lock != ext->oe_osclock) {
		if (olck->ols_dlmlock != ext->oe_dlmlock) {
			EASSERTF(!overlapped(ext, cur), ext,
				 EXTSTR"\n", EXTPARA(cur));


@@ -795,7 +816,7 @@ restart:
	if (found) {
		LASSERT(!conflict);
		if (!IS_ERR(found)) {
			LASSERT(found->oe_osclock == cur->oe_osclock);
			LASSERT(found->oe_dlmlock == cur->oe_dlmlock);
			OSC_EXTENT_DUMP(D_CACHE, found,
					"found caching ext for %lu.\n", index);
		}

@@ -810,7 +831,7 @@ restart:
		found = osc_extent_hold(cur);
		osc_extent_insert(obj, cur);
		OSC_EXTENT_DUMP(D_CACHE, cur, "add into tree %lu/%lu.\n",
				index, lock->cll_descr.cld_end);
				index, descr->cld_end);
	}
	osc_object_unlock(obj);

@@ -2630,6 +2651,7 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
	}

	ext->oe_rw = !!(cmd & OBD_BRW_READ);
	ext->oe_sync = 1;
	ext->oe_urgent = 1;
	ext->oe_start = start;
	ext->oe_end = ext->oe_max_end = end;

@@ -3087,27 +3109,27 @@ static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io,
				struct osc_page *ops, void *cbdata)
{
	struct osc_thread_info *info = osc_env_info(env);
	struct cl_lock *lock = cbdata;
	struct osc_object *osc = cbdata;
	pgoff_t index;

	index = osc_index(ops);
	if (index >= info->oti_fn_index) {
		struct cl_lock *tmp;
		struct ldlm_lock *tmp;
		struct cl_page *page = ops->ops_cl.cpl_page;

		/* refresh non-overlapped index */
		tmp = cl_lock_at_pgoff(env, lock->cll_descr.cld_obj, index,
				       lock, 1, 0);
		tmp = osc_dlmlock_at_pgoff(env, osc, index, 0, 0);
		if (tmp) {
			__u64 end = tmp->l_policy_data.l_extent.end;
			/* Cache the first-non-overlapped index so as to skip
			 * all pages within [index, oti_fn_index). This
			 * is safe because if tmp lock is canceled, it will
			 * discard these pages.
			 * all pages within [index, oti_fn_index). This is safe
			 * because if tmp lock is canceled, it will discard
			 * these pages.
			 */
			info->oti_fn_index = tmp->cll_descr.cld_end + 1;
			if (tmp->cll_descr.cld_end == CL_PAGE_EOF)
			info->oti_fn_index = cl_index(osc2cl(osc), end + 1);
			if (end == OBD_OBJECT_EOF)
				info->oti_fn_index = CL_PAGE_EOF;
			cl_lock_put(env, tmp);
			LDLM_LOCK_PUT(tmp);
		} else if (cl_page_own(env, io, page) == 0) {
			/* discard the page */
			cl_page_discard(env, io, page);

@@ -3125,11 +3147,8 @@ static int discard_cb(const struct lu_env *env, struct cl_io *io,
		      struct osc_page *ops, void *cbdata)
{
	struct osc_thread_info *info = osc_env_info(env);
	struct cl_lock *lock = cbdata;
	struct cl_page *page = ops->ops_cl.cpl_page;

	LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);

	/* page is top page. */
	info->oti_next_index = osc_index(ops) + 1;
	if (cl_page_own(env, io, page) == 0) {

@@ -3154,30 +3173,27 @@ static int discard_cb(const struct lu_env *env, struct cl_io *io,
 * If error happens on any step, the process continues anyway (the reasoning
 * behind this being that lock cancellation cannot be delayed indefinitely).
 */
int osc_lock_discard_pages(const struct lu_env *env, struct osc_lock *ols)
int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
			   pgoff_t start, pgoff_t end, enum cl_lock_mode mode)
{
	struct osc_thread_info *info = osc_env_info(env);
	struct cl_io *io = &info->oti_io;
	struct cl_object *osc = ols->ols_cl.cls_obj;
	struct cl_lock *lock = ols->ols_cl.cls_lock;
	struct cl_lock_descr *descr = &lock->cll_descr;
	osc_page_gang_cbt cb;
	int res;
	int result;

	io->ci_obj = cl_object_top(osc);
	io->ci_obj = cl_object_top(osc2cl(osc));
	io->ci_ignore_layout = 1;
	result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
	if (result != 0)
		goto out;

	cb = descr->cld_mode == CLM_READ ? check_and_discard_cb : discard_cb;
	info->oti_fn_index = info->oti_next_index = descr->cld_start;
	cb = mode == CLM_READ ? check_and_discard_cb : discard_cb;
	info->oti_fn_index = info->oti_next_index = start;
	do {
		res = osc_page_gang_lookup(env, io, cl2osc(osc),
					   info->oti_next_index, descr->cld_end,
					   cb, (void *)lock);
		if (info->oti_next_index > descr->cld_end)
		res = osc_page_gang_lookup(env, io, osc,
					   info->oti_next_index, end, cb, osc);
		if (info->oti_next_index > end)
			break;

		if (res == CLP_GANG_RESCHED)
@@ -68,6 +68,9 @@ struct osc_io {
	struct cl_io_slice oi_cl;
	/** true if this io is lockless. */
	int oi_lockless;
	/** how many LRU pages are reserved for this IO */
	int oi_lru_reserved;

	/** active extents, we know how many bytes is going to be written,
	 * so having an active extent will prevent it from being fragmented
	 */

@@ -77,8 +80,8 @@ struct osc_io {
	 */
	struct osc_extent *oi_trunc;

	int oi_lru_reserved;

	/** write osc_lock for this IO, used by osc_extent_find(). */
	struct osc_lock *oi_write_osclock;
	struct obd_info oi_info;
	struct obdo oi_oa;
	struct osc_async_cbargs {

@@ -117,6 +120,7 @@ struct osc_thread_info {
	 */
	pgoff_t oti_next_index;
	pgoff_t oti_fn_index; /* first non-overlapped index */
	struct cl_sync_io oti_anchor;
};

struct osc_object {

@@ -173,6 +177,10 @@ struct osc_object {
	struct radix_tree_root oo_tree;
	spinlock_t oo_tree_lock;
	unsigned long oo_npages;

	/* Protect osc_lock this osc_object has */
	spinlock_t oo_ol_spin;
	struct list_head oo_ol_list;
};

static inline void osc_object_lock(struct osc_object *obj)

@@ -212,8 +220,6 @@ enum osc_lock_state {
	OLS_ENQUEUED,
	OLS_UPCALL_RECEIVED,
	OLS_GRANTED,
	OLS_RELEASED,
	OLS_BLOCKED,
	OLS_CANCELLED
};


@@ -222,10 +228,8 @@ enum osc_lock_state {
 *
 * Interaction with DLM.
 *
 * CLIO enqueues all DLM locks through ptlrpcd (that is, in "async" mode).
 *
 * Once receive upcall is invoked, osc_lock remembers a handle of DLM lock in
 * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_lock.
 * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_dlmlock.
 *
 * This pointer is protected through a reference, acquired by
 * osc_lock_upcall0(). Also, an additional reference is acquired by

@@ -263,16 +267,27 @@ enum osc_lock_state {
 */
struct osc_lock {
	struct cl_lock_slice ols_cl;
	/** Internal lock to protect states, etc. */
	spinlock_t ols_lock;
	/** Owner sleeps on this channel for state change */
	struct cl_sync_io *ols_owner;
	/** waiting list for this lock to be cancelled */
	struct list_head ols_waiting_list;
	/** wait entry of ols_waiting_list */
	struct list_head ols_wait_entry;
	/** list entry for osc_object::oo_ol_list */
	struct list_head ols_nextlock_oscobj;

	/** underlying DLM lock */
	struct ldlm_lock *ols_lock;
	/** lock value block */
	struct ost_lvb ols_lvb;
	struct ldlm_lock *ols_dlmlock;
	/** DLM flags with which osc_lock::ols_lock was enqueued */
	__u64 ols_flags;
	/** osc_lock::ols_lock handle */
	struct lustre_handle ols_handle;
	struct ldlm_enqueue_info ols_einfo;
	enum osc_lock_state ols_state;
	/** lock value block */
	struct ost_lvb ols_lvb;

	/**
	 * true, if ldlm_lock_addref() was called against

@@ -302,16 +317,6 @@ struct osc_lock {
	 * If true, osc_lock_enqueue is able to tolerate the -EUSERS error.
	 */
	ols_locklessable:1,
	/**
	 * set by osc_lock_use() to wait until blocking AST enters into
	 * osc_ldlm_blocking_ast0(), so that cl_lock mutex can be used for
	 * further synchronization.
	 */
	ols_ast_wait:1,
	/**
	 * If the data of this lock has been flushed to server side.
	 */
	ols_flush:1,
	/**
	 * if set, the osc_lock is a glimpse lock. For glimpse locks, we treat
	 * the EVAVAIL error as tolerable, this will make upper logic happy

@@ -325,15 +330,6 @@ struct osc_lock {
	 * For async glimpse lock.
	 */
	ols_agl:1;
	/**
	 * IO that owns this lock. This field is used for a dead-lock
	 * avoidance by osc_lock_enqueue_wait().
	 *
	 * XXX: unfortunately, the owner of a osc_lock is not unique,
	 * the lock may have multiple users, if the lock is granted and
	 * then matched.
	 */
	struct osc_io *ols_owner;
};

/**

@@ -627,6 +623,8 @@ struct osc_extent {
	unsigned int oe_intree:1,
	/** 0 is write, 1 is read */
		     oe_rw:1,
	/** sync extent, queued by osc_queue_sync_pages() */
		     oe_sync:1,
		     oe_srvlock:1,
		     oe_memalloc:1,
	/** an ACTIVE extent is going to be truncated, so when this extent

@@ -675,7 +673,7 @@ struct osc_extent {
	 */
	wait_queue_head_t oe_waitq;
	/** lock covering this extent */
	struct cl_lock *oe_osclock;
	struct ldlm_lock *oe_dlmlock;
	/** terminator of this extent. Must be true if this extent is in IO. */
	struct task_struct *oe_owner;
	/** return value of writeback. If somebody is waiting for this extent,

@@ -690,14 +688,14 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
		      int sent, int rc);
void osc_extent_release(const struct lu_env *env, struct osc_extent *ext);

int osc_lock_discard_pages(const struct lu_env *env, struct osc_lock *lock);
int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
			   pgoff_t start, pgoff_t end, enum cl_lock_mode mode);

typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *,
				 struct osc_page *, void *);
int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
			 struct osc_object *osc, pgoff_t start, pgoff_t end,
			 osc_page_gang_cbt cb, void *cbdata);

/** @} osc */

#endif /* OSC_CL_INTERNAL_H */
@@ -108,12 +108,14 @@ void osc_update_next_shrink(struct client_obd *cli);

extern struct ptlrpc_request_set *PTLRPCD_SET;

typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh,
				    int rc);

int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
		     __u64 *flags, ldlm_policy_data_t *policy,
		     struct ost_lvb *lvb, int kms_valid,
		     obd_enqueue_update_f upcall,
		     osc_enqueue_upcall_f upcall,
		     void *cookie, struct ldlm_enqueue_info *einfo,
		     struct lustre_handle *lockh,
		     struct ptlrpc_request_set *rqset, int async, int agl);
int osc_cancel_base(struct lustre_handle *lockh, __u32 mode);


@@ -140,7 +142,6 @@ int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
		   int target, bool force);
int osc_lru_reclaim(struct client_obd *cli);

extern spinlock_t osc_ast_guard;
unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock);

int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg);

@@ -199,5 +200,8 @@ int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
		   struct obd_quotactl *oqctl);
int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
				       struct osc_object *obj, pgoff_t index,
				       int pending, int canceling);

#endif /* OSC_INTERNAL_H */
@@ -354,6 +354,7 @@ static void osc_io_rw_iter_fini(const struct lu_env *env,
		atomic_add(oio->oi_lru_reserved, cli->cl_lru_left);
		oio->oi_lru_reserved = 0;
	}
	oio->oi_write_osclock = NULL;
}

static int osc_io_fault_start(const struct lu_env *env,

@@ -751,8 +752,7 @@ static void osc_req_attr_set(const struct lu_env *env,
	struct lov_oinfo *oinfo;
	struct cl_req *clerq;
	struct cl_page *apage; /* _some_ page in @clerq */
	struct cl_lock *lock; /* _some_ lock protecting @apage */
	struct osc_lock *olck;
	struct ldlm_lock *lock; /* _some_ lock protecting @apage */
	struct osc_page *opg;
	struct obdo *oa;
	struct ost_lvb *lvb;

@@ -782,38 +782,37 @@ static void osc_req_attr_set(const struct lu_env *env,
		oa->o_valid |= OBD_MD_FLID;
	}
	if (flags & OBD_MD_FLHANDLE) {
		struct cl_object *subobj;

		clerq = slice->crs_req;
		LASSERT(!list_empty(&clerq->crq_pages));
		apage = container_of(clerq->crq_pages.next,
				     struct cl_page, cp_flight);
		opg = osc_cl_page_osc(apage, NULL);
		subobj = opg->ops_cl.cpl_obj;
		lock = cl_lock_at_pgoff(env, subobj, osc_index(opg),
					NULL, 1, 1);
		if (!lock) {
			struct cl_object_header *head;
			struct cl_lock *scan;
		lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg),
					    1, 1);
		if (!lock && !opg->ops_srvlock) {
			struct ldlm_resource *res;
			struct ldlm_res_id *resname;

			CL_PAGE_DEBUG(D_ERROR, env, apage, "uncovered page!\n");

			resname = &osc_env_info(env)->oti_resname;
			ostid_build_res_name(&oinfo->loi_oi, resname);
			res = ldlm_resource_get(
				osc_export(cl2osc(obj))->exp_obd->obd_namespace,
				NULL, resname, LDLM_EXTENT, 0);
			ldlm_resource_dump(D_ERROR, res);

			head = cl_object_header(subobj);
			list_for_each_entry(scan, &head->coh_locks, cll_linkage)
				CL_LOCK_DEBUG(D_ERROR, env, scan,
					      "no cover page!\n");
			CL_PAGE_DEBUG(D_ERROR, env, apage,
				      "dump uncover page!\n");
			dump_stack();
			LBUG();
		}

		olck = osc_lock_at(lock);
		LASSERT(ergo(opg->ops_srvlock, !olck->ols_lock));
		/* check for lockless io. */
		if (olck->ols_lock) {
			oa->o_handle = olck->ols_lock->l_remote_handle;
		if (lock) {
			oa->o_handle = lock->l_remote_handle;
			oa->o_valid |= OBD_MD_FLHANDLE;
			LDLM_LOCK_PUT(lock);
		}
		cl_lock_put(env, lock);
	}
}
File diff suppressed because it is too large
@@ -96,6 +96,8 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj,
	atomic_set(&osc->oo_nr_writes, 0);
	spin_lock_init(&osc->oo_lock);
	spin_lock_init(&osc->oo_tree_lock);
	spin_lock_init(&osc->oo_ol_spin);
	INIT_LIST_HEAD(&osc->oo_ol_list);

	cl_object_page_init(lu2cl(obj), sizeof(struct osc_page));


@@ -122,6 +124,7 @@ static void osc_object_free(const struct lu_env *env, struct lu_object *obj)
	LASSERT(list_empty(&osc->oo_reading_exts));
	LASSERT(atomic_read(&osc->oo_nr_reads) == 0);
	LASSERT(atomic_read(&osc->oo_nr_writes) == 0);
	LASSERT(list_empty(&osc->oo_ol_list));

	lu_object_fini(obj);
	kmem_cache_free(osc_object_kmem, osc);

@@ -194,6 +197,32 @@ static int osc_object_glimpse(const struct lu_env *env,
	return 0;
}

static int osc_object_ast_clear(struct ldlm_lock *lock, void *data)
{
	LASSERT(lock->l_granted_mode == lock->l_req_mode);
	if (lock->l_ast_data == data)
		lock->l_ast_data = NULL;
	return LDLM_ITER_CONTINUE;
}

static int osc_object_prune(const struct lu_env *env, struct cl_object *obj)
{
	struct osc_object *osc = cl2osc(obj);
	struct ldlm_res_id *resname = &osc_env_info(env)->oti_resname;

	LASSERTF(osc->oo_npages == 0,
		 DFID "still have %lu pages, obj: %p, osc: %p\n",
		 PFID(lu_object_fid(&obj->co_lu)), osc->oo_npages, obj, osc);

	/* DLM locks don't hold a reference of osc_object so we have to
	 * clear it before the object is being destroyed.
	 */
	ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
	ldlm_resource_iterate(osc_export(osc)->exp_obd->obd_namespace, resname,
			      osc_object_ast_clear, osc);
	return 0;
}

void osc_object_set_contended(struct osc_object *obj)
{
	obj->oo_contention_time = cfs_time_current();

@@ -238,12 +267,12 @@ static const struct cl_object_operations osc_ops = {
	.coo_io_init = osc_io_init,
	.coo_attr_get = osc_attr_get,
	.coo_attr_set = osc_attr_set,
	.coo_glimpse = osc_object_glimpse
	.coo_glimpse = osc_object_glimpse,
	.coo_prune = osc_object_prune
};

static const struct lu_object_operations osc_lu_obj_ops = {
	.loo_object_init = osc_object_init,
	.loo_object_delete = NULL,
	.loo_object_release = NULL,
	.loo_object_free = osc_object_free,
	.loo_object_print = osc_object_print,
@@ -135,15 +135,15 @@ static int osc_page_is_under_lock(const struct lu_env *env,
				  struct cl_io *unused, pgoff_t *max_index)
{
	struct osc_page *opg = cl2osc_page(slice);
	struct cl_lock *lock;
	struct ldlm_lock *dlmlock;
	int result = -ENODATA;

	*max_index = 0;
	lock = cl_lock_at_pgoff(env, slice->cpl_obj, osc_index(opg),
				NULL, 1, 0);
	if (lock) {
		*max_index = lock->cll_descr.cld_end;
		cl_lock_put(env, lock);
	dlmlock = osc_dlmlock_at_pgoff(env, cl2osc(slice->cpl_obj),
				       osc_index(opg), 1, 0);
	if (dlmlock) {
		*max_index = cl_index(slice->cpl_obj,
				      dlmlock->l_policy_data.l_extent.end);
		LDLM_LOCK_PUT(dlmlock);
		result = 0;
	}
	return result;
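For reference, the byte-offset to page-index conversion that cl_index() performs on the DLM extent end above amounts to a shift by the page size. A small sketch with a hard-coded 4 KiB page and a stand-in EOF constant (both chosen for the example, not taken from the headers) looks like this:

```c
/* Illustrative only: byte offset to page index with a fixed 4 KiB page. */
#include <stdio.h>

#define EX_PAGE_SHIFT	12UL		/* 4096-byte pages (example value) */
#define EX_EOF		(~0ULL)		/* stand-in for OBD_OBJECT_EOF     */

static unsigned long byte_to_index(unsigned long long offset)
{
	return (unsigned long)(offset >> EX_PAGE_SHIFT);
}

int main(void)
{
	unsigned long long dlm_end = 1048575ULL;	/* last covered byte */

	if (dlm_end == EX_EOF)
		printf("lock covers the whole object\n");
	else
		/* byte 1048575 is the last byte of page index 255 */
		printf("max page index under lock: %lu\n",
		       byte_to_index(dlm_end));
	return 0;
}
```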
@ -92,12 +92,13 @@ struct osc_fsync_args {
|
|||
|
||||
struct osc_enqueue_args {
|
||||
struct obd_export *oa_exp;
|
||||
enum ldlm_type oa_type;
|
||||
enum ldlm_mode oa_mode;
|
||||
__u64 *oa_flags;
|
||||
obd_enqueue_update_f oa_upcall;
|
||||
osc_enqueue_upcall_f oa_upcall;
|
||||
void *oa_cookie;
|
||||
struct ost_lvb *oa_lvb;
|
||||
struct lustre_handle *oa_lockh;
|
||||
struct ldlm_enqueue_info *oa_ei;
|
||||
struct lustre_handle oa_lockh;
|
||||
unsigned int oa_agl:1;
|
||||
};
|
||||
|
||||
|
@ -2068,14 +2069,12 @@ static int osc_set_lock_data_with_check(struct ldlm_lock *lock,
|
|||
LASSERT(lock->l_glimpse_ast == einfo->ei_cb_gl);
|
||||
|
||||
lock_res_and_lock(lock);
|
||||
spin_lock(&osc_ast_guard);
|
||||
|
||||
if (!lock->l_ast_data)
|
||||
lock->l_ast_data = data;
|
||||
if (lock->l_ast_data == data)
|
||||
set = 1;
|
||||
|
||||
spin_unlock(&osc_ast_guard);
|
||||
unlock_res_and_lock(lock);
|
||||
|
||||
return set;
|
||||
|
@ -2117,36 +2116,38 @@ static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
|
|||
return rc;
|
||||
}
|
||||
|
||||
static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb,
|
||||
obd_enqueue_update_f upcall, void *cookie,
|
||||
__u64 *flags, int agl, int rc)
|
||||
static int osc_enqueue_fini(struct ptlrpc_request *req,
|
||||
osc_enqueue_upcall_f upcall, void *cookie,
|
||||
struct lustre_handle *lockh, enum ldlm_mode mode,
|
||||
__u64 *flags, int agl, int errcode)
|
||||
{
|
||||
int intent = *flags & LDLM_FL_HAS_INTENT;
|
||||
bool intent = *flags & LDLM_FL_HAS_INTENT;
|
||||
int rc;
|
||||
|
||||
if (intent) {
|
||||
/* The request was created before ldlm_cli_enqueue call. */
|
||||
if (rc == ELDLM_LOCK_ABORTED) {
|
||||
struct ldlm_reply *rep;
|
||||
/* The request was created before ldlm_cli_enqueue call. */
|
||||
if (intent && errcode == ELDLM_LOCK_ABORTED) {
|
||||
struct ldlm_reply *rep;
|
||||
|
||||
rep = req_capsule_server_get(&req->rq_pill,
|
||||
&RMF_DLM_REP);
|
||||
rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
|
||||
|
||||
rep->lock_policy_res1 =
|
||||
ptlrpc_status_ntoh(rep->lock_policy_res1);
|
||||
if (rep->lock_policy_res1)
|
||||
rc = rep->lock_policy_res1;
|
||||
}
|
||||
}
|
||||
|
||||
if ((intent != 0 && rc == ELDLM_LOCK_ABORTED && agl == 0) ||
|
||||
(rc == 0)) {
|
||||
rep->lock_policy_res1 =
|
||||
ptlrpc_status_ntoh(rep->lock_policy_res1);
|
||||
if (rep->lock_policy_res1)
|
||||
errcode = rep->lock_policy_res1;
|
||||
if (!agl)
|
||||
*flags |= LDLM_FL_LVB_READY;
|
||||
} else if (errcode == ELDLM_OK) {
|
||||
*flags |= LDLM_FL_LVB_READY;
|
||||
CDEBUG(D_INODE, "got kms %llu blocks %llu mtime %llu\n",
|
||||
lvb->lvb_size, lvb->lvb_blocks, lvb->lvb_mtime);
|
||||
}
|
||||
|
||||
/* Call the update callback. */
|
||||
rc = (*upcall)(cookie, rc);
|
||||
rc = (*upcall)(cookie, lockh, errcode);
|
||||
/* release the reference taken in ldlm_cli_enqueue() */
|
||||
if (errcode == ELDLM_LOCK_MATCHED)
|
||||
errcode = ELDLM_OK;
|
||||
if (errcode == ELDLM_OK && lustre_handle_is_used(lockh))
|
||||
ldlm_lock_decref(lockh, mode);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@@ -2155,62 +2156,47 @@ static int osc_enqueue_interpret(const struct lu_env *env,
 				 struct osc_enqueue_args *aa, int rc)
 {
 	struct ldlm_lock *lock;
-	struct lustre_handle handle;
-	__u32 mode;
-	struct ost_lvb *lvb;
-	__u32 lvb_len;
-	__u64 *flags = aa->oa_flags;
+	struct lustre_handle *lockh = &aa->oa_lockh;
+	enum ldlm_mode mode = aa->oa_mode;
+	struct ost_lvb *lvb = aa->oa_lvb;
+	__u32 lvb_len = sizeof(*lvb);
+	__u64 flags = 0;
 
-	/* Make a local copy of a lock handle and a mode, because aa->oa_*
-	 * might be freed anytime after lock upcall has been called.
-	 */
-	lustre_handle_copy(&handle, aa->oa_lockh);
-	mode = aa->oa_ei->ei_mode;
-
 	/* ldlm_cli_enqueue is holding a reference on the lock, so it must
 	 * be valid.
 	 */
-	lock = ldlm_handle2lock(&handle);
+	lock = ldlm_handle2lock(lockh);
+	LASSERTF(lock, "lockh %llx, req %p, aa %p - client evicted?\n",
+		 lockh->cookie, req, aa);
 
 	/* Take an additional reference so that a blocking AST that
 	 * ldlm_cli_enqueue_fini() might post for a failed lock, is guaranteed
 	 * to arrive after an upcall has been executed by
 	 * osc_enqueue_fini().
 	 */
-	ldlm_lock_addref(&handle, mode);
+	ldlm_lock_addref(lockh, mode);
 
 	/* Let CP AST to grant the lock first. */
 	OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1);
 
-	if (aa->oa_agl && rc == ELDLM_LOCK_ABORTED) {
-		lvb = NULL;
-		lvb_len = 0;
-	} else {
-		lvb = aa->oa_lvb;
-		lvb_len = sizeof(*aa->oa_lvb);
+	if (aa->oa_agl) {
+		LASSERT(!aa->oa_lvb);
+		LASSERT(!aa->oa_flags);
+		aa->oa_flags = &flags;
 	}
 
 	/* Complete obtaining the lock procedure. */
-	rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_ei->ei_type, 1,
-				   mode, flags, lvb, lvb_len, &handle, rc);
+	rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_type, 1,
				   aa->oa_mode, aa->oa_flags, lvb, lvb_len,
+				   lockh, rc);
 	/* Complete osc stuff. */
-	rc = osc_enqueue_fini(req, aa->oa_lvb, aa->oa_upcall, aa->oa_cookie,
-			      flags, aa->oa_agl, rc);
+	rc = osc_enqueue_fini(req, aa->oa_upcall, aa->oa_cookie, lockh, mode,
+			      aa->oa_flags, aa->oa_agl, rc);
 
 	OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_CANCEL_RACE, 10);
 
-	/* Release the lock for async request. */
-	if (lustre_handle_is_used(&handle) && rc == ELDLM_OK)
-		/*
-		 * Releases a reference taken by ldlm_cli_enqueue(), if it is
-		 * not already released by
-		 * ldlm_cli_enqueue_fini()->failed_lock_cleanup()
-		 */
-		ldlm_lock_decref(&handle, mode);
-
-	LASSERTF(lock, "lockh %p, req %p, aa %p - client evicted?\n",
-		 aa->oa_lockh, req, aa);
-	ldlm_lock_decref(&handle, mode);
+	ldlm_lock_decref(lockh, mode);
 	LDLM_LOCK_PUT(lock);
 	return rc;
 }
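Because struct osc_enqueue_args now carries the lock handle and mode by value, the interpret callback can simply point at its own argument block; the old lustre_handle_copy()/aa->oa_ei indirection is no longer needed. A minimal stand-alone illustration of that ownership change (model_* names are invented, not Lustre APIs):

	#include <stdio.h>

	struct model_handle { unsigned long long cookie; };

	struct model_enqueue_args {
		struct model_handle lockh;	/* embedded, like aa->oa_lockh */
		int mode;			/* like aa->oa_mode */
	};

	static void model_interpret(struct model_enqueue_args *aa)
	{
		/* Valid for as long as *aa is valid; no local copy needed. */
		struct model_handle *lockh = &aa->lockh;

		printf("cookie %#llx, mode %d\n", lockh->cookie, aa->mode);
	}

	int main(void)
	{
		struct model_enqueue_args aa = { { 0x1234 }, 2 };

		model_interpret(&aa);
		return 0;
	}

For AGL requests the callback also redirects aa->oa_flags to a stack variable, so ldlm_cli_enqueue_fini() always has a flags pointer to write through even though the speculative enqueue ignores the LVB and flags.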
@@ -2222,21 +2208,21 @@ struct ptlrpc_request_set *PTLRPCD_SET = (void *)1;
  * other synchronous requests, however keeping some locks and trying to obtain
  * others may take a considerable amount of time in a case of ost failure; and
  * when other sync requests do not get released lock from a client, the client
- * is excluded from the cluster -- such scenarious make the life difficult, so
+ * is evicted from the cluster -- such scenaries make the life difficult, so
  * release locks just after they are obtained.
  */
 int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 		     __u64 *flags, ldlm_policy_data_t *policy,
 		     struct ost_lvb *lvb, int kms_valid,
-		     obd_enqueue_update_f upcall, void *cookie,
+		     osc_enqueue_upcall_f upcall, void *cookie,
 		     struct ldlm_enqueue_info *einfo,
-		     struct lustre_handle *lockh,
 		     struct ptlrpc_request_set *rqset, int async, int agl)
 {
 	struct obd_device *obd = exp->exp_obd;
+	struct lustre_handle lockh = { 0 };
 	struct ptlrpc_request *req = NULL;
 	int intent = *flags & LDLM_FL_HAS_INTENT;
-	__u64 match_lvb = (agl != 0 ? 0 : LDLM_FL_LVB_READY);
+	__u64 match_lvb = agl ? 0 : LDLM_FL_LVB_READY;
 	enum ldlm_mode mode;
 	int rc;
@@ -2272,55 +2258,39 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 	if (einfo->ei_mode == LCK_PR)
 		mode |= LCK_PW;
 	mode = ldlm_lock_match(obd->obd_namespace, *flags | match_lvb, res_id,
-			       einfo->ei_type, policy, mode, lockh, 0);
+			       einfo->ei_type, policy, mode, &lockh, 0);
 	if (mode) {
-		struct ldlm_lock *matched = ldlm_handle2lock(lockh);
+		struct ldlm_lock *matched;
 
-		if ((agl != 0) && !(matched->l_flags & LDLM_FL_LVB_READY)) {
-			/* For AGL, if enqueue RPC is sent but the lock is not
-			 * granted, then skip to process this strpe.
-			 * Return -ECANCELED to tell the caller.
+		if (*flags & LDLM_FL_TEST_LOCK)
+			return ELDLM_OK;
+
+		matched = ldlm_handle2lock(&lockh);
+		if (agl) {
+			/* AGL enqueues DLM locks speculatively. Therefore if
+			 * it already exists a DLM lock, it wll just inform the
+			 * caller to cancel the AGL process for this stripe.
 			 */
-			ldlm_lock_decref(lockh, mode);
+			ldlm_lock_decref(&lockh, mode);
 			LDLM_LOCK_PUT(matched);
 			return -ECANCELED;
-		}
-
-		if (osc_set_lock_data_with_check(matched, einfo)) {
+		} else if (osc_set_lock_data_with_check(matched, einfo)) {
 			*flags |= LDLM_FL_LVB_READY;
-			/* addref the lock only if not async requests and PW
-			 * lock is matched whereas we asked for PR.
-			 */
-			if (!rqset && einfo->ei_mode != mode)
-				ldlm_lock_addref(lockh, LCK_PR);
-			if (intent) {
-				/* I would like to be able to ASSERT here that
-				 * rss <= kms, but I can't, for reasons which
-				 * are explained in lov_enqueue()
-				 */
-			}
-
-			/* We already have a lock, and it's referenced.
-			 *
-			 * At this point, the cl_lock::cll_state is CLS_QUEUING,
-			 * AGL upcall may change it to CLS_HELD directly.
-			 */
-			(*upcall)(cookie, ELDLM_OK);
+			/* We already have a lock, and it's referenced. */
+			(*upcall)(cookie, &lockh, ELDLM_LOCK_MATCHED);
 
-			if (einfo->ei_mode != mode)
-				ldlm_lock_decref(lockh, LCK_PW);
-			else if (rqset)
-				/* For async requests, decref the lock. */
-				ldlm_lock_decref(lockh, einfo->ei_mode);
+			ldlm_lock_decref(&lockh, mode);
 			LDLM_LOCK_PUT(matched);
 			return ELDLM_OK;
 		} else {
-			ldlm_lock_decref(lockh, mode);
+			ldlm_lock_decref(&lockh, mode);
 			LDLM_LOCK_PUT(matched);
 		}
 	}
 
-no_match:
+no_match:
+	if (*flags & LDLM_FL_TEST_LOCK)
+		return -ENOLCK;
 	if (intent) {
 		LIST_HEAD(cancels);
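The rewritten match path reduces to a small decision ladder: a TEST_LOCK probe succeeds immediately, AGL gives up on a stripe that already has a cached DLM lock, a lock whose l_ast_data checks out is handed to the upcall as ELDLM_LOCK_MATCHED, and anything else falls through to a fresh enqueue. A stand-alone model of just that decision (flag bits and return values are stand-ins, not Lustre definitions):

	#include <stdbool.h>
	#include <stdio.h>

	#define MODEL_FL_TEST_LOCK	0x1

	enum {
		MODEL_OK	  = 0,	/* matched, nothing more to do */
		MODEL_MATCHED	  = 1,	/* reuse lock, upcall gets "matched" */
		MODEL_ECANCELED	  = 2,	/* AGL: stop speculating on this stripe */
		MODEL_ENQUEUE_NEW = 3,	/* fall through to a fresh enqueue */
	};

	static int model_match_path(unsigned int flags, bool agl, bool data_ok)
	{
		if (flags & MODEL_FL_TEST_LOCK)
			return MODEL_OK;
		if (agl)
			return MODEL_ECANCELED;
		if (data_ok)
			return MODEL_MATCHED;
		return MODEL_ENQUEUE_NEW;
	}

	int main(void)
	{
		printf("%d %d %d %d\n",
		       model_match_path(MODEL_FL_TEST_LOCK, false, false),
		       model_match_path(0, true, false),
		       model_match_path(0, false, true),
		       model_match_path(0, false, false));
		return 0;
	}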
@@ -2344,21 +2314,31 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 	*flags &= ~LDLM_FL_BLOCK_GRANTED;
 
 	rc = ldlm_cli_enqueue(exp, &req, einfo, res_id, policy, flags, lvb,
-			      sizeof(*lvb), LVB_T_OST, lockh, async);
-	if (rqset) {
+			      sizeof(*lvb), LVB_T_OST, &lockh, async);
+	if (async) {
 		if (!rc) {
 			struct osc_enqueue_args *aa;
 
-			CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args));
+			CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
 			aa = ptlrpc_req_async_args(req);
-			aa->oa_ei = einfo;
 			aa->oa_exp = exp;
-			aa->oa_flags = flags;
+			aa->oa_mode = einfo->ei_mode;
+			aa->oa_type = einfo->ei_type;
+			lustre_handle_copy(&aa->oa_lockh, &lockh);
 			aa->oa_upcall = upcall;
 			aa->oa_cookie = cookie;
-			aa->oa_lvb = lvb;
-			aa->oa_lockh = lockh;
 			aa->oa_agl = !!agl;
+			if (!agl) {
+				aa->oa_flags = flags;
+				aa->oa_lvb = lvb;
+			} else {
+				/* AGL is essentially to enqueue an DLM lock
+				 * in advance, so we don't care about the
+				 * result of AGL enqueue.
+				 */
+				aa->oa_lvb = NULL;
+				aa->oa_flags = NULL;
+			}
 
 			req->rq_interpret_reply =
 				(ptlrpc_interpterer_t)osc_enqueue_interpret;
@@ -2372,7 +2352,8 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 		return rc;
 	}
 
-	rc = osc_enqueue_fini(req, lvb, upcall, cookie, flags, agl, rc);
+	rc = osc_enqueue_fini(req, upcall, cookie, &lockh, einfo->ei_mode,
+			      flags, agl, rc);
 	if (intent)
 		ptlrpc_req_finished(req);
@@ -3359,7 +3340,6 @@ static struct obd_ops osc_obd_ops = {
 };
 
 extern struct lu_kmem_descr osc_caches[];
-extern spinlock_t osc_ast_guard;
 extern struct lock_class_key osc_ast_guard_class;
 
 static int __init osc_init(void)
@@ -3386,9 +3366,6 @@ static int __init osc_init(void)
 	if (rc)
 		goto out_kmem;
 
-	spin_lock_init(&osc_ast_guard);
-	lockdep_set_class(&osc_ast_guard, &osc_ast_guard_class);
-
 	/* This is obviously too much memory, only prevent overflow here */
 	if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) {
 		rc = -EINVAL;