File Event Monitoring provides a simple, flexible mechanism to observe operations applied to vnode and vfs objects. These per-object operations are the events and the code that observes these events is the monitor.
- FEM can not affect lock ordering in file systems. In other words, a pass-through monitor, as is shown in [Appendix C], installed on an arbitrary vnode should not create new lock interdependencies.
- A FEM monitor may be installed or removed at any time. "Any time" means any time the object is valid, in that it could be used, even if it is not currently reachable.
- FEM should have minimal "probe effect". The action of dispatching the event to the monitor should be as lightweight as possible.
- FEM should have no effect on objects which are not monitored. No is a strong word, but the spirit is to have no measurable effect in the "normal" cases; and no more than a nominal effect for low frequency cases.
- FEM should permit monitors to be added and removed on selected objects. This implies that the monitoring framework is not global.
- FEM should permit monitors to specify only the operations they are interested in observing.
- FEM should provide a means to detect the creation and deletion of objects.
1.2.1 Non-Goals and Non-Requirements
This project does not bring, and does not attempt to bring, stacking file systems, file system composition or file system interposition abilities to Solaris. The obstacles to these concepts remain firmly in place [Skinner93] and are not in the scope of the File Event Monitor. The File Event Monitor borrows some implementation ideas from these systems, so the guilt by association is clear.
The following code example, from ufs_vnops.c, illuminates the problem:
err = pvn_vplist_dirty(vp, (u_offset_t)off, ufs_putapage,
    flags, cr);
pvn_vplist_dirty() looks in the page cache for matching pages to be written out, and directly invokes its third argument (ufs_putapage) to do so. If we attempted to "stack" a file system on top of UFS, it would never get the opportunity to see this operation. ufs_putapage() isn't even a vnode op of UFS, so the solution to this is more than just mechanical.
There is no clear line beyond which FEM modules dare not step. We, and members of the NFSv4 team we consulted with, believe that a simple distributed cache coherence mechanism can be built safely as a FEM module. That implies suspending a number of vnode operations for more than a few network exchanges. Once that belief becomes accepted, we will have gained some understanding of where this line is.
If a stacking mechanism is ever adopted, FEM may be discarded and its interfaces adapted to meet the new mechanism. FEM makes few maintenance demands upon monitors and less upon file systems; so there should not be too much dependency upon implementation.
vnode_t
[...] (A)vnops
[v_op] ----> [vop_read] --------> xfs_read
[vop_write]--------> xfs_write
[...]
vnode_t
[...] (A)vnops
[v_op]--->[0] ----> [vop_read] --------> xfs_read
[1] -+ [vop_write]--------> xfs_write
| [...]
|
|
| (B)vnops
+--> [vop_read] --------> watch_read
[vop_write]--------> watch_write
[...]
In FEM, the caller of the vnode operation does not need to be changed because a special set of FEM-provided vnodeops, "vhead_xxx", is responsible for locating the top interposer and passing the operation to it.
vnode_t
[...] (vhead)vnops
[v_op] --------> [vop_read] ---> vhead_read() { ... }
[v_femhead] [vop_write] ---> vhead_write() { ... }
| [...]
V
[lock]
[femlist]
|
V
[refc, size]
[top]
|
|
|
| +++++++++++++ (B)fem_t
+--------->+ [ops] + ---->[fem_read] -----> watch_read() { .. }
+ [private] + [fem_write] -----> watch_write() { .. }
+ + [ ... ]
+===========+
+ + (C)fem_t
+[ops] + ---->[fem_read] ------> count_read() { ... }
+[private] + [fem_write] -----> count_write() { ... }
+ + [ ... ]
+===========+
+ + (A)vnops
+[ops] + ---->[vop_read] ------> xfs_read() { ... }
+[private] + [vop_write] -----> xfs_write() { ... }
+++++++++++++ [ ... ]
/*
 * Example monitor for read events: add one to counter slot 0 of the
 * uint32_t array reached through this layer's fn_available pointer, then
 * pass the request on.
 *
 * NOTE(review): femnext_read does not appear among the vnext_* functions
 * listed in the interface table; presumably an earlier spelling of
 * vnext_read - confirm.
 */
int
count_read(femarg_t *farg, uio_t *uiop, int ioflag, cred_t *cr)
{
	uint32_t *counters;

	counters = farg->fa_fnode->fn_available;
	atomic_add_32(&counters[0], 1);

	return (femnext_read(farg, uiop, ioflag, cr));
}
/*
 * Example monitor for write events: add one to counter slot 1 of the
 * uint32_t array reached through this layer's fn_available pointer, then
 * pass the request on.
 *
 * NOTE(review): femnext_write does not appear among the vnext_* functions
 * listed in the interface table; presumably an earlier spelling of
 * vnext_write - confirm.
 */
int
count_write(femarg_t *farg, uio_t *uiop, int ioflag, cred_t *cr)
{
	uint32_t *counters;

	counters = farg->fa_fnode->fn_available;
	atomic_add_32(&counters[1], 1);

	return (femnext_write(farg, uiop, ioflag, cr));
}
/*
 * One layer of a monitor stack: an operations vector plus the pointer
 * that goes with it.
 */
struct fem_node {
/*
 * Per-layer pointer.  vhead_X treats a NULL value here as "this layer
 * holds the object's own operations" and a non-NULL value as "this layer
 * is a monitor".
 */
void *fn_available;
/* Operations for this layer; the member in use depends on the layer kind. */
union {
fem_t *fem;
vnodeops_t *vnode;
fsem_t *fsem;
vfsops_t *vfs;
void *anon; /* anonymous, for updates */
} fn_op;
};
/*
 * Argument handed along a monitor stack: the object being operated on
 * and the current position within the stack.
 */
struct fem_arg {
/* The object of the operation, viewed as whichever type applies. */
union {
struct vnode *vp,
**vpp;
struct vfs *vfsp;
void *anon;
} fa_vnode;
/* Current layer; vhead_X starts it at the top node of the list. */
struct fem_node *fa_fnode;
};
/*
 * The stack of layers attached to one object.
 */
struct fem_list {
/* NOTE(review): meaning of the status values is not shown here. */
int feml_status;
/* Presumably the count managed by fem_addref()/fem_release() - confirm. */
uint_t feml_refc;
/* Index into feml_nodes of the top layer (see vhead_X). */
int feml_tos;
/* Presumably the number of slots allocated in feml_nodes - confirm. */
int feml_ssize;
/* Declared with one element but indexed beyond it. */
struct fem_node feml_nodes[1]; /*variable index */
};
/*
 * Per-object anchor for the monitor list.
 */
struct fem_head {
/* Entered by fem_lock() before femh_list is read. */
kmutex_t femh_lock;
/* Current list of layers; may be NULL (vhead_X handles that case). */
struct fem_list *femh_list;
};
/*
 * Enter the head's mutex and return the list it currently points at.
 *
 * A NULL head yields NULL and no mutex is taken.  A non-NULL head whose
 * list is NULL also yields NULL, but in that case the mutex IS held on
 * return.
 */
static struct fem_list *
fem_lock(struct fem_head *fp)
{
	if (fp == NULL)
		return (NULL);

	mutex_enter(&fp->femh_lock);
	return (fp->femh_list);
}
/*
 * Template for the vhead_* entry points ("X" stands for any vnode
 * operation).  It locates the top layer attached to the vnode and
 * dispatches the operation to it.
 *
 * If no list is attached, the vnode's own vop_X is called with the vnode
 * as its first argument.  Otherwise a reference is taken on the list so
 * that it can be used after the head lock is dropped, a fem_arg is built
 * that names the vnode and the top layer, and that layer is called:
 *   - a layer whose fn_available is NULL holds vnode operations and is
 *     called with the vnode;
 *   - any other layer is a monitor and is called with the fem_arg, which
 *     is the first parameter every monitor routine takes (the monitor
 *     finds its private data through fa_fnode->fn_available).
 *
 * Returns whatever the dispatched operation returns.
 *
 * NOTE(review): fem_unlock() is called with the same head fem_lock() was
 * given, including a NULL head; fem_unlock() is presumably a no-op for
 * NULL - confirm.
 */
static int
vhead_X(struct vnode *vp, int a0, int a1)
{
	struct fem_arg farg;
	struct fem_list *femsp;
	int (*func)();
	void *arg0;
	int errc;

	if ((femsp = fem_lock(vp->v_femhead)) == NULL) {
		func = vp->v_op->vop_X;
		arg0 = vp;
		fem_unlock(vp->v_femhead);
		errc = (*func)(arg0, a0, a1);
	} else {
		/* Hold the list across the call; the lock is not held. */
		fem_addref(femsp);
		fem_unlock(vp->v_femhead);
		farg.fa_vnode.vp = vp;
		farg.fa_fnode = femsp->feml_nodes + femsp->feml_tos;
		if (farg.fa_fnode->fn_available == NULL) {
			arg0 = vp;
			func = farg.fa_fnode->fn_op.vnode->vop_X;
		} else {
			/* Monitors receive the fem_arg, not their private data. */
			arg0 = &farg;
			func = farg.fa_fnode->fn_op.fem->vsop_X;
		}
		errc = (*func)(arg0, a0, a1);
		fem_release(femsp);
	}
	return (errc);
}
-----<------
/ \
/ \
{init}-> (exists) -> (idle) -> (reclaim)
\ | /
\ | /
\ | /
\ | /
\ | /
V | V
\|/
V
(invalid) ---> {final}
| Interface Name | Proposed Stability Classification | Specified in What Document? | Former Stability Classification or Other Comments |
| <sys/fem.h> |
consolidation private |
this |
new |
| void fem_init(void); |
consolidation private |
this |
this is a bootstrap function called at startup(). |
| struct fem; struct fem_arg; struct fem_node; typedef struct fem fem_t; typedef struct fsem fsem_t; typedef struct fem_arg femarg_t; typedef struct fem_arg fsemarg_t; typedef int femop_t(); |
consolidation private |
this |
these types are used by monitor implementations
to identify monitors and provide monitor private data. |
| void vn_exists(vnode_t *vn) ; void vn_idle(vnode_t *vn); void vn_reclaim(vnode_t*vn) void vn_invalid(vnode_t *vn); VFSNAME_VNSTATE |
consolidation private |
this |
This interface implements the vnode lifecycle. |
| int fsem_create(char *name, const struct fs_operation_def
*templ, fsem_t **actual); int fsem_is_installed(struct vfs *v, fsem_t *mon); int fsem_install(struct vfs *v, fsem_t *mon, void *arg); int fsem_uninstall(struct vfs *v, fsem_t *mon); |
consolidation private |
this |
These routines manage the set of installed monitors
on a vfs object. |
| int fem_create(char *name, const struct fs_operation_def
*templ, fem_t **actual); int fem_install(struct vnode *v, fem_t *mon, void *arg); int fem_is_installed(struct vnode *v, fem_t *mon); int fem_uninstall(struct vnode *v, fem_t *mon); int fem_getvnops(struct vnode *v, struct vnodeops **ops); int fem_setvnops(struct vnode *v, struct vnodeops *nops); |
consolidation private |
this |
These routines manage the set of installed monitors
on a vnode object. |
| extern int vfsnext_mount(struct fem_arg *vf, struct vnode *mvp, struct mounta *uap, struct cred *cr); extern int vfsnext_unmount(struct fem_arg *vf, int flag, struct cred *cr); extern int vfsnext_root(struct fem_arg *vf, struct vnode **vpp); extern int vfsnext_statvfs(struct fem_arg *vf, struct statvfs64 *sp); extern int vfsnext_sync(struct fem_arg *vf, short flag, struct cred *cr); extern int vfsnext_vget(struct fem_arg *vf, struct vnode **vpp, struct fid *fidp); extern int vfsnext_mountroot(struct fem_arg *vf, enum whymountroot reason); extern int vfsnext_freevfs(struct fem_arg *vf); extern int vfsnext_vnstate(struct fem_arg *vf, struct vnode *vn, int nstate); |
consolidation private |
none | These correspond to the vfs operations and permit
a monitor to invoke the logically next monitor operation (or the vfs operation
if this is the last or only monitor). |
| int vnext_close(femarg_t *vf, int flag,
int count, offset_t offset, struct cred *cr); int vnext_read(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr); int vnext_write(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr); int vnext_ioctl(femarg_t *vf, int cmd, intptr_t arg, int flag, struct cred *cr, int *rvalp); int vnext_setfl(femarg_t *vf, int oflags, int nflags, struct cred *cr); int vnext_getattr(femarg_t *vf, struct vattr *vap, int flags, struct cred *cr); int vnext_setattr(femarg_t *vf, struct vattr *vap, int flags, struct cred *cr); int vnext_access(femarg_t *vf, int mode, int flags, struct cred *cr); int vnext_lookup(femarg_t *vf, char *nm, struct vnode **vpp, struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cr); int vnext_create(femarg_t *vf, char *name, struct vattr *vap, vcexcl_t excl, int mode, struct vnode **vpp, struct cred *cr, int flag); int vnext_remove(femarg_t *vf, char *nm, struct cred *cr); int vnext_link(femarg_t *vf, struct vnode *svp, char *tnm, struct cred *cr); int vnext_rename(femarg_t *vf, char *snm, struct vnode *tdvp, char *tnm, struct cred *cr); int vnext_mkdir(femarg_t *vf, char *dirname, struct vattr *vap, struct vnode **vpp, struct cred *cr); int vnext_rmdir(femarg_t *vf, char *nm, struct vnode *cdir, struct cred *cr); int vnext_readdir(femarg_t *vf, struct uio *uiop, struct cred *cr, int *eofp); int vnext_symlink(femarg_t *vf, char *linkname, struct vattr *vap, char *target, struct cred *cr); int vnext_readlink(femarg_t *vf, struct uio *uiop, struct cred *cr); int vnext_fsync(femarg_t *vf, int syncflag, struct cred *cr); void vnext_inactive(femarg_t *vf, struct cred *cr); int vnext_fid(femarg_t *vf, struct fid *fidp); void vnext_rwlock(femarg_t *vf, int write_lock); void vnext_rwunlock(femarg_t *vf, int write_lock); int vnext_seek(femarg_t *vf, offset_t ooff, offset_t *noffp); int vnext_cmp(femarg_t *vf, struct vnode *vp2); int vnext_frlock(femarg_t *vf, int cmd, struct flock64 *bfp, int flag, offset_t offset, struct flk_callback 
*flk_cbp, struct cred *cr); int vnext_space(femarg_t *vf, int cmd, struct flock64 *bfp, int flag, offset_t offset, struct cred *cr); int vnext_realvp(femarg_t *vf, struct vnode **vpp); int vnext_getpage(femarg_t *vf, offset_t off, size_t len, uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg, caddr_t addr, enum seg _rw rw, struct cred *cr); int vnext_putpage(femarg_t *vf, offset_t off, size_t len, int flags, struct cred *cr); int vnext_map(femarg_t *vf, offset_t off, struct as *as, caddr_t *addrp, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr) ; int vnext_addmap(femarg_t *vf, offset_t off, struct as *as, caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr ); int vnext_delmap(femarg_t *vf, offset_t off, struct as *as, caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cr); int vnext_poll(femarg_t *vf, short events, int anyyet, short *reventsp, struct pollhead **phpp); int vnext_dump(femarg_t *vf, caddr_t addr, int lbdn, int dblks); int vnext_pathconf(femarg_t *vf, int cmd, ulong_t *valp, struct cred *cr); int vnext_pageio(femarg_t *vf, struct page *pp, u_offset_t io_off, size_t io_len, int flags, struct cred *cr); int vnext_dumpctl(femarg_t *vf, int action, int *blkp); void vnext_dispose(femarg_t *vf, struct page *pp, int flag, int dn, struct cred *cr); int vnext_setsecattr(femarg_t *vf, vsecattr_t *vsap, int flag, struct cred *cr); int vnext_getsecattr(femarg_t *vf, vsecattr_t *vsap, int flag, struct cred *cr); int vnext_shrlock(femarg_t *vf, int cmd, struct shrlock *shr, int flag); |
consolidation private |
none |
These correspond to the vnode operations and permit
a monitor to invoke the logically next monitor operation (or the vnode operation
if this is the last or only monitor). |
This project has no security implications
NAME
fem_create, fsem_create - create a fem_t from a specification
SYNOPSIS
#include <sys/fem.h>
extern int fem_create(char *name, const struct fs_operation_def *spec,
fem_t **new_fem);
extern int fsem_create(char *name, const struct fs_operation_def *spec,
fsem_t **new_fem);
extern void fem_delete(fem_t *fem);
extern void fsem_delete(fsem_t *fem);
DESCRIPTION
Fem_create, fsem_create create an event monitor from a specification.
A specification which captures just the read and write operations on
a vnode would look like:
struct fs_operation_def spec[] = {
{ VOPNAME_READ, (femop_t) mymon_read },
{ VOPNAME_WRITE, (femop_t) mymon_write },
{ NULL, NULL }
};
The operations are cast into a generic function type (femop_t),
however technically these two functions are compatible with the
generic function type.
Notice that only the functions which are required are provided;
this enables a shortcut within the monitor to skip over layers
which have no interest in certain operations.
The fem_t returned may be applied to the appropriate object; and
later its resources may be reclaimed with fem_delete or fsem_delete
as appropriate.
RETURN VALUES
0 - monitor was created successfully.
EINVAL - specification contained an invalid name.
EINVAL - specification was invalid.
NAME
fem_install, fsem_install - install event monitors
SYNOPSIS
#include <sys/fem.h>
extern int fem_install(vnode_t *vn, fem_t *mon, void *arg);
extern int fem_uninstall(vnode_t *vn, fem_t *mon);
extern int fsem_install(vfs_t *vf, fsem_t *mon, void *arg);
extern int fsem_uninstall(vfs_t *vf, fsem_t *mon);
DESCRIPTION
fem_install() and fsem_install() install a monitor created by
fem_create() and fsem_create() onto a vnode and vfs respectively.
There can only be one instance of mon on a particular object;
if you must place the same monitor twice, you must create two
separate monitors.
Arg is stored in the fn_available field of the fem_node referenced
by the fem_arg passed to the monitor. Arg may not be NULL.
Fem_uninstall() and fsem_uninstall() remove the monitors matching mon
from the provided objects. Uninstall means that the path leading to
this monitor is removed; however in-flight operations may still
reference it.
RETURNS
0 - The monitor was installed / uninstalled ok.
EINVAL - mon is NULL or invalid.
EINVAL - arg is NULL.
EBUSY - mon is already installed.
EBUSY - too many monitors installed.
NAME
vnext_op, vfsnext_op - event propagation functions.
SYNOPSIS:
#include <sys/fem.h>
extern int vnext_open(struct fem_arg *vf, int mode, struct cred *cr);
extern int vnext_close(struct fem_arg *vf, int flag, int count,
offset_t offset, struct cred *cr);
extern int vnext_read(struct fem_arg *vf, struct uio *uiop, int ioflag,
struct cred *cr);
extern int vnext_write(struct fem_arg *vf, struct uio *uiop, int ioflag,
struct cred *cr);
extern int vnext_ioctl(struct fem_arg *vf, int cmd, intptr_t arg, int flag,
struct cred *cr, int *rvalp);
extern int vnext_setfl(struct fem_arg *vf, int oflags, int nflags,
struct cred *cr);
extern int vnext_getattr(struct fem_arg *vf, struct vattr *vap, int flags,
struct cred *cr);
extern int vnext_setattr(struct fem_arg *vf, struct vattr *vap, int flags,
struct cred *cr);
extern int vnext_access(struct fem_arg *vf, int mode, int flags,
struct cred *cr);
extern int vnext_lookup(struct fem_arg *vf, char *nm, struct vnode **vpp,
struct pathname *pnp, int flags,
struct vnode *rdir, struct cred *cr);
extern int vnext_create(struct fem_arg *vf, char *name, struct vattr *vap,
vcexcl_t excl, int mode, struct vnode **vpp,
struct cred *cr, int flag);
extern int vnext_remove(struct fem_arg *vf, char *nm, struct cred *cr);
extern int vnext_link(struct fem_arg *vf, struct vnode *svp, char *tnm,
struct cred *cr);
extern int vnext_rename(struct fem_arg *vf, char *snm, struct vnode *tdvp,
char *tnm, struct cred *cr);
extern int vnext_mkdir(struct fem_arg *vf, char *dirname,
struct vattr *vap, struct vnode **vpp, struct cred *cr);
extern int vnext_rmdir(struct fem_arg *vf, char *nm, struct vnode *cdir,
struct cred *cr);
extern int vnext_readdir(struct fem_arg *vf, struct uio *uiop,
struct cred *cr, int *eofp);
extern int vnext_symlink(struct fem_arg *vf, char *linkname,
struct vattr *vap, char *target, struct cred *cr);
extern int vnext_readlink(struct fem_arg *vf, struct uio *uiop,
struct cred *cr);
extern int vnext_fsync(struct fem_arg *vf, int syncflag, struct cred *cr);
extern void vnext_inactive(struct fem_arg *vf, struct cred *cr);
extern int vnext_fid(struct fem_arg *vf, struct fid *fidp);
extern void vnext_rwlock(struct fem_arg *vf, int write_lock);
extern void vnext_rwunlock(struct fem_arg *vf, int write_lock);
extern int vnext_seek(struct fem_arg *vf, offset_t ooff, offset_t *noffp);
extern int vnext_cmp(struct fem_arg *vf, struct vnode *vp2);
extern int vnext_frlock(struct fem_arg *vf, int cmd, struct flock64 *bfp,
int flag, offset_t offset,
struct flk_callback *flk_cbp, struct cred *cr);
extern int vnext_space(struct fem_arg *vf, int cmd, struct flock64 *bfp,
int flag, offset_t offset, struct cred *cr);
extern int vnext_realvp(struct fem_arg *vf, struct vnode **vpp);
extern int vnext_getpage(struct fem_arg *vf, offset_t off, size_t len,
uint_t *protp, struct page **plarr, size_t plsz,
struct seg *seg, caddr_t addr, enum seg_rw rw,
struct cred *cr);
extern int vnext_putpage(struct fem_arg *vf, offset_t off, size_t len,
int flags, struct cred *cr);
extern int vnext_map(struct fem_arg *vf, offset_t off, struct as *as,
caddr_t *addrp, size_t len, uchar_t prot, uchar_t maxprot,
uint_t flags, struct cred *cr);
extern int vnext_addmap(struct fem_arg *vf, offset_t off, struct as *as,
caddr_t addr, size_t len, uchar_t prot,
uchar_t maxprot, uint_t flags, struct cred *cr);
extern int vnext_delmap(struct fem_arg *vf, offset_t off, struct as *as,
caddr_t addr, size_t len, uint_t prot,
uint_t maxprot, uint_t flags, struct cred *cr);
extern int vnext_poll(struct fem_arg *vf, short events, int anyyet,
short *reventsp, struct pollhead **phpp);
extern int vnext_dump(struct fem_arg *vf, caddr_t addr, int lbdn,
int dblks);
extern int vnext_pathconf(struct fem_arg *vf, int cmd, ulong_t *valp,
struct cred *cr);
extern int vnext_pageio(struct fem_arg *vf, struct page *pp,
u_offset_t io_off, size_t io_len, int flags,
struct cred *cr);
extern int vnext_dumpctl(struct fem_arg *vf, int action, int *blkp);
extern void vnext_dispose(struct fem_arg *vf, struct page *pp, int flag,
int dn, struct cred *cr);
extern int vnext_setsecattr(struct fem_arg *vf, vsecattr_t *vsap, int flag,
struct cred *cr);
extern int vnext_getsecattr(struct fem_arg *vf, vsecattr_t *vsap, int flag,
struct cred *cr);
extern int vnext_shrlock(struct fem_arg *vf, int cmd, struct shrlock *shr,
int flag);
extern int vfsnext_mount(struct fem_arg *vf, struct vnode *mvp,
struct mounta *uap, struct cred *cr);
extern int vfsnext_unmount(struct fem_arg *vf, int flag, struct cred *cr);
extern int vfsnext_root(struct fem_arg *vf, struct vnode **vpp);
extern int vfsnext_statvfs(struct fem_arg *vf, struct statvfs64 *sp);
extern int vfsnext_sync(struct fem_arg *vf, short flag, struct cred *cr);
extern int vfsnext_vget(struct fem_arg *vf, struct vnode **vpp,
struct fid *fidp);
extern int vfsnext_mountroot(struct fem_arg *vf, enum whymountroot reason);
extern int vfsnext_freevfs(struct fem_arg *vf);
extern int vfsnext_vnstate(struct fem_arg *vf, struct vnode *vn,
int nstate);
/* One layer of a monitor stack. */
struct fem_node {
/* Holds the private data pointer given to the installation function. */
void *fn_available;
/* Operations for this layer; the member in use depends on the layer kind. */
union {
fem_t *fem;
vnodeops_t *vnode;
fsem_t *fsem;
vfsops_t *vfs;
void *anon; /* anonymous, for updates */
} fn_op;
};
/* First argument of every monitor routine and of every vnext_ and vfsnext_ function. */
struct fem_arg {
/* The object of the operation, viewed as whichever type applies. */
union {
struct vnode *vp,
**vpp;
struct vfs *vfsp;
void *anon;
} fa_vnode;
/* Current position in the monitor stack; advanced by the next functions. */
struct fem_node *fa_fnode;
};
DESCRIPTION
The vnext_*, vfsnext_* functions provide a means to propagate an event
to either the next monitor configured for this object, or the object itself.
Each of these functions corresponds to a vnode or vfs operation; as is
indicated by the name - vnext_open corresponds to vop_open.
The next function advances the fa_fnode pointed to by the first argument
to each of these to locate the appropriate location within the monitor
stack. The private data pointer provided in the installation function
is stored in (vf->fa_fnode->fn_available). Since the fem_node may be
replicated, this location should not be relied upon.
RETURN VALUES
Varies by the individual function.
NAME
vn_exists, vn_idle, vn_reclaim, vn_invalid - announce life cycle of a vnode.
SYNOPSIS
#include <sys/vfs.h>
extern int vfs_announces(vfs_t *vfs);
extern void vn_exists(vnode_t *vn);
extern void vn_idle(vnode_t *vn);
extern void vn_reclaim(vnode_t *vn);
extern void vn_invalid(vnode_t *vn);
DESCRIPTION
These functions are used by a file system to announce the life cycle of
a vnode. A file system may choose to support these announcements in one
of three ways - not at all, basic and full.
If the file system chooses not to support the announcement at all, that means
that monitors which may be automatically attached and cleaned up cannot be
used.
Basic announcement involves a file system announcing, via vn_exists(), that
a vnode is ready for operations, however none can proceed yet. The latter
restraint permits a monitor to know whether any operations on the vnode
could have been performed before it was installed. At this point, the
v_vfsp member of the vnode should be initialized, to permit monitoring
via the vfs.
Basic announcement also involves announcing the destruction of a vnode, via
vn_invalid(), when it is no longer referenced. This permits a monitor
to cleanup its resources.
The full announcement augments basic announcement by indicating when a vnode
has become idle (no longer referenced) as opposed to having its state
destroyed. This is intended to support caches of vnodes, as is common practise
in file systems. The function vn_reclaim() is used to indicate when a vnode
is re-entering service.
Full announcement is meant to balance the system performance and memory
requirements. The monitor management can cache its state in a parallel
fashion to the vnode, thus avoiding unnecessary consumption of resources.
RETURN VALUES
(vfs_announces) 0 - does not provide announcement.
1 - provides basic announcement.
2 - provides full announcement.
NAME
fem_getop, fem_setop, fsem_getop, fsem_setop - get and set object methods.
SYNOPSIS
extern int fem_getop(vnode_t *v, vnodeops_t **ops);
extern int fem_setop(vnode_t *v, vnodeops_t *ops);
extern int fsem_getop(vfs_t *v, vfsops_t **ops);
extern int fsem_setop(vfs_t *v, vfsops_t *ops);
DESCRIPTION
These functions extract and set the methods for the underlying object;
and correspond roughly to the vop_getvnops, vop_setvnops, ...
RETURN VALUE
0 - get/set operation ok.
EINVAL - object is not monitored.
NAME
fem_is_installed, fsem_is_installed - check for monitor.
SYNOPSIS
#include <sys/fem.h>
extern int fem_is_installed(vnode_t *vn, fem_t *mon);
extern int fsem_is_installed(vfs_t *vf, fsem_t *mon);
DESCRIPTION
Fem_is_installed, fsem_is_installed return true (non zero) if the
provided mon is already installed on the provided object; otherwise
they return false (0).
RETURN VALUES
0 - The monitor is not already installed.
1 - The monitor is already installed.
NAME
fem_init - static initialization.
DESCRIPTION
Fem_init builds tables and initial state for file event monitoring;
fem_init must be called before any fem/fsem function; it is called from startup().
RETURN VALUES
none.
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/open.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include "sys/mutex.h"
#include "sys/condvar.h"
#include "sys/vfs.h"
#include "sys/vnode.h"
#include "sys/fem.h"
#include "sys/file.h"
#include "cmd.h"
/*
 * Monitor specification in which every listed vnode operation is mapped
 * straight to its vnext_* propagation function.  Passed to fem_create()
 * by cmdnull_init().  The table ends with a { NULL, NULL } entry.
 */
static fs_operation_def_t femnul_conf[] = {
{ VOPNAME_OPEN, (femop_t *) vnext_open },
{ VOPNAME_CLOSE, (femop_t *) vnext_close },
{ VOPNAME_READ, (femop_t *) vnext_read },
{ VOPNAME_WRITE, (femop_t *) vnext_write },
{ VOPNAME_IOCTL, (femop_t *) vnext_ioctl },
{ VOPNAME_SETFL, (femop_t *) vnext_setfl },
{ VOPNAME_GETATTR, (femop_t *) vnext_getattr },
{ VOPNAME_SETATTR, (femop_t *) vnext_setattr },
{ VOPNAME_ACCESS, (femop_t *) vnext_access },
{ VOPNAME_LOOKUP, (femop_t *) vnext_lookup },
{ VOPNAME_CREATE, (femop_t *) vnext_create },
{ VOPNAME_REMOVE, (femop_t *) vnext_remove },
{ VOPNAME_LINK, (femop_t *) vnext_link },
{ VOPNAME_RENAME, (femop_t *) vnext_rename },
{ VOPNAME_MKDIR, (femop_t *) vnext_mkdir },
{ VOPNAME_RMDIR, (femop_t *) vnext_rmdir },
{ VOPNAME_READDIR, (femop_t *) vnext_readdir },
{ VOPNAME_SYMLINK, (femop_t *) vnext_symlink },
{ VOPNAME_READLINK, (femop_t *) vnext_readlink },
{ VOPNAME_FSYNC, (femop_t *) vnext_fsync },
{ VOPNAME_INACTIVE, (femop_t *) vnext_inactive },
{ VOPNAME_FID, (femop_t *) vnext_fid },
{ VOPNAME_RWLOCK, (femop_t *) vnext_rwlock },
{ VOPNAME_RWUNLOCK, (femop_t *) vnext_rwunlock },
{ VOPNAME_SEEK, (femop_t *) vnext_seek },
{ VOPNAME_CMP, (femop_t *) vnext_cmp },
{ VOPNAME_FRLOCK, (femop_t *) vnext_frlock },
{ VOPNAME_SPACE, (femop_t *) vnext_space },
{ VOPNAME_REALVP, (femop_t *) vnext_realvp },
{ VOPNAME_GETPAGE, (femop_t *) vnext_getpage },
{ VOPNAME_PUTPAGE, (femop_t *) vnext_putpage },
{ VOPNAME_MAP, (femop_t *) vnext_map },
{ VOPNAME_ADDMAP, (femop_t *) vnext_addmap },
{ VOPNAME_DELMAP, (femop_t *) vnext_delmap },
{ VOPNAME_POLL, (femop_t *) vnext_poll },
{ VOPNAME_DUMP, (femop_t *) vnext_dump },
{ VOPNAME_PATHCONF, (femop_t *) vnext_pathconf },
{ VOPNAME_PAGEIO, (femop_t *) vnext_pageio },
{ VOPNAME_DUMPCTL, (femop_t *) vnext_dumpctl },
{ VOPNAME_DISPOSE, (femop_t *) vnext_dispose },
{ VOPNAME_SETSECATTR, (femop_t *) vnext_setsecattr },
{ VOPNAME_GETSECATTR, (femop_t *) vnext_getsecattr },
{ VOPNAME_SHRLOCK, (femop_t *) vnext_shrlock },
{ NULL, NULL }
};
/*
 * Construct a "null" monitor by placing two copies of the vnext_*
 * pass-through routines on the vnode.
 *
 * With an ordinary monitor a call travels
 *	fopX -> vheadX -> monitorX -> vnextX -> vopX
 * but when the monitor routine is itself vnextX the path shortens to
 *	fopX -> vheadX -> vnextX -> vopX
 * so two copies of the null monitor are created and both are pushed.
 *
 * femnull[0] and femnull[1] hold those two monitors.
 */
static fem_t *femnull[2];
/*
 * Create both null monitors from femnul_conf.
 *
 * If the first creation fails nothing else is attempted.  If the second
 * fails, the first monitor is deleted again and its slot cleared.  The
 * error code is not reported to the caller.
 */
void
cmdnull_init()
{
	int err;

	err = fem_create("devmon_fem", femnul_conf, femnull+0);
	if (err != 0)
		return;

	err = fem_create("devmon2_fem", femnul_conf, femnull+1);
	if (err == 0)
		return;

	/* Second monitor could not be created: undo the first. */
	fem_delete(femnull[0]);
	femnull[0] = NULL;
}
/*
 * Delete whichever of the two null monitors exist and clear their slots,
 * slot 0 first.
 */
void
cmdnull_fini()
{
	int slot;

	for (slot = 0; slot < 2; slot++) {
		if (femnull[slot] == NULL)
			continue;
		fem_delete(femnull[slot]);
		femnull[slot] = NULL;
	}
}
/*
 * Install both null monitors on vn, passing the vnode itself as the
 * private argument.
 *
 * Returns 0 when both installs succeed.  Otherwise returns the error from
 * the install that failed; if that was the second one, the first monitor
 * is uninstalled again before returning.
 */
int
cmdnull_add(vnode_t *vn)
{
	int e;

	e = fem_install(vn, femnull[0], (void *)vn);
	if (e != 0)
		return (e);

	e = fem_install(vn, femnull[1], (void *)vn);
	if (e != 0)
		fem_uninstall(vn, femnull[0]);

	return (e);
}
/*
 * Uninstall both null monitors from vn, slot 0 first.  The results of
 * the uninstall calls are not examined.
 */
void
cmdnull_del(vnode_t *vn)
{
	int slot;

	for (slot = 0; slot < 2; slot++)
		fem_uninstall(vn, femnull[slot]);
}
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/open.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include "sys/mutex.h"
#include "sys/condvar.h"
#include "sys/vfs.h"
#include "sys/vnode.h"
#include "sys/fem.h"
#include "sys/file.h"
#include "cmd.h"
/*
 * Largest request cmd_write() accepts, and the size of the name buffer
 * allocated for each vnode_name.
 */
#ifndef MAXCMDLEN
#define MAXCMDLEN 255
#endif
/* When non-zero, the driver logs extra diagnostics through cmn_err(). */
static int debugging = 0;
/* Soft-state handle; set up in _init() and consulted by dev2mon(). */
static void *mondev_head; /* opaque handle top of state structs */
/*
 * Map a device number to its per-minor Mondev soft state.
 *
 * Returns the Mondev for the minor number of _devno, or NULL (after
 * logging a note) when no state exists for that minor or the state's
 * magic number is not MD_MAGIC.
 */
Mondev *
dev2mon(dev_t _devno)
{
	Mondev *p = ddi_get_soft_state(mondev_head, getminor((_devno)));

	if (p == NULL || p->magic != MD_MAGIC) {
		/*
		 * dev_t can be wider than unsigned int, so print it as an
		 * unsigned long rather than with %u.
		 */
		cmn_err(CE_NOTE, "Invalid mondev from %lu\n",
		    (unsigned long)_devno);
		return (NULL);
	}
	return (p);
}
/* Monitor routines, defined later in this file. */
static int monitor_read();
static int monitor_write();
/*
 * Monitor specification: only read and write are observed.  Passed to
 * fem_create() by _init().
 */
fs_operation_def_t devmon_templ[] = {
{ VOPNAME_READ, monitor_read },
{ VOPNAME_WRITE, monitor_write },
{ NULL, NULL }
};
/* The monitor built from devmon_templ; installed on each watched vnode. */
static fem_t *devmon_femops;
/*
* These are the entry points into our driver that are called when the
* driver is loaded, during a system call, or in response to an interrupt.
*/
static int cmd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
void **result);
dev2mon(dev_t _devno)
{
Mondev *p = ddi_get_soft_state(mondev_head, getminor((_devno)));
if (p == NULL || p->magic != MD_MAGIC) {
cmn_err(CE_NOTE, "Invalid mondev from %u\n", _devno);
return (NULL);
}
return (p);
}
/*
 * NOTE(review): these declarations repeat ones that appear earlier in
 * this listing.
 */
/* Monitor routines, defined later in this file. */
static int monitor_read();
static int monitor_write();
/*
 * Monitor specification: only read and write are observed.  Passed to
 * fem_create() by _init().
 */
fs_operation_def_t devmon_templ[] = {
{ VOPNAME_READ, monitor_read },
{ VOPNAME_WRITE, monitor_write },
{ NULL, NULL }
};
/* The monitor built from devmon_templ; installed on each watched vnode. */
static fem_t *devmon_femops;
int
_init(void)
{
int error;
if ((error = ddi_soft_state_init(&mondev_head, sizeof (Mondev), 1)) != 0)
return (error);
if ((error = mod_install(&modlinkage)) != 0)
ddi_soft_state_fini(&mondev_head);
else if ((error = fem_create("devmon_fem", devmon_templ,
&devmon_femops)) != 0) {
ddi_soft_state_fini(&mondev_head);
}
return (error);
}
/*
 * Module information entry point: hands the request to mod_info() with
 * this module's linkage and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}
/*
 * Module unload entry point.
 *
 * If mod_remove() refuses, its error is returned and nothing is torn
 * down.  Otherwise the soft-state anchor is released and 0 is returned.
 *
 * The monitor definition in devmon_femops is knowingly NOT passed to
 * fem_delete() here, so it is leaked on unload.
 * NOTE(review): the reason for skipping fem_delete() is not recorded -
 * confirm before enabling it.
 */
int
_fini(void)
{
	int status = mod_remove(&modlinkage);

	if (status == 0)
		ddi_soft_state_fini(&mondev_head);

	return (status);
}
/* Defined further down; declared here because mondev_putev() calls it. */
static int vnam_check(struct vnode_name *vn);

/*
 * A mechanism to notice events and queue them up.
 *
 * myid is the vnode_name that was registered as the monitor's private
 * data; flags identifies the kind of event.  The record is validated
 * first, and only then is its Mondev looked at, so a NULL or corrupt
 * myid is rejected instead of being dereferenced.
 *
 * The event queue is a ring: head is the slot the next event goes into,
 * tail is the slot the next event is read from, and the queue is treated
 * as full when advancing head would make it equal to tail.
 *
 * Returns:
 *	1		the event was queued (a waiter is signalled if the
 *			queue had been empty)
 *	0		the queue was full; the overflow counter is bumped
 *			and the event is dropped
 *	other		the non-zero result of vnam_check()
 */
static int
mondev_putev(void *myid, int flags)
{
	struct vnode_name *m = myid;
	Mondev *d;
	struct fevent *qp;
	int n;
	int empty;

	if ((n = vnam_check(m)) != 0) {
		return (n);
	}
	d = m->Mondev;

	mutex_enter(&d->lock);
	n = d->head;
	empty = (n == d->tail);
	qp = d->eventq + n;
	if (++n == d->qsize) {
		n = 0;
	}
	if (n == d->tail) {
		/* Queue overflow */
		d->oflow++;
		mutex_exit(&d->lock);
		return (0);
	}
	d->head = n;
	qp->item = m;
	qp->flags = flags;
	qp->ts = 0;
	if (empty) {
		cv_signal(&d->qwait);
	}
	mutex_exit(&d->lock);
	return (1);
}
/*
 * Take the oldest queued event, if there is one.
 *
 * With the device lock held: when head and tail differ, the event at
 * tail is copied into *e and tail moves on one slot, wrapping to 0 at
 * qsize.
 *
 * Returns 1 when an event was copied out, 0 when the queue was empty
 * (*e is then left untouched).
 */
static int
mondev_getevent(Mondev *dev, struct fevent *e)
{
	int got = 0;

	mutex_enter(&dev->lock);
	if (dev->head != dev->tail) {
		int next = dev->tail + 1;

		*e = dev->eventq[dev->tail];
		dev->tail = (next == dev->qsize) ? 0 : next;
		got = 1;
	}
	mutex_exit(&dev->lock);

	return (got);
}
/*
 * Sleep until the event queue is non-empty.
 *
 * Returns 0 once head and tail differ, or -1 if cv_wait_sig() returned 0
 * while waiting.  The device lock is not held on return in either case.
 */
static int
mondev_wait(Mondev *dev)
{
	int rc = 0;

	mutex_enter(&dev->lock);
	while (dev->head == dev->tail) {
		if (cv_wait_sig(&dev->qwait, &dev->lock) == 0) {
			rc = -1;
			break;
		}
	}
	mutex_exit(&dev->lock);

	return (rc);
}
/*
 * Write monitor: queue an event with flag value 2 for the record stored
 * in this layer's fn_available, then pass the write on with
 * vnext_write() and return its result.  The outcome of queueing is
 * ignored.
 */
static int
monitor_write(femarg_t *arg, struct uio *uiop, int ioflag, struct cred *cr)
{
	struct fem_node *node = arg->fa_fnode;
	void *cookie = node->fn_available;

	if (debugging)
		cmn_err(CE_NOTE, "monitor write: fem_node %p, mypointer %p\n",
		    node, cookie);

	mondev_putev(cookie, 2);
	return (vnext_write(arg, uiop, ioflag, cr));
}
/*
 * Read monitor: queue an event with flag value 1 for the record stored
 * in this layer's fn_available, then pass the read on with vnext_read()
 * and return its result.  The outcome of queueing is ignored.
 */
static int
monitor_read(femarg_t *arg, struct uio *uiop, int ioflag, struct cred *cr)
{
	struct fem_node *node = arg->fa_fnode;
	void *cookie = node->fn_available;

	if (debugging)
		cmn_err(CE_NOTE, "monitor read: fem_node %p, mypointer %p\n",
		    node, cookie);

	mondev_putev(cookie, 1);
	return (vnext_read(arg, uiop, ioflag, cr));
}
/*
 * Push nam onto the front of the device's vnode_name list.  No locking
 * is done here.
 */
static void
vnam_link(Mondev *d, struct vnode_name *nam)
{
	struct vnode_name **headp = &d->vnhead;

	nam->next = *headp;
	*headp = nam;
}
/*
 * Remove nam from the device's vnode_name list if it is on it; a record
 * that is not on the list leaves the list unchanged.  No locking is done
 * here.
 */
static void
vnam_unlink(Mondev *d, struct vnode_name *nam)
{
	struct vnode_name **linkp = &d->vnhead;

	/* Walk the links until one points at nam or the list ends. */
	while (*linkp != NULL && *linkp != nam)
		linkp = &(*linkp)->next;

	if (*linkp == nam)
		*linkp = nam->next;
}
/*
 * Allocate a vnode_name record tying a path name and vnode to a device,
 * and put it on the device's list.
 *
 * The name is copied into a MAXCMDLEN-byte buffer; the copy is bounded
 * and always NUL-terminated, so an over-long name is truncated rather
 * than written past the buffer.  Every field, including the magic
 * number, is filled in before the record is linked onto the list, so it
 * is never visible there half-initialised.
 *
 * Returns the new record.  Both allocations use KM_SLEEP.
 */
static struct vnode_name *
vnam_create(char *name, vnode_t *v, Mondev *mdev)
{
	struct vnode_name *vnam;

	vnam = kmem_alloc(sizeof (*vnam), KM_SLEEP);
	vnam->name = kmem_alloc(MAXCMDLEN, KM_SLEEP);
	(void) snprintf(vnam->name, MAXCMDLEN, "%s", name);
	vnam->v = v;
	vnam->Mondev = mdev;
	vnam->magic = VD_MAGIC;

	mutex_enter(&mdev->lock);
	vnam_link(mdev, vnam);
	mutex_exit(&mdev->lock);

	if (debugging) {
		cmn_err(CE_NOTE,
		    "vnam %p { name %p, vn %p, dev %p, next %p} \n",
		    vnam, vnam->name, vnam->v, vnam->Mondev, vnam->next);
	}
	return (vnam);
}
/*
 * Log the text of a failed condition and make the enclosing function
 * return EINVAL.  Wrapped in do { } while (0) so that "CHECK(x);" is a
 * single statement and stays correct inside an unbraced if/else.
 */
#define CHECK(x) do { \
	if (!(x)) { \
		cmn_err(CE_NOTE, "%s - failed\n", #x); \
		return (EINVAL); \
	} \
} while (0)
/*
 * Sanity-check a vnode_name record.
 *
 * Returns 0 when the record is non-NULL, carries VD_MAGIC, has a name
 * and a vnode, and points at a Mondev carrying MD_MAGIC.  The first
 * condition that fails is logged and EINVAL is returned (both done by
 * the CHECK macro).  The logged text is the condition exactly as
 * written below.
 */
static int
vnam_check(struct vnode_name *vn)
{
CHECK(vn != NULL);
CHECK(vn->magic == VD_MAGIC);
CHECK(vn->name != NULL);
CHECK(vn->v != NULL);
CHECK(vn->Mondev != NULL);
CHECK(vn->Mondev->magic == MD_MAGIC);
return (0);
}
/*
 * Dispose of a vnode_name record: release its vnode, then free the
 * MAXCMDLEN-byte name buffer and the record itself.  The record is not
 * removed from any list here.
 */
static void
vnam_release(struct vnode_name *nam)
{
	char *label = nam->name;

	VN_RELE(nam->v);
	kmem_free(label, MAXCMDLEN);
	kmem_free(nam, sizeof (*nam));
}
/*
 * Split a request of the form "<op><spaces>/<path>" in place.
 *
 * The first character must be 'r', 'w' or 'a'.  Any spaces after it are
 * skipped and the next character must be '/'.  On success *type receives
 * the operation character, *fn points at the '/' inside s, and a newline
 * that is the very last character of s is overwritten with a NUL.
 *
 * Returns 0 on success; EINVAL for an unknown operation or a path that
 * does not begin with '/'.  Nothing is stored through type or fn on
 * failure.
 */
static int
parse_request(char *s, int *type, char **fn)
{
	char *p = s;
	char *end;
	int op = *p++;

	if (op != 'r' && op != 'w' && op != 'a')
		return (EINVAL);

	while (*p == ' ')
		p++;
	if (*p != '/')
		return (EINVAL);	/* must be fully qualified */

	*type = op;
	*fn = p;

	/* Find the terminator, then drop one trailing newline. */
	for (end = p; *end != '\0'; end++)
		;
	if (end[-1] == '\n')
		end[-1] = '\0';

	return (0);
}
/*
 * Act on one request written to the device: parse it, open the named
 * file, record it against the device and install the read/write monitor
 * on its vnode.
 *
 * Returns 0 on success.  On failure returns the error from
 * parse_request(), vn_open() or fem_install(), or EINVAL if no record
 * could be created.  Every failure path gives back what had been
 * acquired up to that point: the vnode hold when the record cannot be
 * created, and the record (together with its vnode hold) when the
 * monitor cannot be installed.
 *
 * The parsed request type is only used in the debug message.
 */
static int
mondev_request(Mondev *mdev, char *s)
{
	int c;
	int e;
	struct vnode *vn;
	struct vnode_name *vnam;
	char *fn;

	if ((e = parse_request(s, &c, &fn)) != 0) {
		return (e);
	}
	if (debugging) {
		cmn_err(CE_NOTE, "attaching (%c) to (%s)...\n", c, fn);
	}
	if ((e = vn_open(fn, UIO_SYSSPACE, FREAD, 0, &vn, 0, 0)) != 0) {
		cmn_err(CE_NOTE, "open %s failed - %d\n", fn, e);
		return (e);
	}
	if ((vnam = vnam_create(fn, vn, mdev)) == NULL) {
		cmn_err(CE_NOTE, "Unable to create vnam\n");
		VN_RELE(vn);
		return (EINVAL);
	}
	if ((e = fem_install(vn, devmon_femops, (void *)vnam)) != 0) {
		cmn_err(CE_NOTE, "fem_install on %s failed - %d\n", fn, e);
		mutex_enter(&mdev->lock);
		vnam_unlink(mdev, vnam);
		mutex_exit(&mdev->lock);
		vnam_release(vnam);
		return (e);
	}
	return (0);
}
/*
 * Probe entry point: the argument is not examined and
 * DDI_PROBE_DONTCARE is always returned.
 */
/*ARGSUSED*/
static int
cmd_probe(dev_info_t *dip)
{
return (DDI_PROBE_DONTCARE);
}
/*
 * cmd_open is called in response to the open(2) system call.
 *
 * Returns ENXIO when the device number has no valid Mondev and EINVAL
 * for anything other than a character open.  Otherwise the CMD_OPEN bit
 * is set in the device state under the device lock and 0 is returned.
 */
/*ARGSUSED*/
static int
cmd_open(dev_t *dev, int openflags, int otyp, cred_t *credp)
{
	Mondev *md = dev2mon(*dev);

	if (md == NULL)
		return (ENXIO);
	if (otyp != OTYP_CHR)
		return (EINVAL);

	mutex_enter(&md->lock);
	md->state |= CMD_OPEN;
	mutex_exit(&md->lock);

	return (0);
}
/*
 * cmd_close is called after the last process that has the device open
 * calls close(2).
 *
 * Clears the device state, then detaches every watched file: each record
 * is taken off the device list, the monitor is uninstalled from its
 * vnode, and the record is released.  The device lock is dropped around
 * fem_uninstall() and vnam_release() and retaken before the list is
 * looked at again.  Finally the event queue is reset to empty.
 *
 * Returns ENXIO when the device number has no valid Mondev, otherwise 0.
 */
/*ARGSUSED*/
static int
cmd_close(dev_t dev, int openflags, int otyp, cred_t *credp)
{
	Mondev *mondev;
	struct vnode_name *v;
	int e;

	mondev = dev2mon(dev);
	if (debugging) {
		cmn_err(CE_NOTE, "cmd_close [%p]\n", (void *)mondev);
	}
	if (mondev == NULL)
		return (ENXIO);

	mutex_enter(&mondev->lock);
	mondev->state = 0;
	while ((v = mondev->vnhead) != NULL) {
		vnam_unlink(mondev, v);
		mutex_exit(&mondev->lock);
		if ((e = fem_uninstall(v->v, devmon_femops)) != 0) {
			if (debugging) {
				cmn_err(CE_NOTE,
				    "fem_uninstall failed - '%d'\n", e);
			}
			/*
			 * The record is deliberately leaked: uninstall
			 * should not fail, and if it did the monitor may
			 * still hold a pointer to the record.
			 */
		} else {
			vnam_release(v);
		}
		mutex_enter(&mondev->lock);
	}
	mondev->head = mondev->tail = 0;	/* Reset */
	mutex_exit(&mondev->lock);
	return (0);
}
/*
 * write(2) entry point: the written bytes form one request, which is
 * copied into a NUL-terminated kernel buffer and handed to
 * mondev_request().
 *
 * Returns:
 *	E2BIG	more than MAXCMDLEN bytes were written
 *	ENXIO	the device number has no valid Mondev
 *	EFAULT	a byte could not be fetched from the caller's buffer
 *	other	the result of mondev_request() (0 on success)
 *
 * The temporary buffer is freed on every path.
 */
/*ARGSUSED*/
static int
cmd_write(dev_t dev, struct uio *uiop, cred_t *credp)
{
	Mondev *mondev;
	char *buf;
	unsigned i;
	int c;
	int n;

	if (uiop->uio_resid > MAXCMDLEN) {
		return (E2BIG);
	}
	if ((mondev = dev2mon(dev)) == NULL) {
		return (ENXIO);
	}

	/* One extra byte for the terminating NUL. */
	n = uiop->uio_resid+1;
	buf = kmem_alloc(n, KM_SLEEP);
	i = 0;
	while (uiop->uio_resid > 0) {
		if ((c = uwritec(uiop)) == -1) {
			cmn_err(CE_NOTE, "cmd_write: invalid write buffer\n");
			kmem_free(buf, n);
			return (EFAULT);
		}
		buf[i++] = c;
	}
	buf[i] = 0;

	c = mondev_request(mondev, buf);
	kmem_free(buf, n);
	return (c);
}
/* errors not checked!*/
/*ARGSUSED*/
static int
cmd_read(dev_t dev, struct uio *uiop, cred_t *credp)
{
Mondev *mondev;
unsigned i;
int c;
struct fevent ev;
mondev = dev2mon(dev);
if (mondev == NULL) {
cmn_err(CE_NOTE, "cmd_read: dev %u == NULL?\n", dev);
return ENXIO;
}
while (mondev_getevent(mondev, &ev) == 0) {
if (mondev_wait(mondev) == -1) {
return (EINTR);
}
}
if (c=vnam_check(ev.item)) {
return (c);
}
ureadc(ev.flags, uiop);
ureadc(' ', uiop);
for (i = 0; c = ev.item->name[i]; i++) {
ureadc(c, uiop);
}
ureadc('\n', uiop);
return (0);
}
[Alt 2001] Lori Alt, Anton Rang; FSI stacking architecture.
[Graves 2002] Tim Graves; Marketting Requirements Document for modular file system implementation.
[Skinner 1993] Glenn Skinner, Thomas K. Wong; "Stacking" Vnodes: A Progress Report.
[Skinner 1992] Glenn Skinner; ? Glenn Notes on the interposition design ?, personal communication PSARC
PSARC/2001/679 - Vnode Interfaces.
PSARC 1999/289 - NFSv4.
PSARC/2002/544 - NFSv4 Replication and Migration.
PSARC/2000/078 - Event mechanism for Cascade.
PSARC/1992/026 - Stacking vnodes.