From david@gwynne.id.au Mon Jun 10 12:35:19 2013 Return-Path: Received: from animata.net (toy.eait.uq.edu.au. [130.102.64.64]) by mx.google.com with ESMTPSA id nz3sm8475807pbb.33.2013.06.09.19.35.17 for (version=TLSv1 cipher=ECDHE-RSA-RC4-SHA bits=128/128); Sun, 09 Jun 2013 19:35:19 -0700 (PDT) Date: Mon, 10 Jun 2013 12:35:15 +1000 From: David Gwynne To: beck@openbsd.org Subject: last trim diff i could find Message-ID: <20130610023515.GH28670@animata.net> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) Status: RO X-Status: A Content-Length: 23180 Lines: 833 Index: kern/vfs_bio.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_bio.c,v retrieving revision 1.133 diff -u -p -r1.133 vfs_bio.c --- kern/vfs_bio.c 6 Jul 2011 20:50:05 -0000 1.133 +++ kern/vfs_bio.c 12 Jul 2011 12:25:17 -0000 @@ -902,6 +902,8 @@ start: b.b_lblkno = blkno; bp = RB_FIND(buf_rb_bufs, &vp->v_bufs_tree, &b); if (bp != NULL) { + if (ISSET(bp->b_flags, B_DELETE)) + panic("getting wipe collide"); if (ISSET(bp->b_flags, B_BUSY)) { SET(bp->b_flags, B_WANTED); error = tsleep(bp, slpflag | (PRIBIO + 1), "getblk", @@ -1053,6 +1055,8 @@ buf_get(struct vnode *vp, daddr64_t blkn bp->b_blkno = bp->b_lblkno = blkno; bgetvp(vp, bp); + if (ISSET(bp->b_flags, B_DELETE)) + panic("inserting wipe collide"); if (RB_INSERT(buf_rb_bufs, &vp->v_bufs_tree, bp)) panic("buf_get: dup lblk vp %p bp %p", vp, bp); } else { Index: scsi/sd.c =================================================================== RCS file: /cvs/src/sys/scsi/sd.c,v retrieving revision 1.237 diff -u -p -r1.237 sd.c --- scsi/sd.c 12 Jul 2011 04:25:49 -0000 1.237 +++ scsi/sd.c 12 Jul 2011 12:25:17 -0000 @@ -110,6 +110,45 @@ void sd_cmd_rw16(struct scsi_xfer *, int void sd_buf_done(struct scsi_xfer *); +void sd_delete(struct sd_softc *, struct buf *); +void sd_unmap(struct sd_softc *, struct sd_unmap_tree *); +void 
sd_unmap_tmo(void *); +void sd_unmap_task(void *, void *); +void sd_write_same_16(struct sd_softc *, struct sd_unmap_tree *); +void sd_delete_done(struct scsi_xfer *); + +struct sd_unmap_node { + RB_ENTRY(sd_unmap_node) un_entry; + daddr64_t un_addr; + u_int un_len; +}; +RB_HEAD(sd_unmap_rbtree, sd_unmap_node); + +struct sd_unmap_tree { + struct sd_unmap_rbtree ut_tree; + struct bufq_fifo_head ut_bufs; /* this is naughty */ + struct workq_task ut_wqt; + u_int ut_sectors; + u_int ut_descs; +}; + +int sd_unmap_node_cmp(struct sd_unmap_node *, struct sd_unmap_node *); +struct sd_unmap_tree *sd_unmap_tree_new(struct sd_softc *); +struct sd_unmap_node *sd_unmap_node_new(struct sd_softc *, + daddr64_t, u_int nsecs); +void sd_unmap_tree_done(struct sd_softc *, struct sd_unmap_tree *, int); + +void sd_unmap_merge(struct sd_unmap_tree *, + struct sd_unmap_node *, struct sd_unmap_node *); +void sd_unmap_merge_left(struct sd_unmap_tree *, struct sd_unmap_node *); +void sd_unmap_merge_right(struct sd_unmap_tree *, struct sd_unmap_node *); + +RB_PROTOTYPE(sd_unmap_rbtree, sd_unmap_node, un_entry, sd_unmap_node_cmp); + +struct pool sd_unmap_tree_pl; +struct pool sd_unmap_node_pl; +int sd_unmap_pools = 0; + struct cfattach sd_ca = { sizeof(struct sd_softc), sdmatch, sdattach, sddetach, sdactivate @@ -194,9 +233,18 @@ sdattach(struct device *parent, struct d */ printf("\n"); + if (!sd_unmap_pools) { + pool_init(&sd_unmap_tree_pl, sizeof(struct sd_unmap_tree), + 0, 0, 0, "sddeltr", NULL); + pool_init(&sd_unmap_node_pl, sizeof(struct sd_unmap_node), + 0, 0, 0, "sddelnd", NULL); + sd_unmap_pools = 1; + } + rw_init(&sc->sc_unmap_lk, "sddel"); scsi_xsh_set(&sc->sc_xsh, sc_link, sdstart); timeout_set(&sc->sc_timeout, (void (*)(void *))scsi_xsh_add, &sc->sc_xsh); + timeout_set(&sc->sc_unmap_tmo, sd_unmap_tmo, sc); /* Spin up non-UMASS devices ready or not. 
*/ if ((sc->sc_link->flags & SDEV_UMASS) == 0) @@ -533,18 +581,25 @@ sdstrategy(struct buf *bp) goto bad; } + if (ISSET(bp->b_flags, B_DELETE) && ISSET(sc->flags, SDF_THIN)) + bp->b_bcount = bp->b_resid; + /* Validate the request. */ if (bounds_check_with_label(bp, sc->sc_dk.dk_label) == -1) goto done; - /* Place it in the queue of disk activities for this disk. */ - bufq_queue(&sc->sc_bufq, bp); + if (ISSET(bp->b_flags, B_DELETE)) + sd_delete(sc, bp); + else { + /* Place it in the queue of disk activities for this disk. */ + bufq_queue(&sc->sc_bufq, bp); - /* - * Tell the device to get going on the transfer if it's - * not doing anything, otherwise just wait for completion - */ - scsi_xsh_add(&sc->sc_xsh); + /* + * Tell the device to get going on the transfer if it's + * not doing anything, otherwise just wait for completion + */ + scsi_xsh_add(&sc->sc_xsh); + } device_unref(&sc->sc_dev); return; @@ -1607,6 +1662,11 @@ sd_get_parms(struct sd_softc *sc, struct CLR(sc->flags, SDF_THIN); } + SET(sc->flags, SDF_THIN); + sc->params.unmap_sectors = 0xffff; + sc->params.unmap_descs = 64; + sc->sc_delete = sd_unmap; + buf = dma_alloc(sizeof(*buf), PR_NOWAIT); if (buf == NULL) goto validate; @@ -1786,3 +1846,297 @@ sd_flush(struct sd_softc *sc, int flags) scsi_xs_put(xs); } + +int +sd_unmap_node_cmp(struct sd_unmap_node *a, struct sd_unmap_node *b) +{ + if (a->un_addr + a->un_len >= b->un_addr && + b->un_addr + b->un_len >= a->un_addr) + return (0); + /* A 64-bit difference would be truncated by the int return. */ + return (a->un_addr < b->un_addr ? -1 : 1); +} + +struct sd_unmap_tree * +sd_unmap_tree_new(struct sd_softc *sc) +{ + struct sd_unmap_tree *ut; + + ut = pool_get(&sd_unmap_tree_pl, PR_WAITOK | PR_ZERO); + SIMPLEQ_INIT(&ut->ut_bufs); + RB_INIT(&ut->ut_tree); + + sc->sc_unmap_tree = ut; + timeout_add_sec(&sc->sc_unmap_tmo, 1); + return (ut); +} + +struct sd_unmap_node * +sd_unmap_node_new(struct sd_softc *sc, daddr64_t secno, u_int nsecs) +{ + struct sd_unmap_node *un; + + un = pool_get(&sd_unmap_node_pl, PR_WAITOK | PR_ZERO); + 
un->un_addr = secno; + un->un_len = nsecs; + + return (un); +} + +void +sd_unmap_merge(struct sd_unmap_tree *ut, struct sd_unmap_node *l, + struct sd_unmap_node *r) +{ + l->un_len += r->un_len; + ut->ut_descs--; + + RB_REMOVE(sd_unmap_rbtree, &ut->ut_tree, r); + pool_put(&sd_unmap_node_pl, r); +} + +void +sd_unmap_merge_left(struct sd_unmap_tree *ut, struct sd_unmap_node *un) +{ + struct sd_unmap_node *sn; + + sn = RB_PREV(sd_unmap_rbtree, &ut->ut_tree, un); + if (sn != NULL && sn->un_addr + sn->un_len == un->un_addr) + sd_unmap_merge(ut, sn, un); +} + +void +sd_unmap_merge_right(struct sd_unmap_tree *ut, struct sd_unmap_node *un) +{ + struct sd_unmap_node *sn; + + sn = RB_NEXT(sd_unmap_rbtree, &ut->ut_tree, un); + if (sn != NULL && un->un_addr + un->un_len == sn->un_addr) + sd_unmap_merge(ut, un, sn); +} + +void +sd_delete(struct sd_softc *sc, struct buf *bp) +{ + struct sd_unmap_tree *ut, *dt = NULL; + struct sd_unmap_node *un, unkey; + struct partition *p; + daddr64_t secno; + u_int nsecs; + + secno = bp->b_blkno / (sc->sc_dk.dk_label->d_secsize / DEV_BSIZE); + p = &sc->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)]; + secno += DL_GETPOFFSET(p); + nsecs = howmany(bp->b_bcount, sc->sc_dk.dk_label->d_secsize); + + rw_enter_write(&sc->sc_unmap_lk); + ut = sc->sc_unmap_tree; + if (ut != NULL && ut->ut_sectors + nsecs > sc->params.unmap_sectors) { + dt = ut; + ut = NULL; + } + if (ut == NULL) { + ut = sd_unmap_tree_new(sc); + un = sd_unmap_node_new(sc, secno, nsecs); + RB_INSERT(sd_unmap_rbtree, &ut->ut_tree, un); + ut->ut_sectors = nsecs; + ut->ut_descs = 1; + } else { + memset(&unkey, 0, sizeof(unkey)); + unkey.un_addr = secno; + unkey.un_len = nsecs; + + un = RB_FIND(sd_unmap_rbtree, &ut->ut_tree, &unkey); + if (un == NULL) { + if (ut->ut_descs >= sc->params.unmap_descs) { + dt = ut; + ut = sd_unmap_tree_new(sc); + } + un = sd_unmap_node_new(sc, secno, nsecs); + RB_INSERT(sd_unmap_rbtree, &ut->ut_tree, un); + ut->ut_sectors += nsecs; + ut->ut_descs += 1; + } 
else { + un->un_len += nsecs; + ut->ut_sectors += nsecs; + + if (secno < un->un_addr) { + un->un_addr = secno; + sd_unmap_merge_left(ut, un); + } else + sd_unmap_merge_right(ut, un); + } + + } + SIMPLEQ_INSERT_TAIL(&ut->ut_bufs, bp, + b_bufq.bufq_data_fifo.bqf_entries); + rw_exit_write(&sc->sc_unmap_lk); + + if (dt != NULL) + sc->sc_delete(sc, dt); +} + +void +sd_unmap_tmo(void *xsc) +{ + struct sd_softc *sc = xsc; + + if (workq_add_task(NULL, 0, sd_unmap_task, sc, NULL) != 0) + timeout_add(&sc->sc_unmap_tmo, 1); +} + +void +sd_unmap_task(void *xsc, void *xarg) +{ + struct sd_softc *sc = xsc; + struct sd_unmap_tree *ut; + + rw_enter_write(&sc->sc_unmap_lk); + ut = sc->sc_unmap_tree; + if (ut != NULL) + sc->sc_unmap_tree = NULL; + rw_exit_write(&sc->sc_unmap_lk); + + if (ut != NULL) { + printf("%s\n", __func__); + sc->sc_delete(sc, ut); + } +} + +void +sd_unmap(struct sd_softc *sc, struct sd_unmap_tree *ut) +{ + struct sd_unmap_node *un; + struct scsi_xfer *xs; + + struct scsi_unmap cdb; + struct scsi_unmap_data *unmap; + struct scsi_unmap_desc *descs, *d; + size_t len; + int i = 0; + + xs = scsi_xs_get(sc->sc_link, 0); + if (xs == NULL) { + sd_unmap_tree_done(sc, ut, ENXIO); + return; + } + + len = sizeof(*unmap) + sizeof(*d) * ut->ut_descs; + unmap = dma_alloc(len, PR_WAITOK | PR_ZERO); + + memset(&cdb, 0, sizeof(cdb)); + cdb.opcode = UNMAP; + _lto2b(len, cdb.list_len); + /* UNMAP DATA LENGTH does not count its own two-byte field. */ + _lto2b(len - sizeof(unmap->data_length), unmap->data_length); + _lto2b(len - sizeof(*unmap), unmap->desc_length); + + descs = (struct scsi_unmap_desc *)(unmap + 1); + RB_FOREACH(un, sd_unmap_rbtree, &ut->ut_tree) { + printf("%s[%d] %u@%lld\n", __func__, + i, un->un_len, un->un_addr); + d = &descs[i++]; + _lto8b(un->un_addr, d->logical_addr); + _lto8b(un->un_len, d->logical_blocks); + } + + memcpy(xs->cmd, &cdb, sizeof(cdb)); + xs->cmdlen = sizeof(cdb); + xs->flags |= SCSI_DATA_OUT; + xs->timeout = 60000; + xs->data = (void *)unmap; + xs->datalen = len; + xs->done = sd_delete_done; + xs->cookie = ut; + + xs->error = 
XS_NOERROR; + sd_delete_done(xs); +// scsi_xs_exec(xs); +} + +void +sd_write_same_16(struct sd_softc *sc, struct sd_unmap_tree *ut) +{ + struct sd_unmap_node *un = RB_ROOT(&ut->ut_tree); + struct scsi_xfer *xs; + + struct scsi_write_same_16 cdb; + void *sector; + + xs = scsi_xs_get(sc->sc_link, 0); + if (xs == NULL) { + sd_unmap_tree_done(sc, ut, ENXIO); + return; + } + + sector = dma_alloc(sc->sc_dk.dk_label->d_secsize, PR_WAITOK | PR_ZERO); + + memset(&cdb, 0, sizeof(cdb)); + cdb.opcode = WRITE_SAME_16; + cdb.flags = WRITE_SAME_F_UNMAP; + _lto8b(un->un_addr, cdb.lba); + _lto8b(un->un_len, cdb.length); + + memcpy(xs->cmd, &cdb, sizeof(cdb)); + xs->cmdlen = sizeof(cdb); + xs->flags |= SCSI_DATA_OUT; + xs->timeout = 60000; + xs->data = sector; + xs->datalen = sc->sc_dk.dk_label->d_secsize; + xs->done = sd_delete_done; /* XXX */ + xs->cookie = ut; + + xs->error = XS_NOERROR; + sd_delete_done(xs); +// scsi_xs_exec(xs); +} + +void +sd_delete_done(struct scsi_xfer *xs) +{ + struct sd_softc *sc = xs->sc_link->device_softc; + struct sd_unmap_tree *ut = xs->cookie; + int error; + + switch (xs->error) { + case XS_NOERROR: + error = 0; + break; + default: + printf("%s %d\n", __func__, xs->error); + error = EIO; + break; + } + + dma_free(xs->data, xs->datalen); + scsi_xs_put(xs); + + sd_unmap_tree_done(sc, ut, error); +} + +void +sd_unmap_tree_done(struct sd_softc *sc, struct sd_unmap_tree *ut, int error) +{ + struct sd_unmap_node *un; + struct buf *bp; + int s; + + while ((un = RB_ROOT(&ut->ut_tree)) != NULL) { + RB_REMOVE(sd_unmap_rbtree, &ut->ut_tree, un); + pool_put(&sd_unmap_node_pl, un); + } + while ((bp = SIMPLEQ_FIRST(&ut->ut_bufs)) != NULL) { + SIMPLEQ_REMOVE_HEAD(&ut->ut_bufs, + b_bufq.bufq_data_fifo.bqf_entries); + + bp->b_error = error; + if (error != 0) + bp->b_flags |= B_ERROR; + + s = splbio(); + biodone(bp); + splx(s); + } + pool_put(&sd_unmap_tree_pl, ut); +} + +RB_GENERATE(sd_unmap_rbtree, sd_unmap_node, un_entry, sd_unmap_node_cmp); Index: scsi/sdvar.h 
=================================================================== RCS file: /cvs/src/sys/scsi/sdvar.h,v retrieving revision 1.38 diff -u -p -r1.38 sdvar.h --- scsi/sdvar.h 11 Jul 2011 06:26:09 -0000 1.38 +++ scsi/sdvar.h 12 Jul 2011 12:25:17 -0000 @@ -48,6 +48,8 @@ */ #ifdef _KERNEL +struct sd_unmap_tree; + struct sd_softc { struct device sc_dev; struct disk sc_dk; @@ -73,6 +75,13 @@ struct sd_softc { struct timeout sc_timeout; struct scsi_xshandler sc_xsh; + + /* thin provisioning */ + void (*sc_delete)(struct sd_softc *, + struct sd_unmap_tree *); + struct rwlock sc_unmap_lk; + struct sd_unmap_tree *sc_unmap_tree; + struct timeout sc_unmap_tmo; }; #define SDGP_RESULT_OK 0 /* parameters obtained */ Index: sys/buf.h =================================================================== RCS file: /cvs/src/sys/sys/buf.h,v retrieving revision 1.78 diff -u -p -r1.78 buf.h --- sys/buf.h 4 Jul 2011 04:30:41 -0000 1.78 +++ sys/buf.h 12 Jul 2011 12:25:17 -0000 @@ -214,12 +214,13 @@ struct buf { #define B_PDAEMON 0x00200000 /* I/O started by pagedaemon */ #define B_RELEASED 0x00400000 /* free this buffer after its kvm */ #define B_NOTMAPPED 0x00800000 /* BUSY, but not necessarily mapped */ +#define B_DELETE 0x01000000 /* Free underlying storage space */ #define B_BITS "\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \ "\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \ "\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \ "\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \ - "\025SCANNED\026DAEMON\027RELEASED\030NOTMAPPED" + "\025SCANNED\026DAEMON\027RELEASED\030NOTMAPPED\031DELETE" /* * This structure describes a clustered I/O. 
It is stored in the b_saveaddr Index: ufs/ffs/ffs_alloc.c =================================================================== RCS file: /cvs/src/sys/ufs/ffs/ffs_alloc.c,v retrieving revision 1.91 diff -u -p -r1.91 ffs_alloc.c --- ufs/ffs/ffs_alloc.c 4 Jul 2011 04:30:41 -0000 1.91 +++ ufs/ffs/ffs_alloc.c 12 Jul 2011 12:25:17 -0000 @@ -51,6 +51,7 @@ #include #include #include +#include #include @@ -70,7 +71,7 @@ } while (0) daddr64_t ffs_alloccg(struct inode *, int, daddr64_t, int); -struct buf * ffs_cgread(struct fs *, struct inode *, int); +struct buf * ffs_cgread(struct fs *, struct vnode *, int); daddr64_t ffs_alloccgblk(struct inode *, struct buf *, daddr64_t); daddr64_t ffs_clusteralloc(struct inode *, int, daddr64_t, int); ino_t ffs_dirpref(struct inode *); @@ -1184,11 +1185,11 @@ ffs_hashalloc(struct inode *ip, int cg, } struct buf * -ffs_cgread(struct fs *fs, struct inode *ip, int cg) +ffs_cgread(struct fs *fs, struct vnode *devvp, int cg) { struct buf *bp; - if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + if (bread(devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, &bp)) { brelse(bp); return (NULL); @@ -1227,7 +1228,7 @@ ffs_fragextend(struct inode *ip, int cg, return (0); } - if (!(bp = ffs_cgread(fs, ip, cg))) + if (!(bp = ffs_cgread(fs, ip->i_devvp, cg))) return (0); cgp = (struct cg *)bp->b_data; @@ -1284,7 +1285,7 @@ ffs_alloccg(struct inode *ip, int cg, da if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) return (0); - if (!(bp = ffs_cgread(fs, ip, cg))) + if (!(bp = ffs_cgread(fs, ip->i_devvp, cg))) return (0); cgp = (struct cg *)bp->b_data; @@ -1438,7 +1439,7 @@ ffs_clusteralloc(struct inode *ip, int c if (fs->fs_maxcluster[cg] < len) return (0); - if (!(bp = ffs_cgread(fs, ip, cg))) + if (!(bp = ffs_cgread(fs, ip->i_devvp, cg))) return (0); cgp = (struct cg *)bp->b_data; @@ -1551,7 +1552,7 @@ ffs_nodealloccg(struct inode *ip, int cg if (fs->fs_cs(fs, cg).cs_nifree == 0) return (0); - if (!(bp = ffs_cgread(fs, ip, cg))) + if 
(!(bp = ffs_cgread(fs, ip->i_devvp, cg))) return (0); cgp = (struct cg *)bp->b_data; @@ -1693,30 +1694,121 @@ gotit: * free map. If a fragment is deallocated, a possible * block reassembly is checked. */ +/* + * To support intelligent drives that want to know about free space, + * we split this operation into two parts. First, we notify the drive + * that the free space is available. Only when that's done do we + * actually mark the space free in the filesystem. + */ +struct blkfree_args { + struct buf buf; + struct workq_task wqt; + struct fs *fs; + ino_t ino; + uid_t uid; + dev_t idev; + struct vnode *devvp; + daddr64_t bno; + long size; +}; +struct pool blkfreeargs_pool; +void ffs_blkfree_biodone(struct buf *); +void ffs_blkfree_postwipe(void *, void *); +#define WIPE_EXTRA_MEM 512 + +void ffs_blkfree_init(void) +{ + pool_init(&blkfreeargs_pool, sizeof(struct blkfree_args), 0, 0, 0, + "blkfreepl", &pool_allocator_nointr); +} + +/* + * Set up the wipe operation. This function can sleep. + */ void ffs_blkfree(struct inode *ip, daddr64_t bno, long size) { + struct blkfree_args *args; + struct mount *mp; + struct ufsmount *ump; + struct vnode *devvp; struct fs *fs; + daddr64_t diskblk; + + fs = ip->i_fs; + mp = ITOV(ip)->v_mount; + ump = mp->mnt_data; + devvp = ump->um_devvp; + diskblk = fsbtodb(fs, bno); + + args = pool_get(&blkfreeargs_pool, PR_WAITOK | PR_ZERO); + args->fs = ip->i_fs; + args->ino = ip->i_number; + args->uid = DIP(ip, uid); + args->idev = ip->i_dev; + args->devvp = ip->i_devvp; + args->bno = bno; + args->size = size; + + if (size < fs->fs_bsize) { + ffs_blkfree_postwipe(args, NULL); + return; + } + + args->buf.b_flags = B_DELETE | B_CALL | B_PHYS; + args->buf.b_iodone = ffs_blkfree_biodone; + args->buf.b_blkno = diskblk; + args->buf.b_resid = size; + args->buf.b_dev = ump->um_dev; + args->buf.b_vp = devvp; + devvp->v_numoutput++; + + VOP_STRATEGY(&args->buf); +} + +/* + * biodone callback. This may not sleep, so we queue the rest of the work. 
+ */ +void +ffs_blkfree_biodone(struct buf *v) +{ + struct blkfree_args *args = (struct blkfree_args *)v; + workq_queue_task(NULL, &args->wqt, 0, ffs_blkfree_postwipe, v, NULL); +} + +/* + * The original FFS blkfree code. This function may sleep. + */ +void +ffs_blkfree_postwipe(void *v, void *unused) +{ + struct blkfree_args *args = (struct blkfree_args *)v; + struct fs *fs = args->fs; + ino_t ino = args->ino; + uid_t uid = args->uid; + dev_t idev = args->idev; + struct vnode *devvp = args->devvp; + daddr64_t bno = args->bno; + long size = args->size; struct cg *cgp; struct buf *bp; daddr64_t blkno; int i, cg, blk, frags, bbase; - fs = ip->i_fs; if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { printf("dev = 0x%x, bsize = %d, size = %ld, fs = %s\n", - ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); + idev, fs->fs_bsize, size, fs->fs_fsmnt); panic("ffs_blkfree: bad size"); } cg = dtog(fs, bno); if ((u_int)bno >= fs->fs_size) { - printf("bad block %lld, ino %u\n", bno, ip->i_number); - ffs_fserr(fs, DIP(ip, uid), "bad block"); - return; + printf("bad block %lld, ino %u\n", bno, ino); + ffs_fserr(fs, uid, "bad block"); + goto out; } - if (!(bp = ffs_cgread(fs, ip, cg))) - return; + if (!(bp = ffs_cgread(fs, devvp, cg))) + goto out; cgp = (struct cg *)bp->b_data; cgp->cg_ffs2_time = cgp->cg_time = time_second; @@ -1726,7 +1818,7 @@ ffs_blkfree(struct inode *ip, daddr64_t blkno = fragstoblks(fs, bno); if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) { printf("dev = 0x%x, block = %lld, fs = %s\n", - ip->i_dev, bno, fs->fs_fsmnt); + idev, bno, fs->fs_fsmnt); panic("ffs_blkfree: freeing free block"); } ffs_setblock(fs, cg_blksfree(cgp), blkno); @@ -1755,7 +1847,7 @@ ffs_blkfree(struct inode *ip, daddr64_t for (i = 0; i < frags; i++) { if (isset(cg_blksfree(cgp), bno + i)) { printf("dev = 0x%x, block = %lld, fs = %s\n", - ip->i_dev, bno + i, fs->fs_fsmnt); + idev, bno + i, fs->fs_fsmnt); panic("ffs_blkfree: 
freeing free frag"); } setbit(cg_blksfree(cgp), bno + i); @@ -1790,6 +1882,8 @@ ffs_blkfree(struct inode *ip, daddr64_t } fs->fs_fmod = 1; bdwrite(bp); +out: + pool_put(&blkfreeargs_pool, args); } int @@ -1823,7 +1917,7 @@ ffs_freefile(struct inode *pip, ino_t in pip->i_dev, ino, fs->fs_fsmnt); cg = ino_to_cg(fs, ino); - if (!(bp = ffs_cgread(fs, pip, cg))) + if (!(bp = ffs_cgread(fs, pip->i_devvp, cg))) return (0); cgp = (struct cg *)bp->b_data; @@ -1874,7 +1968,7 @@ ffs_checkblk(struct inode *ip, daddr64_t if ((u_int)bno >= fs->fs_size) panic("ffs_checkblk: bad block %lld", bno); - if (!(bp = ffs_cgread(fs, ip, dtog(fs, bno)))) + if (!(bp = ffs_cgread(fs, ip->i_devvp, dtog(fs, bno)))) return (0); cgp = (struct cg *)bp->b_data; Index: ufs/ffs/ffs_extern.h =================================================================== RCS file: /cvs/src/sys/ufs/ffs/ffs_extern.h,v retrieving revision 1.37 diff -u -p -r1.37 ffs_extern.h --- ufs/ffs/ffs_extern.h 21 Dec 2010 20:14:44 -0000 1.37 +++ ufs/ffs/ffs_extern.h 12 Jul 2011 12:25:17 -0000 @@ -114,6 +114,7 @@ int64_t ffs2_blkpref(struct inode *, dad #endif void ffs_blkfree(struct inode *, daddr64_t, long); void ffs_clusteracct(struct fs *, struct cg *, daddr64_t, int); +void ffs_blkfree_init(void); /* ffs_balloc.c */ int ffs_balloc(struct inode *, off_t, int, struct ucred *, int, struct buf **); @@ -200,3 +201,4 @@ extern struct pool ffs_dinode1_pool; /* #ifdef FFS2 extern struct pool ffs_dinode2_pool; /* memory pool for UFS2 dinodes */ #endif +extern struct pool blkfreeargs_pool; Index: ufs/ffs/ffs_vfsops.c =================================================================== RCS file: /cvs/src/sys/ufs/ffs/ffs_vfsops.c,v retrieving revision 1.133 diff -u -p -r1.133 ffs_vfsops.c --- ufs/ffs/ffs_vfsops.c 4 Jul 2011 20:35:35 -0000 1.133 +++ ufs/ffs/ffs_vfsops.c 12 Jul 2011 12:25:17 -0000 @@ -1488,6 +1488,7 @@ ffs_init(struct vfsconf *vfsp) pool_init(&ffs_dinode2_pool, sizeof(struct ufs2_dinode), 0, 0, 0, "dino2pl", 
&pool_allocator_nointr); #endif + ffs_blkfree_init(); softdep_initialize(); Index: ufs/mfs/mfs_vnops.c =================================================================== RCS file: /cvs/src/sys/ufs/mfs/mfs_vnops.c,v retrieving revision 1.42 diff -u -p -r1.42 mfs_vnops.c --- ufs/mfs/mfs_vnops.c 4 Jul 2011 20:35:35 -0000 1.42 +++ ufs/mfs/mfs_vnops.c 12 Jul 2011 12:25:17 -0000 @@ -47,6 +47,9 @@ #include #include +#include + + /* mfs vnode operations. */ struct vops mfs_vops = { .vop_lookup = mfs_badop, @@ -85,6 +88,29 @@ struct vops mfs_vops = { .vop_bwrite = vop_generic_bwrite }; +void mfs_wipe(struct mfsnode *mfsp, struct buf *bp); + +void +mfs_wipe(struct mfsnode *mfsp, struct buf *bp) +{ + struct proc *p; + long offset = bp->b_blkno << DEV_BSHIFT; + vsize_t size = bp->b_resid; + vaddr_t addr; + + p = pfind(mfsp->mfs_pid); + addr = (vaddr_t)mfsp->mfs_baseoff + offset; + + /* don't unmap pieces of pages */ + if (addr & PAGE_MASK) + return; + size = size - (size & PAGE_MASK); + /* pfind() returns NULL when no process has that pid. */ + if (p != NULL && size > 0) + uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size, + PGO_FREE); +} + /* * Vnode Operations. * @@ -166,7 +192,9 @@ mfs_doio(struct mfsnode *mfsp, struct bu bp->b_bcount = mfsp->mfs_size - offset; base = mfsp->mfs_baseoff + offset; - if (bp->b_flags & B_READ) + if (bp->b_flags & B_DELETE) + mfs_wipe(mfsp, bp); + else if (bp->b_flags & B_READ) bp->b_error = copyin(base, bp->b_data, bp->b_bcount); else bp->b_error = copyout(bp->b_data, base, bp->b_bcount);