/* $NetBSD: lfs_rfw.c,v 1.40 2025/10/20 04:20:37 perseant Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2025 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Konrad E. Schroder . * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include __KERNEL_RCSID(0, "$NetBSD: lfs_rfw.c,v 1.40 2025/10/20 04:20:37 perseant Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Roll-forward code. */ static bool all_selector(void *, struct vnode *); static void drop_vnode_pages(struct mount *, struct lwp *); static void update_inoblk_copy_dinode(struct lfs *, union lfs_dinode *, const union lfs_dinode *); static int update_inogen(struct lfs_inofuncarg *); static int update_inoblk(struct lfs_inofuncarg *); static int ino_func_setclean(struct lfs_inofuncarg *); static int finfo_func_rfw(struct lfs_finfofuncarg *); static int finfo_func_rewrite(struct lfs_finfofuncarg *); static int finfo_func_setclean(struct lfs_finfofuncarg *); static int update_meta(struct lfs *, ino_t, int, daddr_t, daddr_t, size_t, struct lwp *l); static int skip_superblock(struct lfs *, daddr_t *); static int rewrite_block(struct lfs *, struct vnode *, daddr_t, daddr_t, size_t, int *); #if 0 static bool lfs_isseq(const struct lfs *fs, long int lbn1, long int lbn2); #endif extern int lfs_do_rfw; int rblkcnt; int lfs_rfw_max_psegs = 0; /* * Allocate a particular inode with a particular version number, freeing * any previous versions of this inode that may have gone before. * Used by the roll-forward code. * * XXX this function does not have appropriate locking to be used on a live fs; * XXX but something similar could probably be used for an "undelete" call. * * Called with the Ifile inode locked. 
*/
int
lfs_rf_valloc(struct lfs *fs, ino_t ino, int vers, struct lwp *l,
	      struct vnode **vpp, union lfs_dinode *dip)
{
	/*
	 * Return contract, as implemented below:
	 *   0       *vpp is a vnode obtained with LK_EXCLUSIVE.
	 *   EEXIST  a vnode for ino exists but its generation can neither
	 *           be used nor replaced; *vpp is set to NULLVP.
	 *   ENOENT  no vnode exists and dip is NULL (nothing to build the
	 *           inode from); *vpp is not written on this path.
	 *   other   error from vcache_new() (*vpp not written) or from
	 *           vn_lock() (*vpp set to NULLVP).
	 * The lwp argument l is not used in this function.
	 */
	struct vattr va;
	struct vnode *vp;
	struct inode *ip;
	int error;

	KASSERT(ino > LFS_IFILE_INUM);
	ASSERT_SEGLOCK(fs); /* XXX it doesn't, really */

	/*
	 * First, just try a vget. If the version number is the one we want,
	 * we don't have to do anything else. If the version number is wrong,
	 * take appropriate action.
	 */
	error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp);
	if (error == 0) {
		DLOG((DLOG_RF, "lfs_rf_valloc[1]: ino %d vp %p\n",
			(int)ino, vp));

		*vpp = vp;
		ip = VTOI(vp);
		DLOG((DLOG_RF, " ip->i_gen=%jd dip nlink %jd seeking"
			" version %jd\n", (intmax_t)ip->i_gen,
			(intmax_t)(dip == NULL ? -1
				: lfs_dino_getnlink(fs, dip)), (intmax_t)vers));
		if (ip->i_gen == vers) {
			/*
			 * We have what we wanted already.
			 */
			DLOG((DLOG_RF, " pre-existing\n"));
			return 0;
		} else if (ip->i_gen < vers && dip != NULL
			&& lfs_dino_getnlink(fs, dip) > 0) {
			/*
			 * We have found a newer version. Truncate
			 * the old vnode to zero and re-initialize
			 * from the given dinode.
			 */
			DLOG((DLOG_RF, " replace old version %jd\n",
				(intmax_t)ip->i_gen));
			/* NOTE(review): lfs_truncate()'s result is not checked. */
			lfs_truncate(vp, (off_t)0, 0, NOCRED);
			ip->i_gen = vers;
			vp->v_type = IFTOVT(lfs_dino_getmode(fs, dip));
			update_inoblk_copy_dinode(fs, ip->i_din, dip);
			LFS_SET_UINO(ip, IN_CHANGE | IN_UPDATE);
			return 0;
		} else {
			/*
			 * Not the right version and nothing to
			 * initialize from. Don't recover this data.
			 */
			DLOG((DLOG_RF, "ino %d: sought version %d, got %d\n",
				(int)ino, (int)vers,
				(int)lfs_dino_getgen(fs, ip->i_din)));
			vput(vp);
			*vpp = NULLVP;
			return EEXIST;
		}
	}

	/*
	 * No version of this inode was found in the cache.
	 * Make a new one from the dinode. We will add data blocks
	 * as they come in, so scrub any block addresses off of the
	 * inode and reset block counts to zero.
	 */
	if (dip == NULL)
		return ENOENT;

	/* Describe the inode to create: type and mode come from the dinode. */
	vattr_null(&va);
	va.va_type = IFTOVT(lfs_dino_getmode(fs, dip));
	va.va_mode = lfs_dino_getmode(fs, dip) & ALLPERMS;
	va.va_fileid = ino;
	va.va_gen = vers;
	error = vcache_new(fs->lfs_ivnode->v_mount, NULL, &va, NOCRED, NULL,
	    &vp);
	if (error)
		return error;
	error = vn_lock(vp, LK_EXCLUSIVE);
	if (error)
		goto err;

	ip = VTOI(vp);
	/* Copies the dinode fields but not the block pointers or di_blocks. */
	update_inoblk_copy_dinode(fs, ip->i_din, dip);

	DLOG((DLOG_RF, "lfs_valloc[2] ino %d vp %p size=%lld effnblks=%d,"
		" blocks=%d\n", (int)ino, vp, (long long)ip->i_size,
		(int)ip->i_lfs_effnblks,
		(int)lfs_dino_getblocks(fs, ip->i_din)));

	*vpp = vp;
	return 0;

err:
	/* The lock was never obtained, so only the reference is dropped. */
	vrele(vp);
	*vpp = NULLVP;
	return error;
}

/*
 * Load the appropriate indirect block, and change the appropriate pointer.
 * Mark the block dirty. Do segment and avail accounting.
 *
 * ino/vers identify the file, lbn is the (non-negative) logical block,
 * ndaddr is the block's new address and size its length in bytes.
 * The vnode is looked up with lfs_rf_valloc() passing no dinode, so it is
 * never created from scratch here.  Returns 0, or the error from
 * lfs_rf_valloc() or lfs_balloc().
 */
static int
update_meta(struct lfs *fs, ino_t ino, int vers, daddr_t lbn,
	    daddr_t ndaddr, size_t size, struct lwp *l)
{
	int error;
	struct vnode *vp;
	struct inode *ip;
	daddr_t odaddr;
	struct indir a[ULFS_NIADDR];
	int num;
	struct buf *bp;
	SEGUSE *sup;
	u_int64_t newsize, loff;

	KASSERT(lbn >= 0);	/* no indirect blocks */
	KASSERT(ino > LFS_IFILE_INUM);

	DLOG((DLOG_RF, "update_meta: ino %d lbn %d size %d at 0x%jx\n",
	      (int)ino, (int)lbn, (int)size, (uintmax_t)ndaddr));

	if ((error = lfs_rf_valloc(fs, ino, vers, l, &vp, NULL)) != 0)
		return error;
	ip = VTOI(vp);

	/*
	 * If block already exists, note its new location
	 * but do not account it as new.
	 */
	/* NOTE(review): the result of ulfs_bmaparray() is not checked. */
	ulfs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL, NULL);
	if (odaddr == UNASSIGNED) {
		if ((error = lfs_balloc(vp, (lbn << lfs_sb_getbshift(fs)),
					size, NOCRED, 0, &bp)) != 0) {
			vput(vp);
			return (error);
		}
		/* No need to write, the block is already on disk */
		if (bp->b_oflags & BO_DELWRI) {
			LFS_UNLOCK_BUF(bp);
			/* Account recovery of the previous version */
			lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount));
		}
		brelse(bp, BC_INVAL);
		DLOG((DLOG_RF, "balloc ip->i_lfs_effnblks = %d,"
			" lfs_dino_getblocks(fs, ip->i_din) = %d\n",
			(int)ip->i_lfs_effnblks,
			(int)lfs_dino_getblocks(fs, ip->i_din)));
	} else {
		/* XXX fragextend? */
		DLOG((DLOG_RF, "block exists, no balloc\n"));
	}

	/*
	 * Extend the file, if it is not large enough already.
	 * XXX This is not exactly right, we don't know how much of the
	 * XXX last block is actually used.
	 *
	 * XXX We should be able to encode the actual data length of the
	 * XXX last block in fi_lastlength, since we can infer the
	 * XXX necessary block length from that using a variant of
	 * XXX lfs_blksize().
	 */
	loff = lfs_lblktosize(fs, lbn);
	if (loff >= (ULFS_NDADDR << lfs_sb_getbshift(fs))) {
		/* No fragments */
		newsize = loff + 1;
	} else {
		/* Subtract only a fragment to account for block size */
		newsize = loff + size - lfs_fsbtob(fs, 1) + 1;
	}

	if (ip->i_size < newsize) {
		DLOG((DLOG_RF, "ino %d size %d -> %d\n",
			(int)ino, (int)ip->i_size, (int)newsize));
		lfs_dino_setsize(fs, ip->i_din, newsize);
		ip->i_size = newsize;
		/*
		 * tell vm our new size for the case the inode won't
		 * appear later.
		 */
		uvm_vnp_setsize(vp, newsize);
	}

	/* Point lbn at its new address. */
	lfs_update_single(fs, NULL, vp, lbn, ndaddr, size);

	/* Credit the bytes to the segment that now holds the block. */
	LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, ndaddr), bp);
	sup->su_nbytes += size;
	LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, ndaddr), bp);

	/* differences here should be due to UNWRITTEN indirect blocks. */
	if (vp->v_type != VLNK) {
		if (!(ip->i_lfs_effnblks >= lfs_dino_getblocks(fs, ip->i_din))
#if 0
		    || !(lfs_lblkno(fs, ip->i_size) > ULFS_NDADDR ||
			 ip->i_lfs_effnblks == lfs_dino_getblocks(fs, ip->i_din))
#endif /* 0 */
			) {
			vprint("vnode", vp);
			printf("effnblks=%jd dino_getblocks=%jd\n",
			       (intmax_t)ip->i_lfs_effnblks,
			       (intmax_t)lfs_dino_getblocks(fs, ip->i_din));
		}
		KASSERT(ip->i_lfs_effnblks >= lfs_dino_getblocks(fs, ip->i_din));
#if 0
		KASSERT(lfs_lblkno(fs, ip->i_size) > ULFS_NDADDR ||
			ip->i_lfs_effnblks == lfs_dino_getblocks(fs, ip->i_din));
#endif /* 0 */
	}

#ifdef DEBUG
	/* Now look again to make sure it worked */
	ulfs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL, NULL);
	if (LFS_DBTOFSB(fs, odaddr) != ndaddr)
		DLOG((DLOG_RF, "update_meta: failed setting ino %jd lbn %jd"
			" to %jd\n", (intmax_t)ino, (intmax_t)lbn,
			(intmax_t)ndaddr));
#endif /* DEBUG */

	vput(vp);
	return 0;
}

/*
 * Copy some of the fields of the dinode as needed by update_inoblk().
 *
 * Block pointers and di_blocks are deliberately left untouched in the
 * destination; the one exception is a short symlink, whose target text
 * is stored in di_db and is copied along.
 */
static void
update_inoblk_copy_dinode(struct lfs *fs,
    union lfs_dinode *dstu, const union lfs_dinode *srcu)
{
	if (fs->lfs_is64) {
		struct lfs64_dinode *dst = &dstu->u_64;
		const struct lfs64_dinode *src = &srcu->u_64;
		unsigned i;

		/*
		 * Copy everything but the block pointers and di_blocks.
		 * XXX what about di_extb?
		 */
		dst->di_mode = src->di_mode;
		dst->di_nlink = src->di_nlink;
		dst->di_uid = src->di_uid;
		dst->di_gid = src->di_gid;
		dst->di_blksize = src->di_blksize;
		dst->di_size = src->di_size;
		dst->di_atime = src->di_atime;
		dst->di_mtime = src->di_mtime;
		dst->di_ctime = src->di_ctime;
		dst->di_birthtime = src->di_birthtime;
		dst->di_mtimensec = src->di_mtimensec;
		dst->di_atimensec = src->di_atimensec;
		dst->di_ctimensec = src->di_ctimensec;
		dst->di_birthnsec = src->di_birthnsec;
		dst->di_gen = src->di_gen;
		dst->di_kernflags = src->di_kernflags;
		dst->di_flags = src->di_flags;
		dst->di_extsize = src->di_extsize;
		dst->di_modrev = src->di_modrev;
		dst->di_inumber = src->di_inumber;
		for (i = 0; i < __arraycount(src->di_spare); i++) {
			dst->di_spare[i] = src->di_spare[i];
		}
		/* Short symlinks store their data in di_db. */
		if ((src->di_mode & LFS_IFMT) == LFS_IFLNK
		    && src->di_size < lfs_sb_getmaxsymlinklen(fs)) {
			memcpy(dst->di_db, src->di_db, src->di_size);
		}
	} else {
		struct lfs32_dinode *dst = &dstu->u_32;
		const struct lfs32_dinode *src = &srcu->u_32;

		/* Get mode, link count, size, and times */
		memcpy(dst, src, offsetof(struct lfs32_dinode, di_db[0]));

		/* Then the rest, except di_blocks */
		dst->di_flags = src->di_flags;
		dst->di_gen = src->di_gen;
		dst->di_uid = src->di_uid;
		dst->di_gid = src->di_gid;
		dst->di_modrev = src->di_modrev;

		/* Short symlinks store their data in di_db. */
		if ((src->di_mode & LFS_IFMT) == LFS_IFLNK
		    && src->di_size < lfs_sb_getmaxsymlinklen(fs)) {
			memcpy(dst->di_db, src->di_db, src->di_size);
		}
	}
}

/*
 * Inode-block callback for lfs_parse_pseg(): for every inode in the block
 * at lifa->offset whose generation is not older than the one recorded in
 * the Ifile, load (or create) the vnode, refresh its in-core and dinode
 * fields, record the inode's new disk address in the Ifile and move the
 * DINOSIZE byte count between the old and new segments.
 */
static int
update_inoblk(struct lfs_inofuncarg *lifa)
{
	struct lfs *fs;
	daddr_t offset, daddr;
	struct lwp *l;
	struct vnode *devvp, *vp;
	struct inode *ip;
	union lfs_dinode *dip;
	struct buf *dbp, *ibp;
	int error;
	IFILE *ifp;
	SEGUSE *sup;
	unsigned i, num;
	uint32_t gen, osn, nsn;
	char *buf;

	fs = lifa->fs;
	offset = lifa->offset;
	l = lifa->l;
	devvp = VTOI(fs->lfs_ivnode)->i_devvp;

	/*
	 * Get the inode, update times and perms.
	 * DO NOT update disk blocks, we do that separately.
*/ error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs), 0, &dbp); if (error) { DLOG((DLOG_RF, "update_inoblk: bread returned %d\n", error)); return error; } buf = malloc(dbp->b_bcount, M_SEGMENT, M_WAITOK); memcpy(buf, dbp->b_data, dbp->b_bcount); brelse(dbp, BC_AGE); num = LFS_INOPB(fs); for (i = num; i-- > 0; ) { dip = DINO_IN_BLOCK(fs, buf, i); if (lfs_dino_getinumber(fs, dip) <= LFS_IFILE_INUM) continue; /* Check generation number */ LFS_IENTRY(ifp, fs, lfs_dino_getinumber(fs, dip), ibp); gen = lfs_if_getversion(fs, ifp); brelse(ibp, 0); if (lfs_dino_getgen(fs, dip) < gen) { continue; } /* * This inode is the newest generation. Load it. */ error = lfs_rf_valloc(fs, lfs_dino_getinumber(fs, dip), lfs_dino_getgen(fs, dip), l, &vp, dip); if (error) { DLOG((DLOG_RF, "update_inoblk: lfs_rf_valloc" " returned %d\n", error)); continue; } ip = VTOI(vp); if (lfs_dino_getsize(fs, dip) != ip->i_size && vp->v_type != VLNK) { /* XXX What should we do with symlinks? */ DLOG((DLOG_RF, " ino %jd size %jd -> %jd\n", (intmax_t)lfs_dino_getinumber(fs, dip), (intmax_t)ip->i_size, (intmax_t)lfs_dino_getsize(fs, dip))); lfs_truncate(vp, lfs_dino_getsize(fs, dip), 0, NOCRED); } update_inoblk_copy_dinode(fs, ip->i_din, dip); ip->i_flags = lfs_dino_getflags(fs, dip); ip->i_gen = lfs_dino_getgen(fs, dip); ip->i_uid = lfs_dino_getuid(fs, dip); ip->i_gid = lfs_dino_getgid(fs, dip); ip->i_mode = lfs_dino_getmode(fs, dip); ip->i_nlink = lfs_dino_getnlink(fs, dip); ip->i_size = lfs_dino_getsize(fs, dip); LFS_SET_UINO(ip, IN_CHANGE | IN_UPDATE); /* Re-initialize to get type right */ ulfs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p, &vp); /* Record change in location */ LFS_IENTRY(ifp, fs, lfs_dino_getinumber(fs, dip), ibp); daddr = lfs_if_getdaddr(fs, ifp); lfs_if_setdaddr(fs, ifp, offset); error = LFS_BWRITE_LOG(ibp); /* Ifile */ /* And do segment accounting */ osn = lfs_dtosn(fs, daddr); nsn = lfs_dtosn(fs, offset); if (DADDR_IS_BAD(daddr) || osn != nsn) { if 
(!DADDR_IS_BAD(daddr)) { LFS_SEGENTRY(sup, fs, osn, ibp); sup->su_nbytes -= DINOSIZE(fs); LFS_WRITESEGENTRY(sup, fs, osn, ibp); } LFS_SEGENTRY(sup, fs, nsn, ibp); sup->su_nbytes += DINOSIZE(fs); LFS_WRITESEGENTRY(sup, fs, nsn, ibp); } vput(vp); } free(buf, M_SEGMENT); return 0; } /* * Note the highest generation number of each inode in the Ifile. * This allows us to skip processing data for intermediate versions. */ static int update_inogen(struct lfs_inofuncarg *lifa) { struct lfs *fs; daddr_t offset; struct vnode *devvp; union lfs_dinode *dip; struct buf *dbp, *ibp; int error; IFILE *ifp; unsigned i, num; fs = lifa->fs; offset = lifa->offset; devvp = VTOI(fs->lfs_ivnode)->i_devvp; /* Read inode block */ error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs), 0, &dbp); if (error) { DLOG((DLOG_RF, "update_inoblk: bread returned %d\n", error)); return error; } /* Check each inode against ifile entry */ num = LFS_INOPB(fs); for (i = num; i-- > 0; ) { dip = DINO_IN_BLOCK(fs, dbp->b_data, i); if (lfs_dino_getinumber(fs, dip) == LFS_IFILE_INUM) continue; /* Update generation number */ LFS_IENTRY(ifp, fs, lfs_dino_getinumber(fs, dip), ibp); if (lfs_if_getversion(fs, ifp) < lfs_dino_getgen(fs, dip)) lfs_if_setversion(fs, ifp, lfs_dino_getgen(fs, dip)); error = LFS_BWRITE_LOG(ibp); /* Ifile */ if (error) break; } brelse(dbp, 0); return error; } static int finfo_func_rfw(struct lfs_finfofuncarg *lffa) { struct lfs *fs; FINFO *fip; daddr_t *offsetp; struct lwp *l; int j; size_t size; fs = lffa->fs; fip = lffa->finfop; offsetp = lffa->offsetp; l = lffa->l; size = lfs_sb_getbsize(fs); for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) { if (j == lfs_fi_getnblocks(fs, fip) - 1) size = lfs_fi_getlastlength(fs, fip); /* Account for and update any direct blocks */ if (lfs_fi_getino(fs, fip) > LFS_IFILE_INUM && lfs_fi_getblock(fs, fip, j) >= 0) { update_meta(fs, lfs_fi_getino(fs, fip), lfs_fi_getversion(fs, fip), lfs_fi_getblock(fs, fip, j), *offsetp, size, l); ++rblkcnt; 
} *offsetp += lfs_btofsb(fs, size); } return 0; } static int skip_superblock(struct lfs *fs, daddr_t *offsetp) { daddr_t offset; int i; /* * If this is segment 0, skip the label. * If the segment has a superblock and we're at the top * of the segment, skip the superblock. */ offset = *offsetp; if (offset == lfs_sb_gets0addr(fs)) { offset += lfs_btofsb(fs, LFS_LABELPAD); } for (i = 0; i < LFS_MAXNUMSB; i++) { if (offset == lfs_sb_getsboff(fs, i)) { offset += lfs_btofsb(fs, LFS_SBPAD); break; } } *offsetp = offset; return 0; } /* * Read the partial sement at offset. * * If finfo_func and ino_func are both NULL, check the summary * and data checksums. During roll forward, this must be done in its * entirety before processing any blocks. * * If finfo_func is given, use that to process every file block * in the segment summary. If ino_func is given, use that to process * every inode block. */ #define CKSEG_NONE 0x0000 #define CKSEG_CKSUM 0x0001 #define CKSEG_AVAIL 0x0002 int lfs_parse_pseg(struct lfs *fs, daddr_t *offsetp, u_int64_t nextserial, kauth_cred_t cred, int *pseg_flags, struct lwp *l, int (*ino_func)(struct lfs_inofuncarg *), int (*finfo_func)(struct lfs_finfofuncarg *), int flags, void *arg) { struct vnode *devvp; struct buf *bp, *dbp; int error, ninos, i, j; SEGSUM *ssp; daddr_t offset, prevoffset; IINFO *iip; FINFO *fip; size_t size; uint32_t datasum, foundsum; char *buf; struct lfs_inofuncarg lifa; struct lfs_finfofuncarg lffa; KASSERT(fs != NULL); KASSERT(offsetp != NULL); devvp = VTOI(fs->lfs_ivnode)->i_devvp; /* Set up callback arguments */ lifa.fs = fs; /* lifa.offset = offset; */ lifa.cred = cred; lifa.l = l; lifa.buf = malloc(lfs_sb_getbsize(fs), M_SEGMENT, M_WAITOK); lifa.arg = arg; lffa.fs = fs; /* lffa.offsetp = offsetp; */ /* lffa.finfop = finfop; */ lffa.cred = cred; lffa.l = l; lffa.arg = arg; prevoffset = *offsetp; skip_superblock(fs, offsetp); offset = *offsetp; /* Read in the segment summary */ buf = malloc(lfs_sb_getsumsize(fs), M_SEGMENT, 
M_WAITOK); error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getsumsize(fs), 0, &bp); if (error) goto err; memcpy(buf, bp->b_data, bp->b_bcount); brelse(bp, BC_AGE); ssp = (SEGSUM *)buf; /* * Phase I: Check summary checksum. */ if (flags & CKSEG_CKSUM) { size_t sumstart; if (lfs_ss_getmagic(fs, ssp) != SS_MAGIC) { DLOG((DLOG_RF, "Bad magic at 0x%" PRIx64 "\n", offset)); offset = -1; goto err; } sumstart = lfs_ss_getsumstart(fs); if (lfs_ss_getsumsum(fs, ssp) != cksum((char *)ssp + sumstart, lfs_sb_getsumsize(fs) - sumstart)) { DLOG((DLOG_RF, "Sumsum error at 0x%" PRIx64 "\n", offset)); offset = -1; goto err; } #if 0 /* * Under normal conditions, we should never be producing * a partial segment with neither inode blocks nor data blocks. * However, these do sometimes appear and they need not * prevent us from continuing. */ if (lfs_ss_getnfinfo(fs, ssp) == 0 && lfs_ss_getninos(fs, ssp) == 0) { DLOG((DLOG_RF, "Empty pseg at 0x%" PRIx64 "\n", offset)); offset = -1; goto err; } #endif /* 0 */ if (lfs_sb_getversion(fs) == 1) { if (lfs_ss_getcreate(fs, ssp) < lfs_sb_gettstamp(fs)) { DLOG((DLOG_RF, "Old data at 0x%" PRIx64 "\n", offset)); offset = -1; goto err; } } else { if (nextserial > 0 && lfs_ss_getserial(fs, ssp) != nextserial) { DLOG((DLOG_RF, "Serial number at 0x%jx given as 0x%jx," " expected 0x%jx\n", (intmax_t)offset, (intmax_t)lfs_ss_getserial(fs, ssp), (intmax_t)nextserial)); offset = -1; goto err; } if (lfs_ss_getident(fs, ssp) != lfs_sb_getident(fs)) { DLOG((DLOG_RF, "Incorrect fsid (0x%x vs 0x%x) at 0x%" PRIx64 "\n", lfs_ss_getident(fs, ssp), lfs_sb_getident(fs), offset)); offset = -1; goto err; } } } if (pseg_flags) *pseg_flags = lfs_ss_getflags(fs, ssp); offset += lfs_btofsb(fs, lfs_sb_getsumsize(fs)); /* Handle individual blocks */ foundsum = 0; ninos = howmany(lfs_ss_getninos(fs, ssp), LFS_INOPB(fs)); iip = SEGSUM_IINFOSTART(fs, buf); fip = SEGSUM_FINFOBASE(fs, (SEGSUM *)buf); KASSERT(lfs_ss_getnfinfo(fs, ssp) <= lfs_sb_getssize(fs) / 
lfs_sb_getfsize(fs)); KASSERT(lfs_ss_getnfinfo(fs, ssp) <= lfs_sb_getfsize(fs) / sizeof(FINFO32)); for (i = 0; i < lfs_ss_getnfinfo(fs, ssp) || ninos; ++i) { /* Inode block? */ if (ninos && lfs_ii_getblock(fs, iip) == offset) { if (flags & CKSEG_CKSUM) { /* Read in the head and add to the buffer */ error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getbsize(fs), 0, &dbp); if (error) { offset = -1; goto err; } foundsum = lfs_cksum_part(dbp->b_data, sizeof(uint32_t), foundsum); brelse(dbp, BC_AGE); } else if (ino_func != NULL) { lifa.offset = offset; error = (*ino_func)(&lifa); if (error != 0) { offset = -1; goto err; } } offset += lfs_btofsb(fs, lfs_sb_getibsize(fs)); iip = NEXTLOWER_IINFO(fs, iip); --ninos; --i; /* compensate for ++i in loop header */ continue; } /* File block */ size = lfs_sb_getbsize(fs); if (flags & CKSEG_CKSUM) { for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) { if (j == lfs_fi_getnblocks(fs, fip) - 1) size = lfs_fi_getlastlength(fs, fip); error = bread(devvp, LFS_FSBTODB(fs, offset), size, 0, &dbp); if (error) { offset = -1; goto err; } foundsum = lfs_cksum_part(dbp->b_data, sizeof(uint32_t), foundsum); brelse(dbp, BC_AGE); offset += lfs_btofsb(fs, size); } } else if (finfo_func != NULL) { lffa.offsetp = &offset; lffa.finfop = fip; (*finfo_func)(&lffa); } else { int n = lfs_fi_getnblocks(fs, fip); size = lfs_fi_getlastlength(fs, fip); offset += lfs_btofsb(fs, lfs_sb_getbsize(fs) * (n - 1) + size); } fip = NEXT_FINFO(fs, fip); } /* Checksum the array, compare */ if (flags & CKSEG_CKSUM) { datasum = lfs_ss_getdatasum(fs, ssp); foundsum = lfs_cksum_fold(foundsum); if (datasum != foundsum) { DLOG((DLOG_RF, "Datasum error at 0x%" PRIx64 " (wanted %x got %x)\n", offset, datasum, foundsum)); offset = -1; goto err; } } else { /* Don't clog the buffer queue */ mutex_enter(&lfs_lock); if (locked_queue_count > LFS_MAX_BUFS || locked_queue_bytes > LFS_MAX_BYTES) { lfs_flush(fs, SEGM_CKP, 0); } mutex_exit(&lfs_lock); } /* * If we're at the end of the 
segment, move to the next.
	 * A partial segment needs space for a segment header (1 fsb)
	 * and a full block ("frag" fsb). Thus, adding "frag" fsb should
	 * still be within the current segment (whereas frag + 1 might
	 * be at the start of the next segment).
	 *
	 * This needs to match the definition of LFS_PARTIAL_FITS
	 * in lfs_segment.c.
	 */
	if (lfs_dtosn(fs, offset + lfs_sb_getfrag(fs)) !=
	    lfs_dtosn(fs, offset)) {
		/* A "next" pointer into the current segment is rejected. */
		if (lfs_dtosn(fs, offset) == lfs_dtosn(fs, lfs_ss_getnext(fs,
									ssp))) {
			offset = -1;
			goto err;
		}
		offset = lfs_ss_getnext(fs, ssp);
		DLOG((DLOG_RF, "LFS roll forward: moving to offset 0x%" PRIx64
		       " -> segment %d\n", offset, lfs_dtosn(fs,offset)));
	}
	/* Charge the distance covered (in fsb) against the avail count. */
	if (flags & CKSEG_AVAIL)
		lfs_sb_subavail(fs, offset - prevoffset);

 err:
	/* Shared exit: also reached on success by falling through. */
	free(lifa.buf, M_SEGMENT);
	free(buf, M_SEGMENT);

	*offsetp = offset;
	return 0;
}

/*
 * Roll forward.
 *
 * Replays the partial segments written after the checkpoint recorded in
 * the superblock, in four passes over the same range of the log:
 * find the last good partial segment, note the newest generation of
 * each inode, update inodes, then update data blocks.  Does nothing if
 * the filesystem is marked clean, lfs_do_rfw is zero, the filesystem
 * version is 1 or lower, or no process is associated with l.
 */
void
lfs_roll_forward(struct lfs *fs, struct mount *mp, struct lwp *l)
{
	int flags, dirty;
	daddr_t startoffset, offset, nextoffset, endpseg;
	u_int64_t nextserial, startserial, endserial;
	int sn, curseg;
	struct proc *p;
	kauth_cred_t cred;
	SEGUSE *sup;
	struct buf *bp;

	p = l ? l->l_proc : NULL;
	cred = p ? p->p_cred : NOCRED;

	/*
	 * We don't roll forward for v1 filesystems, because
	 * of the danger that the clock was turned back between the last
	 * checkpoint and crash. This would roll forward garbage.
	 *
	 * v2 filesystems don't have this problem because they use a
	 * monotonically increasing serial number instead of a timestamp.
	 */
	rblkcnt = 0;
	if ((lfs_sb_getpflags(fs) & LFS_PF_CLEAN) || !lfs_do_rfw
	    || lfs_sb_getversion(fs) <= 1 || p == NULL)
		return;

	DLOG((DLOG_RF, "%s: begin roll forward at serial 0x%jx\n",
		lfs_sb_getfsmnt(fs), (intmax_t)lfs_sb_getserial(fs)));
	DEBUG_CHECK_FREELIST(fs);

	/*
	 * Phase I: Find the address of the last good partial
	 * segment that was written after the checkpoint. Mark
	 * the segments in question dirty, so they won't be
	 * reallocated.
	 */
	endpseg = startoffset = offset = lfs_sb_getoffset(fs);
	flags = 0x0;
	DLOG((DLOG_RF, "LFS roll forward phase 1: start at offset 0x%"
	      PRIx64 "\n", offset));
	LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);
	if (!(sup->su_flags & SEGUSE_DIRTY))
		lfs_sb_subnclean(fs, 1);
	sup->su_flags |= SEGUSE_DIRTY;
	LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);

	startserial = lfs_sb_getserial(fs);
	endserial = nextserial = startserial + 1;
	nextoffset = offset;
	while (1) {
		nextoffset = offset;
		lfs_parse_pseg(fs, &nextoffset, nextserial,
			       cred, &flags, l, NULL, NULL, CKSEG_CKSUM, NULL);
		/* -1 means the partial segment was rejected: end of log. */
		if (nextoffset == -1)
			break;

		/*
		 * NOTE(review): lfs_sntod() is applied to disk addresses
		 * here, while elsewhere in this function it is applied to
		 * segment numbers.  Confirm whether lfs_dtosn() was
		 * intended for this comparison.
		 */
		if (lfs_sntod(fs, offset) != lfs_sntod(fs, nextoffset)) {
			LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);
			if (!(sup->su_flags & SEGUSE_DIRTY))
				lfs_sb_subnclean(fs, 1);
			sup->su_flags |= SEGUSE_DIRTY;
			LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);
		}

		DLOG((DLOG_RF, "LFS roll forward phase 1: offset=0x%jx"
			" serial=0x%jx\n", (intmax_t)nextoffset,
			(intmax_t)nextserial));
		if (flags & SS_DIROP) {
			DLOG((DLOG_RF, "lfs_mountfs: dirops at 0x%"
			      PRIx64 "\n", offset));
			if (!(flags & SS_CONT)) {
				DLOG((DLOG_RF, "lfs_mountfs: dirops end "
				      "at 0x%" PRIx64 "\n", offset));
			}
		}
		offset = nextoffset;
		++nextserial;

		/*
		 * Only a partial segment without SS_CONT moves the
		 * recovery end point forward.
		 */
		if (!(flags & SS_CONT)) {
			endpseg = nextoffset;
			endserial = nextserial;
		}
		/* Optional cap on how many partial segments are examined. */
		if (lfs_rfw_max_psegs > 0
		    && nextserial > startserial + lfs_rfw_max_psegs)
			break;
	}
	if (flags & SS_CONT) {
		/*
		 * NOTE(review): unlike the other DLOG calls in this
		 * function, these %jx arguments are passed without an
		 * (intmax_t) cast.
		 */
		DLOG((DLOG_RF, "LFS roll forward: warning: incomplete "
			"dirops discarded (0x%jx < 0x%jx)\n",
			endpseg, nextoffset));
	}
	if (lfs_sb_getversion(fs) > 1)
		lfs_sb_setserial(fs, endserial);
	DLOG((DLOG_RF, "LFS roll forward phase 1: completed: "
	      "endpseg=0x%" PRIx64 "\n", endpseg));
	offset = startoffset;
	if (offset != endpseg) {
		/* Don't overwrite what we're trying to preserve */
		lfs_sb_setoffset(fs, endpseg);
		lfs_sb_setcurseg(fs, lfs_sntod(fs, lfs_dtosn(fs, endpseg)));
		/* Scan forward, wrapping around, for a non-dirty segment. */
		for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs));;) {
			sn = (sn + 1) % lfs_sb_getnseg(fs);
			/* XXX could we just fail to roll forward? */
			if (sn == curseg)
				panic("lfs_mountfs: no clean segments");
			LFS_SEGENTRY(sup, fs, sn, bp);
			dirty = (sup->su_flags & SEGUSE_DIRTY);
			brelse(bp, 0);
			if (!dirty)
				break;
		}
		lfs_sb_setnextseg(fs, lfs_sntod(fs, sn));
		/* Explicitly set this segment dirty */
		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, endpseg), bp);
		sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
		LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, endpseg), bp);

		/*
		 * Phase II: Identify the highest generation of each
		 * inode. We will ignore inodes and data blocks
		 * belonging to old versions.
		 */
		offset = startoffset;
		nextserial = startserial + 1;
		DLOG((DLOG_RF, "LFS roll forward phase 2 beginning\n"));
		while (offset > 0 && offset != endpseg) {
			lfs_parse_pseg(fs, &offset, nextserial++, cred,
				       NULL, l, update_inogen, NULL,
				       CKSEG_NONE, NULL);
			DEBUG_CHECK_FREELIST(fs);
		}

		/*
		 * Phase III: Update inodes.
		 */
		offset = startoffset;
		nextserial = startserial + 1;
		DLOG((DLOG_RF, "LFS roll forward phase 3 beginning\n"));
		while (offset > 0 && offset != endpseg) {
			lfs_parse_pseg(fs, &offset, nextserial++, cred,
				       NULL, l, update_inoblk, NULL,
				       CKSEG_NONE, NULL);
			DEBUG_CHECK_FREELIST(fs);
		}

		/*
		 * Phase IV: Roll forward, updating data blocks.
		 */
		offset = startoffset;
		nextserial = startserial + 1;
		DLOG((DLOG_RF, "LFS roll forward phase 4 beginning\n"));
		while (offset > 0 && offset != endpseg) {
			lfs_parse_pseg(fs, &offset, nextserial++, cred,
				       NULL, l, NULL, finfo_func_rfw,
				       CKSEG_AVAIL, NULL);
			DEBUG_CHECK_FREELIST(fs);
		}

		/*
		 * Finish: flush our changes to disk.
		 */
		lfs_sb_setserial(fs, endserial);

		lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC);
		DLOG((DLOG_RF, "lfs_mountfs: roll forward "
		      "examined %jd blocks\n",
		      (intmax_t)(endpseg - startoffset)));
	}

	/* Get rid of our vnodes, except the ifile */
	drop_vnode_pages(mp, l);
	DLOG((DLOG_RF, "LFS roll forward complete\n"));
	printf("%s: roll forward recovered %d data blocks\n",
		lfs_sb_getfsmnt(fs), rblkcnt);

	/*
	 * At this point we have no more changes to write to disk.
	 * Reset the "avail" count to match the segments as they
	 * appear on disk, and the clean segment count.
	 */
	lfs_reset_avail(fs);
}

/* Vnode iterator selector that accepts every vnode. */
static bool
all_selector(void *cl, struct vnode *vp)
{
	return true;
}

/*
 * Dump any pages from vnodes that may have been put on
 * during truncation.
 *
 * Every vnode on the mount except the Ifile has its VM size set to 0
 * and then back to the inode's size.  The lwp argument l is not used.
 */
static void
drop_vnode_pages(struct mount *mp, struct lwp *l)
{
	struct vnode_iterator *marker;
	struct lfs *fs;
	struct vnode *vp;

	fs = VFSTOULFS(mp)->um_lfs;
	vfs_vnode_iterator_init(mp, &marker);
	while ((vp = vfs_vnode_iterator_next(marker,
	    all_selector, NULL)) != NULL) {
		/*
		 * NOTE(review): the Ifile vnode is skipped without the
		 * vrele() that every other vnode returned by the iterator
		 * gets below - confirm this is intended.
		 */
		if (vp == fs->lfs_ivnode)
			continue;
		VOP_LOCK(vp, LK_EXCLUSIVE | LK_RETRY);
		uvm_vnp_setsize(vp, 0);
		uvm_vnp_setsize(vp, VTOI(vp)->i_size);
		VOP_UNLOCK(vp);
		vrele(vp);
	}
	vfs_vnode_iterator_destroy(marker);
}

/*
 * Inode-block callback for lfs_parse_pseg(): for each inode in the block
 * at lifa->offset that is still current (the Ifile entry, or for the
 * Ifile itself the superblock, still points at this block), try to get
 * the vnode without waiting and call lfs_setclean() on it.
 */
static int
ino_func_setclean(struct lfs_inofuncarg *lifa)
{
	struct lfs *fs;
	daddr_t offset;
	struct vnode *devvp, *vp;
	union lfs_dinode *dip;
	struct buf *dbp, *ibp;
	int error;
	IFILE *ifp;
	unsigned i, num;
	daddr_t true_addr;
	ino_t ino;

	fs = lifa->fs;
	offset = lifa->offset;
	devvp = VTOI(fs->lfs_ivnode)->i_devvp;

	/* Read inode block */
	error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs),
	    0, &dbp);
	if (error) {
		DLOG((DLOG_RF, "ino_func_setclean: bread returned %d\n",
		      error));
		return error;
	}
	/* Copy into the caller-provided scratch buffer, then release. */
	memcpy(lifa->buf, dbp->b_data, dbp->b_bcount);
	brelse(dbp, BC_AGE);

	/* Check each inode against ifile entry */
	num = LFS_INOPB(fs);
	for (i = num; i-- > 0; ) {
		dip = DINO_IN_BLOCK(fs, lifa->buf, i);
		ino = lfs_dino_getinumber(fs, dip);
		if (ino == LFS_IFILE_INUM) {
			/* Check address against superblock */
			true_addr = lfs_sb_getidaddr(fs);
		} else {
			/* Not ifile. Check address against ifile. */
			LFS_IENTRY(ifp, fs, ino, ibp);
			true_addr = lfs_if_getdaddr(fs, ifp);
			brelse(ibp, 0);
		}
		/* A different current address means this copy is stale. */
		if (offset != true_addr)
			continue;

		/* XXX We can use fastvget here! */

		/*
		 * An inode we need to relocate.
		 * Get it if we can.
*/ error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE | LK_NOWAIT, &vp); if (error) continue; KASSERT(VTOI(vp)->i_gen == lfs_dino_getgen(fs, dip)); lfs_setclean(fs, vp); VOP_UNLOCK(vp); vrele(vp); } return error; } static int ino_func_rewrite(struct lfs_inofuncarg *lifa) { struct lfs *fs; daddr_t offset; struct vnode *devvp, *vp; union lfs_dinode *dip; struct buf *dbp, *ibp; int error; IFILE *ifp; unsigned i, num; daddr_t true_addr; ino_t ino; fs = lifa->fs; offset = lifa->offset; devvp = VTOI(fs->lfs_ivnode)->i_devvp; /* Read inode block */ error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs), 0, &dbp); if (error) { DLOG((DLOG_RF, "ino_func_rewrite: bread returned %d\n", error)); return error; } memcpy(lifa->buf, dbp->b_data, dbp->b_bcount); brelse(dbp, BC_AGE); /* Check each inode against ifile entry */ num = LFS_INOPB(fs); for (i = num; i-- > 0; ) { dip = DINO_IN_BLOCK(fs, lifa->buf, i); ino = lfs_dino_getinumber(fs, dip); if (ino == LFS_IFILE_INUM) { /* Check address against superblock */ true_addr = lfs_sb_getidaddr(fs); } else { /* Not ifile. Check address against ifile. */ LFS_IENTRY(ifp, fs, ino, ibp); true_addr = lfs_if_getdaddr(fs, ifp); brelse(ibp, 0); } if (offset != true_addr) continue; if (ino == LFS_IFILE_INUM) continue; /* XXX We can use fastvget here! */ /* * An inode we need to relocate. * Get it if we can. */ error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE | LK_NOWAIT, &vp); if (error) continue; KASSERT(VTOI(vp)->i_gen == lfs_dino_getgen(fs, dip)); if (!(VTOI(vp)->i_state & IN_CLEANING)) { lfs_setclean(fs, vp); lfs_writeinode(fs, fs->lfs_sp, VTOI(vp)); } VOP_UNLOCK(vp); vrele(vp); } return error; } static int rewrite_block(struct lfs *fs, struct vnode *vp, daddr_t lbn, daddr_t offset, size_t size, int *have_finfop) { daddr_t daddr; int error; struct buf *bp; struct inode *ip; KASSERT(have_finfop != NULL); /* Look up current location of this block. 
*/ error = VOP_BMAP(vp, lbn, NULL, &daddr, NULL); if (error) return error; /* Skip any block that is not here. */ if (offset != 0 && LFS_DBTOFSB(fs, daddr) != offset) return ESTALE; /* * It is (was recently) here. Read the block. */ //size = lfs_blksize(fs, VTOI(vp), lbn); error = bread(vp, lbn, size, 0, &bp); if (error) return error; if (vp == fs->lfs_ivnode) { VOP_BWRITE(vp, bp); } else { /* Get ready to write. */ if (!*have_finfop) { ip = VTOI(vp); lfs_acquire_finfo(fs, ip->i_number, ip->i_gen); fs->lfs_sp->vp = vp; *have_finfop = 1; } KASSERT(bp->b_vp == vp); /* bp->b_cflags |= BC_INVAL; */ /* brelse will kill the buffer */ lfs_bwrite_ext(bp, BW_CLEAN); KASSERT(bp->b_vp == vp); mutex_enter(&bufcache_lock); while (lfs_gatherblock(fs->lfs_sp, bp, &bufcache_lock)) { KASSERT(bp->b_vp != NULL); } mutex_exit(&bufcache_lock); KASSERT(bp->b_flags & B_GATHERED); KASSERT(fs->lfs_sp->cbpp[-1] == bp); } return 0; } static int finfo_func_rewrite(struct lfs_finfofuncarg *lffa) { struct lfs *fs; FINFO *fip; daddr_t *offsetp; int j, have_finfo, error; size_t size, bytes; ino_t ino; uint32_t gen; struct vnode *vp; daddr_t lbn; int *fragsp; fs = lffa->fs; fip = lffa->finfop; offsetp = lffa->offsetp; fragsp = (int *)lffa->arg; /* Get the inode and check its version. */ ino = lfs_fi_getino(fs, fip); gen = lfs_fi_getversion(fs, fip); error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE|LK_NOWAIT, &vp); /* * If we can't, or if version is wrong, or it has dirop blocks on it, * we can't relocate its blocks; but we still have to count * blocks through the partial segment to return the right offset. * XXX actually we can move DIROP vnodes' *old* data, as long * XXX as we are sure that we are moving *only* the old data---? 
*/ if (error || VTOI(vp)->i_gen != gen || (vp->v_uflag & VU_DIROP)) { if (error == 0) error = ESTALE; if (vp != NULL) { VOP_UNLOCK(vp); vrele(vp); vp = NULL; } bytes = ((lfs_fi_getnblocks(fs, fip) - 1) << lfs_sb_getbshift(fs)) + lfs_fi_getlastlength(fs, fip); *offsetp += lfs_btofsb(fs, bytes); return error; } /* * We have the vnode and its version is correct. * Take a cleaning reference; and loop through the blocks * and rewrite them. */ lfs_setclean(fs, vp); size = lfs_sb_getbsize(fs); have_finfo = 0; for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) { if (j == lfs_fi_getnblocks(fs, fip) - 1) size = lfs_fi_getlastlength(fs, fip); /* * An error of ESTALE indicates that there was nothing * to rewrite; this is not a problem. Any other error * causes us to skip the rest of this FINFO. */ if (vp != NULL && error == 0) { lbn = lfs_fi_getblock(fs, fip, j); error = rewrite_block(fs, vp, lbn, *offsetp, size, &have_finfo); if (error == ESTALE) error = 0; if (fragsp != NULL && error == 0) *fragsp += lfs_btofsb(fs, size); } *offsetp += lfs_btofsb(fs, size); } /* * If we acquired finfo, release it and write the blocks. */ if (have_finfo) { lfs_updatemeta(fs->lfs_sp); fs->lfs_sp->vp = NULL; lfs_release_finfo(fs); lfs_writeinode(fs, fs->lfs_sp, VTOI(vp)); } /* Release vnode */ VOP_UNLOCK(vp); vrele(vp); return error; } static int finfo_func_setclean(struct lfs_finfofuncarg *lffa) { struct lfs *fs; FINFO *fip; daddr_t *offsetp; int error; size_t bytes; ino_t ino; uint32_t gen; struct vnode *vp; fs = lffa->fs; fip = lffa->finfop; offsetp = lffa->offsetp; /* Get the inode and check its version. 
*/ ino = lfs_fi_getino(fs, fip); gen = lfs_fi_getversion(fs, fip); error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE|LK_NOWAIT, &vp); /* If we have it and its version is right, take a cleaning reference */ if (error == 0 && VTOI(vp)->i_gen == gen) lfs_setclean(fs, vp); if (vp != NULL) { VOP_UNLOCK(vp); vrele(vp); vp = NULL; } /* Skip to the next block */ bytes = ((lfs_fi_getnblocks(fs, fip) - 1) << lfs_sb_getbshift(fs)) + lfs_fi_getlastlength(fs, fip); *offsetp += lfs_btofsb(fs, bytes); return error; } /* * Use the partial-segment parser to rewrite (clean) a segment. */ int lfs_rewrite_segment(struct lfs *fs, int sn, int *fragsp, kauth_cred_t cred, struct lwp *l) { daddr_t ooffset, offset, endpseg; ASSERT_SEGLOCK(fs); offset = lfs_sntod(fs, sn); skip_superblock(fs, &offset); endpseg = lfs_sntod(fs, sn + 1); while (offset > 0 && offset != endpseg) { /* First check summary validity (XXX unnecessary?) */ ooffset = offset; lfs_parse_pseg(fs, &offset, 0, cred, NULL, l, NULL, NULL, CKSEG_CKSUM, NULL); if (offset == ooffset) break; /* * Valid, proceed. * * First write the file blocks, marking their * inodes IN_CLEANING. */ offset = ooffset; lfs_parse_pseg(fs, &offset, 0, cred, NULL, l, NULL, finfo_func_rewrite, CKSEG_NONE, fragsp); /* * Now go back and pick up any inodes that * were not already marked IN_CLEANING, and * write them as well. */ offset = ooffset; lfs_parse_pseg(fs, &offset, 0, cred, NULL, l, ino_func_rewrite, NULL, CKSEG_NONE, fragsp); } return 0; } /* * Rewrite the contents of one or more segments, in preparation for * marking them clean. */ int lfs_rewrite_segments(struct lfs *fs, int *snn, int len, int *directp, int *offsetp, struct lwp *l) { kauth_cred_t cred; int i, error; struct buf *bp; SEGUSE *sup; daddr_t offset, endpseg; ASSERT_NO_SEGLOCK(fs); cred = l ? l->l_cred : NOCRED; /* Prevent new dirops and acquire the cleaner lock. 
*/ lfs_writer_enter(fs, "rewritesegs"); if ((error = lfs_cleanerlock(fs)) != 0) { lfs_writer_leave(fs); return error; } /* * Pre-reference vnodes now that we have cleaner lock * but before we take the segment lock. We don't want to * mix cleaning blocks with flushed vnodes. */ for (i = 0; i < len; i++) { error = 0; /* Refuse to clean segments that are ACTIVE */ LFS_SEGENTRY(sup, fs, snn[i], bp); if (sup->su_flags & SEGUSE_ACTIVE || !(sup->su_flags & SEGUSE_DIRTY)) error = EINVAL; brelse(bp, 0); if (error) break; offset = lfs_sntod(fs, snn[i]); skip_superblock(fs, &offset); endpseg = lfs_sntod(fs, snn[i] + 1); while (offset > 0 && offset != endpseg) { lfs_parse_pseg(fs, &offset, 0, cred, NULL, l, ino_func_setclean, finfo_func_setclean, CKSEG_NONE, NULL); } } /* * Actually rewrite the contents of the segment. */ lfs_seglock(fs, SEGM_CLEAN); for (i = 0; i < len; i++) { error = 0; /* Refuse to clean segments that are ACTIVE */ LFS_SEGENTRY(sup, fs, snn[i], bp); if (sup->su_flags & SEGUSE_ACTIVE || !(sup->su_flags & SEGUSE_DIRTY)) error = EINVAL; brelse(bp, 0); if (error) break; error = lfs_rewrite_segment(fs, snn[i], directp, cred, l); if (error) { printf(" rewrite_segment returned %d\n", error); break; } } while (lfs_writeseg(fs, fs->lfs_sp)) ; *offsetp = lfs_btofsb(fs, fs->lfs_sp->bytes_written); lfs_segunlock(fs); lfs_cleanerunlock(fs); lfs_writer_leave(fs); return error; } #if 0 static bool lfs_isseq(const struct lfs *fs, long int lbn1, long int lbn2) { return lbn2 == lbn1 + lfs_sb_getfrag(__UNCONST(fs)); } /* * Rewrite the contents of a file in order to coalesce it. * We don't bother rewriting indirect blocks because they will have to * be rewritten anyway when we rewrite the direct blocks. 
*/ int lfs_rewrite_file(struct lfs *fs, ino_t ino, struct lwp *l) { daddr_t lbn, hiblk, daddr; int i, error, num, run; struct vnode *vp; struct indir indirs[ULFS_NIADDR+2]; size_t size; ASSERT_SEGLOCK(fs); error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp); if (error) return error; lfs_acquire_finfo(fs, ino, VTOI(vp)->i_gen); for (lbn = 0, hiblk = VTOI(vp)->i_lfs_hiblk; lbn < hiblk; ++lbn) { error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, &run, lfs_isseq); if (daddr == UNASSIGNED) continue; for (i = 0; i <= run; i++) { size = lfs_blksize(fs, VTOI(vp), lbn); error = rewrite_block(fs, vp, lbn++, 0x0, size, NULL); if (error) break; } } lfs_release_finfo(fs); while (lfs_writeseg(fs, fs->lfs_sp)) ; lfs_segunlock(fs); return error; } #endif /* 0 */ static int ino_func_checkempty(struct lfs_inofuncarg *lifa) { struct lfs *fs; daddr_t offset; struct vnode *devvp; union lfs_dinode *dip; struct buf *dbp, *ibp; int error; IFILE *ifp; unsigned i, num; daddr_t true_addr; ino_t ino; fs = lifa->fs; offset = lifa->offset; devvp = VTOI(fs->lfs_ivnode)->i_devvp; /* Read inode block */ error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs), 0, &dbp); if (error) { DLOG((DLOG_RF, "ino_func_checkempty: bread returned %d\n", error)); return error; } /* Check each inode against ifile entry */ num = LFS_INOPB(fs); for (i = num; i-- > 0; ) { dip = DINO_IN_BLOCK(fs, dbp->b_data, i); ino = lfs_dino_getinumber(fs, dip); if (ino == LFS_IFILE_INUM) { /* Check address against superblock */ true_addr = lfs_sb_getidaddr(fs); } else { /* Not ifile. Check address against ifile. 
*/ LFS_IENTRY(ifp, fs, ino, ibp); true_addr = lfs_if_getdaddr(fs, ifp); brelse(ibp, 0); } if (offset == true_addr) { error = EEXIST; break; } } brelse(dbp, BC_AGE); return error; } static int finfo_func_checkempty(struct lfs_finfofuncarg *lffa) { struct lfs *fs; FINFO *fip; daddr_t *offsetp; int j, error; size_t size, bytes; ino_t ino; uint32_t gen; struct vnode *vp; daddr_t lbn, daddr; fs = lffa->fs; fip = lffa->finfop; offsetp = lffa->offsetp; /* Get the inode and check its version. */ ino = lfs_fi_getino(fs, fip); gen = lfs_fi_getversion(fs, fip); error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp); /* * If we can't, or if version is wrong, this FINFO does not refer * to a live file. Skip over it and continue. */ if (error || VTOI(vp)->i_gen != gen) { if (error == 0) error = ESTALE; if (vp != NULL) { VOP_UNLOCK(vp); vrele(vp); vp = NULL; } bytes = ((lfs_fi_getnblocks(fs, fip) - 1) << lfs_sb_getbshift(fs)) + lfs_fi_getlastlength(fs, fip); *offsetp += lfs_btofsb(fs, bytes); return error; } /* * We have the vnode and its version is correct. * Loop through the blocks and check their currency. */ size = lfs_sb_getbsize(fs); for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) { if (j == lfs_fi_getnblocks(fs, fip) - 1) size = lfs_fi_getlastlength(fs, fip); if (vp != NULL) { lbn = lfs_fi_getblock(fs, fip, j); /* Look up current location of this block. */ error = VOP_BMAP(vp, lbn, NULL, &daddr, NULL); if (error) break; /* If it is here, the segment is not empty. 
*/ if (LFS_DBTOFSB(fs, daddr) == *offsetp) { error = EEXIST; break; } } *offsetp += lfs_btofsb(fs, size); } /* Release vnode */ VOP_UNLOCK(vp); vrele(vp); return error; } int lfs_checkempty(struct lfs *fs, int sn, kauth_cred_t cred, struct lwp *l) { daddr_t offset, endpseg; int error; ASSERT_SEGLOCK(fs); offset = lfs_sntod(fs, sn); skip_superblock(fs, &offset); endpseg = lfs_sntod(fs, sn + 1); while (offset > 0 && offset < endpseg) { error = lfs_parse_pseg(fs, &offset, 0, cred, NULL, l, ino_func_checkempty, finfo_func_checkempty, CKSEG_NONE, NULL); if (error) return error; } return 0; }