diff options
-rw-r--r-- | ChangeLog.txt | 4 | ||||
-rw-r--r-- | doc/database | 32 | ||||
-rw-r--r-- | guts/rename.c | 41 | ||||
-rw-r--r-- | guts/rmdir.c | 27 | ||||
-rw-r--r-- | guts/unlinkat.c | 28 | ||||
-rw-r--r-- | pseudo.c | 75 | ||||
-rw-r--r-- | pseudo.h | 3 | ||||
-rw-r--r-- | pseudo_client.c | 32 | ||||
-rw-r--r-- | pseudo_db.c | 127 | ||||
-rw-r--r-- | pseudo_db.h | 3 | ||||
-rw-r--r-- | pseudo_ipc.h | 1 | ||||
-rw-r--r-- | pseudo_table.c | 3 |
12 files changed, 273 insertions, 103 deletions
diff --git a/ChangeLog.txt b/ChangeLog.txt index b9953bd..057d25a 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -1,3 +1,7 @@ +2010-08-17: + * (seebs) create speculative-deletion logic + * (seebs) remove crackpot theories about cross-device renames + 2010-08-16: * (rp) Fix ld_preload/ld_library_path mixup. * (seebs) Handle failed allocations. diff --git a/doc/database b/doc/database index da8f750..7003aff 100644 --- a/doc/database +++ b/doc/database @@ -14,6 +14,7 @@ FILES: gid (integer) mode (integer) rdev (integer) + deleting (integer) There are two indexes on the file database, one by path and one by device and inode. Earlier versions of pseudo ignored symlinks, but this turned @@ -30,10 +31,38 @@ to obtain the device and inode, then modifying all matching records. If a file shows up with no name (this should VERY rarely happen), it is stored in the database with the special name 'NAMELESS FILE'. This name can never -be sent by the client (all names are sent as absolute paths). If a later +be sent by the client (all names are sent as absolute paths). If a laterThe request comes in with a valid name, the 'NAMELESS FILE' is renamed to it so it can be unlinked later. +The "deleting" field is used to track files which are in the midst of +being possibly-deleted. The issue is that if you issue an unlink database +operation only after removing a file, there is a window during which +another program can genuinely create a new file with the same name or +inode, and that request can reach the daemon before the unlink operation +does. To address this, three operations are addded: may-unlink, did-unlink, +and cancel-unlink. The may-unlink operation tags a file for possible +deletion, setting "deleting" to 1. A clash involving a file marked for +deletion is resolved by deleting the existing database entry without +complaint. A did-unlink operation deletes a file ONLY if it is marked +for deletion. A cancel-unlink operation unmarks a file for deletion. + +You can still have a race condition here, but it seems a lot less likely. +The failure would be: + Process A marks file X for deletion. + Process A unlinks file X. + Process B creates file X. + Process B requests a link for X. + The server unlinks the previously marked X. + The server creates a new link for X, not marked for deletion. + Process B marks file X for deletion. + Process A's delete-marked-file request finally shows up. + The server unlinks the newly-marked X. + Process B fails to delete the file. + Process B attempts to cancel the mark-for-deletion. + +This shouldn't be a common occurrence. + Rename operations use a pair of paths, separated by a null byte; the client sends the total length of both names (plus the null byte), and the server knows to split them around the null byte. The impact of a rename on things @@ -88,3 +117,4 @@ opened. These values are not completely reliable. A value of 0 is typical for non-open operations, and a value outside the 0-15 range (usually -1) indicates that something went wrong trying to identify the mode of a given open. + diff --git a/guts/rename.c b/guts/rename.c index 973b164..8d4106b 100644 --- a/guts/rename.c +++ b/guts/rename.c @@ -10,6 +10,7 @@ struct stat64 oldbuf, newbuf; int oldrc, newrc; int save_errno; + int old_db_entry = 0; pseudo_debug(2, "rename: %s->%s\n", oldpath ? oldpath : "<nil>", @@ -28,24 +29,24 @@ errno = save_errno; /* newpath must be removed. */ - /* as with unlink, we have to do the remove before the operation - */ - msg = pseudo_client_op(OP_UNLINK, 0, -1, -1, newpath, newrc ? NULL : &newbuf); - /* stash the server's old data */ + /* as with unlink, we have to mark that the file may get deleted */ + msg = pseudo_client_op(OP_MAY_UNLINK, 0, -1, -1, newpath, newrc ? NULL : &newbuf); + if (msg && msg->result == RESULT_SUCCEED) + old_db_entry = 1; rc = real_rename(oldpath, newpath); save_errno = errno; - if (rc == -1) { - if (msg && msg->result == RESULT_SUCCEED) { - newbuf.st_uid = msg->uid; - newbuf.st_gid = msg->uid; - newbuf.st_mode = msg->mode; - newbuf.st_dev = msg->dev; - newbuf.st_ino = msg->ino; + if (old_db_entry) { + if (rc == -1) { /* since we failed, that wasn't really unlinked -- put * it back. */ - pseudo_client_op(OP_LINK, 0, -1, -1, newpath, &newbuf); + pseudo_client_op(OP_CANCEL_UNLINK, 0, -1, -1, newpath, &newbuf); + } else { + /* confirm that the file was removed */ + pseudo_client_op(OP_DID_UNLINK, 0, -1, -1, newpath, &newbuf); } + } + if (rc == -1) { /* and we're done. */ errno = save_errno; return rc; @@ -72,21 +73,7 @@ * we may have to rename or remove directory trees even though in * theory rename can never destroy a directory tree. */ - - /* re-stat the new file. Why? Because if something got moved - * across device boundaries, its dev/ino changed! - */ - newrc = real___lxstat64(_STAT_VER, newpath, &newbuf); - if (msg && msg->result == RESULT_SUCCEED) { - pseudo_stat_msg(&oldbuf, msg); - if (newrc == 0) { - if (newbuf.st_dev != oldbuf.st_dev) { - oldbuf.st_dev = newbuf.st_dev; - oldbuf.st_ino = newbuf.st_ino; - } - } - pseudo_debug(1, "renaming %s, got old mode of 0%o\n", oldpath, (int) msg->mode); - } else { + if (!old_db_entry) { /* create an entry under the old name, which will then be * renamed; this way, children would get renamed too, if there * were any. diff --git a/guts/rmdir.c b/guts/rmdir.c index f3cd9e4..af7fb7e 100644 --- a/guts/rmdir.c +++ b/guts/rmdir.c @@ -6,30 +6,29 @@ * wrap_rmdir(const char *path) { * int rc = -1; */ - pseudo_msg_t *old_file; + pseudo_msg_t *msg; struct stat64 buf; int save_errno; + int old_db_entry = 0; rc = real___lxstat64(_STAT_VER, path, &buf); if (rc == -1) { return rc; } - old_file = pseudo_client_op(OP_UNLINK, 0, -1, -1, path, &buf); + msg = pseudo_client_op(OP_MAY_UNLINK, 0, -1, -1, path, &buf); + if (msg && msg->result == RESULT_SUCCEED) + old_db_entry = 1; rc = real_rmdir(path); - if (rc == -1) { - save_errno = errno; - if (old_file && old_file->result == RESULT_SUCCEED) { - pseudo_debug(1, "rmdir failed, trying to relink...\n"); - buf.st_uid = old_file->uid; - buf.st_gid = old_file->uid; - buf.st_mode = old_file->mode; - buf.st_dev = old_file->dev; - buf.st_ino = old_file->ino; - pseudo_client_op(OP_LINK, 0, -1, -1, path, &buf); + if (old_db_entry) { + if (rc == -1) { + save_errno = errno; + pseudo_client_op(OP_CANCEL_UNLINK, 0, -1, -1, path, &buf); + errno = save_errno; } else { - pseudo_debug(1, "rmdir failed, but found no database entry, ignoring.\n"); + pseudo_client_op(OP_DID_UNLINK, 0, -1, -1, path, &buf); } - errno = save_errno; + } else { + pseudo_debug(1, "rmdir on <%s>, not in database, no effect.\n", path); } /* return rc; diff --git a/guts/unlinkat.c b/guts/unlinkat.c index 13fdc82..3bfda81 100644 --- a/guts/unlinkat.c +++ b/guts/unlinkat.c @@ -6,9 +6,10 @@ * wrap_unlinkat(int dirfd, const char *path, int rflags) { * int rc = -1; */ - pseudo_msg_t *old_file; + pseudo_msg_t *msg; int save_errno; struct stat64 buf; + int old_db_entry; #ifdef PSEUDO_NO_REAL_AT_FUNCTIONS if (dirfd != AT_FDCWD) { @@ -35,27 +36,26 @@ if (rc == -1) { return rc; } - old_file = pseudo_client_op(OP_UNLINK, 0, -1, dirfd, path, &buf); + msg = pseudo_client_op(OP_MAY_UNLINK, 0, -1, dirfd, path, &buf); + if (msg && msg->result == RESULT_SUCCEED) + old_db_entry = 1; #ifdef PSEUDO_NO_REAL_AT_FUNCTIONS rc = real_unlink(path); #else rc = real_unlinkat(dirfd, path, rflags); #endif - if (rc == -1) { - save_errno = errno; - if (old_file && old_file->result == RESULT_SUCCEED) { - pseudo_debug(1, "unlink failed, trying to relink...\n"); - buf.st_uid = old_file->uid; - buf.st_gid = old_file->uid; - buf.st_mode = old_file->mode; - buf.st_dev = old_file->dev; - buf.st_ino = old_file->ino; - pseudo_client_op(OP_LINK, 0, -1, dirfd, path, &buf); + if (old_db_entry) { + if (rc == -1) { + save_errno = errno; + pseudo_client_op(OP_CANCEL_UNLINK, 0, -1, -1, path, &buf); + errno = save_errno; } else { - pseudo_debug(1, "unlink failed, but found no database entry, ignoring.\n"); + pseudo_client_op(OP_DID_UNLINK, 0, -1, -1, path, &buf); } - errno = save_errno; + } else { + pseudo_debug(1, "unlink on <%s>, not in database, no effect.\n", path); } + /* return rc; * } */ @@ -403,21 +403,24 @@ pseudo_op(pseudo_msg_t *msg, const char *program, const char *tag) { */ if (by_path.ino != msg_header.ino && msg_header.ino != 0) { switch (msg->op) { - case OP_EXEC: /* FALLTHROUGH */ - case OP_RENAME: - /* A rename that crossed a filesystem can change the inode - * number legitimately. - */ - pseudo_debug(2, "inode changed for '%s': %llu in db, %llu in request.\n", - msg->path, - (unsigned long long) by_path.ino, - (unsigned long long) msg_header.ino); + case OP_EXEC: break; default: - pseudo_diag("inode mismatch: '%s' ino %llu in db, %llu in request.\n", - msg->path, - (unsigned long long) by_path.ino, - (unsigned long long) msg_header.ino); + /* if the path is in the database with a + * different inode, but we were expecting + * it to get deleted, mark the old one + * as deleted. + */ + if (by_path.deleting != 0) { + pseudo_debug(1, "inode mismatch for '%s' -- old one was marked for deletion, deleting.\n", + msg->path); + pdb_did_unlink_file(msg->path); + } else { + pseudo_diag("inode mismatch: '%s' ino %llu in db, %llu in request.\n", + msg->path, + (unsigned long long) by_path.ino, + (unsigned long long) msg_header.ino); + } } } /* If the database entry disagrees on S_ISDIR, it's just @@ -493,12 +496,22 @@ pseudo_op(pseudo_msg_t *msg, const char *program, const char *tag) { break; } if (mismatch) { - pseudo_diag("path mismatch [%d link%s]: ino %llu db '%s' req '%s'.\n", - msg->nlink, - msg->nlink == 1 ? "" : "s", - (unsigned long long) msg_header.ino, - path_by_ino ? path_by_ino : "no path", - msg->path); + /* a mismatch, but we were planning to delete + * the file, so it must have gotten deleted + * already. + */ + if (by_ino.deleting != 0) { + pseudo_debug(1, "inode mismatch for '%s' -- old one was marked for deletion, deleting.\n", + msg->path); + pdb_did_unlink_file(path_by_ino); + } else { + pseudo_diag("path mismatch [%d link%s]: ino %llu db '%s' req '%s'.\n", + msg->nlink, + msg->nlink == 1 ? "" : "s", + (unsigned long long) msg_header.ino, + path_by_ino ? path_by_ino : "no path", + msg->path); + } } } else { /* I don't think I've ever seen this one. */ @@ -686,6 +699,19 @@ pseudo_op(pseudo_msg_t *msg, const char *program, const char *tag) { pdb_rename_file(oldpath, msg); pdb_update_inode(msg); break; + case OP_MAY_UNLINK: + if (pdb_may_unlink_file(msg)) { + /* harmless, but client wants to know so it knows + * whether to follow up... */ + msg->result = RESULT_FAIL; + } + break; + case OP_DID_UNLINK: + pdb_did_unlink_file(msg->path); + break; + case OP_CANCEL_UNLINK: + pdb_cancel_unlink_file(msg); + break; case OP_UNLINK: /* this removes any entries with the given path from the * database. No response is needed. @@ -707,16 +733,7 @@ pseudo_op(pseudo_msg_t *msg, const char *program, const char *tag) { (unsigned long long) msg->ino); pdb_unlink_file_dev(msg); } - /* if we had a match for this path, stash it in the - * message -- client may want to relink it if the - * real_unlink() fails. - */ - if (found_path) { - *msg = by_path; - msg->result = RESULT_SUCCEED; - } else { - msg->result = RESULT_NONE; - } + msg->result = RESULT_NONE; break; case OP_MKDIR: /* FALLTHROUGH */ case OP_MKNOD: @@ -51,6 +51,9 @@ typedef enum { */ OP_SYMLINK, OP_EXEC, + OP_MAY_UNLINK, + OP_DID_UNLINK, + OP_CANCEL_UNLINK, OP_MAX } op_id_t; extern char *pseudo_op_name(op_id_t id); diff --git a/pseudo_client.c b/pseudo_client.c index c5a1531..842b0c3 100644 --- a/pseudo_client.c +++ b/pseudo_client.c @@ -952,28 +952,32 @@ pseudo_client_op(op_id_t op, int access, int fd, int dirfd, const char *path, co pseudo_client_path(dirfd, fd_path(fd)); break; /* operations for which we should use the magic uid/gid */ - case OP_CHMOD: /* FALLTHROUGH */ - case OP_CREAT: /* FALLTHROUGH */ - case OP_FCHMOD: /* FALLTHROUGH */ - case OP_MKDIR: /* FALLTHROUGH */ - case OP_MKNOD: /* FALLTHROUGH */ - case OP_SYMLINK: /* FALLTHROUGH */ + case OP_CHMOD: + case OP_CREAT: + case OP_FCHMOD: + case OP_MKDIR: + case OP_MKNOD: + case OP_SYMLINK: msg.uid = pseudo_fuid; msg.gid = pseudo_fgid; pseudo_debug(2, "fuid: %d ", pseudo_fuid); - /* fallthrough. chown/fchown uid/gid already calculated, and + /* FALLTHROUGH */ + /* chown/fchown uid/gid already calculated, and * a link or rename should not change a file's ownership. * (operations which can create should be CREAT or MKNOD * or MKDIR) */ - case OP_EXEC: /* FALLTHROUGH */ - case OP_CHOWN: /* FALLTHROUGH */ - case OP_FCHOWN: /* FALLTHROUGH */ - case OP_FSTAT: /* FALLTHROUGH */ - case OP_LINK: /* FALLTHROUGH */ - case OP_RENAME: /* FALLTHROUGH */ - case OP_STAT: /* FALLTHROUGH */ + case OP_EXEC: + case OP_CHOWN: + case OP_FCHOWN: + case OP_FSTAT: + case OP_LINK: + case OP_RENAME: + case OP_STAT: case OP_UNLINK: + case OP_DID_UNLINK: + case OP_CANCEL_UNLINK: + case OP_MAY_UNLINK: do_request = 1; break; default: diff --git a/pseudo_db.c b/pseudo_db.c index 8e3b9b4..62f2322 100644 --- a/pseudo_db.c +++ b/pseudo_db.c @@ -213,6 +213,7 @@ static struct sql_migration { } file_migrations[] = { { create_migration_table }, { index_migration_table }, + { "ALTER TABLE files ADD deleting INTEGER;" }, { NULL }, }, log_migrations[] = { { create_migration_table }, @@ -1283,8 +1284,8 @@ pdb_link_file(pseudo_msg_t *msg) { static sqlite3_stmt *insert; int rc; char *sql = "INSERT INTO files " - " ( path, dev, ino, uid, gid, mode, rdev ) " - " VALUES (?, ?, ?, ?, ?, ?, ?);"; + " ( path, dev, ino, uid, gid, mode, rdev, deleting ) " + " VALUES (?, ?, ?, ?, ?, ?, ?, 0);"; if (!file_db && get_db(&file_db)) { pseudo_diag("database error.\n"); @@ -1391,6 +1392,120 @@ pdb_update_file_path(pseudo_msg_t *msg) { return rc != SQLITE_DONE; } +/* mark a file for pending deletion */ +int +pdb_may_unlink_file(pseudo_msg_t *msg) { + static sqlite3_stmt *mark_file; + int rc, exact; + char *sql_mark_file = "UPDATE files SET deleting = 1 WHERE path = ?;"; + + if (!file_db && get_db(&file_db)) { + pseudo_diag("database error.\n"); + return 0; + } + if (!mark_file) { + rc = sqlite3_prepare_v2(file_db, sql_mark_file, strlen(sql_mark_file), &mark_file, NULL); + if (rc) { + dberr(file_db, "couldn't prepare DELETE statement"); + return 1; + } + } + if (!msg) { + return 1; + } + if (msg->pathlen) { + sqlite3_bind_text(mark_file, 1, msg->path, -1, SQLITE_STATIC); + } else { + pseudo_debug(1, "cannot mark a file for pending deletion without a path."); + return 1; + } + rc = sqlite3_step(mark_file); + if (rc != SQLITE_DONE) { + dberr(file_db, "mark for deletion may have failed"); + return 1; + } + exact = sqlite3_changes(file_db); + pseudo_debug(3, "(exact %d) ", exact); + sqlite3_reset(mark_file); + sqlite3_clear_bindings(mark_file); + /* indicate whether we marked something */ + if (exact > 0) + return 0; + else + return 1; +} + +/* unmark a file for pending deletion */ +int +pdb_cancel_unlink_file(pseudo_msg_t *msg) { + static sqlite3_stmt *mark_file; + int rc, exact; + char *sql_mark_file = "UPDATE files SET deleting = 0 WHERE path = ?;"; + + if (!file_db && get_db(&file_db)) { + pseudo_diag("database error.\n"); + return 0; + } + if (!mark_file) { + rc = sqlite3_prepare_v2(file_db, sql_mark_file, strlen(sql_mark_file), &mark_file, NULL); + if (rc) { + dberr(file_db, "couldn't prepare DELETE statement"); + return 1; + } + } + if (!msg) { + return 1; + } + if (msg->pathlen) { + sqlite3_bind_text(mark_file, 1, msg->path, -1, SQLITE_STATIC); + } else { + pseudo_debug(1, "cannot unmark a file for pending deletion without a path."); + return 1; + } + rc = sqlite3_step(mark_file); + if (rc != SQLITE_DONE) { + dberr(file_db, "unmark for deletion may have failed"); + } + exact = sqlite3_changes(file_db); + pseudo_debug(3, "(exact %d) ", exact); + sqlite3_reset(mark_file); + sqlite3_clear_bindings(mark_file); + return rc != SQLITE_DONE; +} + +int +pdb_did_unlink_file(char *path) { + static sqlite3_stmt *delete_exact; + int rc, exact; + char *sql_delete_exact = "DELETE FROM files WHERE path = ? AND deleting = 1;"; + + if (!file_db && get_db(&file_db)) { + pseudo_diag("database error.\n"); + return 0; + } + if (!delete_exact) { + rc = sqlite3_prepare_v2(file_db, sql_delete_exact, strlen(sql_delete_exact), &delete_exact, NULL); + if (rc) { + dberr(file_db, "couldn't prepare DELETE statement"); + return 1; + } + } + if (!path) { + pseudo_debug(1, "cannot unlink a file without a path."); + return 1; + } + sqlite3_bind_text(delete_exact, 1, path, -1, SQLITE_STATIC); + rc = sqlite3_step(delete_exact); + if (rc != SQLITE_DONE) { + dberr(file_db, "cleanup of file marked for deletion may have failed"); + } + exact = sqlite3_changes(file_db); + pseudo_debug(3, "(exact %d)\n", exact); + sqlite3_reset(delete_exact); + sqlite3_clear_bindings(delete_exact); + return rc != SQLITE_DONE; +} + /* unlink a file, by path */ int pdb_unlink_file(pseudo_msg_t *msg) { @@ -1438,7 +1553,7 @@ pdb_unlink_file(pseudo_msg_t *msg) { * Use > and < instead of a glob at the end. */ int -pdb_unlink_contents( pseudo_msg_t *msg) { +pdb_unlink_contents(pseudo_msg_t *msg) { static sqlite3_stmt *delete_sub; int rc, sub; char *sql_delete_sub = "DELETE FROM files WHERE " @@ -1470,7 +1585,7 @@ pdb_unlink_contents( pseudo_msg_t *msg) { dberr(file_db, "delete sub by path may have failed"); } sub = sqlite3_changes(file_db); - pseudo_debug(3, "sub %d) ", sub); + pseudo_debug(3, "(sub %d) ", sub); sqlite3_reset(delete_sub); sqlite3_clear_bindings(delete_sub); return rc != SQLITE_DONE; @@ -1673,6 +1788,7 @@ pdb_find_file_exact(pseudo_msg_t *msg) { msg->gid = (unsigned long) sqlite3_column_int64(select, 5); msg->mode = (unsigned long) sqlite3_column_int64(select, 6); msg->rdev = (unsigned long) sqlite3_column_int64(select, 7); + msg->deleting = (int) sqlite3_column_int64(select, 8); rc = 0; break; case SQLITE_DONE: @@ -1728,6 +1844,7 @@ pdb_find_file_path(pseudo_msg_t *msg) { msg->gid = sqlite3_column_int64(select, 5); msg->mode = sqlite3_column_int64(select, 6); msg->rdev = sqlite3_column_int64(select, 7); + msg->deleting = (int) sqlite3_column_int64(select, 8); rc = 0; break; case SQLITE_DONE: @@ -1824,6 +1941,7 @@ pdb_find_file_dev(pseudo_msg_t *msg) { msg->gid = (unsigned long) sqlite3_column_int64(select, 5); msg->mode = (unsigned long) sqlite3_column_int64(select, 6); msg->rdev = (unsigned long) sqlite3_column_int64(select, 7); + msg->deleting = (int) sqlite3_column_int64(select, 8); rc = 0; break; case SQLITE_DONE: @@ -1872,6 +1990,7 @@ pdb_find_file_ino(pseudo_msg_t *msg) { msg->gid = (unsigned long) sqlite3_column_int64(select, 5); msg->mode = (unsigned long) sqlite3_column_int64(select, 6); msg->rdev = (unsigned long) sqlite3_column_int64(select, 7); + msg->deleting = (int) sqlite3_column_int64(select, 8); rc = 0; break; case SQLITE_DONE: diff --git a/pseudo_db.h b/pseudo_db.h index 2e7dbb9..c9eb228 100644 --- a/pseudo_db.h +++ b/pseudo_db.h @@ -37,7 +37,10 @@ typedef struct { char *program; } log_entry; +extern int pdb_cancel_unlink_file(pseudo_msg_t *msg); +extern int pdb_did_unlink_file(char *path); extern int pdb_link_file(pseudo_msg_t *msg); +extern int pdb_may_unlink_file(pseudo_msg_t *msg); extern int pdb_unlink_file(pseudo_msg_t *msg); extern int pdb_unlink_file_dev(pseudo_msg_t *msg); extern int pdb_update_file(pseudo_msg_t *msg); diff --git a/pseudo_ipc.h b/pseudo_ipc.h index 664cbd2..40953a8 100644 --- a/pseudo_ipc.h +++ b/pseudo_ipc.h @@ -48,6 +48,7 @@ typedef struct { dev_t rdev; unsigned int pathlen; int nlink; + int deleting; char path[]; } pseudo_msg_t; diff --git a/pseudo_table.c b/pseudo_table.c index 4c49f3d..c890adc 100644 --- a/pseudo_table.c +++ b/pseudo_table.c @@ -47,6 +47,9 @@ static char *operation_names[] = { "unlink", "symlink", "exec", + "may-unlink", + "did-unlink", + "cancel-unlink", NULL }; |