int journal_forget (handle_t *handle, struct buffer_head *bh)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
struct journal_head *jh;
int drop_reserve = 0;
int err = 0;
int was_modified = 0;
BUFFER_TRACE(bh, "entry");
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
if (!buffer_jbd(bh))
goto not_jbd;
jh = bh2jh(bh);
/* Critical error: attempting to delete a bitmap buffer, maybe?
* Don't do any jbd operations, and return an error. */
if (!J_EXPECT_JH(jh, !jh->b_committed_data,
"inconsistent data on disk")) {
err = -EIO;
goto not_jbd;
}
/* keep track of whether or not this transaction modified us */
was_modified = jh->b_modified;
/*
* The buffer is leaving the transaction, so we must drop
* all references -bzzz
*/
jh->b_modified = 0;
if (jh->b_transaction == handle->h_transaction) {
J_ASSERT_JH(jh, !jh->b_frozen_data);
/* If we are forgetting a buffer which is already part
* of this transaction, then we can just drop it from
* the transaction immediately. */
clear_buffer_dirty(bh);
clear_buffer_jbddirty(bh);
JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
/*
* we only want to drop a reference if this transaction
* modified the buffer
*/
if (was_modified)
drop_reserve = 1;
/*
* We are no longer going to journal this buffer.
* However, the commit of this transaction is still
* important to the buffer: the delete that we are now
* processing might obsolete an old log entry, so by
* committing, we can satisfy the buffer's checkpoint.
*
* So, if we have a checkpoint on the buffer, we should
* now refile the buffer on our BJ_Forget list so that
* we know to remove the checkpoint after we commit.
*/
if (jh->b_cp_transaction) {
__journal_temp_unlink_buffer(jh);
__journal_file_buffer(jh, transaction, BJ_Forget);
} else {
__journal_unfile_buffer(jh);
journal_remove_journal_head(bh);
__brelse(bh);
if (!buffer_jbd(bh)) {
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
__bforget(bh);
goto drop;
}
}
} else if (jh->b_transaction) {
J_ASSERT_JH(jh, (jh->b_transaction ==
journal->j_committing_transaction));
/* However, if the buffer is still owned by a prior
* (committing) transaction, we can't drop it yet... */
JBUFFER_TRACE(jh, "belongs to older transaction");
/* ... but we CAN drop it from the new transaction if we
* have also modified it since the original commit. */
if (jh->b_next_transaction) {
J_ASSERT(jh->b_next_transaction == transaction);
jh->b_next_transaction = NULL;
/*
* only drop a reference if this transaction modified
* the buffer
*/
if (was_modified)
drop_reserve = 1;
}
}
//......... part of the code omitted here .........
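The excerpt breaks off here, but the branch structure above is the whole story of journal_forget(): a buffer owned by the running transaction is unfiled (and kept on BJ_Forget only when an old checkpoint still needs this commit), a buffer owned by the committing transaction merely loses its b_next_transaction reservation, and anything else is untouched. Below is a minimal user-space sketch of that decision; struct txn, struct buf and forget() are hypothetical stand-ins for the real jbd structures, not kernel API.
#include <stdio.h>
#include <stdbool.h>

/* Hypothetical stand-ins for the jbd structures; illustration only. */
struct txn { int tid; };

struct buf {
	struct txn *b_transaction;      /* transaction that owns the buffer    */
	struct txn *b_next_transaction; /* reservation by the next transaction */
	struct txn *b_cp_transaction;   /* checkpoint that still needs it      */
	bool modified;                  /* did the forgetting handle modify it */
};

enum forget_action { DROP_NOW, FILE_ON_FORGET, DETACH_FROM_NEXT, NOTHING };

/* Mirrors the branch structure of journal_forget() above (sketch only). */
static enum forget_action forget(struct buf *b, struct txn *running)
{
	if (b->b_transaction == running)
		/* Ours: keep it only if a checkpoint still needs this commit. */
		return b->b_cp_transaction ? FILE_ON_FORGET : DROP_NOW;
	if (b->b_transaction && b->b_next_transaction == running) {
		b->b_next_transaction = NULL;   /* cancel our reservation */
		return DETACH_FROM_NEXT;
	}
	return NOTHING;
}

int main(void)
{
	struct txn committing = { 1 }, running = { 2 };
	struct buf b = { &committing, &running, NULL, true };

	printf("action = %d\n", forget(&b, &running)); /* prints 2: DETACH_FROM_NEXT */
	return 0;
}
The drop_reserve flag in the real function corresponds to the modified field here: credits are handed back only when this transaction actually dirtied the buffer.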
static void write_one_revoke_record(transaction_t *transaction,
struct list_head *log_bufs,
struct buffer_head **descriptorp,
int *offsetp,
struct jbd2_revoke_record_s *record)
{
journal_t *journal = transaction->t_journal;
int csum_size = 0;
struct buffer_head *descriptor;
int sz, offset;
/* If we are already aborting, this all becomes a noop. We
still need to go round the loop in
jbd2_journal_write_revoke_records in order to free all of the
revoke records: only the IO to the journal is omitted. */
if (is_journal_aborted(journal))
return;
descriptor = *descriptorp;
offset = *offsetp;
/* Do we need to leave space at the end for a checksum? */
if (jbd2_journal_has_csum_v2or3(journal))
csum_size = sizeof(struct jbd2_journal_block_tail);
if (jbd2_has_feature_64bit(journal))
sz = 8;
else
sz = 4;
/* Make sure we have a descriptor with space left for the record */
if (descriptor) {
if (offset + sz > journal->j_blocksize - csum_size) {
flush_descriptor(journal, descriptor, offset);
descriptor = NULL;
}
}
if (!descriptor) {
descriptor = jbd2_journal_get_descriptor_buffer(transaction,
JBD2_REVOKE_BLOCK);
if (!descriptor)
return;
/* Record it so that we can wait for IO completion later */
BUFFER_TRACE(descriptor, "file in log_bufs");
jbd2_file_log_bh(log_bufs, descriptor);
offset = sizeof(jbd2_journal_revoke_header_t);
*descriptorp = descriptor;
}
if (jbd2_has_feature_64bit(journal))
* ((__be64 *)(&descriptor->b_data[offset])) =
cpu_to_be64(record->blocknr);
else
* ((__be32 *)(&descriptor->b_data[offset])) =
cpu_to_be32(record->blocknr);
offset += sz;
*offsetp = offset;
}
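write_one_revoke_record() packs each revoked block number into the current descriptor block as a raw 4- or 8-byte big-endian value, opening a new descriptor when none is active and flushing the old one when the next record would spill past the block (minus the optional checksum tail). A self-contained sketch of that packing policy follows; HEADER_SIZE, CSUM_TAIL and flush() are illustrative stand-ins for the real revoke header size, jbd2_journal_block_tail and flush_descriptor().
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLOCKSIZE   4096
#define HEADER_SIZE 16   /* stand-in for sizeof(jbd2_journal_revoke_header_t) */
#define CSUM_TAIL    4   /* stand-in for the checksum tail, if enabled        */

static unsigned char descriptor[BLOCKSIZE];
static int offset;       /* 0 means "no descriptor open yet" */

static void flush(void)
{
	printf("flush descriptor with %d bytes of revoke records\n",
	       offset - HEADER_SIZE);
	offset = 0;
}

/* Append one revoke record; open/flush descriptor blocks as needed. */
static void write_one_revoke(uint64_t blocknr, int sz /* 4 or 8 */)
{
	if (offset && offset + sz > BLOCKSIZE - CSUM_TAIL)
		flush();              /* no room left: start a new block     */
	if (!offset)
		offset = HEADER_SIZE; /* open a fresh descriptor block       */
	memcpy(descriptor + offset, &blocknr, sz); /* kernel stores big-endian */
	offset += sz;
}

int main(void)
{
	for (uint64_t b = 0; b < 2000; b++)
		write_one_revoke(b, 8);
	if (offset)
		flush();              /* flush the final partial descriptor */
	return 0;
}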
/*
* Perform an actual checkpoint. We take the first transaction on the
* list of transactions to be checkpointed and send all its buffers
* to disk. We submit larger chunks of data at once.
*
* The journal should be locked before calling this function.
* Called with j_checkpoint_mutex held.
*/
int jbd2_log_do_checkpoint(journal_t *journal)
{
struct journal_head *jh;
struct buffer_head *bh;
transaction_t *transaction;
tid_t this_tid;
int result, batch_count = 0;
jbd_debug(1, "Start checkpoint\n");
/*
* First thing: if there are any transactions in the log which
* don't need checkpointing, just eliminate them from the
* journal straight away.
*/
result = jbd2_cleanup_journal_tail(journal);
trace_jbd2_checkpoint(journal, result);
jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
if (result <= 0)
return result;
/*
* OK, we need to start writing disk blocks. Take one transaction
* and write it.
*/
result = 0;
spin_lock(&journal->j_list_lock);
if (!journal->j_checkpoint_transactions)
goto out;
transaction = journal->j_checkpoint_transactions;
if (transaction->t_chp_stats.cs_chp_time == 0)
transaction->t_chp_stats.cs_chp_time = jiffies;
this_tid = transaction->t_tid;
restart:
/*
* If someone cleaned up this transaction while we slept, we're
* done (maybe it's a new transaction, but it fell at the same
* address).
*/
if (journal->j_checkpoint_transactions != transaction ||
transaction->t_tid != this_tid)
goto out;
/* checkpoint all of the transaction's buffers */
while (transaction->t_checkpoint_list) {
jh = transaction->t_checkpoint_list;
bh = jh2bh(jh);
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
get_bh(bh);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
__brelse(bh);
goto retry;
}
if (jh->b_transaction != NULL) {
transaction_t *t = jh->b_transaction;
tid_t tid = t->t_tid;
transaction->t_chp_stats.cs_forced_to_close++;
spin_unlock(&journal->j_list_lock);
if (unlikely(journal->j_flags & JBD2_UNMOUNT))
/*
* The journal thread is dead; so
* starting and waiting for a commit
* to finish will cause us to wait for
* a _very_ long time.
*/
printk(KERN_ERR
"JBD2: %s: Waiting for Godot: block %llu\n",
journal->j_devname, (unsigned long long) bh->b_blocknr);
jbd2_log_start_commit(journal, tid);
jbd2_log_wait_commit(journal, tid);
goto retry;
}
if (!buffer_dirty(bh)) {
if (unlikely(buffer_write_io_error(bh)) && !result)
result = -EIO;
BUFFER_TRACE(bh, "remove from checkpoint");
if (__jbd2_journal_remove_checkpoint(jh))
/* The transaction was released; we're done */
goto out;
continue;
}
/*
* Important: we are about to write the buffer, and
* possibly block, while still holding the journal
* lock. We cannot afford to let the transaction
* logic start messing around with this buffer before
//......... part of the code omitted here .........
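The excerpt stops just as the function reaches the dirty-buffer case; judging from the batch_count variable declared at the top and the header comment's promise that "we submit larger chunks of data at once", the elided tail presumably collects those buffers and submits them in chunks rather than one at a time. A rough user-space sketch of that collect-then-flush batching idea, where NR_BATCH, flush_batch() and add_to_batch() are made-up names:
#include <stdio.h>

#define NR_BATCH 64

static int batch[NR_BATCH];
static int batch_count;

static void flush_batch(void)
{
	printf("submitting %d buffers in one go\n", batch_count);
	batch_count = 0;
}

/* Queue one dirty buffer; submit the whole batch once it is full. */
static void add_to_batch(int bufno)
{
	batch[batch_count++] = bufno;
	if (batch_count == NR_BATCH)
		flush_batch();
}

int main(void)
{
	for (int i = 0; i < 200; i++)
		add_to_batch(i);
	if (batch_count)
		flush_batch();   /* flush the final partial batch */
	return 0;
}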
/*
* Submit all the data buffers to disk
*/
static int journal_submit_data_buffers(journal_t *journal,
transaction_t *commit_transaction,
int write_op)
{
struct journal_head *jh;
struct buffer_head *bh;
int locked;
int bufs = 0;
struct buffer_head **wbuf = journal->j_wbuf;
int err = 0;
/*
* Whenever we unlock the journal and sleep, things can get added
* onto ->t_sync_datalist, so we have to keep looping back to
* write_out_data until we *know* that the list is empty.
*
* Clean up any flushed data buffers from the data list. Even in
* abort mode, we want to flush this out as soon as possible.
*/
write_out_data:
cond_resched();
spin_lock(&journal->j_list_lock);
while (commit_transaction->t_sync_datalist) {
jh = commit_transaction->t_sync_datalist;
bh = jh2bh(jh);
locked = 0;
/* Get reference just to make sure buffer does not disappear
* when we are forced to drop various locks */
get_bh(bh);
/* If the buffer is dirty, we need to submit IO and hence
* we need the buffer lock. We try to lock the buffer without
* blocking. If we fail, we need to drop j_list_lock and do
* blocking lock_buffer().
*/
if (buffer_dirty(bh)) {
if (!trylock_buffer(bh)) {
BUFFER_TRACE(bh, "needs blocking lock");
spin_unlock(&journal->j_list_lock);
trace_jbd_do_submit_data(journal,
commit_transaction);
/* Write out all data to prevent deadlocks */
journal_do_submit_data(wbuf, bufs, write_op);
bufs = 0;
lock_buffer(bh);
spin_lock(&journal->j_list_lock);
}
locked = 1;
}
/* We have to get bh_state lock. Again out of order, sigh. */
if (!inverted_lock(journal, bh)) {
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
}
/* Someone already cleaned up the buffer? */
if (!buffer_jbd(bh) || bh2jh(bh) != jh
|| jh->b_transaction != commit_transaction
|| jh->b_jlist != BJ_SyncData) {
jbd_unlock_bh_state(bh);
if (locked)
unlock_buffer(bh);
BUFFER_TRACE(bh, "already cleaned up");
release_data_buffer(bh);
continue;
}
if (locked && test_clear_buffer_dirty(bh)) {
BUFFER_TRACE(bh, "needs writeout, adding to array");
wbuf[bufs++] = bh;
__journal_file_buffer(jh, commit_transaction,
BJ_Locked);
jbd_unlock_bh_state(bh);
if (bufs == journal->j_wbufsize) {
spin_unlock(&journal->j_list_lock);
trace_jbd_do_submit_data(journal,
commit_transaction);
journal_do_submit_data(wbuf, bufs, write_op);
bufs = 0;
goto write_out_data;
}
} else if (!locked && buffer_locked(bh)) {
__journal_file_buffer(jh, commit_transaction,
BJ_Locked);
jbd_unlock_bh_state(bh);
put_bh(bh);
} else {
BUFFER_TRACE(bh, "writeout complete: unfile");
if (unlikely(!buffer_uptodate(bh)))
err = -EIO;
__journal_unfile_buffer(jh);
jbd_unlock_bh_state(bh);
if (locked)
unlock_buffer(bh);
release_data_buffer(bh);
}
if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
//......... part of the code omitted here .........
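The trylock_buffer()/lock_buffer() dance above is a lock-ordering workaround: the buffer lock may sleep, so it cannot be taken while spinning on j_list_lock. The code first tries a non-blocking lock; if that fails it drops j_list_lock, submits whatever is already queued, takes the buffer lock the slow way, and reacquires j_list_lock. Here is a user-space analogue of the same pattern using pthread mutexes; the lock names are illustrative only.
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ j_list_lock   */
static pthread_mutex_t buf_lock  = PTHREAD_MUTEX_INITIALIZER; /* ~ buffer lock   */

/* Lock the buffer while already holding list_lock without deadlocking:
 * try a non-blocking lock first; if that fails, drop list_lock, block on
 * the buffer lock, then retake list_lock (as the code above does with
 * trylock_buffer()/lock_buffer()). */
static void lock_buffer_under_list_lock(void)
{
	pthread_mutex_lock(&list_lock);
	if (pthread_mutex_trylock(&buf_lock) != 0) {
		pthread_mutex_unlock(&list_lock);
		/* ...submit any already-queued IO here, as the kernel does... */
		pthread_mutex_lock(&buf_lock);
		pthread_mutex_lock(&list_lock);
	}
	/* both locks held here */
	pthread_mutex_unlock(&buf_lock);
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	lock_buffer_under_list_lock();
	puts("done");
	return 0;
}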
/**
* ext4_add_groupblocks() -- Add given blocks to an existing group
* @handle: handle to this transaction
* @sb: super block
* @block: start physical block to add to the block group
* @count: number of blocks to add
*
* This marks the blocks as free in the bitmap. We ask mballoc
* to reload the buddy after this by setting the group's
* EXT4_GROUP_INFO_NEED_INIT_BIT flag.
*/
void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count)
{
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *gd_bh;
ext4_group_t block_group;
ext4_grpblk_t bit;
unsigned int i;
struct ext4_group_desc *desc;
struct ext4_super_block *es;
struct ext4_sb_info *sbi;
int err = 0, ret, blk_free_count;
ext4_grpblk_t blocks_freed;
struct ext4_group_info *grp;
sbi = EXT4_SB(sb);
es = sbi->s_es;
ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
grp = ext4_get_group_info(sb, block_group);
/*
* Check to see if we are freeing blocks across a group
* boundary.
*/
if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
goto error_return;
}
bitmap_bh = ext4_read_block_bitmap(sb, block_group);
if (!bitmap_bh)
goto error_return;
desc = ext4_get_group_desc(sb, block_group, &gd_bh);
if (!desc)
goto error_return;
if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
in_range(ext4_inode_bitmap(sb, desc), block, count) ||
in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
in_range(block + count - 1, ext4_inode_table(sb, desc),
sbi->s_itb_per_group)) {
ext4_error(sb, __func__,
"Adding blocks in system zones - "
"Block = %llu, count = %lu",
block, count);
goto error_return;
}
/*
* We are about to add blocks to the bitmap,
* so we need undo access.
*/
BUFFER_TRACE(bitmap_bh, "getting undo access");
err = ext4_journal_get_undo_access(handle, bitmap_bh);
if (err)
goto error_return;
/*
* We are about to modify some metadata. Call the journal APIs
* to unshare ->b_data if a currently-committing transaction is
* using it
*/
BUFFER_TRACE(gd_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, gd_bh);
if (err)
goto error_return;
/*
* make sure we don't allow a parallel init on other groups in the
* same buddy cache
*/
down_write(&grp->alloc_sem);
for (i = 0, blocks_freed = 0; i < count; i++) {
BUFFER_TRACE(bitmap_bh, "clear bit");
if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
bit + i, bitmap_bh->b_data)) {
ext4_error(sb, __func__,
"bit already cleared for block %llu",
(ext4_fsblk_t)(block + i));
BUFFER_TRACE(bitmap_bh, "bit already cleared");
} else {
blocks_freed++;
}
}
ext4_lock_group(sb, block_group);
blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
ext4_free_blks_set(sb, desc, blk_free_count);
desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
//......... part of the code omitted here .........
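The heart of ext4_add_groupblocks() is the mapping from a physical block to a (group, bit) pair plus the clear-and-count loop, which only credits blocks whose bits were actually set, so a double add is reported rather than inflating the free count. A simplified, self-contained sketch of both steps follows; BLOCKS_PER_GROUP is an example value and, unlike ext4_get_group_no_and_offset(), the mapping here assumes s_first_data_block is zero.
#include <stdint.h>
#include <stdio.h>

#define BLOCKS_PER_GROUP 32768   /* typical for a 4 KiB block size */

/* Simplified mapping; the real ext4_get_group_no_and_offset() also
 * subtracts s_first_data_block first. */
static void block_to_group_and_bit(uint64_t block,
				   uint64_t *group, uint32_t *bit)
{
	*group = block / BLOCKS_PER_GROUP;
	*bit   = block % BLOCKS_PER_GROUP;
}

/* Clear `count` bits starting at `bit`; count only bits that were set,
 * mirroring the blocks_freed accounting in the loop above. */
static unsigned clear_and_count(unsigned char *bitmap,
				uint32_t bit, unsigned count)
{
	unsigned freed = 0;

	for (unsigned i = 0; i < count; i++) {
		uint32_t b = bit + i;
		unsigned char mask = 1u << (b & 7);

		if (bitmap[b >> 3] & mask) {
			bitmap[b >> 3] &= ~mask;
			freed++;
		} else {
			printf("bit already cleared for block %u\n", b);
		}
	}
	return freed;
}

int main(void)
{
	unsigned char bitmap[BLOCKS_PER_GROUP / 8] = { 0 };
	uint64_t group;
	uint32_t bit;

	block_to_group_and_bit(70000, &group, &bit);
	bitmap[bit >> 3] |= 1u << (bit & 7);   /* pretend one block was in use */
	printf("group %llu, bit %u, freed %u\n",
	       (unsigned long long)group, bit,
	       clear_and_count(bitmap, bit, 2));
	return 0;
}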
/*
* NOTE! When we get the inode, we're the only people
* that have access to it, and as such there are no
* race conditions we have to worry about. The inode
* is not on the hash-lists, and it cannot be reached
* through the filesystem because the directory entry
* has been deleted earlier.
*
* HOWEVER: we must make sure that we get no aliases,
* which means that we have to call "clear_inode()"
* _before_ we mark the inode not in use in the inode
* bitmaps. Otherwise a newly created file might use
* the same inode number (not actually the same pointer
* though), and then we'd have two inodes sharing the
* same inode number and space on the harddisk.
*/
void ext3_free_inode (handle_t *handle, struct inode * inode)
{
struct super_block * sb = inode->i_sb;
int is_directory;
unsigned long ino;
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2;
unsigned long block_group;
unsigned long bit;
struct ext3_group_desc * gdp;
struct ext3_super_block * es;
struct ext3_sb_info *sbi;
int fatal = 0, err;
if (atomic_read(&inode->i_count) > 1) {
printk ("ext3_free_inode: inode has count=%d\n",
atomic_read(&inode->i_count));
return;
}
if (inode->i_nlink) {
printk ("ext3_free_inode: inode has nlink=%d\n",
inode->i_nlink);
return;
}
if (!sb) {
printk("ext3_free_inode: inode on nonexistent device\n");
return;
}
sbi = EXT3_SB(sb);
ino = inode->i_ino;
ext3_debug ("freeing inode %lu\n", ino);
trace_ext3_free_inode(inode);
is_directory = S_ISDIR(inode->i_mode);
es = EXT3_SB(sb)->s_es;
if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
ext3_error (sb, "ext3_free_inode",
"reserved or nonexistent inode %lu", ino);
goto error_return;
}
block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
bitmap_bh = read_inode_bitmap(sb, block_group);
if (!bitmap_bh)
goto error_return;
BUFFER_TRACE(bitmap_bh, "get_write_access");
fatal = ext3_journal_get_write_access(handle, bitmap_bh);
if (fatal)
goto error_return;
/* Ok, now we can actually update the inode bitmaps.. */
if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
bit, bitmap_bh->b_data))
ext3_error (sb, "ext3_free_inode",
"bit already cleared for inode %lu", ino);
else {
gdp = ext3_get_group_desc (sb, block_group, &bh2);
BUFFER_TRACE(bh2, "get_write_access");
fatal = ext3_journal_get_write_access(handle, bh2);
if (fatal) goto error_return;
if (gdp) {
spin_lock(sb_bgl_lock(sbi, block_group));
le16_add_cpu(&gdp->bg_free_inodes_count, 1);
if (is_directory)
le16_add_cpu(&gdp->bg_used_dirs_count, -1);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_inc(&sbi->s_freeinodes_counter);
if (is_directory)
percpu_counter_dec(&sbi->s_dirs_counter);
}
BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bh2);
if (!fatal) fatal = err;
}
BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bitmap_bh);
if (!fatal)
fatal = err;
//......... part of the code omitted here .........
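Before any of the bitmap work, ext3_free_inode() has to turn the inode number into a bitmap position; since inode numbers start at 1, the offset is shifted down by one before dividing by EXT3_INODES_PER_GROUP(sb), exactly as the block_group/bit computation above does. A tiny sketch of that arithmetic; INODES_PER_GROUP is just an example value, the real one comes from the superblock.
#include <stdio.h>

#define INODES_PER_GROUP 8192   /* example value; read from the superblock in ext3 */

int main(void)
{
	unsigned long ino = 12345;                       /* inode being freed */
	unsigned long group = (ino - 1) / INODES_PER_GROUP;
	unsigned long bit   = (ino - 1) % INODES_PER_GROUP;

	/* ino 1 maps to group 0, bit 0; ino 8193 to group 1, bit 0; and so on. */
	printf("inode %lu -> block group %lu, bitmap bit %lu\n", ino, group, bit);
	return 0;
}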
/*
* jbd2_journal_commit_transaction
*
* The primary function for committing a transaction to the log. This
* function is called by the journal thread to begin a complete commit.
*/
void jbd2_journal_commit_transaction(journal_t *journal)
{
transaction_t *commit_transaction;
struct journal_head *jh, *new_jh, *descriptor;
struct buffer_head **wbuf = journal->j_wbuf;
int bufs;
int flags;
int err;
unsigned long long blocknr;
char *tagp = NULL;
journal_header_t *header;
journal_block_tag_t *tag = NULL;
int space_left = 0;
int first_tag = 0;
int tag_flag;
int i;
int tag_bytes = journal_tag_bytes(journal);
/*
* First job: lock down the current transaction and wait for
* all outstanding updates to complete.
*/
#ifdef COMMIT_STATS
spin_lock(&journal->j_list_lock);
summarise_journal_usage(journal);
spin_unlock(&journal->j_list_lock);
#endif
/* Do we need to erase the effects of a prior jbd2_journal_flush? */
if (journal->j_flags & JBD2_FLUSHED) {
jbd_debug(3, "super block updated\n");
jbd2_journal_update_superblock(journal, 1);
} else {
jbd_debug(3, "superblock not updated\n");
}
J_ASSERT(journal->j_running_transaction != NULL);
J_ASSERT(journal->j_committing_transaction == NULL);
commit_transaction = journal->j_running_transaction;
J_ASSERT(commit_transaction->t_state == T_RUNNING);
jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid);
spin_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED;
spin_lock(&commit_transaction->t_handle_lock);
while (commit_transaction->t_updates) {
DEFINE_WAIT(wait);
prepare_to_wait(&journal->j_wait_updates, &wait,
TASK_UNINTERRUPTIBLE);
if (commit_transaction->t_updates) {
spin_unlock(&commit_transaction->t_handle_lock);
spin_unlock(&journal->j_state_lock);
schedule();
spin_lock(&journal->j_state_lock);
spin_lock(&commit_transaction->t_handle_lock);
}
finish_wait(&journal->j_wait_updates, &wait);
}
spin_unlock(&commit_transaction->t_handle_lock);
J_ASSERT (commit_transaction->t_outstanding_credits <=
journal->j_max_transaction_buffers);
/*
* First thing we are allowed to do is to discard any remaining
* BJ_Reserved buffers. Note, it is _not_ permissible to assume
* that there are no such buffers: if a large filesystem
* operation like a truncate needs to split itself over multiple
* transactions, then it may try to do a jbd2_journal_restart() while
* there are still BJ_Reserved buffers outstanding. These must
* be released cleanly from the current transaction.
*
* In this case, the filesystem must still reserve write access
* again before modifying the buffer in the new transaction, but
* we do not require it to remember exactly which old buffers it
* has reserved. This is consistent with the existing behaviour
* that multiple jbd2_journal_get_write_access() calls to the same
* buffer are perfectly permissible.
*/
while (commit_transaction->t_reserved_list) {
jh = commit_transaction->t_reserved_list;
JBUFFER_TRACE(jh, "reserved, unused: refile");
/*
* A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
* leave undo-committed data.
*/
if (jh->b_committed_data) {
struct buffer_head *bh = jh2bh(jh);
//......... part of the code omitted here .........
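The while (commit_transaction->t_updates) loop is the standard "wait until a counter drains to zero" idiom, built in the kernel from prepare_to_wait()/schedule()/finish_wait() with both locks dropped across the sleep. Below is a user-space analogue using a pthread condition variable; finish_update() is a made-up stand-in for a handle completing, roughly what jbd2_journal_stop() does when it drops the last update.
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  drained = PTHREAD_COND_INITIALIZER;
static int updates = 3;                 /* ~ commit_transaction->t_updates */

/* A handle finishing its work and waking the committing thread. */
static void *finish_update(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	if (--updates == 0)
		pthread_cond_signal(&drained);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t[3];

	for (int i = 0; i < 3; i++)
		pthread_create(&t[i], NULL, finish_update, NULL);

	/* Commit side: block until every outstanding update has completed. */
	pthread_mutex_lock(&lock);
	while (updates)
		pthread_cond_wait(&drained, &lock);
	pthread_mutex_unlock(&lock);
	puts("all updates drained, commit can proceed");

	for (int i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}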
/*
* Clean up a transaction's checkpoint list.
*
* We wait for any pending IO to complete and make sure any clean
* buffers are removed from the transaction.
*
* Return 1 if we performed any actions which might have destroyed the
* checkpoint. (journal_remove_checkpoint() deletes the transaction when
* the last checkpoint buffer is cleansed)
*
* Called with the journal locked.
* Called with journal_datalist_lock held.
*/
static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
{
struct journal_head *jh, *next_jh, *last_jh;
struct buffer_head *bh;
int ret = 0;
assert_spin_locked(&journal_datalist_lock);
jh = transaction->t_checkpoint_list;
if (!jh)
return 0;
last_jh = jh->b_cpprev;
next_jh = jh;
do {
jh = next_jh;
bh = jh2bh(jh);
if (buffer_locked(bh)) {
atomic_inc(&bh->b_count);
spin_unlock(&journal_datalist_lock);
unlock_journal(journal);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
__brelse(bh);
goto out_return_1;
}
if (jh->b_transaction != NULL) {
transaction_t *transaction = jh->b_transaction;
tid_t tid = transaction->t_tid;
spin_unlock(&journal_datalist_lock);
log_start_commit(journal, transaction);
unlock_journal(journal);
log_wait_commit(journal, tid);
goto out_return_1;
}
/*
* We used to test for (jh->b_list != BUF_CLEAN) here.
* But unmap_underlying_metadata() can place buffer onto
* BUF_CLEAN. Since refile_buffer() no longer takes buffers
* off checkpoint lists, we cope with it here
*/
/*
* AKPM: I think the buffer_jdirty test is redundant - it
* shouldn't have NULL b_transaction?
*/
next_jh = jh->b_cpnext;
if (!buffer_dirty(bh) && !buffer_jdirty(bh)) {
BUFFER_TRACE(bh, "remove from checkpoint");
__journal_remove_checkpoint(jh);
__journal_remove_journal_head(bh);
refile_buffer(bh);
__brelse(bh);
ret = 1;
}
jh = next_jh;
} while (jh != last_jh);
return ret;
out_return_1:
lock_journal(journal);
spin_lock(&journal_datalist_lock);
return 1;
}
/*
* Clean up a transaction's checkpoint list.
*
* We wait for any pending IO to complete and make sure any clean
* buffers are removed from the transaction.
*
* Return 1 if we performed any actions which might have destroyed the
* checkpoint. (journal_remove_checkpoint() deletes the transaction when
* the last checkpoint buffer is cleansed)
*
* Called with j_list_lock held.
*/
static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
{
struct journal_head *jh, *next_jh, *last_jh;
struct buffer_head *bh;
int ret = 0;
assert_spin_locked(&journal->j_list_lock);
jh = transaction->t_checkpoint_list;
if (!jh)
return 0;
last_jh = jh->b_cpprev;
next_jh = jh;
do {
jh = next_jh;
bh = jh2bh(jh);
if (buffer_locked(bh)) {
atomic_inc(&bh->b_count);
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
__brelse(bh);
goto out_return_1;
}
/*
* This is foul
*/
if (!jbd_trylock_bh_state(bh)) {
jbd_sync_bh(journal, bh);
goto out_return_1;
}
if (jh->b_transaction != NULL) {
transaction_t *t = jh->b_transaction;
tid_t tid = t->t_tid;
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
log_start_commit(journal, tid);
log_wait_commit(journal, tid);
goto out_return_1;
}
/*
* AKPM: I think the buffer_jbddirty test is redundant - it
* shouldn't have NULL b_transaction?
*/
next_jh = jh->b_cpnext;
if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
BUFFER_TRACE(bh, "remove from checkpoint");
__journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
__brelse(bh);
ret = 1;
} else {
jbd_unlock_bh_state(bh);
}
} while (jh != last_jh);
return ret;
out_return_1:
spin_lock(&journal->j_list_lock);
return 1;
}
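Both versions of __cleanup_transaction() walk the checkpoint list the same way: it is a circular, doubly linked list threaded through b_cpnext/b_cpprev, so the loop records last_jh up front and always reads next_jh before the current entry can be removed. A self-contained sketch of that traverse-while-removing pattern on a plain circular list follows; struct node, unlink_node(), cleanup() and push() are all illustrative names.
#include <stdio.h>
#include <stdlib.h>

/* Minimal circular doubly linked list, standing in for the b_cpnext/
 * b_cpprev checkpoint list. */
struct node {
	int val;
	int clean;                 /* ~ !buffer_dirty && !buffer_jbddirty */
	struct node *next, *prev;
};

static void unlink_node(struct node **head, struct node *n)
{
	if (n->next == n) {        /* last element: the list becomes empty */
		*head = NULL;
	} else {
		n->prev->next = n->next;
		n->next->prev = n->prev;
		if (*head == n)
			*head = n->next;
	}
	free(n);
}

/* Remove clean nodes; save next and the original tail first, exactly as
 * the loop above saves next_jh and last_jh before possibly freeing jh. */
static int cleanup(struct node **head)
{
	struct node *jh = *head, *last = jh->prev, *next = jh;
	int removed = 0, done = 0;

	do {
		jh = next;
		next = jh->next;
		done = (jh == last);   /* decide before jh can be freed */
		if (jh->clean) {
			unlink_node(head, jh);
			removed++;
		}
	} while (!done);
	return removed;
}

static void push(struct node **head, int val, int clean)
{
	struct node *n = malloc(sizeof(*n));

	n->val = val;
	n->clean = clean;
	if (!*head) {
		n->next = n->prev = n;
		*head = n;
	} else {
		n->prev = (*head)->prev;
		n->next = *head;
		n->prev->next = n;
		(*head)->prev = n;
	}
}

int main(void)
{
	struct node *head = NULL;

	push(&head, 1, 1);
	push(&head, 2, 0);
	push(&head, 3, 1);
	printf("removed %d clean buffers\n", cleanup(&head));
	return 0;
}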
/*
* Try to add the new entry to the inline data.
* If it succeeds, return 0. If not, extend the inline dir and copy the data
* to the newly created block.
*/
int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode)
{
int ret, inline_size;
void *inline_start, *backup_buf = NULL;
struct buffer_head *dir_block = NULL;
struct ext4_iloc iloc;
int blocksize = inode->i_sb->s_blocksize;
struct inode *dir = dentry->d_parent->d_inode;
ret = ext4_get_inode_loc(dir, &iloc);
if (ret)
return ret;
down_write(&EXT4_I(dir)->xattr_sem);
if (!ext4_has_inline_data(dir))
goto out;
inline_start = ext4_raw_inode(&iloc)->i_block;
inline_size = EXT4_MIN_INLINE_DATA_SIZE;
ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
inline_start, inline_size);
if (ret != -ENOSPC)
goto out;
/* check whether it can be inserted into the inline xattr space. */
inline_size = EXT4_I(dir)->i_inline_size -
EXT4_MIN_INLINE_DATA_SIZE;
if (inline_size > 0) {
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
inline_start, inline_size);
if (ret != -ENOSPC)
goto out;
}
/* Try to add more xattr space.*/
ret = ext4_update_inline_dir(handle, dentry, dir, &iloc);
if (ret && ret != -ENOSPC)
goto out;
else if (!ret) {
inline_size = EXT4_I(dir)->i_inline_size -
EXT4_MIN_INLINE_DATA_SIZE;
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
inline_start, inline_size);
if (ret != -ENOSPC)
goto out;
}
/*
* The inline space is filled up, so create a new block for it.
* As the extent tree will be created, we have to save the inline
* dir first.
*/
inline_size = EXT4_I(dir)->i_inline_size;
backup_buf = kmalloc(inline_size, GFP_NOFS);
if (!backup_buf) {
ret = -ENOMEM;
goto out;
}
memcpy(backup_buf, (void *)ext4_raw_inode(&iloc)->i_block,
EXT4_MIN_INLINE_DATA_SIZE);
if (inline_size > EXT4_MIN_INLINE_DATA_SIZE)
memcpy(backup_buf + EXT4_MIN_INLINE_DATA_SIZE,
ext4_get_inline_xattr_pos(dir, &iloc),
inline_size - EXT4_MIN_INLINE_DATA_SIZE);
/* clear the entry and the flag in dir now. */
ret = ext4_destroy_inline_data_nolock(handle, dir);
if (ret)
goto out;
dir->i_size = EXT4_I(dir)->i_disksize = blocksize;
dir_block = ext4_bread(handle, dir, 0, 1, &ret);
if (!dir_block)
goto out;
BUFFER_TRACE(dir_block, "get_write_access");
ret = ext4_journal_get_write_access(handle, dir_block);
if (ret)
goto out;
memcpy(dir_block->b_data, backup_buf, inline_size);
/* Set the final de to cover the whole block. */
ext4_update_final_de(dir_block->b_data, inline_size,
blocksize);
BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
ret = ext4_handle_dirty_metadata(handle, dir, dir_block);
//......... part of the code omitted here .........
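The flow the header comment describes is a simple fallback cascade: try the EXT4_MIN_INLINE_DATA_SIZE i_block area, then the existing inline-xattr area, then whatever space ext4_update_inline_dir() can add, and only when everything returns -ENOSPC convert the inline directory into a real block. A toy sketch of that cascade shape; add_to_area(), try_add_entry() and the area sizes are made up, only the control flow mirrors the function above.
#include <errno.h>
#include <stdio.h>

/* Hypothetical helper standing in for ext4_add_dirent_to_inline() and
 * friends; returns 0 on success or -ENOSPC when the area is full. */
static int add_to_area(int area_size, int entry_size)
{
	return entry_size <= area_size ? 0 : -ENOSPC;
}

/* Mirrors the try-inline / try-xattr / grow-xattr / convert cascade. */
static int try_add_entry(int entry_size)
{
	int ret;

	ret = add_to_area(60, entry_size);   /* i_block area             */
	if (ret != -ENOSPC)
		return ret;
	ret = add_to_area(96, entry_size);   /* existing inline xattr    */
	if (ret != -ENOSPC)
		return ret;
	ret = add_to_area(128, entry_size);  /* after growing the xattr  */
	if (ret != -ENOSPC)
		return ret;
	printf("inline space exhausted: convert to a real dir block\n");
	return 0;
}

int main(void)
{
	printf("small entry: %d\n", try_add_entry(40));
	printf("huge entry:  %d\n", try_add_entry(4096));
	return 0;
}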
/*
* There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both
* free space and a low directory-to-inode ratio; if that fails, then among
* the groups with above-average free space, the one with the fewest
* directories is chosen.
*
* For other inodes, search forward from the parent directory's block
* group to find a free inode.
*/
struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
{
struct super_block *sb;
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2;
int group;
unsigned long ino = 0;
struct inode * inode;
struct ext3_group_desc * gdp = NULL;
struct ext3_super_block * es;
struct ext3_inode_info *ei;
struct ext3_sb_info *sbi;
int err = 0;
struct inode *ret;
int i;
/* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink)
return ERR_PTR(-EPERM);
sb = dir->i_sb;
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
ei = EXT3_I(inode);
sbi = EXT3_SB(sb);
es = sbi->s_es;
if (S_ISDIR(mode)) {
if (test_opt (sb, OLDALLOC))
group = find_group_dir(sb, dir);
else
group = find_group_orlov(sb, dir);
} else
group = find_group_other(sb, dir);
err = -ENOSPC;
if (group == -1)
goto out;
for (i = 0; i < sbi->s_groups_count; i++) {
gdp = ext3_get_group_desc(sb, group, &bh2);
err = -EIO;
brelse(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, group);
if (!bitmap_bh)
goto fail;
ino = 0;
repeat_in_this_group:
ino = ext3_find_next_zero_bit((unsigned long *)
bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino);
if (ino < EXT3_INODES_PER_GROUP(sb)) {
int credits = 0;
BUFFER_TRACE(bitmap_bh, "get_write_access");
err = ext3_journal_get_write_access_credits(handle,
bitmap_bh, &credits);
if (err)
goto fail;
if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group),
ino, bitmap_bh->b_data)) {
/* we won it */
BUFFER_TRACE(bitmap_bh,
"call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle,
bitmap_bh);
if (err)
goto fail;
goto got;
}
/* we lost it */
journal_release_buffer(handle, bitmap_bh, credits);
if (++ino < EXT3_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
}
/*
* This case is possible in a concurrent environment. It is very
* rare. We cannot repeat the find_group_xxx() call because
* that will simply return the same blockgroup, because the
* group descriptor metadata has not yet been updated.
* So we just go onto the next blockgroup.
*/
if (++group == sbi->s_groups_count)
group = 0;
//......... part of the code omitted here .........
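The allocation loop above scans the chosen group's inode bitmap for a zero bit and, if the group is full (or the atomic set-bit race is lost repeatedly), advances to the next group with wraparound until every group has been tried. A self-contained sketch of that search, leaving out the journaling and the set-bit race; GROUPS, INODES_PER_GROUP and the helpers are example values and names.
#include <stdio.h>

#define GROUPS            8
#define INODES_PER_GROUP 16

/* 1 = inode in use. Group 2 gets a free slot at bit 5 in this example. */
static unsigned char bitmap[GROUPS][INODES_PER_GROUP];

static int find_next_zero_bit(const unsigned char *map, int size, int start)
{
	for (int i = start; i < size; i++)
		if (!map[i])
			return i;
	return size;   /* not found, same convention as ext3_find_next_zero_bit */
}

/* Search from `start_group`, wrapping around, like the loop above. */
static long alloc_inode(int start_group)
{
	int group = start_group;

	for (int i = 0; i < GROUPS; i++) {
		int bit = find_next_zero_bit(bitmap[group], INODES_PER_GROUP, 0);

		if (bit < INODES_PER_GROUP) {
			bitmap[group][bit] = 1;   /* "we won it" */
			return (long)group * INODES_PER_GROUP + bit + 1;
		}
		if (++group == GROUPS)            /* next group, with wraparound */
			group = 0;
	}
	return -1;                                /* -ENOSPC in the real code */
}

int main(void)
{
	/* Mark everything used except one slot in group 2. */
	for (int g = 0; g < GROUPS; g++)
		for (int b = 0; b < INODES_PER_GROUP; b++)
			bitmap[g][b] = !(g == 2 && b == 5);

	printf("allocated inode %ld\n", alloc_inode(6));
	return 0;
}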