patch-2.3.8 linux/fs/buffer.c
- Lines: 411
- Date: Tue Jun 22 14:21:33 1999
- Orig file: v2.3.7/linux/fs/buffer.c
- Orig date: Mon Jun 21 11:17:58 1999
diff -u --recursive --new-file v2.3.7/linux/fs/buffer.c linux/fs/buffer.c
@@ -103,10 +103,8 @@
int nref_dirt; /* Dirty buffer threshold for activating bdflush
when trying to refill buffers. */
int dummy1; /* unused */
- int age_buffer; /* Time for normal buffer to age before
- we flush it */
- int age_super; /* Time for superblock to age before we
- flush it */
+ int age_buffer; /* Time for normal buffer to age before we flush it */
+ int age_super; /* Time for superblock to age before we flush it */
int dummy2; /* unused */
int dummy3; /* unused */
} b_un;
@@ -746,21 +744,6 @@
return bh;
}
-void set_writetime(struct buffer_head * buf, int flag)
-{
- int newtime;
-
- if (buffer_dirty(buf)) {
- /* Move buffer to dirty list if jiffies is clear. */
- newtime = jiffies + (flag ? bdf_prm.b_un.age_super :
- bdf_prm.b_un.age_buffer);
- if(!buf->b_flushtime || buf->b_flushtime > newtime)
- buf->b_flushtime = newtime;
- } else {
- buf->b_flushtime = 0;
- }
-}
-
/*
* Put a buffer into the appropriate list, without side-effects.
*/
@@ -778,27 +761,29 @@
* pressures on different devices - thus the (currently unused)
* 'dev' parameter.
*/
+int too_many_dirty_buffers;
+
void balance_dirty(kdev_t dev)
{
int dirty = nr_buffers_type[BUF_DIRTY];
int ndirty = bdf_prm.b_un.ndirty;
if (dirty > ndirty) {
- int wait = 0;
- if (dirty > 2*ndirty)
- wait = 1;
- wakeup_bdflush(wait);
+ if (dirty > 2*ndirty) {
+ too_many_dirty_buffers = 1;
+ wakeup_bdflush(1);
+ return;
+ }
+ wakeup_bdflush(0);
}
+ too_many_dirty_buffers = 0;
+ return;
}
-atomic_t too_many_dirty_buffers;
-
static inline void __mark_dirty(struct buffer_head *bh, int flag)
{
- set_writetime(bh, flag);
+ bh->b_flushtime = jiffies + (flag ? bdf_prm.b_un.age_super : bdf_prm.b_un.age_buffer);
refile_buffer(bh);
- if (atomic_read(&too_many_dirty_buffers))
- balance_dirty(bh->b_dev);
}
void __mark_buffer_dirty(struct buffer_head *bh, int flag)
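The rewritten balance_dirty() above implements a simple two-threshold hysteresis: past ndirty dirty buffers the writer kicks bdflush asynchronously, and past twice that it sets the too_many_dirty_buffers panic flag and wakes bdflush synchronously so the writer has to wait. A minimal standalone sketch of that policy follows; wakeup_bdflush() is stubbed out with a printf, and the threshold value of 500 is an assumption, standing in for bdf_prm.b_un.ndirty.

#include <stdio.h>

static int too_many_dirty_buffers;
static int ndirty_threshold = 500;      /* stands in for bdf_prm.b_un.ndirty */

static void wakeup_bdflush(int wait)    /* stub: the real one wakes the daemon */
{
    printf("bdflush woken, %s\n", wait ? "synchronous" : "asynchronous");
}

static void balance_dirty_sketch(int dirty)
{
    if (dirty > ndirty_threshold) {
        if (dirty > 2 * ndirty_threshold) {
            /* Panic mode: the writer is flooding us, make it wait. */
            too_many_dirty_buffers = 1;
            wakeup_bdflush(1);
            return;
        }
        /* Moderately dirty: kick bdflush but let the writer continue. */
        wakeup_bdflush(0);
    }
    /* At or below 2*ndirty we are out of panic mode again. */
    too_many_dirty_buffers = 0;
}

int main(void)
{
    balance_dirty_sketch(400);    /* below ndirty: no wakeup, flag cleared  */
    balance_dirty_sketch(700);    /* above ndirty: asynchronous wakeup      */
    balance_dirty_sketch(1200);   /* above 2*ndirty: panic mode, sync wait  */
    return 0;
}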
@@ -841,9 +826,6 @@
*/
void __brelse(struct buffer_head * buf)
{
- /* If dirty, mark the time this buffer should be written back. */
- set_writetime(buf, 0);
- refile_buffer(buf);
touch_buffer(buf);
if (buf->b_count) {
@@ -1401,9 +1383,7 @@
if (!bh->b_blocknr) {
err = -EIO;
- down(&inode->i_sem);
phys = fs_get_block (inode, block, 1, &err, &created);
- up(&inode->i_sem);
if (!phys)
goto out;
@@ -1491,9 +1471,7 @@
}
if (!bh->b_blocknr) {
err = -EIO;
- down(&inode->i_sem);
phys = fs_get_block (inode, block, 1, &err, &created);
- up(&inode->i_sem);
if (!phys)
goto out;
@@ -1505,16 +1483,19 @@
* We also rely on the fact that filesystem holes
* cannot be written.
*/
- if (!created && (start_offset ||
- (end_bytes && (i == end_block)))) {
- bh->b_state = 0;
- ll_rw_block(READ, 1, &bh);
- lock_kernel();
- wait_on_buffer(bh);
- unlock_kernel();
- err = -EIO;
- if (!buffer_uptodate(bh))
- goto out;
+ if (start_offset || (end_bytes && (i == end_block))) {
+ if (created) {
+ memset(bh->b_data, 0, bh->b_size);
+ } else {
+ bh->b_state = 0;
+ ll_rw_block(READ, 1, &bh);
+ lock_kernel();
+ wait_on_buffer(bh);
+ unlock_kernel();
+ err = -EIO;
+ if (!buffer_uptodate(bh))
+ goto out;
+ }
}
bh->b_state = (1<<BH_Uptodate);
@@ -1524,21 +1505,17 @@
*/
bh->b_end_io = end_buffer_io_sync;
set_bit(BH_Uptodate, &bh->b_state);
+ created = 0;
}
err = -EFAULT;
+ len = blocksize;
if (start_offset) {
len = start_bytes;
start_offset = 0;
- } else
- if (end_bytes && (i == end_block)) {
+ } else if (end_bytes && (i == end_block)) {
len = end_bytes;
end_bytes = 0;
- } else {
- /*
- * Overwritten block.
- */
- len = blocksize;
}
if (copy_from_user(target_buf, buf, len))
goto out;
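The created-block handling above avoids a pointless disk read: a block that was just allocated has no old contents worth reading, so a partial write only needs the buffer zeroed first; only a pre-existing block must be read in so the bytes outside the written range survive. A sketch of that decision in isolation, with minimal stand-in types and a hypothetical bh_read_and_wait() helper replacing the ll_rw_block()/wait_on_buffer() sequence in the patch:

#include <string.h>

/* Minimal stand-ins for the kernel types involved (illustrative only). */
struct buffer_head {
    char *b_data;
    unsigned long b_size;
};

/* Hypothetical helper standing in for the ll_rw_block(READ) plus
 * wait_on_buffer() sequence; returns 0 on success, -EIO on failure. */
extern int bh_read_and_wait(struct buffer_head *bh);

static int prepare_partial_block(struct buffer_head *bh, int created)
{
    if (created) {
        /* Freshly allocated block: there is no old data on disk,
         * so zeroing the buffer is all the preparation needed. */
        memset(bh->b_data, 0, bh->b_size);
        return 0;
    }
    /* Pre-existing block: read the old contents first so the bytes
     * outside the written range survive the partial update. */
    return bh_read_and_wait(bh);
}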
@@ -1549,8 +1526,24 @@
* we dirty buffers only after copying the data into
* the page - this way we can dirty the buffer even if
* the bh is still doing IO.
+ *
+	 * NOTE! This also does a direct dirty balance check,
+	 * rather than relying on bdflush just waking up every
+	 * once in a while. This is to catch (and slow down)
+	 * the processes that write tons of buffers..
+ *
+ * Note how we do NOT want to do this in the full block
+ * case: full pages are flushed not by the people who
+ * dirtied them, but by people who need memory. And we
+ * should not penalize them for somebody else writing
+ * lots of dirty pages.
*/
- atomic_mark_buffer_dirty(bh,0);
+ if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
+ __atomic_mark_buffer_dirty(bh, bdf_prm.b_un.age_buffer);
+ if (too_many_dirty_buffers)
+ balance_dirty(bh->b_dev);
+ }
+
skip:
i++;
block++;
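The test_and_set_bit() guard above means only the writer that actually flips a buffer from clean to dirty pays for the balance check; re-dirtying an already dirty buffer stays on the fast path. The same charge-on-transition pattern in a self-contained userland form, using C11 atomics (the names here are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>

/* Userland analogue of the clean->dirty transition guard: the atomic
 * test-and-set returns the previous value, so only the caller that
 * actually performed the clean->dirty transition runs the expensive
 * accounting (the balance_dirty() call in the patch). */
static atomic_flag buffer_dirty_flag = ATOMIC_FLAG_INIT;

static bool mark_dirty_once(void)
{
    if (!atomic_flag_test_and_set(&buffer_dirty_flag)) {
        /* We flipped clean -> dirty: do the one-time accounting here. */
        return true;
    }
    return false;   /* Already dirty: fast path, nothing to account. */
}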
@@ -1827,6 +1820,9 @@
tmp = tmp->b_this_page;
if (!buffer_busy(p))
continue;
+
+ too_many_dirty_buffers = 1;
+ wakeup_bdflush(0);
return 0;
} while (tmp != bh);
@@ -2033,8 +2029,6 @@
if (buffer_locked(bh) || !buffer_dirty(bh))
continue;
ndirty++;
- if(time_before(jiffies, bh->b_flushtime))
- continue;
nwritten++;
next->b_count++;
bh->b_count++;
@@ -2102,28 +2096,13 @@
return error;
}
-/* This is the actual bdflush daemon itself. It used to be started from
+/*
+ * This is the actual bdflush daemon itself. It used to be started from
* the syscall above, but now we launch it ourselves internally with
- * kernel_thread(...) directly after the first thread in init/main.c */
-
-/* To prevent deadlocks for a loop device:
- * 1) Do non-blocking writes to loop (avoids deadlock with running
- * out of request blocks).
- * 2) But do a blocking write if the only dirty buffers are loop buffers
- * (otherwise we go into an infinite busy-loop).
- * 3) Quit writing loop blocks if a freelist went low (avoids deadlock
- * with running out of free buffers for loop's "real" device).
-*/
+ * kernel_thread(...) directly after the first thread in init/main.c
+ */
int bdflush(void * unused)
{
- int i;
- int ndirty;
- int nlist;
- int ncount;
- struct buffer_head * bh, *next;
- int major;
- int wrta_cmd = WRITEA; /* non-blocking write for LOOP */
-
/*
* We have a bare-bones task_struct, and really should fill
* in a few more things so "top" and /proc/2/{exe,root,cwd}
@@ -2143,99 +2122,91 @@
lock_kernel();
for (;;) {
-#ifdef DEBUG
- printk("bdflush() activated...");
-#endif
+ int nlist;
CHECK_EMERGENCY_SYNC
- ncount = 0;
-#ifdef DEBUG
- for(nlist = 0; nlist < NR_LIST; nlist++)
-#else
for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++)
-#endif
- {
- ndirty = 0;
- repeat:
-
- bh = lru_list[nlist];
- if(bh)
- for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty;
- bh = next) {
- /* We may have stalled while waiting for I/O to complete. */
- if(bh->b_list != nlist) goto repeat;
- next = bh->b_next_free;
- if(!lru_list[nlist]) {
- printk("Dirty list empty %d\n", i);
- break;
- }
-
- /* Clean buffer on dirty list? Refile it */
- if (nlist == BUF_DIRTY && !buffer_dirty(bh)) {
- refile_buffer(bh);
- continue;
- }
-
- /* Unlocked buffer on locked list? Refile it */
- if (nlist == BUF_LOCKED && !buffer_locked(bh)) {
- refile_buffer(bh);
- continue;
- }
-
- if (buffer_locked(bh) || !buffer_dirty(bh))
- continue;
- major = MAJOR(bh->b_dev);
- /* Should we write back buffers that are shared or not??
- currently dirty buffers are not shared, so it does not matter */
- next->b_count++;
- bh->b_count++;
- ndirty++;
- bh->b_flushtime = 0;
- if (major == LOOP_MAJOR) {
- ll_rw_block(wrta_cmd,1, &bh);
- wrta_cmd = WRITEA;
- if (buffer_dirty(bh))
- --ndirty;
- }
- else
- ll_rw_block(WRITE, 1, &bh);
-#ifdef DEBUG
- if(nlist != BUF_DIRTY) ncount++;
-#endif
- bh->b_count--;
- next->b_count--;
- wake_up(&buffer_wait);
- }
- }
-#ifdef DEBUG
- if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
- printk("sleeping again.\n");
-#endif
- /* If we didn't write anything, but there are still
- * dirty buffers, then make the next write to a
- * loop device to be a blocking write.
- * This lets us block--which we _must_ do! */
- if (ndirty == 0 && nr_buffers_type[BUF_DIRTY] > 0 && wrta_cmd != WRITE) {
- wrta_cmd = WRITE;
- continue;
+ {
+ int nr;
+ int written = 0;
+ struct buffer_head *next;
+ int major;
+
+ repeat:
+ next = lru_list[nlist];
+ nr = nr_buffers_type[nlist];
+
+ while (nr-- > 0) {
+ struct buffer_head *bh = next;
+ /* We may have stalled while waiting for I/O to complete. */
+ if (next->b_list != nlist)
+ goto repeat;
+ next = next->b_next_free;
+
+ /* Clean buffer on dirty list? Refile it */
+ if (nlist == BUF_DIRTY && !buffer_dirty(bh)) {
+ refile_buffer(bh);
+ continue;
+ }
+
+ /* Unlocked buffer on locked list? Refile it */
+ if (nlist == BUF_LOCKED && !buffer_locked(bh)) {
+ refile_buffer(bh);
+ continue;
+ }
+
+ /*
+ * If we aren't in panic mode, don't write out too much
+ * at a time. Also, don't write out buffers we don't really
+ * have to write out yet..
+ */
+ if (!too_many_dirty_buffers) {
+ if (written > bdf_prm.b_un.ndirty)
+ break;
+ if (time_before(jiffies, bh->b_flushtime))
+ continue;
+ }
+
+ if (buffer_locked(bh) || !buffer_dirty(bh))
+ continue;
+
+ major = MAJOR(bh->b_dev);
+ if (next)
+ next->b_count++;
+ bh->b_count++;
+ written++;
+ bh->b_flushtime = 0;
+
+ /*
+ * For the loop major we can try to do asynchronous writes,
+ * but we have to guarantee that we're making some progress..
+ */
+ if (major == LOOP_MAJOR && written > 1) {
+ ll_rw_block(WRITEA, 1, &bh);
+ if (buffer_dirty(bh))
+ --written;
+ } else
+ ll_rw_block(WRITE, 1, &bh);
+
+ bh->b_count--;
+ if (next)
+ next->b_count--;
+ wake_up(&buffer_wait);
+ }
}
run_task_queue(&tq_disk);
wake_up(&bdflush_done);
/*
* If there are still a lot of dirty buffers around,
- * skip the sleep and flush some more
+ * skip the sleep and flush some more. Otherwise, we
+	 * sleep for a while and mark ourselves as no longer
+	 * being in panic mode..
*/
- if ((ndirty == 0) || (nr_buffers_type[BUF_DIRTY] <=
- nr_buffers * bdf_prm.b_un.nfract/100)) {
-
- atomic_set(&too_many_dirty_buffers, 0);
-			spin_lock_irq(&current->sigmask_lock);
-			flush_signals(current);
-			spin_unlock_irq(&current->sigmask_lock);
-
- interruptible_sleep_on(&bdflush_wait);
+ if (!too_many_dirty_buffers || nr_buffers_type[BUF_DIRTY] < bdf_prm.b_un.ndirty) {
+ too_many_dirty_buffers = 0;
+ sleep_on_timeout(&bdflush_wait, 5*HZ);
}
}
}
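Taken together, the bdflush rewrite makes the daemon's write-out policy mode-dependent: a normal pass writes at most ndirty buffers per list and skips buffers whose flushtime has not yet expired, while a panic pass (too_many_dirty_buffers set) ignores both limits and flushes everything it can. A compressed userland simulation of that scan policy, with jiffies faked as a plain counter, the buffer list reduced to an array, and NDIRTY an assumed stand-in for bdf_prm.b_un.ndirty:

#include <stdio.h>

#define NDIRTY 4                     /* stands in for bdf_prm.b_un.ndirty */

static int too_many_dirty_buffers;
static unsigned long jiffies = 1000; /* fake clock */

struct fake_bh {
    unsigned long flushtime;
    int dirty;
};

static int scan_pass(struct fake_bh *list, int n)
{
    int written = 0;
    int i;

    for (i = 0; i < n; i++) {
        struct fake_bh *bh = &list[i];

        if (!bh->dirty)
            continue;
        if (!too_many_dirty_buffers) {
            if (written > NDIRTY)
                break;               /* normal mode: bounded per pass */
            if (jiffies < bh->flushtime)
                continue;            /* not old enough to flush yet */
        }
        bh->dirty = 0;               /* "write" the buffer out */
        written++;
    }
    return written;
}

int main(void)
{
    struct fake_bh bufs[8] = {
        {900, 1}, {1100, 1}, {900, 1}, {1100, 1},
        {900, 1}, {900, 1},  {900, 1}, {900, 1},
    };

    printf("normal pass wrote %d\n", scan_pass(bufs, 8));
    too_many_dirty_buffers = 1;      /* simulate panic mode */
    printf("panic pass wrote %d\n", scan_pass(bufs, 8));
    return 0;
}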