Linux Core Kernel Commentary


mm/page_io.c


/*
 *  linux/mm/page_io.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95,
 *  Asynchronous swapping added 30.12.95. Stephen Tweedie
 *  Removed race in async swapping. 14.4.1996. Bruno Haible
 *  Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
 *  Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/locks.h>
#include <linux/swapctl.h>

#include <asm/pgtable.h>

static struct wait_queue * lock_queue = NULL;

/* Reads or writes a swap page.
 * wait=1: start I/O and wait for completion.
 * wait=0: start asynchronous I/O.
 *
 * Important prevention of race condition: the caller *must*
 * atomically create a unique swap cache entry for this swap page
 * before calling rw_swap_page, and must lock that page.  By ensuring
 * that there is a single page of memory reserved for the swap entry,
 * the normal VM page lock on that page also doubles as a lock on
 * swap entries.  Having only one lock to deal with per swap entry
 * (rather than locking swap and memory independently) also makes it
 * easier to make certain swapping operations atomic, which is
 * particularly important when we are trying to ensure that shared
 * pages stay shared while being swapped. */

static void rw_swap_page_base(int rw, unsigned long entry,
                              struct page *page, int wait)
{
    unsigned long type, offset;
    struct swap_info_struct * p;
    int zones[PAGE_SIZE/512];
    int zones_used;
    kdev_t dev = 0;
    int block_size;

#ifdef DEBUG_SWAP
    printk("DebugVM: %s_swap_page entry %08lx, page %p (count %d), %s\n",
           (rw == READ) ? "read" : "write",
           entry, (char *) page_address(page),
           atomic_read(&page->count),
           wait ? "wait" : "nowait");
#endif

    type = SWP_TYPE(entry);
    if (type >= nr_swapfiles) {
        printk("Internal error: bad swap-device\n");
        return;
    }

    /* Don't allow too many pending pages in flight.. */
    if (atomic_read(&nr_async_pages) > pager_daemon.swap_cluster)
        wait = 1;

    p = &swap_info[type];
    offset = SWP_OFFSET(entry);
    if (offset >= p->max) {
        printk("rw_swap_page: weirdness\n");
        return;
    }
    if (p->swap_map && !p->swap_map[offset]) {
        printk(KERN_ERR "rw_swap_page: "
               "Trying to %s unallocated swap (%08lx)\n",
               (rw == READ) ? "read" : "write", entry);
        return;
    }
    if (!(p->flags & SWP_USED)) {
        printk(KERN_ERR "rw_swap_page: "
               "Trying to swap to unused swap-device\n");
        return;
    }

    if (!PageLocked(page)) {
        printk(KERN_ERR "VM: swap page is unlocked\n");
        return;
    }

    if (PageSwapCache(page)) {
        /* Make sure we are the only process doing I/O with
         * this swap page. */
        while (test_and_set_bit(offset, p->swap_lockmap)) {
            run_task_queue(&tq_disk);
            sleep_on(&lock_queue);
        }

        /* Make sure that we have a swap cache association for
         * this page.  We need this to find which swap page to
         * unlock once the swap IO has completed to the physical
         * page.  If the page is not already in the cache, just
         * overload the offset entry as if it were: we are not
         * allowed to manipulate the inode hashing for locked
         * pages. */
        if (page->offset != entry) {
            printk("swap entry mismatch");
            return;
        }
    }
    if (rw == READ) {
        clear_bit(PG_uptodate, &page->flags);
        kstat.pswpin++;
    } else
        kstat.pswpout++;

    atomic_inc(&page->count);
    if (p->swap_device) {
        zones[0] = offset;
        zones_used = 1;
        dev = p->swap_device;
        block_size = PAGE_SIZE;
    } else if (p->swap_file) {
        struct inode *swapf = p->swap_file->d_inode;
        int i;
        if (swapf->i_op->bmap == NULL
            && swapf->i_op->smap != NULL) {
            /* With MS-DOS, we use msdos_smap which returns a
             * sector number (not a cluster or block number).
             * It is a patch to enable the UMSDOS project.
             * Other people are working on a better solution.
             *
             * It sounds like ll_rw_swap_file defined its
             * operation size (sector size) based on PAGE_SIZE
             * and the number of blocks to read.  So using bmap
             * or smap should work even if smap will require
             * more blocks. */
            int j;
            unsigned int block = offset << 3;

            for (i = 0, j = 0; j < PAGE_SIZE; i++, j += 512) {
                if (!(zones[i] =
                      swapf->i_op->smap(swapf, block++))) {
                    printk("rw_swap_page: bad swap file\n");
                    return;
                }
            }
            block_size = 512;
        } else {
            int j;
            unsigned int block = offset
                << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);

            block_size = swapf->i_sb->s_blocksize;
            for (i = 0, j = 0; j < PAGE_SIZE; i++, j += block_size)
                if (!(zones[i] = bmap(swapf, block++))) {
                    printk("rw_swap_page: bad swap file\n");
                    return;
                }
            zones_used = i;
            dev = swapf->i_dev;
        }
    } else {
        printk(KERN_ERR "rw_swap_page: no swap file or device\n");
        /* Do some cleaning up so if this ever happens we can
         * hopefully trigger controlled shutdown. */
        if (PageSwapCache(page)) {
            if (!test_and_clear_bit(offset, p->swap_lockmap))
                printk("swap_after_unlock_page: lock already "
                       "cleared\n");
            wake_up(&lock_queue);
        }
        atomic_dec(&page->count);
        return;
    }
    if (!wait) {
        set_bit(PG_decr_after, &page->flags);
        atomic_inc(&nr_async_pages);
    }
    if (PageSwapCache(page)) {
        /* only lock/unlock swap cache pages! */
        set_bit(PG_swap_unlock_after, &page->flags);
    }
    set_bit(PG_free_after, &page->flags);

    /* block_size == PAGE_SIZE/zones_used */
    brw_page(rw, page, dev, zones, block_size, 0);

    /* Note! For consistency we do all of the logic,
     * decrementing the page count, and unlocking the page
     * in the swap lock map - in the IO completion handler. */
    if (!wait)
        return;
    wait_on_page(page);
    /* This shouldn't happen, but check to be sure. */
    if (atomic_read(&page->count) == 0)
        printk(KERN_ERR "rw_swap_page: page unused while waiting!\n");

#ifdef DEBUG_SWAP
    printk("DebugVM: %s_swap_page finished on page %p (count %d)\n",
           (rw == READ) ? "read" : "write",
           (char *) page_address(page),
           atomic_read(&page->count));
#endif
}
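The entry value decoded at the top of rw_swap_page_base packs a swap-device index and a slot offset into a single word. For reference, the i386 definitions of these macros in this kernel generation look roughly like the sketch below; the bit layout is architecture-specific and quoted from memory, so treat it as illustrative rather than authoritative:

    /* Approximate i386 swap-entry layout (include/asm-i386/pgtable.h
     * of the same era; illustrative, not authoritative):
     *   bit 0      - kept zero, so a swapped-out PTE never appears
     *                "present" to the MMU
     *   bits 1-6   - type: index into swap_info[]
     *   bits 8-31  - offset: page-sized slot within the swap area
     */
    #define SWP_TYPE(entry)         (((entry) >> 1) & 0x3f)
    #define SWP_OFFSET(entry)       ((entry) >> 8)
    #define SWP_ENTRY(type, offset) (((type) << 1) | ((offset) << 8))

Thus swap_info[SWP_TYPE(entry)] names the swap area and SWP_OFFSET(entry) the slot inside it, matching the range checks at the top of the function.

Note also that the three flags set just before brw_page (PG_decr_after, PG_swap_unlock_after, PG_free_after) are never acted on in this file's submission path; they are instructions to the buffer-layer completion code, which is also what eventually calls swap_after_unlock_page below. A condensed sketch of that completion-side logic, close to the helper fs/buffer.c uses in this kernel generation (reconstructed from memory, so names and details are illustrative):

    /* Sketch of the completion-side counterpart in fs/buffer.c;
     * runs once every buffer of the page has completed its I/O.
     * Illustrative only. */
    static inline void after_unlock_page(struct page * page)
    {
        if (test_and_clear_bit(PG_decr_after, &page->flags))
            atomic_dec(&nr_async_pages);          /* async slot released */
        if (test_and_clear_bit(PG_swap_unlock_after, &page->flags))
            swap_after_unlock_page(page->offset); /* see below */
        if (test_and_clear_bit(PG_free_after, &page->flags))
            __free_page(page);    /* pairs with the atomic_inc above */
    }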
/* Note: We could remove this totally asynchronous function,
 * and improve swap performance, and remove the need for the swap
 * lock map, by not removing pages from the swap cache until after
 * I/O has been processed and letting remove_from_page_cache
 * decrement the swap count just before it removes the page from
 * the page cache. */
/* This is run when asynchronous page I/O has completed. */
void swap_after_unlock_page(unsigned long entry)
{
    unsigned long type, offset;
    struct swap_info_struct * p;

    type = SWP_TYPE(entry);
    if (type >= nr_swapfiles) {
        printk("swap_after_unlock_page: bad swap-device\n");
        return;
    }
    p = &swap_info[type];
    offset = SWP_OFFSET(entry);
    if (offset >= p->max) {
        printk("swap_after_unlock_page: weirdness\n");
        return;
    }
    if (!test_and_clear_bit(offset, p->swap_lockmap))
        printk("swap_after_unlock_page: lock already cleared\n");
    wake_up(&lock_queue);
}

/* A simple wrapper so the base function doesn't need to enforce
 * that all swap pages go through the swap cache! */
void rw_swap_page(int rw, unsigned long entry, char *buf, int wait)
{
    struct page *page = mem_map + MAP_NR(buf);

    if (page->inode && page->inode != &swapper_inode)
        panic("Tried to swap a non-swapper page");

    /* Make sure that we have a swap cache association for this
     * page.  We need this to find which swap page to unlock once
     * the swap IO has completed to the physical page.  If the page
     * is not already in the cache, just overload the offset entry
     * as if it were: we are not allowed to manipulate the inode
     * hashing for locked pages. */
    if (!PageSwapCache(page)) {
        printk("VM: swap page is not in swap cache\n");
        return;
    }
    if (page->offset != entry) {
        printk("swap entry mismatch");
        return;
    }
    rw_swap_page_base(rw, entry, page, wait);
}

/* Setting up a new swap file needs a simple wrapper just to read
 * the swap signature.  SysV shared memory also needs a simple
 * wrapper. */
void rw_swap_page_nocache(int rw, unsigned long entry, char *buffer)
{
    struct page *page;

    page = mem_map + MAP_NR((unsigned long) buffer);
    wait_on_page(page);
    set_bit(PG_locked, &page->flags);
    if (test_and_set_bit(PG_swap_cache, &page->flags)) {
        printk("VM: read_swap_page: "
               "page already in swap cache!\n");
        return;
    }
    if (page->inode) {
        printk("VM: read_swap_page: "
               "page already in page cache!\n");
        return;
    }
    page->inode = &swapper_inode;
    page->offset = entry;
    atomic_inc(&page->count);    /* Protect from shrink_mmap() */
    rw_swap_page(rw, entry, buffer, 1);
    atomic_dec(&page->count);
    page->inode = 0;
    clear_bit(PG_swap_cache, &page->flags);
}
/* shmfs needs a version that doesn't put the page in the page
 * cache!  The swap lock map insists that pages be in the page
 * cache!  Therefore we can't use it.  Later, when we can remove
 * the need for the lock map, we can reduce the number of
 * functions exported. */
void rw_swap_page_nolock(int rw, unsigned long entry,
                         char *buffer, int wait)
{
    struct page *page = mem_map + MAP_NR((unsigned long) buffer);

    if (!PageLocked(page)) {
        printk("VM: rw_swap_page_nolock: page not locked!\n");
        return;
    }
    if (PageSwapCache(page)) {
        printk("VM: rw_swap_page_nolock: page in swap cache!\n");
        return;
    }
    rw_swap_page_base(rw, entry, page, wait);
}
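rw_swap_page_nolock, by contrast, serves callers whose pages deliberately stay out of the page cache, such as SysV shared memory. A minimal hypothetical caller illustrating the contract enforced by the two checks above (the page is locked by the caller and is not a swap cache page; the names here are illustrative):

    /* Hypothetical caller of rw_swap_page_nolock; illustrative only. */
    static void write_private_page_to_swap(unsigned long entry, char *buf)
    {
        struct page *page = mem_map + MAP_NR((unsigned long) buf);

        wait_on_page(page);
        set_bit(PG_locked, &page->flags);  /* caller holds the lock */
        rw_swap_page_nolock(WRITE, entry, buf, 1);
        /* wait==1: the I/O completion handler has already unlocked
         * the page by the time the call returns. */
    }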


