Diffstat (limited to 'drivers/staging/zsmalloc')
-rw-r--r--   drivers/staging/zsmalloc/Kconfig           |   4
-rw-r--r--   drivers/staging/zsmalloc/zsmalloc-main.c   | 233
-rw-r--r--   drivers/staging/zsmalloc/zsmalloc.h        |  20
-rw-r--r--   drivers/staging/zsmalloc/zsmalloc_int.h    |   6
4 files changed, 178 insertions, 85 deletions
diff --git a/drivers/staging/zsmalloc/Kconfig b/drivers/staging/zsmalloc/Kconfig
index a5ab7200626f..908456565796 100644
--- a/drivers/staging/zsmalloc/Kconfig
+++ b/drivers/staging/zsmalloc/Kconfig
@@ -1,9 +1,5 @@
 config ZSMALLOC
         tristate "Memory allocator for compressed pages"
-        # X86 dependency is because of the use of __flush_tlb_one and set_pte
-        # in zsmalloc-main.c.
-        # TODO: convert these to portable functions
-        depends on X86
         default n
         help
           zsmalloc is a slab-based memory allocator designed to store
diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c
index 449673773286..8b0bcb626a7f 100644
--- a/drivers/staging/zsmalloc/zsmalloc-main.c
+++ b/drivers/staging/zsmalloc/zsmalloc-main.c
@@ -10,6 +10,54 @@
  * Released under the terms of GNU General Public License Version 2.0
  */
 
+
+/*
+ * This allocator is designed for use with zcache and zram. Thus, the
+ * allocator is supposed to work well under low memory conditions. In
+ * particular, it never attempts higher order page allocation which is
+ * very likely to fail under memory pressure. On the other hand, if we
+ * just use single (0-order) pages, it would suffer from very high
+ * fragmentation -- any object of size PAGE_SIZE/2 or larger would occupy
+ * an entire page. This was one of the major issues with its predecessor
+ * (xvmalloc).
+ *
+ * To overcome these issues, zsmalloc allocates a bunch of 0-order pages
+ * and links them together using various 'struct page' fields. These linked
+ * pages act as a single higher-order page i.e. an object can span 0-order
+ * page boundaries. The code refers to these linked pages as a single entity
+ * called zspage.
+ *
+ * Following is how we use various fields and flags of underlying
+ * struct page(s) to form a zspage.
+ *
+ * Usage of struct page fields:
+ *      page->first_page: points to the first component (0-order) page
+ *      page->index (union with page->freelist): offset of the first object
+ *              starting in this page. For the first page, this is
+ *              always 0, so we use this field (aka freelist) to point
+ *              to the first free object in zspage.
+ *      page->lru: links together all component pages (except the first page)
+ *              of a zspage
+ *
+ *      For _first_ page only:
+ *
+ *      page->private (union with page->first_page): refers to the
+ *              component page after the first page
+ *      page->freelist: points to the first free object in zspage.
+ *              Free objects are linked together using in-place
+ *              metadata.
+ *      page->objects: maximum number of objects we can store in this
+ *              zspage (class->zspage_order * PAGE_SIZE / class->size)
+ *      page->lru: links together first pages of various zspages.
+ *              Basically forming list of zspages in a fullness group.
+ *      page->mapping: class index and fullness group of the zspage
+ *
+ * Usage of struct page flags:
+ *      PG_private: identifies the first component page
+ *      PG_private2: identifies the last component page
+ *
+ */
+
 #ifdef CONFIG_ZSMALLOC_DEBUG
 #define DEBUG
 #endif
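To make the page->objects arithmetic in the comment above concrete, here is a small userspace sketch (illustration only, not part of the patch; the size classes and pages_per_zspage values are made up):

    #include <stdio.h>

    #define PAGE_SIZE 4096

    /* hypothetical size classes: {object size, pages per zspage} */
    static const struct {
            int size;
            int pages_per_zspage;
    } classes[] = {
            { 32, 1 }, { 208, 1 }, { 2448, 3 }, { 3264, 4 },
    };

    int main(void)
    {
            unsigned int i;

            for (i = 0; i < sizeof(classes) / sizeof(classes[0]); i++) {
                    int zspage_bytes = classes[i].pages_per_zspage * PAGE_SIZE;
                    int objs = zspage_bytes / classes[i].size;   /* page->objects */
                    int waste = zspage_bytes % classes[i].size;  /* unusable tail */

                    printf("class %4d: %d page(s) -> %3d objects, %4d bytes wasted\n",
                           classes[i].size, classes[i].pages_per_zspage, objs, waste);
            }
            return 0;
    }

Linking several 0-order pages into one zspage is what lets a class pick a page count that leaves a smaller unusable tail than a single page would.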
@@ -247,13 +295,11 @@ static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
 }
 
 /* Decode <page, obj_idx> pair from the given object handle */
-static void obj_handle_to_location(void *handle, struct page **page,
+static void obj_handle_to_location(unsigned long handle, struct page **page,
                                 unsigned long *obj_idx)
 {
-        unsigned long hval = (unsigned long)handle;
-
-        *page = pfn_to_page(hval >> OBJ_INDEX_BITS);
-        *obj_idx = hval & OBJ_INDEX_MASK;
+        *page = pfn_to_page(handle >> OBJ_INDEX_BITS);
+        *obj_idx = handle & OBJ_INDEX_MASK;
 }
 
 static unsigned long obj_idx_to_offset(struct page *page,
@@ -354,7 +400,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
 static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
 {
         int i, error;
-        struct page *first_page = NULL;
+        struct page *first_page = NULL, *uninitialized_var(prev_page);
 
         /*
          * Allocate individual pages and link them together as:
@@ -369,7 +415,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
          */
         error = -ENOMEM;
         for (i = 0; i < class->pages_per_zspage; i++) {
-                struct page *page, *prev_page;
+                struct page *page;
 
                 page = alloc_page(flags);
                 if (!page)
@@ -424,12 +470,51 @@ static struct page *find_get_zspage(struct size_class *class)
         return page;
 }
 
+static void zs_copy_map_object(char *buf, struct page *firstpage,
+                                int off, int size)
+{
+        struct page *pages[2];
+        int sizes[2];
+        void *addr;
+
+        pages[0] = firstpage;
+        pages[1] = get_next_page(firstpage);
+        BUG_ON(!pages[1]);
+
+        sizes[0] = PAGE_SIZE - off;
+        sizes[1] = size - sizes[0];
+
+        /* copy object to per-cpu buffer */
+        addr = kmap_atomic(pages[0]);
+        memcpy(buf, addr + off, sizes[0]);
+        kunmap_atomic(addr);
+        addr = kmap_atomic(pages[1]);
+        memcpy(buf + sizes[0], addr, sizes[1]);
+        kunmap_atomic(addr);
+}
 
-/*
- * If this becomes a separate module, register zs_init() with
- * module_init(), zs_exit with module_exit(), and remove zs_initialized
-*/
-static int zs_initialized;
+static void zs_copy_unmap_object(char *buf, struct page *firstpage,
+                                int off, int size)
+{
+        struct page *pages[2];
+        int sizes[2];
+        void *addr;
+
+        pages[0] = firstpage;
+        pages[1] = get_next_page(firstpage);
+        BUG_ON(!pages[1]);
+
+        sizes[0] = PAGE_SIZE - off;
+        sizes[1] = size - sizes[0];
+
+        /* copy per-cpu buffer to object */
+        addr = kmap_atomic(pages[0]);
+        memcpy(addr + off, buf, sizes[0]);
+        kunmap_atomic(addr);
+        addr = kmap_atomic(pages[1]);
+        memcpy(addr, buf + sizes[0], sizes[1]);
+        kunmap_atomic(addr);
+}
 
 static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
                                 void *pcpu)
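zs_copy_map_object()/zs_copy_unmap_object() above simply split an object at the page boundary: sizes[0] = PAGE_SIZE - off bytes sit at the end of the first page and the remaining sizes[1] bytes at the start of the next. A userspace sketch of the same split, with plain buffers standing in for the kmapped pages (names invented for illustration, not part of the patch):

    #include <assert.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /* copy an object that starts at 'off' in pages[0] and spills into pages[1] */
    static void copy_map(char *buf, char *pages[2], int off, int size)
    {
            int sizes[2];

            sizes[0] = PAGE_SIZE - off;     /* tail of the first page  */
            sizes[1] = size - sizes[0];     /* head of the second page */

            memcpy(buf, pages[0] + off, sizes[0]);
            memcpy(buf + sizes[0], pages[1], sizes[1]);
    }

    static void copy_unmap(char *buf, char *pages[2], int off, int size)
    {
            int sizes[2];

            sizes[0] = PAGE_SIZE - off;
            sizes[1] = size - sizes[0];

            memcpy(pages[0] + off, buf, sizes[0]);
            memcpy(pages[1], buf + sizes[0], sizes[1]);
    }

    int main(void)
    {
            static char page0[PAGE_SIZE], page1[PAGE_SIZE], buf[256];
            char *pages[2] = { page0, page1 };
            int off = PAGE_SIZE - 100, size = 256;  /* object spans both pages */

            memset(page0 + off, 'A', 100);
            memset(page1, 'B', size - 100);

            copy_map(buf, pages, off, size);        /* buf holds the whole object */
            assert(buf[0] == 'A' && buf[size - 1] == 'B');

            buf[0] = 'C';
            copy_unmap(buf, pages, off, size);      /* write the change back */
            assert(page0[off] == 'C');
            return 0;
    }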
@@ -440,18 +525,23 @@ static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
         switch (action) {
         case CPU_UP_PREPARE:
                 area = &per_cpu(zs_map_area, cpu);
-                if (area->vm)
-                        break;
-                area->vm = alloc_vm_area(2 * PAGE_SIZE, area->vm_ptes);
-                if (!area->vm)
-                        return notifier_from_errno(-ENOMEM);
+                /*
+                 * Make sure we don't leak memory if a cpu UP notification
+                 * and zs_init() race and both call zs_cpu_up() on the same cpu
+                 */
+                if (area->vm_buf)
+                        return 0;
+                area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
+                if (!area->vm_buf)
+                        return -ENOMEM;
+                return 0;
                 break;
         case CPU_DEAD:
         case CPU_UP_CANCELED:
                 area = &per_cpu(zs_map_area, cpu);
-                if (area->vm)
-                        free_vm_area(area->vm);
-                area->vm = NULL;
+                if (area->vm_buf)
+                        free_page((unsigned long)area->vm_buf);
+                area->vm_buf = NULL;
                 break;
         }
 
@@ -489,7 +579,7 @@ fail:
 
 struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
 {
-        int i, error, ovhd_size;
+        int i, ovhd_size;
         struct zs_pool *pool;
 
         if (!name)
@@ -516,28 +606,9 @@ struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
         }
 
-        /*
-         * If this becomes a separate module, register zs_init with
-         * module_init, and remove this block
-         */
-        if (!zs_initialized) {
-                error = zs_init();
-                if (error)
-                        goto cleanup;
-                zs_initialized = 1;
-        }
-
         pool->flags = flags;
         pool->name = name;
 
-        error = 0; /* Success */
-
-cleanup:
-        if (error) {
-                zs_destroy_pool(pool);
-                pool = NULL;
-        }
-
         return pool;
 }
 EXPORT_SYMBOL_GPL(zs_create_pool);
@@ -568,12 +639,12 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool);
  * @size: size of block to allocate
  *
  * On success, handle to the allocated object is returned,
- * otherwise NULL.
+ * otherwise 0.
  * Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
  */
-void *zs_malloc(struct zs_pool *pool, size_t size)
+unsigned long zs_malloc(struct zs_pool *pool, size_t size)
 {
-        void *obj;
+        unsigned long obj;
         struct link_free *link;
         int class_idx;
         struct size_class *class;
@@ -582,7 +653,7 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
         unsigned long m_objidx, m_offset;
 
         if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
-                return NULL;
+                return 0;
 
         class_idx = get_size_class_index(size);
         class = &pool->size_class[class_idx];
@@ -595,14 +666,14 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
                 spin_unlock(&class->lock);
                 first_page = alloc_zspage(class, pool->flags);
                 if (unlikely(!first_page))
-                        return NULL;
+                        return 0;
 
                 set_zspage_mapping(first_page, class->index, ZS_EMPTY);
                 spin_lock(&class->lock);
                 class->pages_allocated += class->pages_per_zspage;
         }
 
-        obj = first_page->freelist;
+        obj = (unsigned long)first_page->freelist;
         obj_handle_to_location(obj, &m_page, &m_objidx);
         m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
 
@@ -621,7 +692,7 @@ void *zs_malloc(struct zs_pool *pool, size_t size)
 }
 EXPORT_SYMBOL_GPL(zs_malloc);
 
-void zs_free(struct zs_pool *pool, void *obj)
+void zs_free(struct zs_pool *pool, unsigned long obj)
 {
         struct link_free *link;
         struct page *first_page, *f_page;
@@ -648,7 +719,7 @@ void zs_free(struct zs_pool *pool, void *obj)
                                                         + f_offset);
         link->next = first_page->freelist;
         kunmap_atomic(link);
-        first_page->freelist = obj;
+        first_page->freelist = (void *)obj;
 
         first_page->inuse--;
         fullness = fix_fullness_group(pool, first_page);
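With these changes zs_malloc() returns an opaque unsigned long handle rather than a pointer; as obj_handle_to_location() earlier in the patch shows, the handle packs the page frame number and the object index. A userspace sketch of that encoding (the 11-bit index width is an arbitrary illustrative value, not the constant the driver uses; not part of the patch):

    #include <assert.h>

    #define OBJ_INDEX_BITS  11      /* illustrative width, not the driver's value */
    #define OBJ_INDEX_MASK  ((1UL << OBJ_INDEX_BITS) - 1)

    static unsigned long location_to_handle(unsigned long pfn, unsigned long obj_idx)
    {
            return (pfn << OBJ_INDEX_BITS) | (obj_idx & OBJ_INDEX_MASK);
    }

    static void handle_to_location(unsigned long handle, unsigned long *pfn,
                                    unsigned long *obj_idx)
    {
            *pfn = handle >> OBJ_INDEX_BITS;
            *obj_idx = handle & OBJ_INDEX_MASK;
    }

    int main(void)
    {
            unsigned long pfn, idx;
            unsigned long handle = location_to_handle(0x12345, 42);

            handle_to_location(handle, &pfn, &idx);
            assert(pfn == 0x12345 && idx == 42);
            return 0;
    }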
@@ -670,9 +741,15 @@ EXPORT_SYMBOL_GPL(zs_free);
  *
  * Before using an object allocated from zs_malloc, it must be mapped using
  * this function. When done with the object, it must be unmapped using
- * zs_unmap_object
+ * zs_unmap_object.
+ *
+ * Only one object can be mapped per cpu at a time. There is no protection
+ * against nested mappings.
+ *
+ * This function returns with preemption and page faults disabled.
  */
-void *zs_map_object(struct zs_pool *pool, void *handle)
+void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+                        enum zs_mapmode mm)
 {
         struct page *page;
         unsigned long obj_idx, off;
@@ -693,26 +770,20 @@ void *zs_map_object(struct zs_pool *pool, void *handle)
         if (off + class->size <= PAGE_SIZE) {
                 /* this object is contained entirely within a page */
                 area->vm_addr = kmap_atomic(page);
-        } else {
-                /* this object spans two pages */
-                struct page *nextp;
-
-                nextp = get_next_page(page);
-                BUG_ON(!nextp);
-
-
-                set_pte(area->vm_ptes[0], mk_pte(page, PAGE_KERNEL));
-                set_pte(area->vm_ptes[1], mk_pte(nextp, PAGE_KERNEL));
-
-                /* We pre-allocated VM area so mapping can never fail */
-                area->vm_addr = area->vm->addr;
+                return area->vm_addr + off;
         }
 
-        return area->vm_addr + off;
+        /* disable page faults to match kmap_atomic() return conditions */
+        pagefault_disable();
+
+        if (mm != ZS_MM_WO)
+                zs_copy_map_object(area->vm_buf, page, off, class->size);
+        area->vm_addr = NULL;
+        return area->vm_buf;
 }
 EXPORT_SYMBOL_GPL(zs_map_object);
 
-void zs_unmap_object(struct zs_pool *pool, void *handle)
+void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 {
         struct page *page;
         unsigned long obj_idx, off;
@@ -722,6 +793,17 @@ void zs_unmap_object(struct zs_pool *pool, void *handle)
         struct size_class *class;
         struct mapping_area *area;
 
+        area = &__get_cpu_var(zs_map_area);
+        /* single-page object fastpath */
+        if (area->vm_addr) {
+                kunmap_atomic(area->vm_addr);
+                goto out;
+        }
+
+        /* no write fastpath */
+        if (area->vm_mm == ZS_MM_RO)
+                goto pfenable;
+
         BUG_ON(!handle);
 
         obj_handle_to_location(handle, &page, &obj_idx);
@@ -729,15 +811,12 @@ void zs_unmap_object(struct zs_pool *pool, void *handle)
         class = &pool->size_class[class_idx];
         off = obj_idx_to_offset(page, obj_idx, class->size);
 
-        area = &__get_cpu_var(zs_map_area);
-        if (off + class->size <= PAGE_SIZE) {
-                kunmap_atomic(area->vm_addr);
-        } else {
-                set_pte(area->vm_ptes[0], __pte(0));
-                set_pte(area->vm_ptes[1], __pte(0));
-                __flush_tlb_one((unsigned long)area->vm_addr);
-                __flush_tlb_one((unsigned long)area->vm_addr + PAGE_SIZE);
-        }
+        zs_copy_unmap_object(area->vm_buf, page, off, class->size);
+
+pfenable:
+        /* enable page faults to match kunmap_atomic() return conditions */
+        pagefault_enable();
+out:
         put_cpu_var(zs_map_area);
 }
 EXPORT_SYMBOL_GPL(zs_unmap_object);
@@ -753,3 +832,9 @@ u64 zs_get_total_size_bytes(struct zs_pool *pool)
         return npages << PAGE_SHIFT;
 }
 EXPORT_SYMBOL_GPL(zs_get_total_size_bytes);
+
+module_init(zs_init);
+module_exit(zs_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
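The new mm argument only matters on the copy path taken when an object spans two pages: ZS_MM_WO skips the copy-in at map time (the caller promises to overwrite the whole object) and ZS_MM_RO skips the copy-out at unmap time. A minimal userspace model of that contract (invented names; flat buffers stand in for the object and the per-cpu vm_buf; not part of the patch):

    #include <assert.h>
    #include <string.h>

    enum zs_mapmode { ZS_MM_RW, ZS_MM_RO, ZS_MM_WO };

    #define OBJ_SIZE 128

    static char object[OBJ_SIZE];   /* stands in for the spanning object */
    static char vm_buf[OBJ_SIZE];   /* stands in for the per-cpu copy buffer */

    static char *map(enum zs_mapmode mm)
    {
            if (mm != ZS_MM_WO)                     /* write-only: skip copy-in */
                    memcpy(vm_buf, object, OBJ_SIZE);
            return vm_buf;
    }

    static void unmap(enum zs_mapmode mm)
    {
            if (mm != ZS_MM_RO)                     /* read-only: skip copy-out */
                    memcpy(object, vm_buf, OBJ_SIZE);
    }

    int main(void)
    {
            char *p;

            memset(object, 'x', OBJ_SIZE);

            p = map(ZS_MM_RO);              /* reads see the object's contents */
            assert(p[0] == 'x');
            p[0] = 'y';                     /* ...but writes are thrown away */
            unmap(ZS_MM_RO);
            assert(object[0] == 'x');

            p = map(ZS_MM_WO);              /* no copy-in: buffer contents stale */
            memset(p, 'z', OBJ_SIZE);       /* caller must overwrite everything */
            unmap(ZS_MM_WO);
            assert(object[0] == 'z');
            return 0;
    }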
diff --git a/drivers/staging/zsmalloc/zsmalloc.h b/drivers/staging/zsmalloc/zsmalloc.h
index 949384ee7491..de2e8bfbcc06 100644
--- a/drivers/staging/zsmalloc/zsmalloc.h
+++ b/drivers/staging/zsmalloc/zsmalloc.h
@@ -15,16 +15,28 @@
 
 #include <linux/types.h>
 
+/*
+ * zsmalloc mapping modes
+ *
+ * NOTE: These only make a difference when a mapped object spans pages
+*/
+enum zs_mapmode {
+        ZS_MM_RW, /* normal read-write mapping */
+        ZS_MM_RO, /* read-only (no copy-out at unmap time) */
+        ZS_MM_WO /* write-only (no copy-in at map time) */
+};
+
 struct zs_pool;
 
 struct zs_pool *zs_create_pool(const char *name, gfp_t flags);
 void zs_destroy_pool(struct zs_pool *pool);
 
-void *zs_malloc(struct zs_pool *pool, size_t size);
-void zs_free(struct zs_pool *pool, void *obj);
+unsigned long zs_malloc(struct zs_pool *pool, size_t size);
+void zs_free(struct zs_pool *pool, unsigned long obj);
 
-void *zs_map_object(struct zs_pool *pool, void *handle);
-void zs_unmap_object(struct zs_pool *pool, void *handle);
+void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+                        enum zs_mapmode mm);
+void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
 
 u64 zs_get_total_size_bytes(struct zs_pool *pool);
diff --git a/drivers/staging/zsmalloc/zsmalloc_int.h b/drivers/staging/zsmalloc/zsmalloc_int.h
index 6fd32a9e0315..528051767733 100644
--- a/drivers/staging/zsmalloc/zsmalloc_int.h
+++ b/drivers/staging/zsmalloc/zsmalloc_int.h
@@ -110,9 +110,9 @@ enum fullness_group {
 static const int fullness_threshold_frac = 4;
 
 struct mapping_area {
-        struct vm_struct *vm;
-        pte_t *vm_ptes[2];
-        char *vm_addr;
+        char *vm_buf; /* copy buffer for objects that span pages */
+        char *vm_addr; /* address of kmap_atomic()'ed pages */
+        enum zs_mapmode vm_mm; /* mapping mode */
 };
 
 struct size_class {
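Taking the updated header as a whole, a caller now works only with unsigned long handles and must pick a mapping mode. A rough kernel-side usage sketch (hypothetical caller and include path, error handling trimmed; this is not code from the patch):

    #include <linux/errno.h>
    #include <linux/string.h>
    #include "zsmalloc.h"

    static int store_compressed(struct zs_pool *pool, const void *src, size_t len)
    {
            unsigned long handle;
            void *dst;

            handle = zs_malloc(pool, len);
            if (!handle)                    /* failure is now 0, not NULL */
                    return -ENOMEM;

            /* write-only mapping: no copy-in of the uninitialized object */
            dst = zs_map_object(pool, handle, ZS_MM_WO);
            memcpy(dst, src, len);
            zs_unmap_object(pool, handle);

            /* ... later, read it back with a read-only mapping ... */
            dst = zs_map_object(pool, handle, ZS_MM_RO);
            /* use the data ... */
            zs_unmap_object(pool, handle);

            zs_free(pool, handle);
            return 0;
    }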