| /* |
| * Copyright (C) 2010 The Chromium OS Authors <chromium-os-dev@chromium.org> |
| * |
| * Device-Mapper block hash tree interface. |
| * See Documentation/device-mapper/dm-bht.txt for details. |
| * |
| * This file is released under the GPL. |
| */ |
| |
| #include <asm/atomic.h> |
| #include <asm/page.h> |
| #include <linux/bitops.h> /* for fls() */ |
| #include <linux/bug.h> |
| #include <linux/cpumask.h> /* nr_cpu_ids */ |
| /* #define CONFIG_DM_DEBUG 1 */ |
| #include <linux/device-mapper.h> |
| #include <linux/err.h> |
| #include <linux/errno.h> |
| #include <linux/gfp.h> |
| #include <linux/dm-bht.h> |
| #include <linux/kernel.h> |
| #include <linux/module.h> |
| #include <linux/scatterlist.h> |
| #include <linux/slab.h> /* k*alloc */ |
| #include <linux/string.h> /* memset */ |
| |
| #define DM_MSG_PREFIX "dm bht" |
| |
| /* For sector formatting. */ |
| #if defined(_LP64) || defined(__LP64__) || __BITS_PER_LONG == 64 |
| #define __PRIS_PREFIX "z" |
| #else |
| #define __PRIS_PREFIX "ll" |
| #endif |
| #define PRIu64 __PRIS_PREFIX "u" |
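
/*
 * Example (illustrative): cast sector_t to u64 when formatting so the
 * prefix chosen above matches the argument width on both 32- and 64-bit
 * builds:
 *
 *	DMDEBUG("io starts at sector %" PRIu64, (u64)start);
 */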
| |
| |
| /*----------------------------------------------- |
| * Utilities |
| *-----------------------------------------------*/ |
| |
| static u8 from_hex(u8 ch) |
| { |
| if ((ch >= '0') && (ch <= '9')) |
| return ch - '0'; |
| if ((ch >= 'a') && (ch <= 'f')) |
| return ch - 'a' + 10; |
| if ((ch >= 'A') && (ch <= 'F')) |
| return ch - 'A' + 10; |
	/* Invalid input maps to 0xff via the u8 return type. */
	return -1;
| } |
| |
| /** |
| * dm_bht_bin_to_hex - converts a binary stream to human-readable hex |
| * @binary: a byte array of length @binary_len |
| * @hex: a byte array of length @binary_len * 2 + 1 |
| */ |
| static void dm_bht_bin_to_hex(u8 *binary, u8 *hex, unsigned int binary_len) |
| { |
| while (binary_len-- > 0) { |
		sprintf((char *)hex, "%02hhx", *binary);
| hex += 2; |
| binary++; |
| } |
| } |
| |
| /** |
| * dm_bht_hex_to_bin - converts a hex stream to binary |
| * @binary: a byte array of length @binary_len |
| * @hex: a byte array of length @binary_len * 2 + 1 |
| */ |
| static void dm_bht_hex_to_bin(u8 *binary, const u8 *hex, |
| unsigned int binary_len) |
| { |
| while (binary_len-- > 0) { |
| *binary = from_hex(*(hex++)); |
| *binary *= 16; |
| *binary += from_hex(*(hex++)); |
| binary++; |
| } |
| } |
| |
| static void dm_bht_log_mismatch(struct dm_bht *bht, u8 *given, u8 *computed) |
| { |
| u8 given_hex[DM_BHT_MAX_DIGEST_SIZE * 2 + 1]; |
| u8 computed_hex[DM_BHT_MAX_DIGEST_SIZE * 2 + 1]; |
| dm_bht_bin_to_hex(given, given_hex, bht->digest_size); |
| dm_bht_bin_to_hex(computed, computed_hex, bht->digest_size); |
| DMERR_LIMIT("%s != %s", given_hex, computed_hex); |
| } |
| |
/* Used for turning verifiers into computers: the same traversal can
 * either compare digests or overwrite them in place.
 */
| typedef int (*dm_bht_compare_cb)(struct dm_bht *, u8 *, u8 *); |
| |
| /** |
| * dm_bht_compute_hash: hashes a page of data |
| */ |
| static int dm_bht_compute_hash(struct dm_bht *bht, const void *block, |
| u8 *digest) |
| { |
| struct hash_desc *hash_desc = &bht->hash_desc[smp_processor_id()]; |
| struct scatterlist sg; |
| |
	/* TODO(msb): Once we support block_size < PAGE_SIZE, change this to:
| * offset_into_page + length < page_size |
| * For now just check that block is page-aligned. |
| */ |
| /* |
| * TODO(msb): Re-enable once user-space code is modified to use |
| * aligned buffers. |
| * BUG_ON(!IS_ALIGNED((uintptr_t)block, PAGE_SIZE)); |
| */ |
| |
| sg_init_table(&sg, 1); |
| sg_set_buf(&sg, block, PAGE_SIZE); |
| /* Note, this is synchronous. */ |
| if (crypto_hash_init(hash_desc)) { |
| DMCRIT("failed to reinitialize crypto hash (proc:%d)", |
| smp_processor_id()); |
| return -EINVAL; |
| } |
| if (crypto_hash_digest(hash_desc, &sg, PAGE_SIZE, digest)) { |
| DMCRIT("crypto_hash_digest failed"); |
| return -EINVAL; |
| } |
| |
| return 0; |
| } |
| |
| static __always_inline struct dm_bht_level *dm_bht_get_level(struct dm_bht *bht, |
| unsigned int depth) |
| { |
| return &bht->levels[depth]; |
| } |
| |
| static __always_inline unsigned int dm_bht_get_level_shift(struct dm_bht *bht, |
| unsigned int depth) |
| { |
| return (bht->depth - depth) * bht->node_count_shift; |
| } |
| |
/* At the given depth, this is the index of the entry holding @leaf.
 * Evaluated at depth + 1, it yields the node index for depth (before
 * the modulo by node_count).
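 *
 * Example (illustrative): with node_count_shift = 7 (128 digests per
 * page) and bht->depth = 2, leaf 70000 lives in entry 70000 >> 7 = 546
 * at depth 1 and in entry 70000 >> 14 = 4 at depth 0.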
| */ |
| static __always_inline unsigned int dm_bht_index_at_level(struct dm_bht *bht, |
| unsigned int depth, |
| unsigned int leaf) |
| { |
| return leaf >> dm_bht_get_level_shift(bht, depth); |
| } |
| |
| static __always_inline u8 *dm_bht_node(struct dm_bht *bht, |
| struct dm_bht_entry *entry, |
| unsigned int node_index) |
| { |
| return &entry->nodes[node_index * bht->digest_size]; |
| } |
| |
| static inline struct dm_bht_entry *dm_bht_get_entry(struct dm_bht *bht, |
| unsigned int depth, |
| unsigned int block_index) |
| { |
| unsigned int index = dm_bht_index_at_level(bht, depth, block_index); |
| struct dm_bht_level *level = dm_bht_get_level(bht, depth); |
| |
| BUG_ON(index >= level->count); |
| |
| return &level->entries[index]; |
| } |
| |
| static inline u8 *dm_bht_get_node(struct dm_bht *bht, |
| struct dm_bht_entry *entry, |
| unsigned int depth, |
| unsigned int block_index) |
| { |
| unsigned int index = dm_bht_index_at_level(bht, depth, block_index); |
| |
| return dm_bht_node(bht, entry, index % bht->node_count); |
| } |
| |
| |
| /*----------------------------------------------- |
| * Implementation functions |
| *-----------------------------------------------*/ |
| |
| static int dm_bht_initialize_entries(struct dm_bht *bht); |
| |
| static int dm_bht_read_callback_stub(void *ctx, sector_t start, u8 *dst, |
| sector_t count, |
| struct dm_bht_entry *entry); |
| static int dm_bht_write_callback_stub(void *ctx, sector_t start, |
| u8 *dst, sector_t count, |
| struct dm_bht_entry *entry); |
| |
| /** |
 * dm_bht_create - prepares @bht for use
 * @bht:	pointer to the dm_bht to initialize
| * @depth: tree depth without the root; including block hashes |
| * @block_count:the number of block hashes / tree leaves |
| * @alg_name: crypto hash algorithm name |
| * |
| * Returns 0 on success. |
| * |
| * Callers can offset into devices by storing the data in the io callbacks. |
| * TODO(wad) bust up into smaller helpers |
| */ |
| int dm_bht_create(struct dm_bht *bht, unsigned int depth, |
| unsigned int block_count, const char *alg_name) |
| { |
| int status = 0; |
| int cpu = 0; |
| |
| /* Allocate enough crypto contexts to be able to perform verifies |
| * on all available CPUs. |
| */ |
	bht->hash_desc = kcalloc(nr_cpu_ids, sizeof(struct hash_desc),
				 GFP_KERNEL);
| if (!bht->hash_desc) { |
| DMERR("failed to allocate crypto hash contexts"); |
| return -ENOMEM; |
| } |
| |
	/* Set up the hash first. Its length determines much of the bht layout */
| for (cpu = 0; cpu < nr_cpu_ids; ++cpu) { |
| bht->hash_desc[cpu].tfm = crypto_alloc_hash(alg_name, 0, 0); |
| if (IS_ERR(bht->hash_desc[cpu].tfm)) { |
| DMERR("failed to allocate crypto hash '%s'", alg_name); |
| status = -ENOMEM; |
| bht->hash_desc[cpu].tfm = NULL; |
| goto bad_hash_alg; |
| } |
| } |
| bht->digest_size = crypto_hash_digestsize(bht->hash_desc[0].tfm); |
| /* We expect to be able to pack >=2 hashes into a page */ |
| if (PAGE_SIZE / bht->digest_size < 2) { |
| DMERR("too few hashes fit in a page"); |
| status = -EINVAL; |
| goto bad_digest_len; |
| } |
| |
| if (bht->digest_size > DM_BHT_MAX_DIGEST_SIZE) { |
| DMERR("DM_BHT_MAX_DIGEST_SIZE too small for chosen digest"); |
| status = -EINVAL; |
| goto bad_digest_len; |
| } |
| |
	bht->root_digest = kzalloc(bht->digest_size, GFP_KERNEL);
| if (!bht->root_digest) { |
| DMERR("failed to allocate memory for root digest"); |
| status = -ENOMEM; |
| goto bad_root_digest_alloc; |
| } |
| /* We use the same defines for root state but just: |
| * UNALLOCATED, REQUESTED, and VERIFIED since the workflow is |
| * different. |
| */ |
| atomic_set(&bht->root_state, DM_BHT_ENTRY_UNALLOCATED); |
| |
| /* Configure the tree */ |
| bht->block_count = block_count; |
| DMDEBUG("Setting block_count %u", block_count); |
| if (block_count == 0) { |
| DMERR("block_count must be non-zero"); |
| status = -EINVAL; |
| goto bad_block_count; |
| } |
| |
| /* Each dm_bht_entry->nodes is one page. The node code tracks |
| * how many nodes fit into one entry where a node is a single |
| * hash (message digest). |
| */ |
| bht->node_count_shift = fls(PAGE_SIZE / bht->digest_size) - 1; |
| /* Round down to the nearest power of two. This makes indexing |
| * into the tree much less painful. |
| */ |
| bht->node_count = 1 << bht->node_count_shift; |
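
	/* Example (illustrative): PAGE_SIZE = 4096 with a 32-byte digest
	 * (e.g. "sha256") gives fls(4096 / 32) - 1 = 7, so node_count = 128.
	 */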
| |
| /* This is unlikely to happen, but with 64k pages, who knows. */ |
| if (bht->node_count > UINT_MAX / bht->digest_size) { |
| DMERR("node_count * hash_len exceeds UINT_MAX!"); |
| status = -EINVAL; |
| goto bad_node_count; |
| } |
| |
	/* if depth == 0, create a "regular" tree with a single root block */
| if (depth == 0) |
| depth = DIV_ROUND_UP(fls(block_count - 1), |
| bht->node_count_shift); |
| if (depth > UINT_MAX / sizeof(struct dm_bht_level)) { |
| DMERR("bht depth is invalid: %u", depth); |
| status = -EINVAL; |
| goto bad_depth; |
| } |
| DMDEBUG("Setting depth to %u.", depth); |
| bht->depth = depth; |
| |
| /* Ensure that we can safely shift by this value. */ |
| if (depth * bht->node_count_shift >= sizeof(unsigned int) * 8) { |
| DMERR("specified depth and node_count_shift is too large"); |
| status = -EINVAL; |
| goto bad_node_count; |
| } |
| |
| /* Allocate levels. Each level of the tree may have an arbitrary number |
| * of dm_bht_entry structs. Each entry contains node_count nodes. |
| * Each node in the tree is a cryptographic digest of either node_count |
| * nodes on the subsequent level or of a specific block on disk. |
| */ |
	bht->levels = kcalloc(depth, sizeof(struct dm_bht_level),
			      GFP_KERNEL);
| if (!bht->levels) { |
| DMERR("failed to allocate tree levels"); |
| status = -ENOMEM; |
| goto bad_level_alloc; |
| } |
| |
| /* Setup callback stubs */ |
| bht->read_cb = &dm_bht_read_callback_stub; |
| bht->write_cb = &dm_bht_write_callback_stub; |
| |
| status = dm_bht_initialize_entries(bht); |
| if (status) |
| goto bad_entries_alloc; |
| |
| return 0; |
| |
| bad_entries_alloc: |
| while (bht->depth-- > 0) |
| kfree(bht->levels[bht->depth].entries); |
| kfree(bht->levels); |
| bad_node_count: |
| bad_level_alloc: |
| bad_block_count: |
| bad_depth: |
| kfree(bht->root_digest); |
| bad_root_digest_alloc: |
| bad_digest_len: |
| for (cpu = 0; cpu < nr_cpu_ids; ++cpu) |
| if (bht->hash_desc[cpu].tfm) |
| crypto_free_hash(bht->hash_desc[cpu].tfm); |
| bad_hash_alg: |
| kfree(bht->hash_desc); |
| return status; |
| } |
| EXPORT_SYMBOL(dm_bht_create); |
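
/*
 * Typical verification-side usage (sketch only; error handling omitted,
 * and my_read_cb/expected_hex/io_ctx are hypothetical):
 *
 *	struct dm_bht bht;
 *
 *	dm_bht_create(&bht, 0, block_count, "sha256");
 *	dm_bht_set_read_cb(&bht, my_read_cb);
 *	dm_bht_set_root_hexdigest(&bht, expected_hex);
 *	...
 *	dm_bht_populate(&bht, io_ctx, block_index);
 *	dm_bht_verify_block(&bht, block_index, block_data);
 *	...
 *	dm_bht_destroy(&bht);
 */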
| |
| static int dm_bht_initialize_entries(struct dm_bht *bht) |
| { |
	/* last_index is the index of the last block digest that will be
	 * stored in the tree. By walking the tree with that index, it is
	 * possible to compute the total number of entries needed at each
	 * level in the tree.
| * |
| * Since each entry will contain up to |node_count| nodes of the tree, |
| * it is possible that the last index may not be at the end of a given |
| * entry->nodes. In that case, it is assumed the value is padded. |
| * |
| * Note, we treat both the tree root (1 hash) and the tree leaves |
| * independently from the bht data structures. Logically, the root is |
| * depth=-1 and the block layer level is depth=bht->depth |
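	 *
	 * Example (illustrative): block_count = 1000 with node_count = 128
	 * gives last_index = ALIGN(1000, 128) - 1 = 1023, so a depth-2 tree
	 * needs (1023 >> 7) + 1 = 8 entries at level 1 and
	 * (1023 >> 14) + 1 = 1 entry at level 0.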
| */ |
| unsigned int last_index = ALIGN(bht->block_count, bht->node_count) - 1; |
| unsigned int total_entries = 0; |
| struct dm_bht_level *level = NULL; |
| unsigned int depth; |
| |
| /* check that the largest level->count can't result in an int overflow |
| * on allocation or sector calculation. |
| */ |
| if (((last_index >> bht->node_count_shift) + 1) > |
| UINT_MAX / max((unsigned int)sizeof(struct dm_bht_entry), |
| (unsigned int)to_sector(PAGE_SIZE))) { |
| DMCRIT("required entries %u is too large", |
| last_index + 1); |
| return -EINVAL; |
| } |
| |
| /* Track the current sector location for each level so we don't have to |
| * compute it during traversals. |
| */ |
| bht->sectors = 0; |
| for (depth = 0; depth < bht->depth; ++depth) { |
| level = dm_bht_get_level(bht, depth); |
| level->count = dm_bht_index_at_level(bht, depth, |
| last_index) + 1; |
| DMDEBUG("depth: %u entries: %u", depth, level->count); |
| /* TODO(wad) consider the case where the data stored for each |
| * level is done with contiguous pages (instead of using |
| * entry->nodes) and the level just contains two bitmaps: |
| * (a) which pages have been loaded from disk |
| * (b) which specific nodes have been verified. |
| */ |
		level->entries = kcalloc(level->count,
					 sizeof(struct dm_bht_entry),
					 GFP_KERNEL);
| if (!level->entries) { |
| DMERR("failed to allocate entries for depth %u", |
| bht->depth); |
| /* let the caller clean up the mess */ |
| return -ENOMEM; |
| } |
| total_entries += level->count; |
| level->sector = bht->sectors; |
| /* number of sectors per entry * entries at this level */ |
| bht->sectors += level->count * to_sector(PAGE_SIZE); |
		/* Not ideal, but unsigned overflow is well-defined, so we
		 * can detect wraparound directly.
		 */
| if (bht->sectors < level->sector) { |
| DMCRIT("level sector calculation overflowed"); |
| return -EINVAL; |
| } |
| } |
| |
| /* Go ahead and reserve enough space for everything. We really don't |
| * want memory allocation failures. Once we start freeing verified |
| * entries, then we can reduce this reservation. |
| */ |
| bht->entry_pool = mempool_create_page_pool(total_entries, 0); |
| if (!bht->entry_pool) { |
| DMERR("failed to allocate mempool"); |
| return -ENOMEM; |
| } |
| |
| return 0; |
| } |
| |
| static int dm_bht_read_callback_stub(void *ctx, sector_t start, u8 *dst, |
| sector_t count, struct dm_bht_entry *entry) |
| { |
| DMCRIT("dm_bht_read_callback_stub called!"); |
| dm_bht_read_completed(entry, -EIO); |
| return -EIO; |
| } |
| |
| static int dm_bht_write_callback_stub(void *ctx, sector_t start, |
| u8 *dst, sector_t count, |
| struct dm_bht_entry *entry) |
| { |
| DMCRIT("dm_bht_write_callback_stub called!"); |
| dm_bht_write_completed(entry, -EIO); |
| return -EIO; |
| } |
| |
| /** |
| * dm_bht_read_completed |
| * @entry: pointer to the entry that's been loaded |
| * @status: I/O status. Non-zero is failure. |
| * MUST always be called after a read_cb completes. |
| */ |
| void dm_bht_read_completed(struct dm_bht_entry *entry, int status) |
| { |
| if (status) { |
| /* TODO(wad) add retry support */ |
| DMCRIT("an I/O error occurred while reading entry"); |
| atomic_set(&entry->state, DM_BHT_ENTRY_ERROR_IO); |
| /* entry->nodes will be freed later */ |
| return; |
| } |
| BUG_ON(atomic_read(&entry->state) != DM_BHT_ENTRY_PENDING); |
| atomic_set(&entry->state, DM_BHT_ENTRY_READY); |
| } |
| EXPORT_SYMBOL(dm_bht_read_completed); |
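
/*
 * Sketch of a synchronous read_cb (my_submit_read is hypothetical);
 * however the I/O is issued, dm_bht_read_completed() must be called once
 * the entry's data is in memory:
 *
 *	static int my_read_cb(void *ctx, sector_t start, u8 *dst,
 *			      sector_t count, struct dm_bht_entry *entry)
 *	{
 *		int err = my_submit_read(ctx, start, dst, count);
 *
 *		dm_bht_read_completed(entry, err);
 *		return err;
 *	}
 */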
| |
| /** |
| * dm_bht_write_completed |
| * @entry: pointer to the entry that's been loaded |
| * @status: I/O status. Non-zero is failure. |
 * Should be called after a write_cb completes. Currently only catches
 * errors which most writers don't care about.
| */ |
| void dm_bht_write_completed(struct dm_bht_entry *entry, int status) |
| { |
| if (status) { |
| DMCRIT("an I/O error occurred while writing entry"); |
| atomic_set(&entry->state, DM_BHT_ENTRY_ERROR_IO); |
| /* entry->nodes will be freed later */ |
| return; |
| } |
| } |
| EXPORT_SYMBOL(dm_bht_write_completed); |
| |
| |
/* dm_bht_maybe_read_entries
 * Attempts to atomically acquire each entry, allocates any needed
 * memory, and issues I/O callbacks to load the hashes from disk.
 * Returns 0 if all entries are loaded and verified.  On error, the
 * return value is negative.  When positive, it is the ORed state
 * values of the entries encountered.
 */
| static int dm_bht_maybe_read_entries(struct dm_bht *bht, void *ctx, |
| unsigned int depth, unsigned int index, |
| unsigned int count, bool until_exist) |
| { |
| struct dm_bht_level *level; |
| struct dm_bht_entry *entry, *last_entry; |
| sector_t current_sector; |
| int state = 0; |
| int status = 0; |
	struct page *node_page = NULL;

	BUG_ON(depth >= bht->depth);
| |
| level = &bht->levels[depth]; |
| if (count > level->count - index) { |
| DMERR("dm_bht_maybe_read_entries(d=%u,ei=%u,count=%u): " |
| "index+count exceeds available entries %u", |
| depth, index, count, level->count); |
| return -EINVAL; |
| } |
| /* XXX: hardcoding PAGE_SIZE means that a perfectly valid image |
| * on one system may not work on a different kernel. |
| * TODO(wad) abstract PAGE_SIZE with a bht->entry_size or |
| * at least a define and ensure bht->entry_size is |
| * sector aligned at least. |
| */ |
| current_sector = level->sector + to_sector(index * PAGE_SIZE); |
| for (entry = &level->entries[index], last_entry = entry + count; |
| entry < last_entry; |
| ++entry, current_sector += to_sector(PAGE_SIZE)) { |
| /* If the entry's state is UNALLOCATED, then we'll claim it |
| * for allocation and loading. |
| */ |
| state = atomic_cmpxchg(&entry->state, |
| DM_BHT_ENTRY_UNALLOCATED, |
| DM_BHT_ENTRY_PENDING); |
| DMDEBUG("dm_bht_maybe_read_entries(d=%u,ei=%u,count=%u): " |
| "ei=%lu, state=%d", |
| depth, index, count, |
| (unsigned long)(entry - level->entries), state); |
| if (state <= DM_BHT_ENTRY_ERROR) { |
| DMCRIT("entry %u is in an error state", index); |
| return state; |
| } |
| |
| /* Currently, the verified state is unused. */ |
| if (state == DM_BHT_ENTRY_VERIFIED) { |
| if (until_exist) |
| return 0; |
| /* Makes 0 == verified. Is that ideal? */ |
| continue; |
| } |
| |
| if (state != DM_BHT_ENTRY_UNALLOCATED) { |
| /* PENDING, READY, ... */ |
| if (until_exist) |
| return state; |
| status |= state; |
| continue; |
| } |
| /* Current entry is claimed for allocation and loading */ |
		node_page = mempool_alloc(bht->entry_pool, GFP_NOIO);
| if (!node_page) { |
| DMCRIT("failed to allocate memory for " |
| "entry->nodes from pool"); |
| return -ENOMEM; |
| } |
| /* dm-bht guarantees page-aligned memory for callbacks. */ |
| entry->nodes = page_address(node_page); |
| /* Let the caller know that not all the data is yet available */ |
| status |= DM_BHT_ENTRY_REQUESTED; |
| /* Issue the read callback */ |
| /* TODO(wad) error check callback here too */ |
| DMDEBUG("dm_bht_maybe_read_entries(d=%u,ei=%u,count=%u): " |
| "reading %lu", |
| depth, index, count, |
| (unsigned long)(entry - level->entries)); |
| bht->read_cb(ctx, /* external context */ |
| current_sector, /* starting sector */ |
| entry->nodes, /* destination */ |
| to_sector(PAGE_SIZE), |
| entry); /* io context */ |
| |
| } |
| /* Should only be 0 if all entries were verified and not just ready */ |
| return status; |
| } |
| |
| static int dm_bht_compare_hash(struct dm_bht *bht, u8 *known, u8 *computed) |
| { |
| return memcmp(known, computed, bht->digest_size); |
| } |
| |
| static int dm_bht_update_hash(struct dm_bht *bht, u8 *known, u8 *computed) |
| { |
| #ifdef CONFIG_DM_DEBUG |
| u8 hex[DM_BHT_MAX_DIGEST_SIZE * 2 + 1]; |
| #endif |
| memcpy(known, computed, bht->digest_size); |
| #ifdef CONFIG_DM_DEBUG |
| dm_bht_bin_to_hex(computed, hex, bht->digest_size); |
| DMDEBUG("updating with hash: %s", hex); |
| #endif |
| return 0; |
| } |
| |
/* Walks all entries at level 0 to compute the root digest.
 * Returns 0 on success.
 */
| static int dm_bht_compute_root(struct dm_bht *bht, u8 *digest) |
| { |
| struct dm_bht_entry *entry; |
| unsigned int count; |
| struct scatterlist sg; /* feeds digest() */ |
| struct hash_desc *hash_desc; |
| hash_desc = &bht->hash_desc[smp_processor_id()]; |
| entry = bht->levels[0].entries; |
| |
| if (crypto_hash_init(hash_desc)) { |
| DMCRIT("failed to reinitialize crypto hash (proc:%d)", |
| smp_processor_id()); |
| return -EINVAL; |
| } |
| |
| /* Point the scatterlist to the entries, then compute the digest */ |
| for (count = 0; count < bht->levels[0].count; ++count, ++entry) { |
| if (atomic_read(&entry->state) <= DM_BHT_ENTRY_PENDING) { |
| DMCRIT("data not ready to compute root: %u", |
| count); |
| return 1; |
| } |
| sg_init_table(&sg, 1); |
| sg_set_buf(&sg, entry->nodes, PAGE_SIZE); |
| if (crypto_hash_update(hash_desc, &sg, PAGE_SIZE)) { |
| DMCRIT("Failed to update crypto hash"); |
| return -EINVAL; |
| } |
| } |
| |
| if (crypto_hash_final(hash_desc, digest)) { |
| DMCRIT("Failed to compute final digest"); |
| return -EINVAL; |
| } |
| |
| return 0; |
| } |
| |
| static int dm_bht_verify_root(struct dm_bht *bht, |
| dm_bht_compare_cb compare_cb) |
| { |
| int status = 0; |
| u8 digest[DM_BHT_MAX_DIGEST_SIZE]; |
| if (atomic_read(&bht->root_state) == DM_BHT_ENTRY_VERIFIED) |
| return 0; |
| status = dm_bht_compute_root(bht, digest); |
| if (status) { |
| DMCRIT("Failed to compute root digest for verification"); |
| return status; |
| } |
| DMDEBUG("root computed"); |
| status = compare_cb(bht, bht->root_digest, digest); |
| if (status) { |
| DMCRIT("invalid root digest: %d", status); |
| dm_bht_log_mismatch(bht, bht->root_digest, digest); |
| return DM_BHT_ENTRY_ERROR_MISMATCH; |
| } |
| /* Could do a cmpxchg, but this should be safe. */ |
| atomic_set(&bht->root_state, DM_BHT_ENTRY_VERIFIED); |
| return 0; |
| } |
| |
/* dm_bht_verify_path
 * Verifies the path from the block's leaf hash up to the root.
 * Returns 0 on success.
 */
| static int dm_bht_verify_path(struct dm_bht *bht, unsigned int block_index, |
| const void *block) |
| { |
| unsigned int depth = bht->depth; |
| struct dm_bht_entry *entry; |
| int state; |
| |
| do { |
| u8 digest[DM_BHT_MAX_DIGEST_SIZE]; |
| u8 *node; |
| |
| /* Need to check that the hash of the current block is accurate |
| * in its parent. |
| */ |
| entry = dm_bht_get_entry(bht, depth - 1, block_index); |
| state = atomic_read(&entry->state); |
| /* This call is only safe if all nodes along the path |
| * are already populated (i.e. READY) via dm_bht_populate. |
| */ |
| BUG_ON(state < DM_BHT_ENTRY_READY); |
| node = dm_bht_get_node(bht, entry, depth, block_index); |
| |
| if (dm_bht_compute_hash(bht, block, digest) || |
| dm_bht_compare_hash(bht, digest, node)) |
| goto mismatch; |
| |
| /* Keep the containing block of hashes to be verified in the |
| * next pass. |
| */ |
| block = entry->nodes; |
| } while (--depth > 0 && state != DM_BHT_ENTRY_VERIFIED); |
| |
| /* Mark path to leaf as verified. */ |
| for (depth++; depth < bht->depth; depth++) { |
| entry = dm_bht_get_entry(bht, depth, block_index); |
| /* At this point, entry can only be in VERIFIED or READY state. |
| * So it is safe to use atomic_set instead of atomic_cmpxchg. |
| */ |
| atomic_set(&entry->state, DM_BHT_ENTRY_VERIFIED); |
| } |
| |
| DMDEBUG("verify_path: node %u is verified to root", block_index); |
| return 0; |
| |
| mismatch: |
| DMERR("verify_path: failed to verify hash against parent (d=%u,bi=%u)", |
| depth, block_index); |
| return DM_BHT_ENTRY_ERROR_MISMATCH; |
| } |
| |
| /** |
| * dm_bht_store_block - sets a given block's hash in the tree |
| * @bht: pointer to a dm_bht_create()d bht |
| * @block_index:numeric index of the block in the tree |
| * @digest: array of u8s containing the digest of length @bht->digest_size |
| * |
 * Returns 0 on success, >0 when data is pending, and <0 when an I/O or
 * other error has occurred.
| * |
| * If the containing entry in the tree is unallocated, it will allocate memory |
| * and mark the entry as ready. All other block entries will be 0s. This |
| * function is not safe for simultaneous use when verifying data and should not |
| * be used if the @bht is being accessed by any other functions in any other |
| * threads/processes. |
| * |
| * It is expected that virt_to_page will work on |block_data|. |
| */ |
| int dm_bht_store_block(struct dm_bht *bht, unsigned int block_index, |
| u8 *block_data) |
| { |
| int depth; |
| unsigned int index; |
| unsigned int node_index; |
| struct dm_bht_entry *entry; |
| struct dm_bht_level *level; |
| int state; |
| struct page *node_page = NULL; |
| |
| /* Look at the last level of nodes above the leaves (data blocks) */ |
| depth = bht->depth - 1; |
| |
| /* Index into the level */ |
| level = dm_bht_get_level(bht, depth); |
| index = dm_bht_index_at_level(bht, depth, block_index); |
| /* Grab the node index into the current entry by getting the |
| * index at the leaf-level. |
| */ |
| node_index = dm_bht_index_at_level(bht, depth + 1, block_index) % |
| bht->node_count; |
| entry = &level->entries[index]; |
| |
| DMDEBUG("Storing block %u in d=%d,ei=%u,ni=%u,s=%d", |
| block_index, depth, index, node_index, |
| atomic_read(&entry->state)); |
| |
| state = atomic_cmpxchg(&entry->state, |
| DM_BHT_ENTRY_UNALLOCATED, |
| DM_BHT_ENTRY_PENDING); |
| /* !!! Note. It is up to the users of the update interface to |
| * ensure the entry data is fully populated prior to use. |
| * The number of updated entries is NOT tracked. |
| */ |
| if (state == DM_BHT_ENTRY_UNALLOCATED) { |
		node_page = mempool_alloc(bht->entry_pool, GFP_KERNEL);
| if (!node_page) { |
| atomic_set(&entry->state, DM_BHT_ENTRY_ERROR); |
| return -ENOMEM; |
| } |
| entry->nodes = page_address(node_page); |
| memset(entry->nodes, 0, PAGE_SIZE); |
		/* TODO(wad) could expose this to the caller so that they
		 * can transition from unallocated to ready manually.
		 */
| atomic_set(&entry->state, DM_BHT_ENTRY_READY); |
| } else if (state <= DM_BHT_ENTRY_ERROR) { |
| DMCRIT("leaf entry for block %u is invalid", |
| block_index); |
| return state; |
| } else if (state == DM_BHT_ENTRY_PENDING) { |
| DMERR("leaf data is pending for block %u", block_index); |
| return 1; |
| } |
| |
| dm_bht_compute_hash(bht, block_data, |
| dm_bht_node(bht, entry, node_index)); |
| return 0; |
| } |
| EXPORT_SYMBOL(dm_bht_store_block); |
| |
| /** |
| * dm_bht_zeroread_callback - read callback which always returns 0s |
| * @ctx: ignored |
| * @start: ignored |
| * @data: buffer to write 0s to |
| * @count: number of sectors worth of data to write |
| * @complete_ctx: opaque context for @completed |
| * @completed: callback to confirm end of data read |
| * |
| * Always returns 0. |
| * |
| * Meant for use by dm_compute() callers. It allows dm_populate to |
| * be used to pre-fill a tree with zeroed out entry nodes. |
| */ |
| int dm_bht_zeroread_callback(void *ctx, sector_t start, u8 *dst, |
| sector_t count, struct dm_bht_entry *entry) |
| { |
| memset(dst, 0, to_bytes(count)); |
| dm_bht_read_completed(entry, 0); |
| return 0; |
| } |
| EXPORT_SYMBOL(dm_bht_zeroread_callback); |
| |
| /** |
| * dm_bht_compute - computes and updates all non-block-level hashes in a tree |
| * @bht: pointer to a dm_bht_create()d bht |
| * @read_cb_ctx:opaque read_cb context for all I/O on this call |
| * |
 * Returns 0 on success, >0 when data is pending, and <0 when an I/O or
 * other error has occurred.
| * |
| * Walks the tree and computes the hashes at each level from the |
| * hashes below. This can only be called once per tree creation |
 * since it will mark entries verified.  Expects the tree to be populated
 * via the configured read_cb (e.g. dm_bht_zeroread_callback).
| * |
| * This function should not be used when verifying the same tree and |
| * should not be used with multiple simultaneous operators on @bht. |
| */ |
| int dm_bht_compute(struct dm_bht *bht, void *read_cb_ctx) |
| { |
| int depth, r; |
| |
| for (depth = bht->depth - 2; depth >= 0; depth--) { |
| struct dm_bht_level *level = dm_bht_get_level(bht, depth); |
| struct dm_bht_level *child_level = level + 1; |
| struct dm_bht_entry *entry = level->entries; |
| struct dm_bht_entry *child = child_level->entries; |
| unsigned int i, j; |
| |
| r = dm_bht_maybe_read_entries(bht, read_cb_ctx, depth, |
| 0, level->count, true); |
| if (r < 0) { |
| DMCRIT("an error occurred while reading entry"); |
| goto out; |
| } |
| |
| for (i = 0; i < level->count; i++, entry++) { |
| unsigned int count = bht->node_count; |
| if (i == (level->count - 1)) |
| count = child_level->count % bht->node_count; |
| if (count == 0) |
| count = bht->node_count; |
| for (j = 0; j < count; j++, child++) { |
| u8 *block = child->nodes; |
| u8 *digest = dm_bht_node(bht, entry, j); |
| |
| r = dm_bht_compute_hash(bht, block, digest); |
| if (r) { |
| DMERR("Failed to update (d=%u,i=%u)", |
| depth, i); |
| goto out; |
| } |
| } |
| } |
| } |
| /* Don't forget the root digest! */ |
| DMDEBUG("Calling verify_root with update_hash"); |
| r = dm_bht_verify_root(bht, dm_bht_update_hash); |
| out: |
| return r; |
| } |
| EXPORT_SYMBOL(dm_bht_compute); |
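
/*
 * Sketch of an offline tree build (error handling omitted; blocks[],
 * my_write_cb, and write_ctx are hypothetical): store each leaf digest,
 * let the remaining entries zero-fill, compute the upper levels, then
 * write everything out:
 *
 *	dm_bht_set_read_cb(bht, dm_bht_zeroread_callback);
 *	dm_bht_set_write_cb(bht, my_write_cb);
 *	for (i = 0; i < bht->block_count; i++)
 *		dm_bht_store_block(bht, i, blocks[i]);
 *	dm_bht_compute(bht, NULL);
 *	dm_bht_sync(bht, write_ctx);
 */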
| |
| /** |
| * dm_bht_sync - writes the tree in memory to disk |
| * @bht: pointer to a dm_bht_create()d bht |
 * @write_cb_ctx: callback context for writes issued
| * |
| * Since all entry nodes are PAGE_SIZE, the data will be pre-aligned and |
| * padded. |
| */ |
| int dm_bht_sync(struct dm_bht *bht, void *write_cb_ctx) |
| { |
| unsigned int depth; |
| int ret = 0; |
| int state; |
| sector_t sector; |
| struct dm_bht_level *level; |
| struct dm_bht_entry *entry; |
| struct dm_bht_entry *entry_end; |
| |
| for (depth = 0; depth < bht->depth; ++depth) { |
| level = dm_bht_get_level(bht, depth); |
| entry_end = level->entries + level->count; |
| sector = level->sector; |
| for (entry = level->entries; entry < entry_end; ++entry) { |
| state = atomic_read(&entry->state); |
| if (state <= DM_BHT_ENTRY_PENDING) { |
| DMERR("At depth %d, entry %lu is not ready", |
| depth, |
| (unsigned long)(entry - level->entries)); |
| return state; |
| } |
| ret = bht->write_cb(write_cb_ctx, |
| sector, |
| entry->nodes, |
| to_sector(PAGE_SIZE), |
| entry); |
| if (ret) { |
| DMCRIT("an error occurred writing entry %lu", |
| (unsigned long)(entry - level->entries)); |
| return ret; |
| } |
| sector += to_sector(PAGE_SIZE); |
| } |
| } |
| |
| return 0; |
| } |
| EXPORT_SYMBOL(dm_bht_sync); |
| |
| /** |
| * dm_bht_is_populated - check that entries from disk needed to verify a given |
| * block are all ready |
| * @bht: pointer to a dm_bht_create()d bht |
| * @block_index:specific block data is expected from |
| * |
| * Callers may wish to call dm_bht_is_populated() when checking an io |
| * for which entries were already pending. |
| */ |
| bool dm_bht_is_populated(struct dm_bht *bht, unsigned int block_index) |
| { |
| unsigned int depth; |
| |
| if (atomic_read(&bht->root_state) < DM_BHT_ENTRY_READY) |
| return false; |
| |
| for (depth = bht->depth - 1; depth > 0; depth--) { |
| struct dm_bht_entry *entry = dm_bht_get_entry(bht, depth, |
| block_index); |
| if (atomic_read(&entry->state) < DM_BHT_ENTRY_READY) |
| return false; |
| } |
| |
| return true; |
| } |
| EXPORT_SYMBOL(dm_bht_is_populated); |
| |
| /** |
| * dm_bht_populate - reads entries from disk needed to verify a given block |
| * @bht: pointer to a dm_bht_create()d bht |
| * @read_cb_ctx:context used for all read_cb calls on this request |
| * @block_index:specific block data is expected from |
| * |
 * Callers may wish to call dm_bht_populate() with block_index 0 immediately
 * after initialization to start loading in all the level[0] entries.
| */ |
| int dm_bht_populate(struct dm_bht *bht, void *read_cb_ctx, |
| unsigned int block_index) |
| { |
| unsigned int depth; |
| struct dm_bht_level *level; |
| int populated = 0; /* return value */ |
| unsigned int entry_index = 0; |
| int read_status = 0; |
| int root_state = 0; |
| |
| if (block_index >= bht->block_count) { |
| DMERR("Request to populate for invalid block: %u", |
| block_index); |
| return -EIO; |
| } |
| DMDEBUG("dm_bht_populate(%u)", block_index); |
| |
| /* Load in all of level 0 if the root is unverified */ |
| root_state = atomic_read(&bht->root_state); |
| /* TODO(wad) create a separate io object for the root request which |
| * can continue on and be verified and stop making every request |
| * check. |
| */ |
| if (root_state != DM_BHT_ENTRY_VERIFIED) { |
| DMDEBUG("root data is not yet loaded"); |
| /* If positive, it means some are pending. */ |
| populated = dm_bht_maybe_read_entries(bht, read_cb_ctx, 0, 0, |
| bht->levels[0].count, |
| true); |
| if (populated < 0) { |
| DMCRIT("an error occurred while reading level[0]"); |
| /* TODO(wad) define std error codes */ |
| return populated; |
| } |
| } |
| |
| for (depth = 1; depth < bht->depth; ++depth) { |
| level = dm_bht_get_level(bht, depth); |
| entry_index = dm_bht_index_at_level(bht, depth, |
| block_index); |
| DMDEBUG("populate for bi=%u on d=%d ei=%u (max=%u)", |
| block_index, depth, entry_index, level->count); |
| |
| /* Except for the root node case, we should only ever need |
| * to load one entry along the path. |
| */ |
| read_status = dm_bht_maybe_read_entries(bht, read_cb_ctx, |
| depth, entry_index, |
| 1, false); |
| if (unlikely(read_status < 0)) { |
| DMCRIT("failure occurred reading entry %u depth %u", |
| entry_index, depth); |
| return read_status; |
| } |
| /* Accrue return code flags */ |
| populated |= read_status; |
| } |
| |
| /* All nodes are ready. The hash for the block_index can be verified */ |
| return populated; |
| } |
| EXPORT_SYMBOL(dm_bht_populate); |
| |
| |
| /** |
| * dm_bht_verify_block - checks that all nodes in the path for @block are valid |
| * @bht: pointer to a dm_bht_create()d bht |
| * @block_index:specific block data is expected from |
| * @block: virtual address of the block data in memory |
| * (must be aligned to block size) |
| * |
| * Returns 0 on success, 1 on missing data, and a negative error |
| * code on verification failure. All supporting functions called |
| * should return similarly. |
| */ |
| int dm_bht_verify_block(struct dm_bht *bht, unsigned int block_index, |
| const void *block) |
| { |
| int r = 0; |
| |
| /* Make sure that the root has been verified */ |
| if (atomic_read(&bht->root_state) != DM_BHT_ENTRY_VERIFIED) { |
| r = dm_bht_verify_root(bht, dm_bht_compare_hash); |
| if (r) { |
| DMERR_LIMIT("Failed to verify root: %d", r); |
| goto out; |
| } |
| } |
| |
| /* Now check levels in between */ |
| r = dm_bht_verify_path(bht, block_index, block); |
| if (r) |
| DMERR_LIMIT("Failed to verify block: %u (%d)", block_index, r); |
| |
| out: |
| return r; |
| } |
| EXPORT_SYMBOL(dm_bht_verify_block); |
| |
| /** |
| * dm_bht_destroy - cleans up all memory used by @bht |
| * @bht: pointer to a dm_bht_create()d bht |
| * |
| * Returns 0 on success. Does not free @bht itself. |
| */ |
| int dm_bht_destroy(struct dm_bht *bht) |
| { |
| unsigned int depth; |
| int cpu = 0; |
| |
| kfree(bht->root_digest); |
| |
| depth = bht->depth; |
| while (depth-- != 0) { |
| struct dm_bht_entry *entry = bht->levels[depth].entries; |
| struct dm_bht_entry *entry_end = entry + |
| bht->levels[depth].count; |
| int state = 0; |
| for (; entry < entry_end; ++entry) { |
| state = atomic_read(&entry->state); |
| switch (state) { |
| /* At present, no other states free memory, |
| * but that will change. |
| */ |
| case DM_BHT_ENTRY_UNALLOCATED: |
| /* Allocated with improper state */ |
| BUG_ON(entry->nodes); |
| continue; |
| default: |
| BUG_ON(!entry->nodes); |
| mempool_free(virt_to_page(entry->nodes), |
| bht->entry_pool); |
| break; |
| } |
| } |
| kfree(bht->levels[depth].entries); |
| bht->levels[depth].entries = NULL; |
| } |
| mempool_destroy(bht->entry_pool); |
| kfree(bht->levels); |
| for (cpu = 0; cpu < nr_cpu_ids; ++cpu) |
| if (bht->hash_desc[cpu].tfm) |
| crypto_free_hash(bht->hash_desc[cpu].tfm); |
| kfree(bht->hash_desc); |
| return 0; |
| } |
| EXPORT_SYMBOL(dm_bht_destroy); |
| |
| /*----------------------------------------------- |
| * Accessors |
| *-----------------------------------------------*/ |
| |
| /** |
| * dm_bht_sectors - return the sectors required on disk |
| * @bht: pointer to a dm_bht_create()d bht |
| */ |
| sector_t dm_bht_sectors(const struct dm_bht *bht) |
| { |
| return bht->sectors; |
| } |
| EXPORT_SYMBOL(dm_bht_sectors); |
| |
| /** |
| * dm_bht_set_read_cb - set read callback |
| * @bht: pointer to a dm_bht_create()d bht |
| * @read_cb: callback function used for all read requests by @bht |
| */ |
| void dm_bht_set_read_cb(struct dm_bht *bht, dm_bht_callback read_cb) |
| { |
| bht->read_cb = read_cb; |
| } |
| EXPORT_SYMBOL(dm_bht_set_read_cb); |
| |
| /** |
| * dm_bht_set_write_cb - set write callback |
| * @bht: pointer to a dm_bht_create()d bht |
| * @write_cb: callback function used for all write requests by @bht |
| */ |
| void dm_bht_set_write_cb(struct dm_bht *bht, dm_bht_callback write_cb) |
| { |
| bht->write_cb = write_cb; |
| } |
| EXPORT_SYMBOL(dm_bht_set_write_cb); |
| |
| /** |
| * dm_bht_set_root_hexdigest - sets an unverified root digest hash from hex |
| * @bht: pointer to a dm_bht_create()d bht |
 * @hexdigest: array of u8s containing the new digest in hex
 * Returns non-zero on error.  @hexdigest should be NUL terminated.
| */ |
| int dm_bht_set_root_hexdigest(struct dm_bht *bht, const u8 *hexdigest) |
| { |
| if (!bht->root_digest) { |
| DMCRIT("No allocation for root digest. Call dm_bht_create"); |
| return -1; |
| } |
| /* Make sure we have at least the bytes expected */ |
| if (strnlen((char *)hexdigest, bht->digest_size * 2) != |
| bht->digest_size * 2) { |
| DMERR("root digest length does not match hash algorithm"); |
| return -1; |
| } |
| dm_bht_hex_to_bin(bht->root_digest, hexdigest, bht->digest_size); |
| #ifdef CONFIG_DM_DEBUG |
| DMINFO("Set root digest to %s. Parsed as -> ", hexdigest); |
| dm_bht_log_mismatch(bht, bht->root_digest, bht->root_digest); |
| #endif |
| /* Mark the root as unallocated to ensure that it transitions to |
| * requested just in case the levels aren't loaded at this point. |
| */ |
| atomic_set(&bht->root_state, DM_BHT_ENTRY_UNALLOCATED); |
| return 0; |
| } |
| EXPORT_SYMBOL(dm_bht_set_root_hexdigest); |
| |
| /** |
| * dm_bht_root_hexdigest - returns root digest in hex |
| * @bht: pointer to a dm_bht_create()d bht |
| * @hexdigest: u8 array of size @available |
| * @available: must be bht->digest_size * 2 + 1 |
| */ |
| int dm_bht_root_hexdigest(struct dm_bht *bht, u8 *hexdigest, int available) |
| { |
| if (available < 0 || |
| ((unsigned int) available) < bht->digest_size * 2 + 1) { |
| DMERR("hexdigest has too few bytes available"); |
| return -EINVAL; |
| } |
| if (!bht->root_digest) { |
| DMERR("no root digest exists to export"); |
| if (available > 0) |
| *hexdigest = 0; |
| return -1; |
| } |
| dm_bht_bin_to_hex(bht->root_digest, hexdigest, bht->digest_size); |
| return 0; |
| } |
| EXPORT_SYMBOL(dm_bht_root_hexdigest); |