| // SPDX-License-Identifier: GPL-2.0 | 
 | /* | 
 |  * Copyright 2017 Omnibond Systems, L.L.C. | 
 |  */ | 
 |  | 
 | #include "protocol.h" | 
 | #include "orangefs-kernel.h" | 
 | #include "orangefs-bufmap.h" | 
 |  | 
 | struct orangefs_dir_part { | 
 | 	struct orangefs_dir_part *next; | 
 | 	size_t len; | 
 | }; | 
 |  | 
 | struct orangefs_dir { | 
 | 	__u64 token; | 
 | 	struct orangefs_dir_part *part; | 
 | 	loff_t end; | 
 | 	int error; | 
 | }; | 
 |  | 
 | #define PART_SHIFT (24) | 
 | #define PART_SIZE (1<<24) | 
 | #define PART_MASK (~(PART_SIZE - 1)) | 
 |  | 
 | /* | 
 |  * There can be up to 512 directory entries.  Each entry is encoded as | 
 |  * follows: | 
 |  * 4 bytes: string size (n) | 
 |  * n bytes: string | 
 |  * 1 byte: trailing zero | 
 |  * padding to 8 bytes | 
 |  * 16 bytes: khandle | 
 |  * padding to 8 bytes | 
 |  * | 
 |  * The trailer_buf starts with a struct orangefs_readdir_response_s | 
 |  * which must be skipped to get to the directory data. | 
 |  * | 
 |  * The data which is received from the userspace daemon is termed a | 
 |  * part and is stored in a linked list in case more than one part is | 
 |  * needed for a large directory. | 
 |  * | 
 |  * The position pointer (ctx->pos) encodes the part and offset on which | 
 |  * to begin reading at.  Bits above PART_SHIFT encode the part and bits | 
 |  * below PART_SHIFT encode the offset.  Parts are stored in a linked | 
 |  * list which grows as data is received from the server.  The overhead | 
 |  * associated with managing the list is presumed to be small compared to | 
 |  * the overhead of communicating with the server. | 
 |  * | 
 |  * As data is received from the server, it is placed at the end of the | 
 |  * part list.  Data is parsed from the current position as it is needed. | 
 |  * When data is determined to be corrupt, it is either because the | 
 |  * userspace component has sent back corrupt data or because the file | 
 |  * pointer has been moved to an invalid location.  Since the two cannot | 
 |  * be differentiated, return EIO. | 
 |  * | 
 |  * Part zero is synthesized to contains `.' and `..'.  Part one is the | 
 |  * first part of the part list. | 
 |  */ | 
 |  | 
 | static int do_readdir(struct orangefs_inode_s *oi, | 
 |     struct orangefs_dir *od, struct dentry *dentry, | 
 |     struct orangefs_kernel_op_s *op) | 
 | { | 
 | 	struct orangefs_readdir_response_s *resp; | 
 | 	int bufi, r; | 
 |  | 
 | 	/* | 
 | 	 * Despite the badly named field, readdir does not use shared | 
 | 	 * memory.  However, there are a limited number of readdir | 
 | 	 * slots, which must be allocated here.  This flag simply tells | 
 | 	 * the op scheduler to return the op here for retry. | 
 | 	 */ | 
 | 	op->uses_shared_memory = 1; | 
 | 	op->upcall.req.readdir.refn = oi->refn; | 
 | 	op->upcall.req.readdir.token = od->token; | 
 | 	op->upcall.req.readdir.max_dirent_count = | 
 | 	    ORANGEFS_MAX_DIRENT_COUNT_READDIR; | 
 |  | 
 | again: | 
 | 	bufi = orangefs_readdir_index_get(); | 
 | 	if (bufi < 0) { | 
 | 		od->error = bufi; | 
 | 		return bufi; | 
 | 	} | 
 |  | 
 | 	op->upcall.req.readdir.buf_index = bufi; | 
 |  | 
 | 	r = service_operation(op, "orangefs_readdir", | 
 | 	    get_interruptible_flag(dentry->d_inode)); | 
 |  | 
 | 	orangefs_readdir_index_put(bufi); | 
 |  | 
 | 	if (op_state_purged(op)) { | 
 | 		if (r == -EAGAIN) { | 
 | 			vfree(op->downcall.trailer_buf); | 
 | 			goto again; | 
 | 		} else if (r == -EIO) { | 
 | 			vfree(op->downcall.trailer_buf); | 
 | 			od->error = r; | 
 | 			return r; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	if (r < 0) { | 
 | 		vfree(op->downcall.trailer_buf); | 
 | 		od->error = r; | 
 | 		return r; | 
 | 	} else if (op->downcall.status) { | 
 | 		vfree(op->downcall.trailer_buf); | 
 | 		od->error = op->downcall.status; | 
 | 		return op->downcall.status; | 
 | 	} | 
 |  | 
 | 	/* | 
 | 	 * The maximum size is size per entry times the 512 entries plus | 
 | 	 * the header.  This is well under the limit. | 
 | 	 */ | 
 | 	if (op->downcall.trailer_size > PART_SIZE) { | 
 | 		vfree(op->downcall.trailer_buf); | 
 | 		od->error = -EIO; | 
 | 		return -EIO; | 
 | 	} | 
 |  | 
 | 	resp = (struct orangefs_readdir_response_s *) | 
 | 	    op->downcall.trailer_buf; | 
 | 	od->token = resp->token; | 
 | 	return 0; | 
 | } | 
 |  | 
 | static int parse_readdir(struct orangefs_dir *od, | 
 |     struct orangefs_kernel_op_s *op) | 
 | { | 
 | 	struct orangefs_dir_part *part, *new; | 
 | 	size_t count; | 
 |  | 
 | 	count = 1; | 
 | 	part = od->part; | 
 | 	while (part) { | 
 | 		count++; | 
 | 		if (part->next) | 
 | 			part = part->next; | 
 | 		else | 
 | 			break; | 
 | 	} | 
 |  | 
 | 	new = (void *)op->downcall.trailer_buf; | 
 | 	new->next = NULL; | 
 | 	new->len = op->downcall.trailer_size - | 
 | 	    sizeof(struct orangefs_readdir_response_s); | 
 | 	if (!od->part) | 
 | 		od->part = new; | 
 | 	else | 
 | 		part->next = new; | 
 | 	count++; | 
 | 	od->end = count << PART_SHIFT; | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | static int orangefs_dir_more(struct orangefs_inode_s *oi, | 
 |     struct orangefs_dir *od, struct dentry *dentry) | 
 | { | 
 | 	struct orangefs_kernel_op_s *op; | 
 | 	int r; | 
 |  | 
 | 	op = op_alloc(ORANGEFS_VFS_OP_READDIR); | 
 | 	if (!op) { | 
 | 		od->error = -ENOMEM; | 
 | 		return -ENOMEM; | 
 | 	} | 
 | 	r = do_readdir(oi, od, dentry, op); | 
 | 	if (r) { | 
 | 		od->error = r; | 
 | 		goto out; | 
 | 	} | 
 | 	r = parse_readdir(od, op); | 
 | 	if (r) { | 
 | 		od->error = r; | 
 | 		goto out; | 
 | 	} | 
 |  | 
 | 	od->error = 0; | 
 | out: | 
 | 	op_release(op); | 
 | 	return od->error; | 
 | } | 
 |  | 
 | static int fill_from_part(struct orangefs_dir_part *part, | 
 |     struct dir_context *ctx) | 
 | { | 
 | 	const int offset = sizeof(struct orangefs_readdir_response_s); | 
 | 	struct orangefs_khandle *khandle; | 
 | 	__u32 *len, padlen; | 
 | 	loff_t i; | 
 | 	char *s; | 
 | 	i = ctx->pos & ~PART_MASK; | 
 |  | 
 | 	/* The file offset from userspace is too large. */ | 
 | 	if (i > part->len) | 
 | 		return 1; | 
 |  | 
 | 	/* | 
 | 	 * If the seek pointer is positioned just before an entry it | 
 | 	 * should find the next entry. | 
 | 	 */ | 
 | 	if (i % 8) | 
 | 		i = i + (8 - i%8)%8; | 
 |  | 
 | 	while (i < part->len) { | 
 | 		if (part->len < i + sizeof *len) | 
 | 			break; | 
 | 		len = (void *)part + offset + i; | 
 | 		/* | 
 | 		 * len is the size of the string itself.  padlen is the | 
 | 		 * total size of the encoded string. | 
 | 		 */ | 
 | 		padlen = (sizeof *len + *len + 1) + | 
 | 		    (8 - (sizeof *len + *len + 1)%8)%8; | 
 | 		if (part->len < i + padlen + sizeof *khandle) | 
 | 			goto next; | 
 | 		s = (void *)part + offset + i + sizeof *len; | 
 | 		if (s[*len] != 0) | 
 | 			goto next; | 
 | 		khandle = (void *)part + offset + i + padlen; | 
 | 		if (!dir_emit(ctx, s, *len, | 
 | 		    orangefs_khandle_to_ino(khandle), | 
 | 		    DT_UNKNOWN)) | 
 | 			return 0; | 
 | 		i += padlen + sizeof *khandle; | 
 | 		i = i + (8 - i%8)%8; | 
 | 		BUG_ON(i > part->len); | 
 | 		ctx->pos = (ctx->pos & PART_MASK) | i; | 
 | 		continue; | 
 | next: | 
 | 		i += 8; | 
 | 	} | 
 | 	return 1; | 
 | } | 
 |  | 
 | static int orangefs_dir_fill(struct orangefs_inode_s *oi, | 
 |     struct orangefs_dir *od, struct dentry *dentry, | 
 |     struct dir_context *ctx) | 
 | { | 
 | 	struct orangefs_dir_part *part; | 
 | 	size_t count; | 
 |  | 
 | 	count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1; | 
 |  | 
 | 	part = od->part; | 
 | 	while (part->next && count) { | 
 | 		count--; | 
 | 		part = part->next; | 
 | 	} | 
 | 	/* This means the userspace file offset is invalid. */ | 
 | 	if (count) { | 
 | 		od->error = -EIO; | 
 | 		return -EIO; | 
 | 	} | 
 |  | 
 | 	while (part && part->len) { | 
 | 		int r; | 
 | 		r = fill_from_part(part, ctx); | 
 | 		if (r < 0) { | 
 | 			od->error = r; | 
 | 			return r; | 
 | 		} else if (r == 0) { | 
 | 			/* Userspace buffer is full. */ | 
 | 			break; | 
 | 		} else { | 
 | 			/* | 
 | 			 * The part ran out of data.  Move to the next | 
 | 			 * part. */ | 
 | 			ctx->pos = (ctx->pos & PART_MASK) + | 
 | 			    (1 << PART_SHIFT); | 
 | 			part = part->next; | 
 | 		} | 
 | 	} | 
 | 	return 0; | 
 | } | 
 |  | 
 | static loff_t orangefs_dir_llseek(struct file *file, loff_t offset, | 
 |     int whence) | 
 | { | 
 | 	struct orangefs_dir *od = file->private_data; | 
 | 	/* | 
 | 	 * Delete the stored data so userspace sees new directory | 
 | 	 * entries. | 
 | 	 */ | 
 | 	if (!whence && offset < od->end) { | 
 | 		struct orangefs_dir_part *part = od->part; | 
 | 		while (part) { | 
 | 			struct orangefs_dir_part *next = part->next; | 
 | 			vfree(part); | 
 | 			part = next; | 
 | 		} | 
 | 		od->token = ORANGEFS_ITERATE_START; | 
 | 		od->part = NULL; | 
 | 		od->end = 1 << PART_SHIFT; | 
 | 	} | 
 | 	return default_llseek(file, offset, whence); | 
 | } | 
 |  | 
 | static int orangefs_dir_iterate(struct file *file, | 
 |     struct dir_context *ctx) | 
 | { | 
 | 	struct orangefs_inode_s *oi; | 
 | 	struct orangefs_dir *od; | 
 | 	struct dentry *dentry; | 
 | 	int r; | 
 |  | 
 | 	dentry = file->f_path.dentry; | 
 | 	oi = ORANGEFS_I(dentry->d_inode); | 
 | 	od = file->private_data; | 
 |  | 
 | 	if (od->error) | 
 | 		return od->error; | 
 |  | 
 | 	if (ctx->pos == 0) { | 
 | 		if (!dir_emit_dot(file, ctx)) | 
 | 			return 0; | 
 | 		ctx->pos++; | 
 | 	} | 
 | 	if (ctx->pos == 1) { | 
 | 		if (!dir_emit_dotdot(file, ctx)) | 
 | 			return 0; | 
 | 		ctx->pos = 1 << PART_SHIFT; | 
 | 	} | 
 |  | 
 | 	/* | 
 | 	 * The seek position is in the first synthesized part but is not | 
 | 	 * valid. | 
 | 	 */ | 
 | 	if ((ctx->pos & PART_MASK) == 0) | 
 | 		return -EIO; | 
 |  | 
 | 	r = 0; | 
 |  | 
 | 	/* | 
 | 	 * Must read more if the user has sought past what has been read | 
 | 	 * so far.  Stop a user who has sought past the end. | 
 | 	 */ | 
 | 	while (od->token != ORANGEFS_ITERATE_END && | 
 | 	    ctx->pos > od->end) { | 
 | 		r = orangefs_dir_more(oi, od, dentry); | 
 | 		if (r) | 
 | 			return r; | 
 | 	} | 
 | 	if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end) | 
 | 		return -EIO; | 
 |  | 
 | 	/* Then try to fill if there's any left in the buffer. */ | 
 | 	if (ctx->pos < od->end) { | 
 | 		r = orangefs_dir_fill(oi, od, dentry, ctx); | 
 | 		if (r) | 
 | 			return r; | 
 | 	} | 
 |  | 
 | 	/* Finally get some more and try to fill. */ | 
 | 	if (od->token != ORANGEFS_ITERATE_END) { | 
 | 		r = orangefs_dir_more(oi, od, dentry); | 
 | 		if (r) | 
 | 			return r; | 
 | 		r = orangefs_dir_fill(oi, od, dentry, ctx); | 
 | 	} | 
 |  | 
 | 	return r; | 
 | } | 
 |  | 
 | static int orangefs_dir_open(struct inode *inode, struct file *file) | 
 | { | 
 | 	struct orangefs_dir *od; | 
 | 	file->private_data = kmalloc(sizeof(struct orangefs_dir), | 
 | 	    GFP_KERNEL); | 
 | 	if (!file->private_data) | 
 | 		return -ENOMEM; | 
 | 	od = file->private_data; | 
 | 	od->token = ORANGEFS_ITERATE_START; | 
 | 	od->part = NULL; | 
 | 	od->end = 1 << PART_SHIFT; | 
 | 	od->error = 0; | 
 | 	return 0; | 
 | } | 
 |  | 
 | static int orangefs_dir_release(struct inode *inode, struct file *file) | 
 | { | 
 | 	struct orangefs_dir *od = file->private_data; | 
 | 	struct orangefs_dir_part *part = od->part; | 
 | 	while (part) { | 
 | 		struct orangefs_dir_part *next = part->next; | 
 | 		vfree(part); | 
 | 		part = next; | 
 | 	} | 
 | 	kfree(od); | 
 | 	return 0; | 
 | } | 
 |  | 
 | const struct file_operations orangefs_dir_operations = { | 
 | 	.llseek = orangefs_dir_llseek, | 
 | 	.read = generic_read_dir, | 
 | 	.iterate_shared = orangefs_dir_iterate, | 
 | 	.open = orangefs_dir_open, | 
 | 	.release = orangefs_dir_release | 
 | }; |