pack struct file for cacheline optimizations
COPYRIGHT NOTICE: text and code copied or derived from git commit message, mailinglist, or codebase are subject to the linux kernel licensing rules
I reading struct file
related source code and … realized that my copy of
source code was very different than the mainline kernel code (… well this was
weird to me, like, who has the balls to change a data structure as important as
struct file
so dramatically? … anyways I found this recent patch (merged
Aug. 2024). Which reordered the struct members in the name of cache line
performance.
Anyways it is to
shrunk struct file to 192 bytes aka 3 cachelines reorder struct file to not leave any holes or have members cross cachelines.
It’s said to boost performance but they didn’t say how much.
Take away?
- don’t offset into an data structure because this kind of change could happen. You may know what your are doing but others don’t know your implements of hacks when change the code. Or more generally, don’t assume internal data layout (padding, aligning, ordering…) of a struct.
- it turns out that you could out-smart the compiler!
Original patch & discussions:
https://lore.kernel.org/all/20240824-peinigen-hocken-7384b977c643@brauner/
commit c0390d541128e8820af8177a572d9d87ff68a3bb
Author: Christian Brauner <brauner@kernel.org>
Date: Fri Aug 23 21:06:58 2024 +0200
fs: pack struct file
Now that we shrunk struct file to 192 bytes aka 3 cachelines reorder
struct file to not leave any holes or have members cross cachelines.
Add a short comment to each of the fields and mark the cachelines.
It's possible that we may have to tweak this based on profiling in the
future. So far I had Jens test this comparing io_uring with non-fixed
and fixed files and it improved performance. The layout is a combination
of Jens' and my changes.
Link: https: //lore.kernel.org/r/20240824-peinigen-hocken-7384b977c643@brauner
Signed-off-by: Christian Brauner <brauner@kernel.org>
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 095a956aeb29..af8bbd4eeb3a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -987,52 +987,63 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
index < ra->start + ra->size);
}
-/*
- * f_{lock,count,pos_lock} members can be highly contended and share
- * the same cacheline. f_{lock,mode} are very frequently used together
- * and so share the same cacheline as well. The read-mostly
- * f_{path,inode,op} are kept on a separate cacheline.
+/**
+ * struct file - Represents a file
+ * @f_count: reference count
+ * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context.
+ * @f_mode: FMODE_* flags often used in hotpaths
+ * @f_op: file operations
+ * @f_mapping: Contents of a cacheable, mappable object.
+ * @private_data: filesystem or driver specific data
+ * @f_inode: cached inode
+ * @f_flags: file flags
+ * @f_iocb_flags: iocb flags
+ * @f_cred: stashed credentials of creator/opener
+ * @f_path: path of the file
+ * @f_pos_lock: lock protecting file position
+ * @f_pos: file position
+ * @f_version: file version
+ * @f_security: LSM security context of this file
+ * @f_owner: file owner
+ * @f_wb_err: writeback error
+ * @f_sb_err: per sb writeback errors
+ * @f_ep: link of all epoll hooks for this file
+ * @f_task_work: task work entry point
+ * @f_llist: work queue entrypoint
+ * @f_ra: file's readahead state
*/
struct file {
- union {
- /* fput() uses task work when closing and freeing file (default). */
- struct callback_head f_task_work;
- /* fput() must use workqueue (most kernel threads). */
- struct llist_node f_llist;
- /* Invalid after last fput(). */
- struct file_ra_state f_ra;
- };
- /*
- * Protects f_ep, f_flags.
- * Must not be taken from IRQ context.
- */
- spinlock_t f_lock;
- fmode_t f_mode;
- atomic_long_t f_count;
- struct mutex f_pos_lock;
- loff_t f_pos;
- unsigned int f_flags;
- unsigned int f_iocb_flags;
- struct fown_struct *f_owner;
- const struct cred *f_cred;
- struct path f_path;
- struct inode *f_inode; /* cached value */
+ atomic_long_t f_count;
+ spinlock_t f_lock;
+ fmode_t f_mode;
const struct file_operations *f_op;
-
- u64 f_version;
+ struct address_space *f_mapping;
+ void *private_data;
+ struct inode *f_inode;
+ unsigned int f_flags;
+ unsigned int f_iocb_flags;
+ const struct cred *f_cred;
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct path f_path;
+ struct mutex f_pos_lock;
+ loff_t f_pos;
+ u64 f_version;
+ /* --- cacheline 2 boundary (128 bytes) --- */
#ifdef CONFIG_SECURITY
- void *f_security;
+ void *f_security;
#endif
- /* needed for tty driver, and maybe others */
- void *private_data;
-
+ struct fown_struct *f_owner;
+ errseq_t f_wb_err;
+ errseq_t f_sb_err;
#ifdef CONFIG_EPOLL
- /* Used by fs/eventpoll.c to link all the hooks to this file */
- struct hlist_head *f_ep;
-#endif /* #ifdef CONFIG_EPOLL */
- struct address_space *f_mapping;
- errseq_t f_wb_err;
- errseq_t f_sb_err; /* for syncfs */
+ struct hlist_head *f_ep;
+#endif
+ union {
+ struct callback_head f_task_work;
+ struct llist_node f_llist;
+ struct file_ra_state f_ra;
+ };
+ /* --- cacheline 3 boundary (192 bytes) --- */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */