/* fs/logfs/logfs_abi.h - kernel/bruno - Git at Google */

 /*
  * fs/logfs/logfs_abi.h
  *
  * As should be obvious for Linux kernel code, license is GPLv2
  *
  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
  *
  * Public header for logfs.
  */
 #ifndef FS_LOGFS_LOGFS_ABI_H
 #define FS_LOGFS_LOGFS_ABI_H

 /*
  * For out-of-kernel compiles: when the including environment provides no
  * BUILD_BUG_ON, fall back to an empty definition.  With this fallback the
  * check_*() functions generated by SIZE_CHECK have empty bodies and no
  * size is verified.
  */
 #ifndef BUILD_BUG_ON
 #define BUILD_BUG_ON(condition) /**/
 #endif

 /*
  * SIZE_CHECK(type, size) - emit a static inline function check_<type>()
  * whose body hands "sizeof(struct type) != (size)" to BUILD_BUG_ON.
  * @type is the struct tag without the "struct" keyword, @size the expected
  * size in bytes.
  */
 #define SIZE_CHECK(type, size)					\
 static inline void check_##type(void)				\
 {								\
 	BUILD_BUG_ON(sizeof(struct type) != (size));		\
 }

 /*
  * Throughout the logfs code, we're constantly dealing with blocks at
  * various positions or offsets.  To remove confusion, we strictly
  * distinguish between a "position" - the logical position within a
  * file - and an "offset" - the physical location within the device.
  *
  * Any usage of the term offset for a logical location or position for
  * a physical one is a bug and should get fixed.
  */

 /*
  * Blocks are allocated in one of several segments depending on their
  * level.  The following levels are used:
  *  0	- regular data block
  *  1	- i1 indirect blocks
  *  2	- i2 indirect blocks
  *  3	- i3 indirect blocks
  *  4	- i4 indirect blocks
  *  5	- i5 indirect blocks
  *  6	- ifile data blocks
  *  7	- ifile i1 indirect blocks
  *  8	- ifile i2 indirect blocks
  *  9	- ifile i3 indirect blocks
  * 10	- ifile i4 indirect blocks
  * 11	- ifile i5 indirect blocks
  * Potential levels to be used in the future:
  * 12	- gc recycled blocks, long-lived data
  * 13	- replacement blocks, short-lived data
  *
  * Levels 1-11 are necessary for robust gc operations and help separate
  * short-lived metadata from longer-lived file data.  In the future,
  * file data should get separated into several segments based on simple
  * heuristics.  Old data recycled during gc operation is expected to be
  * long-lived.  New data is of uncertain life expectancy.  New data
  * used to replace older blocks in existing files is expected to be
  * short-lived.
  */


 /*
  * Magic numbers.  64bit for superblock, 32bit for statfs f_type.
  *
  * LOGFS_MAGIC		- 64bit value (ull suffix)
  * LOGFS_MAGIC_U32	- 32bit value (u suffix)
  */
 #define LOGFS_MAGIC		0x7a3a8e5cb9d5bf67ull
 #define LOGFS_MAGIC_U32		0xc97e8168u

 /*
  * Various blocksize related macros.  Blocksize is currently fixed at 4KiB.
  * Sooner or later that should become configurable and the macros replaced
  * by something superblock-dependent.  Pointers in indirect blocks are and
  * will remain 64bit.
  *
  * LOGFS_BLOCKSIZE	- self-explaining
  * LOGFS_BLOCK_FACTOR	- number of pointers per indirect block
  * LOGFS_BLOCK_BITS	- log2 of LOGFS_BLOCK_FACTOR, used for shifts
  *
  * With 4096-byte blocks and 8-byte pointers LOGFS_BLOCK_FACTOR evaluates
  * to 512, matching LOGFS_BLOCK_BITS == 9.  LOGFS_BLOCK_BITS is a literal,
  * not derived, so the two have to be kept in sync by hand.
  */
 #define LOGFS_BLOCKSIZE		(4096ull)
 #define LOGFS_BLOCK_FACTOR	(LOGFS_BLOCKSIZE / sizeof(u64))
 #define LOGFS_BLOCK_BITS	(9)

 /*
  * Number of blocks at various levels of indirection.  There are 16 direct
  * block pointers plus a single indirect pointer.
  *
  * Each In_BLOCKS is LOGFS_BLOCK_FACTOR times the level below it, starting
  * with I1_BLOCKS == LOGFS_BLOCK_FACTOR.
  *
  * INDIRECT_INDEX		- equals I0_BLOCKS; presumably the array index of
  *			  the indirect pointer behind the direct pointers
  *			  (TODO confirm against users)
  * LOGFS_EMBEDDED_FIELDS	- direct pointers plus one
  */
 #define I0_BLOCKS		(16)
 #define I1_BLOCKS		LOGFS_BLOCK_FACTOR
 #define I2_BLOCKS		(LOGFS_BLOCK_FACTOR * I1_BLOCKS)
 #define I3_BLOCKS		(LOGFS_BLOCK_FACTOR * I2_BLOCKS)
 #define I4_BLOCKS		(LOGFS_BLOCK_FACTOR * I3_BLOCKS)
 #define I5_BLOCKS		(LOGFS_BLOCK_FACTOR * I4_BLOCKS)

 #define INDIRECT_INDEX		I0_BLOCKS
 #define LOGFS_EMBEDDED_FIELDS	(I0_BLOCKS + 1)

 /*
  * Sizes at which files require another level of indirection.  Files smaller
  * than LOGFS_EMBEDDED_SIZE can be completely stored in the inode itself,
  * similar to ext2 fast symlinks.
  *
  * Data at a position smaller than LOGFS_I0_SIZE is accessed through the
  * direct pointers, else through the 1x indirect pointer and so forth.
  *
  * LOGFS_EMBEDDED_SIZE is the byte size of LOGFS_EMBEDDED_FIELDS 64bit
  * slots; every LOGFS_In_SIZE is the matching In_BLOCKS count multiplied
  * by LOGFS_BLOCKSIZE.
  */
 #define LOGFS_EMBEDDED_SIZE	(LOGFS_EMBEDDED_FIELDS * sizeof(u64))
 #define LOGFS_I0_SIZE		(I0_BLOCKS * LOGFS_BLOCKSIZE)
 #define LOGFS_I1_SIZE		(I1_BLOCKS * LOGFS_BLOCKSIZE)
 #define LOGFS_I2_SIZE		(I2_BLOCKS * LOGFS_BLOCKSIZE)
 #define LOGFS_I3_SIZE		(I3_BLOCKS * LOGFS_BLOCKSIZE)
 #define LOGFS_I4_SIZE		(I4_BLOCKS * LOGFS_BLOCKSIZE)
 #define LOGFS_I5_SIZE		(I5_BLOCKS * LOGFS_BLOCKSIZE)

 /*
  * Each indirect block pointer must have this flag set, if all block pointers
  * behind it are set, i.e. there is no hole hidden in the shadow of this
  * indirect block pointer.
  *
  * pure_ofs(ofs) yields @ofs with the LOGFS_FULLY_POPULATED bit cleared.
  * The argument is parenthesized so that an expression built from operators
  * binding weaker than '&' (such as "a | b") is masked as a whole instead of
  * only its last operand.
  */
 #define LOGFS_FULLY_POPULATED (1ULL << 63)
 #define pure_ofs(ofs) ((ofs) & ~LOGFS_FULLY_POPULATED)

 /*
  * LogFS needs to separate data into levels.  Each level is defined as the
  * maximal possible distance from the master inode (inode of the inode file).
  * Data blocks reside on level 0, 1x indirect block on level 1, etc.
  * Inodes reside on level 6, indirect blocks for the inode file on levels 7-11.
  * This effort is necessary to guarantee garbage collection to always make
  * progress.
  *
  * LOGFS_MAX_INDIRECT is the maximal indirection through indirect blocks,
  * LOGFS_MAX_LEVELS is one more for the actual data level of a file.  It is
  * the maximal number of levels for one file.
  * LOGFS_NO_AREAS is twice that, as the inode file and regular files are
  * effectively stacked on top of each other.
  *
  * Resulting values: LOGFS_MAX_INDIRECT 5, LOGFS_MAX_LEVELS 6,
  * LOGFS_NO_AREAS 12.
  */
 #define LOGFS_MAX_INDIRECT	(5)
 #define LOGFS_MAX_LEVELS	(LOGFS_MAX_INDIRECT + 1)
 #define LOGFS_NO_AREAS		(2 * LOGFS_MAX_LEVELS)

 /* Maximum size of filenames, in bytes (length of a dentry's name array) */
 #define LOGFS_MAX_NAMELEN	(255)

 /*
  * Number of segments in the primary journal (length of the superblock's
  * ds_journal_seg array).
  */
 #define LOGFS_JOURNAL_SEGS	(16)

 /* Maximum number of free/erased/etc. segments in journal entries */
 #define MAX_CACHED_SEGS		(64)


 /*
  * LOGFS_OBJECT_HEADERSIZE is the size of a single header in the object store,
  * LOGFS_SEGMENT_HEADERSIZE is the size of the header of a segment,
  * LOGFS_MAX_OBJECTSIZE the size of the largest possible object, including
  * its header,
  * LOGFS_SEGMENT_RESERVE is the amount of space reserved for each segment for
  * its segment header and the padded space at the end when no further objects
  * fit.
  *
  * The reserve is the segment header plus one byte less than the largest
  * object: LOGFS_SEGMENT_HEADERSIZE + LOGFS_MAX_OBJECTSIZE - 1.
  */
 #define LOGFS_OBJECT_HEADERSIZE	(0x1c)
 #define LOGFS_SEGMENT_HEADERSIZE (0x18)
 #define LOGFS_MAX_OBJECTSIZE	(LOGFS_OBJECT_HEADERSIZE + LOGFS_BLOCKSIZE)
 #define LOGFS_SEGMENT_RESERVE	\
 	(LOGFS_SEGMENT_HEADERSIZE + LOGFS_MAX_OBJECTSIZE - 1)

 /*
  * Segment types.
  *
  * NOTE(review): the descriptions that used to stand here ("Data or indirect
  * block", "Inode", "Dentry") were identical to those of the object types
  * and do not fit these names.  The meanings given below are inferred from
  * the identifiers only - TODO confirm.
  */
 enum {
 	SEG_SUPER = 0x01,	/* presumably: segment holding a superblock */
 	SEG_JOURNAL = 0x02,	/* presumably: segment used by the journal */
 	SEG_OSTORE = 0x03,	/* presumably: segment of the object store */
 };

 /**
  * struct logfs_segment_header - per-segment header in the ostore
  *
  * @crc:			crc32 of header (there is no data)
  * @pad:			unused, must be 0
  * @type:			segment type, see above
  * @level:			GC level for all objects in this segment
  * @segno:			segment number
  * @ec:				erase count for this segment
  * @gec:			global erase count at time of writing
  *
  * Multi-byte fields are big-endian (__be*).  The members add up to
  * 4 + 2 + 1 + 1 + 4 + 4 + 8 = 24 bytes; the SIZE_CHECK below compares the
  * struct size with LOGFS_SEGMENT_HEADERSIZE.
  */
 struct logfs_segment_header {
 	__be32	crc;
 	__be16	pad;
 	__u8	type;
 	__u8	level;
 	__be32	segno;
 	__be32	ec;
 	__be64	gec;
 };

 SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE);

 /*
  * Feature bit masks, all currently zero.
  * NOTE(review): presumably the feature sets known to this implementation,
  * to be matched against the ds_feature_* superblock fields - confirm
  * against the users of these macros.
  */
 #define LOGFS_FEATURES_INCOMPAT		(0ull)
 #define LOGFS_FEATURES_RO_COMPAT	(0ull)
 #define LOGFS_FEATURES_COMPAT		(0ull)

 /**
  * struct logfs_disk_super - on-medium superblock
  *
  * @ds_sh:			segment header at the start of the superblock
  * @ds_magic:			magic number, must equal LOGFS_MAGIC
  * @ds_crc:			crc32 of structure starting with the next field
  * @ds_ifile_levels:		maximum number of levels for ifile
  * @ds_iblock_levels:		maximum number of levels for regular files
  * @ds_data_levels:		number of separate levels for data
  * @ds_segment_shift:		log2 of segment size
  * @ds_block_shift:		log2 of block size
  * @ds_write_shift:		log2 of write size
  * @pad0:			reserved, must be 0
  * @ds_filesystem_size:	filesystem size (presumably in bytes - TODO
  *				confirm)
  * @ds_segment_size:		segment size (presumably in bytes - TODO
  *				confirm)
  * @ds_bad_seg_reserve:		number of segments reserved to handle bad blocks
  * @ds_feature_incompat:	incompatible filesystem features
  * @ds_feature_ro_compat:	read-only compatible filesystem features
  * @ds_feature_compat:		compatible filesystem features
  * @ds_feature_flags:		flags
  * @ds_root_reserve:		bytes reserved for the superuser
  * @ds_speed_reserve:		bytes reserved to speed up GC
  * @ds_journal_seg:		segments used by primary journal
  * @ds_super_ofs:		two 64bit offsets (presumably the locations of
  *				the superblock copies - TODO confirm)
  * @pad3:			reserved, must be 0
  *
  * Contains only read-only fields.  Read-write fields like the amount of used
  * space is tracked in the dynamic superblock, which is stored in the journal.
  *
  * The blank lines in the definition group the members into 16-byte rows;
  * the total is 256 bytes, as stated by the SIZE_CHECK below.
  */
 struct logfs_disk_super {
 	struct logfs_segment_header ds_sh;
 	__be64	ds_magic;

 	__be32	ds_crc;
 	__u8	ds_ifile_levels;
 	__u8	ds_iblock_levels;
 	__u8	ds_data_levels;
 	__u8	ds_segment_shift;
 	__u8	ds_block_shift;
 	__u8	ds_write_shift;
 	__u8	pad0[6];

 	__be64	ds_filesystem_size;
 	__be32	ds_segment_size;
 	__be32  ds_bad_seg_reserve;

 	__be64	ds_feature_incompat;
 	__be64	ds_feature_ro_compat;

 	__be64	ds_feature_compat;
 	__be64	ds_feature_flags;

 	__be64	ds_root_reserve;
 	__be64  ds_speed_reserve;

 	__be32	ds_journal_seg[LOGFS_JOURNAL_SEGS];

 	__be64	ds_super_ofs[2];
 	__be64	pad3[8];
 };

 SIZE_CHECK(logfs_disk_super, 256);

 /*
  * Object types.  The numbering continues after the segment types
  * (0x01-0x03), so the two sets of values do not overlap.
  */
 enum {
 	OBJ_BLOCK = 0x04,	/* Data or indirect block */
 	OBJ_INODE = 0x05,	/* Inode */
 	OBJ_DENTRY = 0x06,	/* Dentry */
 };

 /**
  * struct logfs_object_header - per-object header in the ostore
  *
  * @crc:			crc32 of header, excluding data_crc
  * @len:			length of data
  * @type:			object type, see above
  * @compr:			compression type
  * @ino:			inode number
  * @bix:			block index
  * @data_crc:			crc32 of payload
  *
  * The members add up to 4 + 2 + 1 + 1 + 8 + 8 + 4 = 28 (0x1c) bytes.  The
  * struct is packed so that no tail padding is added behind data_crc; the
  * SIZE_CHECK below compares the size with LOGFS_OBJECT_HEADERSIZE.
  */
 struct logfs_object_header {
 	__be32	crc;
 	__be16	len;
 	__u8	type;
 	__u8	compr;
 	__be64	ino;
 	__be64	bix;
 	__be32	data_crc;
 } __attribute__((packed));

 SIZE_CHECK(logfs_object_header, LOGFS_OBJECT_HEADERSIZE);

 /*
  * Reserved inode numbers.
  *
  * NOTE(review): LOGFS_INO_MAPPING and LOGFS_RESERVED_INOS carried no
  * description; the remarks on them below are guesses from the names.
  */
 enum {
 	LOGFS_INO_MAPPING = 0x00,	/* undocumented - TODO confirm purpose */
 	LOGFS_INO_MASTER = 0x01,	/* master inode (for inode file) */
 	LOGFS_INO_ROOT = 0x02,		/* root directory */
 	LOGFS_INO_SEGFILE = 0x03,	/* per-segment used bytes and erase count */
 	LOGFS_RESERVED_INOS = 0x10,	/* presumably: count of reserved numbers */
 };

 /*
  * Inode flags.  High bits should never be written to the medium.  They are
  * reserved for in-memory usage.
  * Low bits should either remain in sync with the corresponding FS_*_FL or
  * reuse slots that obviously don't make sense for logfs.
  *
  * LOGFS_IF_COMPRESSED	Same value as FS_COMPR_FL; the only low bit defined
  * LOGFS_IF_DIRTY	Inode must be written back
  * LOGFS_IF_ZOMBIE	Inode has been deleted
  * LOGFS_IF_STILLBORN	-ENOSPC happened when creating inode
  */
 #define LOGFS_IF_COMPRESSED	0x00000004 /* == FS_COMPR_FL */
 #define LOGFS_IF_DIRTY		0x20000000
 #define LOGFS_IF_ZOMBIE		0x40000000
 #define LOGFS_IF_STILLBORN	0x80000000

 /* Flags available to chattr: visible to / modifiable by the user */
 #define LOGFS_FL_USER_VISIBLE	(LOGFS_IF_COMPRESSED)
 #define LOGFS_FL_USER_MODIFIABLE (LOGFS_IF_COMPRESSED)
 /* Flags inherited from parent directory on file/directory creation */
 #define LOGFS_FL_INHERITED	(LOGFS_IF_COMPRESSED)

 /**
  * struct logfs_disk_inode - on-medium inode
  *
  * @di_mode:			file mode
  * @di_height:			undocumented in the original; presumably the
  *				height of the indirect block tree - TODO confirm
  * @di_pad:			reserved, must be 0
  * @di_flags:			inode flags, see above
  * @di_uid:			user id
  * @di_gid:			group id
  * @di_ctime:			change time
  * @di_mtime:			modify time
  * @di_atime:			undocumented in the original; presumably the
  *				access time - TODO confirm
  * @di_refcount:		reference count (aka nlink or link count)
  * @di_generation:		inode generation, for nfs
  * @di_used_bytes:		number of bytes used
  * @di_size:			file size
  * @di_data:			data pointers
  *
  * Eight 64bit words of metadata followed by LOGFS_EMBEDDED_FIELDS 64bit
  * data slots; the SIZE_CHECK below expects 200 bytes in total.
  */
 struct logfs_disk_inode {
 	__be16	di_mode;
 	__u8	di_height;
 	__u8	di_pad;
 	__be32	di_flags;
 	__be32	di_uid;
 	__be32	di_gid;

 	__be64	di_ctime;
 	__be64	di_mtime;

 	__be64	di_atime;
 	__be32	di_refcount;
 	__be32	di_generation;

 	__be64	di_used_bytes;
 	__be64	di_size;

 	__be64	di_data[LOGFS_EMBEDDED_FIELDS];
 };

 SIZE_CHECK(logfs_disk_inode, 200);

 /*
  * Positions of selected struct logfs_disk_inode members, counted in units
  * of sizeof(__be64) instead of bytes (byte offset divided by 8).
  * INODE_HEIGHT_OFS is a literal 0, i.e. the first 64bit word of the inode,
  * which is the word that contains di_height.
  */
 #define INODE_POINTER_OFS \
 	(offsetof(struct logfs_disk_inode, di_data) / sizeof(__be64))
 #define INODE_USED_OFS \
 	(offsetof(struct logfs_disk_inode, di_used_bytes) / sizeof(__be64))
 #define INODE_SIZE_OFS \
 	(offsetof(struct logfs_disk_inode, di_size) / sizeof(__be64))
 #define INODE_HEIGHT_OFS	(0)

 /**
  * struct logfs_disk_dentry - on-medium dentry structure
  *
  * @ino:			inode number
  * @namelen:			length of file name
  * @type:			file type, identical to bits 12..15 of mode
  * @name:			file name
  *
  * Packed: 8 + 2 + 1 + LOGFS_MAX_NAMELEN (255) = 266 bytes, which is what
  * the SIZE_CHECK below expects.
  */
 /* FIXME: add 6 bytes of padding to remove the __packed */
 struct logfs_disk_dentry {
 	__be64	ino;
 	__be16	namelen;
 	__u8	type;
 	__u8	name[LOGFS_MAX_NAMELEN];
 } __attribute__((packed));

 SIZE_CHECK(logfs_disk_dentry, 266);

 /*
  * Special 32bit values for struct logfs_segment_entry, see below.  Both
  * have all bits set: RESERVED is used in the valid field, BADSEG in the
  * ec_level field.
  */
 #define RESERVED		0xffffffff
 #define BADSEG			0xffffffff
 /**
  * struct logfs_segment_entry - segment file entry
  *
  * @ec_level:			erase count and level
  * @valid:			number of valid bytes
  *
  * Segment file contains one entry for every segment.  ec_level contains the
  * erasecount in the upper 28 bits and the level in the lower 4 bits.  An
  * ec_level of BADSEG (-1) identifies bad segments.  valid contains the number
  * of valid bytes or RESERVED (-1 again) if the segment is used for either the
  * superblock or the journal, or when the segment is bad.
  */
 struct logfs_segment_entry {
 	__be32	ec_level;
 	__be32	valid;
 };

 SIZE_CHECK(logfs_segment_entry, 8);

 /**
  * struct logfs_journal_header - header for journal entries (JEs)
  *
  * @h_crc:			crc32 of journal entry
  * @h_len:			length of compressed journal entry,
  *				not including header
  * @h_datalen:			length of uncompressed data
  * @h_type:			JE type
  * @h_compr:			compression type
  * @h_pad:			reserved
  *
  * 4 + 2 + 2 + 2 + 1 + 5 = 16 bytes; h_pad fills the header up to the size
  * expected by the SIZE_CHECK below.
  */
 struct logfs_journal_header {
 	__be32	h_crc;
 	__be16	h_len;
 	__be16	h_datalen;
 	__be16	h_type;
 	__u8	h_compr;
 	__u8	h_pad[5];
 };

 SIZE_CHECK(logfs_journal_header, 16);

 /*
  * Life expectancy of data ("vim").
  *
  * NOTE(review): the previous comment also listed "VIM_GC - GC'd data -
  * likely long-living", but no such enumerator is defined here.
  */
 enum logfs_vim {
 	VIM_DEFAULT = 0,	/* default vim */
 	VIM_SEGFILE = 1,	/* for segment file only - very short-living */
 };

 /**
  * struct logfs_je_area - wbuf header
  *
  * @segno:			segment number of area
  * @used_bytes:			number of bytes already used
  * @gc_level:			GC level
  * @vim:			life expectancy of data
  *
  * "Areas" are segments currently being used for writing.  There is at least
  * one area per GC level.  Several may be used to separate long-living from
  * short-living data.  If an area with unknown vim is encountered, it can
  * simply be closed.
  * The write buffer immediately follows this header.
  *
  * Packed: 4 + 4 + 1 + 1 = 10 bytes, as expected by the SIZE_CHECK below.
  */
 struct logfs_je_area {
 	__be32	segno;
 	__be32	used_bytes;
 	__u8	gc_level;
 	__u8	vim;
 } __attribute__((packed));

 SIZE_CHECK(logfs_je_area, 10);

 /* Combined size of a journal header and the area header defined above */
 #define MAX_JOURNAL_HEADER \
 	(sizeof(struct logfs_journal_header) + sizeof(struct logfs_je_area))

 /**
  * struct logfs_je_dynsb - dynamic superblock
  *
  * @ds_gec:			global erase count
  * @ds_sweeper:			current position of GC "sweeper"
  * @ds_rename_dir:		source directory ino (see dir.c documentation)
  * @ds_rename_pos:		position of source dd (see dir.c documentation)
  * @ds_victim_ino:		victims of incomplete dir operation (see dir.c)
  * @ds_victim_parent:		parent inode of victim (see dir.c)
  * @ds_used_bytes:		number of used bytes
  * @ds_generation:		undocumented in the original; presumably a
  *				generation counter - TODO confirm
  * @pad:			not documented; fills the last 64bit word
  *
  * Eight 64bit words, 64 bytes in total (see SIZE_CHECK below).
  */
 struct logfs_je_dynsb {
 	__be64	ds_gec;
 	__be64	ds_sweeper;

 	__be64	ds_rename_dir;
 	__be64	ds_rename_pos;

 	__be64	ds_victim_ino;
 	__be64	ds_victim_parent; /* XXX */

 	__be64	ds_used_bytes;
 	__be32	ds_generation;
 	__be32	pad;
 };

 SIZE_CHECK(logfs_je_dynsb, 64);

 /**
  * struct logfs_je_anchor - anchor of filesystem tree, aka master inode
  *
  * @da_size:			size of inode file
  * @da_last_ino:		last created inode
  * @da_used_bytes:		number of bytes used
  * @da_height:			undocumented in the original; presumably the
  *				height of the inode file's tree - TODO confirm
  * @pad:			fills the 64bit word started by da_height
  * @da_data:			data pointers
  *
  * 4 * 8 bytes of metadata plus LOGFS_EMBEDDED_FIELDS 64bit slots, 168 bytes
  * in total (see SIZE_CHECK below).
  * NOTE(review): da_height and pad are declared as u8 while the other
  * structures in this header use __u8.
  */
 struct logfs_je_anchor {
 	__be64	da_size;
 	__be64	da_last_ino;

 	__be64	da_used_bytes;
 	u8	da_height;
 	u8	pad[7];

 	__be64	da_data[LOGFS_EMBEDDED_FIELDS];
 };

 SIZE_CHECK(logfs_je_anchor, 168);

 /**
  * struct logfs_je_spillout - spillout entry (from 1st to 2nd journal)
  *
  * @so_segment:			segments used for 2nd journal
  *
  * Length of the array is given by h_len field in the header.
  *
  * The array is declared with zero elements, so the struct itself has size
  * 0 (see SIZE_CHECK below) and only describes the element type.
  */
 struct logfs_je_spillout {
 	__be64	so_segment[0];
 };

 SIZE_CHECK(logfs_je_spillout, 0);

 /**
  * struct logfs_je_journal_ec - erase counts for all journal segments
  *
  * @ec:				erase count
  *
  * Length of the array is given by h_len field in the header.
  *
  * The array is declared with zero elements, so the struct itself has size
  * 0 (see SIZE_CHECK below) and only describes the element type.
  */
 struct logfs_je_journal_ec {
 	__be32	ec[0];
 };

 SIZE_CHECK(logfs_je_journal_ec, 0);

 /**
  * struct logfs_je_free_segments - list of free segments with erase count
  *
  * @segno:			segment number
  * @ec:				erase count
  *
  * One 8-byte list element (see SIZE_CHECK below).
  */
 struct logfs_je_free_segments {
 	__be32	segno;
 	__be32	ec;
 };

 SIZE_CHECK(logfs_je_free_segments, 8);

 /**
  * struct logfs_seg_alias - list of segment aliases
  *
  * @old_segno:			presumably the segment number being aliased -
  *				TODO confirm
  * @new_segno:			presumably the segment number it maps to -
  *				TODO confirm
  *
  * One 8-byte list element (see SIZE_CHECK below).
  */
 struct logfs_seg_alias {
 	__be32	old_segno;
 	__be32	new_segno;
 };

 SIZE_CHECK(logfs_seg_alias, 8);

 /**
  * struct logfs_obj_alias - list of object aliases
  *
  * @ino:			inode number (naming as in the object header)
  * @bix:			block index (naming as in the object header)
  * @val:			64bit value; meaning not documented here -
  *				TODO confirm
  * @level:			presumably the level of the aliased block -
  *				TODO confirm
  * @pad:			padding between level and child_no
  * @child_no:			presumably the index of the child pointer
  *				within the block - TODO confirm
  *
  * 3 * 8 + 1 + 5 + 2 = 32 bytes (see SIZE_CHECK below).
  * NOTE(review): level and pad are declared as u8 while most structures in
  * this header use __u8.
  */
 struct logfs_obj_alias {
 	__be64	ino;
 	__be64	bix;
 	__be64	val;
 	u8	level;
 	u8	pad[5];
 	__be16	child_no;
 };

 SIZE_CHECK(logfs_obj_alias, 32);

 /*
  * Compression types, one per enumerator.
  */
 enum {
 	COMPR_NONE = 0,	/* uncompressed */
 	COMPR_ZLIB = 1,	/* compressed with zlib */
 };

 /*
  * Journal entries come in groups of 16.  First group contains unique
  * entries, next groups contain one entry per level.
  *
  * NOTE(review): the previous comment described a "JE_SEG_ALIAS - aliases
  * segments" entry; the enumerator actually defined is JE_OBJ_ALIAS.  No
  * enumerator other than JE_FIRST itself has the value 0x01.
  */
 enum {
 	/* base group, containing unique entries */
 	JEG_BASE = 0x00,

 	/* smallest possible journal entry number */
 	JE_FIRST = 0x01,

 	/* commit entry, validates all previous entries */
 	JE_COMMIT = 0x02,
 	/*
 	 * dynamic superblock, anything that ought to be in the superblock
 	 * but cannot because it is read-write data
 	 */
 	JE_DYNSB = 0x03,
 	/* anchor aka master inode aka inode file's inode */
 	JE_ANCHOR = 0x04,
 	/* erasecounts for all journal segments */
 	JE_ERASECOUNT = 0x05,
 	/* unused */
 	JE_SPILLOUT = 0x06,
 	/* presumably: object aliases (struct logfs_obj_alias) - TODO confirm */
 	JE_OBJ_ALIAS = 0x0d,
 	/* area description */
 	JE_AREA = 0x0e,

 	/* largest possible journal entry number */
 	JE_LAST = 0x0e,
 };

 #endif
	/*
	* fs/logfs/logfs_abi.h
	*
	* As should be obvious for Linux kernel code, license is GPLv2
	*
	* Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
	*
	* Public header for logfs.
	*/
	#ifndef FS_LOGFS_LOGFS_ABI_H
	#define FS_LOGFS_LOGFS_ABI_H

	/*
	* For out-of-kernel compiles: when the including environment provides no
	* BUILD_BUG_ON, fall back to an empty definition. With this fallback the
	* check_*() functions generated by SIZE_CHECK have empty bodies and no
	* size is verified.
	*/
	#ifndef BUILD_BUG_ON
	#define BUILD_BUG_ON(condition) /**/
	#endif

	/*
	* SIZE_CHECK(type, size) - emit a static inline function check_<type>()
	* whose body hands "sizeof(struct type) != (size)" to BUILD_BUG_ON.
	* @type is the struct tag without the "struct" keyword, @size the expected
	* size in bytes.
	*/
	#define SIZE_CHECK(type, size) \
	static inline void check_##type(void) \
	{ \
	BUILD_BUG_ON(sizeof(struct type) != (size)); \
	}

	/*
	* Throughout the logfs code, we're constantly dealing with blocks at
	* various positions or offsets. To remove confusion, we strictly
	* distinguish between a "position" - the logical position within a
	* file - and an "offset" - the physical location within the device.
	*
	* Any usage of the term offset for a logical location or position for
	* a physical one is a bug and should get fixed.
	*/

	/*
	* Blocks are allocated in one of several segments depending on their
	* level. The following levels are used:
	* 0 - regular data block
	* 1 - i1 indirect blocks
	* 2 - i2 indirect blocks
	* 3 - i3 indirect blocks
	* 4 - i4 indirect blocks
	* 5 - i5 indirect blocks
	* 6 - ifile data blocks
	* 7 - ifile i1 indirect blocks
	* 8 - ifile i2 indirect blocks
	* 9 - ifile i3 indirect blocks
	* 10 - ifile i4 indirect blocks
	* 11 - ifile i5 indirect blocks
	* Potential levels to be used in the future:
	* 12 - gc recycled blocks, long-lived data
	* 13 - replacement blocks, short-lived data
	*
	* Levels 1-11 are necessary for robust gc operations and help separate
	* short-lived metadata from longer-lived file data. In the future,
	* file data should get separated into several segments based on simple
	* heuristics. Old data recycled during gc operation is expected to be
	* long-lived. New data is of uncertain life expectancy. New data
	* used to replace older blocks in existing files is expected to be
	* short-lived.
	*/


	/*
	* Magic numbers. 64bit for superblock, 32bit for statfs f_type.
	*
	* LOGFS_MAGIC - 64bit value (ull suffix)
	* LOGFS_MAGIC_U32 - 32bit value (u suffix)
	*/
	#define LOGFS_MAGIC 0x7a3a8e5cb9d5bf67ull
	#define LOGFS_MAGIC_U32 0xc97e8168u

	/*
	* Various blocksize related macros. Blocksize is currently fixed at 4KiB.
	* Sooner or later that should become configurable and the macros replaced
	* by something superblock-dependent. Pointers in indirect blocks are and
	* will remain 64bit.
	*
	* LOGFS_BLOCKSIZE - self-explaining
	* LOGFS_BLOCK_FACTOR - number of pointers per indirect block
	* LOGFS_BLOCK_BITS - log2 of LOGFS_BLOCK_FACTOR, used for shifts
	*
	* With 4096-byte blocks and 8-byte pointers LOGFS_BLOCK_FACTOR evaluates
	* to 512, matching LOGFS_BLOCK_BITS == 9. LOGFS_BLOCK_BITS is a literal,
	* not derived, so the two have to be kept in sync by hand.
	*/
	#define LOGFS_BLOCKSIZE (4096ull)
	#define LOGFS_BLOCK_FACTOR (LOGFS_BLOCKSIZE / sizeof(u64))
	#define LOGFS_BLOCK_BITS (9)

	/*
	* Number of blocks at various levels of indirection. There are 16 direct
	* block pointers plus a single indirect pointer.
	*
	* Each In_BLOCKS is LOGFS_BLOCK_FACTOR times the level below it, starting
	* with I1_BLOCKS == LOGFS_BLOCK_FACTOR.
	*
	* INDIRECT_INDEX - equals I0_BLOCKS; presumably the array index of the
	* indirect pointer behind the direct pointers (TODO confirm against users)
	* LOGFS_EMBEDDED_FIELDS - direct pointers plus one
	*/
	#define I0_BLOCKS (16)
	#define I1_BLOCKS LOGFS_BLOCK_FACTOR
	#define I2_BLOCKS (LOGFS_BLOCK_FACTOR * I1_BLOCKS)
	#define I3_BLOCKS (LOGFS_BLOCK_FACTOR * I2_BLOCKS)
	#define I4_BLOCKS (LOGFS_BLOCK_FACTOR * I3_BLOCKS)
	#define I5_BLOCKS (LOGFS_BLOCK_FACTOR * I4_BLOCKS)

	#define INDIRECT_INDEX I0_BLOCKS
	#define LOGFS_EMBEDDED_FIELDS (I0_BLOCKS + 1)

	/*
	* Sizes at which files require another level of indirection. Files smaller
	* than LOGFS_EMBEDDED_SIZE can be completely stored in the inode itself,
	* similar to ext2 fast symlinks.
	*
	* Data at a position smaller than LOGFS_I0_SIZE is accessed through the
	* direct pointers, else through the 1x indirect pointer and so forth.
	*
	* LOGFS_EMBEDDED_SIZE is the byte size of LOGFS_EMBEDDED_FIELDS 64bit
	* slots; every LOGFS_In_SIZE is the matching In_BLOCKS count multiplied
	* by LOGFS_BLOCKSIZE.
	*/
	#define LOGFS_EMBEDDED_SIZE (LOGFS_EMBEDDED_FIELDS * sizeof(u64))
	#define LOGFS_I0_SIZE (I0_BLOCKS * LOGFS_BLOCKSIZE)
	#define LOGFS_I1_SIZE (I1_BLOCKS * LOGFS_BLOCKSIZE)
	#define LOGFS_I2_SIZE (I2_BLOCKS * LOGFS_BLOCKSIZE)
	#define LOGFS_I3_SIZE (I3_BLOCKS * LOGFS_BLOCKSIZE)
	#define LOGFS_I4_SIZE (I4_BLOCKS * LOGFS_BLOCKSIZE)
	#define LOGFS_I5_SIZE (I5_BLOCKS * LOGFS_BLOCKSIZE)

	/*
	* Each indirect block pointer must have this flag set, if all block pointers
	* behind it are set, i.e. there is no hole hidden in the shadow of this
	* indirect block pointer.
	*
	* pure_ofs(ofs) yields @ofs with the LOGFS_FULLY_POPULATED bit cleared.
	* The argument is parenthesized so that an expression built from operators
	* binding weaker than '&' (such as "a | b") is masked as a whole instead of
	* only its last operand.
	*/
	#define LOGFS_FULLY_POPULATED (1ULL << 63)
	#define pure_ofs(ofs) ((ofs) & ~LOGFS_FULLY_POPULATED)

	/*
	* LogFS needs to separate data into levels. Each level is defined as the
	* maximal possible distance from the master inode (inode of the inode file).
	* Data blocks reside on level 0, 1x indirect block on level 1, etc.
	* Inodes reside on level 6, indirect blocks for the inode file on levels 7-11.
	* This effort is necessary to guarantee garbage collection to always make
	* progress.
	*
	* LOGFS_MAX_INDIRECT is the maximal indirection through indirect blocks,
	* LOGFS_MAX_LEVELS is one more for the actual data level of a file. It is
	* the maximal number of levels for one file.
	* LOGFS_NO_AREAS is twice that, as the inode file and regular files are
	* effectively stacked on top of each other.
	*
	* Resulting values: LOGFS_MAX_INDIRECT 5, LOGFS_MAX_LEVELS 6,
	* LOGFS_NO_AREAS 12.
	*/
	#define LOGFS_MAX_INDIRECT (5)
	#define LOGFS_MAX_LEVELS (LOGFS_MAX_INDIRECT + 1)
	#define LOGFS_NO_AREAS (2 * LOGFS_MAX_LEVELS)

	/* Maximum size of filenames, in bytes (length of a dentry's name array) */
	#define LOGFS_MAX_NAMELEN (255)

	/*
	* Number of segments in the primary journal (length of the superblock's
	* ds_journal_seg array).
	*/
	#define LOGFS_JOURNAL_SEGS (16)

	/* Maximum number of free/erased/etc. segments in journal entries */
	#define MAX_CACHED_SEGS (64)


	/*
	* LOGFS_OBJECT_HEADERSIZE is the size of a single header in the object store,
	* LOGFS_SEGMENT_HEADERSIZE is the size of the header of a segment,
	* LOGFS_MAX_OBJECTSIZE the size of the largest possible object, including
	* its header,
	* LOGFS_SEGMENT_RESERVE is the amount of space reserved for each segment for
	* its segment header and the padded space at the end when no further objects
	* fit.
	*
	* The reserve is the segment header plus one byte less than the largest
	* object: LOGFS_SEGMENT_HEADERSIZE + LOGFS_MAX_OBJECTSIZE - 1.
	*/
	#define LOGFS_OBJECT_HEADERSIZE (0x1c)
	#define LOGFS_SEGMENT_HEADERSIZE (0x18)
	#define LOGFS_MAX_OBJECTSIZE (LOGFS_OBJECT_HEADERSIZE + LOGFS_BLOCKSIZE)
	#define LOGFS_SEGMENT_RESERVE \
	(LOGFS_SEGMENT_HEADERSIZE + LOGFS_MAX_OBJECTSIZE - 1)

	/*
	 * Segment types.
	 *
	 * NOTE(review): the descriptions that used to stand here ("Data or
	 * indirect block", "Inode", "Dentry") were identical to those of the
	 * object types and do not fit these names.  The meanings given below are
	 * inferred from the identifiers only - TODO confirm.
	 */
	enum {
		SEG_SUPER = 0x01,	/* presumably: segment holding a superblock */
		SEG_JOURNAL = 0x02,	/* presumably: segment used by the journal */
		SEG_OSTORE = 0x03,	/* presumably: segment of the object store */
	};

	/**
	* struct logfs_segment_header - per-segment header in the ostore
	*
	* @crc: crc32 of header (there is no data)
	* @pad: unused, must be 0
	* @type: segment type, see above
	* @level: GC level for all objects in this segment
	* @segno: segment number
	* @ec: erase count for this segment
	* @gec: global erase count at time of writing
	*
	* Multi-byte fields are big-endian (__be*). The members add up to
	* 4 + 2 + 1 + 1 + 4 + 4 + 8 = 24 bytes; the SIZE_CHECK below compares the
	* struct size with LOGFS_SEGMENT_HEADERSIZE.
	*/
	struct logfs_segment_header {
	__be32 crc;
	__be16 pad;
	__u8 type;
	__u8 level;
	__be32 segno;
	__be32 ec;
	__be64 gec;
	};

	SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE);

	/*
	* Feature bit masks, all currently zero.
	* NOTE(review): presumably the feature sets known to this implementation,
	* to be matched against the ds_feature_* superblock fields - confirm
	* against the users of these macros.
	*/
	#define LOGFS_FEATURES_INCOMPAT (0ull)
	#define LOGFS_FEATURES_RO_COMPAT (0ull)
	#define LOGFS_FEATURES_COMPAT (0ull)

	/**
	* struct logfs_disk_super - on-medium superblock
	*
	* @ds_sh: segment header at the start of the superblock
	* @ds_magic: magic number, must equal LOGFS_MAGIC
	* @ds_crc: crc32 of structure starting with the next field
	* @ds_ifile_levels: maximum number of levels for ifile
	* @ds_iblock_levels: maximum number of levels for regular files
	* @ds_data_levels: number of separate levels for data
	* @ds_segment_shift: log2 of segment size
	* @ds_block_shift: log2 of block size
	* @ds_write_shift: log2 of write size
	* @pad0: reserved, must be 0
	* @ds_filesystem_size: filesystem size (presumably in bytes - TODO confirm)
	* @ds_segment_size: segment size (presumably in bytes - TODO confirm)
	* @ds_bad_seg_reserve: number of segments reserved to handle bad blocks
	* @ds_feature_incompat: incompatible filesystem features
	* @ds_feature_ro_compat: read-only compatible filesystem features
	* @ds_feature_compat: compatible filesystem features
	* @ds_feature_flags: flags
	* @ds_root_reserve: bytes reserved for the superuser
	* @ds_speed_reserve: bytes reserved to speed up GC
	* @ds_journal_seg: segments used by primary journal
	* @ds_super_ofs: two 64bit offsets (presumably the locations of the
	* superblock copies - TODO confirm)
	* @pad3: reserved, must be 0
	*
	* Contains only read-only fields. Read-write fields like the amount of used
	* space is tracked in the dynamic superblock, which is stored in the journal.
	*
	* The blank lines in the definition group the members into 16-byte rows;
	* the total is 256 bytes, as stated by the SIZE_CHECK below.
	*/
	struct logfs_disk_super {
	struct logfs_segment_header ds_sh;
	__be64 ds_magic;

	__be32 ds_crc;
	__u8 ds_ifile_levels;
	__u8 ds_iblock_levels;
	__u8 ds_data_levels;
	__u8 ds_segment_shift;
	__u8 ds_block_shift;
	__u8 ds_write_shift;
	__u8 pad0[6];

	__be64 ds_filesystem_size;
	__be32 ds_segment_size;
	__be32 ds_bad_seg_reserve;

	__be64 ds_feature_incompat;
	__be64 ds_feature_ro_compat;

	__be64 ds_feature_compat;
	__be64 ds_feature_flags;

	__be64 ds_root_reserve;
	__be64 ds_speed_reserve;

	__be32 ds_journal_seg[LOGFS_JOURNAL_SEGS];

	__be64 ds_super_ofs[2];
	__be64 pad3[8];
	};

	SIZE_CHECK(logfs_disk_super, 256);

	/*
	 * Object types.  The numbering continues after the segment types
	 * (0x01-0x03), so the two sets of values do not overlap.
	 */
	enum {
		OBJ_BLOCK = 0x04,	/* Data or indirect block */
		OBJ_INODE = 0x05,	/* Inode */
		OBJ_DENTRY = 0x06,	/* Dentry */
	};

	/**
	* struct logfs_object_header - per-object header in the ostore
	*
	* @crc: crc32 of header, excluding data_crc
	* @len: length of data
	* @type: object type, see above
	* @compr: compression type
	* @ino: inode number
	* @bix: block index
	* @data_crc: crc32 of payload
	*
	* The members add up to 4 + 2 + 1 + 1 + 8 + 8 + 4 = 28 (0x1c) bytes. The
	* struct is packed so that no tail padding is added behind data_crc; the
	* SIZE_CHECK below compares the size with LOGFS_OBJECT_HEADERSIZE.
	*/
	struct logfs_object_header {
	__be32 crc;
	__be16 len;
	__u8 type;
	__u8 compr;
	__be64 ino;
	__be64 bix;
	__be32 data_crc;
	} __attribute__((packed));

	SIZE_CHECK(logfs_object_header, LOGFS_OBJECT_HEADERSIZE);

	/*
	 * Reserved inode numbers.
	 *
	 * NOTE(review): LOGFS_INO_MAPPING and LOGFS_RESERVED_INOS carried no
	 * description; the remarks on them below are guesses from the names.
	 */
	enum {
		LOGFS_INO_MAPPING = 0x00,	/* undocumented - TODO confirm purpose */
		LOGFS_INO_MASTER = 0x01,	/* master inode (for inode file) */
		LOGFS_INO_ROOT = 0x02,		/* root directory */
		LOGFS_INO_SEGFILE = 0x03,	/* per-segment used bytes and erase count */
		LOGFS_RESERVED_INOS = 0x10,	/* presumably: count of reserved numbers */
	};

	/*
	* Inode flags. High bits should never be written to the medium. They are
	* reserved for in-memory usage.
	* Low bits should either remain in sync with the corresponding FS_*_FL or
	* reuse slots that obviously don't make sense for logfs.
	*
	* LOGFS_IF_COMPRESSED Same value as FS_COMPR_FL; the only low bit defined
	* LOGFS_IF_DIRTY Inode must be written back
	* LOGFS_IF_ZOMBIE Inode has been deleted
	* LOGFS_IF_STILLBORN -ENOSPC happened when creating inode
	*/
	#define LOGFS_IF_COMPRESSED 0x00000004 /* == FS_COMPR_FL */
	#define LOGFS_IF_DIRTY 0x20000000
	#define LOGFS_IF_ZOMBIE 0x40000000
	#define LOGFS_IF_STILLBORN 0x80000000

	/* Flags available to chattr: visible to / modifiable by the user */
	#define LOGFS_FL_USER_VISIBLE (LOGFS_IF_COMPRESSED)
	#define LOGFS_FL_USER_MODIFIABLE (LOGFS_IF_COMPRESSED)
	/* Flags inherited from parent directory on file/directory creation */
	#define LOGFS_FL_INHERITED (LOGFS_IF_COMPRESSED)

	/**
	* struct logfs_disk_inode - on-medium inode
	*
	* @di_mode: file mode
	* @di_height: undocumented in the original; presumably the height of the
	* indirect block tree - TODO confirm
	* @di_pad: reserved, must be 0
	* @di_flags: inode flags, see above
	* @di_uid: user id
	* @di_gid: group id
	* @di_ctime: change time
	* @di_mtime: modify time
	* @di_atime: undocumented in the original; presumably the access time -
	* TODO confirm
	* @di_refcount: reference count (aka nlink or link count)
	* @di_generation: inode generation, for nfs
	* @di_used_bytes: number of bytes used
	* @di_size: file size
	* @di_data: data pointers
	*
	* Eight 64bit words of metadata followed by LOGFS_EMBEDDED_FIELDS 64bit
	* data slots; the SIZE_CHECK below expects 200 bytes in total.
	*/
	struct logfs_disk_inode {
	__be16 di_mode;
	__u8 di_height;
	__u8 di_pad;
	__be32 di_flags;
	__be32 di_uid;
	__be32 di_gid;

	__be64 di_ctime;
	__be64 di_mtime;

	__be64 di_atime;
	__be32 di_refcount;
	__be32 di_generation;

	__be64 di_used_bytes;
	__be64 di_size;

	__be64 di_data[LOGFS_EMBEDDED_FIELDS];
	};

	SIZE_CHECK(logfs_disk_inode, 200);

	/*
	* Positions of selected struct logfs_disk_inode members, counted in units
	* of sizeof(__be64) instead of bytes (byte offset divided by 8).
	* INODE_HEIGHT_OFS is a literal 0, i.e. the first 64bit word of the inode,
	* which is the word that contains di_height.
	*/
	#define INODE_POINTER_OFS \
	(offsetof(struct logfs_disk_inode, di_data) / sizeof(__be64))
	#define INODE_USED_OFS \
	(offsetof(struct logfs_disk_inode, di_used_bytes) / sizeof(__be64))
	#define INODE_SIZE_OFS \
	(offsetof(struct logfs_disk_inode, di_size) / sizeof(__be64))
	#define INODE_HEIGHT_OFS (0)

	/**
	* struct logfs_disk_dentry - on-medium dentry structure
	*
	* @ino: inode number
	* @namelen: length of file name
	* @type: file type, identical to bits 12..15 of mode
	* @name: file name
	*
	* Packed: 8 + 2 + 1 + LOGFS_MAX_NAMELEN (255) = 266 bytes, which is what
	* the SIZE_CHECK below expects.
	*/
	/* FIXME: add 6 bytes of padding to remove the __packed */
	struct logfs_disk_dentry {
	__be64 ino;
	__be16 namelen;
	__u8 type;
	__u8 name[LOGFS_MAX_NAMELEN];
	} __attribute__((packed));

	SIZE_CHECK(logfs_disk_dentry, 266);

	/*
	* Special 32bit values for struct logfs_segment_entry, see below. Both
	* have all bits set: RESERVED is used in the valid field, BADSEG in the
	* ec_level field.
	*/
	#define RESERVED 0xffffffff
	#define BADSEG 0xffffffff
	/**
	* struct logfs_segment_entry - segment file entry
	*
	* @ec_level: erase count and level
	* @valid: number of valid bytes
	*
	* Segment file contains one entry for every segment. ec_level contains the
	* erasecount in the upper 28 bits and the level in the lower 4 bits. An
	* ec_level of BADSEG (-1) identifies bad segments. valid contains the number
	* of valid bytes or RESERVED (-1 again) if the segment is used for either the
	* superblock or the journal, or when the segment is bad.
	*/
	struct logfs_segment_entry {
	__be32 ec_level;
	__be32 valid;
	};

	SIZE_CHECK(logfs_segment_entry, 8);

	/**
	* struct logfs_journal_header - header for journal entries (JEs)
	*
	* @h_crc: crc32 of journal entry
	* @h_len: length of compressed journal entry,
	* not including header
	* @h_datalen: length of uncompressed data
	* @h_type: JE type
	* @h_compr: compression type
	* @h_pad: reserved
	*
	* 4 + 2 + 2 + 2 + 1 + 5 = 16 bytes; h_pad fills the header up to the size
	* expected by the SIZE_CHECK below.
	*/
	struct logfs_journal_header {
	__be32 h_crc;
	__be16 h_len;
	__be16 h_datalen;
	__be16 h_type;
	__u8 h_compr;
	__u8 h_pad[5];
	};

	SIZE_CHECK(logfs_journal_header, 16);

	/*
	 * Life expectancy of data ("vim").
	 *
	 * NOTE(review): the previous comment also listed "VIM_GC - GC'd data -
	 * likely long-living", but no such enumerator is defined here.
	 */
	enum logfs_vim {
		VIM_DEFAULT = 0,	/* default vim */
		VIM_SEGFILE = 1,	/* for segment file only - very short-living */
	};

	/**
	* struct logfs_je_area - wbuf header
	*
	* @segno: segment number of area
	* @used_bytes: number of bytes already used
	* @gc_level: GC level
	* @vim: life expectancy of data
	*
	* "Areas" are segments currently being used for writing. There is at least
	* one area per GC level. Several may be used to separate long-living from
	* short-living data. If an area with unknown vim is encountered, it can
	* simply be closed.
	* The write buffer immediately follows this header.
	*
	* Packed: 4 + 4 + 1 + 1 = 10 bytes, as expected by the SIZE_CHECK below.
	*/
	struct logfs_je_area {
	__be32 segno;
	__be32 used_bytes;
	__u8 gc_level;
	__u8 vim;
	} __attribute__((packed));

	SIZE_CHECK(logfs_je_area, 10);

	/* Combined size of a journal header and the area header defined above */
	#define MAX_JOURNAL_HEADER \
	(sizeof(struct logfs_journal_header) + sizeof(struct logfs_je_area))

	/**
	* struct logfs_je_dynsb - dynamic superblock
	*
	* @ds_gec: global erase count
	* @ds_sweeper: current position of GC "sweeper"
	* @ds_rename_dir: source directory ino (see dir.c documentation)
	* @ds_rename_pos: position of source dd (see dir.c documentation)
	* @ds_victim_ino: victims of incomplete dir operation (see dir.c)
	* @ds_victim_parent: parent inode of victim (see dir.c)
	* @ds_used_bytes: number of used bytes
	* @ds_generation: undocumented in the original; presumably a generation
	* counter - TODO confirm
	* @pad: not documented; fills the last 64bit word
	*
	* Eight 64bit words, 64 bytes in total (see SIZE_CHECK below).
	*/
	struct logfs_je_dynsb {
	__be64 ds_gec;
	__be64 ds_sweeper;

	__be64 ds_rename_dir;
	__be64 ds_rename_pos;

	__be64 ds_victim_ino;
	__be64 ds_victim_parent; /* XXX */

	__be64 ds_used_bytes;
	__be32 ds_generation;
	__be32 pad;
	};

	SIZE_CHECK(logfs_je_dynsb, 64);

	/**
	* struct logfs_je_anchor - anchor of filesystem tree, aka master inode
	*
	* @da_size: size of inode file
	* @da_last_ino: last created inode
	* @da_used_bytes: number of bytes used
	* @da_height: undocumented in the original; presumably the height of the
	* inode file's tree - TODO confirm
	* @pad: fills the 64bit word started by da_height
	* @da_data: data pointers
	*
	* 4 * 8 bytes of metadata plus LOGFS_EMBEDDED_FIELDS 64bit slots, 168 bytes
	* in total (see SIZE_CHECK below).
	* NOTE(review): da_height and pad are declared as u8 while the other
	* structures in this header use __u8.
	*/
	struct logfs_je_anchor {
	__be64 da_size;
	__be64 da_last_ino;

	__be64 da_used_bytes;
	u8 da_height;
	u8 pad[7];

	__be64 da_data[LOGFS_EMBEDDED_FIELDS];
	};

	SIZE_CHECK(logfs_je_anchor, 168);

	/**
	* struct logfs_je_spillout - spillout entry (from 1st to 2nd journal)
	*
	* @so_segment: segments used for 2nd journal
	*
	* Length of the array is given by h_len field in the header.
	*
	* The array is declared with zero elements, so the struct itself has size
	* 0 (see SIZE_CHECK below) and only describes the element type.
	*/
	struct logfs_je_spillout {
	__be64 so_segment[0];
	};

	SIZE_CHECK(logfs_je_spillout, 0);

	/**
	* struct logfs_je_journal_ec - erase counts for all journal segments
	*
	* @ec: erase count
	*
	* Length of the array is given by h_len field in the header.
	*
	* The array is declared with zero elements, so the struct itself has size
	* 0 (see SIZE_CHECK below) and only describes the element type.
	*/
	struct logfs_je_journal_ec {
	__be32 ec[0];
	};

	SIZE_CHECK(logfs_je_journal_ec, 0);

	/**
	* struct logfs_je_free_segments - list of free segments with erase count
	*
	* @segno: segment number
	* @ec: erase count
	*
	* One 8-byte list element (see SIZE_CHECK below).
	*/
	struct logfs_je_free_segments {
	__be32 segno;
	__be32 ec;
	};

	SIZE_CHECK(logfs_je_free_segments, 8);

	/**
	* struct logfs_seg_alias - list of segment aliases
	*
	* @old_segno: presumably the segment number being aliased - TODO confirm
	* @new_segno: presumably the segment number it maps to - TODO confirm
	*
	* One 8-byte list element (see SIZE_CHECK below).
	*/
	struct logfs_seg_alias {
	__be32 old_segno;
	__be32 new_segno;
	};

	SIZE_CHECK(logfs_seg_alias, 8);

	/**
	* struct logfs_obj_alias - list of object aliases
	*
	* @ino: inode number (naming as in the object header)
	* @bix: block index (naming as in the object header)
	* @val: 64bit value; meaning not documented here - TODO confirm
	* @level: presumably the level of the aliased block - TODO confirm
	* @pad: padding between level and child_no
	* @child_no: presumably the index of the child pointer within the block -
	* TODO confirm
	*
	* 3 * 8 + 1 + 5 + 2 = 32 bytes (see SIZE_CHECK below).
	* NOTE(review): level and pad are declared as u8 while most structures in
	* this header use __u8.
	*/
	struct logfs_obj_alias {
	__be64 ino;
	__be64 bix;
	__be64 val;
	u8 level;
	u8 pad[5];
	__be16 child_no;
	};

	SIZE_CHECK(logfs_obj_alias, 32);

	/*
	 * Compression types, one per enumerator.
	 */
	enum {
		COMPR_NONE = 0,	/* uncompressed */
		COMPR_ZLIB = 1,	/* compressed with zlib */
	};

	/*
	 * Journal entries come in groups of 16.  First group contains unique
	 * entries, next groups contain one entry per level.
	 *
	 * NOTE(review): the previous comment described a "JE_SEG_ALIAS - aliases
	 * segments" entry; the enumerator actually defined is JE_OBJ_ALIAS.  No
	 * enumerator other than JE_FIRST itself has the value 0x01.
	 */
	enum {
		/* base group, containing unique entries */
		JEG_BASE = 0x00,

		/* smallest possible journal entry number */
		JE_FIRST = 0x01,

		/* commit entry, validates all previous entries */
		JE_COMMIT = 0x02,
		/*
		 * dynamic superblock, anything that ought to be in the superblock
		 * but cannot because it is read-write data
		 */
		JE_DYNSB = 0x03,
		/* anchor aka master inode aka inode file's inode */
		JE_ANCHOR = 0x04,
		/* erasecounts for all journal segments */
		JE_ERASECOUNT = 0x05,
		/* unused */
		JE_SPILLOUT = 0x06,
		/* presumably: object aliases (struct logfs_obj_alias) - TODO confirm */
		JE_OBJ_ALIAS = 0x0d,
		/* area description */
		JE_AREA = 0x0e,

		/* largest possible journal entry number */
		JE_LAST = 0x0e,
	};

	#endif