/*
 *   Copyright (c) International Business Machines  Corp., 2001
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or 
 *   (at your option) any later version.
 * 
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software 
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: lvmregmgr
 * File: lvm_io.c
 *
 * Description: This file contains all functions pertaining to disk I/O within
 *              the LVM region manager plugin. This file should be the only
 *              location of calls to READ and WRITE (except the lvm_read and
 *              lvm_write API calls).
 */ 

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <plugin.h>
#include "lvmregmgr.h"


/* Function: bytes_to_sectors
 *
 *	bytes : Byte count to convert.
 *
 *	Return the number of sectors needed to hold the given number of
 *	bytes. Any remainder that does not fill a whole sector still costs
 *	one sector, i.e. the result is rounded up.
 */
inline u_int64_t bytes_to_sectors( u_int64_t bytes )
{
	u_int64_t	sector_count	= bytes >> EVMS_VSECTOR_SIZE_SHIFT;
	u_int64_t	leftover	= bytes & (EVMS_VSECTOR_SIZE-1);

	// A partial trailing sector counts as a full one.
	if ( leftover ) {
		sector_count++;
	}
	return sector_count;
}


/* Function: sectors_to_bytes
 *
 *	sectors : Sector count to convert.
 *
 *	Return the number of bytes covered by the given number of sectors.
 */
inline u_int64_t sectors_to_bytes( u_int64_t sectors )
{
	u_int64_t	byte_count = sectors;

	byte_count <<= EVMS_VSECTOR_SIZE_SHIFT;
	return byte_count;
}


/* Function: lvm_log2
 *
 *	Calculate the log-base2 of the argument
 */
inline long lvm_log2( long arg )
{
	long	result = -1;
	long	tmp;

	if ( arg ) {
		tmp = arg;
		result++;
		while ( ! (tmp & 1) ) {
			result++;
			tmp >>= 1;
		}
		if ( tmp != 1 ) {
			// arg isn't a power of 2!
			result = -2;
		}
	}
	return result;
}


/* Function: lvm_endian_convert_pv
 *
 *	pv : pv_disk_t structure to be converted
 *
 *	Run every integer field of a PV structure through DISK_TO_CPU16 or
 *	DISK_TO_CPU32, in place. The same routine is used after reading the
 *	structure from disk and before writing it back, so it serves for both
 *	directions of the conversion. Fields not listed here (such as the id
 *	characters) are left untouched.
 */
#define LVM_PV_SWAP16(field)	pv->field = DISK_TO_CPU16(pv->field)
#define LVM_PV_SWAP32(field)	pv->field = DISK_TO_CPU32(pv->field)
#define LVM_PV_SWAP_AREA(area)	do { LVM_PV_SWAP32(area.base); LVM_PV_SWAP32(area.size); } while (0)
static inline void lvm_endian_convert_pv( pv_disk_t * pv )
{
	LOG_ENTRY;

	LVM_PV_SWAP16(version);

	// Base/size descriptors of each on-disk metadata area.
	LVM_PV_SWAP_AREA(pv_on_disk);
	LVM_PV_SWAP_AREA(vg_on_disk);
	LVM_PV_SWAP_AREA(pv_uuidlist_on_disk);
	LVM_PV_SWAP_AREA(lv_on_disk);
	LVM_PV_SWAP_AREA(pe_on_disk);

	// Scalar PV attributes.
	LVM_PV_SWAP32(pv_major);
	LVM_PV_SWAP32(pv_number);
	LVM_PV_SWAP32(pv_status);
	LVM_PV_SWAP32(pv_allocatable);
	LVM_PV_SWAP32(pv_size);
	LVM_PV_SWAP32(lv_cur);
	LVM_PV_SWAP32(pe_size);
	LVM_PV_SWAP32(pe_total);
	LVM_PV_SWAP32(pe_allocated);
	LVM_PV_SWAP32(pe_start);

	LOG_EXIT(0);
}
#undef LVM_PV_SWAP_AREA
#undef LVM_PV_SWAP32
#undef LVM_PV_SWAP16


/* Function: lvm_read_pv
 *
 *	segment : storage object to read the metadata from
 *	pv : location to put a pointer to the PV metadata
 *
 *	Read the PV metadata area from the specified segment, convert it to
 *	CPU byte order, and check it for an LVM PV signature ("HM"), a
 *	supported version (1 or 2), and a PV size equal to the segment size.
 *	On success a freshly allocated copy of the metadata is returned
 *	through *pv. On any failure *pv is left NULL and nothing remains
 *	allocated.
 *
 *	Returns 0 on success, ENOMEM if a buffer could not be allocated,
 *	EIO if the read failed, or EINVAL if the object is not an LVM PV.
 */
int lvm_read_pv(storage_object_t	* segment,
		pv_disk_t		** pv )
{
	pv_disk_t	* raw_pv;
	pv_disk_t	* new_pv;
	int		rc = 0;

	LOG_ENTRY;
	LOG_EXTRA("Reading PV metadata from object %s\n", segment->name);

	*pv = NULL;

	// Scratch buffer covering the whole on-disk PV metadata area.
	raw_pv = lvm_engine->engine_alloc(LVM_PV_DISK_SIZE);
	if ( ! raw_pv ) {
		LOG_CRITICAL("Memory error creating buffer to read PV metadata from object %s\n", segment->name);
		RETURN(ENOMEM);
	}

	// Pull the PV metadata area in from the object.
	if ( READ(segment, bytes_to_sectors(LVM_PV_DISK_BASE),
			bytes_to_sectors(LVM_PV_DISK_SIZE), raw_pv) ) {
		LOG_SERIOUS("Error reading PV metadata from object %s\n", segment->name);
		rc = EIO;
		goto out;
	}

	// The checks below need the fields in CPU byte order.
	lvm_endian_convert_pv(raw_pv);

	// Reject anything without the LVM signature, with an unknown
	// version (only 1 and 2 are accepted), or whose recorded size
	// doesn't match the object it lives on.
	if ( raw_pv->id[0] != 'H' ||
	     raw_pv->id[1] != 'M' ||
	     (raw_pv->version != 1 && raw_pv->version != 2) ||
	     raw_pv->pv_size != segment->size ) {
		LOG_EXTRA("Object %s is not an LVM PV\n", segment->name);
		rc = EINVAL;
		goto out;
	}

	// Valid PV: hand the caller its own pv_disk_t copy.
	new_pv = lvm_engine->engine_alloc(sizeof(pv_disk_t));
	if ( ! new_pv ) {
		LOG_CRITICAL("Memory error creating new PV for object %s\n", segment->name);
		rc = ENOMEM;
		goto out;
	}
	memcpy(new_pv, raw_pv, sizeof(pv_disk_t));
	*pv = new_pv;

out:
	lvm_engine->engine_free(raw_pv);
	RETURN(rc);
}


/* Function: lvm_write_pv
 *
 *	pv_entry : PV whose metadata should be written.
 *
 *	Write the PV metadata area to the specified PV. The in-memory
 *	pv_disk_t is copied into a scratch buffer and converted there, so the
 *	caller's copy stays in CPU byte order.
 *
 *	Returns 0 on success, ENOMEM if the scratch buffer could not be
 *	allocated, or EIO if the write failed.
 */
int lvm_write_pv( lvm_physical_volume_t * pv_entry )
{
	storage_object_t	* segment = pv_entry->segment;
	pv_disk_t		* disk_pv;
	int			rc;

	LOG_ENTRY;

	// Scratch buffer covering the whole on-disk PV metadata area.
	disk_pv = lvm_engine->engine_alloc(LVM_PV_DISK_SIZE);
	if ( ! disk_pv ) {
		LOG_CRITICAL("Memory error creating buffer to write PV metadata to object %s\n", segment->name);
		RETURN(ENOMEM);
	}

	// Work on a copy so the in-memory metadata is not byte-swapped.
	memcpy(disk_pv, pv_entry->pv, sizeof(pv_disk_t));
	lvm_endian_convert_pv(disk_pv);

	// Push the converted copy out to the object.
	rc = WRITE(segment, bytes_to_sectors(LVM_PV_DISK_BASE),
			bytes_to_sectors(LVM_PV_DISK_SIZE), disk_pv) ? EIO : 0;
	if ( rc ) {
		LOG_SERIOUS("Error writing PV metadata to object %s\n", segment->name);
	}

	lvm_engine->engine_free(disk_pv);
	RETURN(rc);
}


/* Function: lvm_erase_pv
 *
 *	object : Storage object holding the PV metadata.
 *
 *	Clear the PV metadata area from the specified object by handing its
 *	sector range to KILL_SECTORS.
 *
 *	Returns whatever KILL_SECTORS returns.
 */
int lvm_erase_pv( storage_object_t * object )
{
	u_int64_t	first_sector;
	u_int64_t	sector_count;
	int		status;

	LOG_ENTRY;

	first_sector	= bytes_to_sectors(LVM_PV_DISK_BASE);
	sector_count	= bytes_to_sectors(LVM_PV_DISK_SIZE);

	status = KILL_SECTORS(object, first_sector, sector_count);

	RETURN(status);
}


/* Function: lvm_endian_convert_vg
 *
 *	vg : vg_disk_t structure to be converted
 *
 *	Run every integer field of a VG structure through DISK_TO_CPU32, in
 *	place. The same routine is used after reading the structure from disk
 *	and before writing it back, so it serves for both directions of the
 *	conversion.
 */
#define LVM_VG_SWAP32(field)	vg->field = DISK_TO_CPU32(vg->field)
static inline void lvm_endian_convert_vg( vg_disk_t * vg )
{
	LOG_ENTRY;

	// Identity and state.
	LVM_VG_SWAP32(vg_number);
	LVM_VG_SWAP32(vg_access);
	LVM_VG_SWAP32(vg_status);

	// LV counters.
	LVM_VG_SWAP32(lv_max);
	LVM_VG_SWAP32(lv_cur);
	LVM_VG_SWAP32(lv_open);

	// PV counters.
	LVM_VG_SWAP32(pv_max);
	LVM_VG_SWAP32(pv_cur);
	LVM_VG_SWAP32(pv_act);

	LVM_VG_SWAP32(dummy);
	LVM_VG_SWAP32(vgda);

	// PE accounting.
	LVM_VG_SWAP32(pe_size);
	LVM_VG_SWAP32(pe_total);
	LVM_VG_SWAP32(pe_allocated);
	LVM_VG_SWAP32(pvg_total);

	LOG_EXIT(0);
}
#undef LVM_VG_SWAP32


/* Function: lvm_read_vg
 *
 *	segment : Storage object to read the metadata from.
 *	pv : PV metadata for this segment.
 *	vg : Location to store a pointer to the VG metadata.
 *
 *	Read the VG metadata from the specified segment/PV, using the
 *	location and size recorded in pv->vg_on_disk. The data is converted
 *	to CPU byte order and a freshly allocated copy is returned through
 *	*vg. On any failure *vg is left NULL and nothing remains allocated.
 *
 *	Returns 0 on success, EINVAL if the VG area described by the PV is
 *	too small to contain a vg_disk_t, ENOMEM if a buffer could not be
 *	allocated, or EIO if the read failed.
 */
int lvm_read_vg(storage_object_t	* segment,
		pv_disk_t		* pv,
		vg_disk_t		** vg )
{
	vg_disk_t	* vg_buffer;
	int		vg_sectors;

	LOG_ENTRY;
	LOG_EXTRA("Reading VG metadata from object %s\n", segment->name);

	*vg = NULL;

	// The I/O buffer is sized from the PV metadata, but a whole
	// vg_disk_t is copied out of it below. Refuse an area that is too
	// small rather than reading past the end of the buffer.
	vg_sectors = bytes_to_sectors(pv->vg_on_disk.size);
	if ( sectors_to_bytes(vg_sectors) < sizeof(vg_disk_t) ) {
		LOG_SERIOUS("VG metadata area on object %s is too small to hold a VG structure\n", segment->name);
		RETURN(EINVAL);
	}

	// Allocate a buffer to read the VG metadata
	vg_buffer = lvm_engine->engine_alloc(sectors_to_bytes(vg_sectors));
	if ( ! vg_buffer ) {
		LOG_CRITICAL("Memory error creating buffer to read VG metadata from object %s.\n", segment->name);
		RETURN(ENOMEM);
	}

	// Read the VG metadata.
	if ( READ(segment, bytes_to_sectors(pv->vg_on_disk.base), vg_sectors, vg_buffer) ) {
		LOG_SERIOUS("Error reading VG metadata from object %s\n", segment->name);
		lvm_engine->engine_free(vg_buffer);
		RETURN(EIO);
	}

	// Endian-neutral conversion of VG metadata
	lvm_endian_convert_vg(vg_buffer);

	// Allocate a new vg_disk_t to return.
	*vg = lvm_engine->engine_alloc(sizeof(vg_disk_t));
	if ( ! *vg ) {
		LOG_CRITICAL("Memory error creating new VG structure for object %s\n", segment->name);
		lvm_engine->engine_free(vg_buffer);
		RETURN(ENOMEM);
	}

	// Copy metadata.
	memcpy(*vg, vg_buffer, sizeof(vg_disk_t));
	lvm_engine->engine_free(vg_buffer);
	RETURN(0);
}


/* Function: lvm_write_vg
 *
 *	pv_entry : PV to write the VG metadata to.
 *
 *	Write the VG metadata of the PV's group to the specified PV, at the
 *	location and size recorded in the PV's vg_on_disk descriptor. The
 *	in-memory vg_disk_t is copied into a scratch buffer and converted
 *	there, so the group's copy stays in CPU byte order.
 *
 *	Returns 0 on success, EINVAL if the VG area described by the PV is
 *	too small to contain a vg_disk_t, ENOMEM if the scratch buffer could
 *	not be allocated, or EIO if the write failed.
 */
int lvm_write_vg( lvm_physical_volume_t * pv_entry )
{
	vg_disk_t		* vg_buffer;
	lvm_volume_group_t	* group		= pv_entry->group;
	storage_object_t	* segment	= pv_entry->segment;
	pv_disk_t		* pv		= pv_entry->pv;
	unsigned long		vg_sectors;
	int			rc		= 0;

	LOG_ENTRY;

	// The buffer is sized from the PV metadata, but a whole vg_disk_t
	// is copied into it below. Refuse an area that is too small rather
	// than writing past the end of the buffer.
	vg_sectors = bytes_to_sectors(pv->vg_on_disk.size);
	if ( sectors_to_bytes(vg_sectors) < sizeof(vg_disk_t) ) {
		LOG_SERIOUS("VG metadata area on object %s is too small to hold a VG structure\n", segment->name);
		RETURN(EINVAL);
	}

	// Allocate buffer for writing the VG metadata.
	vg_buffer = lvm_engine->engine_alloc(sectors_to_bytes(vg_sectors));
	if ( ! vg_buffer ) {
		LOG_CRITICAL("Memory error creating buffer to write VG metadata to object %s.\n", segment->name);
		RETURN(ENOMEM);
	}

	// Work on a copy so the group's metadata is not byte-swapped.
	memcpy(vg_buffer, group->vg, sizeof(vg_disk_t));

	// Endian conversion of VG metadata
	lvm_endian_convert_vg(vg_buffer);

	// Write the metadata.
	if ( WRITE(segment, bytes_to_sectors(pv->vg_on_disk.base), vg_sectors, vg_buffer) ) {
		LOG_SERIOUS("Error writing VG metadata to object %s\n", segment->name);
		rc = EIO;
	}

	lvm_engine->engine_free(vg_buffer);
	RETURN(rc);
}


/* Function: lvm_read_uuid_list
 *
 *	segment : Object to read the UUIDs from
 *	pv : PV metadata for this segment
 *	group : Group to read the UUIDs into
 *
 *	Read the list of PV UUIDs from the specified segment, unless the
 *	group is already flagged as having its UUID list. The on-disk list is
 *	an array of NAME_LEN-sized slots; a slot whose first byte is zero is
 *	treated as empty. The first entry in the on-disk list (entry 0)
 *	corresponds to the first PV in the group (pv_number 1). So that the
 *	group's list can be indexed by PV number, on-disk entry i is stored
 *	in group->uuid_list[i+1], allocating the string if the slot is empty.
 *
 *	At most group->vg->pv_max entries are examined, and never more than
 *	fit in the I/O buffer sized from pv->pv_uuidlist_on_disk.size.
 *
 *	Returns 0 on success (including when the list was already present),
 *	ENOMEM on allocation failure, or the READ error code if the read
 *	failed.
 */
int lvm_read_uuid_list( storage_object_t	* segment,
			pv_disk_t		* pv,
			lvm_volume_group_t	* group )
{
	char		* uuid_buffer;
	u_int64_t	buffer_bytes;
	int		uuid_sectors;
	int		i, rc = 0;

	LOG_ENTRY;

	// Only read in the UUID list if it hasn't been already.
	if ( ! (group->flags & LVM_VG_FLAG_UUID_LIST_PRESENT) ) {
		LOG_DETAILS("Reading PV UUIDs for container %s\n", group->container->name);

		// Allocate I/O buffer.
		uuid_sectors = bytes_to_sectors(pv->pv_uuidlist_on_disk.size);
		buffer_bytes = sectors_to_bytes(uuid_sectors);
		uuid_buffer = lvm_engine->engine_alloc(buffer_bytes);
		if ( ! uuid_buffer ) {
			LOG_CRITICAL("Memory error creating buffer to read UUID list from object %s\n", segment->name);
			RETURN(ENOMEM);
		}

		// Read the array from the PV.
		rc = READ(segment, bytes_to_sectors(pv->pv_uuidlist_on_disk.base), uuid_sectors, uuid_buffer);
		if (rc) {
			LOG_SERIOUS("Error reading PV UUID list from object %s\n", segment->name);
			lvm_engine->engine_free(uuid_buffer);
			RETURN(rc);
		}

		// Copy each valid UUID to the group. UUIDs are char-strings,
		// so no endian conversion is necessary.
		for ( i = 0; i < group->vg->pv_max; i++ ) {
			// pv_max and the list size come from separate
			// on-disk fields. Stop before an entry that would
			// lie (even partly) beyond the I/O buffer.
			if ( (u_int64_t)i * NAME_LEN + UUID_LEN > buffer_bytes ) {
				LOG_SERIOUS("UUID list on object %s is too short for %u PVs; stopping at entry %d\n",
					segment->name, group->vg->pv_max, i);
				break;
			}

			if ( uuid_buffer[i*NAME_LEN] ) {
				if ( ! group->uuid_list[i+1] ) {
					group->uuid_list[i+1] = lvm_engine->engine_alloc(UUID_LEN);
					if ( ! group->uuid_list[i+1] ) {
						LOG_CRITICAL("Memory error creating string for UUID entry %d in container %s\n",
							i+1, group->container->name);
						lvm_engine->engine_free(uuid_buffer);
						RETURN(ENOMEM);
					}
				}
				memcpy(group->uuid_list[i+1], &(uuid_buffer[i*NAME_LEN]), UUID_LEN);
			}
		}

		group->flags |= LVM_VG_FLAG_UUID_LIST_PRESENT;
		lvm_engine->engine_free(uuid_buffer);
	}
	else {
		LOG_EXTRA("Already read PV UUIDs for container %s\n", group->container->name);
	}

	RETURN(0);
}


/* Function: lvm_write_uuid_list
 *
 *	pv_entry : PV to write the UUID list to.
 *
 *	Write the list of PV UUIDs to the specified PV. The group stores the
 *	UUID list offset by one, so it can use the PV number to index the
 *	table internally. The first entry of the group's table is therefore
 *	left out: group->uuid_list[i+1] is written to on-disk slot i, where
 *	each slot is NAME_LEN bytes wide.
 *
 *	At most group->vg->pv_max entries are written, and never more than
 *	fit in the I/O buffer sized from pv->pv_uuidlist_on_disk.size.
 *
 *	Returns 0 on success, ENOMEM if the I/O buffer could not be
 *	allocated, or the WRITE error code if the write failed.
 */
int lvm_write_uuid_list( lvm_physical_volume_t * pv_entry )
{
	lvm_volume_group_t	* group		= pv_entry->group;
	storage_object_t	* segment	= pv_entry->segment;
	pv_disk_t		* pv		= pv_entry->pv;
	char			* uuid_buffer;
	u_int64_t		buffer_bytes;
	int			uuid_sectors;
	int			i, rc;

	LOG_ENTRY;

	// Allocate I/O buffer.
	// NOTE(review): slots without a UUID are never written below, so
	// this presumably relies on engine_alloc returning zeroed memory
	// -- confirm.
	uuid_sectors = bytes_to_sectors(pv->pv_uuidlist_on_disk.size);
	buffer_bytes = sectors_to_bytes(uuid_sectors);
	uuid_buffer = lvm_engine->engine_alloc(buffer_bytes);
	if ( ! uuid_buffer ) {
		LOG_CRITICAL("Memory error creating buffer to write UUID list to object %s\n", segment->name);
		RETURN(ENOMEM);
	}

	// Copy all valid UUIDs from the group to the buffer. UUIDs are
	// char-strings, so no endian conversion is necessary.
	for ( i = 0; i < group->vg->pv_max; i++ ) {
		// pv_max and the list size come from separate metadata
		// fields. Stop before an entry that would land (even partly)
		// beyond the I/O buffer.
		if ( (u_int64_t)i * NAME_LEN + UUID_LEN > buffer_bytes ) {
			LOG_SERIOUS("UUID list on object %s is too short for %u PVs; stopping at entry %d\n",
				segment->name, group->vg->pv_max, i);
			break;
		}

		if ( group->uuid_list[i+1] ) {
			memcpy(&(uuid_buffer[i*NAME_LEN]), group->uuid_list[i+1], UUID_LEN);
		}
	}

	// Write the array to the PV.
	rc = WRITE(segment, bytes_to_sectors(pv->pv_uuidlist_on_disk.base), uuid_sectors, uuid_buffer);
	if (rc) {
		LOG_SERIOUS("Error writing UUID list to object %s\n", segment->name);
	}

	lvm_engine->engine_free(uuid_buffer);
	RETURN(rc);
}


/* Function: lvm_endian_convert_lv
 *
 *	lv : lv_disk_t structure to be converted
 *
 *	Run every integer field of an LV structure through DISK_TO_CPU16 or
 *	DISK_TO_CPU32, in place. Used for both directions of the conversion
 *	(memory-order to disk-order and back).
 */
#define LVM_LV_SWAP16(field)	lv->field = DISK_TO_CPU16(lv->field)
#define LVM_LV_SWAP32(field)	lv->field = DISK_TO_CPU32(lv->field)
static inline void lvm_endian_convert_lv( lv_disk_t * lv )
{
	// Access, state and identity.
	LVM_LV_SWAP32(lv_access);
	LVM_LV_SWAP32(lv_status);
	LVM_LV_SWAP32(lv_open);
	LVM_LV_SWAP32(lv_dev);
	LVM_LV_SWAP32(lv_number);

	LVM_LV_SWAP32(lv_mirror_copies);
	LVM_LV_SWAP32(lv_recovery);
	LVM_LV_SWAP32(lv_schedule);
	LVM_LV_SWAP32(lv_size);

	// Snapshot fields; the chunk size and dummy are 16-bit.
	LVM_LV_SWAP32(lv_snapshot_minor);
	LVM_LV_SWAP16(lv_chunk_size);
	LVM_LV_SWAP16(dummy);

	// Allocation and striping.
	LVM_LV_SWAP32(lv_allocated_le);
	LVM_LV_SWAP32(lv_stripes);
	LVM_LV_SWAP32(lv_stripesize);
	LVM_LV_SWAP32(lv_badblock);
	LVM_LV_SWAP32(lv_allocation);

	LVM_LV_SWAP32(lv_io_timeout);
	LVM_LV_SWAP32(lv_read_ahead);
}
#undef LVM_LV_SWAP32
#undef LVM_LV_SWAP16


/* Function: lvm_endian_convert_lvs
 *
 *	group : Group whose LV array should be converted.
 *
 *	Endian-convert all MAX_LV entries of the group's LV array, in place.
 *	Always returns 0.
 */
int lvm_endian_convert_lvs( lvm_volume_group_t * group )
{
	int remaining = MAX_LV;
	int slot = 0;

	LOG_ENTRY;

	while ( remaining > 0 ) {
		lvm_endian_convert_lv(&(group->lv_array[slot]));
		slot++;
		remaining--;
	}

	RETURN(0);
}


/* Function: lvm_read_lv_array
 *
 *	group : Group to read the LV array for.
 *
 *	Read the LV metadata into group->lv_array, which must already point
 *	at a buffer large enough for it. PV slots 1 through MAX_PV are tried
 *	in order; empty slots are skipped, and if the read from one PV fails
 *	the next one is tried, until one read works. The array is then
 *	converted to CPU byte order and the group is flagged so the array is
 *	not read again.
 *
 *	Returns 0 if the array is (now or already) present, or EIO if no PV
 *	in the group could supply it.
 */
int lvm_read_lv_array( lvm_volume_group_t * group )
{
	storage_object_t	* segment;
	pv_disk_t		* pv;
	int			slot;
	int			rc = 1;	// stays non-zero if no PV is ever read

	LOG_ENTRY;

	// Nothing to do if the LV array was read earlier.
	if ( group->flags & LVM_VG_FLAG_LV_LIST_PRESENT ) {
		LOG_EXTRA("Already read LV metadata for container %s\n", group->container->name);
		RETURN(0);
	}

	LOG_DETAILS("Reading LV metadata for container %s\n", group->container->name);

	for ( slot = 1; slot <= MAX_PV; slot++ ) {
		// Only populated PV slots can be read from.
		if ( group->pv_list[slot] ) {
			segment	= group->pv_list[slot]->segment;
			pv	= group->pv_list[slot]->pv;

			rc = READ(segment, bytes_to_sectors(pv->lv_on_disk.base),
				bytes_to_sectors(pv->lv_on_disk.size), group->lv_array);
			if ( ! rc ) {
				// Got it. Convert to CPU order and stop looking.
				lvm_endian_convert_lvs(group);
				group->flags |= LVM_VG_FLAG_LV_LIST_PRESENT;
				break;
			}

			// This PV failed; fall through to the next one.
			LOG_SERIOUS("Error reading LV metadata from object %s\n", segment->name);
		}
	}

	if ( rc ) {
		LOG_SERIOUS("Failed to read LV metadata from all objects in container %s\n", group->container->name);
		RETURN(EIO);
	}

	RETURN(0);
}


/* Function: lvm_write_lv_array
 *
 *	pv_entry : PV to write the LV array to.
 *
 *	Write the group's LV array to the specified PV, at the location and
 *	size recorded in the PV's lv_on_disk descriptor. The array is written
 *	exactly as it sits in memory: endian-conversion is done outside this
 *	function as an optimization.
 *
 *	Returns 0 on success or EIO if the write failed.
 */
int lvm_write_lv_array( lvm_physical_volume_t * pv_entry )
{
	lvm_volume_group_t	* group		= pv_entry->group;
	storage_object_t	* segment	= pv_entry->segment;
	pv_disk_t		* pv		= pv_entry->pv;
	u_int64_t		first_sector;
	u_int64_t		sector_count;
	int			rc		= 0;

	LOG_ENTRY;

	first_sector	= bytes_to_sectors(pv->lv_on_disk.base);
	sector_count	= bytes_to_sectors(pv->lv_on_disk.size);

	if ( WRITE(segment, first_sector, sector_count, group->lv_array) ) {
		LOG_SERIOUS("Error writing LV array to object %s\n", segment->name);
		rc = EIO;
	}

	RETURN(rc);
}


/* Function: lvm_endian_convert_pe
 *
 *	pe : pe_disk_t entry to be converted
 *
 *	Run both 16-bit fields of a single PE map entry through
 *	DISK_TO_CPU16, in place.
 */
static inline void lvm_endian_convert_pe( pe_disk_t * pe )
{
	pe_disk_t	* entry = pe;

	entry->le_num = DISK_TO_CPU16(entry->le_num);
	entry->lv_num = DISK_TO_CPU16(entry->lv_num);
}


/* Function: lvm_endian_convert_pe_map
 *
 *	pv_entry : PV whose PE map should be converted
 *
 *	Endian-convert the first pv->pe_total entries of the PV's PE map,
 *	in place.
 */
static inline void lvm_endian_convert_pe_map( lvm_physical_volume_t * pv_entry )
{
	u_int32_t	entry_count = pv_entry->pv->pe_total;
	u_int32_t	idx;

	LOG_ENTRY;

	for ( idx = 0; idx < entry_count; idx++ ) {
		lvm_endian_convert_pe(pv_entry->pe_map + idx);
	}

	LOG_EXIT(0);
}


/* Function: lvm_read_pe_map
 *
 *	pv_entry : PV to read the PE map for.
 *
 *	Read pe_map_sectors sectors of PE map from the specified PV into
 *	pv_entry->pe_map, then convert the map to CPU byte order. The PV
 *	needs to already have a buffer to hold the PE map.
 *
 *	Returns 0 on success or EIO if the read failed.
 */
int lvm_read_pe_map( lvm_physical_volume_t * pv_entry )
{
	storage_object_t	* segment	= pv_entry->segment;
	pv_disk_t		* pv		= pv_entry->pv;
	u_int64_t		first_sector;

	LOG_ENTRY;

	// The engine has no re-discovery, so each PV is discovered only
	// once and its PE map should never need to be re-read.

	first_sector = bytes_to_sectors(pv->pe_on_disk.base);

	if ( READ(segment, first_sector, pv_entry->pe_map_sectors, pv_entry->pe_map) ) {
		LOG_SERIOUS("Error reading PE map from object %s\n", segment->name);
		RETURN(EIO);
	}

	// Bring the freshly-read map into CPU byte order.
	lvm_endian_convert_pe_map(pv_entry);

	RETURN(0);
}


/* Function: lvm_write_pe_map
 *
 *	pv_entry : PV to write the PE map to.
 *
 *	Write the PE map to the specified PV. The in-memory map is converted
 *	to disk byte order in place for the duration of the write and is
 *	always converted back afterwards, whether or not the write succeeded,
 *	so the caller never ends up with a map left in disk order.
 *
 *	Returns 0 on success or EIO if the write failed.
 */
int lvm_write_pe_map( lvm_physical_volume_t * pv_entry )
{
	storage_object_t	* segment	= pv_entry->segment;
	pv_disk_t		* pv		= pv_entry->pv;
	int			rc		= 0;

	LOG_ENTRY;

	// Convert the map to disk order for the write.
	lvm_endian_convert_pe_map(pv_entry);

	if ( WRITE(segment, bytes_to_sectors(pv->pe_on_disk.base),
			pv_entry->pe_map_sectors, pv_entry->pe_map) ) {
		LOG_SERIOUS("Error writing PE map to object %s\n", segment->name);
		rc = EIO;
	}

	// Restore CPU order on every path. Skipping this after a failed
	// write would leave the in-memory map byte-swapped.
	lvm_endian_convert_pe_map(pv_entry);

	RETURN(rc);
}


/* Function: lvm_erase_group_metadata
 *
 *	pv_entry : PV to erase the group metadata from.
 *
 *	This function erases all of the group-specific metadata from the
 *	specified PV: the PE map, the LV structures, the UUID list, and the
 *	VG structure. This MUST be called BEFORE the PV is removed from
 *	the group, or the metadata pointers will have been erased.
 *
 *	All four areas are always attempted, even if an earlier one fails.
 *	Each failure is logged.
 *
 *	Returns 0 if every erase succeeded, otherwise the error code of the
 *	first erase that failed.
 */
int lvm_erase_group_metadata( lvm_physical_volume_t * pv_entry )
{
	storage_object_t	* segment	= pv_entry->segment;
	pv_disk_t		* pv		= pv_entry->pv;
	int			first_error	= 0;
	int			rc;

	LOG_ENTRY;

	// PE map.
	rc = KILL_SECTORS(segment, bytes_to_sectors(pv->pe_on_disk.base), pv_entry->pe_map_sectors);
	if ( rc ) {
		LOG_SERIOUS("Error erasing PE map from object %s\n", segment->name);
		if ( ! first_error ) {
			first_error = rc;
		}
	}

	// LV structures.
	rc = KILL_SECTORS(segment, bytes_to_sectors(pv->lv_on_disk.base), bytes_to_sectors(pv->lv_on_disk.size));
	if ( rc ) {
		LOG_SERIOUS("Error erasing LV array from object %s\n", segment->name);
		if ( ! first_error ) {
			first_error = rc;
		}
	}

	// UUID list.
	rc = KILL_SECTORS(segment, bytes_to_sectors(pv->pv_uuidlist_on_disk.base), bytes_to_sectors(pv->pv_uuidlist_on_disk.size));
	if ( rc ) {
		LOG_SERIOUS("Error erasing UUID list from object %s\n", segment->name);
		if ( ! first_error ) {
			first_error = rc;
		}
	}

	// VG structure.
	rc = KILL_SECTORS(segment, bytes_to_sectors(pv->vg_on_disk.base), bytes_to_sectors(pv->vg_on_disk.size));
	if ( rc ) {
		LOG_SERIOUS("Error erasing VG metadata from object %s\n", segment->name);
		if ( ! first_error ) {
			first_error = rc;
		}
	}

	RETURN(first_error);
}


/* Function: lvm_zero_first_1k
 *
 *	volume : Volume whose first 1k should be cleared.
 *
 *	Clear the first logical 1k (two sectors starting at sector 0) of the
 *	specified volume. This bypasses some of the checks in lvm_write: for
 *	instance, when a new snapshot volume is created, this write really
 *	should go through. Because the range is always the very start of the
 *	volume, it can never cross a PE or stripe boundary.
 *
 *	The clearing is done with KILL_SECTORS rather than WRITE, since this
 *	is called at creation time instead of commit time.
 *
 *	Returns 0 on success, EIO if the sectors could not be remapped to a
 *	PV (no owning PV, or a remapped sector of zero), or the KILL_SECTORS
 *	error code.
 */
int lvm_zero_first_1k( lvm_logical_volume_t * volume )
{
	lvm_physical_volume_t	* target_pv	= NULL;
	lsn_t			target_lsn	= 0;
	sector_count_t		target_count	= 0;
	int			rc;

	LOG_ENTRY;

	// Translate volume sectors 0-1 into a PV and a PV-relative range.
	lvm_remap_sector(volume, 0, 2, &target_lsn, &target_count, &target_pv);

	// No backing PV or no usable location means nothing can be cleared.
	if ( ! target_pv || ! target_lsn ) {
		RETURN(EIO);
	}

	rc = KILL_SECTORS(target_pv->segment, target_lsn, target_count);
	if ( rc ) {
		LOG_SERIOUS("Error clearing first 1k of region %s\n", volume->region->name);
	}

	RETURN(rc);
}


/* Function: lvm_remap_sector
 *
 *	volume : Volume the request is addressed to.
 *	org_sector : Volume-relative starting sector of the request.
 *	org_size : Number of sectors requested.
 *	new_sector : Receives the PV-relative starting sector.
 *	new_size : Receives the number of sectors that can be handled in one
 *		piece starting at new_sector (at most org_size).
 *	pv_entry : Receives the PV that owns the target LE.
 *
 *	This function is used by Read and Write to remap volume-relative LBA to
 *	PV-relative LBA. The logical extent (LE) containing org_sector and the
 *	offset within it are computed, then looked up in the volume's le_map.
 *	If the request would run past the end of the current stripe (striped
 *	volumes) or the current PE (non-striped volumes), *new_size is cut
 *	back to end at that boundary.
 *
 *	No range check is made on the computed LE index here, and *pv_entry is
 *	returned exactly as stored in the le_map.
 *
 *	Always returns 0.
 */
int lvm_remap_sector(	lvm_logical_volume_t	* volume,
			lsn_t			org_sector,
			sector_count_t		org_size,
			lsn_t			* new_sector,
			sector_count_t		* new_size,
			lvm_physical_volume_t	** pv_entry )
{
	lv_disk_t	* lv = volume->lv;
	vg_disk_t	* vg = volume->group->vg;
	u_int32_t	sectors_per_column;
	u_int32_t	column;
	u_int32_t	sector_in_column;
	u_int32_t	stripe_in_column;
	u_int32_t	le_in_column;
	u_int32_t	stripe_in_le;
	u_int32_t	offset_in_stripe;
	u_int32_t	offset_in_le;
	u_int32_t	le;
	u_int32_t	columns;

	LOG_ENTRY;

	// Assume the whole request fits until a boundary says otherwise.
	*new_size	= org_size;

	// Check if the volume is striped. If the request crosses
	// a stripe boundary, reset the new_size appropriately.
	if ( lv->lv_stripes > 1 ) {
		// A "column" spans one PE's worth of sectors on each of the
		// lv_stripes stripes. Within a column, consecutive
		// stripesize-sized chunks rotate across the stripes.
       		sectors_per_column	= lv->lv_stripes * vg->pe_size;
       		column			= org_sector / sectors_per_column;
       		sector_in_column	= org_sector % sectors_per_column;
       		stripe_in_column	= sector_in_column / lv->lv_stripesize;
       		le_in_column		= stripe_in_column % lv->lv_stripes;
       		columns			= lv->lv_allocated_le / lv->lv_stripes;
		// LE index is the column number plus 'columns' LEs for each
		// stripe position before this one.
       		le			= column + (columns * le_in_column);

		// Note the offset is taken from org_sector directly rather
		// than from sector_in_column.
       		offset_in_stripe	= org_sector % lv->lv_stripesize;
       		stripe_in_le		= stripe_in_column / lv->lv_stripes;
       		offset_in_le		= offset_in_stripe + stripe_in_le * lv->lv_stripesize;

		if ( offset_in_stripe + org_size > lv->lv_stripesize ) {
			*new_size = lv->lv_stripesize - offset_in_stripe;
		}
	}
	// Non-striped volume. Just find the LE and offset. If the request
	// crosses a PE boundary, reset the new_size appropriately.
	else {
       		le		= org_sector / vg->pe_size;
       		offset_in_le	= org_sector % vg->pe_size;

		if ( offset_in_le + org_size > vg->pe_size ) {
			*new_size = vg->pe_size - offset_in_le;
		}
	}

	// Translate the LE into its backing PV and the PV-relative sector.
	*new_sector	= volume->le_map[le].pe_sector_offset + offset_in_le;
	*pv_entry	= volume->le_map[le].owning_pv;

	RETURN(0);
}


