HEX
Server: LiteSpeed
System: Linux CentOS-79-64-minimal 3.10.0-1160.119.1.el7.x86_64 #1 SMP Tue Jun 4 14:43:51 UTC 2024 x86_64
User: vishn3436 (5293)
PHP: 8.0.15
Disabled: NONE
Upload Files
File: //usr/src/dattobd-0.11.2/tracer.c
// SPDX-License-Identifier: GPL-2.0-only

/*
 * Copyright (C) 2022 Datto Inc.
 */

#include "tracer.h"

#include "bio_request_callback.h"
#include "blkdev.h"
#include "callback_refs.h"
#include "cow_manager.h"
#include "filesystem.h"
#include "hints.h"
#include "logging.h"
#include "module_control.h"
#include "module_threads.h"
#include "mrf.h"
#include "snap_device.h"
#include "snap_ops.h"
#include "submit_bio.h"
#include "task_helper.h"
#include "tracer_helper.h"
#include "tracing_params.h"
#include <linux/blk-mq.h>
#ifdef HAVE_BLK_ALLOC_QUEUE
//#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,7,0)
#include <linux/percpu-refcount.h>
#endif

#if !defined(HAVE_BDEV_STACK_LIMITS) && !defined(HAVE_BLK_SET_DEFAULT_LIMITS)
//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)

#ifndef min_not_zero
#define min_not_zero(l, r) ((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r))
#endif

static int blk_stack_limits(struct request_queue *t, struct request_queue *b,
                            sector_t offset)
{
        t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
        t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
        t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
        t->seg_boundary_mask =
                min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask);
        t->max_phys_segments =
                min_not_zero(t->max_phys_segments, b->max_phys_segments);
        t->max_hw_segments =
                min_not_zero(t->max_hw_segments, b->max_hw_segments);
        t->max_segment_size =
                min_not_zero(t->max_segment_size, b->max_segment_size);
        return 0;
}

static int dattobd_bdev_stack_limits(struct request_queue *t,
                                     struct block_device *bdev, sector_t start)
{
        struct request_queue *bq = bdev_get_queue(bdev);
        start += get_start_sect(bdev);
        return blk_stack_limits(t, bq, start << 9);
}

#elif !defined(HAVE_BDEV_STACK_LIMITS)
//#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)

static int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
                             sector_t start)
{
        struct request_queue *bq = bdev_get_queue(bdev);
        start += get_start_sect(bdev);
        return blk_stack_limits(t, &bq->limits, start << 9);
}

#define dattobd_bdev_stack_limits(queue, bdev, start)                          \
        bdev_stack_limits(&(queue)->limits, bdev, start)
#else
#define dattobd_bdev_stack_limits(queue, bdev, start)                          \
        bdev_stack_limits(&(queue)->limits, bdev, start)
#endif // # !HAVE_BDEV_STACK_LIMITS) && !HAVE_BLK_SET_DEFAULT_LIMITS

// Helpers to get/set either the make_request_fn or the submit_bio function 
// pointers in a block device.
static inline BIO_REQUEST_CALLBACK_FN* dattobd_get_bd_fn(
    struct block_device *bdev)
{
#ifdef USE_BDOPS_SUBMIT_BIO
    return bdev->bd_disk->fops->submit_bio;
#else
    return bdev->bd_disk->queue->make_request_fn;
#endif
}

#ifndef HAVE_BLK_SET_DEFAULT_LIMITS
#define blk_set_default_limits(ql)
#endif

#define __tracer_setup_cow_new(dev, bdev, cow_path, size, fallocated_space,    \
                               cache_size, uuid, seqid)                        \
        __tracer_setup_cow(dev, bdev, cow_path, size, fallocated_space,        \
                           cache_size, uuid, seqid, 0)
#define __tracer_setup_cow_reload_snap(dev, bdev, cow_path, size, cache_size)  \
        __tracer_setup_cow(dev, bdev, cow_path, size, 0, cache_size, NULL, 0, 1)
#define __tracer_setup_cow_reload_inc(dev, bdev, cow_path, size, cache_size)   \
        __tracer_setup_cow(dev, bdev, cow_path, size, 0, cache_size, NULL, 0, 2)
#define __tracer_setup_cow_reopen(dev, bdev, cow_path)                         \
        __tracer_setup_cow(dev, bdev, cow_path, 0, 0, 0, NULL, 0, 3)

#define __tracer_destroy_cow_free(dev) __tracer_destroy_cow(dev, 0)
#define __tracer_destroy_cow_sync_and_free(dev) __tracer_destroy_cow(dev, 1)
#define __tracer_destroy_cow_sync_and_close(dev) __tracer_destroy_cow(dev, 2)

#define __tracer_setup_inc_cow_thread(dev, minor)                              \
        __tracer_setup_cow_thread(dev, minor, 0)
#define __tracer_setup_snap_cow_thread(dev, minor)                             \
        __tracer_setup_cow_thread(dev, minor, 1)
#ifndef HAVE_BLK_SET_STACKING_LIMITS
#define blk_set_stacking_limits(ql) blk_set_default_limits(ql)
#endif

#ifdef HAVE_BIOSET_NEED_BVECS_FLAG
#define dattobd_bioset_create(bio_size, bvec_size, scale)                      \
        bioset_create(bio_size, bvec_size, BIOSET_NEED_BVECS)
#elif defined HAVE_BIOSET_CREATE_3
#define dattobd_bioset_create(bio_size, bvec_size, scale)                      \
        bioset_create(bio_size, bvec_size, scale)
#else
#define dattobd_bioset_create(bio_size, bvec_size, scale)                      \
        bioset_create(bio_size, scale)
#endif

#define tracer_setup_unverified_inc(dev, minor, bdev_path, cow_path,           \
                                    cache_size)                                \
        __tracer_setup_unverified(dev, minor, bdev_path, cow_path, cache_size, \
                                  0)
#define tracer_setup_unverified_snap(dev, minor, bdev_path, cow_path,          \
                                     cache_size)                               \
        __tracer_setup_unverified(dev, minor, bdev_path, cow_path, cache_size, \
                                  1)

#define ROUND_UP(x, chunk) ((((x) + (chunk)-1) / (chunk)) * (chunk))
#define ROUND_DOWN(x, chunk) (((x) / (chunk)) * (chunk))

// macros for defining sector and block sizes
#define SECTORS_PER_PAGE (PAGE_SIZE / SECTOR_SIZE)
#define BLOCK_TO_SECTOR(block) ((block)*SECTORS_PER_BLOCK)

void dattobd_free_request_tracking_ptr(struct snap_device *dev)
{
    dev->sd_orig_request_fn = NULL;
}
 
/**
 * snap_trace_bio() - Traces a bio when snapshotting.  For bio reads there is
 * nothing to do and the request is passed to the original driver.  For writes
 * the original data must be read and in the event that the original bio
 * cannot be read in a single try multiple attempts are made by creating
 * additional bio requests until the original bio is fully processed.
 *
 * @dev: The &struct snap_device that keeps device state.
 * @bio: The &struct bio which describes the I/O.
 *
 * Return:
 * * 0 - success
 * * !0 - error
 */
static int snap_trace_bio(struct snap_device *dev, struct bio *bio)
{
        int ret;
        struct bio *new_bio = NULL;
        struct tracing_params *tp = NULL;
        sector_t start_sect, end_sect;
        unsigned int bytes, pages;

        // if we don't need to cow this bio just call the real mrf normally
        if (!bio_needs_cow(bio, dev->sd_cow_inode))
                goto call_orig;

        // the cow manager works in 4096 byte blocks, so read clones must also
        // be 4096 byte aligned
        start_sect = ROUND_DOWN(bio_sector(bio) - dev->sd_sect_off,
                        SECTORS_PER_BLOCK) +
                dev->sd_sect_off;
        end_sect = ROUND_UP(bio_sector(bio) + (bio_size(bio) / SECTOR_SIZE) -
                        dev->sd_sect_off,
                        SECTORS_PER_BLOCK) +
                dev->sd_sect_off;
        pages = (end_sect - start_sect) / SECTORS_PER_PAGE;

        // allocate tracing_params struct to hold all pointers we will need
        // across contexts
        ret = tp_alloc(dev, bio, &tp);
        if (ret)
        {
                LOG_ERROR(ret, "error tracing bio for snapshot");
                tracer_set_fail_state(dev, ret);
                goto call_orig;
        }

        while (1) {
                // allocate and populate read bio clone. This bio may not have all the
                // pages we need due to queue restrictions
                ret = bio_make_read_clone(dev_bioset(dev), tp, bio, start_sect, pages,
                                        &new_bio, &bytes);
                if (ret)
                        goto error;

                // set pointers for read clone
                ret = tp_add(tp, new_bio);
                if (ret)
                        goto error;

                atomic64_inc(&dev->sd_submitted_cnt);
                smp_wmb();

                dattobd_submit_bio(new_bio);

                // if our bio didn't cover the entire clone we must keep creating bios
                // until we have
                if (bytes / PAGE_SIZE < pages) {
                        start_sect += bytes / SECTOR_SIZE;
                        pages -= bytes / PAGE_SIZE;
                        continue;
                }
                
                break;
        }
        
        // drop our reference to the tp
        tp_put(tp);

        return 0;

error:
        LOG_ERROR(ret, "error tracing bio for snapshot");
        tracer_set_fail_state(dev, ret);

        // clean up the bio we allocated (but did not submit)
        if (new_bio)
                bio_free_clone(new_bio);

        if (tp)
                tp_put(tp);
        
        return 0;

call_orig:
        return SUBMIT_BIO_REAL(dev, bio);
}

/**
 * inc_make_sset() - This allocates a recordkeeping object to remember the
 * changed passed in the call.  This object is then queued for processing
 * by a kernel thread.
 *
 * @dev: the &struct snap_device used to compute the relative sector offset.
 * @sect: the absolute sector offset of the first changed sector
 * @len: the length of the changes
 *
 * Return:
 * * 0 - success
 * * !0 - an errno indicating the error
 */
static int inc_make_sset(struct snap_device *dev, sector_t sect,
                         unsigned int len)
{
        struct sector_set *sset;

        // allocate sector set to hold record of change sectors
        sset = kmalloc(sizeof(struct sector_set), GFP_NOIO);
        if (!sset) {
                LOG_ERROR(-ENOMEM, "error allocating sector set");
                return -ENOMEM;
        }

        sset->sect = sect - dev->sd_sect_off;
        sset->len = len;

        // queue sset for processing by kernel thread
        sset_queue_add(&dev->sd_pending_ssets, sset);

        return 0;
}

/**
 * inc_trace_bio() - Determines the regions modified by the @bio and
 * queues their affected regions so that a record of what changed can be
 * kept.  The bio is then processed by the original io submit function
 * (make_request_fn or submit_bio function ptr) so that the
 * modification can be made permanent.  This mode of tracing only
 * records what has changed and does not COW data.
 *
 * @dev: The &struct snap_device that keeps device state.
 * @bio: The &struct bio which describes the I/O.
 *
 * Return:
 * * 0 - success
 * * !0 - an errno indicating the error
 */
static int inc_trace_bio(struct snap_device *dev, struct bio *bio)
{
        int ret = 0, is_initialized = 0;
        sector_t start_sect = 0, end_sect = bio_sector(bio);
        bio_iter_t iter;
        bio_iter_bvec_t bvec;

#ifdef HAVE_ENUM_REQ_OPF
        //#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)
        if (bio_op(bio) == REQ_OP_WRITE_ZEROES) {
                ret = inc_make_sset(dev, bio_sector(bio),
                                    bio_size(bio) / SECTOR_SIZE);
                goto out;
        }
#endif

        bio_for_each_segment (bvec, bio, iter) {
                if (page_get_inode(bio_iter_page(bio, iter)) !=
                    dev->sd_cow_inode) {
                        if (!is_initialized) {
                                is_initialized = 1;
                                start_sect = end_sect;
                        }
                } else {
                        if (is_initialized && end_sect - start_sect > 0) {
                                ret = inc_make_sset(dev, start_sect,
                                                    end_sect - start_sect);
                                if (ret)
                                        goto out;
                        }
                        is_initialized = 0;
                }
                end_sect += (bio_iter_len(bio, iter) >> 9);
        }

        if (is_initialized && end_sect - start_sect > 0) {
                ret = inc_make_sset(dev, start_sect, end_sect - start_sect);
                if (ret)
                        goto out;
        }

out:
        if (ret) {
                LOG_ERROR(ret, "error tracing bio for incremental");
                tracer_set_fail_state(dev, ret);
                ret = 0;
        }

        // call the original mrf
        ret = SUBMIT_BIO_REAL(dev, bio);

        return ret;
}

/**
 * bdev_is_already_traced() - Checks to for the existance of the
 * &struct block_device in this driver's tracking state.
 *
 * @bdev: The &struct block_device in question.
 *
 * Return:
 * * 0 - not being traced
 * * 1 - already being traced
 */
static int bdev_is_already_traced(const struct block_device *bdev)
{
        int i;
        struct snap_device *dev;

        tracer_for_each(dev, i)
        {
                if (!dev || test_bit(UNVERIFIED, &dev->sd_state))
                        continue;
                if (dev->sd_base_dev == bdev)
                        return 1;
        }

        return 0;
}

/**
 * file_is_on_bdev() - Checks to see if the &struct file object is contained
 * within the &struct block_device device.
 *
 * @file: the file to check
 * @bdev: the &struct block_device that might hold the @file.
 *
 * Return:
 * * 0 - the @file is not on the @bdev.
 * * !0 - the @file is on the @bdev.
 */
static int file_is_on_bdev(const struct file *file, struct block_device *bdev)
{
        struct super_block *sb = dattobd_get_super(bdev);
        int ret = 0;
        if (sb) {
                ret = ((dattobd_get_mnt(file))->mnt_sb == sb);
                dattobd_drop_super(sb);
        }
        return ret;
}

/**
 * minor_range_recalculate() - Updates the device minors tracked by this
 * driver.  This must be done whenever a minor number is no longer in use.
 */
static void minor_range_recalculate(void)
{
        unsigned int i, highest = 0, lowest = dattobd_max_snap_devices - 1;
        struct snap_device *dev;

        tracer_for_each_full(dev, i)
        {
                if (!dev)
                        continue;

                if (i < lowest)
                        lowest = i;
                if (i > highest)
                        highest = i;
        }

        lowest_minor = lowest;
        highest_minor = highest;
}

/**
 * minor_range_include() - Used to possibly expand the bounds kept to track
 * the minimum and maximum device minor numbers services by this driver.
 *
 * @minor: the device's minor number
 */
static void minor_range_include(unsigned int minor)
{
        if (minor < lowest_minor)
                lowest_minor = minor;
        if (minor > highest_minor)
                highest_minor = minor;
}

/**
 * __tracer_init() - initializes the &struct snap_device object.
 *
 * @dev: the &struct snap_device used to track changes to a snapshot device.
 */
static void __tracer_init(struct snap_device *dev)
{
        LOG_DEBUG("initializing tracer");
        atomic_set(&dev->sd_fail_code, 0);
        atomic_set(&dev->sd_active, 0);
        bio_queue_init(&dev->sd_cow_bios);
        bio_queue_init(&dev->sd_orig_bios);
        sset_queue_init(&dev->sd_pending_ssets);
}

/**
 * tracer_alloc() - Allocates and initializes the &struct snap_device object
 * used to track changes to the newly created snapshot device.
 *
 * @dev_ptr: resultant &struct snap_device allocated by this call.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
int tracer_alloc(struct snap_device **dev_ptr)
{
        int ret;
        struct snap_device *dev;

        // allocate struct
        LOG_DEBUG("allocating device struct");
        dev = kzalloc(sizeof(struct snap_device), GFP_KERNEL);
        if (!dev) {
                ret = -ENOMEM;
                LOG_ERROR(ret, "error allocating memory for device struct");
                goto error;
        }

        __tracer_init(dev);

        *dev_ptr = dev;
        return 0;

error:
        LOG_ERROR(ret, "error allocating device struct");
        if (dev)
                kfree(dev);

        *dev_ptr = NULL;
        return ret;
}

/**
 * __tracer_destroy_cow() - Tears down COW tracking state, deallocating the
 * &struct cow_manager object in the process.
 *
 * @dev: The &struct snap_device that keeps snapshot device state.
 * @close_method: The close method.
 *                * 0: frees memory and unlinks the backing file.
 *                * 1: flushes section cache, closes COW file, deallocates
 *                     &struct cow_manager.
 *                * 2: flushes section cache, closes COW file.
 *                * other: undefined.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
static int __tracer_destroy_cow(struct snap_device *dev, int close_method)
{
        int ret = 0;

        dev->sd_cow_inode = NULL;
        dev->sd_falloc_size = 0;
        dev->sd_cache_size = 0;

        if (dev->sd_cow) {
                LOG_DEBUG("destroying cow manager");

                if (close_method == 0) {
                        cow_free(dev->sd_cow);
                        dev->sd_cow = NULL;
                } else if (close_method == 1) {
                        ret = cow_sync_and_free(dev->sd_cow);
                        dev->sd_cow = NULL;
                } else if (close_method == 2) {
                        ret = cow_sync_and_close(dev->sd_cow);
                        task_work_flush();
                }
        }

        return ret;
}

/**
 * __tracer_setup_cow() - Sets up the COW tracking structures.
 *
 * @dev: The &struct snap_device that keeps snapshot device state.
 * @bdev: The &struct block_device that stores the COW data.
 * @cow_path: The path to the COW backing file.
 * @size: The number of sectors to allocate to the COW file.
 * @fallocated_space: A value of zero defaults the size.
 * @cache_size: Limits the size of the COW section cache (in bytes).
 * @uuid: A unique UUID assigned to a series of snapshots, or NULL to
 *        auto-generate a UUID.
 * @seqid: The current sequence ID to use in the header.
 * @open_method: The open method.
 *               The value of open_method determines how the
 *               &struct cow_manager and its cache will be handled.
 *               * 0: creates and initializes a new COW file.
 *               * 3: opens an existing COW file.
 *               * other: reloads the COW manager but not the cache.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
static int __tracer_setup_cow(struct snap_device *dev,
                              struct block_device *bdev, const char *cow_path,
                              sector_t size, unsigned long fallocated_space,
                              unsigned long cache_size, const uint8_t *uuid,
                              uint64_t seqid, int open_method)
{
        int ret;
        uint64_t max_file_size;
        char bdev_name[BDEVNAME_SIZE];

        bdevname(bdev, bdev_name);

        if (open_method == 3) {
                // reopen the cow manager
                LOG_DEBUG("reopening the cow manager with file '%s'", cow_path);
                ret = cow_reopen(dev->sd_cow, cow_path);
                if (ret)
                        goto error;
        } else {
                if (!cache_size)
                        dev->sd_cache_size = dattobd_cow_max_memory_default;
                else
                        dev->sd_cache_size = cache_size;

                if (open_method == 0) {
                        // calculate how much space should be allocated to the
                        // cow file
                        if (!fallocated_space) {
                                max_file_size =
                                        size * SECTOR_SIZE *
                                        dattobd_cow_fallocate_percentage_default;
                                do_div(max_file_size, 100);
                                dev->sd_falloc_size = max_file_size;
                                do_div(dev->sd_falloc_size, (1024 * 1024));
                        } else {
                                max_file_size =
                                        fallocated_space * (1024 * 1024);
                                dev->sd_falloc_size = fallocated_space;
                        }

                        // create and open the cow manager
                        LOG_DEBUG("creating cow manager");
                        ret = cow_init(cow_path, SECTOR_TO_BLOCK(size),
                                       COW_SECTION_SIZE, dev->sd_cache_size,
                                       max_file_size, uuid, seqid,
                                       &dev->sd_cow);
                        if (ret)
                                goto error;
                } else {
                        // reload the cow manager
                        LOG_DEBUG("reloading cow manager");
                        ret = cow_reload(cow_path, SECTOR_TO_BLOCK(size),
                                         COW_SECTION_SIZE, dev->sd_cache_size,
                                         (open_method == 2), &dev->sd_cow);
                        if (ret)
                                goto error;

                        dev->sd_falloc_size = dev->sd_cow->file_max;
                        do_div(dev->sd_falloc_size, (1024 * 1024));
                }
        }

        // verify that file is on block device
        if (!file_is_on_bdev(dev->sd_cow->filp, bdev)) {
                ret = -EINVAL;
                LOG_ERROR(ret, "'%s' is not on '%s'", cow_path, bdev_name);
                goto error;
        }

        // find the cow file's inode number
        LOG_DEBUG("finding cow file inode");
        dev->sd_cow_inode = dattobd_get_dentry(dev->sd_cow->filp)->d_inode;

        return 0;

error:
        LOG_ERROR(ret, "error setting up cow manager");
        if (open_method != 3)
                __tracer_destroy_cow_free(dev);
        return ret;
}

/**
 * __tracer_destroy_base_dev() - Tears down the base block device.
 *
 * @dev: The &struct snap_device object pointer.
 */
static void __tracer_destroy_base_dev(struct snap_device *dev)
{
        dev->sd_size = 0;
        dev->sd_sect_off = 0;

        if (dev->sd_bdev_path) {
                LOG_DEBUG("freeing base block device path");
                kfree(dev->sd_bdev_path);
                dev->sd_bdev_path = NULL;
        }

        if (dev->sd_base_dev) {
                LOG_DEBUG("freeing base block device");
                dattobd_blkdev_put(dev->sd_base_dev);
                dev->sd_base_dev = NULL;
        }
}

/**
 * __tracer_setup_base_dev() - Sets up the base block device.
 *
 * @dev: The &struct snap_device object pointer.
 * @bdev_path: The block device path, e.g., '/dev/loop0'.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
static int __tracer_setup_base_dev(struct snap_device *dev,
                                   const char *bdev_path)
{
        int ret;

        // open the base block device
        LOG_DEBUG("finding block device");
        dev->sd_base_dev = blkdev_get_by_path(bdev_path, FMODE_READ, NULL);
        if (IS_ERR(dev->sd_base_dev)) {
                ret = PTR_ERR(dev->sd_base_dev);
                dev->sd_base_dev = NULL;
                LOG_ERROR(ret, "error finding block device '%s'", bdev_path);
                goto error;
        } else if (!dev->sd_base_dev->bd_disk) {
                ret = -EFAULT;
                LOG_ERROR(ret, "error finding block device gendisk");
                goto error;
        }

        // check block device is not already being traced
        LOG_DEBUG("checking block device is not already being traced");
        if (bdev_is_already_traced(dev->sd_base_dev)) {
                ret = -EINVAL;
                LOG_ERROR(ret, "block device is already being traced");
                goto error;
        }

        // fetch the absolute pathname for the base device
        LOG_DEBUG("fetching the absolute pathname for the base device");
        ret = pathname_to_absolute(bdev_path, &dev->sd_bdev_path, NULL);
        if (ret)
                goto error;

        // check if device represents a partition, calculate size and offset
        LOG_DEBUG("calculating block device size and offset");
        if (bdev_whole(dev->sd_base_dev) != dev->sd_base_dev) {
                dev->sd_sect_off = get_start_sect(dev->sd_base_dev);
                dev->sd_size = dattobd_bdev_size(dev->sd_base_dev);
        } else {
                dev->sd_sect_off = 0;
                dev->sd_size = get_capacity(dev->sd_base_dev->bd_disk);
        }

        LOG_DEBUG("bdev size = %llu, offset = %llu",
                  (unsigned long long)dev->sd_size,
                  (unsigned long long)dev->sd_sect_off);

        return 0;

error:
        LOG_ERROR(ret, "error setting up base block device");
        __tracer_destroy_base_dev(dev);
        return ret;
}

/**
 * __tracer_copy_base_dev() - Copies base block device fields from @src
 *                            to @dest.
 *
 * @src: The &struct snap_device source object pointer.
 * @dest: The &struct snap_device destination object pointer.
 */
static void __tracer_copy_base_dev(const struct snap_device *src,
                                   struct snap_device *dest)
{
        dest->sd_size = src->sd_size;
        dest->sd_sect_off = src->sd_sect_off;
        dest->sd_base_dev = src->sd_base_dev;
        dest->sd_bdev_path = src->sd_bdev_path;
}

#ifdef HAVE_MERGE_BVEC_FN
#ifdef HAVE_BVEC_MERGE_DATA

/**
 * snap_merge_bvec() - Determines if it can augment an existing request with
 *                     more data.  Requests queues usually have fixed size
 *                     limits for their requests but specialized devices might
 *                     have varying limits.
 *
 * @q: The &struct request_queue object pointer.
 * @bvm: the &struct bvec_merge_data passed to the merge_bvec_fn() call.
 * @bvec: the &struct bio_vec passed to the merge_bvec_fn() call.
 *
 * Return: the result returned from the wrapped function.
 */
static int snap_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm,
                           struct bio_vec *bvec)
{
        struct snap_device *dev = q->queuedata;
        struct request_queue *base_queue = bdev_get_queue(dev->sd_base_dev);

        bvm->bi_bdev = dev->sd_base_dev;

        return base_queue->merge_bvec_fn(base_queue, bvm, bvec);
}

#else

/**
 * snap_merge_bvec() - Determines if it can augment an existing request with
 *                     more data.  Requests queues usually have fixed size
 *                     limits for their requests but specialized devices might
 *                     have varying limits.
 *
 * @q: The &struct request_queue object pointer.
 * @bio_bvm: the &struct bvec_merge_data passed to the merge_bvec_fn() call.
 * @bvec: the &struct bio_vec passed to the merge_bvec_fn() call.
 *
 * Return: the result returned from the wrapped function.
 */
static int snap_merge_bvec(struct request_queue *q, struct bio *bio_bvm,
                           struct bio_vec *bvec)
{
        struct snap_device *dev = q->queuedata;
        struct request_queue *base_queue = bdev_get_queue(dev->sd_base_dev);

        bio_bvm->bi_bdev = dev->sd_base_dev;

        return base_queue->merge_bvec_fn(base_queue, bio_bvm, bvec);
}
#endif
#endif

/**
 * __tracer_copy_cow() - Copies COW fields from @src to @dest.
 *
 * @src: The &struct snap_device source object pointer.
 * @dest: The &struct snap_device destination object pointer.
 */
static void __tracer_copy_cow(const struct snap_device *src,
                              struct snap_device *dest)
{
        dest->sd_cow = src->sd_cow;
        dest->sd_cow_inode = src->sd_cow_inode;
        dest->sd_cache_size = src->sd_cache_size;
        dest->sd_falloc_size = src->sd_falloc_size;
}

/**
 * __tracer_destroy_cow_path() - Tears down the COW path
 *
 * @dev: The &struct snap_device object pointer.
 */
static void __tracer_destroy_cow_path(struct snap_device *dev)
{
        if (dev->sd_cow_path) {
                LOG_DEBUG("freeing cow path");
                kfree(dev->sd_cow_path);
                dev->sd_cow_path = NULL;
        }
}

/**
 * __tracer_setup_cow_path() - Sets up the COW file path given a &struct file.
 *
 * @dev: The &struct snap_device object pointer.
 * @cow_file: The &struct file object pointer.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
static int __tracer_setup_cow_path(struct snap_device *dev,
                                   const struct file *cow_file)
{
        int ret;

        // get the pathname of the cow file (relative to the mountpoint)
        LOG_DEBUG("getting relative pathname of cow file");
        ret = dentry_get_relative_pathname(dattobd_get_dentry(cow_file),
                                           &dev->sd_cow_path, NULL);
        if (ret)
                goto error;

        return 0;

error:
        LOG_ERROR(ret, "error setting up cow file path");
        __tracer_destroy_cow_path(dev);
        return ret;
}

/**
 * __tracer_setup_cow_path() - Sets up the COW file path given a &struct file.
 *
 * @dev: The &struct snap_device object pointer.
 * @cow_file: The &struct file object pointer.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
static void __tracer_copy_cow_path(const struct snap_device *src,
                                   struct snap_device *dest)
{
        dest->sd_cow_path = src->sd_cow_path;
}

/**
 * __tracer_bioset_exit() - Releases the bioset within the &struct snap_device.
 *
 * @dev: The &struct snap_device object pointer.
 */
static void __tracer_bioset_exit(struct snap_device *dev)
{
#ifndef HAVE_BIOSET_INIT
        //#if LINUX_VERSION_CODE < KERNEL_VERSION(4,18,0)
        if (dev->sd_bioset) {
                LOG_DEBUG("freeing bio set");
                bioset_free(dev->sd_bioset);
                dev->sd_bioset = NULL;
        }
#else
        bioset_exit(&dev->sd_bioset);
#endif
}

/**
 * __tracer_destroy_snap() - Tears down a snap device.
 *
 * @dev: The &struct snap_device object pointer.
 */
static void __tracer_destroy_snap(struct snap_device *dev)
{
        if (dev->sd_mrf_thread) {
                LOG_DEBUG("stopping mrf thread");
                kthread_stop(dev->sd_mrf_thread);
                dev->sd_mrf_thread = NULL;
        }

        if (dev->sd_gd) {
                LOG_DEBUG("freeing gendisk");
#ifdef GENHD_FL_UP
                if (dev->sd_gd->flags & GENHD_FL_UP)
#else
                if (disk_live(dev->sd_gd))
#endif
                        del_gendisk(dev->sd_gd);
                put_disk(dev->sd_gd);
                dev->sd_gd = NULL;
        }

        if (dev->sd_queue) {
                LOG_DEBUG("freeing request queue");
                blk_cleanup_queue(dev->sd_queue);
                dev->sd_queue = NULL;
        }

        __tracer_bioset_exit(dev);
}

/**
 * __tracer_bioset_init() - Initializes the bioset field for the
 *                          &struct snap_device.
 *
 * @dev: The &struct snap_device object pointer.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
static int __tracer_bioset_init(struct snap_device *dev)
{
#ifndef HAVE_BIOSET_INIT
        //#if LINUX_VERSION_CODE < KERNEL_VERSION(4,18,0)
        dev->sd_bioset = dattobd_bioset_create(BIO_SET_SIZE, BIO_SET_SIZE, 0);
        if (!dev->sd_bioset)
                return -ENOMEM;
        return 0;
#else
        return bioset_init(&dev->sd_bioset, BIO_SET_SIZE, BIO_SET_SIZE,
                           BIOSET_NEED_BVECS);
#endif
}

/**
 * __tracer_setup_snap() - Allocates &struct snap_device fields for use when
 *                         tracking an active snapshot.  Also sets up the
 *                         read-only disk used to present a snapshot image of
 *                         the underlying live volume and registers it with
 *                         the kernel.
 *
 * @dev: The &struct snap_device object pointer.
 * @minor: the device's minor number.
 * @bdev: The &struct block_device that stores the COW data.
 * @size: The number of sectors to allocate to the block device.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
static int __tracer_setup_snap(struct snap_device *dev, unsigned int minor,
                               struct block_device *bdev, sector_t size)
{
        int ret;
        
        ret = __tracer_bioset_init(dev);
        if (ret) {
                LOG_ERROR(ret, "error initializing bio set");
                goto error;
        }

        // allocate a gendisk struct
        LOG_DEBUG("allocating gendisk");
#ifdef HAVE_BLK_ALLOC_DISK
        dev->sd_gd = blk_alloc_disk(NUMA_NO_NODE);
#else
        dev->sd_gd = alloc_disk(1);
#endif
        if (!dev->sd_gd) {
                ret = -ENOMEM;
                LOG_ERROR(ret, "error allocating gendisk");
                goto error;
        }

        // allocate request queue
        LOG_DEBUG("allocating queue"); 
#ifdef HAVE_BLK_ALLOC_QUEUE_1
        //#if LINUX_VERSION_CODE < KERNEL_VERSION(5,7,0)
        dev->sd_queue = blk_alloc_queue(GFP_KERNEL);
#elif defined HAVE_BLK_ALLOC_QUEUE_RH_2 // el8
        dev->sd_queue = blk_alloc_queue_rh(snap_mrf, NUMA_NO_NODE);
#elif defined HAVE_BLK_ALLOC_QUEUE_2
        dev->sd_queue = blk_alloc_queue(snap_mrf, NUMA_NO_NODE);
#elif !defined HAVE_BLK_ALLOC_DISK
        //#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,9,0)
        dev->sd_queue = blk_alloc_queue(NUMA_NO_NODE);
#else
        dev->sd_queue = dev->sd_gd->queue;
#endif

        if (!dev->sd_queue) {
                ret = -ENOMEM;
                LOG_ERROR(ret, "error allocating request queue");
                goto error;
        }

#if !defined HAVE_BLK_ALLOC_QUEUE && !defined USE_BDOPS_SUBMIT_BIO
        LOG_DEBUG("setting up make request function");
        blk_queue_make_request(dev->sd_queue, snap_mrf);
#endif

        // give our request queue the same properties as the base device's
        LOG_DEBUG("setting queue limits");
        blk_set_stacking_limits(&dev->sd_queue->limits);
        dattobd_bdev_stack_limits(dev->sd_queue, bdev, 0);

#ifdef HAVE_MERGE_BVEC_FN
        // use a thin wrapper around the base device's merge_bvec_fn
        if (bdev_get_queue(bdev)->merge_bvec_fn)
                blk_queue_merge_bvec(dev->sd_queue, snap_merge_bvec);
#endif

        // initialize gendisk and request queue values
        LOG_DEBUG("initializing gendisk");
        dev->sd_queue->queuedata = dev;
        dev->sd_gd->private_data = dev;
        dev->sd_gd->major = major;
        dev->sd_gd->first_minor = minor;
        dev->sd_gd->minors = 1;
        dev->sd_gd->fops = get_snap_ops();
        dev->sd_gd->queue = dev->sd_queue;

        // name our gendisk
        LOG_DEBUG("naming gendisk");
        snprintf(dev->sd_gd->disk_name, 32, SNAP_DEVICE_NAME, minor);

        // set the capacity of our gendisk
        LOG_DEBUG("block device size: %llu", (unsigned long long)size);
        set_capacity(dev->sd_gd, size);

#ifdef HAVE_GENHD_FL_NO_PART_SCAN
        //#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
        // disable partition scanning (the device should not have any
        // sub-partitions)
        dev->sd_gd->flags |= GENHD_FL_NO_PART_SCAN;
#endif

        // set the device as read-only
        set_disk_ro(dev->sd_gd, 1);

        // register gendisk with the kernel
        LOG_DEBUG("adding disk");
        add_disk(dev->sd_gd);

        LOG_DEBUG("starting mrf kernel thread");
        dev->sd_mrf_thread = kthread_run(snap_mrf_thread, dev,
                                         SNAP_MRF_THREAD_NAME_FMT, minor);
        if (IS_ERR(dev->sd_mrf_thread)) {
                ret = PTR_ERR(dev->sd_mrf_thread);
                dev->sd_mrf_thread = NULL;
                LOG_ERROR(ret, "error starting mrf kernel thread");
                goto error;
        }

        atomic64_set(&dev->sd_submitted_cnt, 0);
        atomic64_set(&dev->sd_received_cnt, 0);

        return 0;

error:
        LOG_ERROR(ret, "error setting up snapshot");
        __tracer_destroy_snap(dev);
        return ret;
}

/**
 * __tracer_bioset_exit() - Releases the bioset within the &struct snap_device.
 *
 * @dev: The &struct snap_device object pointer.
 */
static void __tracer_destroy_cow_thread(struct snap_device *dev)
{
        if (dev->sd_cow_thread) {
                LOG_DEBUG("stopping cow thread");
                kthread_stop(dev->sd_cow_thread);
                dev->sd_cow_thread = NULL;
        }
}

/**
 * __tracer_setup_cow_thread() - Creates a COW thread and associates it with
 *                               the &struct snap_device.
 *
 * @dev: The &struct snap_device object pointer.
 * @minor: the device's minor number.
 * @is_snap: snapshot or incremental.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
static int __tracer_setup_cow_thread(struct snap_device *dev,
                                     unsigned int minor, int is_snap)
{
        int ret;

        LOG_DEBUG("creating kernel cow thread");
        if (is_snap)
                dev->sd_cow_thread = kthread_create(
                        snap_cow_thread, dev, SNAP_COW_THREAD_NAME_FMT, minor);
        else
                dev->sd_cow_thread = kthread_create(inc_sset_thread, dev,
                                                    INC_THREAD_NAME_FMT, minor);

        if (IS_ERR(dev->sd_cow_thread)) {
                ret = PTR_ERR(dev->sd_cow_thread);
                dev->sd_cow_thread = NULL;
                LOG_ERROR(ret, "error creating kernel thread");
                goto error;
        }

        return 0;

error:
        LOG_ERROR(ret, "error setting up cow thread");
        __tracer_destroy_cow_thread(dev);
        return ret;
}

#ifdef USE_BDOPS_SUBMIT_BIO
#ifdef HAVE_FTRACE_REGS
static void notrace ftrace_handler_submit_bio_noacct(unsigned long ip,
        unsigned long parent_ip,
        struct ftrace_ops *fops,
        struct ftrace_regs *fregs);
#else
static void notrace ftrace_handler_submit_bio_noacct(unsigned long ip,
        unsigned long parent_ip,
        struct ftrace_ops *fops,
        struct pt_regs *fregs);
#endif

unsigned char* funcname_submit_bio_noacct = "submit_bio_noacct";
struct ftrace_ops ops_submit_bio_noacct = {
	.func = ftrace_handler_submit_bio_noacct,
	.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_PERMANENT | FTRACE_OPS_FL_IPMODIFY
};
#endif

int tracer_registered = 0;
int register_tracer_filter(void)
{
        int ret = 0;
#ifdef USE_BDOPS_SUBMIT_BIO
        ret = ftrace_set_filter(
                &ops_submit_bio_noacct,
                funcname_submit_bio_noacct,
                strlen(funcname_submit_bio_noacct),
                0);
        if (ret)
        {
                return ret;
        }

        ret = register_ftrace_function(&ops_submit_bio_noacct);
#endif
        tracer_registered = 1;
        return ret;
}

int unregister_tracer_filter(void)
{
        if (tracer_registered) {
                tracer_registered = 0;
#ifdef USE_BDOPS_SUBMIT_BIO
                return unregister_ftrace_function(&ops_submit_bio_noacct);
#endif
        }
        return 0;
}

/**
 * __tracer_transition_tracing() - Starts or ends tracing on @bdev depending
 *                                 on whether @dev is defined.  The @bdev is
 *                                 frozen while transitioning and then thawed
 *                                 afterwards so that requests  can be
 *                                 reinstated on @bdev.
 *
 * @dev: The &struct snap_device object pointer.
 * @bdev: The &struct block_device that stores the COW data.
 * @new_bio_tracking_ptr: Optional function pointer to be used by the snapshot disk
 *         i/o handling, may be NULL to continue using the current function pointer.
 * @dev_ptr: Contains the output &struct snap_device when successful.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
static int __tracer_transition_tracing(
    struct snap_device *dev,
    struct block_device *bdev,
    BIO_REQUEST_CALLBACK_FN *new_bio_tracking_ptr,
    struct snap_device **dev_ptr)
{
        int ret;
        struct super_block *origsb = dattobd_get_super(bdev);
        #ifdef HAVE_FREEZE_SB
        struct super_block *sb = NULL;
        #endif
        char bdev_name[BDEVNAME_SIZE];
        MAYBE_UNUSED(ret);

        bdevname(bdev, bdev_name);
        if(origsb){
                dattobd_drop_super(origsb);

                //freeze and sync block device
                LOG_DEBUG("freezing '%s'", bdev_name);
#ifdef HAVE_FREEZE_SB
//#if LINUX_VERSION_CODE < KERNEL_VERSION(5,11,0)
                sb = freeze_bdev(bdev);
                if(!sb){
                        LOG_ERROR(-EFAULT, "error freezing '%s': null",
                                  bdev_name);
                        return -EFAULT;
                }else if(IS_ERR(sb)){
                        LOG_ERROR((int)PTR_ERR(sb),
                                  "error freezing '%s': error", bdev_name);
                        return (int)PTR_ERR(sb);
                }
#else
                ret = freeze_bdev(bdev);
                if (ret) {
                        LOG_ERROR(ret, "error freezing '%s'", bdev_name);
                        return -ret;
                }
#endif
        }
        else {
                LOG_WARN(
                        "warning: no super found for device '%s', "
                        "unable to freeze it",
                        bdev_name);
        }
        smp_wmb();
        if(dev){
                LOG_DEBUG("starting tracing");
                *dev_ptr = dev;
                smp_wmb();
#ifndef USE_BDOPS_SUBMIT_BIO
                if(new_bio_tracking_ptr){
                        bdev->bd_disk->queue->make_request_fn = 
                                new_bio_tracking_ptr;
                }
#endif
                atomic_inc(&(*dev_ptr)->sd_active);
        }else{
                LOG_DEBUG("ending tracing");
                atomic_dec(&(*dev_ptr)->sd_active);
#ifndef USE_BDOPS_SUBMIT_BIO
                new_bio_tracking_ptr = mrf_put(bdev);
                if (new_bio_tracking_ptr){
                        bdev->bd_disk->queue->make_request_fn =
                                new_bio_tracking_ptr;
                }
#endif
                *dev_ptr = dev;
                smp_wmb();
        }
        if(origsb){
                // thaw the block device
                LOG_DEBUG("thawing '%s'", bdev_name);
#ifdef HAVE_THAW_BDEV_INT
                ret = thaw_bdev(bdev, sb);
#else
                ret = thaw_bdev(bdev);
#endif
                if(ret){
                        LOG_ERROR(ret, "error thawing '%s'", bdev_name);
                        // We can't reasonably undo what we've done at this 
                        // point, and we've replaced the mrf. pretend we 
                        // succeeded so we don't break the block device
                }
        }
        return 0;
}

/**     
 * tracing_fn() - This is the entry point for in-flight i/o we intercepted.
 * @q: The &struct request_queue.
 * @bio: The &struct bio which describes the I/O.
 *
 * If the BIO has been marked as passthrough then the block device's
 * original device's function pointer for handling i/o is used to process the BIO.
 * Otherwise, depending on whether we're in snapshot or incremental mode, the appropriate 
 * handler is called.
 *
 * Return: varies across versions of Linux and is what's expected by each for
 *         a make request function.
 */
#ifdef USE_BDOPS_SUBMIT_BIO
static asmlinkage MRF_RETURN_TYPE tracing_fn(struct bio *bio)
#else
static MRF_RETURN_TYPE tracing_fn(struct request_queue *q, struct bio *bio)
#endif
{
        int i, ret = 0;
        struct snap_device *dev = NULL;
        MAYBE_UNUSED(ret);

        smp_rmb();
        tracer_for_each(dev, i)
        {
                if (!tracer_is_bio_for_dev(dev, bio)) continue;
                // If we get here, then we know this is a device we're managing
                // and the current bio belongs to said device.
                if (dattobd_bio_op_flagged(bio, DATTOBD_PASSTHROUGH))
                {
                        dattobd_bio_op_clear_flag(bio, DATTOBD_PASSTHROUGH);
                }
                else
                {
                        if (tracer_should_trace_bio(dev, bio))
                        {
                                if (test_bit(SNAPSHOT, &dev->sd_state))
                                        ret = snap_trace_bio(dev, bio);
                                else
                                        ret = inc_trace_bio(dev, bio);
                                goto out;
                        }
                }

                // Now we can submit the bio.
                ret = SUBMIT_BIO_REAL(dev, bio);

                goto out;
                
        } // tracer_for_each(dev, i)

#ifdef USE_BDOPS_SUBMIT_BIO
        ret = SUBMIT_BIO_REAL(NULL, bio);
#endif

out:
        MRF_RETURN(ret);
}

#ifdef USE_BDOPS_SUBMIT_BIO
#ifdef HAVE_FTRACE_REGS
static void notrace ftrace_handler_submit_bio_noacct(unsigned long ip,
        unsigned long parent_ip,
        struct ftrace_ops *fops,
        struct ftrace_regs *fregs)
{
        ftrace_instruction_pointer_set(fregs, (unsigned long)tracing_fn);
}
#else
static void notrace ftrace_handler_submit_bio_noacct(unsigned long ip,
        unsigned long parent_ip,
        struct ftrace_ops *fops,
        struct pt_regs *fregs)
{
        fregs->ip = (unsigned long)tracing_fn;
}
#endif
#endif

#ifndef USE_BDOPS_SUBMIT_BIO

/**
 * dattobd_find_orig_mrf() - Locates the original MRF function associated with
 *                   the @bdev block device.  All tracked block devices
 *                   are checked until a match is found.
 *
 * @bdev: The &struct block_device that stores the COW data.
 * @mrf: The original MRF function, if found.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
static int dattobd_find_orig_mrf(struct block_device *bdev,
                                 make_request_fn **mrf){
        int i;
        struct snap_device *dev;
        struct request_queue *q = bdev_get_queue(bdev);
        make_request_fn *orig_mrf = dattobd_get_bd_mrf(bdev);

        if(orig_mrf != tracing_fn){
#ifdef HAVE_BLK_MQ_MAKE_REQUEST
                // Linux version 5.8
                if (!orig_mrf){
                        orig_mrf = dattobd_null_mrf;
                        LOG_DEBUG(
                            "original mrf is empty, set to dattobd_null_mrf");
                }
#endif
            *mrf = orig_mrf;
            return 0;
        }

        tracer_for_each(dev, i){
                if(!dev || test_bit(UNVERIFIED, &dev->sd_state)) continue;
                if(q == bdev_get_queue(dev->sd_base_dev)){
                        *mrf = dev->sd_orig_request_fn;
                        return 0;
                }
        }

        *mrf = NULL;
        return -EFAULT;
}

#endif

/**
 * __tracer_should_reset_mrf() - Searches the traced devices and verifies that
 * the device would have had a make_request_fn when tracing was initiated.
 * @dev: The &struct snap_device object pointer.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
static int __tracer_should_reset_mrf(const struct snap_device *dev)
{
    int i;
    struct snap_device *cur_dev;
    struct request_queue *q = bdev_get_queue(dev->sd_base_dev);

#ifndef USE_BDOPS_SUBMIT_BIO
    if (GET_BIO_REQUEST_TRACKING_PTR(dev->sd_base_dev) != tracing_fn) return 0;
#endif
    if (dev != snap_devices[dev->sd_minor]) return 0;

    //return 0 if there is another device tracing the same queue as dev.
    if (snap_devices){
        tracer_for_each(cur_dev, i){
            if (!cur_dev || test_bit(UNVERIFIED, &cur_dev->sd_state) 
                || cur_dev == dev) continue;
            if (q == bdev_get_queue(cur_dev->sd_base_dev)) return 0;
        }
    }

    return 1;
}

/**
 * __tracer_destroy_tracing() - Stops tracing of the &struct snap_device
 *                              and possibly reinstates the original MRF
 *                              function if necessary.
 *
 * @dev: The &struct snap_device object pointer.
 */
static void __tracer_destroy_tracing(struct snap_device *dev)
{
        if(dev->sd_orig_request_fn){
                LOG_DEBUG("replacing make_request_fn if needed");
                if(__tracer_should_reset_mrf(dev)){
                        __tracer_transition_tracing(
                            NULL,
                            dev->sd_base_dev,
                            dev->sd_orig_request_fn,
                            &snap_devices[dev->sd_minor]
                        );
                }
        else
        {
                __tracer_transition_tracing(
                        NULL,
                        dev->sd_base_dev,
                        NULL,
                        &snap_devices[dev->sd_minor]
                );
        }
        smp_wmb();
        dattobd_free_request_tracking_ptr(dev);

        }
        else if(snap_devices[dev->sd_minor] == dev)
        {
                smp_wmb();
                snap_devices[dev->sd_minor] = NULL;
                smp_wmb();
        }

        dev->sd_minor = 0;
        minor_range_recalculate();
}

/**
 * __tracer_setup_tracing_unverified() - Assigns @dev to the array of snap
 *                                       devices begin tracked by this
 *                                       driver.
 *
 * @dev: The &struct snap_device object pointer.
 * @minor: the device's minor number.
 */
static void __tracer_setup_tracing_unverified(struct snap_device *dev,
                                              unsigned int minor)
{
    minor_range_include(minor);
    smp_wmb();
    dev->sd_minor = minor;
    snap_devices[minor] = dev;
    smp_wmb();
}

/**
 * __tracer_setup_tracing() - Adds @minor to the range of included tracked
 *                            minors, saves the original io submit function ptr
 *                            and replaces it with the tracing_fn function ptr for 
 *                            the block device associated with our &struct snap_device.
 *
 * @dev: The &struct snap_device object pointer.
 * @minor: the device's minor number.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
int __tracer_setup_tracing(struct snap_device *dev, unsigned int minor)
{
        int ret = 0;

        dev->sd_minor = minor;
        minor_range_include(minor);

        // get the base block device's make_request_fn
        LOG_DEBUG("getting the base block device's make_request_fn");

#ifndef USE_BDOPS_SUBMIT_BIO
        ret = dattobd_find_orig_mrf(dev->sd_base_dev, &dev->sd_orig_request_fn);
        if (ret)
                goto error;
#endif
        ret = __tracer_transition_tracing(
                dev,
                dev->sd_base_dev,
                tracing_fn,
                &snap_devices[minor]);
        if (ret)
                goto error;
        return 0;

error:
        LOG_ERROR(ret, "error setting up tracing");
        dev->sd_minor = 0;
        dev->sd_orig_request_fn = NULL;
        minor_range_recalculate();
        return ret;
}

/**
 * __tracer_setup_unverified() - Sets up tracing for an unverified device.
 *
 * @dev: The &struct snap_device object pointer.
 * @minor: the device's minor number.
 * @bdev_path: The block device path, e.g., '/dev/loop0'.
 * @cow_path: The path to the COW backing file.
 * @cache_size: Limits the size of the COW section cache (in bytes).
 * @is_snap: snapshot or incremental.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
int __tracer_setup_unverified(struct snap_device *dev, unsigned int minor,
                              const char *bdev_path, const char *cow_path,
                              unsigned long cache_size, int is_snap)
{
        if (is_snap)
                set_bit(SNAPSHOT, &dev->sd_state);
        else
                clear_bit(SNAPSHOT, &dev->sd_state);
        clear_bit(ACTIVE, &dev->sd_state);
        set_bit(UNVERIFIED, &dev->sd_state);

        dev->sd_cache_size = cache_size;

        dev->sd_bdev_path = kstrdup(bdev_path, GFP_KERNEL);
        if (!dev->sd_bdev_path)
                goto error;

        dev->sd_cow_path = kstrdup(cow_path, GFP_KERNEL);
        if (!dev->sd_cow_path)
                goto error;

        // add the tracer to the array of devices
        __tracer_setup_tracing_unverified(dev, minor);

        return 0;

error:
        LOG_ERROR(-ENOMEM, "error setting up unverified tracer");
        tracer_destroy(dev);
        return -ENOMEM;
}

/************************SETUP / DESTROY FUNCTIONS************************/

/**
 * tracer_destroy() - Tears down tracing of the snap device freeing necessary
 *                    fields in the process.
 *
 * @dev: The &struct snap_device object pointer.
 */
void tracer_destroy(struct snap_device *dev)
{
        __tracer_destroy_tracing(dev);
        __tracer_destroy_cow_thread(dev);
        __tracer_destroy_snap(dev);
        __tracer_destroy_cow_path(dev);
        __tracer_destroy_cow_free(dev);
        __tracer_destroy_base_dev(dev);
}

/**
 * tracer_setup_active_snap() - Sets up for a snapshot.
 *
 * @dev: The &struct snap_device object pointer.
 * @minor: the device's minor number.
 * @bdev_path: The block device path, e.g., '/dev/loop0'.
 * @cow_path: The path to the COW backing file.
 * @fallocated_space: A value of zero defaults the size.
 * @cache_size: Limits the size of the COW section cache (in bytes).
 *
 * This call sets up the snapshot device, creates the COW file including the
 * data region, determines the COW path, sets up the COW thread, and finally
 * initiates tracing.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
int tracer_setup_active_snap(struct snap_device *dev, unsigned int minor,
                             const char *bdev_path, const char *cow_path,
                             unsigned long fallocated_space,
                             unsigned long cache_size)
{
        int ret;

        set_bit(SNAPSHOT, &dev->sd_state);
        set_bit(ACTIVE, &dev->sd_state);
        clear_bit(UNVERIFIED, &dev->sd_state);

        // setup base device
        ret = __tracer_setup_base_dev(dev, bdev_path);
        if (ret)
                goto error;

        // setup the cow manager
        ret = __tracer_setup_cow_new(dev, dev->sd_base_dev, cow_path,
                                     dev->sd_size, fallocated_space, cache_size,
                                     NULL, 1);
        if (ret)
                goto error;

        // setup the cow path
        ret = __tracer_setup_cow_path(dev, dev->sd_cow->filp);
        if (ret)
                goto error;

#ifndef USE_BDOPS_SUBMIT_BIO
        // retain an association between the original mrf and the block device
        ret = mrf_get(dev->sd_base_dev, GET_BIO_REQUEST_TRACKING_PTR(dev->sd_base_dev));
        if (ret)
                goto error;
#endif

        // setup the snapshot values
        ret = __tracer_setup_snap(dev, minor, dev->sd_base_dev, dev->sd_size);
        if (ret)
                goto error;

        // setup the cow thread and run it
        ret = __tracer_setup_snap_cow_thread(dev, minor);
        if (ret)
                goto error;

        wake_up_process(dev->sd_cow_thread);

        // inject the tracing function
        ret = __tracer_setup_tracing(dev, minor);
        if (ret)
                goto error;

        return 0;

error:
        LOG_ERROR(ret, "error setting up tracer as active snapshot");
        tracer_destroy(dev);
        return ret;
}

/************************IOCTL TRANSITION FUNCTIONS************************/

/**
 * tracer_active_snap_to_inc() - Transitions from snapshot mode to incremental
 * tracking.
 * @old_dev: The &struct snap_device being replaced by this call.
 *
 * Return:
 * * 0 - success
 * * !0 - errno indicating the error
 */
int tracer_active_snap_to_inc(struct snap_device *old_dev)
{
        int ret;
        struct snap_device *dev;
        char *abs_path = NULL;
        int abs_path_len;

        // allocate new tracer
        ret = tracer_alloc(&dev);
        if (ret)
                return ret;

        clear_bit(SNAPSHOT, &dev->sd_state);
        set_bit(ACTIVE, &dev->sd_state);
        clear_bit(UNVERIFIED, &dev->sd_state);

        // copy / set fields we need
        __tracer_copy_base_dev(old_dev, dev);
        __tracer_copy_cow_path(old_dev, dev);

        // copy cow manager to new device. Care must be taken to make sure it
        // isn't used by multiple threads at once.
        __tracer_copy_cow(old_dev, dev);

        // setup the cow thread
        ret = __tracer_setup_inc_cow_thread(dev, old_dev->sd_minor);
        if (ret)
                goto error;

        // inject the tracing function
        dev->sd_orig_request_fn = old_dev->sd_orig_request_fn;
        ret = __tracer_setup_tracing(dev, old_dev->sd_minor);
        if (ret)
                goto error;

        // Below this point, we are commited to the new device, so we must make
        // sure it is in a good state.

        // stop the old cow thread. Must be done before starting the new cow
        // thread to prevent concurrent access.
        __tracer_destroy_cow_thread(old_dev);

        // sanity check to ensure no errors have occurred while cleaning up the
        // old cow thread
        ret = tracer_read_fail_state(old_dev);
        if (ret) {
                LOG_ERROR(
                        ret,
                        "errors occurred while cleaning up cow thread, putting "
                        "incremental into error state");
                tracer_set_fail_state(dev, ret);

                // must make up the new thread regardless of errors so that any
                // queued ssets are cleaned up
                wake_up_process(dev->sd_cow_thread);

                // clean up the old device no matter what
                __tracer_destroy_snap(old_dev);
                kfree(old_dev);

                return ret;
        }

        // wake up new cow thread. Must happen regardless of errors syncing the
        // old cow thread in order to ensure no IO's are leaked.
        wake_up_process(dev->sd_cow_thread);

        // truncate the cow file
        ret = cow_truncate_to_index(dev->sd_cow);
        if (ret) {
                // not a critical error, we can just print a warning
                file_get_absolute_pathname(dev->sd_cow->filp, &abs_path,
                                           &abs_path_len);
                if (!abs_path) {
                        LOG_WARN("warning: cow file truncation failed, "
                                 "incremental will use more "
                                 "disk space than needed");
                } else {
                        LOG_WARN("warning: failed to truncate '%s', "
                                 "incremental will use more "
                                 "disk space than needed",
                                 abs_path);
                        kfree(abs_path);
                }
        }

        // destroy the unneeded fields of the old_dev and the old_dev itself
        __tracer_destroy_snap(old_dev);
        kfree(old_dev);

        return 0;

error:
        LOG_ERROR(ret, "error transitioning to incremental mode");
        __tracer_destroy_cow_thread(dev);
        kfree(dev);

        return ret;
}

/**
 * tracer_active_inc_to_snap() - Transitions from incremental mode to
 * snapshot mode.
 *
 * @old_dev: The &struct snap_device tracing in incremental mode.
 * @cow_path: The path to the COW backing file.
 * @fallocated_space: A value of zero carries over the current setting.
 *
 * Return:
 * * 0 - success
 * * !0 - error
 */
int tracer_active_inc_to_snap(struct snap_device *old_dev, const char *cow_path,
                              unsigned long fallocated_space)
{
        int ret;
        struct snap_device *dev;

        // allocate new tracer
        ret = tracer_alloc(&dev);
        if (ret)
                return ret;

        set_bit(SNAPSHOT, &dev->sd_state);
        set_bit(ACTIVE, &dev->sd_state);
        clear_bit(UNVERIFIED, &dev->sd_state);

        fallocated_space =
                (fallocated_space) ? fallocated_space : old_dev->sd_falloc_size;

        // copy / set fields we need
        __tracer_copy_base_dev(old_dev, dev);

        // setup the cow manager
        ret = __tracer_setup_cow_new(dev, dev->sd_base_dev, cow_path,
                                     dev->sd_size, fallocated_space,
                                     dev->sd_cache_size, old_dev->sd_cow->uuid,
                                     old_dev->sd_cow->seqid + 1);
        if (ret)
                goto error;

        // setup the cow path
        ret = __tracer_setup_cow_path(dev, dev->sd_cow->filp);
        if (ret)
                goto error;

        // setup the snapshot values
        ret = __tracer_setup_snap(dev, old_dev->sd_minor, dev->sd_base_dev,
                                  dev->sd_size);
        if (ret)
                goto error;

        // setup the cow thread
        ret = __tracer_setup_snap_cow_thread(dev, old_dev->sd_minor);
        if (ret)
                goto error;

        // start tracing (overwrites old_dev's tracing)
        dev->sd_orig_request_fn = old_dev->sd_orig_request_fn;
        ret = __tracer_setup_tracing(dev, old_dev->sd_minor);
        if (ret)
                goto error;

        // stop the old cow thread and start the new one
        __tracer_destroy_cow_thread(old_dev);
        wake_up_process(dev->sd_cow_thread);

        // destroy the unneeded fields of the old_dev and the old_dev itself
        __tracer_destroy_cow_path(old_dev);
        __tracer_destroy_cow_sync_and_free(old_dev);
        kfree(old_dev);

        return 0;

error:
        LOG_ERROR(ret, "error transitioning tracer to snapshot mode");
        __tracer_destroy_cow_thread(dev);
        __tracer_destroy_snap(dev);
        __tracer_destroy_cow_path(dev);
        __tracer_destroy_cow_free(dev);
        kfree(dev);

        return ret;
}

/**
 * tracer_reconfigure() - Reconfigures the cache size associated with @dev.
 *
 * @dev: The &struct snap_device object pointer.
 * @cache_size: Limits the size of the COW section cache (in bytes).
 */
void tracer_reconfigure(struct snap_device *dev, unsigned long cache_size)
{
        dev->sd_cache_size = cache_size;
        if (!cache_size)
                cache_size = dattobd_cow_max_memory_default;
        if (test_bit(ACTIVE, &dev->sd_state))
                cow_modify_cache_size(dev->sd_cow, cache_size);
}

/**
 * tracer_dattobd_info() - Copies relevant, current information in @dev to
 *                         @info.
 *
 * @dev: The source &struct snap_device tracking block device state.
 * @info: A destination &struct dattobd_info object pointer.
 */
void tracer_dattobd_info(const struct snap_device *dev,
                         struct dattobd_info *info)
{
        info->minor = dev->sd_minor;
        info->state = dev->sd_state;
        info->error = tracer_read_fail_state(dev);
        info->cache_size = (dev->sd_cache_size) ?
                                   dev->sd_cache_size :
                                   dattobd_cow_max_memory_default;
        strlcpy(info->cow, dev->sd_cow_path, PATH_MAX);
        strlcpy(info->bdev, dev->sd_bdev_path, PATH_MAX);

        if (!test_bit(UNVERIFIED, &dev->sd_state)) {
                info->falloc_size = dev->sd_cow->file_max;
                info->seqid = dev->sd_cow->seqid;
                memcpy(info->uuid, dev->sd_cow->uuid, COW_UUID_SIZE);
                info->version = dev->sd_cow->version;
                info->nr_changed_blocks = dev->sd_cow->nr_changed_blocks;
        } else {
                info->falloc_size = 0;
                info->seqid = 0;
                memset(info->uuid, 0, COW_UUID_SIZE);
        }
}

/************************AUTOMATIC TRANSITION FUNCTIONS************************/

/**
 * __tracer_active_to_dormant() - Transitions from ACTIVE to DORMANT.  This
 *                                happens, for example, when the underlying
 *                                block device becomes unwritable.
 *
 * @dev: The &struct snap_device object pointer.
 */
void __tracer_active_to_dormant(struct snap_device *dev)
{
        int ret;

        // stop the cow thread
        __tracer_destroy_cow_thread(dev);

        // close the cow manager
        ret = __tracer_destroy_cow_sync_and_close(dev);
        if (ret)
                goto error;

        // mark as dormant
        smp_wmb();
        clear_bit(ACTIVE, &dev->sd_state);

        return;

error:
        LOG_ERROR(ret, "error transitioning tracer to dormant state");
        tracer_set_fail_state(dev, ret);
}

/**
 * __tracer_unverified_snap_to_active() - Initiates tracking from a device
 *                                        that was unverified and makes it
 *                                        active.
 *
 * @dev: The &struct snap_device object pointer.
 * @user_mount_path: A userspace supplied path used to build the COW file path.
 */
void __tracer_unverified_snap_to_active(struct snap_device *dev,
                                        const char __user *user_mount_path)
{
        int ret;
        unsigned int minor = dev->sd_minor;
        char *cow_path, *bdev_path = dev->sd_bdev_path,
                        *rel_path = dev->sd_cow_path;
        unsigned long cache_size = dev->sd_cache_size;

        // remove tracing while we setup the struct
        __tracer_destroy_tracing(dev);

        // mark as active
        set_bit(ACTIVE, &dev->sd_state);
        clear_bit(UNVERIFIED, &dev->sd_state);

        dev->sd_bdev_path = NULL;
        dev->sd_cow_path = NULL;

        // setup base device
        ret = __tracer_setup_base_dev(dev, bdev_path);
        if (ret)
                goto error;

        // generate the full pathname
        ret = user_mount_pathname_concat(user_mount_path, rel_path, &cow_path);
        if (ret)
                goto error;

        // setup the cow manager
        ret = __tracer_setup_cow_reload_snap(dev, dev->sd_base_dev, cow_path,
                                             dev->sd_size, dev->sd_cache_size);
        if (ret)
                goto error;

        // setup the cow path
        ret = __tracer_setup_cow_path(dev, dev->sd_cow->filp);
        if (ret)
                goto error;

#ifndef USE_BDOPS_SUBMIT_BIO
        // retain an association between the original mrf and the block device
        ret = mrf_get(dev->sd_base_dev, GET_BIO_REQUEST_TRACKING_PTR(dev->sd_base_dev));
        if (ret)
                goto error;
#endif

        // setup the snapshot values
        ret = __tracer_setup_snap(dev, minor, dev->sd_base_dev, dev->sd_size);
        if (ret)
                goto error;

        // setup the cow thread and run it
        ret = __tracer_setup_snap_cow_thread(dev, minor);
        if (ret)
                goto error;

        wake_up_process(dev->sd_cow_thread);

        // inject the tracing function
        ret = __tracer_setup_tracing(dev, minor);
        if (ret)
                goto error;

        kfree(bdev_path);
        kfree(rel_path);
        kfree(cow_path);

        return;

error:
        LOG_ERROR(ret, "error transitioning snapshot tracer to active state");
        tracer_destroy(dev);
        tracer_setup_unverified_snap(dev, minor, bdev_path, rel_path,
                                     cache_size);
        tracer_set_fail_state(dev, ret);
        kfree(bdev_path);
        kfree(rel_path);
        if (cow_path)
                kfree(cow_path);
}

/**
 * __tracer_unverified_inc_to_active() - Moves from an unverified state to
 *                                       and active state.
 * @dev: The &struct snap_device object pointer.
 * @user_mount_path: A userspace supplied path used to build the COW file path.
 *
 * Tracing is configured for the block device after this call completes.
 */
void __tracer_unverified_inc_to_active(struct snap_device *dev,
                                       const char __user *user_mount_path)
{
        int ret;
        unsigned int minor = dev->sd_minor;
        char *cow_path, *bdev_path = dev->sd_bdev_path,
                        *rel_path = dev->sd_cow_path;
        unsigned long cache_size = dev->sd_cache_size;

        // remove tracing while we setup the struct
        __tracer_destroy_tracing(dev);

        // mark as active
        set_bit(ACTIVE, &dev->sd_state);
        clear_bit(UNVERIFIED, &dev->sd_state);

        dev->sd_bdev_path = NULL;
        dev->sd_cow_path = NULL;

        // setup base device
        ret = __tracer_setup_base_dev(dev, bdev_path);
        if (ret)
                goto error;

        // generate the full pathname
        ret = user_mount_pathname_concat(user_mount_path, rel_path, &cow_path);
        if (ret)
                goto error;

        // setup the cow manager
        ret = __tracer_setup_cow_reload_inc(dev, dev->sd_base_dev, cow_path,
                                            dev->sd_size, dev->sd_cache_size);
        if (ret)
                goto error;

        // setup the cow path
        ret = __tracer_setup_cow_path(dev, dev->sd_cow->filp);
        if (ret)
                goto error;

#ifndef USE_BDOPS_SUBMIT_BIO
        // retain an association between the original mrf and the block device
        ret = mrf_get(dev->sd_base_dev, GET_BIO_REQUEST_TRACKING_PTR(dev->sd_base_dev));
        if (ret)
                goto error;
#endif

        // setup the cow thread and run it
        ret = __tracer_setup_inc_cow_thread(dev, minor);
        if (ret)
                goto error;

        wake_up_process(dev->sd_cow_thread);

        // inject the tracing function
        ret = __tracer_setup_tracing(dev, minor);
        if (ret)
                goto error;

        kfree(bdev_path);
        kfree(rel_path);
        kfree(cow_path);

        return;

error:
        LOG_ERROR(ret, "error transitioning incremental to active state");
        tracer_destroy(dev);
        tracer_setup_unverified_inc(dev, minor, bdev_path, rel_path,
                                    cache_size);
        tracer_set_fail_state(dev, ret);
        kfree(bdev_path);
        kfree(rel_path);
        if (cow_path)
                kfree(cow_path);
}

/**
 * __tracer_dormant_to_active() - Starts tracing on a previously traced
 *                                device.
 * @dev: The &struct snap_device object pointer.
 * @user_mount_path: A userspace supplied path used to build the COW file path.
 *
 * Will continue tracing in the previous mode, i.e., snapshot or incremental.
 */
void __tracer_dormant_to_active(struct snap_device *dev,
                                const char __user *user_mount_path)
{
        int ret;
        char *cow_path;

        // generate the full pathname
        ret = user_mount_pathname_concat(user_mount_path, dev->sd_cow_path,
                                         &cow_path);
        if (ret)
                goto error;

        // setup the cow manager
        ret = __tracer_setup_cow_reopen(dev, dev->sd_base_dev, cow_path);
        if (ret)
                goto error;

        // restart the cow thread
        if (test_bit(SNAPSHOT, &dev->sd_state))
                ret = __tracer_setup_snap_cow_thread(dev, dev->sd_minor);
        else
                ret = __tracer_setup_inc_cow_thread(dev, dev->sd_minor);

        if (ret)
                goto error;

        wake_up_process(dev->sd_cow_thread);

        // set the state to active
        smp_wmb();
        set_bit(ACTIVE, &dev->sd_state);
        clear_bit(UNVERIFIED, &dev->sd_state);

        kfree(cow_path);

        return;

error:
        LOG_ERROR(ret, "error transitioning tracer to active state");
        if (cow_path)
                kfree(cow_path);
        tracer_set_fail_state(dev, ret);
}