/**
 * SPE program to do a memset, by DMAing a SPE buffer into an address
 * provided by the PPE.
 *
 * The argv argument to main will contain the address of our parameters,
 * contained within a spe_args structure. We need to DMA this into the
 * SPE local store first.
 *
 * We then set up a local buffer, and set it to the byte we want to memset.
 *
 * This buffer is then DMA-ed to the PPE address specified in the spe_args
 * structure, until we've copied the correct amount of data to the PPE.
 *
 * Since the MFC generally works on 16-byte addresses, we 16-byte align
 * all buffers that will be used in DMA transfers. Rather than hardcoding
 * 16 all over the place, we use SPE_ALIGN instead.
 */

#include <stdio.h>
#include <stdint.h>
#include <spu_mfcio.h>
#include <string.h>

#include "common.h"

#define CHUNK_SIZE 4096

/*
 * Source buffer for the outgoing DMA transfers: CHUNK_SIZE bytes, placed
 * on a SPE_ALIGN-byte boundary. main() fills it with the requested byte
 * and then PUTs it to the PPE.
 */
uint8_t buf[CHUNK_SIZE] __attribute__((aligned(SPE_ALIGN)));

/*
 * SPE entry point: fill a PPE-side buffer with a single byte value.
 *
 * speid - not used by this program
 * argv  - populated with the address that the PPE provided, from the 4th
 *         argument to spe_context_run(); it locates a struct spe_args
 * envp  - not used by this program
 *
 * The spe_args structure is fetched into local store, the local buffer is
 * filled with args.c, and the buffer is then DMA-ed out to args.buf_addr
 * in pieces of at most CHUNK_SIZE bytes until args.buf_size bytes have
 * been written.
 *
 * Always returns 0.
 */
int main(uint64_t speid, uint64_t argv, uint64_t envp)
{
	/* every DMA in this program uses the same tag */
	const unsigned int tag = 0;
	struct spe_args args __attribute__((aligned(SPE_ALIGN)));

	(void)speid;
	(void)envp;

	/* DMA the spe_args struct into the SPE. The mfc_get function
	 * takes the following arguments, in order:
	 *
	 * - The local buffer pointer to DMA into
	 * - The remote address to DMA from
	 * - The number of bytes to transfer
	 * - A tag (0 to 31) to assign to this DMA transaction. The tag is
	 *   later used to wait for this particular DMA to complete.
	 * - The transfer class ID (don't worry about this one)
	 * - The replacement class ID (don't worry about this one either)
	 */
	mfc_get(&args, argv, sizeof(args), tag, 0, 0);

	/* Wait for the DMA to complete - we write the tag mask with
	 * (1 << tag), then block until every tag in the mask is done */
	mfc_write_tag_mask(1 << tag);
	mfc_read_tag_status_all();

	/* initialise our local buffer */
	memset(buf, args.c, sizeof(buf));

	/* do the actual data transfers, in CHUNK_SIZE amounts, until there
	 * is no data left to transfer */
	while (args.buf_size > 0) {
		uint32_t transfer_size = CHUNK_SIZE;

		/* Clamp before narrowing: comparing in the field's own type
		 * means a large remaining count can never be truncated or
		 * turned negative on its way into transfer_size. */
		if (args.buf_size < CHUNK_SIZE)
			transfer_size = args.buf_size;

		/* NOTE(review): the MFC restricts transfer sizes (1, 2, 4, 8
		 * or a multiple of 16 bytes). The final partial chunk is
		 * passed through unchanged, so this presumably relies on the
		 * PPE supplying a suitable buf_size - confirm against the
		 * caller. */

		/* Do a DMA PUT to copy data from the SPE to the PPE. Since
		 * we know that the previous DMA using this tag has finished,
		 * we're free to re-use the tag */
		mfc_put(buf, args.buf_addr, transfer_size, tag, 0, 0);

		/* Wait for the DMA to complete */
		mfc_write_tag_mask(1 << tag);
		mfc_read_tag_status_all();

		args.buf_size -= transfer_size;
		args.buf_addr += transfer_size;
	}

	return 0;
}
