Actions

Arrange timed code.cpp

From Just in Time

Revision as of 07:43, 27 February 2014 by Danny (talk | contribs) (replace quick hack version with the deluxe, 20% extra version)
//
// Copyright (c) 2013,2014 Danny Havenith
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//

#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <vector>
#include <list>
#include <algorithm>
#include <limits>
#include <boost/regex.hpp>

using std::vector;
using std::string;
using std::list;
using std::map;

/// Simple struct that can be used as an input to describe blocks of assembly instructions with jumps and labels
struct Instruction {
    string label;
    string instruction;
};
using Instructions = vector<Instruction>;


/// a contiguous block of assembly instructions.
/// This structure holds information about the size of the block,
/// all relative jumps that emanate from this block and all labels that
/// are defined in this block.
class Block
{
public:
	Block( const Instructions &code)
	:code(code)
	{
		update_labels();
	}


    typedef map<string, int> LabelMap;
    string   name;
    LabelMap labels;
    LabelMap jumps;
    Instructions code;

private:
	void update_labels();

};

using Blocks = list<Block>;

void Block::update_labels()
{
	static const boost::regex branch("BR..\\s+(\\S+)\\s*");
    int current_line = 0;
    for( const auto &ins: code)
    {
		if (!ins.label.empty())
		{
			if (current_line == 0) name = ins.label;
			labels[ins.label] = current_line;
		}

		boost::smatch match;
		if (regex_match( ins.instruction, match, branch))
		{
			jumps[ match[1]] = current_line;
		}
		++current_line;
    }
}

/// Given a sequence of blocks, determine the absolute addresses of the labels that
/// appear in each block.
Block::LabelMap map_labels( const Blocks& blocks)
{
    Block::LabelMap label_addresses;

    // determine absolute labels
    int current_block_startaddress = 0;
    for( const auto &b: blocks)
    {
        for( const auto &label: b.labels)
        {
            label_addresses[label.first] = label.second
                    + current_block_startaddress;
        }
        current_block_startaddress += b.code.size();
    }

    return label_addresses;
}

/// Determine whether a sequence of blocks matches the criterium of valid jumps.
/// This criterium is matched if all jumps are close enough to their destination labels.
/// Close enough, in this case, means that the offset between jump and destination must fall within [-63,64]
/// This function is faster than the calculate_fit() function because it can terminate immediately if it finds a jump
/// that does not qualify.
bool is_fit( const Blocks &blocks)
{
    Block::LabelMap label_addresses = map_labels(blocks);

    // see if all jumps are in range.
    int current_block_startaddress = 0;
    for( const auto &b: blocks)
    {
        for( const auto &jump: b.jumps)
         {
            int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
            if (offset > 64 || offset < -63) return false;
        }
        current_block_startaddress += b.code.size();
    }

    return true;
}

/**
 *
 * This is largely a copy of the is_fit() function with some changes. Instead of returning a fit/not-fit boolean,
 * this function returns a score that is calculated as the maximum (absolute) jump distance.
 * This function is slightly less efficient than that is_fit() function, because it needss to complete the
 * calculation, even if the score is higher than the fitness criterium.
 *
 */
int calculate_fit( const Blocks &blocks)
{
    Block::LabelMap label_addresses = map_labels( blocks);

    // calculate the maximum jump offset in this sequence of blocks.
    int max_offset = 0;
    int current_block_startaddress = 0;
    for( const auto &b: blocks)
    {
        for ( const auto &jump: b.jumps)
        {
            int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
            if (offset > max_offset) max_offset = offset;
            if (-offset > max_offset) max_offset = -offset;
        }
        current_block_startaddress += b.code.size();
    }

    return max_offset;
}

/**
 * Replace adjacent NOPs with single RJMP instructions.
 * The replacement is only performed if the second NOP has no label.
 */
void optimize_nops( Instructions code, Blocks &results)
{
	static int label_counter = 0;
	auto position = code.begin();
	auto previous_position = position;
	auto both_are_nop = [](const Instruction &left, const Instruction &right){
		return left.instruction == "NOP" && right.instruction == "NOP" && right.label.empty();
		};

	//Instructions first_instructions;
	// find adjacent NOPs and break up the block, using RJMP to jump over the second NOP
	position = std::adjacent_find( previous_position, code.end(), both_are_nop);
	while ( code.end() != position)
	{
		Instructions first_instructions;
		first_instructions.insert( first_instructions.end(), previous_position, position);
		previous_position = position+2;
		if (previous_position->label.empty()) previous_position->label = "brk" + std::to_string( label_counter++);
		first_instructions.push_back({position->label, "RJMP " + previous_position->label});
		results.push_back( Block(first_instructions));
		position = std::adjacent_find( previous_position, code.end(), both_are_nop);
	}

	Instructions first_instructions;
	first_instructions.insert( first_instructions.end(), previous_position, position);
	results.push_back( Block(first_instructions));
}



/**
 * Read assembly instructions from a text file.
 * This function assumes the following:
 *     - each line consists of an optional label, followed by (non-optional) whitespace, followed by an instruction
 *     - the text consists of blocks, each block starts with a line that has a label and ends with an RJMP instruction
 *     - the text lines inbetween blocks can be ignored.
 *     - a line with a label, but without instruction is considered to have a "NOP" instruction
 *     - lines with the instruction "^^^" (possibly surrounded by whitespace) are ignored.
 *
 *     When a sequence of two adjacent NOPs is encountered, for which the second NOP has no label,
 *     the two NOPs will be replaced by a single RJMP instruction.
 */
Blocks parse_code( std::istream &input)
{
	// an instruction consists of an optional label, followed by whitespace, followed by the instruction.
	static const boost::regex instruction_pattern("^([^\\s]*)\\s+(.*)$");
	static const boost::regex rjmp("RJMP\\s+(.*)");
	static const boost::regex continuation("\\s*\\^\\^\\^\\s*");

	bool in_block = false; // whether we've seen the first label of a block
	std::string buffer;
	Blocks result;
	Instructions current_block;
	while (getline( input, buffer))
	{
		boost::smatch match;
		regex_match( buffer, match, instruction_pattern);

		in_block = in_block || match[1].length() != 0;
		if (in_block)
		{
			string label = match[1];
			string ins = match[2];
			if (ins.empty()) ins = "NOP";

			if (!regex_match( ins, continuation))
			{
				current_block.push_back( {label, ins});
			}

			if (regex_match( ins, rjmp))
			{
				optimize_nops( current_block, result);
				in_block = false;
				current_block.clear();
			}
		}
	}

	if (!current_block.empty())
	{
		optimize_nops( current_block, result);
	}

	return result;
}

/**
 * print a single block of instructions.
 */
void print_block( const Block &block, std::ostream &out)
{
	for (const auto &ins: block.code)
	{
		string label = ins.label.empty()?"":ins.label + ":";
		out << '"' << label << string( 8-label.size(), ' ') << ins.instruction << string(40 - ins.instruction.size(), ' ') << "\\n\"" << '\n';
	}
}

/**
 * print all blocks in a vector of blocks, in the order in which they appear in the vector.
 */
void print_blocks( const Blocks &blocks, std::ostream &out)
{
	for (const auto &b: blocks)
	{
		print_block( b, out);
	}
}

void add_block_at_optimum( Blocks &blocks, Block block)
{
	auto optimum = std::numeric_limits<int>::max();
	Blocks result;

	for (auto it = blocks.begin(); it != blocks.end();++it)
	{
		Blocks candidate( blocks.begin(), it);
		candidate.push_back( block);
		candidate.insert( candidate.end(), it, blocks.end());
		auto fit = calculate_fit( candidate);
		if (fit < optimum)
		{
			optimum = fit;
			std::swap( result, candidate);
		}
	}

	blocks.push_back( block);
	if (calculate_fit( blocks) >= optimum)
	{
		std::swap( blocks, result);
	}
}

/**
 * Heuristic optimization of blocks. Just add the blocks one-by-one to a new list of blocks.
 * This is not guaranteed to find the optimum arrangement. The result depends on the order in which
 * the blocks are offered.
 */
Blocks optimize_blocks( Blocks blocks)
{
	Blocks result;

	for (const auto &block: blocks)
	{
		add_block_at_optimum( result, block);
	}

	return result;
}


/**
 * Heuristic optimization of blocks.
 * Retry the optimize_blocks() function with blocks in random order and return if a fitting block sequence is found or
 * if the iteration count is exceeded.
 */
Blocks optimize_blocks( Blocks blocks, unsigned int iteration_count)
{
    std::random_device rd;
    std::mt19937 g(rd());

	Blocks best_blocks;
    auto fit_found = false;
    while (!fit_found && iteration_count--)
    {
    	vector<Block> temp( blocks.begin(), blocks.end());
    	std::shuffle( temp.begin(), temp.end(), g);
    	blocks.assign( temp.begin(), temp.end());
    	best_blocks = optimize_blocks( blocks);
    	fit_found = is_fit( best_blocks);
    }

    return best_blocks;
}

/**
 * Read a list of assembly instructions and try to re-order those instructions so that the maximum jump distance
 * is not higher than the maximum  allowed jump distance, preserving the semantics of the program.
 * Additionally, empty statements within blocks are translated to NOP instructions and adjacent NOPs are
 * translated into RJMP instructions to save space.
 *
 * assembly instructions will be read from stdin, or if a file name is given from that file name.
 * output is to standard out.
 */
int main( int argc, char *argv[])
{
	using std::cout;
	using std::cin;


	Blocks blocks;
	if (argc >1)
	{
		std::ifstream inputfile(argv[1]);
		blocks = parse_code( inputfile);
	}
	else
	{
		blocks = parse_code( cin);
	}

    cout << blocks.size() << " blocks in this code\n";

    // try to find an optimum block sequence or give up if we don't succeed after 10000 iterations.
    Blocks best_blocks = optimize_blocks( blocks, 10000);

    print_blocks( best_blocks, cout);

    if (is_fit(best_blocks))
    {

        cout << "\nSuccess, longest jump distance is:" << calculate_fit( best_blocks) << "\n";
    }
    else
    {
        cout << "\nNothing longest jump distance is:" << calculate_fit( best_blocks) << "\n";
    }

    return 0;
}