Actions

Difference between revisions of "Arrange timed code.cpp"

From Just in Time

(Update code to latest version)
(replace quick hack version with the deluxe, 20% extra version)
Line 1: Line 1:
 
<syntaxhighlight lang="cpp">
 
<syntaxhighlight lang="cpp">
 
//
 
//
// Copyright (c) 2013 Danny Havenith
+
// Copyright (c) 2013,2014 Danny Havenith
 
//
 
//
 
// Distributed under the Boost Software License, Version 1.0. (See accompanying
 
// Distributed under the Boost Software License, Version 1.0. (See accompanying
Line 8: Line 8:
  
 
#include <iostream>
 
#include <iostream>
 +
#include <fstream>
 
#include <string>
 
#include <string>
 
#include <map>
 
#include <map>
 
#include <vector>
 
#include <vector>
 +
#include <list>
 
#include <algorithm>
 
#include <algorithm>
 
#include <limits>
 
#include <limits>
#include <boost/foreach.hpp>
+
#include <boost/regex.hpp>
 +
 
 +
using std::vector;
 +
using std::string;
 +
using std::list;
 +
using std::map;
  
using namespace std;
 
 
/// Simple struct that can be used as an input to describe blocks of assembly instructions with jumps and labels
 
/// Simple struct that can be used as an input to describe blocks of assembly instructions with jumps and labels
struct labelinfo {
+
struct Instruction {
 
     string label;
 
     string label;
     string jumpto;
+
     string instruction;
 
};
 
};
 +
using Instructions = vector<Instruction>;
 +
  
 
/// a contiguous block of assembly instructions.
 
/// a contiguous block of assembly instructions.
 
/// This structure holds information about the size of the block,
 
/// This structure holds information about the size of the block,
/// all relative jumps that emanate from this block and all labes that
+
/// all relative jumps that emanate from this block and all labels that
 
/// are defined in this block.
 
/// are defined in this block.
struct block
+
class Block
 
{
 
{
     typedef map<string, int> labelmap;
+
public:
    int      size;
+
Block( const Instructions &code)
 +
:code(code)
 +
{
 +
update_labels();
 +
}
 +
 
 +
 
 +
     typedef map<string, int> LabelMap;
 
     string  name;
 
     string  name;
     labelmap labels;
+
     LabelMap labels;
     labelmap jumps;
+
     LabelMap jumps;
 +
    Instructions code;
 +
 
 +
private:
 +
void update_labels();
  
    bool operator<( const block &other) const
 
    {
 
        return name < other.name;
 
    }
 
 
};
 
};
  
/// Parse labelinfo structs and create disjunct blocks.
+
using Blocks = list<Block>;
/// This function will traverse the range of labelinfos and create blocks on-the-go.
+
 
/// The last statement of a block is recognized by the label "END", whenever this label is encountered all
+
void Block::update_labels()
/// statements so far will be combined in one block and this block will be appended to the result.
 
vector<block> parse_blocks( labelinfo *begin, labelinfo *end)
 
 
{
 
{
    vector<block> result;
+
static const boost::regex branch("BR..\\s+(\\S+)\\s*");
 
 
 
     int current_line = 0;
 
     int current_line = 0;
     block current_block;
+
     for( const auto &ins: code)
    while (begin != end)
 
 
     {
 
     {
        if (begin->label == "END")
+
if (!ins.label.empty())
        {
+
{
            current_block.size = current_line + 1;
+
if (current_line == 0) name = ins.label;
            result.push_back( current_block);
+
labels[ins.label] = current_line;
            current_line = 0;
+
}
            current_block = block();
+
 
        }
+
boost::smatch match;
        else
+
if (regex_match( ins.instruction, match, branch))
        {
+
{
            if (!begin->label.empty())
+
jumps[ match[1]] = current_line;
            {
+
}
                if (current_line == 0) current_block.name = begin->label;
+
++current_line;
                current_block.labels[begin->label] = current_line;
 
            }
 
            if (!begin->jumpto.empty())
 
            {
 
                current_block.jumps[begin->jumpto] = current_line;
 
            }
 
            ++current_line;
 
        }
 
        ++begin;
 
 
     }
 
     }
    return result;
 
 
}
 
}
  
 
/// Given a sequence of blocks, determine the absolute addresses of the labels that
 
/// Given a sequence of blocks, determine the absolute addresses of the labels that
 
/// appear in each block.
 
/// appear in each block.
block::labelmap map_labels( const vector<block>& blocks)
+
Block::LabelMap map_labels( const Blocks& blocks)
 
{
 
{
     block::labelmap label_addresses;
+
     Block::LabelMap label_addresses;
  
 
     // determine absolute labels
 
     // determine absolute labels
 
     int current_block_startaddress = 0;
 
     int current_block_startaddress = 0;
     BOOST_FOREACH( const block &b, blocks)
+
     for( const auto &b: blocks)
 
     {
 
     {
         BOOST_FOREACH( const block::labelmap::value_type &label, b.labels)
+
         for( const auto &label: b.labels)
 
         {
 
         {
 
             label_addresses[label.first] = label.second
 
             label_addresses[label.first] = label.second
 
                     + current_block_startaddress;
 
                     + current_block_startaddress;
 
         }
 
         }
         current_block_startaddress += b.size;
+
         current_block_startaddress += b.code.size();
 
     }
 
     }
  
Line 103: Line 104:
 
/// This function is faster than the calculate_fit() function because it can terminate immediately if it finds a jump
 
/// This function is faster than the calculate_fit() function because it can terminate immediately if it finds a jump
 
/// that does not qualify.
 
/// that does not qualify.
bool is_fit( const vector<block> &blocks)
+
bool is_fit( const Blocks &blocks)
 
{
 
{
     block::labelmap label_addresses = map_labels(blocks);
+
     Block::LabelMap label_addresses = map_labels(blocks);
  
 
     // see if all jumps are in range.
 
     // see if all jumps are in range.
 
     int current_block_startaddress = 0;
 
     int current_block_startaddress = 0;
     BOOST_FOREACH( const block &b, blocks)
+
     for( const auto &b: blocks)
 
     {
 
     {
         BOOST_FOREACH( const block::labelmap::value_type &jump, b.jumps)
+
         for( const auto &jump: b.jumps)
 
         {
 
         {
 
             int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
 
             int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
 
             if (offset > 64 || offset < -63) return false;
 
             if (offset > 64 || offset < -63) return false;
 
         }
 
         }
         current_block_startaddress += b.size;
+
         current_block_startaddress += b.code.size();
 
     }
 
     }
  
Line 122: Line 123:
 
}
 
}
  
/// This is largely a copy of the is_fit() function with some changes. Instead of returning a fit/not-fit boolean,
+
/**
/// this function returns a score that is calculated as the maximum (absolute) jump distance.
+
*
/// This function is slightly less efficient than that is_fit() function, because it needss to complete the
+
* This is largely a copy of the is_fit() function with some changes. Instead of returning a fit/not-fit boolean,
/// calculation, even if the score is higher than the fitness criterium.
+
* this function returns a score that is calculated as the maximum (absolute) jump distance.
int calculate_fit( const vector<block> &blocks)
+
* This function is slightly less efficient than that is_fit() function, because it needss to complete the
 +
* calculation, even if the score is higher than the fitness criterium.
 +
*
 +
*/
 +
int calculate_fit( const Blocks &blocks)
 
{
 
{
     block::labelmap label_addresses = map_labels( blocks);
+
     Block::LabelMap label_addresses = map_labels( blocks);
  
 
     // calculate the maximum jump offset in this sequence of blocks.
 
     // calculate the maximum jump offset in this sequence of blocks.
 
     int max_offset = 0;
 
     int max_offset = 0;
 
     int current_block_startaddress = 0;
 
     int current_block_startaddress = 0;
     BOOST_FOREACH( const block &b, blocks)
+
     for( const auto &b: blocks)
 
     {
 
     {
         BOOST_FOREACH( const block::labelmap::value_type &jump, b.jumps)
+
         for ( const auto &jump: b.jumps)
 
         {
 
         {
 
             int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
 
             int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
Line 141: Line 146:
 
             if (-offset > max_offset) max_offset = -offset;
 
             if (-offset > max_offset) max_offset = -offset;
 
         }
 
         }
         current_block_startaddress += b.size;
+
         current_block_startaddress += b.code.size();
 
     }
 
     }
  
Line 147: Line 152:
 
}
 
}
  
/// Description of the code blocks. This was created with some spreadsheet magic from the original code spreadsheet
+
/**
/// because I was too lazy to implement input text parsing.
+
* Replace adjacent NOPs with single RJMP instructions.
labelinfo labels[] = {
+
* The replacement is only performed if the second NOP has no label.
        {"P1x17"   ,""},
+
*/
        {"P1x18"    ,""},
+
void optimize_nops( Instructions code, Blocks &results)
        {"" ,""},
+
{
        {"L1x00"   ,""},
+
static int label_counter = 0;
        {"" ,""},
+
auto position = code.begin();
        {"" ,"L1104"},
+
auto previous_position = position;
        {"" ,""},
+
auto both_are_nop = [](const Instruction &left, const Instruction &right){
        {"" ,"M1006"},
+
return left.instruction == "NOP" && right.instruction == "NOP" && right.label.empty();
        {"" ,""},
+
};
        {"" ,""},
+
 
        {"" ,""},
+
//Instructions first_instructions;
        {"" ,""},
+
// find adjacent NOPs and break up the block, using RJMP to jump over the second NOP
        {"" ,""},
+
position = std::adjacent_find( previous_position, code.end(), both_are_nop);
        {"" ,""},
+
while ( code.end() != position)
        {"" ,""},
+
{
        {"" ,""},
+
Instructions first_instructions;
        {"" ,"Hx017"},
+
first_instructions.insert( first_instructions.end(), previous_position, position);
        {"" ,""},
+
previous_position = position+2;
        {"" ,"P0x19"},
+
if (previous_position->label.empty()) previous_position->label = "brk" + std::to_string( label_counter++);
        {"END" ,""},
+
first_instructions.push_back({position->label, "RJMP " + previous_position->label});
        {"L1104"    ,""},
+
results.push_back( Block(first_instructions));
        {"" ,"Mx107"},
+
position = std::adjacent_find( previous_position, code.end(), both_are_nop);
        {"" ,""},
+
}
        {"" ,""},
+
 
        {"" ,""},
+
Instructions first_instructions;
        {"" ,""},
+
first_instructions.insert( first_instructions.end(), previous_position, position);
        {"" ,""},
+
results.push_back( Block(first_instructions));
        {"" ,"Hx115"},
+
}
        {"" ,""},
+
 
        {"" ,"P1x17"},
+
 
        {"END"  ,""},
+
 
        {"L0005"    ,""},
+
/**
        {"" ,"Mx008"},
+
* Read assembly instructions from a text file.
        {"" ,""},
+
* This function assumes the following:
        {"" ,""},
+
*    - each line consists of an optional label, followed by (non-optional) whitespace, followed by an instruction
        {"" ,""},
+
*    - the text consists of blocks, each block starts with a line that has a label and ends with an RJMP instruction
        {"" ,""},
+
*    - the text lines inbetween blocks can be ignored.
        {"" ,""},
+
*    - a line with a label, but without instruction is considered to have a "NOP" instruction
        {"" ,""},
+
*    - lines with the instruction "^^^" (possibly surrounded by whitespace) are ignored.
        {"" ,"Hx017"},
+
*
        {"" ,""},
+
*    When a sequence of two adjacent NOPs is encountered, for which the second NOP has no label,
        {"" ,"P0x19"},
+
*    the two NOPs will be replaced by a single RJMP instruction.
        {"END"  ,""},
+
  */
        {"P0x18"    ,""},
+
Blocks parse_code( std::istream &input)
        {"P0x19"    ,""},
+
{
        {"L0x00"    ,""},
+
// an instruction consists of an optional label, followed by whitespace, followed by the instruction.
        {"" ,""},
+
static const boost::regex instruction_pattern("^([^\\s]*)\\s+(.*)$");
        {"" ,""},
+
static const boost::regex rjmp("RJMP\\s+(.*)");
        {"" ,"L0005"},
+
static const boost::regex continuation("\\s*\\^\\^\\^\\s*");
        {"" ,""},
+
 
        {"" ,"Mx107"},
+
bool in_block = false; // whether we've seen the first label of a block
        {"" ,""},
+
std::string buffer;
        {"" ,""},
+
Blocks result;
        {"" ,""},
+
Instructions current_block;
        {"" ,""},
+
while (getline( input, buffer))
        {"" ,""},
+
{
        {"" ,"Hx115"},
+
boost::smatch match;
        {"" ,""},
+
regex_match( buffer, match, instruction_pattern);
        {"" ,"P1x17"},
+
 
        {"END" ,""},
+
in_block = in_block || match[1].length() != 0;
        {"M1006"    ,""},
+
if (in_block)
        {"" ,""},
+
{
        {"Mx008"   ,""},
+
string label = match[1];
        {"" ,""},
+
string ins = match[2];
        {"" ,""},
+
if (ins.empty()) ins = "NOP";
        {"" ,""},
+
 
        {"" ,""},
+
if (!regex_match( ins, continuation))
        {"" ,""},
+
{
        {"" ,""},
+
current_block.push_back( {label, ins});
        {"" ,"P1x17"},
+
}
        {"END" ,""},
+
 
        {"Mx107"    ,""},
+
if (regex_match( ins, rjmp))
        {"" ,""},
+
{
        {"" ,""},
+
optimize_nops( current_block, result);
        {"" ,""},
+
in_block = false;
        {"" ,""},
+
current_block.clear();
        {"" ,""},
+
}
        {"" ,""},
+
}
        {"" ,""},
+
}
        {"" ,"P1x17"},
+
 
        {"END"  ,""},
+
if (!current_block.empty())
        {"Hx115"    ,""},
+
{
        {"" ,""},
+
optimize_nops( current_block, result);
        {"" ,""},
+
}
        {"" ,""},
+
 
        {"" ,""},
+
return result;
        {"" ,""},
+
}
        {"END" ,""},
+
 
        {"Hx017"    ,""},
+
/**
        {"" ,""},
+
* print a single block of instructions.
        {"" ,""},
+
  */
        {"" ,""},
+
void print_block( const Block &block, std::ostream &out)
        {"END"  ,""},
+
{
};
+
for (const auto &ins: block.code)
 +
{
 +
string label = ins.label.empty()?"":ins.label + ":";
 +
out << '"' << label << string( 8-label.size(), ' ') << ins.instruction << string(40 - ins.instruction.size(), ' ') << "\\n\"" << '\n';
 +
}
 +
}
 +
 
 +
/**
 +
* print all blocks in a vector of blocks, in the order in which they appear in the vector.
 +
  */
 +
void print_blocks( const Blocks &blocks, std::ostream &out)
 +
{
 +
for (const auto &b: blocks)
 +
{
 +
print_block( b, out);
 +
}
 +
}
 +
 
 +
void add_block_at_optimum( Blocks &blocks, Block block)
 +
{
 +
auto optimum = std::numeric_limits<int>::max();
 +
Blocks result;
 +
 
 +
for (auto it = blocks.begin(); it != blocks.end();++it)
 +
{
 +
Blocks candidate( blocks.begin(), it);
 +
candidate.push_back( block);
 +
candidate.insert( candidate.end(), it, blocks.end());
 +
auto fit = calculate_fit( candidate);
 +
if (fit < optimum)
 +
{
 +
optimum = fit;
 +
std::swap( result, candidate);
 +
}
 +
}
 +
 
 +
blocks.push_back( block);
 +
if (calculate_fit( blocks) >= optimum)
 +
{
 +
std::swap( blocks, result);
 +
}
 +
}
 +
 
 +
/**
 +
* Heuristic optimization of blocks. Just add the blocks one-by-one to a new list of blocks.
 +
* This is not guaranteed to find the optimum arrangement. The result depends on the order in which
 +
* the blocks are offered.
 +
  */
 +
Blocks optimize_blocks( Blocks blocks)
 +
{
 +
Blocks result;
 +
 
 +
for (const auto &block: blocks)
 +
{
 +
add_block_at_optimum( result, block);
 +
}
 +
 
 +
return result;
 +
}
 +
 
  
/// Print out the optimum sequence of code blocks (=with minimum max jump distance).
+
/**
/// This function will simply iterate over all 40K permutations of 8 blocks and calculate the score for each
+
* Heuristic optimization of blocks.
/// permutation.
+
* Retry the optimize_blocks() function with blocks in random order and return if a fitting block sequence is found or
int main()
+
* if the iteration count is exceeded.
 +
*/
 +
Blocks optimize_blocks( Blocks blocks, unsigned int iteration_count)
 
{
 
{
     vector<block> blocks = parse_blocks( labels, labels + sizeof labels/sizeof labels[0] );
+
     std::random_device rd;
     sort( blocks.begin(), blocks.end());
+
     std::mt19937 g(rd());
  
     BOOST_FOREACH( const block &b, blocks)
+
Blocks best_blocks;
 +
     auto fit_found = false;
 +
    while (!fit_found && iteration_count--)
 
     {
 
     {
        cout << b.name << "\n";
+
    vector<Block> temp( blocks.begin(), blocks.end());
 +
    std::shuffle( temp.begin(), temp.end(), g);
 +
    blocks.assign( temp.begin(), temp.end());
 +
    best_blocks = optimize_blocks( blocks);
 +
    fit_found = is_fit( best_blocks);
 
     }
 
     }
  
     vector<block> best_blocks;
+
     return best_blocks;
    int min_offset = numeric_limits<int>::max();
+
}
    do
+
 
    {
+
/**
        int new_offset = calculate_fit( blocks);
+
* Read a list of assembly instructions and try to re-order those instructions so that the maximum jump distance
        if (new_offset < min_offset)
+
* is not higher than the maximum  allowed jump distance, preserving the semantics of the program.
        {
+
* Additionally, empty statements within blocks are translated to NOP instructions and adjacent NOPs are
            best_blocks = blocks;
+
* translated into RJMP instructions to save space.
            min_offset = new_offset;
+
*
            cout << min_offset << ", ";
+
* assembly instructions will be read from stdin, or if a file name is given from that file name.
        }
+
* output is to standard out.
     } while (next_permutation( blocks.begin(), blocks.end()));
+
*/
 +
int main( int argc, char *argv[])
 +
{
 +
using std::cout;
 +
using std::cin;
 +
 
 +
 
 +
Blocks blocks;
 +
if (argc >1)
 +
{
 +
std::ifstream inputfile(argv[1]);
 +
blocks = parse_code( inputfile);
 +
}
 +
else
 +
{
 +
blocks = parse_code( cin);
 +
}
 +
 
 +
    cout << blocks.size() << " blocks in this code\n";
 +
 
 +
     // try to find an optimum block sequence or give up if we don't succeed after 10000 iterations.
 +
    Blocks best_blocks = optimize_blocks( blocks, 10000);
  
 +
    print_blocks( best_blocks, cout);
  
 
     if (is_fit(best_blocks))
 
     if (is_fit(best_blocks))
 
     {
 
     {
         cout << "\nSuccess:\n";
+
 
 +
         cout << "\nSuccess, longest jump distance is:" << calculate_fit( best_blocks) << "\n";
 
     }
 
     }
 
     else
 
     else
 
     {
 
     {
         cout << "\nNothing\n";
+
         cout << "\nNothing longest jump distance is:" << calculate_fit( best_blocks) << "\n";
    }
 
 
 
    BOOST_FOREACH( const block &b, best_blocks)
 
    {
 
        cout << b.name << "\n";
 
 
     }
 
     }
  
 
     return 0;
 
     return 0;
 
}
 
}
 
 
</syntaxhighlight>
 
</syntaxhighlight>

Revision as of 07:43, 27 February 2014

//
// Copyright (c) 2013,2014 Danny Havenith
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//

#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <vector>
#include <list>
#include <algorithm>
#include <limits>
#include <boost/regex.hpp>

using std::vector;
using std::string;
using std::list;
using std::map;

/// Simple struct that can be used as an input to describe blocks of assembly instructions with jumps and labels
struct Instruction {
    string label;
    string instruction;
};
using Instructions = vector<Instruction>;


/// a contiguous block of assembly instructions.
/// This structure holds information about the size of the block,
/// all relative jumps that emanate from this block and all labels that
/// are defined in this block.
class Block
{
public:
	Block( const Instructions &code)
	:code(code)
	{
		update_labels();
	}


    typedef map<string, int> LabelMap;
    string   name;
    LabelMap labels;
    LabelMap jumps;
    Instructions code;

private:
	void update_labels();

};

using Blocks = list<Block>;

void Block::update_labels()
{
	static const boost::regex branch("BR..\\s+(\\S+)\\s*");
    int current_line = 0;
    for( const auto &ins: code)
    {
		if (!ins.label.empty())
		{
			if (current_line == 0) name = ins.label;
			labels[ins.label] = current_line;
		}

		boost::smatch match;
		if (regex_match( ins.instruction, match, branch))
		{
			jumps[ match[1]] = current_line;
		}
		++current_line;
    }
}

/// Given a sequence of blocks, determine the absolute addresses of the labels that
/// appear in each block.
Block::LabelMap map_labels( const Blocks& blocks)
{
    Block::LabelMap label_addresses;

    // determine absolute labels
    int current_block_startaddress = 0;
    for( const auto &b: blocks)
    {
        for( const auto &label: b.labels)
        {
            label_addresses[label.first] = label.second
                    + current_block_startaddress;
        }
        current_block_startaddress += b.code.size();
    }

    return label_addresses;
}

/// Determine whether a sequence of blocks matches the criterium of valid jumps.
/// This criterium is matched if all jumps are close enough to their destination labels.
/// Close enough, in this case, means that the offset between jump and destination must fall within [-63,64]
/// This function is faster than the calculate_fit() function because it can terminate immediately if it finds a jump
/// that does not qualify.
bool is_fit( const Blocks &blocks)
{
    Block::LabelMap label_addresses = map_labels(blocks);

    // see if all jumps are in range.
    int current_block_startaddress = 0;
    for( const auto &b: blocks)
    {
        for( const auto &jump: b.jumps)
         {
            int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
            if (offset > 64 || offset < -63) return false;
        }
        current_block_startaddress += b.code.size();
    }

    return true;
}

/**
 *
 * This is largely a copy of the is_fit() function with some changes. Instead of returning a fit/not-fit boolean,
 * this function returns a score that is calculated as the maximum (absolute) jump distance.
 * This function is slightly less efficient than that is_fit() function, because it needss to complete the
 * calculation, even if the score is higher than the fitness criterium.
 *
 */
int calculate_fit( const Blocks &blocks)
{
    Block::LabelMap label_addresses = map_labels( blocks);

    // calculate the maximum jump offset in this sequence of blocks.
    int max_offset = 0;
    int current_block_startaddress = 0;
    for( const auto &b: blocks)
    {
        for ( const auto &jump: b.jumps)
        {
            int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
            if (offset > max_offset) max_offset = offset;
            if (-offset > max_offset) max_offset = -offset;
        }
        current_block_startaddress += b.code.size();
    }

    return max_offset;
}

/**
 * Replace adjacent NOPs with single RJMP instructions.
 * The replacement is only performed if the second NOP has no label.
 */
void optimize_nops( Instructions code, Blocks &results)
{
	static int label_counter = 0;
	auto position = code.begin();
	auto previous_position = position;
	auto both_are_nop = [](const Instruction &left, const Instruction &right){
		return left.instruction == "NOP" && right.instruction == "NOP" && right.label.empty();
		};

	//Instructions first_instructions;
	// find adjacent NOPs and break up the block, using RJMP to jump over the second NOP
	position = std::adjacent_find( previous_position, code.end(), both_are_nop);
	while ( code.end() != position)
	{
		Instructions first_instructions;
		first_instructions.insert( first_instructions.end(), previous_position, position);
		previous_position = position+2;
		if (previous_position->label.empty()) previous_position->label = "brk" + std::to_string( label_counter++);
		first_instructions.push_back({position->label, "RJMP " + previous_position->label});
		results.push_back( Block(first_instructions));
		position = std::adjacent_find( previous_position, code.end(), both_are_nop);
	}

	Instructions first_instructions;
	first_instructions.insert( first_instructions.end(), previous_position, position);
	results.push_back( Block(first_instructions));
}



/**
 * Read assembly instructions from a text file.
 * This function assumes the following:
 *     - each line consists of an optional label, followed by (non-optional) whitespace, followed by an instruction
 *     - the text consists of blocks, each block starts with a line that has a label and ends with an RJMP instruction
 *     - the text lines inbetween blocks can be ignored.
 *     - a line with a label, but without instruction is considered to have a "NOP" instruction
 *     - lines with the instruction "^^^" (possibly surrounded by whitespace) are ignored.
 *
 *     When a sequence of two adjacent NOPs is encountered, for which the second NOP has no label,
 *     the two NOPs will be replaced by a single RJMP instruction.
 */
Blocks parse_code( std::istream &input)
{
	// an instruction consists of an optional label, followed by whitespace, followed by the instruction.
	static const boost::regex instruction_pattern("^([^\\s]*)\\s+(.*)$");
	static const boost::regex rjmp("RJMP\\s+(.*)");
	static const boost::regex continuation("\\s*\\^\\^\\^\\s*");

	bool in_block = false; // whether we've seen the first label of a block
	std::string buffer;
	Blocks result;
	Instructions current_block;
	while (getline( input, buffer))
	{
		boost::smatch match;
		regex_match( buffer, match, instruction_pattern);

		in_block = in_block || match[1].length() != 0;
		if (in_block)
		{
			string label = match[1];
			string ins = match[2];
			if (ins.empty()) ins = "NOP";

			if (!regex_match( ins, continuation))
			{
				current_block.push_back( {label, ins});
			}

			if (regex_match( ins, rjmp))
			{
				optimize_nops( current_block, result);
				in_block = false;
				current_block.clear();
			}
		}
	}

	if (!current_block.empty())
	{
		optimize_nops( current_block, result);
	}

	return result;
}

/**
 * print a single block of instructions.
 */
void print_block( const Block &block, std::ostream &out)
{
	for (const auto &ins: block.code)
	{
		string label = ins.label.empty()?"":ins.label + ":";
		out << '"' << label << string( 8-label.size(), ' ') << ins.instruction << string(40 - ins.instruction.size(), ' ') << "\\n\"" << '\n';
	}
}

/**
 * print all blocks in a vector of blocks, in the order in which they appear in the vector.
 */
void print_blocks( const Blocks &blocks, std::ostream &out)
{
	for (const auto &b: blocks)
	{
		print_block( b, out);
	}
}

void add_block_at_optimum( Blocks &blocks, Block block)
{
	auto optimum = std::numeric_limits<int>::max();
	Blocks result;

	for (auto it = blocks.begin(); it != blocks.end();++it)
	{
		Blocks candidate( blocks.begin(), it);
		candidate.push_back( block);
		candidate.insert( candidate.end(), it, blocks.end());
		auto fit = calculate_fit( candidate);
		if (fit < optimum)
		{
			optimum = fit;
			std::swap( result, candidate);
		}
	}

	blocks.push_back( block);
	if (calculate_fit( blocks) >= optimum)
	{
		std::swap( blocks, result);
	}
}

/**
 * Heuristic optimization of blocks. Just add the blocks one-by-one to a new list of blocks.
 * This is not guaranteed to find the optimum arrangement. The result depends on the order in which
 * the blocks are offered.
 */
Blocks optimize_blocks( Blocks blocks)
{
	Blocks result;

	for (const auto &block: blocks)
	{
		add_block_at_optimum( result, block);
	}

	return result;
}


/**
 * Heuristic optimization of blocks.
 * Retry the optimize_blocks() function with blocks in random order and return if a fitting block sequence is found or
 * if the iteration count is exceeded.
 */
Blocks optimize_blocks( Blocks blocks, unsigned int iteration_count)
{
    std::random_device rd;
    std::mt19937 g(rd());

	Blocks best_blocks;
    auto fit_found = false;
    while (!fit_found && iteration_count--)
    {
    	vector<Block> temp( blocks.begin(), blocks.end());
    	std::shuffle( temp.begin(), temp.end(), g);
    	blocks.assign( temp.begin(), temp.end());
    	best_blocks = optimize_blocks( blocks);
    	fit_found = is_fit( best_blocks);
    }

    return best_blocks;
}

/**
 * Read a list of assembly instructions and try to re-order those instructions so that the maximum jump distance
 * is not higher than the maximum  allowed jump distance, preserving the semantics of the program.
 * Additionally, empty statements within blocks are translated to NOP instructions and adjacent NOPs are
 * translated into RJMP instructions to save space.
 *
 * assembly instructions will be read from stdin, or if a file name is given from that file name.
 * output is to standard out.
 */
int main( int argc, char *argv[])
{
	using std::cout;
	using std::cin;


	Blocks blocks;
	if (argc >1)
	{
		std::ifstream inputfile(argv[1]);
		blocks = parse_code( inputfile);
	}
	else
	{
		blocks = parse_code( cin);
	}

    cout << blocks.size() << " blocks in this code\n";

    // try to find an optimum block sequence or give up if we don't succeed after 10000 iterations.
    Blocks best_blocks = optimize_blocks( blocks, 10000);

    print_blocks( best_blocks, cout);

    if (is_fit(best_blocks))
    {

        cout << "\nSuccess, longest jump distance is:" << calculate_fit( best_blocks) << "\n";
    }
    else
    {
        cout << "\nNothing longest jump distance is:" << calculate_fit( best_blocks) << "\n";
    }

    return 0;
}