//
// Copyright (c) 2013,2014 Danny Havenith
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <vector>
#include <list>
#include <algorithm>
#include <limits>
#include <boost/regex.hpp>
using std::vector;
using std::string;
using std::list;
using std::map;
/// Simple struct that can be used as an input to describe blocks of assembly instructions with jumps and labels
struct Instruction {
string label;
string instruction;
};
using Instructions = vector<Instruction>;
/// a contiguous block of assembly instructions.
/// This structure holds information about the size of the block,
/// all relative jumps that emanate from this block and all labels that
/// are defined in this block.
class Block
{
public:
Block( const Instructions &code)
:code(code)
{
update_labels();
}
typedef map<string, int> LabelMap;
string name;
LabelMap labels;
LabelMap jumps;
Instructions code;
private:
void update_labels();
};
using Blocks = list<Block>;
void Block::update_labels()
{
static const boost::regex branch("BR..\\s+(\\S+)\\s*");
int current_line = 0;
for( const auto &ins: code)
{
if (!ins.label.empty())
{
if (current_line == 0) name = ins.label;
labels[ins.label] = current_line;
}
boost::smatch match;
if (regex_match( ins.instruction, match, branch))
{
jumps[ match[1]] = current_line;
}
++current_line;
}
}
/// Given a sequence of blocks, determine the absolute addresses of the labels that
/// appear in each block.
Block::LabelMap map_labels( const Blocks& blocks)
{
Block::LabelMap label_addresses;
// determine absolute labels
int current_block_startaddress = 0;
for( const auto &b: blocks)
{
for( const auto &label: b.labels)
{
label_addresses[label.first] = label.second
+ current_block_startaddress;
}
current_block_startaddress += b.code.size();
}
return label_addresses;
}
/// Determine whether a sequence of blocks matches the criterium of valid jumps.
/// This criterium is matched if all jumps are close enough to their destination labels.
/// Close enough, in this case, means that the offset between jump and destination must fall within [-63,64]
/// This function is faster than the calculate_fit() function because it can terminate immediately if it finds a jump
/// that does not qualify.
bool is_fit( const Blocks &blocks)
{
Block::LabelMap label_addresses = map_labels(blocks);
// see if all jumps are in range.
int current_block_startaddress = 0;
for( const auto &b: blocks)
{
for( const auto &jump: b.jumps)
{
int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
if (offset > 64 || offset < -63) return false;
}
current_block_startaddress += b.code.size();
}
return true;
}
/**
*
* This is largely a copy of the is_fit() function with some changes. Instead of returning a fit/not-fit boolean,
* this function returns a score that is calculated as the maximum (absolute) jump distance.
* This function is slightly less efficient than that is_fit() function, because it needss to complete the
* calculation, even if the score is higher than the fitness criterium.
*
*/
int calculate_fit( const Blocks &blocks)
{
Block::LabelMap label_addresses = map_labels( blocks);
// calculate the maximum jump offset in this sequence of blocks.
int max_offset = 0;
int current_block_startaddress = 0;
for( const auto &b: blocks)
{
for ( const auto &jump: b.jumps)
{
int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
if (offset > max_offset) max_offset = offset;
if (-offset > max_offset) max_offset = -offset;
}
current_block_startaddress += b.code.size();
}
return max_offset;
}
/**
* Replace adjacent NOPs with single RJMP instructions.
* The replacement is only performed if the second NOP has no label.
*/
void optimize_nops( Instructions code, Blocks &results)
{
static int label_counter = 0;
auto position = code.begin();
auto previous_position = position;
auto both_are_nop = [](const Instruction &left, const Instruction &right){
return left.instruction == "NOP" && right.instruction == "NOP" && right.label.empty();
};
//Instructions first_instructions;
// find adjacent NOPs and break up the block, using RJMP to jump over the second NOP
position = std::adjacent_find( previous_position, code.end(), both_are_nop);
while ( code.end() != position)
{
Instructions first_instructions;
first_instructions.insert( first_instructions.end(), previous_position, position);
previous_position = position+2;
if (previous_position->label.empty()) previous_position->label = "brk" + std::to_string( label_counter++);
first_instructions.push_back({position->label, "RJMP " + previous_position->label});
results.push_back( Block(first_instructions));
position = std::adjacent_find( previous_position, code.end(), both_are_nop);
}
Instructions first_instructions;
first_instructions.insert( first_instructions.end(), previous_position, position);
results.push_back( Block(first_instructions));
}
/**
* Read assembly instructions from a text file.
* This function assumes the following:
* - each line consists of an optional label, followed by (non-optional) whitespace, followed by an instruction
* - the text consists of blocks, each block starts with a line that has a label and ends with an RJMP instruction
* - the text lines inbetween blocks can be ignored.
* - a line with a label, but without instruction is considered to have a "NOP" instruction
* - lines with the instruction "^^^" (possibly surrounded by whitespace) are ignored.
*
* When a sequence of two adjacent NOPs is encountered, for which the second NOP has no label,
* the two NOPs will be replaced by a single RJMP instruction.
*/
Blocks parse_code( std::istream &input)
{
// an instruction consists of an optional label, followed by whitespace, followed by the instruction.
static const boost::regex instruction_pattern("^([^\\s]*)\\s+(.*)$");
static const boost::regex rjmp("RJMP\\s+(.*)");
static const boost::regex continuation("\\s*\\^\\^\\^\\s*");
bool in_block = false; // whether we've seen the first label of a block
std::string buffer;
Blocks result;
Instructions current_block;
while (getline( input, buffer))
{
boost::smatch match;
regex_match( buffer, match, instruction_pattern);
in_block = in_block || match[1].length() != 0;
if (in_block)
{
string label = match[1];
string ins = match[2];
if (ins.empty()) ins = "NOP";
if (!regex_match( ins, continuation))
{
current_block.push_back( {label, ins});
}
if (regex_match( ins, rjmp))
{
optimize_nops( current_block, result);
in_block = false;
current_block.clear();
}
}
}
if (!current_block.empty())
{
optimize_nops( current_block, result);
}
return result;
}
/**
* print a single block of instructions.
*/
void print_block( const Block &block, std::ostream &out)
{
for (const auto &ins: block.code)
{
string label = ins.label.empty()?"":ins.label + ":";
out << '"' << label << string( 8-label.size(), ' ') << ins.instruction << string(40 - ins.instruction.size(), ' ') << "\\n\"" << '\n';
}
}
/**
* print all blocks in a vector of blocks, in the order in which they appear in the vector.
*/
void print_blocks( const Blocks &blocks, std::ostream &out)
{
for (const auto &b: blocks)
{
print_block( b, out);
}
}
void add_block_at_optimum( Blocks &blocks, Block block)
{
auto optimum = std::numeric_limits<int>::max();
Blocks result;
for (auto it = blocks.begin(); it != blocks.end();++it)
{
Blocks candidate( blocks.begin(), it);
candidate.push_back( block);
candidate.insert( candidate.end(), it, blocks.end());
auto fit = calculate_fit( candidate);
if (fit < optimum)
{
optimum = fit;
std::swap( result, candidate);
}
}
blocks.push_back( block);
if (calculate_fit( blocks) >= optimum)
{
std::swap( blocks, result);
}
}
/**
* Heuristic optimization of blocks. Just add the blocks one-by-one to a new list of blocks.
* This is not guaranteed to find the optimum arrangement. The result depends on the order in which
* the blocks are offered.
*/
Blocks optimize_blocks( Blocks blocks)
{
Blocks result;
for (const auto &block: blocks)
{
add_block_at_optimum( result, block);
}
return result;
}
/**
* Heuristic optimization of blocks.
* Retry the optimize_blocks() function with blocks in random order and return if a fitting block sequence is found or
* if the iteration count is exceeded.
*/
Blocks optimize_blocks( Blocks blocks, unsigned int iteration_count)
{
std::random_device rd;
std::mt19937 g(rd());
Blocks best_blocks;
auto fit_found = false;
while (!fit_found && iteration_count--)
{
vector<Block> temp( blocks.begin(), blocks.end());
std::shuffle( temp.begin(), temp.end(), g);
blocks.assign( temp.begin(), temp.end());
best_blocks = optimize_blocks( blocks);
fit_found = is_fit( best_blocks);
}
return best_blocks;
}
/**
* Read a list of assembly instructions and try to re-order those instructions so that the maximum jump distance
* is not higher than the maximum allowed jump distance, preserving the semantics of the program.
* Additionally, empty statements within blocks are translated to NOP instructions and adjacent NOPs are
* translated into RJMP instructions to save space.
*
* assembly instructions will be read from stdin, or if a file name is given from that file name.
* output is to standard out.
*/
int main( int argc, char *argv[])
{
using std::cout;
using std::cin;
Blocks blocks;
if (argc >1)
{
std::ifstream inputfile(argv[1]);
blocks = parse_code( inputfile);
}
else
{
blocks = parse_code( cin);
}
cout << blocks.size() << " blocks in this code\n";
// try to find an optimum block sequence or give up if we don't succeed after 10000 iterations.
Blocks best_blocks = optimize_blocks( blocks, 10000);
print_blocks( best_blocks, cout);
if (is_fit(best_blocks))
{
cout << "\nSuccess, longest jump distance is:" << calculate_fit( best_blocks) << "\n";
}
else
{
cout << "\nNothing longest jump distance is:" << calculate_fit( best_blocks) << "\n";
}
return 0;
}