Actions

Difference between revisions of "Arrange timed code.cpp"

From Just in Time

m
m
 
(2 intermediate revisions by the same user not shown)
Line 1: Line 1:
 +
This is a small C++ program that will read AVR assembly instructions from a file. It will then chop the list of assembly instructions into blocks at each unconditional jump (RJMP) that is found. Pairs of adjacent nops will also be translated into RJMPs, which also are used to cut up the sources in smaller blocks. Then the program will try to arrange the blocks in such a way that all conditional branches (BRxx instructions) are close enough to their branch target. Close enough means within 64 instructions, which is the maximum jump that a conditional branch can make.
 +
 
<syntaxhighlight lang="cpp">
 
<syntaxhighlight lang="cpp">
 
//
 
//
// Copyright (c) 2013 Danny Havenith
+
// Copyright (c) 2013,2014 Danny Havenith
 
//
 
//
 
// Distributed under the Boost Software License, Version 1.0. (See accompanying
 
// Distributed under the Boost Software License, Version 1.0. (See accompanying
Line 8: Line 10:
  
 
#include <iostream>
 
#include <iostream>
 +
#include <fstream>
 
#include <string>
 
#include <string>
 
#include <map>
 
#include <map>
 
#include <vector>
 
#include <vector>
 +
#include <list>
 
#include <algorithm>
 
#include <algorithm>
 
#include <limits>
 
#include <limits>
#include <boost/foreach.hpp>
+
#include <boost/regex.hpp>
 +
 
 +
using std::vector;
 +
using std::string;
 +
using std::list;
 +
using std::map;
  
using namespace std;
 
 
/// Simple struct that can be used as an input to describe blocks of assembly instructions with jumps and labels
 
/// Simple struct that can be used as an input to describe blocks of assembly instructions with jumps and labels
/// one instance of this struct is created for every instruction word in the assembly code. The member "label" refers to the
+
struct Instruction {
/// label at that location, if one is defined and the member "jumpto" has the name of the label to which this instruction jumps, if it is
 
/// a branch or jump instruction.
 
struct labelinfo {
 
 
     string label;
 
     string label;
     string jumpto;
+
     string instruction;
 
};
 
};
 +
using Instructions = vector<Instruction>;
 +
  
 
/// a contiguous block of assembly instructions.
 
/// a contiguous block of assembly instructions.
Line 29: Line 36:
 
/// all relative jumps that emanate from this block and all labels that
 
/// all relative jumps that emanate from this block and all labels that
 
/// are defined in this block.
 
/// are defined in this block.
struct block
+
class Block
 
{
 
{
    // a map from label strings to relative positions
+
public:
    // for labels this is the relative position of the label
+
Block( const Instructions &code)
    // for jumps this is the position of the jump instruction that jumps to that label.
+
:code(code)
     typedef map<string, int> labelmap;
+
{
    int      size;
+
update_labels();
 +
}
 +
 
 +
 
 +
     typedef map<string, int> LabelMap;
 
     string  name;
 
     string  name;
     labelmap labels;
+
     LabelMap labels;
     labelmap jumps;
+
     LabelMap jumps;
 +
    Instructions code;
 +
 
 +
private:
 +
void update_labels();
  
    bool operator<( const block &other) const
 
    {
 
        return name < other.name;
 
    }
 
 
};
 
};
  
/// Parse labelinfo structs and create disjunct blocks.
+
using Blocks = list<Block>;
/// This function will traverse the range of labelinfos and create blocks on-the-go.
+
 
/// The last statement of a block is recognized by the label "END". Whenever this label is encountered all
+
void Block::update_labels()
/// statements so far will be combined in one block and this block will be added to the result.
 
vector<block> parse_blocks( labelinfo *begin, labelinfo *end)
 
 
{
 
{
    vector<block> result;
+
static const boost::regex branch("BR..\\s+(\\S+)\\s*");
 
 
 
     int current_line = 0;
 
     int current_line = 0;
     block current_block;
+
     for( const auto &ins: code)
    while (begin != end)
 
 
     {
 
     {
        if (begin->label == "END")
+
if (!ins.label.empty())
        {
+
{
            current_block.size = current_line + 1;
+
if (current_line == 0) name = ins.label;
            result.push_back( current_block);
+
labels[ins.label] = current_line;
            current_line = 0;
+
}
            current_block = block();
+
 
        }
+
boost::smatch match;
        else
+
if (regex_match( ins.instruction, match, branch))
        {
+
{
            if (!begin->label.empty())
+
jumps[ match[1]] = current_line;
            {
+
}
                if (current_line == 0) current_block.name = begin->label;
+
++current_line;
                current_block.labels[begin->label] = current_line;
 
            }
 
            if (!begin->jumpto.empty())
 
            {
 
                current_block.jumps[begin->jumpto] = current_line;
 
            }
 
            ++current_line;
 
        }
 
        ++begin;
 
 
     }
 
     }
    return result;
 
 
}
 
}
  
 
/// Given a sequence of blocks, determine the absolute addresses of the labels that
 
/// Given a sequence of blocks, determine the absolute addresses of the labels that
 
/// appear in each block.
 
/// appear in each block.
block::labelmap map_labels( const vector<block>& blocks)
+
Block::LabelMap map_labels( const Blocks& blocks)
 
{
 
{
     block::labelmap label_addresses;
+
     Block::LabelMap label_addresses;
  
 
     // determine absolute labels
 
     // determine absolute labels
 
     int current_block_startaddress = 0;
 
     int current_block_startaddress = 0;
     BOOST_FOREACH( const block &b, blocks)
+
     for( const auto &b: blocks)
 
     {
 
     {
         BOOST_FOREACH( const block::labelmap::value_type &label, b.labels)
+
         for( const auto &label: b.labels)
 
         {
 
         {
 
             label_addresses[label.first] = label.second
 
             label_addresses[label.first] = label.second
 
                     + current_block_startaddress;
 
                     + current_block_startaddress;
 
         }
 
         }
         current_block_startaddress += b.size;
+
         current_block_startaddress += b.code.size();
 
     }
 
     }
  
Line 109: Line 106:
 
/// This function is faster than the calculate_fit() function because it can terminate immediately if it finds a jump
 
/// This function is faster than the calculate_fit() function because it can terminate immediately if it finds a jump
 
/// that does not qualify.
 
/// that does not qualify.
bool is_fit( const vector<block> &blocks)
+
bool is_fit( const Blocks &blocks)
 
{
 
{
     block::labelmap label_addresses = map_labels(blocks);
+
     Block::LabelMap label_addresses = map_labels(blocks);
  
 
     // see if all jumps are in range.
 
     // see if all jumps are in range.
 
     int current_block_startaddress = 0;
 
     int current_block_startaddress = 0;
     BOOST_FOREACH( const block &b, blocks)
+
     for( const auto &b: blocks)
 
     {
 
     {
         BOOST_FOREACH( const block::labelmap::value_type &jump, b.jumps)
+
         for( const auto &jump: b.jumps)
 
         {
 
         {
 
             int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
 
             int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
 
             if (offset > 64 || offset < -63) return false;
 
             if (offset > 64 || offset < -63) return false;
 
         }
 
         }
         current_block_startaddress += b.size;
+
         current_block_startaddress += b.code.size();
 
     }
 
     }
  
Line 128: Line 125:
 
}
 
}
  
/// This is largely a copy of the is_fit() function with some changes. Instead of returning a fit/not-fit boolean,
+
/**
/// this function returns a score that is calculated as the maximum (absolute) jump distance.
+
*
/// This function is slightly less efficient than that is_fit() function, because it needss to complete the
+
* This is largely a copy of the is_fit() function with some changes. Instead of returning a fit/not-fit boolean,
/// calculation, even if the score is higher than the fitness criterium.
+
* this function returns a score that is calculated as the maximum (absolute) jump distance.
int calculate_fit( const vector<block> &blocks)
+
* This function is slightly less efficient than that is_fit() function, because it needss to complete the
 +
* calculation, even if the score is higher than the fitness criterium.
 +
*
 +
*/
 +
int calculate_fit( const Blocks &blocks)
 
{
 
{
     block::labelmap label_addresses = map_labels( blocks);
+
     Block::LabelMap label_addresses = map_labels( blocks);
  
 
     // calculate the maximum jump offset in this sequence of blocks.
 
     // calculate the maximum jump offset in this sequence of blocks.
 
     int max_offset = 0;
 
     int max_offset = 0;
 
     int current_block_startaddress = 0;
 
     int current_block_startaddress = 0;
     BOOST_FOREACH( const block &b, blocks)
+
     for( const auto &b: blocks)
 
     {
 
     {
         BOOST_FOREACH( const block::labelmap::value_type &jump, b.jumps)
+
         for ( const auto &jump: b.jumps)
 
         {
 
         {
 
             int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
 
             int offset = label_addresses[jump.first] - (jump.second + current_block_startaddress);
Line 147: Line 148:
 
             if (-offset > max_offset) max_offset = -offset;
 
             if (-offset > max_offset) max_offset = -offset;
 
         }
 
         }
         current_block_startaddress += b.size;
+
         current_block_startaddress += b.code.size();
 
     }
 
     }
  
Line 153: Line 154:
 
}
 
}
  
/// Description of the code blocks. This was created with some spreadsheet magic from the original code spreadsheet
+
/**
/// because I was too lazy to implement input text parsing.
+
* Replace adjacent NOPs with single RJMP instructions.
labelinfo labels[] = {
+
* The replacement is only performed if the second NOP has no label.
{ "M2", ""},
+
*/
{ "", ""},
+
void optimize_nops( Instructions code, Blocks &results)
{ "", ""},
+
{
{ "", ""},
+
static int label_counter = 0;
{ "", ""},
+
auto position = code.begin();
{ "", ""},
+
auto previous_position = position;
{ "", "H1"},
+
auto both_are_nop = [](const Instruction &left, const Instruction &right){
{ "", ""},
+
return left.instruction == "NOP" && right.instruction == "NOP" && right.label.empty();
{ "", "L16"},
+
};
{ "", ""},
+
 
{ "L9", ""},
+
//Instructions first_instructions;
{ "", ""},
+
// find adjacent NOPs and break up the block, using RJMP to jump over the second NOP
{ "L5", ""},
+
position = std::adjacent_find( previous_position, code.end(), both_are_nop);
{ "", ""},
+
while ( code.end() != position)
{ "", "L1"},
+
{
{ "", ""},
+
Instructions first_instructions;
{ "", "M1"},
+
first_instructions.insert( first_instructions.end(), previous_position, position);
{ "", ""},
+
previous_position = position+2;
{ "", ""},
+
if (previous_position->label.empty()) previous_position->label = "brk" + std::to_string( label_counter++);
{ "", ""},
+
first_instructions.push_back({position->label, "RJMP " + previous_position->label});
{ "", ""},
+
results.push_back( Block(first_instructions));
{ "", ""},
+
position = std::adjacent_find( previous_position, code.end(), both_are_nop);
{ "", ""},
+
}
{ "", ""},
+
 
{ "", ""},
+
Instructions first_instructions;
{ "", "L4"},
+
first_instructions.insert( first_instructions.end(), previous_position, position);
{ "", ""},
+
results.push_back( Block(first_instructions));
{ "L12", ""},
+
}
{ "", ""},
+
 
{ "", ""},
+
 
{ "END", ""},
+
 
{ "L1", ""},
+
/**
{ "", "M2"},
+
* Read assembly instructions from a text file.
{ "", ""},
+
* This function assumes the following:
{ "", ""},
+
*    - each line consists of an optional label, followed by (non-optional) whitespace, followed by an instruction
{ "", ""},
+
*    - the text consists of blocks, each block starts with a line that has a label and ends with an RJMP instruction
{ "", ""},
+
*    - the text lines inbetween blocks can be ignored.
{ "", ""},
+
*    - a line with a label, but without instruction is considered to have a "NOP" instruction
{ "", "L8"},
+
*    - lines with the instruction "^^^" (possibly surrounded by whitespace) are ignored.
{ "", ""},
+
*
{ "L15", ""},
+
*    When a sequence of two adjacent NOPs is encountered, for which the second NOP has no label,
{ "", ""},
+
*    the two NOPs will be replaced by a single RJMP instruction.
{ "", ""},
+
*/
{ "END", ""},
+
Blocks parse_code( std::istream &input)
{ "L10", ""},
+
{
{ "", "M3"},
+
// an instruction consists of an optional label, followed by whitespace, followed by the instruction.
{ "", ""},
+
static const boost::regex instruction_pattern("^([^\\s]*)\\s+(.*)$");
{ "", ""},
+
static const boost::regex rjmp("RJMP\\s+(.*)");
{ "", ""},
+
static const boost::regex continuation("\\s*\\^\\^\\^\\s*");
{ "", ""},
+
 
{ "", ""},
+
bool in_block = false; // whether we've seen the first label of a block
{ "", ""},
+
std::string buffer;
{ "", "L12"},
+
Blocks result;
{ "", ""},
+
Instructions current_block;
{ "L4", ""},
+
while (getline( input, buffer))
{ "", ""},
+
{
{ "", ""},
+
boost::smatch match;
{ "END", ""},
+
regex_match( buffer, match, instruction_pattern);
{ "M4", ""},
+
 
{ "", ""},
+
in_block = in_block || match[1].length() != 0;
{ "", ""},
+
if (in_block)
{ "", ""},
+
{
{ "", ""},
+
string label = match[1];
{ "", ""},
+
string ins = match[2];
{ "", "H1"},
+
if (ins.empty()) ins = "NOP";
{ "", ""},
+
 
{ "", "L9"},
+
if (!regex_match( ins, continuation))
{ "", ""},
+
{
{ "L16", ""},
+
current_block.push_back( {label, ins});
{ "", ""},
+
}
{ "L13", ""},
+
 
{ "", ""},
+
if (regex_match( ins, rjmp))
{ "", ""},
+
{
{ "", "L10"},
+
optimize_nops( current_block, result);
{ "", ""},
+
in_block = false;
{ "", "M4"},
+
current_block.clear();
{ "", ""},
+
}
{ "", ""},
+
}
{ "", ""},
+
}
{ "", ""},
+
 
{ "", ""},
+
if (!current_block.empty())
{ "", "L15"},
+
{
{ "", ""},
+
optimize_nops( current_block, result);
{ "L8", ""},
+
}
{ "", ""},
+
 
{ "", ""},
+
return result;
{ "END", ""},
+
}
{ "M1", ""},
+
 
{ "", ""},
+
/**
{ "", ""},
+
* print a single block of instructions.
{ "", ""},
+
*/
{ "", ""},
+
void print_block( const Block &block, std::ostream &out)
{ "", ""},
+
{
{ "", ""},
+
for (const auto &ins: block.code)
{ "", "H2"},
+
{
{ "", ""},
+
string label = ins.label.empty()?"":ins.label + ":";
{ "", "L9"},
+
out << '"' << label << string( 8-label.size(), ' ') << ins.instruction << string(40 - ins.instruction.size(), ' ') << "\\n\"" << '\n';
{ "", ""},
+
}
{ "END", ""},
+
}
{ "M3", ""},
+
 
{ "", ""},
+
/**
{ "", ""},
+
* print all blocks in a vector of blocks, in the order in which they appear in the vector.
{ "", ""},
+
*/
{ "", ""},
+
void print_blocks( const Blocks &blocks, std::ostream &out)
{ "", "H2"},
+
{
{ "", ""},
+
for (const auto &b: blocks)
{ "", "L16"},
+
{
{ "", ""},
+
print_block( b, out);
{ "END", ""},
+
}
{ "H1", ""},
+
}
{ "", ""},
+
 
{ "", ""},
+
void add_block_at_optimum( Blocks &blocks, Block block)
{ "", ""},
+
{
{ "", ""},
+
auto optimum = std::numeric_limits<int>::max();
{ "END", ""},
+
Blocks result;
{ "H2", ""},
+
 
{ "", ""},
+
for (auto it = blocks.begin(); it != blocks.end();++it)
{ "", ""},
+
{
{ "", ""},
+
Blocks candidate( blocks.begin(), it);