]> git.saurik.com Git - apple/javascriptcore.git/blobdiff - assembler/ARMv7Assembler.h
JavaScriptCore-721.26.tar.gz
[apple/javascriptcore.git] / assembler / ARMv7Assembler.h
index 7cf88736c74923c93ba15a727a5a92a2437a74ec..13ad3e0d3259d83e4147a139082734bc62b376f4 100644 (file)
@@ -1,5 +1,6 @@
 /*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 University of Szeged
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -26,9 +27,7 @@
 #ifndef ARMAssembler_h
 #define ARMAssembler_h
 
-#include <wtf/Platform.h>
-
-#if ENABLE(ASSEMBLER) && PLATFORM_ARM_ARCH(7)
+#if ENABLE(ASSEMBLER) && CPU(ARM_THUMB2)
 
 #include "AssemblerBuffer.h"
 #include <wtf/Assertions.h>
@@ -37,7 +36,7 @@
 
 namespace JSC {
 
-namespace ARM {
+namespace ARMRegisters {
     typedef enum {
         r0,
         r1,
@@ -57,107 +56,122 @@ namespace ARM {
         r15, pc = r15,
     } RegisterID;
 
-    // s0 == d0 == q0
-    // s4 == d2 == q1
-    // etc
     typedef enum {
-        s0 = 0,
-        s1 = 1,
-        s2 = 2,
-        s3 = 3,
-        s4 = 4,
-        s5 = 5,
-        s6 = 6,
-        s7 = 7,
-        s8 = 8,
-        s9 = 9,
-        s10 = 10,
-        s11 = 11,
-        s12 = 12,
-        s13 = 13,
-        s14 = 14,
-        s15 = 15,
-        s16 = 16,
-        s17 = 17,
-        s18 = 18,
-        s19 = 19,
-        s20 = 20,
-        s21 = 21,
-        s22 = 22,
-        s23 = 23,
-        s24 = 24,
-        s25 = 25,
-        s26 = 26,
-        s27 = 27,
-        s28 = 28,
-        s29 = 29,
-        s30 = 30,
-        s31 = 31,
-        d0 = 0 << 1,
-        d1 = 1 << 1,
-        d2 = 2 << 1,
-        d3 = 3 << 1,
-        d4 = 4 << 1,
-        d5 = 5 << 1,
-        d6 = 6 << 1,
-        d7 = 7 << 1,
-        d8 = 8 << 1,
-        d9 = 9 << 1,
-        d10 = 10 << 1,
-        d11 = 11 << 1,
-        d12 = 12 << 1,
-        d13 = 13 << 1,
-        d14 = 14 << 1,
-        d15 = 15 << 1,
-        d16 = 16 << 1,
-        d17 = 17 << 1,
-        d18 = 18 << 1,
-        d19 = 19 << 1,
-        d20 = 20 << 1,
-        d21 = 21 << 1,
-        d22 = 22 << 1,
-        d23 = 23 << 1,
-        d24 = 24 << 1,
-        d25 = 25 << 1,
-        d26 = 26 << 1,
-        d27 = 27 << 1,
-        d28 = 28 << 1,
-        d29 = 29 << 1,
-        d30 = 30 << 1,
-        d31 = 31 << 1,
-        q0 = 0 << 2,
-        q1 = 1 << 2,
-        q2 = 2 << 2,
-        q3 = 3 << 2,
-        q4 = 4 << 2,
-        q5 = 5 << 2,
-        q6 = 6 << 2,
-        q7 = 7 << 2,
-        q8 = 8 << 2,
-        q9 = 9 << 2,
-        q10 = 10 << 2,
-        q11 = 11 << 2,
-        q12 = 12 << 2,
-        q13 = 13 << 2,
-        q14 = 14 << 2,
-        q15 = 15 << 2,
-        q16 = 16 << 2,
-        q17 = 17 << 2,
-        q18 = 18 << 2,
-        q19 = 19 << 2,
-        q20 = 20 << 2,
-        q21 = 21 << 2,
-        q22 = 22 << 2,
-        q23 = 23 << 2,
-        q24 = 24 << 2,
-        q25 = 25 << 2,
-        q26 = 26 << 2,
-        q27 = 27 << 2,
-        q28 = 28 << 2,
-        q29 = 29 << 2,
-        q30 = 30 << 2,
-        q31 = 31 << 2,
-    } FPRegisterID;
+        s0,
+        s1,
+        s2,
+        s3,
+        s4,
+        s5,
+        s6,
+        s7,
+        s8,
+        s9,
+        s10,
+        s11,
+        s12,
+        s13,
+        s14,
+        s15,
+        s16,
+        s17,
+        s18,
+        s19,
+        s20,
+        s21,
+        s22,
+        s23,
+        s24,
+        s25,
+        s26,
+        s27,
+        s28,
+        s29,
+        s30,
+        s31,
+    } FPSingleRegisterID;
+
+    typedef enum {
+        d0,
+        d1,
+        d2,
+        d3,
+        d4,
+        d5,
+        d6,
+        d7,
+        d8,
+        d9,
+        d10,
+        d11,
+        d12,
+        d13,
+        d14,
+        d15,
+        d16,
+        d17,
+        d18,
+        d19,
+        d20,
+        d21,
+        d22,
+        d23,
+        d24,
+        d25,
+        d26,
+        d27,
+        d28,
+        d29,
+        d30,
+        d31,
+    } FPDoubleRegisterID;
+
+    typedef enum {
+        q0,
+        q1,
+        q2,
+        q3,
+        q4,
+        q5,
+        q6,
+        q7,
+        q8,
+        q9,
+        q10,
+        q11,
+        q12,
+        q13,
+        q14,
+        q15,
+        q16,
+        q17,
+        q18,
+        q19,
+        q20,
+        q21,
+        q22,
+        q23,
+        q24,
+        q25,
+        q26,
+        q27,
+        q28,
+        q29,
+        q30,
+        q31,
+    } FPQuadRegisterID;
+
+    inline FPSingleRegisterID asSingle(FPDoubleRegisterID reg)
+    {
+        ASSERT(reg < d16);
+        return (FPSingleRegisterID)(reg << 1);
+    }
+
+    inline FPDoubleRegisterID asDouble(FPSingleRegisterID reg)
+    {
+        ASSERT(!(reg & 1));
+        return (FPDoubleRegisterID)(reg >> 1);
+    }
 }
 
 class ARMv7Assembler;
@@ -199,12 +213,12 @@ class ARMThumbImmediate {
         };
     } PatternBytes;
 
-    ALWAYS_INLINE static int32_t countLeadingZerosPartial(uint32_t& value, int32_t& zeros, const int N)
+    ALWAYS_INLINE static void countLeadingZerosPartial(uint32_t& value, int32_t& zeros, const int N)
     {
-        if (value & ~((1<<N)-1)) /* check for any of the top N bits (of 2N bits) are set */ \
-            value >>= N;         /* if any were set, lose the bottom N */ \
-        else                     /* if none of the top N bits are set, */ \
-            zeros += N;          /* then we have identified N leading zeros */
+        if (value & ~((1 << N) - 1)) /* check for any of the top N bits (of 2N bits) are set */
+            value >>= N;             /* if any were set, lose the bottom N */
+        else                         /* if none of the top N bits are set, */
+            zeros += N;              /* then we have identified N leading zeros */
     }
 
     static int32_t countLeadingZeros(uint32_t value)
@@ -236,6 +250,11 @@ class ARMThumbImmediate {
     ARMThumbImmediate(ThumbImmediateType type, uint16_t value)
         : m_type(TypeUInt16)
     {
+        // Make sure this constructor is only reached with type TypeUInt16;
+        // this extra parameter makes the code a little clearer by making it
+        // explicit at call sites which type is being constructed
+        ASSERT_UNUSED(type, type == TypeUInt16);
+
         m_value.asInt = value;
     }
 
@@ -285,7 +304,7 @@ public:
         }
 
         if ((bytes.byte1 == bytes.byte3) && !(bytes.byte0 | bytes.byte2)) {
-            encoding.immediate = bytes.byte0;
+            encoding.immediate = bytes.byte1;
             encoding.pattern = 2;
             return ARMThumbImmediate(TypeEncoded, encoding);
         }
@@ -350,6 +369,39 @@ private:
     ThumbImmediateValue m_value;
 };
 
+class VFPImmediate {
+public:
+    VFPImmediate(double d)
+        : m_value(-1)
+    {
+        union {
+            uint64_t i;
+            double d;
+        } u;
+
+        u.d = d;
+
+        int sign = static_cast<int>(u.i >> 63);
+        int exponent = static_cast<int>(u.i >> 52) & 0x7ff;
+        uint64_t mantissa = u.i & 0x000fffffffffffffull;
+
+        if ((exponent >= 0x3fc) && (exponent <= 0x403) && !(mantissa & 0x0000ffffffffffffull))
+            m_value = (sign << 7) | ((exponent & 7) << 4) | (int)(mantissa >> 48);
+    }
+
+    bool isValid()
+    {
+        return m_value != -1;
+    }
+    
+    uint8_t value()
+    {
+        return (uint8_t)m_value;
+    }
+
+private:
+    int m_value;
+};
 
 typedef enum {
     SRType_LSL,
@@ -388,27 +440,22 @@ private:
         };
         struct {
             unsigned type   : 2;
-            unsigned amount : 5;
+            unsigned amount : 6;
         };
     } m_u;
 };
 
-
-/*
-Some features of the Thumb instruction set are deprecated in ARMv7. Deprecated features affecting 
-instructions supported by ARMv7-M are as follows: 
-• use of the PC as <Rd> or <Rm> in a 16-bit ADD (SP plus register) instruction 
-• use of the SP as <Rm> in a 16-bit ADD (SP plus register) instruction 
-• use of the SP as <Rm> in a 16-bit CMP (register) instruction 
-• use of MOV (register) instructions in which <Rd> is the SP or PC and <Rm> is also the SP or PC. 
-• use of <Rn> as the lowest-numbered register in the register list of a 16-bit STM instruction with base 
-register writeback 
-*/
-
 class ARMv7Assembler {
 public:
-    typedef ARM::RegisterID RegisterID;
-    typedef ARM::FPRegisterID FPRegisterID;
+    ~ARMv7Assembler()
+    {
+        ASSERT(m_jumpsToLink.isEmpty());
+    }
+
+    typedef ARMRegisters::RegisterID RegisterID;
+    typedef ARMRegisters::FPSingleRegisterID FPSingleRegisterID;
+    typedef ARMRegisters::FPDoubleRegisterID FPDoubleRegisterID;
+    typedef ARMRegisters::FPQuadRegisterID FPQuadRegisterID;
 
     // (HS, LO, HI, LS) -> (AE, B, A, BE)
     // (VS, VC) -> (O, NO)
@@ -428,14 +475,45 @@ public:
         ConditionGT,
         ConditionLE,
         ConditionAL,
-
+        
         ConditionCS = ConditionHS,
         ConditionCC = ConditionLO,
     } Condition;
 
+    enum JumpType { JumpFixed, JumpNoCondition, JumpCondition, JumpNoConditionFixedSize, JumpConditionFixedSize, JumpTypeCount };
+    enum JumpLinkType { LinkInvalid, LinkJumpT1, LinkJumpT2, LinkJumpT3,
+        LinkJumpT4, LinkConditionalJumpT4, LinkBX, LinkConditionalBX, JumpLinkTypeCount };
+    static const int JumpSizes[JumpLinkTypeCount];
+    static const int JumpPaddingSizes[JumpTypeCount];
+    class LinkRecord {
+    public:
+        LinkRecord(intptr_t from, intptr_t to, JumpType type, Condition condition)
+            : m_from(from)
+            , m_to(to)
+            , m_type(type)
+            , m_linkType(LinkInvalid)
+            , m_condition(condition)
+        {
+        }
+        intptr_t from() const { return m_from; }
+        void setFrom(intptr_t from) { m_from = from; }
+        intptr_t to() const { return m_to; }
+        JumpType type() const { return m_type; }
+        JumpLinkType linkType() const { return m_linkType; }
+        void setLinkType(JumpLinkType linkType) { ASSERT(m_linkType == LinkInvalid); m_linkType = linkType; }
+        Condition condition() const { return m_condition; }
+    private:
+        intptr_t m_from : 31;
+        intptr_t m_to : 31;
+        JumpType m_type : 3;
+        JumpLinkType m_linkType : 4;
+        Condition m_condition : 16;
+    };
+    
     class JmpSrc {
         friend class ARMv7Assembler;
         friend class ARMInstructionFormatter;
+        friend class LinkBuffer;
     public:
         JmpSrc()
             : m_offset(-1)
@@ -443,17 +521,32 @@ public:
         }
 
     private:
-        JmpSrc(int offset)
+        JmpSrc(int offset, JumpType type)
             : m_offset(offset)
+            , m_condition(0xffff)
+            , m_type(type)
         {
+            ASSERT(m_type == JumpFixed || m_type == JumpNoCondition || m_type == JumpNoConditionFixedSize);
+        }
+
+        JmpSrc(int offset, JumpType type, Condition condition)
+            : m_offset(offset)
+            , m_condition(condition)
+            , m_type(type)
+        {
+            ASSERT(m_type == JumpFixed || m_type == JumpCondition || m_type == JumpConditionFixedSize);
         }
 
         int m_offset;
+        Condition m_condition : 16;
+        JumpType m_type : 16;
+        
     };
     
     class JmpDst {
         friend class ARMv7Assembler;
         friend class ARMInstructionFormatter;
+        friend class LinkBuffer;
     public:
         JmpDst()
             : m_offset(-1)
@@ -480,56 +573,19 @@ private:
     // ARMv7, Appx-A.6.3
     bool BadReg(RegisterID reg)
     {
-        return (reg == ARM::sp) || (reg == ARM::pc);
-    }
-
-    bool isSingleRegister(FPRegisterID reg)
-    {
-        // Check that the high bit isn't set (q16+), and that the low bit isn't (s1, s3, etc).
-        return !(reg & ~31);
-    }
-
-    bool isDoubleRegister(FPRegisterID reg)
-    {
-        // Check that the high bit isn't set (q16+), and that the low bit isn't (s1, s3, etc).
-        return !(reg & ~(31 << 1));
-    }
-
-    bool isQuadRegister(FPRegisterID reg)
-    {
-        return !(reg & ~(31 << 2));
-    }
-
-    uint32_t singleRegisterNum(FPRegisterID reg)
-    {
-        ASSERT(isSingleRegister(reg));
-        return reg;
+        return (reg == ARMRegisters::sp) || (reg == ARMRegisters::pc);
     }
 
-    uint32_t doubleRegisterNum(FPRegisterID reg)
+    uint32_t singleRegisterMask(FPSingleRegisterID rdNum, int highBitsShift, int lowBitShift)
     {
-        ASSERT(isDoubleRegister(reg));
-        return reg >> 1;
-    }
-
-    uint32_t quadRegisterNum(FPRegisterID reg)
-    {
-        ASSERT(isQuadRegister(reg));
-        return reg >> 2;
-    }
-
-    uint32_t singleRegisterMask(FPRegisterID rd, int highBitsShift, int lowBitShift)
-    {
-        uint32_t rdNum = singleRegisterNum(rd);
         uint32_t rdMask = (rdNum >> 1) << highBitsShift;
         if (rdNum & 1)
             rdMask |= 1 << lowBitShift;
         return rdMask;
     }
 
-    uint32_t doubleRegisterMask(FPRegisterID rd, int highBitShift, int lowBitsShift)
+    uint32_t doubleRegisterMask(FPDoubleRegisterID rdNum, int highBitShift, int lowBitsShift)
     {
-        uint32_t rdNum = doubleRegisterNum(rd);
         uint32_t rdMask = (rdNum & 0xf) << lowBitsShift;
         if (rdNum & 16)
             rdMask |= 1 << highBitShift;
@@ -538,22 +594,17 @@ private:
 
     typedef enum {
         OP_ADD_reg_T1       = 0x1800,
-        OP_ADD_S_reg_T1     = 0x1800,
         OP_SUB_reg_T1       = 0x1A00,
-        OP_SUB_S_reg_T1     = 0x1A00,
         OP_ADD_imm_T1       = 0x1C00,
-        OP_ADD_S_imm_T1     = 0x1C00,
         OP_SUB_imm_T1       = 0x1E00,
-        OP_SUB_S_imm_T1     = 0x1E00,
         OP_MOV_imm_T1       = 0x2000,
         OP_CMP_imm_T1       = 0x2800,
         OP_ADD_imm_T2       = 0x3000,
-        OP_ADD_S_imm_T2     = 0x3000,
         OP_SUB_imm_T2       = 0x3800,
-        OP_SUB_S_imm_T2     = 0x3800,
         OP_AND_reg_T1       = 0x4000,
         OP_EOR_reg_T1       = 0x4040,
         OP_TST_reg_T1       = 0x4200,
+        OP_RSB_imm_T1       = 0x4240,
         OP_CMP_reg_T1       = 0x4280,
         OP_ORR_reg_T1       = 0x4300,
         OP_MVN_reg_T1       = 0x43C0,
@@ -561,11 +612,13 @@ private:
         OP_MOV_reg_T1       = 0x4600,
         OP_BLX              = 0x4700,
         OP_BX               = 0x4700,
-        OP_LDRH_reg_T1      = 0x5A00,
         OP_STR_reg_T1       = 0x5000,
         OP_LDR_reg_T1       = 0x5800,
+        OP_LDRH_reg_T1      = 0x5A00,
+        OP_LDRB_reg_T1      = 0x5C00,
         OP_STR_imm_T1       = 0x6000,
         OP_LDR_imm_T1       = 0x6800,
+        OP_LDRB_imm_T1      = 0x7800,
         OP_LDRH_imm_T1      = 0x8800,
         OP_STR_imm_T2       = 0x9000,
         OP_LDR_imm_T2       = 0x9800,
@@ -574,12 +627,16 @@ private:
         OP_SUB_SP_imm_T1    = 0xB080,
         OP_BKPT             = 0xBE00,
         OP_IT               = 0xBF00,
+        OP_NOP_T1           = 0xBF00,
     } OpcodeID;
 
     typedef enum {
+        OP_B_T1         = 0xD000,
+        OP_B_T2         = 0xE000,
         OP_AND_reg_T2   = 0xEA00,
         OP_TST_reg_T2   = 0xEA10,
         OP_ORR_reg_T2   = 0xEA40,
+        OP_ORR_S_reg_T2 = 0xEA50,
         OP_ASR_imm_T1   = 0xEA4F,
         OP_LSL_imm_T1   = 0xEA4F,
         OP_LSR_imm_T1   = 0xEA4F,
@@ -591,6 +648,19 @@ private:
         OP_SUB_reg_T2   = 0xEBA0,
         OP_SUB_S_reg_T2 = 0xEBB0,
         OP_CMP_reg_T2   = 0xEBB0,
+        OP_VSTR         = 0xED00,
+        OP_VLDR         = 0xED10,
+        OP_VMOV_StoC    = 0xEE00,
+        OP_VMOV_CtoS    = 0xEE10,
+        OP_VMUL_T2      = 0xEE20,
+        OP_VADD_T2      = 0xEE30,
+        OP_VSUB_T2      = 0xEE30,
+        OP_VDIV         = 0xEE80,
+        OP_VCMP         = 0xEEB0,
+        OP_VCVT_FPIVFP  = 0xEEB0,
+        OP_VMOV_IMM_T2  = 0xEEB0,
+        OP_VMRS         = 0xEEB0,
+        OP_B_T3a        = 0xF000,
         OP_B_T4a        = 0xF000,
         OP_AND_imm_T1   = 0xF000,
         OP_TST_imm      = 0xF010,
@@ -604,16 +674,21 @@ private:
         OP_SUB_imm_T3   = 0xF1A0,
         OP_SUB_S_imm_T3 = 0xF1B0,
         OP_CMP_imm_T2   = 0xF1B0,
+        OP_RSB_imm_T2   = 0xF1C0,
         OP_ADD_imm_T4   = 0xF200,
         OP_MOV_imm_T3   = 0xF240,
         OP_SUB_imm_T4   = 0xF2A0,
         OP_MOVT         = 0xF2C0,
+        OP_NOP_T2a      = 0xF3AF,
+        OP_LDRB_imm_T3  = 0xF810,
+        OP_LDRB_reg_T2  = 0xF810,
         OP_LDRH_reg_T2  = 0xF830,
         OP_LDRH_imm_T3  = 0xF830,
         OP_STR_imm_T4   = 0xF840,
         OP_STR_reg_T2   = 0xF840,
         OP_LDR_imm_T4   = 0xF850,
         OP_LDR_reg_T2   = 0xF850,
+        OP_LDRB_imm_T2  = 0xF890,
         OP_LDRH_imm_T2  = 0xF8B0,
         OP_STR_imm_T3   = 0xF8C0,
         OP_LDR_imm_T3   = 0xF8D0,
@@ -625,6 +700,20 @@ private:
     } OpcodeID1;
 
     typedef enum {
+        OP_VADD_T2b     = 0x0A00,
+        OP_VDIVb        = 0x0A00,
+        OP_VLDRb        = 0x0A00,
+        OP_VMOV_IMM_T2b = 0x0A00,
+        OP_VMUL_T2b     = 0x0A00,
+        OP_VSTRb        = 0x0A00,
+        OP_VMOV_CtoSb   = 0x0A10,
+        OP_VMOV_StoCb   = 0x0A10,
+        OP_VMRSb        = 0x0A10,
+        OP_VCMPb        = 0x0A40,
+        OP_VCVT_FPIVFPb = 0x0A40,
+        OP_VSUB_T2b     = 0x0A40,
+        OP_NOP_T2b      = 0x8000,
+        OP_B_T3b        = 0x8000,
         OP_B_T4b        = 0x9000,
     } OpcodeID2;
 
@@ -661,7 +750,7 @@ private:
             | (ifThenElseConditionBit(condition, inst3if) << 2)
             | (ifThenElseConditionBit(condition, inst4if) << 1)
             | 1;
-        ASSERT((condition != ConditionAL) || (mask & (mask - 1)));
+        ASSERT((condition != ConditionAL) || !(mask & (mask - 1)));
         return (condition << 4) | mask;
     }
     uint8_t ifThenElse(Condition condition, bool inst2if, bool inst3if)
@@ -669,39 +758,38 @@ private:
         int mask = (ifThenElseConditionBit(condition, inst2if) << 3)
             | (ifThenElseConditionBit(condition, inst3if) << 2)
             | 2;
-        ASSERT((condition != ConditionAL) || (mask & (mask - 1)));
+        ASSERT((condition != ConditionAL) || !(mask & (mask - 1)));
         return (condition << 4) | mask;
     }
     uint8_t ifThenElse(Condition condition, bool inst2if)
     {
         int mask = (ifThenElseConditionBit(condition, inst2if) << 3)
             | 4;
-        ASSERT((condition != ConditionAL) || (mask & (mask - 1)));
+        ASSERT((condition != ConditionAL) || !(mask & (mask - 1)));
         return (condition << 4) | mask;
     }
 
     uint8_t ifThenElse(Condition condition)
     {
         int mask = 8;
-        ASSERT((condition != ConditionAL) || (mask & (mask - 1)));
         return (condition << 4) | mask;
     }
 
 public:
-
+    
     void add(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         // Rd can only be SP if Rn is also SP.
-        ASSERT((rd != ARM::sp) || (rn == ARM::sp));
-        ASSERT(rd != ARM::pc);
-        ASSERT(rn != ARM::pc);
+        ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(imm.isValid());
 
-        if (rn == ARM::sp) {
+        if (rn == ARMRegisters::sp) {
             if (!(rd & 8) && imm.isUInt10()) {
                 m_formatter.oneWordOp5Reg3Imm8(OP_ADD_SP_imm_T1, rd, imm.getUInt10() >> 2);
                 return;
-            } else if ((rd == ARM::sp) && imm.isUInt9()) {
+            } else if ((rd == ARMRegisters::sp) && imm.isUInt9()) {
                 m_formatter.oneWordOp9Imm7(OP_ADD_SP_imm_T2, imm.getUInt9() >> 2);
                 return;
             }
@@ -725,9 +813,9 @@ public:
 
     void add(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
-        ASSERT((rd != ARM::sp) || (rn == ARM::sp));
-        ASSERT(rd != ARM::pc);
-        ASSERT(rn != ARM::pc);
+        ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp12Reg4FourFours(OP_ADD_reg_T3, rn, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
@@ -749,17 +837,17 @@ public:
     void add_S(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         // Rd can only be SP if Rn is also SP.
-        ASSERT((rd != ARM::sp) || (rn == ARM::sp));
-        ASSERT(rd != ARM::pc);
-        ASSERT(rn != ARM::pc);
+        ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(imm.isEncodedImm());
 
         if (!((rd | rn) & 8)) {
             if (imm.isUInt3()) {
-                m_formatter.oneWordOp7Reg3Reg3Reg3(OP_ADD_S_imm_T1, (RegisterID)imm.getUInt3(), rn, rd);
+                m_formatter.oneWordOp7Reg3Reg3Reg3(OP_ADD_imm_T1, (RegisterID)imm.getUInt3(), rn, rd);
                 return;
             } else if ((rd == rn) && imm.isUInt8()) {
-                m_formatter.oneWordOp5Reg3Imm8(OP_ADD_S_imm_T2, rd, imm.getUInt8());
+                m_formatter.oneWordOp5Reg3Imm8(OP_ADD_imm_T2, rd, imm.getUInt8());
                 return;
             }
         }
@@ -770,9 +858,9 @@ public:
     // Not allowed in an IT (if then) block?
     void add_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
-        ASSERT((rd != ARM::sp) || (rn == ARM::sp));
-        ASSERT(rd != ARM::pc);
-        ASSERT(rn != ARM::pc);
+        ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp12Reg4FourFours(OP_ADD_S_reg_T3, rn, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
@@ -781,7 +869,7 @@ public:
     void add_S(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         if (!((rd | rn | rm) & 8))
-            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_ADD_S_reg_T1, rm, rn, rd);
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_ADD_reg_T1, rm, rn, rd);
         else
             add_S(rd, rn, rm, ShiftTypeAndAmount());
     }
@@ -827,27 +915,33 @@ public:
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp12Reg4FourFours(OP_ASR_reg_T2, rn, FourFours(0xf, rd, 0, rm));
     }
-
+    
     // Only allowed in IT (if then) block if last instruction.
-    JmpSrc b()
+    JmpSrc b(JumpType type)
     {
         m_formatter.twoWordOp16Op16(OP_B_T4a, OP_B_T4b);
-        return JmpSrc(m_formatter.size());
+        return JmpSrc(m_formatter.size(), type);
     }
     
     // Only allowed in IT (if then) block if last instruction.
-    JmpSrc blx(RegisterID rm)
+    JmpSrc blx(RegisterID rm, JumpType type)
     {
-        ASSERT(rm != ARM::pc);
+        ASSERT(rm != ARMRegisters::pc);
         m_formatter.oneWordOp8RegReg143(OP_BLX, rm, (RegisterID)8);
-        return JmpSrc(m_formatter.size());
+        return JmpSrc(m_formatter.size(), type);
     }
 
     // Only allowed in IT (if then) block if last instruction.
-    JmpSrc bx(RegisterID rm)
+    JmpSrc bx(RegisterID rm, JumpType type, Condition condition)
     {
         m_formatter.oneWordOp8RegReg143(OP_BX, rm, (RegisterID)0);
-        return JmpSrc(m_formatter.size());
+        return JmpSrc(m_formatter.size(), type, condition);
+    }
+
+    JmpSrc bx(RegisterID rm, JumpType type)
+    {
+        m_formatter.oneWordOp8RegReg143(OP_BX, rm, (RegisterID)0);
+        return JmpSrc(m_formatter.size(), type);
     }
 
     void bkpt(uint8_t imm=0)
@@ -857,7 +951,7 @@ public:
 
     void cmn(RegisterID rn, ARMThumbImmediate imm)
     {
-        ASSERT(rn != ARM::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(imm.isEncodedImm());
 
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_CMN_imm, rn, (RegisterID)0xf, imm);
@@ -865,7 +959,7 @@ public:
 
     void cmp(RegisterID rn, ARMThumbImmediate imm)
     {
-        ASSERT(rn != ARM::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(imm.isEncodedImm());
 
         if (!(rn & 8) && imm.isUInt8())
@@ -876,7 +970,7 @@ public:
 
     void cmp(RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
-        ASSERT(rn != ARM::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp12Reg4FourFours(OP_CMP_reg_T2, rn, FourFours(shift.hi4(), 0xf, shift.lo4(), rm));
     }
@@ -938,15 +1032,15 @@ public:
         m_formatter.oneWordOp8Imm8(OP_IT, ifThenElse(cond, inst2if, inst3if, inst4if));
     }
 
-    // rt == ARM::pc only allowed if last instruction in IT (if then) block.
+    // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
     void ldr(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
     {
-        ASSERT(rn != ARM::pc); // LDR (literal)
+        ASSERT(rn != ARMRegisters::pc); // LDR (literal)
         ASSERT(imm.isUInt12());
 
         if (!((rt | rn) & 8) && imm.isUInt7())
             m_formatter.oneWordOp5Imm5Reg3Reg3(OP_LDR_imm_T1, imm.getUInt7() >> 2, rn, rt);
-        else if ((rn == ARM::sp) && !(rt & 8) && imm.isUInt10())
+        else if ((rn == ARMRegisters::sp) && !(rt & 8) && imm.isUInt10())
             m_formatter.oneWordOp5Reg3Imm8(OP_LDR_imm_T2, rt, imm.getUInt10() >> 2);
         else
             m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDR_imm_T3, rn, rt, imm.getUInt12());
@@ -965,8 +1059,8 @@ public:
     // if (wback) REG[rn] = _tmp
     void ldr(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
     {
-        ASSERT(rt != ARM::pc);
-        ASSERT(rn != ARM::pc);
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(index || wback);
         ASSERT(!wback | (rt != rn));
     
@@ -985,10 +1079,10 @@ public:
         m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDR_imm_T4, rn, rt, offset);
     }
 
-    // rt == ARM::pc only allowed if last instruction in IT (if then) block.
+    // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
     void ldr(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift=0)
     {
-        ASSERT(rn != ARM::pc); // LDR (literal)
+        ASSERT(rn != ARMRegisters::pc); // LDR (literal)
         ASSERT(!BadReg(rm));
         ASSERT(shift <= 3);
 
@@ -998,10 +1092,10 @@ public:
             m_formatter.twoWordOp12Reg4FourFours(OP_LDR_reg_T2, rn, FourFours(rt, 0, shift, rm));
     }
 
-    // rt == ARM::pc only allowed if last instruction in IT (if then) block.
+    // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
     void ldrh(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
     {
-        ASSERT(rn != ARM::pc); // LDR (literal)
+        ASSERT(rn != ARMRegisters::pc); // LDR (literal)
         ASSERT(imm.isUInt12());
 
         if (!((rt | rn) & 8) && imm.isUInt6())
@@ -1023,8 +1117,8 @@ public:
     // if (wback) REG[rn] = _tmp
     void ldrh(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
     {
-        ASSERT(rt != ARM::pc);
-        ASSERT(rn != ARM::pc);
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(index || wback);
         ASSERT(!wback | (rt != rn));
     
@@ -1046,7 +1140,7 @@ public:
     void ldrh(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift=0)
     {
         ASSERT(!BadReg(rt));   // Memory hint
-        ASSERT(rn != ARM::pc); // LDRH (literal)
+        ASSERT(rn != ARMRegisters::pc); // LDRH (literal)
         ASSERT(!BadReg(rm));
         ASSERT(shift <= 3);
 
@@ -1056,6 +1150,52 @@ public:
             m_formatter.twoWordOp12Reg4FourFours(OP_LDRH_reg_T2, rn, FourFours(rt, 0, shift, rm));
     }
 
+    void ldrb(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    {
+        ASSERT(rn != ARMRegisters::pc); // LDR (literal)
+        ASSERT(imm.isUInt12());
+
+        if (!((rt | rn) & 8) && imm.isUInt5())
+            m_formatter.oneWordOp5Imm5Reg3Reg3(OP_LDRB_imm_T1, imm.getUInt5(), rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDRB_imm_T2, rn, rt, imm.getUInt12());
+    }
+
+    void ldrb(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
+    {
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(index || wback);
+        ASSERT(!wback | (rt != rn));
+
+        bool add = true;
+        if (offset < 0) {
+            add = false;
+            offset = -offset;
+        }
+
+        ASSERT(!(offset & ~0xff));
+
+        offset |= (wback << 8);
+        offset |= (add   << 9);
+        offset |= (index << 10);
+        offset |= (1 << 11);
+
+        m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDRB_imm_T3, rn, rt, offset);
+    }
+
+    void ldrb(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
+    {
+        ASSERT(rn != ARMRegisters::pc); // LDR (literal)
+        ASSERT(!BadReg(rm));
+        ASSERT(shift <= 3);
+
+        if (!shift && !((rt | rn | rm) & 8))
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_LDRB_reg_T1, rm, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4FourFours(OP_LDRB_reg_T2, rn, FourFours(rt, 0, shift, rm));
+    }
+
     void lsl(RegisterID rd, RegisterID rm, int32_t shiftAmount)
     {
         ASSERT(!BadReg(rd));
@@ -1145,6 +1285,12 @@ public:
             mvn(rd, rm, ShiftTypeAndAmount());
     }
 
+    void neg(RegisterID rd, RegisterID rm)
+    {
+        ARMThumbImmediate zero = ARMThumbImmediate::makeUInt12(0);
+        sub(rd, zero, rm);
+    }
+
     void orr(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(!BadReg(rd));
@@ -1171,6 +1317,24 @@ public:
             orr(rd, rn, rm, ShiftTypeAndAmount());
     }
 
+    void orr_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    {
+        ASSERT(!BadReg(rd));
+        ASSERT(!BadReg(rn));
+        ASSERT(!BadReg(rm));
+        m_formatter.twoWordOp12Reg4FourFours(OP_ORR_S_reg_T2, rn, FourFours(shift.hi4(), rd, shift.lo4(), rm));
+    }
+
+    void orr_S(RegisterID rd, RegisterID rn, RegisterID rm)
+    {
+        if ((rd == rn) && !((rd | rm) & 8))
+            m_formatter.oneWordOp10Reg3Reg3(OP_ORR_reg_T1, rm, rd);
+        else if ((rd == rm) && !((rd | rn) & 8))
+            m_formatter.oneWordOp10Reg3Reg3(OP_ORR_reg_T1, rn, rd);
+        else
+            orr_S(rd, rn, rm, ShiftTypeAndAmount());
+    }
+
     void ror(RegisterID rd, RegisterID rm, int32_t shiftAmount)
     {
         ASSERT(!BadReg(rd));
@@ -1197,16 +1361,16 @@ public:
         m_formatter.twoWordOp12Reg4FourFours(OP_SMULL_T1, rn, FourFours(rdLo, rdHi, 0, rm));
     }
 
-    // rt == ARM::pc only allowed if last instruction in IT (if then) block.
+    // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
     void str(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
     {
-        ASSERT(rt != ARM::pc);
-        ASSERT(rn != ARM::pc);
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(imm.isUInt12());
 
         if (!((rt | rn) & 8) && imm.isUInt7())
             m_formatter.oneWordOp5Imm5Reg3Reg3(OP_STR_imm_T1, imm.getUInt7() >> 2, rn, rt);
-        else if ((rn == ARM::sp) && !(rt & 8) && imm.isUInt10())
+        else if ((rn == ARMRegisters::sp) && !(rt & 8) && imm.isUInt10())
             m_formatter.oneWordOp5Reg3Imm8(OP_STR_imm_T2, rt, imm.getUInt10() >> 2);
         else
             m_formatter.twoWordOp12Reg4Reg4Imm12(OP_STR_imm_T3, rn, rt, imm.getUInt12());
@@ -1225,8 +1389,8 @@ public:
     // if (wback) REG[rn] = _tmp
     void str(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
     {
-        ASSERT(rt != ARM::pc);
-        ASSERT(rn != ARM::pc);
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(index || wback);
         ASSERT(!wback | (rt != rn));
     
@@ -1245,10 +1409,10 @@ public:
         m_formatter.twoWordOp12Reg4Reg4Imm12(OP_STR_imm_T4, rn, rt, offset);
     }
 
-    // rt == ARM::pc only allowed if last instruction in IT (if then) block.
+    // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
     void str(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift=0)
     {
-        ASSERT(rn != ARM::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(!BadReg(rm));
         ASSERT(shift <= 3);
 
@@ -1261,12 +1425,12 @@ public:
     void sub(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         // Rd can only be SP if Rn is also SP.
-        ASSERT((rd != ARM::sp) || (rn == ARM::sp));
-        ASSERT(rd != ARM::pc);
-        ASSERT(rn != ARM::pc);
+        ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(imm.isValid());
 
-        if ((rn == ARM::sp) && (rd == ARM::sp) && imm.isUInt9()) {
+        if ((rn == ARMRegisters::sp) && (rd == ARMRegisters::sp) && imm.isUInt9()) {
             m_formatter.oneWordOp9Imm7(OP_SUB_SP_imm_T1, imm.getUInt9() >> 2);
             return;
         } else if (!((rd | rn) & 8)) {
@@ -1287,11 +1451,24 @@ public:
         }
     }
 
+    void sub(RegisterID rd, ARMThumbImmediate imm, RegisterID rn)
+    {
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(imm.isValid());
+        ASSERT(imm.isUInt12());
+
+        if (!((rd | rn) & 8) && !imm.getUInt12())
+            m_formatter.oneWordOp10Reg3Reg3(OP_RSB_imm_T1, rn, rd);
+        else
+            m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_RSB_imm_T2, rn, rd, imm);
+    }
+
     void sub(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
-        ASSERT((rd != ARM::sp) || (rn == ARM::sp));
-        ASSERT(rd != ARM::pc);
-        ASSERT(rn != ARM::pc);
+        ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp12Reg4FourFours(OP_SUB_reg_T2, rn, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
@@ -1309,20 +1486,20 @@ public:
     void sub_S(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         // Rd can only be SP if Rn is also SP.
-        ASSERT((rd != ARM::sp) || (rn == ARM::sp));
-        ASSERT(rd != ARM::pc);
-        ASSERT(rn != ARM::pc);
+        ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(imm.isValid());
 
-        if ((rn == ARM::sp) && (rd == ARM::sp) && imm.isUInt9()) {
+        if ((rn == ARMRegisters::sp) && (rd == ARMRegisters::sp) && imm.isUInt9()) {
             m_formatter.oneWordOp9Imm7(OP_SUB_SP_imm_T1, imm.getUInt9() >> 2);
             return;
         } else if (!((rd | rn) & 8)) {
             if (imm.isUInt3()) {
-                m_formatter.oneWordOp7Reg3Reg3Reg3(OP_SUB_S_imm_T1, (RegisterID)imm.getUInt3(), rn, rd);
+                m_formatter.oneWordOp7Reg3Reg3Reg3(OP_SUB_imm_T1, (RegisterID)imm.getUInt3(), rn, rd);
                 return;
             } else if ((rd == rn) && imm.isUInt8()) {
-                m_formatter.oneWordOp5Reg3Imm8(OP_SUB_S_imm_T2, rd, imm.getUInt8());
+                m_formatter.oneWordOp5Reg3Imm8(OP_SUB_imm_T2, rd, imm.getUInt8());
                 return;
             }
         }
@@ -1333,9 +1510,9 @@ public:
     // Not allowed in an IT (if then) block?
     void sub_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
-        ASSERT((rd != ARM::sp) || (rn == ARM::sp));
-        ASSERT(rd != ARM::pc);
-        ASSERT(rn != ARM::pc);
+        ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp12Reg4FourFours(OP_SUB_S_reg_T2, rn, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
@@ -1344,7 +1521,7 @@ public:
     void sub_S(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         if (!((rd | rn | rm) & 8))
-            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_SUB_S_reg_T1, rm, rn, rd);
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_SUB_reg_T1, rm, rn, rd);
         else
             sub_S(rd, rn, rm, ShiftTypeAndAmount());
     }
@@ -1372,62 +1549,75 @@ public:
             m_formatter.oneWordOp10Reg3Reg3(OP_TST_reg_T1, rm, rn);
     }
 
-    void vadd_F64(FPRegisterID rd, FPRegisterID rn, FPRegisterID rm)
+    void vadd_F64(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VADD_T2, OP_VADD_T2b, true, rn, rd, rm);
+    }
+
+    void vcmp_F64(FPDoubleRegisterID rd, FPDoubleRegisterID rm)
     {
-        m_formatter.vfpOp(0x0b00ee30 | doubleRegisterMask(rd, 6, 28) | doubleRegisterMask(rn, 23, 0) | doubleRegisterMask(rm, 21, 16));
+        m_formatter.vfpOp(OP_VCMP, OP_VCMPb, true, VFPOperand(4), rd, rm);
     }
 
-    void vcmp_F64(FPRegisterID rd, FPRegisterID rm)
+    void vcmpz_F64(FPDoubleRegisterID rd)
     {
-        m_formatter.vfpOp(0x0bc0eeb4 | doubleRegisterMask(rd, 6, 28) | doubleRegisterMask(rm, 21, 16));
+        m_formatter.vfpOp(OP_VCMP, OP_VCMPb, true, VFPOperand(5), rd, VFPOperand(0));
     }
 
-    void vcvt_F64_S32(FPRegisterID fd, FPRegisterID sm)
+    void vcvt_F64_S32(FPDoubleRegisterID rd, FPSingleRegisterID rm)
     {
-        m_formatter.vfpOp(0x0bc0eeb8 | doubleRegisterMask(fd, 6, 28) | singleRegisterMask(sm, 16, 21));
+        // boolean values are 64bit (toInt, unsigned, roundZero)
+        m_formatter.vfpOp(OP_VCVT_FPIVFP, OP_VCVT_FPIVFPb, true, vcvtOp(false, false, false), rd, rm);
     }
 
-    void vcvt_S32_F64(FPRegisterID sd, FPRegisterID fm)
+    void vcvtr_S32_F64(FPSingleRegisterID rd, FPDoubleRegisterID rm)
     {
-        m_formatter.vfpOp(0x0bc0eebd | singleRegisterMask(sd, 28, 6) | doubleRegisterMask(fm, 21, 16));
+        // boolean values are 64bit (toInt, unsigned, roundZero)
+        m_formatter.vfpOp(OP_VCVT_FPIVFP, OP_VCVT_FPIVFPb, true, vcvtOp(true, false, true), rd, rm);
     }
 
-    void vldr(FPRegisterID rd, RegisterID rn, int32_t imm)
+    void vdiv_F64(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
     {
-        vmem(rd, rn, imm, true);
+        m_formatter.vfpOp(OP_VDIV, OP_VDIVb, true, rn, rd, rm);
     }
 
-    void vmov(RegisterID rd, FPRegisterID sn)
+    void vldr(FPDoubleRegisterID rd, RegisterID rn, int32_t imm)
     {
-        m_formatter.vfpOp(0x0a10ee10 | (rd << 28) | singleRegisterMask(sn, 0, 23));
+        m_formatter.vfpMemOp(OP_VLDR, OP_VLDRb, true, rn, rd, imm);
     }
 
-    void vmov(FPRegisterID sn, RegisterID rd)
+    void vmov(RegisterID rd, FPSingleRegisterID rn)
     {
-        m_formatter.vfpOp(0x0a10ee00 | (rd << 28) | singleRegisterMask(sn, 0, 23));
+        ASSERT(!BadReg(rd));
+        m_formatter.vfpOp(OP_VMOV_CtoS, OP_VMOV_CtoSb, false, rn, rd, VFPOperand(0));
     }
 
-    // move FPSCR flags to APSR.
-    void vmrs_APSR_nzcv_FPSCR()
+    void vmov(FPSingleRegisterID rd, RegisterID rn)
     {
-        m_formatter.vfpOp(0xfa10eef1);
+        ASSERT(!BadReg(rn));
+        m_formatter.vfpOp(OP_VMOV_StoC, OP_VMOV_StoCb, false, rd, rn, VFPOperand(0));
     }
 
-    void vmul_F64(FPRegisterID rd, FPRegisterID rn, FPRegisterID rm)
+    void vmrs(RegisterID reg = ARMRegisters::pc)
     {
-        m_formatter.vfpOp(0x0b00ee20 | doubleRegisterMask(rd, 6, 28) | doubleRegisterMask(rn, 23, 0) | doubleRegisterMask(rm, 21, 16));
+        ASSERT(reg != ARMRegisters::sp);
+        m_formatter.vfpOp(OP_VMRS, OP_VMRSb, false, VFPOperand(1), VFPOperand(0x10 | reg), VFPOperand(0));
     }
 
-    void vstr(FPRegisterID rd, RegisterID rn, int32_t imm)
+    void vmul_F64(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
     {
-        vmem(rd, rn, imm, false);
+        m_formatter.vfpOp(OP_VMUL_T2, OP_VMUL_T2b, true, rn, rd, rm);
     }
 
-    void vsub_F64(FPRegisterID rd, FPRegisterID rn, FPRegisterID rm)
+    void vstr(FPDoubleRegisterID rd, RegisterID rn, int32_t imm)
     {
-        m_formatter.vfpOp(0x0b40ee30 | doubleRegisterMask(rd, 6, 28) | doubleRegisterMask(rn, 23, 0) | doubleRegisterMask(rm, 21, 16));
+        m_formatter.vfpMemOp(OP_VSTR, OP_VSTRb, true, rn, rd, imm);
     }
 
+    void vsub_F64(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VSUB_T2, OP_VSUB_T2b, true, rn, rd, rm);
+    }
 
     JmpDst label()
     {
@@ -1470,6 +1660,15 @@ public:
     {
         return dst.m_offset - src.m_offset;
     }
+
+    int executableOffsetFor(int location)
+    {
+        if (!location)
+            return 0;
+        return static_cast<int32_t*>(m_formatter.data())[location / sizeof(int32_t) - 1];
+    }
+    
+    int jumpSizeDelta(JumpType jumpType, JumpLinkType jumpLinkType) { return JumpPaddingSizes[jumpType] - JumpSizes[jumpLinkType]; }
     
     // Assembler admin methods:
 
@@ -1478,13 +1677,125 @@ public:
         return m_formatter.size();
     }
 
-    void* executableCopy(ExecutablePool* allocator)
+    static bool linkRecordSourceComparator(const LinkRecord& a, const LinkRecord& b)
     {
-        void* copy = m_formatter.executableCopy(allocator);
-        ASSERT(copy);
-        return copy;
+        return a.from() < b.from();
     }
 
+    bool canCompact(JumpType jumpType)
+    {
+        // The following cannot be compacted:
+        //   JumpFixed: represents custom jump sequence
+        //   JumpNoConditionFixedSize: represents unconditional jump that must remain a fixed size
+        //   JumpConditionFixedSize: represents conditional jump that must remain a fixed size
+        return (jumpType == JumpNoCondition) || (jumpType == JumpCondition);
+    }
+    
+    JumpLinkType computeJumpType(JumpType jumpType, const uint8_t* from, const uint8_t* to)
+    {
+        if (jumpType == JumpFixed)
+            return LinkInvalid;
+        
+        // for patchable jump we must leave space for the longest code sequence
+        if (jumpType == JumpNoConditionFixedSize)
+            return LinkBX;
+        if (jumpType == JumpConditionFixedSize)
+            return LinkConditionalBX;
+        
+        const int paddingSize = JumpPaddingSizes[jumpType];
+        bool mayTriggerErrata = false;
+        
+        if (jumpType == JumpCondition) {
+            // 2-byte conditional T1
+            const uint16_t* jumpT1Location = reinterpret_cast<const uint16_t*>(from - (paddingSize - JumpSizes[LinkJumpT1]));
+            if (canBeJumpT1(jumpT1Location, to))
+                return LinkJumpT1;
+            // 4-byte conditional T3
+            const uint16_t* jumpT3Location = reinterpret_cast<const uint16_t*>(from - (paddingSize - JumpSizes[LinkJumpT3]));
+            if (canBeJumpT3(jumpT3Location, to, mayTriggerErrata)) {
+                if (!mayTriggerErrata)
+                    return LinkJumpT3;
+            }
+            // 4-byte conditional T4 with IT
+            const uint16_t* conditionalJumpT4Location = 
+            reinterpret_cast<const uint16_t*>(from - (paddingSize - JumpSizes[LinkConditionalJumpT4]));
+            if (canBeJumpT4(conditionalJumpT4Location, to, mayTriggerErrata)) {
+                if (!mayTriggerErrata)
+                    return LinkConditionalJumpT4;
+            }
+        } else {
+            // 2-byte unconditional T2
+            const uint16_t* jumpT2Location = reinterpret_cast<const uint16_t*>(from - (paddingSize - JumpSizes[LinkJumpT2]));
+            if (canBeJumpT2(jumpT2Location, to))
+                return LinkJumpT2;
+            // 4-byte unconditional T4
+            const uint16_t* jumpT4Location = reinterpret_cast<const uint16_t*>(from - (paddingSize - JumpSizes[LinkJumpT4]));
+            if (canBeJumpT4(jumpT4Location, to, mayTriggerErrata)) {
+                if (!mayTriggerErrata)
+                    return LinkJumpT4;
+            }
+            // use long jump sequence
+            return LinkBX;
+        }
+        
+        ASSERT(jumpType == JumpCondition);
+        return LinkConditionalBX;
+    }
+    
+    JumpLinkType computeJumpType(LinkRecord& record, const uint8_t* from, const uint8_t* to)
+    {
+        JumpLinkType linkType = computeJumpType(record.type(), from, to);
+        record.setLinkType(linkType);
+        return linkType;
+    }
+    
+    void recordLinkOffsets(int32_t regionStart, int32_t regionEnd, int32_t offset)
+    {
+        int32_t ptr = regionStart / sizeof(int32_t);
+        const int32_t end = regionEnd / sizeof(int32_t);
+        int32_t* offsets = static_cast<int32_t*>(m_formatter.data());
+        while (ptr < end)
+            offsets[ptr++] = offset;
+    }
+    
+    Vector<LinkRecord>& jumpsToLink()
+    {
+        std::sort(m_jumpsToLink.begin(), m_jumpsToLink.end(), linkRecordSourceComparator);
+        return m_jumpsToLink;
+    }
+
+    void link(LinkRecord& record, uint8_t* from, uint8_t* to)
+    {
+        switch (record.linkType()) {
+        case LinkJumpT1:
+            linkJumpT1(record.condition(), reinterpret_cast<uint16_t*>(from), to);
+            break;
+        case LinkJumpT2:
+            linkJumpT2(reinterpret_cast<uint16_t*>(from), to);
+            break;
+        case LinkJumpT3:
+            linkJumpT3(record.condition(), reinterpret_cast<uint16_t*>(from), to);
+            break;
+        case LinkJumpT4:
+            linkJumpT4(reinterpret_cast<uint16_t*>(from), to);
+            break;
+        case LinkConditionalJumpT4:
+            linkConditionalJumpT4(record.condition(), reinterpret_cast<uint16_t*>(from), to);
+            break;
+        case LinkConditionalBX:
+            linkConditionalBX(record.condition(), reinterpret_cast<uint16_t*>(from), to);
+            break;
+        case LinkBX:
+            linkBX(reinterpret_cast<uint16_t*>(from), to);
+            break;
+        default:
+            ASSERT_NOT_REACHED();
+            break;
+        }
+    }
+
+    void* unlinkedCode() { return m_formatter.data(); }
+    
     static unsigned getCallReturnOffset(JmpSrc call)
     {
         ASSERT(call.m_offset >= 0);
@@ -1503,11 +1814,7 @@ public:
     {
         ASSERT(to.m_offset != -1);
         ASSERT(from.m_offset != -1);
-
-        uint16_t* location = reinterpret_cast<uint16_t*>(reinterpret_cast<intptr_t>(m_formatter.data()) + from.m_offset);
-        intptr_t relative = to.m_offset - from.m_offset;
-
-        linkWithOffset(location, relative);
+        m_jumpsToLink.append(LinkRecord(from.m_offset, to.m_offset, from.m_type, from.m_condition));
     }
 
     static void linkJump(void* code, JmpSrc from, void* to)
@@ -1515,9 +1822,7 @@ public:
         ASSERT(from.m_offset != -1);
         
         uint16_t* location = reinterpret_cast<uint16_t*>(reinterpret_cast<intptr_t>(code) + from.m_offset);
-        intptr_t relative = reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(location);
-
-        linkWithOffset(location, relative);
+        linkJumpAbsolute(location, to);
     }
 
     // bah, this mathod should really be static, since it is used by the LinkBuffer.
@@ -1541,10 +1846,9 @@ public:
         ASSERT(!(reinterpret_cast<intptr_t>(from) & 1));
         ASSERT(!(reinterpret_cast<intptr_t>(to) & 1));
 
-        intptr_t relative = reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(from);
-        linkWithOffset(reinterpret_cast<uint16_t*>(from), relative);
+        linkJumpAbsolute(reinterpret_cast<uint16_t*>(from), to);
 
-        ExecutableAllocator::cacheFlush(reinterpret_cast<uint16_t*>(from) - 2, 2 * sizeof(uint16_t));
+        ExecutableAllocator::cacheFlush(reinterpret_cast<uint16_t*>(from) - 5, 5 * sizeof(uint16_t));
     }
     
     static void relinkCall(void* from, void* to)
@@ -1553,8 +1857,6 @@ public:
         ASSERT(reinterpret_cast<intptr_t>(to) & 1);
 
         setPointer(reinterpret_cast<uint16_t*>(from) - 1, to);
-
-        ExecutableAllocator::cacheFlush(reinterpret_cast<uint16_t*>(from) - 5, 4 * sizeof(uint16_t));
     }
 
     static void repatchInt32(void* where, int32_t value)
@@ -1562,8 +1864,6 @@ public:
         ASSERT(!(reinterpret_cast<intptr_t>(where) & 1));
         
         setInt32(where, value);
-
-        ExecutableAllocator::cacheFlush(reinterpret_cast<uint16_t*>(where) - 4, 4 * sizeof(uint16_t));
     }
 
     static void repatchPointer(void* where, void* value)
@@ -1571,56 +1871,101 @@ public:
         ASSERT(!(reinterpret_cast<intptr_t>(where) & 1));
         
         setPointer(where, value);
-
-        ExecutableAllocator::cacheFlush(reinterpret_cast<uint16_t*>(where) - 4, 4 * sizeof(uint16_t));
     }
 
     static void repatchLoadPtrToLEA(void* where)
     {
         ASSERT(!(reinterpret_cast<intptr_t>(where) & 1));
-
         uint16_t* loadOp = reinterpret_cast<uint16_t*>(where) + 4;
-        ASSERT((*loadOp & 0xfff0) == OP_LDR_reg_T2);
 
-        *loadOp = OP_ADD_reg_T3 | (*loadOp & 0xf);
-        ExecutableAllocator::cacheFlush(loadOp, sizeof(uint16_t));
+        ASSERT((loadOp[0] & 0xfff0) == OP_LDR_reg_T2);
+        ASSERT((loadOp[1] & 0x0ff0) == 0);
+        int rn = loadOp[0] & 0xf;
+        int rt = loadOp[1] >> 12;
+        int rm = loadOp[1] & 0xf;
+
+        loadOp[0] = OP_ADD_reg_T3 | rn;
+        loadOp[1] = rt << 8 | rm;
+        ExecutableAllocator::cacheFlush(loadOp, sizeof(uint32_t));
     }
 
 private:
+    // VFP operations commonly take one or more 5-bit operands, typically representing a
+    // floating point register number.  This will commonly be encoded in the instruction
+    // in two parts, with one single bit field, and one 4-bit field.  In the case of
+    // double precision operands the high bit of the register number will be encoded
+    // separately, and for single precision operands the high bit of the register number
+    // will be encoded individually.
+    // VFPOperand encapsulates a 5-bit VFP operand, with bits 0..3 containing the 4-bit
+    // field to be encoded together in the instruction (the low 4-bits of a double
+    // register number, or the high 4-bits of a single register number), and bit 4
+    // contains the bit value to be encoded individually.
+    struct VFPOperand {
+        explicit VFPOperand(uint32_t value)
+            : m_value(value)
+        {
+            ASSERT(!(m_value & ~0x1f));
+        }
 
-    // Arm vfp addresses can be offset by a 9-bit ones-comp immediate, left shifted by 2.
-    // (i.e. +/-(0..255) 32-bit words)
-    void vmem(FPRegisterID rd, RegisterID rn, int32_t imm, bool isLoad)
-    {
-        bool up;
-        uint32_t offset;
-        if (imm < 0) {
-            offset = -imm;
-            up = false;
-        } else {
-            offset = imm;
-            up = true;
+        VFPOperand(FPDoubleRegisterID reg)
+            : m_value(reg)
+        {
+        }
+
+        VFPOperand(RegisterID reg)
+            : m_value(reg)
+        {
+        }
+
+        VFPOperand(FPSingleRegisterID reg)
+            : m_value(((reg & 1) << 4) | (reg >> 1)) // rotate the lowest bit of 'reg' to the top.
+        {
+        }
+
+        uint32_t bits1()
+        {
+            return m_value >> 4;
         }
 
-        // offset is effectively leftshifted by 2 already (the bottom two bits are zero, and not
-        // reperesented in the instruction.  Left shift by 14, to mov it into position 0x00AA0000.
-        ASSERT((offset & ~(0xff << 2)) == 0);
-        offset <<= 14;
+        uint32_t bits4()
+        {
+            return m_value & 0xf;
+        }
 
-        m_formatter.vfpOp(0x0b00ed00 | offset | (up << 7) | (isLoad << 4) | doubleRegisterMask(rd, 6, 28) | rn);
+        uint32_t m_value;
+    };
+
+    VFPOperand vcvtOp(bool toInteger, bool isUnsigned, bool isRoundZero)
+    {
+        // Cannot specify rounding when converting to float.
+        ASSERT(toInteger || !isRoundZero);
+
+        uint32_t op = 0x8;
+        if (toInteger) {
+            // opc2 indicates both toInteger & isUnsigned.
+            op |= isUnsigned ? 0x4 : 0x5;
+            // 'op' field in instruction is isRoundZero
+            if (isRoundZero)
+                op |= 0x10;
+        } else {
+            // 'op' field in instruction is isUnsigned
+            if (!isUnsigned)
+                op |= 0x10;
+        }
+        return VFPOperand(op);
     }
 
     static void setInt32(void* code, uint32_t value)
     {
         uint16_t* location = reinterpret_cast<uint16_t*>(code);
+        ASSERT(isMOV_imm_T3(location - 4) && isMOVT(location - 2));
 
-        uint16_t lo16 = value;
-        uint16_t hi16 = value >> 16;
-
-        spliceHi5(location - 4, lo16);
-        spliceLo11(location - 3, lo16);
-        spliceHi5(location - 2, hi16);
-        spliceLo11(location - 1, hi16);
+        ARMThumbImmediate lo16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(value));
+        ARMThumbImmediate hi16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(value >> 16));
+        location[-4] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, lo16);
+        location[-3] = twoWordOp5i6Imm4Reg4EncodedImmSecond((location[-3] >> 8) & 0xf, lo16);
+        location[-2] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16);
+        location[-1] = twoWordOp5i6Imm4Reg4EncodedImmSecond((location[-1] >> 8) & 0xf, hi16);
 
         ExecutableAllocator::cacheFlush(location - 4, 4 * sizeof(uint16_t));
     }
@@ -1630,41 +1975,259 @@ private:
         setInt32(code, reinterpret_cast<uint32_t>(value));
     }
 
-    // Linking & patching:
-    // This method assumes that the JmpSrc being linked is a T4 b instruction.
-    static void linkWithOffset(uint16_t* instruction, intptr_t relative)
-    {
-        // Currently branches > 16m = mostly deathy.
-        if (((relative << 7) >> 7) != relative) {
-            // FIXME: This CRASH means we cannot turn the JIT on by default on arm-v7.
-            fprintf(stderr, "Error: Cannot link T4b.\n");
-            CRASH();
-        }
+    static bool isB(void* address)
+    {
+        uint16_t* instruction = static_cast<uint16_t*>(address);
+        return ((instruction[0] & 0xf800) == OP_B_T4a) && ((instruction[1] & 0xd000) == OP_B_T4b);
+    }
+
+    static bool isBX(void* address)
+    {
+        uint16_t* instruction = static_cast<uint16_t*>(address);
+        return (instruction[0] & 0xff87) == OP_BX;
+    }
+
+    static bool isMOV_imm_T3(void* address)
+    {
+        uint16_t* instruction = static_cast<uint16_t*>(address);
+        return ((instruction[0] & 0xFBF0) == OP_MOV_imm_T3) && ((instruction[1] & 0x8000) == 0);
+    }
+
+    static bool isMOVT(void* address)
+    {
+        uint16_t* instruction = static_cast<uint16_t*>(address);
+        return ((instruction[0] & 0xFBF0) == OP_MOVT) && ((instruction[1] & 0x8000) == 0);
+    }
+
+    static bool isNOP_T1(void* address)
+    {
+        uint16_t* instruction = static_cast<uint16_t*>(address);
+        return instruction[0] == OP_NOP_T1;
+    }
+
+    static bool isNOP_T2(void* address)
+    {
+        uint16_t* instruction = static_cast<uint16_t*>(address);
+        return (instruction[0] == OP_NOP_T2a) && (instruction[1] == OP_NOP_T2b);
+    }
+
+    static bool canBeJumpT1(const uint16_t* instruction, const void* target)
+    {
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // It does not appear to be documented in the ARM ARM (big surprise), but
+        // for OP_B_T1 the branch displacement encoded in the instruction is 2 
+        // less than the actual displacement.
+        relative -= 2;
+        return ((relative << 23) >> 23) == relative;
+    }
+    
+    static bool canBeJumpT2(const uint16_t* instruction, const void* target)
+    {
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // It does not appear to be documented in the ARM ARM (big surprise), but
+        // for OP_B_T2 the branch displacement encoded in the instruction is 2 
+        // less than the actual displacement.
+        relative -= 2;
+        return ((relative << 20) >> 20) == relative;
+    }
+    
+    static bool canBeJumpT3(const uint16_t* instruction, const void* target, bool& mayTriggerErrata)
+    {
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // From Cortex-A8 errata:
+        // If the 32-bit Thumb-2 branch instruction spans two 4KiB regions and
+        // the target of the branch falls within the first region it is
+        // possible for the processor to incorrectly determine the branch
+        // instruction, and it is also possible in some cases for the processor
+        // to enter a deadlock state.
+        // The instruction is spanning two pages if it ends at an address ending 0x002
+        bool spansTwo4K = ((reinterpret_cast<intptr_t>(instruction) & 0xfff) == 0x002);
+        mayTriggerErrata = spansTwo4K;
+        // The target is in the first page if the jump branch back by [3..0x1002] bytes
+        bool targetInFirstPage = (relative >= -0x1002) && (relative < -2);
+        bool wouldTriggerA8Errata = spansTwo4K && targetInFirstPage;
+        return ((relative << 11) >> 11) == relative && !wouldTriggerA8Errata;
+    }
+    
+    static bool canBeJumpT4(const uint16_t* instruction, const void* target, bool& mayTriggerErrata)
+    {
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // From Cortex-A8 errata:
+        // If the 32-bit Thumb-2 branch instruction spans two 4KiB regions and
+        // the target of the branch falls within the first region it is
+        // possible for the processor to incorrectly determine the branch
+        // instruction, and it is also possible in some cases for the processor
+        // to enter a deadlock state.
+        // The instruction is spanning two pages if it ends at an address ending 0x002
+        bool spansTwo4K = ((reinterpret_cast<intptr_t>(instruction) & 0xfff) == 0x002);
+        mayTriggerErrata = spansTwo4K;
+        // The target is in the first page if the jump branch back by [3..0x1002] bytes
+        bool targetInFirstPage = (relative >= -0x1002) && (relative < -2);
+        bool wouldTriggerA8Errata = spansTwo4K && targetInFirstPage;
+        return ((relative << 7) >> 7) == relative && !wouldTriggerA8Errata;
+    }
+    
+    void linkJumpT1(Condition cond, uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(        
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        ASSERT(canBeJumpT1(instruction, target));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // It does not appear to be documented in the ARM ARM (big surprise), but
+        // for OP_B_T1 the branch displacement encoded in the instruction is 2 
+        // less than the actual displacement.
+        relative -= 2;
+        
+        // All branch offsets should be an even distance.
+        ASSERT(!(relative & 1));
+        instruction[-1] = OP_B_T1 | ((cond & 0xf) << 8) | ((relative & 0x1fe) >> 1);
+    }
+    
+    static void linkJumpT2(uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(        
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        ASSERT(canBeJumpT2(instruction, target));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // It does not appear to be documented in the ARM ARM (big surprise), but
+        // for OP_B_T2 the branch displacement encoded in the instruction is 2 
+        // less than the actual displacement.
+        relative -= 2;
+        
+        // All branch offsets should be an even distance.
+        ASSERT(!(relative & 1));
+        instruction[-1] = OP_B_T2 | ((relative & 0xffe) >> 1);
+    }
+    
+    void linkJumpT3(Condition cond, uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        bool scratch;
+        UNUSED_PARAM(scratch);
+        ASSERT(canBeJumpT3(instruction, target, scratch));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        
+        // All branch offsets should be an even distance.
+        ASSERT(!(relative & 1));
+        instruction[-2] = OP_B_T3a | ((relative & 0x100000) >> 10) | ((cond & 0xf) << 6) | ((relative & 0x3f000) >> 12);
+        instruction[-1] = OP_B_T3b | ((relative & 0x80000) >> 8) | ((relative & 0x40000) >> 5) | ((relative & 0xffe) >> 1);
+    }
+    
+    static void linkJumpT4(uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(        
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        bool scratch;
+        UNUSED_PARAM(scratch);
+        ASSERT(canBeJumpT4(instruction, target, scratch));
         
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
         // ARM encoding for the top two bits below the sign bit is 'peculiar'.
         if (relative >= 0)
             relative ^= 0xC00000;
-
+        
         // All branch offsets should be an even distance.
         ASSERT(!(relative & 1));
-
-        int word1 = ((relative & 0x1000000) >> 14) | ((relative & 0x3ff000) >> 12);
-        int word2 = ((relative & 0x800000) >> 10) | ((relative & 0x400000) >> 11) | ((relative & 0xffe) >> 1);
-
-        instruction[-2] = OP_B_T4a | word1;
-        instruction[-1] = OP_B_T4b | word2;
+        instruction[-2] = OP_B_T4a | ((relative & 0x1000000) >> 14) | ((relative & 0x3ff000) >> 12);
+        instruction[-1] = OP_B_T4b | ((relative & 0x800000) >> 10) | ((relative & 0x400000) >> 11) | ((relative & 0xffe) >> 1);
     }
-
-    // These functions can be used to splice 16-bit immediates back into previously generated instructions.
-    static void spliceHi5(uint16_t* where, uint16_t what)
+    
+    void linkConditionalJumpT4(Condition cond, uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(        
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        instruction[-3] = ifThenElse(cond) | OP_IT;
+        linkJumpT4(instruction, target);
+    }
+    
+    static void linkBX(uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        const uint16_t JUMP_TEMPORARY_REGISTER = ARMRegisters::ip;
+        ARMThumbImmediate lo16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) + 1));
+        ARMThumbImmediate hi16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) >> 16));
+        instruction[-5] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, lo16);
+        instruction[-4] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, lo16);
+        instruction[-3] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16);
+        instruction[-2] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, hi16);
+        instruction[-1] = OP_BX | (JUMP_TEMPORARY_REGISTER << 3);
+    }
+    
+    void linkConditionalBX(Condition cond, uint16_t* instruction, void* target)
     {
-        uint16_t pattern = (what >> 12) | ((what & 0x0800) >> 1);
-        *where = (*where & 0xFBF0) | pattern;
+        // FIMXE: this should be up in the MacroAssembler layer. :-(        
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        linkBX(instruction, target);
+        instruction[-6] = ifThenElse(cond, true, true) | OP_IT;
+    }
+    
+    static void linkJumpAbsolute(uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        ASSERT((isMOV_imm_T3(instruction - 5) && isMOVT(instruction - 3) && isBX(instruction - 1))
+               || (isNOP_T1(instruction - 5) && isNOP_T2(instruction - 4) && isB(instruction - 2)));
+        
+        bool scratch;
+        if (canBeJumpT4(instruction, target, scratch)) {
+            // There may be a better way to fix this, but right now put the NOPs first, since in the
+            // case of an conditional branch this will be coming after an ITTT predicating *three*
+            // instructions!  Looking backwards to modify the ITTT to an IT is not easy, due to
+            // variable wdith encoding - the previous instruction might *look* like an ITTT but
+            // actually be the second half of a 2-word op.
+            instruction[-5] = OP_NOP_T1;
+            instruction[-4] = OP_NOP_T2a;
+            instruction[-3] = OP_NOP_T2b;
+            linkJumpT4(instruction, target);
+        } else {
+            const uint16_t JUMP_TEMPORARY_REGISTER = ARMRegisters::ip;
+            ARMThumbImmediate lo16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) + 1));
+            ARMThumbImmediate hi16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) >> 16));
+            instruction[-5] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, lo16);
+            instruction[-4] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, lo16);
+            instruction[-3] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16);
+            instruction[-2] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, hi16);
+            instruction[-1] = OP_BX | (JUMP_TEMPORARY_REGISTER << 3);
+        }
+    }
+    
+    static uint16_t twoWordOp5i6Imm4Reg4EncodedImmFirst(uint16_t op, ARMThumbImmediate imm)
+    {
+        return op | (imm.m_value.i << 10) | imm.m_value.imm4;
     }
-    static void spliceLo11(uint16_t* where, uint16_t what)
+
+    static uint16_t twoWordOp5i6Imm4Reg4EncodedImmSecond(uint16_t rd, ARMThumbImmediate imm)
     {
-        uint16_t pattern = ((what & 0x0700) << 4) | (what & 0x00FF);
-        *where = (*where & 0x8F00) | pattern;
+        return (imm.m_value.imm3 << 12) | (rd << 8) | imm.m_value.imm8;
     }
 
     class ARMInstructionFormatter {
@@ -1723,8 +2286,11 @@ private:
 
         void twoWordOp5i6Imm4Reg4EncodedImm(OpcodeID1 op, int imm4, RegisterID rd, ARMThumbImmediate imm)
         {
-            m_buffer.putShort(op | (imm.m_value.i << 10) | imm4);
-            m_buffer.putShort((imm.m_value.imm3 << 12) | (rd << 8) | imm.m_value.imm8);
+            ARMThumbImmediate newImm = imm;
+            newImm.m_value.imm4 = imm4;
+
+            m_buffer.putShort(ARMv7Assembler::twoWordOp5i6Imm4Reg4EncodedImmFirst(op, newImm));
+            m_buffer.putShort(ARMv7Assembler::twoWordOp5i6Imm4Reg4EncodedImmSecond(rd, newImm));
         }
 
         void twoWordOp12Reg4Reg4Imm12(OpcodeID1 op, RegisterID reg1, RegisterID reg2, uint16_t imm)
@@ -1733,11 +2299,35 @@ private:
             m_buffer.putShort((reg2 << 12) | imm);
         }
 
-        void vfpOp(int32_t op)
+        // Formats up instructions of the pattern:
+        //    111111111B11aaaa:bbbb222SA2C2cccc
+        // Where 1s in the pattern come from op1, 2s in the pattern come from op2, S is the provided size bit.
+        // Operands provide 5 bit values of the form Aaaaa, Bbbbb, Ccccc.
+        void vfpOp(OpcodeID1 op1, OpcodeID2 op2, bool size, VFPOperand a, VFPOperand b, VFPOperand c)
         {
-            m_buffer.putInt(op);
+            ASSERT(!(op1 & 0x004f));
+            ASSERT(!(op2 & 0xf1af));
+            m_buffer.putShort(op1 | b.bits1() << 6 | a.bits4());
+            m_buffer.putShort(op2 | b.bits4() << 12 | size << 8 | a.bits1() << 7 | c.bits1() << 5 | c.bits4());
         }
 
+        // Arm vfp addresses can be offset by a 9-bit ones-comp immediate, left shifted by 2.
+        // (i.e. +/-(0..255) 32-bit words)
+        void vfpMemOp(OpcodeID1 op1, OpcodeID2 op2, bool size, RegisterID rn, VFPOperand rd, int32_t imm)
+        {
+            bool up = true;
+            if (imm < 0) {
+                imm = -imm;
+                up = false;
+            }
+            
+            uint32_t offset = imm;
+            ASSERT(!(offset & ~0x3fc));
+            offset >>= 2;
+
+            m_buffer.putShort(op1 | (up << 7) | rd.bits1() << 6 | rn);
+            m_buffer.putShort(op2 | rd.bits4() << 12 | size << 8 | offset);
+        }
 
         // Administrative methods:
 
@@ -1749,10 +2339,13 @@ private:
     private:
         AssemblerBuffer m_buffer;
     } m_formatter;
+
+    Vector<LinkRecord> m_jumpsToLink;
+    Vector<int32_t> m_offsets;
 };
 
 } // namespace JSC
 
-#endif // ENABLE(ASSEMBLER) && PLATFORM_ARM_ARCH(7)
+#endif // ENABLE(ASSEMBLER) && CPU(ARM_THUMB2)
 
 #endif // ARMAssembler_h