]> git.saurik.com Git - apple/javascriptcore.git/blobdiff - assembler/ARMv7Assembler.h
JavaScriptCore-1097.13.tar.gz
[apple/javascriptcore.git] / assembler / ARMv7Assembler.h
index 4e394b2069246ade02dd8adac9b197c072939cc9..a8b55d182d669766041249f19d4d067276956cc9 100644 (file)
@@ -1,5 +1,6 @@
 /*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 University of Szeged
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -26,8 +27,6 @@
 #ifndef ARMAssembler_h
 #define ARMAssembler_h
 
-#include <wtf/Platform.h>
-
 #if ENABLE(ASSEMBLER) && CPU(ARM_THUMB2)
 
 #include "AssemblerBuffer.h"
@@ -57,107 +56,122 @@ namespace ARMRegisters {
         r15, pc = r15,
     } RegisterID;
 
-    // s0 == d0 == q0
-    // s4 == d2 == q1
-    // etc
     typedef enum {
-        s0 = 0,
-        s1 = 1,
-        s2 = 2,
-        s3 = 3,
-        s4 = 4,
-        s5 = 5,
-        s6 = 6,
-        s7 = 7,
-        s8 = 8,
-        s9 = 9,
-        s10 = 10,
-        s11 = 11,
-        s12 = 12,
-        s13 = 13,
-        s14 = 14,
-        s15 = 15,
-        s16 = 16,
-        s17 = 17,
-        s18 = 18,
-        s19 = 19,
-        s20 = 20,
-        s21 = 21,
-        s22 = 22,
-        s23 = 23,
-        s24 = 24,
-        s25 = 25,
-        s26 = 26,
-        s27 = 27,
-        s28 = 28,
-        s29 = 29,
-        s30 = 30,
-        s31 = 31,
-        d0 = 0 << 1,
-        d1 = 1 << 1,
-        d2 = 2 << 1,
-        d3 = 3 << 1,
-        d4 = 4 << 1,
-        d5 = 5 << 1,
-        d6 = 6 << 1,
-        d7 = 7 << 1,
-        d8 = 8 << 1,
-        d9 = 9 << 1,
-        d10 = 10 << 1,
-        d11 = 11 << 1,
-        d12 = 12 << 1,
-        d13 = 13 << 1,
-        d14 = 14 << 1,
-        d15 = 15 << 1,
-        d16 = 16 << 1,
-        d17 = 17 << 1,
-        d18 = 18 << 1,
-        d19 = 19 << 1,
-        d20 = 20 << 1,
-        d21 = 21 << 1,
-        d22 = 22 << 1,
-        d23 = 23 << 1,
-        d24 = 24 << 1,
-        d25 = 25 << 1,
-        d26 = 26 << 1,
-        d27 = 27 << 1,
-        d28 = 28 << 1,
-        d29 = 29 << 1,
-        d30 = 30 << 1,
-        d31 = 31 << 1,
-        q0 = 0 << 2,
-        q1 = 1 << 2,
-        q2 = 2 << 2,
-        q3 = 3 << 2,
-        q4 = 4 << 2,
-        q5 = 5 << 2,
-        q6 = 6 << 2,
-        q7 = 7 << 2,
-        q8 = 8 << 2,
-        q9 = 9 << 2,
-        q10 = 10 << 2,
-        q11 = 11 << 2,
-        q12 = 12 << 2,
-        q13 = 13 << 2,
-        q14 = 14 << 2,
-        q15 = 15 << 2,
-        q16 = 16 << 2,
-        q17 = 17 << 2,
-        q18 = 18 << 2,
-        q19 = 19 << 2,
-        q20 = 20 << 2,
-        q21 = 21 << 2,
-        q22 = 22 << 2,
-        q23 = 23 << 2,
-        q24 = 24 << 2,
-        q25 = 25 << 2,
-        q26 = 26 << 2,
-        q27 = 27 << 2,
-        q28 = 28 << 2,
-        q29 = 29 << 2,
-        q30 = 30 << 2,
-        q31 = 31 << 2,
-    } FPRegisterID;
+        s0,
+        s1,
+        s2,
+        s3,
+        s4,
+        s5,
+        s6,
+        s7,
+        s8,
+        s9,
+        s10,
+        s11,
+        s12,
+        s13,
+        s14,
+        s15,
+        s16,
+        s17,
+        s18,
+        s19,
+        s20,
+        s21,
+        s22,
+        s23,
+        s24,
+        s25,
+        s26,
+        s27,
+        s28,
+        s29,
+        s30,
+        s31,
+    } FPSingleRegisterID;
+
+    typedef enum {
+        d0,
+        d1,
+        d2,
+        d3,
+        d4,
+        d5,
+        d6,
+        d7,
+        d8,
+        d9,
+        d10,
+        d11,
+        d12,
+        d13,
+        d14,
+        d15,
+        d16,
+        d17,
+        d18,
+        d19,
+        d20,
+        d21,
+        d22,
+        d23,
+        d24,
+        d25,
+        d26,
+        d27,
+        d28,
+        d29,
+        d30,
+        d31,
+    } FPDoubleRegisterID;
+
+    typedef enum {
+        q0,
+        q1,
+        q2,
+        q3,
+        q4,
+        q5,
+        q6,
+        q7,
+        q8,
+        q9,
+        q10,
+        q11,
+        q12,
+        q13,
+        q14,
+        q15,
+        q16,
+        q17,
+        q18,
+        q19,
+        q20,
+        q21,
+        q22,
+        q23,
+        q24,
+        q25,
+        q26,
+        q27,
+        q28,
+        q29,
+        q30,
+        q31,
+    } FPQuadRegisterID;
+
+    inline FPSingleRegisterID asSingle(FPDoubleRegisterID reg)
+    {
+        ASSERT(reg < d16);
+        return (FPSingleRegisterID)(reg << 1);
+    }
+
+    inline FPDoubleRegisterID asDouble(FPSingleRegisterID reg)
+    {
+        ASSERT(!(reg & 1));
+        return (FPDoubleRegisterID)(reg >> 1);
+    }
 }
 
 class ARMv7Assembler;
@@ -290,7 +304,7 @@ public:
         }
 
         if ((bytes.byte1 == bytes.byte3) && !(bytes.byte0 | bytes.byte2)) {
-            encoding.immediate = bytes.byte0;
+            encoding.immediate = bytes.byte1;
             encoding.pattern = 2;
             return ARMThumbImmediate(TypeEncoded, encoding);
         }
@@ -326,6 +340,8 @@ public:
         return m_type != TypeInvalid;
     }
 
+    uint16_t asUInt16() const { return m_value.asInt; }
+
     // These methods rely on the format of encoded byte values.
     bool isUInt3() { return !(m_value.asInt & 0xfff8); }
     bool isUInt4() { return !(m_value.asInt & 0xfff0); }
@@ -343,8 +359,8 @@ public:
     uint8_t getUInt6() { ASSERT(isUInt6()); return m_value.asInt; }
     uint8_t getUInt7() { ASSERT(isUInt7()); return m_value.asInt; }
     uint8_t getUInt8() { ASSERT(isUInt8()); return m_value.asInt; }
-    uint8_t getUInt9() { ASSERT(isUInt9()); return m_value.asInt; }
-    uint8_t getUInt10() { ASSERT(isUInt10()); return m_value.asInt; }
+    uint16_t getUInt9() { ASSERT(isUInt9()); return m_value.asInt; }
+    uint16_t getUInt10() { ASSERT(isUInt10()); return m_value.asInt; }
     uint16_t getUInt12() { ASSERT(isUInt12()); return m_value.asInt; }
     uint16_t getUInt16() { ASSERT(isUInt16()); return m_value.asInt; }
 
@@ -355,7 +371,6 @@ private:
     ThumbImmediateValue m_value;
 };
 
-
 typedef enum {
     SRType_LSL,
     SRType_LSR,
@@ -365,7 +380,6 @@ typedef enum {
     SRType_RRX = SRType_ROR
 } ARMShiftType;
 
-class ARMv7Assembler;
 class ShiftTypeAndAmount {
     friend class ARMv7Assembler;
 
@@ -393,40 +407,25 @@ private:
         };
         struct {
             unsigned type   : 2;
-            unsigned amount : 5;
+            unsigned amount : 6;
         };
     } m_u;
 };
 
-
-/*
-Some features of the Thumb instruction set are deprecated in ARMv7. Deprecated features affecting 
-instructions supported by ARMv7-M are as follows: 
-• use of the PC as <Rd> or <Rm> in a 16-bit ADD (SP plus register) instruction 
-• use of the SP as <Rm> in a 16-bit ADD (SP plus register) instruction 
-• use of the SP as <Rm> in a 16-bit CMP (register) instruction 
-• use of MOV (register) instructions in which <Rd> is the SP or PC and <Rm> is also the SP or PC. 
-• use of <Rn> as the lowest-numbered register in the register list of a 16-bit STM instruction with base 
-register writeback 
-*/
-
 class ARMv7Assembler {
 public:
-    ~ARMv7Assembler()
-    {
-        ASSERT(m_jumpsToLink.isEmpty());
-    }
-
     typedef ARMRegisters::RegisterID RegisterID;
-    typedef ARMRegisters::FPRegisterID FPRegisterID;
+    typedef ARMRegisters::FPSingleRegisterID FPSingleRegisterID;
+    typedef ARMRegisters::FPDoubleRegisterID FPDoubleRegisterID;
+    typedef ARMRegisters::FPQuadRegisterID FPQuadRegisterID;
 
     // (HS, LO, HI, LS) -> (AE, B, A, BE)
     // (VS, VC) -> (O, NO)
     typedef enum {
         ConditionEQ,
         ConditionNE,
-        ConditionHS,
-        ConditionLO,
+        ConditionHS, ConditionCS = ConditionHS,
+        ConditionLO, ConditionCC = ConditionLO,
         ConditionMI,
         ConditionPL,
         ConditionVS,
@@ -438,119 +437,85 @@ public:
         ConditionGT,
         ConditionLE,
         ConditionAL,
-
-        ConditionCS = ConditionHS,
-        ConditionCC = ConditionLO,
+        ConditionInvalid
     } Condition;
 
-    class JmpSrc {
-        friend class ARMv7Assembler;
-        friend class ARMInstructionFormatter;
-    public:
-        JmpSrc()
-            : m_offset(-1)
-        {
-        }
-
-    private:
-        JmpSrc(int offset)
-            : m_offset(offset)
-        {
-        }
-
-        int m_offset;
+#define JUMP_ENUM_WITH_SIZE(index, value) (((value) << 3) | (index))
+#define JUMP_ENUM_SIZE(jump) ((jump) >> 3) 
+    enum JumpType { JumpFixed = JUMP_ENUM_WITH_SIZE(0, 0), 
+                    JumpNoCondition = JUMP_ENUM_WITH_SIZE(1, 5 * sizeof(uint16_t)),
+                    JumpCondition = JUMP_ENUM_WITH_SIZE(2, 6 * sizeof(uint16_t)),
+                    JumpNoConditionFixedSize = JUMP_ENUM_WITH_SIZE(3, 5 * sizeof(uint16_t)),
+                    JumpConditionFixedSize = JUMP_ENUM_WITH_SIZE(4, 6 * sizeof(uint16_t))
     };
-    
-    class JmpDst {
-        friend class ARMv7Assembler;
-        friend class ARMInstructionFormatter;
+    enum JumpLinkType { 
+        LinkInvalid = JUMP_ENUM_WITH_SIZE(0, 0),
+        LinkJumpT1 = JUMP_ENUM_WITH_SIZE(1, sizeof(uint16_t)),
+        LinkJumpT2 = JUMP_ENUM_WITH_SIZE(2, sizeof(uint16_t)),
+        LinkJumpT3 = JUMP_ENUM_WITH_SIZE(3, 2 * sizeof(uint16_t)),
+        LinkJumpT4 = JUMP_ENUM_WITH_SIZE(4, 2 * sizeof(uint16_t)),
+        LinkConditionalJumpT4 = JUMP_ENUM_WITH_SIZE(5, 3 * sizeof(uint16_t)),
+        LinkBX = JUMP_ENUM_WITH_SIZE(6, 5 * sizeof(uint16_t)),
+        LinkConditionalBX = JUMP_ENUM_WITH_SIZE(7, 6 * sizeof(uint16_t))
+    };
+
+    class LinkRecord {
     public:
-        JmpDst()
-            : m_offset(-1)
-            , m_used(false)
+        LinkRecord(intptr_t from, intptr_t to, JumpType type, Condition condition)
         {
+            data.realTypes.m_from = from;
+            data.realTypes.m_to = to;
+            data.realTypes.m_type = type;
+            data.realTypes.m_linkType = LinkInvalid;
+            data.realTypes.m_condition = condition;
         }
-
-        bool isUsed() const { return m_used; }
-        void used() { m_used = true; }
-    private:
-        JmpDst(int offset)
-            : m_offset(offset)
-            , m_used(false)
+        void operator=(const LinkRecord& other)
         {
-            ASSERT(m_offset == offset);
+            data.copyTypes.content[0] = other.data.copyTypes.content[0];
+            data.copyTypes.content[1] = other.data.copyTypes.content[1];
+            data.copyTypes.content[2] = other.data.copyTypes.content[2];
         }
-
-        int m_offset : 31;
-        int m_used : 1;
+        intptr_t from() const { return data.realTypes.m_from; }
+        void setFrom(intptr_t from) { data.realTypes.m_from = from; }
+        intptr_t to() const { return data.realTypes.m_to; }
+        JumpType type() const { return data.realTypes.m_type; }
+        JumpLinkType linkType() const { return data.realTypes.m_linkType; }
+        void setLinkType(JumpLinkType linkType) { ASSERT(data.realTypes.m_linkType == LinkInvalid); data.realTypes.m_linkType = linkType; }
+        Condition condition() const { return data.realTypes.m_condition; }
+    private:
+        union {
+            struct RealTypes {
+                intptr_t m_from : 31;
+                intptr_t m_to : 31;
+                JumpType m_type : 8;
+                JumpLinkType m_linkType : 8;
+                Condition m_condition : 16;
+            } realTypes;
+            struct CopyTypes {
+                uint32_t content[3];
+            } copyTypes;
+            COMPILE_ASSERT(sizeof(RealTypes) == sizeof(CopyTypes), LinkRecordCopyStructSizeEqualsRealStruct);
+        } data;
     };
 
 private:
 
-    struct LinkRecord {
-        LinkRecord(intptr_t from, intptr_t to)
-            : from(from)
-            , to(to)
-        {
-        }
-
-        intptr_t from;
-        intptr_t to;
-    };
-
     // ARMv7, Appx-A.6.3
     bool BadReg(RegisterID reg)
     {
         return (reg == ARMRegisters::sp) || (reg == ARMRegisters::pc);
     }
 
-    bool isSingleRegister(FPRegisterID reg)
-    {
-        // Check that the high bit isn't set (q16+), and that the low bit isn't (s1, s3, etc).
-        return !(reg & ~31);
-    }
-
-    bool isDoubleRegister(FPRegisterID reg)
-    {
-        // Check that the high bit isn't set (q16+), and that the low bit isn't (s1, s3, etc).
-        return !(reg & ~(31 << 1));
-    }
-
-    bool isQuadRegister(FPRegisterID reg)
+    uint32_t singleRegisterMask(FPSingleRegisterID rdNum, int highBitsShift, int lowBitShift)
     {
-        return !(reg & ~(31 << 2));
-    }
-
-    uint32_t singleRegisterNum(FPRegisterID reg)
-    {
-        ASSERT(isSingleRegister(reg));
-        return reg;
-    }
-
-    uint32_t doubleRegisterNum(FPRegisterID reg)
-    {
-        ASSERT(isDoubleRegister(reg));
-        return reg >> 1;
-    }
-
-    uint32_t quadRegisterNum(FPRegisterID reg)
-    {
-        ASSERT(isQuadRegister(reg));
-        return reg >> 2;
-    }
-
-    uint32_t singleRegisterMask(FPRegisterID rd, int highBitsShift, int lowBitShift)
-    {
-        uint32_t rdNum = singleRegisterNum(rd);
         uint32_t rdMask = (rdNum >> 1) << highBitsShift;
         if (rdNum & 1)
             rdMask |= 1 << lowBitShift;
         return rdMask;
     }
 
-    uint32_t doubleRegisterMask(FPRegisterID rd, int highBitShift, int lowBitsShift)
+    uint32_t doubleRegisterMask(FPDoubleRegisterID rdNum, int highBitShift, int lowBitsShift)
     {
-        uint32_t rdNum = doubleRegisterNum(rd);
         uint32_t rdMask = (rdNum & 0xf) << lowBitsShift;
         if (rdNum & 16)
             rdMask |= 1 << highBitShift;
@@ -559,22 +524,17 @@ private:
 
     typedef enum {
         OP_ADD_reg_T1       = 0x1800,
-        OP_ADD_S_reg_T1     = 0x1800,
         OP_SUB_reg_T1       = 0x1A00,
-        OP_SUB_S_reg_T1     = 0x1A00,
         OP_ADD_imm_T1       = 0x1C00,
-        OP_ADD_S_imm_T1     = 0x1C00,
         OP_SUB_imm_T1       = 0x1E00,
-        OP_SUB_S_imm_T1     = 0x1E00,
         OP_MOV_imm_T1       = 0x2000,
         OP_CMP_imm_T1       = 0x2800,
         OP_ADD_imm_T2       = 0x3000,
-        OP_ADD_S_imm_T2     = 0x3000,
         OP_SUB_imm_T2       = 0x3800,
-        OP_SUB_S_imm_T2     = 0x3800,
         OP_AND_reg_T1       = 0x4000,
         OP_EOR_reg_T1       = 0x4040,
         OP_TST_reg_T1       = 0x4200,
+        OP_RSB_imm_T1       = 0x4240,
         OP_CMP_reg_T1       = 0x4280,
         OP_ORR_reg_T1       = 0x4300,
         OP_MVN_reg_T1       = 0x43C0,
@@ -582,11 +542,19 @@ private:
         OP_MOV_reg_T1       = 0x4600,
         OP_BLX              = 0x4700,
         OP_BX               = 0x4700,
-        OP_LDRH_reg_T1      = 0x5A00,
         OP_STR_reg_T1       = 0x5000,
+        OP_STRH_reg_T1      = 0x5200,
+        OP_STRB_reg_T1      = 0x5400,
+        OP_LDRSB_reg_T1     = 0x5600,
         OP_LDR_reg_T1       = 0x5800,
+        OP_LDRH_reg_T1      = 0x5A00,
+        OP_LDRB_reg_T1      = 0x5C00,
+        OP_LDRSH_reg_T1     = 0x5E00,
         OP_STR_imm_T1       = 0x6000,
         OP_LDR_imm_T1       = 0x6800,
+        OP_STRB_imm_T1      = 0x7000,
+        OP_LDRB_imm_T1      = 0x7800,
+        OP_STRH_imm_T1      = 0x8000,
         OP_LDRH_imm_T1      = 0x8800,
         OP_STR_imm_T2       = 0x9000,
         OP_LDR_imm_T2       = 0x9800,
@@ -599,9 +567,12 @@ private:
     } OpcodeID;
 
     typedef enum {
+        OP_B_T1         = 0xD000,
+        OP_B_T2         = 0xE000,
         OP_AND_reg_T2   = 0xEA00,
         OP_TST_reg_T2   = 0xEA10,
         OP_ORR_reg_T2   = 0xEA40,
+        OP_ORR_S_reg_T2 = 0xEA50,
         OP_ASR_imm_T1   = 0xEA4F,
         OP_LSL_imm_T1   = 0xEA4F,
         OP_LSR_imm_T1   = 0xEA4F,
@@ -613,6 +584,29 @@ private:
         OP_SUB_reg_T2   = 0xEBA0,
         OP_SUB_S_reg_T2 = 0xEBB0,
         OP_CMP_reg_T2   = 0xEBB0,
+        OP_VMOV_CtoD    = 0xEC00,
+        OP_VMOV_DtoC    = 0xEC10,
+        OP_FSTS         = 0xED00,
+        OP_VSTR         = 0xED00,
+        OP_FLDS         = 0xED10,
+        OP_VLDR         = 0xED10,
+        OP_VMOV_CtoS    = 0xEE00,
+        OP_VMOV_StoC    = 0xEE10,
+        OP_VMUL_T2      = 0xEE20,
+        OP_VADD_T2      = 0xEE30,
+        OP_VSUB_T2      = 0xEE30,
+        OP_VDIV         = 0xEE80,
+        OP_VABS_T2      = 0xEEB0,
+        OP_VCMP         = 0xEEB0,
+        OP_VCVT_FPIVFP  = 0xEEB0,
+        OP_VMOV_T2      = 0xEEB0,
+        OP_VMOV_IMM_T2  = 0xEEB0,
+        OP_VMRS         = 0xEEB0,
+        OP_VNEG_T2      = 0xEEB0,
+        OP_VSQRT_T1     = 0xEEB0,
+        OP_VCVTSD_T1    = 0xEEB0,
+        OP_VCVTDS_T1    = 0xEEB0,
+        OP_B_T3a        = 0xF000,
         OP_B_T4a        = 0xF000,
         OP_AND_imm_T1   = 0xF000,
         OP_TST_imm      = 0xF010,
@@ -623,33 +617,72 @@ private:
         OP_ADD_imm_T3   = 0xF100,
         OP_ADD_S_imm_T3 = 0xF110,
         OP_CMN_imm      = 0xF110,
+        OP_ADC_imm      = 0xF140,
         OP_SUB_imm_T3   = 0xF1A0,
         OP_SUB_S_imm_T3 = 0xF1B0,
         OP_CMP_imm_T2   = 0xF1B0,
+        OP_RSB_imm_T2   = 0xF1C0,
+        OP_RSB_S_imm_T2 = 0xF1D0,
         OP_ADD_imm_T4   = 0xF200,
         OP_MOV_imm_T3   = 0xF240,
         OP_SUB_imm_T4   = 0xF2A0,
         OP_MOVT         = 0xF2C0,
+        OP_UBFX_T1      = 0xF3C0,
         OP_NOP_T2a      = 0xF3AF,
+        OP_STRB_imm_T3  = 0xF800,
+        OP_STRB_reg_T2  = 0xF800,
+        OP_LDRB_imm_T3  = 0xF810,
+        OP_LDRB_reg_T2  = 0xF810,
+        OP_STRH_imm_T3  = 0xF820,
+        OP_STRH_reg_T2  = 0xF820,
         OP_LDRH_reg_T2  = 0xF830,
         OP_LDRH_imm_T3  = 0xF830,
         OP_STR_imm_T4   = 0xF840,
         OP_STR_reg_T2   = 0xF840,
         OP_LDR_imm_T4   = 0xF850,
         OP_LDR_reg_T2   = 0xF850,
+        OP_STRB_imm_T2  = 0xF880,
+        OP_LDRB_imm_T2  = 0xF890,
+        OP_STRH_imm_T2  = 0xF8A0,
         OP_LDRH_imm_T2  = 0xF8B0,
         OP_STR_imm_T3   = 0xF8C0,
         OP_LDR_imm_T3   = 0xF8D0,
+        OP_LDRSB_reg_T2 = 0xF910,
+        OP_LDRSH_reg_T2 = 0xF930,
         OP_LSL_reg_T2   = 0xFA00,
         OP_LSR_reg_T2   = 0xFA20,
         OP_ASR_reg_T2   = 0xFA40,
         OP_ROR_reg_T2   = 0xFA60,
+        OP_CLZ          = 0xFAB0,
         OP_SMULL_T1     = 0xFB80,
     } OpcodeID1;
 
     typedef enum {
-        OP_B_T4b        = 0x9000,
+        OP_VADD_T2b     = 0x0A00,
+        OP_VDIVb        = 0x0A00,
+        OP_FLDSb        = 0x0A00,
+        OP_VLDRb        = 0x0A00,
+        OP_VMOV_IMM_T2b = 0x0A00,
+        OP_VMOV_T2b     = 0x0A40,
+        OP_VMUL_T2b     = 0x0A00,
+        OP_FSTSb        = 0x0A00,
+        OP_VSTRb        = 0x0A00,
+        OP_VMOV_StoCb   = 0x0A10,
+        OP_VMOV_CtoSb   = 0x0A10,
+        OP_VMOV_DtoCb   = 0x0A10,
+        OP_VMOV_CtoDb   = 0x0A10,
+        OP_VMRSb        = 0x0A10,
+        OP_VABS_T2b     = 0x0A40,
+        OP_VCMPb        = 0x0A40,
+        OP_VCVT_FPIVFPb = 0x0A40,
+        OP_VNEG_T2b     = 0x0A40,
+        OP_VSUB_T2b     = 0x0A40,
+        OP_VSQRT_T1b    = 0x0A40,
+        OP_VCVTSD_T1b   = 0x0A40,
+        OP_VCVTDS_T1b   = 0x0A40,
         OP_NOP_T2b      = 0x8000,
+        OP_B_T3b        = 0x8000,
+        OP_B_T4b        = 0x9000,
     } OpcodeID2;
 
     struct FourFours {
@@ -685,7 +718,7 @@ private:
             | (ifThenElseConditionBit(condition, inst3if) << 2)
             | (ifThenElseConditionBit(condition, inst4if) << 1)
             | 1;
-        ASSERT((condition != ConditionAL) || (mask & (mask - 1)));
+        ASSERT((condition != ConditionAL) || !(mask & (mask - 1)));
         return (condition << 4) | mask;
     }
     uint8_t ifThenElse(Condition condition, bool inst2if, bool inst3if)
@@ -693,25 +726,35 @@ private:
         int mask = (ifThenElseConditionBit(condition, inst2if) << 3)
             | (ifThenElseConditionBit(condition, inst3if) << 2)
             | 2;
-        ASSERT((condition != ConditionAL) || (mask & (mask - 1)));
+        ASSERT((condition != ConditionAL) || !(mask & (mask - 1)));
         return (condition << 4) | mask;
     }
     uint8_t ifThenElse(Condition condition, bool inst2if)
     {
         int mask = (ifThenElseConditionBit(condition, inst2if) << 3)
             | 4;
-        ASSERT((condition != ConditionAL) || (mask & (mask - 1)));
+        ASSERT((condition != ConditionAL) || !(mask & (mask - 1)));
         return (condition << 4) | mask;
     }
 
     uint8_t ifThenElse(Condition condition)
     {
         int mask = 8;
-        ASSERT((condition != ConditionAL) || (mask & (mask - 1)));
         return (condition << 4) | mask;
     }
 
 public:
+    
+    void adc(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
+    {
+        // Rd can only be SP if Rn is also SP.
+        ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(imm.isEncodedImm());
+
+        m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_ADC_imm, rn, rd, imm);
+    }
 
     void add(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
@@ -722,11 +765,12 @@ public:
         ASSERT(imm.isValid());
 
         if (rn == ARMRegisters::sp) {
+            ASSERT(!(imm.getUInt16() & 3));
             if (!(rd & 8) && imm.isUInt10()) {
-                m_formatter.oneWordOp5Reg3Imm8(OP_ADD_SP_imm_T1, rd, imm.getUInt10() >> 2);
+                m_formatter.oneWordOp5Reg3Imm8(OP_ADD_SP_imm_T1, rd, static_cast<uint8_t>(imm.getUInt10() >> 2));
                 return;
             } else if ((rd == ARMRegisters::sp) && imm.isUInt9()) {
-                m_formatter.oneWordOp9Imm7(OP_ADD_SP_imm_T2, imm.getUInt9() >> 2);
+                m_formatter.oneWordOp9Imm7(OP_ADD_SP_imm_T2, static_cast<uint8_t>(imm.getUInt9() >> 2));
                 return;
             }
         } else if (!((rd | rn) & 8)) {
@@ -747,7 +791,7 @@ public:
         }
     }
 
-    void add(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void add(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
         ASSERT(rd != ARMRegisters::pc);
@@ -757,7 +801,7 @@ public:
     }
 
     // NOTE: In an IT block, add doesn't modify the flags register.
-    void add(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void add(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         if (rd == rn)
             m_formatter.oneWordOp8RegReg143(OP_ADD_reg_T2, rm, rd);
@@ -770,7 +814,7 @@ public:
     }
 
     // Not allowed in an IT (if then) block.
-    void add_S(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void add_S(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         // Rd can only be SP if Rn is also SP.
         ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
@@ -780,10 +824,10 @@ public:
 
         if (!((rd | rn) & 8)) {
             if (imm.isUInt3()) {
-                m_formatter.oneWordOp7Reg3Reg3Reg3(OP_ADD_S_imm_T1, (RegisterID)imm.getUInt3(), rn, rd);
+                m_formatter.oneWordOp7Reg3Reg3Reg3(OP_ADD_imm_T1, (RegisterID)imm.getUInt3(), rn, rd);
                 return;
             } else if ((rd == rn) && imm.isUInt8()) {
-                m_formatter.oneWordOp5Reg3Imm8(OP_ADD_S_imm_T2, rd, imm.getUInt8());
+                m_formatter.oneWordOp5Reg3Imm8(OP_ADD_imm_T2, rd, imm.getUInt8());
                 return;
             }
         }
@@ -792,7 +836,7 @@ public:
     }
 
     // Not allowed in an IT (if then) block?
-    void add_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void add_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
         ASSERT(rd != ARMRegisters::pc);
@@ -802,15 +846,15 @@ public:
     }
 
     // Not allowed in an IT (if then) block.
-    void add_S(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void add_S(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         if (!((rd | rn | rm) & 8))
-            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_ADD_S_reg_T1, rm, rn, rd);
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_ADD_reg_T1, rm, rn, rd);
         else
             add_S(rd, rn, rm, ShiftTypeAndAmount());
     }
 
-    void ARM_and(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void ARM_and(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -818,7 +862,7 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_AND_imm_T1, rn, rd, imm);
     }
 
-    void ARM_and(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void ARM_and(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -826,7 +870,7 @@ public:
         m_formatter.twoWordOp12Reg4FourFours(OP_AND_reg_T2, rn, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
 
-    void ARM_and(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void ARM_and(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         if ((rd == rn) && !((rd | rm) & 8))
             m_formatter.oneWordOp10Reg3Reg3(OP_AND_reg_T1, rm, rd);
@@ -836,7 +880,7 @@ public:
             ARM_and(rd, rn, rm, ShiftTypeAndAmount());
     }
 
-    void asr(RegisterID rd, RegisterID rm, int32_t shiftAmount)
+    ALWAYS_INLINE void asr(RegisterID rd, RegisterID rm, int32_t shiftAmount)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rm));
@@ -844,42 +888,49 @@ public:
         m_formatter.twoWordOp16FourFours(OP_ASR_imm_T1, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
 
-    void asr(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void asr(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp12Reg4FourFours(OP_ASR_reg_T2, rn, FourFours(0xf, rd, 0, rm));
     }
-
+    
     // Only allowed in IT (if then) block if last instruction.
-    JmpSrc b()
+    ALWAYS_INLINE AssemblerLabel b()
     {
         m_formatter.twoWordOp16Op16(OP_B_T4a, OP_B_T4b);
-        return JmpSrc(m_formatter.size());
+        return m_formatter.label();
     }
     
     // Only allowed in IT (if then) block if last instruction.
-    JmpSrc blx(RegisterID rm)
+    ALWAYS_INLINE AssemblerLabel blx(RegisterID rm)
     {
         ASSERT(rm != ARMRegisters::pc);
         m_formatter.oneWordOp8RegReg143(OP_BLX, rm, (RegisterID)8);
-        return JmpSrc(m_formatter.size());
+        return m_formatter.label();
     }
 
     // Only allowed in IT (if then) block if last instruction.
-    JmpSrc bx(RegisterID rm)
+    ALWAYS_INLINE AssemblerLabel bx(RegisterID rm)
     {
         m_formatter.oneWordOp8RegReg143(OP_BX, rm, (RegisterID)0);
-        return JmpSrc(m_formatter.size());
+        return m_formatter.label();
     }
 
-    void bkpt(uint8_t imm=0)
+    void bkpt(uint8_t imm = 0)
     {
         m_formatter.oneWordOp8Imm8(OP_BKPT, imm);
     }
 
-    void cmn(RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void clz(RegisterID rd, RegisterID rm)
+    {
+        ASSERT(!BadReg(rd));
+        ASSERT(!BadReg(rm));
+        m_formatter.twoWordOp12Reg4FourFours(OP_CLZ, rm, FourFours(0xf, rd, 8, rm));
+    }
+
+    ALWAYS_INLINE void cmn(RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(rn != ARMRegisters::pc);
         ASSERT(imm.isEncodedImm());
@@ -887,7 +938,7 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_CMN_imm, rn, (RegisterID)0xf, imm);
     }
 
-    void cmp(RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void cmp(RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(rn != ARMRegisters::pc);
         ASSERT(imm.isEncodedImm());
@@ -898,14 +949,14 @@ public:
             m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_CMP_imm_T2, rn, (RegisterID)0xf, imm);
     }
 
-    void cmp(RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void cmp(RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(rn != ARMRegisters::pc);
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp12Reg4FourFours(OP_CMP_reg_T2, rn, FourFours(shift.hi4(), 0xf, shift.lo4(), rm));
     }
 
-    void cmp(RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void cmp(RegisterID rn, RegisterID rm)
     {
         if ((rn | rm) & 8)
             cmp(rn, rm, ShiftTypeAndAmount());
@@ -914,7 +965,7 @@ public:
     }
 
     // xor is not spelled with an 'e'. :-(
-    void eor(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void eor(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -923,7 +974,7 @@ public:
     }
 
     // xor is not spelled with an 'e'. :-(
-    void eor(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void eor(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -942,28 +993,28 @@ public:
             eor(rd, rn, rm, ShiftTypeAndAmount());
     }
 
-    void it(Condition cond)
+    ALWAYS_INLINE void it(Condition cond)
     {
         m_formatter.oneWordOp8Imm8(OP_IT, ifThenElse(cond));
     }
 
-    void it(Condition cond, bool inst2if)
+    ALWAYS_INLINE void it(Condition cond, bool inst2if)
     {
         m_formatter.oneWordOp8Imm8(OP_IT, ifThenElse(cond, inst2if));
     }
 
-    void it(Condition cond, bool inst2if, bool inst3if)
+    ALWAYS_INLINE void it(Condition cond, bool inst2if, bool inst3if)
     {
         m_formatter.oneWordOp8Imm8(OP_IT, ifThenElse(cond, inst2if, inst3if));
     }
 
-    void it(Condition cond, bool inst2if, bool inst3if, bool inst4if)
+    ALWAYS_INLINE void it(Condition cond, bool inst2if, bool inst3if, bool inst4if)
     {
         m_formatter.oneWordOp8Imm8(OP_IT, ifThenElse(cond, inst2if, inst3if, inst4if));
     }
 
     // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
-    void ldr(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void ldr(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(rn != ARMRegisters::pc); // LDR (literal)
         ASSERT(imm.isUInt12());
@@ -971,11 +1022,19 @@ public:
         if (!((rt | rn) & 8) && imm.isUInt7())
             m_formatter.oneWordOp5Imm5Reg3Reg3(OP_LDR_imm_T1, imm.getUInt7() >> 2, rn, rt);
         else if ((rn == ARMRegisters::sp) && !(rt & 8) && imm.isUInt10())
-            m_formatter.oneWordOp5Reg3Imm8(OP_LDR_imm_T2, rt, imm.getUInt10() >> 2);
+            m_formatter.oneWordOp5Reg3Imm8(OP_LDR_imm_T2, rt, static_cast<uint8_t>(imm.getUInt10() >> 2));
         else
             m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDR_imm_T3, rn, rt, imm.getUInt12());
     }
 
+    ALWAYS_INLINE void ldrCompact(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    {
+        ASSERT(rn != ARMRegisters::pc); // LDR (literal)
+        ASSERT(imm.isUInt7());
+        ASSERT(!((rt | rn) & 8));
+        m_formatter.oneWordOp5Imm5Reg3Reg3(OP_LDR_imm_T1, imm.getUInt7() >> 2, rn, rt);
+    }
+
     // If index is set, this is a regular offset or a pre-indexed load;
     // if index is not set then is is a post-index load.
     //
@@ -987,7 +1046,7 @@ public:
     // _tmp = _reg + offset
     // MEM[index ? _tmp : _reg] = REG[rt]
     // if (wback) REG[rn] = _tmp
-    void ldr(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
+    ALWAYS_INLINE void ldr(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
     {
         ASSERT(rt != ARMRegisters::pc);
         ASSERT(rn != ARMRegisters::pc);
@@ -1010,7 +1069,7 @@ public:
     }
 
     // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
-    void ldr(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift=0)
+    ALWAYS_INLINE void ldr(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
     {
         ASSERT(rn != ARMRegisters::pc); // LDR (literal)
         ASSERT(!BadReg(rm));
@@ -1023,7 +1082,7 @@ public:
     }
 
     // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
-    void ldrh(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void ldrh(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(rn != ARMRegisters::pc); // LDR (literal)
         ASSERT(imm.isUInt12());
@@ -1045,7 +1104,7 @@ public:
     // _tmp = _reg + offset
     // MEM[index ? _tmp : _reg] = REG[rt]
     // if (wback) REG[rn] = _tmp
-    void ldrh(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
+    ALWAYS_INLINE void ldrh(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
     {
         ASSERT(rt != ARMRegisters::pc);
         ASSERT(rn != ARMRegisters::pc);
@@ -1067,7 +1126,7 @@ public:
         m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDRH_imm_T3, rn, rt, offset);
     }
 
-    void ldrh(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift=0)
+    ALWAYS_INLINE void ldrh(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
     {
         ASSERT(!BadReg(rt));   // Memory hint
         ASSERT(rn != ARMRegisters::pc); // LDRH (literal)
@@ -1080,6 +1139,76 @@ public:
             m_formatter.twoWordOp12Reg4FourFours(OP_LDRH_reg_T2, rn, FourFours(rt, 0, shift, rm));
     }
 
+    void ldrb(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    {
+        ASSERT(rn != ARMRegisters::pc); // LDR (literal)
+        ASSERT(imm.isUInt12());
+
+        if (!((rt | rn) & 8) && imm.isUInt5())
+            m_formatter.oneWordOp5Imm5Reg3Reg3(OP_LDRB_imm_T1, imm.getUInt5(), rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDRB_imm_T2, rn, rt, imm.getUInt12());
+    }
+
+    void ldrb(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
+    {
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(index || wback);
+        ASSERT(!wback | (rt != rn));
+
+        bool add = true;
+        if (offset < 0) {
+            add = false;
+            offset = -offset;
+        }
+
+        ASSERT(!(offset & ~0xff));
+
+        offset |= (wback << 8);
+        offset |= (add   << 9);
+        offset |= (index << 10);
+        offset |= (1 << 11);
+
+        m_formatter.twoWordOp12Reg4Reg4Imm12(OP_LDRB_imm_T3, rn, rt, offset);
+    }
+
+    ALWAYS_INLINE void ldrb(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
+    {
+        ASSERT(rn != ARMRegisters::pc); // LDR (literal)
+        ASSERT(!BadReg(rm));
+        ASSERT(shift <= 3);
+
+        if (!shift && !((rt | rn | rm) & 8))
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_LDRB_reg_T1, rm, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4FourFours(OP_LDRB_reg_T2, rn, FourFours(rt, 0, shift, rm));
+    }
+    
+    void ldrsb(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
+    {
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(!BadReg(rm));
+        ASSERT(shift <= 3);
+        
+        if (!shift && !((rt | rn | rm) & 8))
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_LDRSB_reg_T1, rm, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4FourFours(OP_LDRSB_reg_T2, rn, FourFours(rt, 0, shift, rm));
+    }
+
+    void ldrsh(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
+    {
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(!BadReg(rm));
+        ASSERT(shift <= 3);
+        
+        if (!shift && !((rt | rn | rm) & 8))
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_LDRSH_reg_T1, rm, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4FourFours(OP_LDRSH_reg_T2, rn, FourFours(rt, 0, shift, rm));
+    }
+
     void lsl(RegisterID rd, RegisterID rm, int32_t shiftAmount)
     {
         ASSERT(!BadReg(rd));
@@ -1088,7 +1217,7 @@ public:
         m_formatter.twoWordOp16FourFours(OP_LSL_imm_T1, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
 
-    void lsl(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void lsl(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -1096,7 +1225,7 @@ public:
         m_formatter.twoWordOp12Reg4FourFours(OP_LSL_reg_T2, rn, FourFours(0xf, rd, 0, rm));
     }
 
-    void lsr(RegisterID rd, RegisterID rm, int32_t shiftAmount)
+    ALWAYS_INLINE void lsr(RegisterID rd, RegisterID rm, int32_t shiftAmount)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rm));
@@ -1104,7 +1233,7 @@ public:
         m_formatter.twoWordOp16FourFours(OP_LSR_imm_T1, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
 
-    void lsr(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void lsr(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -1112,7 +1241,7 @@ public:
         m_formatter.twoWordOp12Reg4FourFours(OP_LSR_reg_T2, rn, FourFours(0xf, rd, 0, rm));
     }
 
-    void movT3(RegisterID rd, ARMThumbImmediate imm)
+    ALWAYS_INLINE void movT3(RegisterID rd, ARMThumbImmediate imm)
     {
         ASSERT(imm.isValid());
         ASSERT(!imm.isEncodedImm());
@@ -1121,7 +1250,7 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_MOV_imm_T3, imm.m_value.imm4, rd, imm);
     }
 
-     void mov(RegisterID rd, ARMThumbImmediate imm)
+    ALWAYS_INLINE void mov(RegisterID rd, ARMThumbImmediate imm)
     {
         ASSERT(imm.isValid());
         ASSERT(!BadReg(rd));
@@ -1134,19 +1263,19 @@ public:
             movT3(rd, imm);
     }
 
-   void mov(RegisterID rd, RegisterID rm)
+    ALWAYS_INLINE void mov(RegisterID rd, RegisterID rm)
     {
         m_formatter.oneWordOp8RegReg143(OP_MOV_reg_T1, rm, rd);
     }
 
-    void movt(RegisterID rd, ARMThumbImmediate imm)
+    ALWAYS_INLINE void movt(RegisterID rd, ARMThumbImmediate imm)
     {
         ASSERT(imm.isUInt16());
         ASSERT(!BadReg(rd));
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_MOVT, imm.m_value.imm4, rd, imm);
     }
 
-    void mvn(RegisterID rd, ARMThumbImmediate imm)
+    ALWAYS_INLINE void mvn(RegisterID rd, ARMThumbImmediate imm)
     {
         ASSERT(imm.isEncodedImm());
         ASSERT(!BadReg(rd));
@@ -1154,14 +1283,14 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_MVN_imm, 0xf, rd, imm);
     }
 
-    void mvn(RegisterID rd, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void mvn(RegisterID rd, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp16FourFours(OP_MVN_reg_T2, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
 
-    void mvn(RegisterID rd, RegisterID rm)
+    ALWAYS_INLINE void mvn(RegisterID rd, RegisterID rm)
     {
         if (!((rd | rm) & 8))
             m_formatter.oneWordOp10Reg3Reg3(OP_MVN_reg_T1, rm, rd);
@@ -1169,7 +1298,13 @@ public:
             mvn(rd, rm, ShiftTypeAndAmount());
     }
 
-    void orr(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void neg(RegisterID rd, RegisterID rm)
+    {
+        ARMThumbImmediate zero = ARMThumbImmediate::makeUInt12(0);
+        sub(rd, zero, rm);
+    }
+
+    ALWAYS_INLINE void orr(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -1177,7 +1312,7 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_ORR_imm_T1, rn, rd, imm);
     }
 
-    void orr(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void orr(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -1195,7 +1330,25 @@ public:
             orr(rd, rn, rm, ShiftTypeAndAmount());
     }
 
-    void ror(RegisterID rd, RegisterID rm, int32_t shiftAmount)
+    ALWAYS_INLINE void orr_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    {
+        ASSERT(!BadReg(rd));
+        ASSERT(!BadReg(rn));
+        ASSERT(!BadReg(rm));
+        m_formatter.twoWordOp12Reg4FourFours(OP_ORR_S_reg_T2, rn, FourFours(shift.hi4(), rd, shift.lo4(), rm));
+    }
+
+    void orr_S(RegisterID rd, RegisterID rn, RegisterID rm)
+    {
+        if ((rd == rn) && !((rd | rm) & 8))
+            m_formatter.oneWordOp10Reg3Reg3(OP_ORR_reg_T1, rm, rd);
+        else if ((rd == rm) && !((rd | rn) & 8))
+            m_formatter.oneWordOp10Reg3Reg3(OP_ORR_reg_T1, rn, rd);
+        else
+            orr_S(rd, rn, rm, ShiftTypeAndAmount());
+    }
+
+    ALWAYS_INLINE void ror(RegisterID rd, RegisterID rm, int32_t shiftAmount)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rm));
@@ -1203,7 +1356,7 @@ public:
         m_formatter.twoWordOp16FourFours(OP_ROR_imm_T1, FourFours(shift.hi4(), rd, shift.lo4(), rm));
     }
 
-    void ror(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void ror(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         ASSERT(!BadReg(rd));
         ASSERT(!BadReg(rn));
@@ -1211,7 +1364,7 @@ public:
         m_formatter.twoWordOp12Reg4FourFours(OP_ROR_reg_T2, rn, FourFours(0xf, rd, 0, rm));
     }
 
-    void smull(RegisterID rdLo, RegisterID rdHi, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void smull(RegisterID rdLo, RegisterID rdHi, RegisterID rn, RegisterID rm)
     {
         ASSERT(!BadReg(rdLo));
         ASSERT(!BadReg(rdHi));
@@ -1222,7 +1375,7 @@ public:
     }
 
     // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
-    void str(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void str(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(rt != ARMRegisters::pc);
         ASSERT(rn != ARMRegisters::pc);
@@ -1231,7 +1384,7 @@ public:
         if (!((rt | rn) & 8) && imm.isUInt7())
             m_formatter.oneWordOp5Imm5Reg3Reg3(OP_STR_imm_T1, imm.getUInt7() >> 2, rn, rt);
         else if ((rn == ARMRegisters::sp) && !(rt & 8) && imm.isUInt10())
-            m_formatter.oneWordOp5Reg3Imm8(OP_STR_imm_T2, rt, imm.getUInt10() >> 2);
+            m_formatter.oneWordOp5Reg3Imm8(OP_STR_imm_T2, rt, static_cast<uint8_t>(imm.getUInt10() >> 2));
         else
             m_formatter.twoWordOp12Reg4Reg4Imm12(OP_STR_imm_T3, rn, rt, imm.getUInt12());
     }
@@ -1247,7 +1400,7 @@ public:
     // _tmp = _reg + offset
     // MEM[index ? _tmp : _reg] = REG[rt]
     // if (wback) REG[rn] = _tmp
-    void str(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
+    ALWAYS_INLINE void str(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
     {
         ASSERT(rt != ARMRegisters::pc);
         ASSERT(rn != ARMRegisters::pc);
@@ -1270,7 +1423,7 @@ public:
     }
 
     // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
-    void str(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift=0)
+    ALWAYS_INLINE void str(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
     {
         ASSERT(rn != ARMRegisters::pc);
         ASSERT(!BadReg(rm));
@@ -1282,7 +1435,125 @@ public:
             m_formatter.twoWordOp12Reg4FourFours(OP_STR_reg_T2, rn, FourFours(rt, 0, shift, rm));
     }
 
-    void sub(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
+    // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
+    ALWAYS_INLINE void strb(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    {
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(imm.isUInt12());
+
+        if (!((rt | rn) & 8) && imm.isUInt7())
+            m_formatter.oneWordOp5Imm5Reg3Reg3(OP_STRB_imm_T1, imm.getUInt7() >> 2, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4Reg4Imm12(OP_STRB_imm_T2, rn, rt, imm.getUInt12());
+    }
+
+    // If index is set, this is a regular offset or a pre-indexed store;
+    // if index is not set then is is a post-index store.
+    //
+    // If wback is set rn is updated - this is a pre or post index store,
+    // if wback is not set this is a regular offset memory access.
+    //
+    // (-255 <= offset <= 255)
+    // _reg = REG[rn]
+    // _tmp = _reg + offset
+    // MEM[index ? _tmp : _reg] = REG[rt]
+    // if (wback) REG[rn] = _tmp
+    ALWAYS_INLINE void strb(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
+    {
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(index || wback);
+        ASSERT(!wback | (rt != rn));
+    
+        bool add = true;
+        if (offset < 0) {
+            add = false;
+            offset = -offset;
+        }
+        ASSERT((offset & ~0xff) == 0);
+        
+        offset |= (wback << 8);
+        offset |= (add   << 9);
+        offset |= (index << 10);
+        offset |= (1 << 11);
+        
+        m_formatter.twoWordOp12Reg4Reg4Imm12(OP_STRB_imm_T3, rn, rt, offset);
+    }
+
+    // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
+    ALWAYS_INLINE void strb(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
+    {
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(!BadReg(rm));
+        ASSERT(shift <= 3);
+
+        if (!shift && !((rt | rn | rm) & 8))
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_STRB_reg_T1, rm, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4FourFours(OP_STRB_reg_T2, rn, FourFours(rt, 0, shift, rm));
+    }
+    
+    // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
+    ALWAYS_INLINE void strh(RegisterID rt, RegisterID rn, ARMThumbImmediate imm)
+    {
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(imm.isUInt12());
+        
+        if (!((rt | rn) & 8) && imm.isUInt7())
+            m_formatter.oneWordOp5Imm5Reg3Reg3(OP_STRH_imm_T1, imm.getUInt7() >> 2, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4Reg4Imm12(OP_STRH_imm_T2, rn, rt, imm.getUInt12());
+    }
+    
+    // If index is set, this is a regular offset or a pre-indexed store;
+    // if index is not set then is is a post-index store.
+    //
+    // If wback is set rn is updated - this is a pre or post index store,
+    // if wback is not set this is a regular offset memory access.
+    //
+    // (-255 <= offset <= 255)
+    // _reg = REG[rn]
+    // _tmp = _reg + offset
+    // MEM[index ? _tmp : _reg] = REG[rt]
+    // if (wback) REG[rn] = _tmp
+    ALWAYS_INLINE void strh(RegisterID rt, RegisterID rn, int offset, bool index, bool wback)
+    {
+        ASSERT(rt != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(index || wback);
+        ASSERT(!wback | (rt != rn));
+        
+        bool add = true;
+        if (offset < 0) {
+            add = false;
+            offset = -offset;
+        }
+        ASSERT(!(offset & ~0xff));
+        
+        offset |= (wback << 8);
+        offset |= (add   << 9);
+        offset |= (index << 10);
+        offset |= (1 << 11);
+        
+        m_formatter.twoWordOp12Reg4Reg4Imm12(OP_STRH_imm_T3, rn, rt, offset);
+    }
+    
+    // rt == ARMRegisters::pc only allowed if last instruction in IT (if then) block.
+    ALWAYS_INLINE void strh(RegisterID rt, RegisterID rn, RegisterID rm, unsigned shift = 0)
+    {
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(!BadReg(rm));
+        ASSERT(shift <= 3);
+        
+        if (!shift && !((rt | rn | rm) & 8))
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_STRH_reg_T1, rm, rn, rt);
+        else
+            m_formatter.twoWordOp12Reg4FourFours(OP_STRH_reg_T2, rn, FourFours(rt, 0, shift, rm));
+    }
+
+    ALWAYS_INLINE void sub(RegisterID rd, RegisterID rn, ARMThumbImmediate imm)
     {
         // Rd can only be SP if Rn is also SP.
         ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
@@ -1291,7 +1562,8 @@ public:
         ASSERT(imm.isValid());
 
         if ((rn == ARMRegisters::sp) && (rd == ARMRegisters::sp) && imm.isUInt9()) {
-            m_formatter.oneWordOp9Imm7(OP_SUB_SP_imm_T1, imm.getUInt9() >> 2);
+            ASSERT(!(imm.getUInt16() & 3));
+            m_formatter.oneWordOp9Imm7(OP_SUB_SP_imm_T1, static_cast<uint8_t>(imm.getUInt9() >> 2));
             return;
         } else if (!((rd | rn) & 8)) {
             if (imm.isUInt3()) {
@@ -1311,7 +1583,20 @@ public:
         }
     }
 
-    void sub(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void sub(RegisterID rd, ARMThumbImmediate imm, RegisterID rn)
+    {
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(imm.isValid());
+        ASSERT(imm.isUInt12());
+
+        if (!((rd | rn) & 8) && !imm.getUInt12())
+            m_formatter.oneWordOp10Reg3Reg3(OP_RSB_imm_T1, rn, rd);
+        else
+            m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_RSB_imm_T2, rn, rd, imm);
+    }
+
+    ALWAYS_INLINE void sub(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
         ASSERT(rd != ARMRegisters::pc);
@@ -1321,7 +1606,7 @@ public:
     }
 
     // NOTE: In an IT block, add doesn't modify the flags register.
-    void sub(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void sub(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         if (!((rd | rn | rm) & 8))
             m_formatter.oneWordOp7Reg3Reg3Reg3(OP_SUB_reg_T1, rm, rn, rd);
@@ -1339,14 +1624,15 @@ public:
         ASSERT(imm.isValid());
 
         if ((rn == ARMRegisters::sp) && (rd == ARMRegisters::sp) && imm.isUInt9()) {
-            m_formatter.oneWordOp9Imm7(OP_SUB_SP_imm_T1, imm.getUInt9() >> 2);
+            ASSERT(!(imm.getUInt16() & 3));
+            m_formatter.oneWordOp9Imm7(OP_SUB_SP_imm_T1, static_cast<uint8_t>(imm.getUInt9() >> 2));
             return;
         } else if (!((rd | rn) & 8)) {
             if (imm.isUInt3()) {
-                m_formatter.oneWordOp7Reg3Reg3Reg3(OP_SUB_S_imm_T1, (RegisterID)imm.getUInt3(), rn, rd);
+                m_formatter.oneWordOp7Reg3Reg3Reg3(OP_SUB_imm_T1, (RegisterID)imm.getUInt3(), rn, rd);
                 return;
             } else if ((rd == rn) && imm.isUInt8()) {
-                m_formatter.oneWordOp5Reg3Imm8(OP_SUB_S_imm_T2, rd, imm.getUInt8());
+                m_formatter.oneWordOp5Reg3Imm8(OP_SUB_imm_T2, rd, imm.getUInt8());
                 return;
             }
         }
@@ -1354,8 +1640,18 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_SUB_S_imm_T3, rn, rd, imm);
     }
 
+    ALWAYS_INLINE void sub_S(RegisterID rd, ARMThumbImmediate imm, RegisterID rn)
+    {
+        ASSERT(rd != ARMRegisters::pc);
+        ASSERT(rn != ARMRegisters::pc);
+        ASSERT(imm.isValid());
+        ASSERT(imm.isUInt12());
+
+        m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_RSB_S_imm_T2, rn, rd, imm);
+    }
+
     // Not allowed in an IT (if then) block?
-    void sub_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void sub_S(RegisterID rd, RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT((rd != ARMRegisters::sp) || (rn == ARMRegisters::sp));
         ASSERT(rd != ARMRegisters::pc);
@@ -1365,15 +1661,15 @@ public:
     }
 
     // Not allowed in an IT (if then) block.
-    void sub_S(RegisterID rd, RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void sub_S(RegisterID rd, RegisterID rn, RegisterID rm)
     {
         if (!((rd | rn | rm) & 8))
-            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_SUB_S_reg_T1, rm, rn, rd);
+            m_formatter.oneWordOp7Reg3Reg3Reg3(OP_SUB_reg_T1, rm, rn, rd);
         else
             sub_S(rd, rn, rm, ShiftTypeAndAmount());
     }
 
-    void tst(RegisterID rn, ARMThumbImmediate imm)
+    ALWAYS_INLINE void tst(RegisterID rn, ARMThumbImmediate imm)
     {
         ASSERT(!BadReg(rn));
         ASSERT(imm.isEncodedImm());
@@ -1381,14 +1677,14 @@ public:
         m_formatter.twoWordOp5i6Imm4Reg4EncodedImm(OP_TST_imm, rn, (RegisterID)0xf, imm);
     }
 
-    void tst(RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
+    ALWAYS_INLINE void tst(RegisterID rn, RegisterID rm, ShiftTypeAndAmount shift)
     {
         ASSERT(!BadReg(rn));
         ASSERT(!BadReg(rm));
         m_formatter.twoWordOp12Reg4FourFours(OP_TST_reg_T2, rn, FourFours(shift.hi4(), 0xf, shift.lo4(), rm));
     }
 
-    void tst(RegisterID rn, RegisterID rm)
+    ALWAYS_INLINE void tst(RegisterID rn, RegisterID rm)
     {
         if ((rn | rm) & 8)
             tst(rn, rm, ShiftTypeAndAmount());
@@ -1396,131 +1692,307 @@ public:
             m_formatter.oneWordOp10Reg3Reg3(OP_TST_reg_T1, rm, rn);
     }
 
-    void vadd_F64(FPRegisterID rd, FPRegisterID rn, FPRegisterID rm)
+    ALWAYS_INLINE void ubfx(RegisterID rd, RegisterID rn, unsigned lsb, unsigned width)
     {
-        m_formatter.vfpOp(0x0b00ee30 | doubleRegisterMask(rd, 6, 28) | doubleRegisterMask(rn, 23, 0) | doubleRegisterMask(rm, 21, 16));
+        ASSERT(lsb < 32);
+        ASSERT((width >= 1) && (width <= 32));
+        ASSERT((lsb + width) <= 32);
+        m_formatter.twoWordOp12Reg40Imm3Reg4Imm20Imm5(OP_UBFX_T1, rd, rn, (lsb & 0x1c) << 10, (lsb & 0x3) << 6, (width - 1) & 0x1f);
     }
 
-    void vcmp_F64(FPRegisterID rd, FPRegisterID rm)
+    void vadd(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
     {
-        m_formatter.vfpOp(0x0bc0eeb4 | doubleRegisterMask(rd, 6, 28) | doubleRegisterMask(rm, 21, 16));
+        m_formatter.vfpOp(OP_VADD_T2, OP_VADD_T2b, true, rn, rd, rm);
     }
 
-    void vcvt_F64_S32(FPRegisterID fd, FPRegisterID sm)
+    void vcmp(FPDoubleRegisterID rd, FPDoubleRegisterID rm)
     {
-        m_formatter.vfpOp(0x0bc0eeb8 | doubleRegisterMask(fd, 6, 28) | singleRegisterMask(sm, 16, 21));
+        m_formatter.vfpOp(OP_VCMP, OP_VCMPb, true, VFPOperand(4), rd, rm);
     }
 
-    void vcvt_S32_F64(FPRegisterID sd, FPRegisterID fm)
+    void vcmpz(FPDoubleRegisterID rd)
     {
-        m_formatter.vfpOp(0x0bc0eebd | singleRegisterMask(sd, 28, 6) | doubleRegisterMask(fm, 21, 16));
+        m_formatter.vfpOp(OP_VCMP, OP_VCMPb, true, VFPOperand(5), rd, VFPOperand(0));
     }
 
-    void vldr(FPRegisterID rd, RegisterID rn, int32_t imm)
+    void vcvt_signedToFloatingPoint(FPDoubleRegisterID rd, FPSingleRegisterID rm)
     {
-        vmem(rd, rn, imm, true);
+        // boolean values are 64bit (toInt, unsigned, roundZero)
+        m_formatter.vfpOp(OP_VCVT_FPIVFP, OP_VCVT_FPIVFPb, true, vcvtOp(false, false, false), rd, rm);
     }
 
-    void vmov(RegisterID rd, FPRegisterID sn)
+    void vcvt_floatingPointToSigned(FPSingleRegisterID rd, FPDoubleRegisterID rm)
+    {
+        // boolean values are 64bit (toInt, unsigned, roundZero)
+        m_formatter.vfpOp(OP_VCVT_FPIVFP, OP_VCVT_FPIVFPb, true, vcvtOp(true, false, true), rd, rm);
+    }
+    
+    void vcvt_floatingPointToUnsigned(FPSingleRegisterID rd, FPDoubleRegisterID rm)
     {
-        m_formatter.vfpOp(0x0a10ee10 | (rd << 28) | singleRegisterMask(sn, 0, 23));
+        // boolean values are 64bit (toInt, unsigned, roundZero)
+        m_formatter.vfpOp(OP_VCVT_FPIVFP, OP_VCVT_FPIVFPb, true, vcvtOp(true, true, true), rd, rm);
     }
 
-    void vmov(FPRegisterID sn, RegisterID rd)
+    void vdiv(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
     {
-        m_formatter.vfpOp(0x0a10ee00 | (rd << 28) | singleRegisterMask(sn, 0, 23));
+        m_formatter.vfpOp(OP_VDIV, OP_VDIVb, true, rn, rd, rm);
     }
 
-    // move FPSCR flags to APSR.
-    void vmrs_APSR_nzcv_FPSCR()
+    void vldr(FPDoubleRegisterID rd, RegisterID rn, int32_t imm)
     {
-        m_formatter.vfpOp(0xfa10eef1);
+        m_formatter.vfpMemOp(OP_VLDR, OP_VLDRb, true, rn, rd, imm);
+    }
+    
+    void flds(FPSingleRegisterID rd, RegisterID rn, int32_t imm)
+    {
+        m_formatter.vfpMemOp(OP_FLDS, OP_FLDSb, false, rn, rd, imm);
     }
 
-    void vmul_F64(FPRegisterID rd, FPRegisterID rn, FPRegisterID rm)
+    void vmov(RegisterID rd, FPSingleRegisterID rn)
     {
-        m_formatter.vfpOp(0x0b00ee20 | doubleRegisterMask(rd, 6, 28) | doubleRegisterMask(rn, 23, 0) | doubleRegisterMask(rm, 21, 16));
+        ASSERT(!BadReg(rd));
+        m_formatter.vfpOp(OP_VMOV_StoC, OP_VMOV_StoCb, false, rn, rd, VFPOperand(0));
     }
 
-    void vstr(FPRegisterID rd, RegisterID rn, int32_t imm)
+    void vmov(FPSingleRegisterID rd, RegisterID rn)
     {
-        vmem(rd, rn, imm, false);
+        ASSERT(!BadReg(rn));
+        m_formatter.vfpOp(OP_VMOV_CtoS, OP_VMOV_CtoSb, false, rd, rn, VFPOperand(0));
     }
 
-    void vsub_F64(FPRegisterID rd, FPRegisterID rn, FPRegisterID rm)
+    void vmov(RegisterID rd1, RegisterID rd2, FPDoubleRegisterID rn)
     {
-        m_formatter.vfpOp(0x0b40ee30 | doubleRegisterMask(rd, 6, 28) | doubleRegisterMask(rn, 23, 0) | doubleRegisterMask(rm, 21, 16));
+        ASSERT(!BadReg(rd1));
+        ASSERT(!BadReg(rd2));
+        m_formatter.vfpOp(OP_VMOV_DtoC, OP_VMOV_DtoCb, true, rd2, VFPOperand(rd1 | 16), rn);
     }
 
+    void vmov(FPDoubleRegisterID rd, RegisterID rn1, RegisterID rn2)
+    {
+        ASSERT(!BadReg(rn1));
+        ASSERT(!BadReg(rn2));
+        m_formatter.vfpOp(OP_VMOV_CtoD, OP_VMOV_CtoDb, true, rn2, VFPOperand(rn1 | 16), rd);
+    }
 
-    JmpDst label()
+    void vmov(FPDoubleRegisterID rd, FPDoubleRegisterID rn)
     {
-        return JmpDst(m_formatter.size());
+        m_formatter.vfpOp(OP_VMOV_T2, OP_VMOV_T2b, true, VFPOperand(0), rd, rn);
     }
-    
-    JmpDst align(int alignment)
+
+    void vmrs(RegisterID reg = ARMRegisters::pc)
     {
-        while (!m_formatter.isAligned(alignment))
-            bkpt();
+        ASSERT(reg != ARMRegisters::sp);
+        m_formatter.vfpOp(OP_VMRS, OP_VMRSb, false, VFPOperand(1), VFPOperand(0x10 | reg), VFPOperand(0));
+    }
 
-        return label();
+    void vmul(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VMUL_T2, OP_VMUL_T2b, true, rn, rd, rm);
     }
-    
-    static void* getRelocatedAddress(void* code, JmpSrc jump)
+
+    void vstr(FPDoubleRegisterID rd, RegisterID rn, int32_t imm)
     {
-        ASSERT(jump.m_offset != -1);
+        m_formatter.vfpMemOp(OP_VSTR, OP_VSTRb, true, rn, rd, imm);
+    }
 
-        return reinterpret_cast<void*>(reinterpret_cast<ptrdiff_t>(code) + jump.m_offset);
+    void fsts(FPSingleRegisterID rd, RegisterID rn, int32_t imm)
+    {
+        m_formatter.vfpMemOp(OP_FSTS, OP_FSTSb, false, rn, rd, imm);
+    }
+
+    void vsub(FPDoubleRegisterID rd, FPDoubleRegisterID rn, FPDoubleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VSUB_T2, OP_VSUB_T2b, true, rn, rd, rm);
+    }
+
+    void vabs(FPDoubleRegisterID rd, FPDoubleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VABS_T2, OP_VABS_T2b, true, VFPOperand(16), rd, rm);
+    }
+
+    void vneg(FPDoubleRegisterID rd, FPDoubleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VNEG_T2, OP_VNEG_T2b, true, VFPOperand(1), rd, rm);
+    }
+
+    void vsqrt(FPDoubleRegisterID rd, FPDoubleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VSQRT_T1, OP_VSQRT_T1b, true, VFPOperand(17), rd, rm);
     }
     
-    static void* getRelocatedAddress(void* code, JmpDst destination)
+    void vcvtds(FPDoubleRegisterID rd, FPSingleRegisterID rm)
     {
-        ASSERT(destination.m_offset != -1);
+        m_formatter.vfpOp(OP_VCVTDS_T1, OP_VCVTDS_T1b, false, VFPOperand(23), rd, rm);
+    }
 
-        return reinterpret_cast<void*>(reinterpret_cast<ptrdiff_t>(code) + destination.m_offset);
+    void vcvtsd(FPSingleRegisterID rd, FPDoubleRegisterID rm)
+    {
+        m_formatter.vfpOp(OP_VCVTSD_T1, OP_VCVTSD_T1b, true, VFPOperand(23), rd, rm);
+    }
+
+    void nop()
+    {
+        m_formatter.oneWordOp8Imm8(OP_NOP_T1, 0);
+    }
+
+    AssemblerLabel label()
+    {
+        return m_formatter.label();
     }
     
-    static int getDifferenceBetweenLabels(JmpDst src, JmpDst dst)
+    AssemblerLabel align(int alignment)
     {
-        return dst.m_offset - src.m_offset;
+        while (!m_formatter.isAligned(alignment))
+            bkpt();
+
+        return label();
     }
     
-    static int getDifferenceBetweenLabels(JmpDst src, JmpSrc dst)
+    static void* getRelocatedAddress(void* code, AssemblerLabel label)
     {
-        return dst.m_offset - src.m_offset;
+        ASSERT(label.isSet());
+        return reinterpret_cast<void*>(reinterpret_cast<ptrdiff_t>(code) + label.m_offset);
     }
     
-    static int getDifferenceBetweenLabels(JmpSrc src, JmpDst dst)
+    static int getDifferenceBetweenLabels(AssemblerLabel a, AssemblerLabel b)
+    {
+        return b.m_offset - a.m_offset;
+    }
+
+    int executableOffsetFor(int location)
     {
-        return dst.m_offset - src.m_offset;
+        if (!location)
+            return 0;
+        return static_cast<int32_t*>(m_formatter.data())[location / sizeof(int32_t) - 1];
     }
     
+    int jumpSizeDelta(JumpType jumpType, JumpLinkType jumpLinkType) { return JUMP_ENUM_SIZE(jumpType) - JUMP_ENUM_SIZE(jumpLinkType); }
+    
     // Assembler admin methods:
 
-    size_t size() const
+    static ALWAYS_INLINE bool linkRecordSourceComparator(const LinkRecord& a, const LinkRecord& b)
     {
-        return m_formatter.size();
+        return a.from() < b.from();
     }
 
-    void* executableCopy(ExecutablePool* allocator)
+    bool canCompact(JumpType jumpType)
     {
-        void* copy = m_formatter.executableCopy(allocator);
-
-        unsigned jumpCount = m_jumpsToLink.size();
-        for (unsigned i = 0; i < jumpCount; ++i) {
-            uint16_t* location = reinterpret_cast<uint16_t*>(reinterpret_cast<intptr_t>(copy) + m_jumpsToLink[i].from);
-            uint16_t* target = reinterpret_cast<uint16_t*>(reinterpret_cast<intptr_t>(copy) + m_jumpsToLink[i].to);
-            linkJumpAbsolute(location, target);
+        // The following cannot be compacted:
+        //   JumpFixed: represents custom jump sequence
+        //   JumpNoConditionFixedSize: represents unconditional jump that must remain a fixed size
+        //   JumpConditionFixedSize: represents conditional jump that must remain a fixed size
+        return (jumpType == JumpNoCondition) || (jumpType == JumpCondition);
+    }
+    
+    JumpLinkType computeJumpType(JumpType jumpType, const uint8_t* from, const uint8_t* to)
+    {
+        if (jumpType == JumpFixed)
+            return LinkInvalid;
+        
+        // for patchable jump we must leave space for the longest code sequence
+        if (jumpType == JumpNoConditionFixedSize)
+            return LinkBX;
+        if (jumpType == JumpConditionFixedSize)
+            return LinkConditionalBX;
+        
+        const int paddingSize = JUMP_ENUM_SIZE(jumpType);
+        bool mayTriggerErrata = false;
+        
+        if (jumpType == JumpCondition) {
+            // 2-byte conditional T1
+            const uint16_t* jumpT1Location = reinterpret_cast_ptr<const uint16_t*>(from - (paddingSize - JUMP_ENUM_SIZE(LinkJumpT1)));
+            if (canBeJumpT1(jumpT1Location, to))
+                return LinkJumpT1;
+            // 4-byte conditional T3
+            const uint16_t* jumpT3Location = reinterpret_cast_ptr<const uint16_t*>(from - (paddingSize - JUMP_ENUM_SIZE(LinkJumpT3)));
+            if (canBeJumpT3(jumpT3Location, to, mayTriggerErrata)) {
+                if (!mayTriggerErrata)
+                    return LinkJumpT3;
+            }
+            // 4-byte conditional T4 with IT
+            const uint16_t* conditionalJumpT4Location = 
+            reinterpret_cast_ptr<const uint16_t*>(from - (paddingSize - JUMP_ENUM_SIZE(LinkConditionalJumpT4)));
+            if (canBeJumpT4(conditionalJumpT4Location, to, mayTriggerErrata)) {
+                if (!mayTriggerErrata)
+                    return LinkConditionalJumpT4;
+            }
+        } else {
+            // 2-byte unconditional T2
+            const uint16_t* jumpT2Location = reinterpret_cast_ptr<const uint16_t*>(from - (paddingSize - JUMP_ENUM_SIZE(LinkJumpT2)));
+            if (canBeJumpT2(jumpT2Location, to))
+                return LinkJumpT2;
+            // 4-byte unconditional T4
+            const uint16_t* jumpT4Location = reinterpret_cast_ptr<const uint16_t*>(from - (paddingSize - JUMP_ENUM_SIZE(LinkJumpT4)));
+            if (canBeJumpT4(jumpT4Location, to, mayTriggerErrata)) {
+                if (!mayTriggerErrata)
+                    return LinkJumpT4;
+            }
+            // use long jump sequence
+            return LinkBX;
+        }
+        
+        ASSERT(jumpType == JumpCondition);
+        return LinkConditionalBX;
+    }
+    
+    JumpLinkType computeJumpType(LinkRecord& record, const uint8_t* from, const uint8_t* to)
+    {
+        JumpLinkType linkType = computeJumpType(record.type(), from, to);
+        record.setLinkType(linkType);
+        return linkType;
+    }
+    
+    void recordLinkOffsets(int32_t regionStart, int32_t regionEnd, int32_t offset)
+    {
+        int32_t ptr = regionStart / sizeof(int32_t);
+        const int32_t end = regionEnd / sizeof(int32_t);
+        int32_t* offsets = static_cast<int32_t*>(m_formatter.data());
+        while (ptr < end)
+            offsets[ptr++] = offset;
+    }
+    
+    Vector<LinkRecord>& jumpsToLink()
+    {
+        std::sort(m_jumpsToLink.begin(), m_jumpsToLink.end(), linkRecordSourceComparator);
+        return m_jumpsToLink;
+    }
+
+    void ALWAYS_INLINE link(LinkRecord& record, uint8_t* from, uint8_t* to)
+    {
+        switch (record.linkType()) {
+        case LinkJumpT1:
+            linkJumpT1(record.condition(), reinterpret_cast_ptr<uint16_t*>(from), to);
+            break;
+        case LinkJumpT2:
+            linkJumpT2(reinterpret_cast_ptr<uint16_t*>(from), to);
+            break;
+        case LinkJumpT3:
+            linkJumpT3(record.condition(), reinterpret_cast_ptr<uint16_t*>(from), to);
+            break;
+        case LinkJumpT4:
+            linkJumpT4(reinterpret_cast_ptr<uint16_t*>(from), to);
+            break;
+        case LinkConditionalJumpT4:
+            linkConditionalJumpT4(record.condition(), reinterpret_cast_ptr<uint16_t*>(from), to);
+            break;
+        case LinkConditionalBX:
+            linkConditionalBX(record.condition(), reinterpret_cast_ptr<uint16_t*>(from), to);
+            break;
+        case LinkBX:
+            linkBX(reinterpret_cast_ptr<uint16_t*>(from), to);
+            break;
+        default:
+            ASSERT_NOT_REACHED();
+            break;
         }
-        m_jumpsToLink.clear();
-
-        ASSERT(copy);
-        return copy;
     }
 
-    static unsigned getCallReturnOffset(JmpSrc call)
+    void* unlinkedCode() { return m_formatter.data(); }
+    size_t codeSize() const { return m_formatter.codeSize(); }
+
+    static unsigned getCallReturnOffset(AssemblerLabel call)
     {
-        ASSERT(call.m_offset >= 0);
+        ASSERT(call.isSet());
         return call.m_offset;
     }
 
@@ -1532,33 +2004,31 @@ public:
     // writable region of memory; to modify the code in an execute-only execuable
     // pool the 'repatch' and 'relink' methods should be used.
 
-    void linkJump(JmpSrc from, JmpDst to)
+    void linkJump(AssemblerLabel from, AssemblerLabel to, JumpType type, Condition condition)
     {
-        ASSERT(to.m_offset != -1);
-        ASSERT(from.m_offset != -1);
-        m_jumpsToLink.append(LinkRecord(from.m_offset, to.m_offset));
+        ASSERT(to.isSet());
+        ASSERT(from.isSet());
+        m_jumpsToLink.append(LinkRecord(from.m_offset, to.m_offset, type, condition));
     }
 
-    static void linkJump(void* code, JmpSrc from, void* to)
+    static void linkJump(void* code, AssemblerLabel from, void* to)
     {
-        ASSERT(from.m_offset != -1);
+        ASSERT(from.isSet());
         
         uint16_t* location = reinterpret_cast<uint16_t*>(reinterpret_cast<intptr_t>(code) + from.m_offset);
         linkJumpAbsolute(location, to);
     }
 
-    // bah, this mathod should really be static, since it is used by the LinkBuffer.
-    // return a bool saying whether the link was successful?
-    static void linkCall(void* code, JmpSrc from, void* to)
+    static void linkCall(void* code, AssemblerLabel from, void* to)
     {
         ASSERT(!(reinterpret_cast<intptr_t>(code) & 1));
-        ASSERT(from.m_offset != -1);
+        ASSERT(from.isSet());
         ASSERT(reinterpret_cast<intptr_t>(to) & 1);
 
         setPointer(reinterpret_cast<uint16_t*>(reinterpret_cast<intptr_t>(code) + from.m_offset) - 1, to);
     }
 
-    static void linkPointer(void* code, JmpDst where, void* value)
+    static void linkPointer(void* code, AssemblerLabel where, void* value)
     {
         setPointer(reinterpret_cast<char*>(code) + where.m_offset, value);
     }
@@ -1570,7 +2040,7 @@ public:
 
         linkJumpAbsolute(reinterpret_cast<uint16_t*>(from), to);
 
-        ExecutableAllocator::cacheFlush(reinterpret_cast<uint16_t*>(from) - 5, 5 * sizeof(uint16_t));
+        cacheFlush(reinterpret_cast<uint16_t*>(from) - 5, 5 * sizeof(uint16_t));
     }
     
     static void relinkCall(void* from, void* to)
@@ -1579,8 +2049,11 @@ public:
         ASSERT(reinterpret_cast<intptr_t>(to) & 1);
 
         setPointer(reinterpret_cast<uint16_t*>(from) - 1, to);
-
-        ExecutableAllocator::cacheFlush(reinterpret_cast<uint16_t*>(from) - 5, 4 * sizeof(uint16_t));
+    }
+    
+    static void* readCallTarget(void* from)
+    {
+        return readPointer(reinterpret_cast<uint16_t*>(from) - 1);
     }
 
     static void repatchInt32(void* where, int32_t value)
@@ -1588,8 +2061,13 @@ public:
         ASSERT(!(reinterpret_cast<intptr_t>(where) & 1));
         
         setInt32(where, value);
-
-        ExecutableAllocator::cacheFlush(reinterpret_cast<uint16_t*>(where) - 4, 4 * sizeof(uint16_t));
+    }
+    
+    static void repatchCompact(void* where, int32_t value)
+    {
+        ASSERT(value >= 0);
+        ASSERT(ARMThumbImmediate::makeUInt12(value).isUInt7());
+        setUInt7ForLoad(where, ARMThumbImmediate::makeUInt12(value));
     }
 
     static void repatchPointer(void* where, void* value)
@@ -1597,43 +2075,111 @@ public:
         ASSERT(!(reinterpret_cast<intptr_t>(where) & 1));
         
         setPointer(where, value);
+    }
 
-        ExecutableAllocator::cacheFlush(reinterpret_cast<uint16_t*>(where) - 4, 4 * sizeof(uint16_t));
+    static void* readPointer(void* where)
+    {
+        return reinterpret_cast<void*>(readInt32(where));
+    }
+
+    unsigned debugOffset() { return m_formatter.debugOffset(); }
+
+    static void cacheFlush(void* code, size_t size)
+    {
+#if OS(IOS)
+        sys_cache_control(kCacheFunctionPrepareForExecution, code, size);
+#elif OS(LINUX)
+        asm volatile(
+            "push    {r7}\n"
+            "mov     r0, %0\n"
+            "mov     r1, %1\n"
+            "movw    r7, #0x2\n"
+            "movt    r7, #0xf\n"
+            "movs    r2, #0x0\n"
+            "svc     0x0\n"
+            "pop     {r7}\n"
+            :
+            : "r" (code), "r" (reinterpret_cast<char*>(code) + size)
+            : "r0", "r1", "r2");
+#elif OS(WINCE)
+        CacheRangeFlush(code, size, CACHE_SYNC_ALL);
+#elif OS(QNX)
+#if !ENABLE(ASSEMBLER_WX_EXCLUSIVE)
+        msync(code, size, MS_INVALIDATE_ICACHE);
+#else
+        UNUSED_PARAM(code);
+        UNUSED_PARAM(size);
+#endif
+#else
+#error "The cacheFlush support is missing on this platform."
+#endif
     }
 
-    static void repatchLoadPtrToLEA(void* where)
-    {
-        ASSERT(!(reinterpret_cast<intptr_t>(where) & 1));
+private:
+    // VFP operations commonly take one or more 5-bit operands, typically representing a
+    // floating point register number.  This will commonly be encoded in the instruction
+    // in two parts, with one single bit field, and one 4-bit field.  In the case of
+    // double precision operands the high bit of the register number will be encoded
+    // separately, and for single precision operands the high bit of the register number
+    // will be encoded individually.
+    // VFPOperand encapsulates a 5-bit VFP operand, with bits 0..3 containing the 4-bit
+    // field to be encoded together in the instruction (the low 4-bits of a double
+    // register number, or the high 4-bits of a single register number), and bit 4
+    // contains the bit value to be encoded individually.
+    struct VFPOperand {
+        explicit VFPOperand(uint32_t value)
+            : m_value(value)
+        {
+            ASSERT(!(m_value & ~0x1f));
+        }
 
-        uint16_t* loadOp = reinterpret_cast<uint16_t*>(where) + 4;
-        ASSERT((*loadOp & 0xfff0) == OP_LDR_reg_T2);
+        VFPOperand(FPDoubleRegisterID reg)
+            : m_value(reg)
+        {
+        }
 
-        *loadOp = OP_ADD_reg_T3 | (*loadOp & 0xf);
-        ExecutableAllocator::cacheFlush(loadOp, sizeof(uint16_t));
-    }
+        VFPOperand(RegisterID reg)
+            : m_value(reg)
+        {
+        }
 
-private:
+        VFPOperand(FPSingleRegisterID reg)
+            : m_value(((reg & 1) << 4) | (reg >> 1)) // rotate the lowest bit of 'reg' to the top.
+        {
+        }
 
-    // Arm vfp addresses can be offset by a 9-bit ones-comp immediate, left shifted by 2.
-    // (i.e. +/-(0..255) 32-bit words)
-    void vmem(FPRegisterID rd, RegisterID rn, int32_t imm, bool isLoad)
-    {
-        bool up;
-        uint32_t offset;
-        if (imm < 0) {
-            offset = -imm;
-            up = false;
-        } else {
-            offset = imm;
-            up = true;
+        uint32_t bits1()
+        {
+            return m_value >> 4;
         }
 
-        // offset is effectively leftshifted by 2 already (the bottom two bits are zero, and not
-        // reperesented in the instruction.  Left shift by 14, to mov it into position 0x00AA0000.
-        ASSERT((offset & ~(0xff << 2)) == 0);
-        offset <<= 14;
+        uint32_t bits4()
+        {
+            return m_value & 0xf;
+        }
 
-        m_formatter.vfpOp(0x0b00ed00 | offset | (up << 7) | (isLoad << 4) | doubleRegisterMask(rd, 6, 28) | rn);
+        uint32_t m_value;
+    };
+
+    VFPOperand vcvtOp(bool toInteger, bool isUnsigned, bool isRoundZero)
+    {
+        // Cannot specify rounding when converting to float.
+        ASSERT(toInteger || !isRoundZero);
+
+        uint32_t op = 0x8;
+        if (toInteger) {
+            // opc2 indicates both toInteger & isUnsigned.
+            op |= isUnsigned ? 0x4 : 0x5;
+            // 'op' field in instruction is isRoundZero
+            if (isRoundZero)
+                op |= 0x10;
+        } else {
+            ASSERT(!isRoundZero);
+            // 'op' field in instruction is isUnsigned
+            if (!isUnsigned)
+                op |= 0x10;
+        }
+        return VFPOperand(op);
     }
 
     static void setInt32(void* code, uint32_t value)
@@ -1648,7 +2194,35 @@ private:
         location[-2] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16);
         location[-1] = twoWordOp5i6Imm4Reg4EncodedImmSecond((location[-1] >> 8) & 0xf, hi16);
 
-        ExecutableAllocator::cacheFlush(location - 4, 4 * sizeof(uint16_t));
+        cacheFlush(location - 4, 4 * sizeof(uint16_t));
+    }
+    
+    static int32_t readInt32(void* code)
+    {
+        uint16_t* location = reinterpret_cast<uint16_t*>(code);
+        ASSERT(isMOV_imm_T3(location - 4) && isMOVT(location - 2));
+        
+        ARMThumbImmediate lo16;
+        ARMThumbImmediate hi16;
+        decodeTwoWordOp5i6Imm4Reg4EncodedImmFirst(lo16, location[-4]);
+        decodeTwoWordOp5i6Imm4Reg4EncodedImmSecond(lo16, location[-3]);
+        decodeTwoWordOp5i6Imm4Reg4EncodedImmFirst(hi16, location[-2]);
+        decodeTwoWordOp5i6Imm4Reg4EncodedImmSecond(hi16, location[-1]);
+        uint32_t result = hi16.asUInt16();
+        result <<= 16;
+        result |= lo16.asUInt16();
+        return static_cast<int32_t>(result);
+    }
+
+    static void setUInt7ForLoad(void* code, ARMThumbImmediate imm)
+    {
+        // Requires us to have planted a LDR_imm_T1
+        ASSERT(imm.isValid());
+        ASSERT(imm.isUInt7());
+        uint16_t* location = reinterpret_cast<uint16_t*>(code);
+        location[0] &= ~((static_cast<uint16_t>(0x7f) >> 2) << 6);
+        location[0] |= (imm.getUInt7() >> 2) << 6;
+        cacheFlush(location, sizeof(uint16_t));
     }
 
     static void setPointer(void* code, void* value)
@@ -1692,25 +2266,194 @@ private:
         return (instruction[0] == OP_NOP_T2a) && (instruction[1] == OP_NOP_T2b);
     }
 
-    static void linkJumpAbsolute(uint16_t* instruction, void* target)
+    static bool canBeJumpT1(const uint16_t* instruction, const void* target)
+    {
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // It does not appear to be documented in the ARM ARM (big surprise), but
+        // for OP_B_T1 the branch displacement encoded in the instruction is 2 
+        // less than the actual displacement.
+        relative -= 2;
+        return ((relative << 23) >> 23) == relative;
+    }
+    
+    static bool canBeJumpT2(const uint16_t* instruction, const void* target)
+    {
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // It does not appear to be documented in the ARM ARM (big surprise), but
+        // for OP_B_T2 the branch displacement encoded in the instruction is 2 
+        // less than the actual displacement.
+        relative -= 2;
+        return ((relative << 20) >> 20) == relative;
+    }
+    
+    static bool canBeJumpT3(const uint16_t* instruction, const void* target, bool& mayTriggerErrata)
+    {
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // From Cortex-A8 errata:
+        // If the 32-bit Thumb-2 branch instruction spans two 4KiB regions and
+        // the target of the branch falls within the first region it is
+        // possible for the processor to incorrectly determine the branch
+        // instruction, and it is also possible in some cases for the processor
+        // to enter a deadlock state.
+        // The instruction is spanning two pages if it ends at an address ending 0x002
+        bool spansTwo4K = ((reinterpret_cast<intptr_t>(instruction) & 0xfff) == 0x002);
+        mayTriggerErrata = spansTwo4K;
+        // The target is in the first page if the jump branch back by [3..0x1002] bytes
+        bool targetInFirstPage = (relative >= -0x1002) && (relative < -2);
+        bool wouldTriggerA8Errata = spansTwo4K && targetInFirstPage;
+        return ((relative << 11) >> 11) == relative && !wouldTriggerA8Errata;
+    }
+    
+    static bool canBeJumpT4(const uint16_t* instruction, const void* target, bool& mayTriggerErrata)
+    {
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // From Cortex-A8 errata:
+        // If the 32-bit Thumb-2 branch instruction spans two 4KiB regions and
+        // the target of the branch falls within the first region it is
+        // possible for the processor to incorrectly determine the branch
+        // instruction, and it is also possible in some cases for the processor
+        // to enter a deadlock state.
+        // The instruction is spanning two pages if it ends at an address ending 0x002
+        bool spansTwo4K = ((reinterpret_cast<intptr_t>(instruction) & 0xfff) == 0x002);
+        mayTriggerErrata = spansTwo4K;
+        // The target is in the first page if the jump branch back by [3..0x1002] bytes
+        bool targetInFirstPage = (relative >= -0x1002) && (relative < -2);
+        bool wouldTriggerA8Errata = spansTwo4K && targetInFirstPage;
+        return ((relative << 7) >> 7) == relative && !wouldTriggerA8Errata;
+    }
+    
+    void linkJumpT1(Condition cond, uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(        
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        ASSERT(canBeJumpT1(instruction, target));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // It does not appear to be documented in the ARM ARM (big surprise), but
+        // for OP_B_T1 the branch displacement encoded in the instruction is 2 
+        // less than the actual displacement.
+        relative -= 2;
+        
+        // All branch offsets should be an even distance.
+        ASSERT(!(relative & 1));
+        instruction[-1] = OP_B_T1 | ((cond & 0xf) << 8) | ((relative & 0x1fe) >> 1);
+    }
+    
+    static void linkJumpT2(uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(        
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        ASSERT(canBeJumpT2(instruction, target));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // It does not appear to be documented in the ARM ARM (big surprise), but
+        // for OP_B_T2 the branch displacement encoded in the instruction is 2 
+        // less than the actual displacement.
+        relative -= 2;
+        
+        // All branch offsets should be an even distance.
+        ASSERT(!(relative & 1));
+        instruction[-1] = OP_B_T2 | ((relative & 0xffe) >> 1);
+    }
+    
+    void linkJumpT3(Condition cond, uint16_t* instruction, void* target)
     {
         // FIMXE: this should be up in the MacroAssembler layer. :-(
-        const uint16_t JUMP_TEMPORARY_REGISTER = ARMRegisters::ip;
-
         ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
         ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
-
-        ASSERT( (isMOV_imm_T3(instruction - 5) && isMOVT(instruction - 3) && isBX(instruction - 1))
-            || (isNOP_T1(instruction - 5) && isNOP_T2(instruction - 4) && isB(instruction - 2)) );
-
+        bool scratch;
+        UNUSED_PARAM(scratch);
+        ASSERT(canBeJumpT3(instruction, target, scratch));
+        
         intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
-        if (((relative << 7) >> 7) == relative) {
-            // ARM encoding for the top two bits below the sign bit is 'peculiar'.
-            if (relative >= 0)
-                relative ^= 0xC00000;
-
-            // All branch offsets should be an even distance.
-            ASSERT(!(relative & 1));
+        
+        // All branch offsets should be an even distance.
+        ASSERT(!(relative & 1));
+        instruction[-2] = OP_B_T3a | ((relative & 0x100000) >> 10) | ((cond & 0xf) << 6) | ((relative & 0x3f000) >> 12);
+        instruction[-1] = OP_B_T3b | ((relative & 0x80000) >> 8) | ((relative & 0x40000) >> 5) | ((relative & 0xffe) >> 1);
+    }
+    
+    static void linkJumpT4(uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(        
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        bool scratch;
+        UNUSED_PARAM(scratch);
+        ASSERT(canBeJumpT4(instruction, target, scratch));
+        
+        intptr_t relative = reinterpret_cast<intptr_t>(target) - (reinterpret_cast<intptr_t>(instruction));
+        // ARM encoding for the top two bits below the sign bit is 'peculiar'.
+        if (relative >= 0)
+            relative ^= 0xC00000;
+        
+        // All branch offsets should be an even distance.
+        ASSERT(!(relative & 1));
+        instruction[-2] = OP_B_T4a | ((relative & 0x1000000) >> 14) | ((relative & 0x3ff000) >> 12);
+        instruction[-1] = OP_B_T4b | ((relative & 0x800000) >> 10) | ((relative & 0x400000) >> 11) | ((relative & 0xffe) >> 1);
+    }
+    
+    void linkConditionalJumpT4(Condition cond, uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(        
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        instruction[-3] = ifThenElse(cond) | OP_IT;
+        linkJumpT4(instruction, target);
+    }
+    
+    static void linkBX(uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        const uint16_t JUMP_TEMPORARY_REGISTER = ARMRegisters::ip;
+        ARMThumbImmediate lo16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) + 1));
+        ARMThumbImmediate hi16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) >> 16));
+        instruction[-5] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, lo16);
+        instruction[-4] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, lo16);
+        instruction[-3] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOVT, hi16);
+        instruction[-2] = twoWordOp5i6Imm4Reg4EncodedImmSecond(JUMP_TEMPORARY_REGISTER, hi16);
+        instruction[-1] = OP_BX | (JUMP_TEMPORARY_REGISTER << 3);
+    }
+    
+    void linkConditionalBX(Condition cond, uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(        
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        linkBX(instruction, target);
+        instruction[-6] = ifThenElse(cond, true, true) | OP_IT;
+    }
+    
+    static void linkJumpAbsolute(uint16_t* instruction, void* target)
+    {
+        // FIMXE: this should be up in the MacroAssembler layer. :-(
+        ASSERT(!(reinterpret_cast<intptr_t>(instruction) & 1));
+        ASSERT(!(reinterpret_cast<intptr_t>(target) & 1));
+        
+        ASSERT((isMOV_imm_T3(instruction - 5) && isMOVT(instruction - 3) && isBX(instruction - 1))
+               || (isNOP_T1(instruction - 5) && isNOP_T2(instruction - 4) && isB(instruction - 2)));
+        
+        bool scratch;
+        if (canBeJumpT4(instruction, target, scratch)) {
             // There may be a better way to fix this, but right now put the NOPs first, since in the
             // case of an conditional branch this will be coming after an ITTT predicating *three*
             // instructions!  Looking backwards to modify the ITTT to an IT is not easy, due to
@@ -1719,9 +2462,9 @@ private:
             instruction[-5] = OP_NOP_T1;
             instruction[-4] = OP_NOP_T2a;
             instruction[-3] = OP_NOP_T2b;
-            instruction[-2] = OP_B_T4a | ((relative & 0x1000000) >> 14) | ((relative & 0x3ff000) >> 12);
-            instruction[-1] = OP_B_T4b | ((relative & 0x800000) >> 10) | ((relative & 0x400000) >> 11) | ((relative & 0xffe) >> 1);
+            linkJumpT4(instruction, target);
         } else {
+            const uint16_t JUMP_TEMPORARY_REGISTER = ARMRegisters::ip;
             ARMThumbImmediate lo16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) + 1));
             ARMThumbImmediate hi16 = ARMThumbImmediate::makeUInt16(static_cast<uint16_t>(reinterpret_cast<uint32_t>(target) >> 16));
             instruction[-5] = twoWordOp5i6Imm4Reg4EncodedImmFirst(OP_MOV_imm_T3, lo16);
@@ -1731,71 +2474,85 @@ private:
             instruction[-1] = OP_BX | (JUMP_TEMPORARY_REGISTER << 3);
         }
     }
-
+    
     static uint16_t twoWordOp5i6Imm4Reg4EncodedImmFirst(uint16_t op, ARMThumbImmediate imm)
     {
         return op | (imm.m_value.i << 10) | imm.m_value.imm4;
     }
+
+    static void decodeTwoWordOp5i6Imm4Reg4EncodedImmFirst(ARMThumbImmediate& result, uint16_t value)
+    {
+        result.m_value.i = (value >> 10) & 1;
+        result.m_value.imm4 = value & 15;
+    }
+
     static uint16_t twoWordOp5i6Imm4Reg4EncodedImmSecond(uint16_t rd, ARMThumbImmediate imm)
     {
         return (imm.m_value.imm3 << 12) | (rd << 8) | imm.m_value.imm8;
     }
 
+    static void decodeTwoWordOp5i6Imm4Reg4EncodedImmSecond(ARMThumbImmediate& result, uint16_t value)
+    {
+        result.m_value.imm3 = (value >> 12) & 7;
+        result.m_value.imm8 = value & 255;
+    }
+
     class ARMInstructionFormatter {
     public:
-        void oneWordOp5Reg3Imm8(OpcodeID op, RegisterID rd, uint8_t imm)
+        ALWAYS_INLINE void oneWordOp5Reg3Imm8(OpcodeID op, RegisterID rd, uint8_t imm)
         {
             m_buffer.putShort(op | (rd << 8) | imm);
         }
         
-        void oneWordOp5Imm5Reg3Reg3(OpcodeID op, uint8_t imm, RegisterID reg1, RegisterID reg2)
+        ALWAYS_INLINE void oneWordOp5Imm5Reg3Reg3(OpcodeID op, uint8_t imm, RegisterID reg1, RegisterID reg2)
         {
             m_buffer.putShort(op | (imm << 6) | (reg1 << 3) | reg2);
         }
 
-        void oneWordOp7Reg3Reg3Reg3(OpcodeID op, RegisterID reg1, RegisterID reg2, RegisterID reg3)
+        ALWAYS_INLINE void oneWordOp7Reg3Reg3Reg3(OpcodeID op, RegisterID reg1, RegisterID reg2, RegisterID reg3)
         {
             m_buffer.putShort(op | (reg1 << 6) | (reg2 << 3) | reg3);
         }
 
-        void oneWordOp8Imm8(OpcodeID op, uint8_t imm)
+        ALWAYS_INLINE void oneWordOp8Imm8(OpcodeID op, uint8_t imm)
         {
             m_buffer.putShort(op | imm);
         }
 
-        void oneWordOp8RegReg143(OpcodeID op, RegisterID reg1, RegisterID reg2)
+        ALWAYS_INLINE void oneWordOp8RegReg143(OpcodeID op, RegisterID reg1, RegisterID reg2)
         {
             m_buffer.putShort(op | ((reg2 & 8) << 4) | (reg1 << 3) | (reg2 & 7));
         }
-        void oneWordOp9Imm7(OpcodeID op, uint8_t imm)
+
+        ALWAYS_INLINE void oneWordOp9Imm7(OpcodeID op, uint8_t imm)
         {
             m_buffer.putShort(op | imm);
         }
 
-        void oneWordOp10Reg3Reg3(OpcodeID op, RegisterID reg1, RegisterID reg2)
+        ALWAYS_INLINE void oneWordOp10Reg3Reg3(OpcodeID op, RegisterID reg1, RegisterID reg2)
         {
             m_buffer.putShort(op | (reg1 << 3) | reg2);
         }
 
-        void twoWordOp12Reg4FourFours(OpcodeID1 op, RegisterID reg, FourFours ff)
+        ALWAYS_INLINE void twoWordOp12Reg4FourFours(OpcodeID1 op, RegisterID reg, FourFours ff)
         {
             m_buffer.putShort(op | reg);
             m_buffer.putShort(ff.m_u.value);
         }
         
-        void twoWordOp16FourFours(OpcodeID1 op, FourFours ff)
+        ALWAYS_INLINE void twoWordOp16FourFours(OpcodeID1 op, FourFours ff)
         {
             m_buffer.putShort(op);
             m_buffer.putShort(ff.m_u.value);
         }
         
-        void twoWordOp16Op16(OpcodeID1 op1, OpcodeID2 op2)
+        ALWAYS_INLINE void twoWordOp16Op16(OpcodeID1 op1, OpcodeID2 op2)
         {
             m_buffer.putShort(op1);
             m_buffer.putShort(op2);
         }
 
-        void twoWordOp5i6Imm4Reg4EncodedImm(OpcodeID1 op, int imm4, RegisterID rd, ARMThumbImmediate imm)
+        ALWAYS_INLINE void twoWordOp5i6Imm4Reg4EncodedImm(OpcodeID1 op, int imm4, RegisterID rd, ARMThumbImmediate imm)
         {
             ARMThumbImmediate newImm = imm;
             newImm.m_value.imm4 = imm4;
@@ -1804,30 +2561,63 @@ private:
             m_buffer.putShort(ARMv7Assembler::twoWordOp5i6Imm4Reg4EncodedImmSecond(rd, newImm));
         }
 
-        void twoWordOp12Reg4Reg4Imm12(OpcodeID1 op, RegisterID reg1, RegisterID reg2, uint16_t imm)
+        ALWAYS_INLINE void twoWordOp12Reg4Reg4Imm12(OpcodeID1 op, RegisterID reg1, RegisterID reg2, uint16_t imm)
         {
             m_buffer.putShort(op | reg1);
             m_buffer.putShort((reg2 << 12) | imm);
         }
 
-        void vfpOp(int32_t op)
+        ALWAYS_INLINE void twoWordOp12Reg40Imm3Reg4Imm20Imm5(OpcodeID1 op, RegisterID reg1, RegisterID reg2, uint16_t imm1, uint16_t imm2, uint16_t imm3)
         {
-            m_buffer.putInt(op);
+            m_buffer.putShort(op | reg1);
+            m_buffer.putShort((imm1 << 12) | (reg2 << 8) | (imm2 << 6) | imm3);
         }
 
+        // Formats up instructions of the pattern:
+        //    111111111B11aaaa:bbbb222SA2C2cccc
+        // Where 1s in the pattern come from op1, 2s in the pattern come from op2, S is the provided size bit.
+        // Operands provide 5 bit values of the form Aaaaa, Bbbbb, Ccccc.
+        ALWAYS_INLINE void vfpOp(OpcodeID1 op1, OpcodeID2 op2, bool size, VFPOperand a, VFPOperand b, VFPOperand c)
+        {
+            ASSERT(!(op1 & 0x004f));
+            ASSERT(!(op2 & 0xf1af));
+            m_buffer.putShort(op1 | b.bits1() << 6 | a.bits4());
+            m_buffer.putShort(op2 | b.bits4() << 12 | size << 8 | a.bits1() << 7 | c.bits1() << 5 | c.bits4());
+        }
+
+        // Arm vfp addresses can be offset by a 9-bit ones-comp immediate, left shifted by 2.
+        // (i.e. +/-(0..255) 32-bit words)
+        ALWAYS_INLINE void vfpMemOp(OpcodeID1 op1, OpcodeID2 op2, bool size, RegisterID rn, VFPOperand rd, int32_t imm)
+        {
+            bool up = true;
+            if (imm < 0) {
+                imm = -imm;
+                up = false;
+            }
+            
+            uint32_t offset = imm;
+            ASSERT(!(offset & ~0x3fc));
+            offset >>= 2;
+
+            m_buffer.putShort(op1 | (up << 7) | rd.bits1() << 6 | rn);
+            m_buffer.putShort(op2 | rd.bits4() << 12 | size << 8 | offset);
+        }
 
         // Administrative methods:
 
-        size_t size() const { return m_buffer.size(); }
+        size_t codeSize() const { return m_buffer.codeSize(); }
+        AssemblerLabel label() const { return m_buffer.label(); }
         bool isAligned(int alignment) const { return m_buffer.isAligned(alignment); }
         void* data() const { return m_buffer.data(); }
-        void* executableCopy(ExecutablePool* allocator) { return m_buffer.executableCopy(allocator); }
+
+        unsigned debugOffset() { return m_buffer.debugOffset(); }
 
     private:
         AssemblerBuffer m_buffer;
     } m_formatter;
 
     Vector<LinkRecord> m_jumpsToLink;
+    Vector<int32_t> m_offsets;
 };
 
 } // namespace JSC