diff --git a/wgpu-native/src/command/allocator.rs b/wgpu-native/src/command/allocator.rs
index 6c45be2c2..899c0ad00 100644
--- a/wgpu-native/src/command/allocator.rs
+++ b/wgpu-native/src/command/allocator.rs
@@ -12,14 +12,37 @@ use std::thread;
 
 struct CommandPool<B: hal::Backend> {
     raw: B::CommandPool,
-    available: Vec<CommandBuffer<B>>,
+    available: Vec<B::CommandBuffer>,
+}
+
+impl<B: hal::Backend> CommandPool<B> {
+    fn allocate(&mut self) -> B::CommandBuffer {
+        if self.available.is_empty() {
+            let extra = self.raw.allocate(20, hal::command::RawLevel::Primary);
+            self.available.extend(extra);
+        }
+
+        self.available.pop().unwrap()
+    }
 }
 
 struct Inner<B: hal::Backend> {
     pools: HashMap<ThreadId, CommandPool<B>>,
+    fences: Vec<B::Fence>,
     pending: Vec<CommandBuffer<B>>,
 }
 
+impl<B: hal::Backend> Inner<B> {
+    fn recycle(&mut self, cmd_buf: CommandBuffer<B>) {
+        let pool = self.pools.get_mut(&cmd_buf.recorded_thread_id).unwrap();
+        for mut raw in cmd_buf.raw {
+            raw.reset(false);
+            pool.available.push(raw);
+        }
+        self.fences.push(cmd_buf.fence);
+    }
+}
+
 pub struct CommandAllocator<B: hal::Backend> {
     queue_family: hal::queue::QueueFamilyId,
     inner: Mutex<Inner<B>>,
@@ -31,6 +54,7 @@ impl<B: hal::Backend> CommandAllocator<B> {
             queue_family,
             inner: Mutex::new(Inner {
                 pools: HashMap::new(),
+                fences: Vec::new(),
                 pending: Vec::new(),
             }),
         }
@@ -41,6 +65,17 @@
     ) -> CommandBuffer<B> {
         let thread_id = thread::current().id();
         let mut inner = self.inner.lock().unwrap();
+
+        let fence = match inner.fences.pop() {
+            Some(fence) => {
+                device.reset_fence(&fence);
+                fence
+            }
+            None => {
+                device.create_fence(false)
+            }
+        };
+
         let pool = inner.pools.entry(thread_id).or_insert_with(|| CommandPool {
             raw: device.create_command_pool(
                 self.queue_family,
@@ -48,21 +83,25 @@
             ),
             available: Vec::new(),
         });
+        let init = pool.allocate();
 
-        if let Some(cmd_buf) = pool.available.pop() {
-            assert_eq!(device_id, cmd_buf.device_id.0);
-            device.reset_fence(&cmd_buf.fence);
-            return cmd_buf;
+        CommandBuffer {
+            raw: vec![init],
+            fence,
+            recorded_thread_id: thread_id,
+            device_id: Stored(device_id),
+        }
+    }
+
+    pub fn extend(&self, cmd_buf: &CommandBuffer<B>) -> B::CommandBuffer {
+        let mut inner = self.inner.lock().unwrap();
+        let pool = inner.pools.get_mut(&cmd_buf.recorded_thread_id).unwrap();
+
+        if pool.available.is_empty() {
+            let extra = pool.raw.allocate(20, hal::command::RawLevel::Primary);
+            pool.available.extend(extra);
         }
 
-        for cmbuf in pool.raw.allocate(20, hal::command::RawLevel::Primary) {
-            pool.available.push(CommandBuffer {
-                raw: Some(cmbuf),
-                fence: device.create_fence(false),
-                recorded_thread_id: thread_id,
-                device_id: Stored(device_id),
-            });
-        }
         pool.available.pop().unwrap()
     }
 
@@ -70,16 +109,8 @@
         self.inner.lock().unwrap().pending.push(cmd_buf);
     }
 
-    pub fn recycle(&self, mut cmd_buf: CommandBuffer<B>) {
-        cmd_buf.raw.as_mut().unwrap().reset(false);
-        self.inner
-            .lock()
-            .unwrap()
-            .pools
-            .get_mut(&cmd_buf.recorded_thread_id)
-            .unwrap()
-            .available
-            .push(cmd_buf);
+    pub fn recycle(&self, cmd_buf: CommandBuffer<B>) {
+        self.inner.lock().unwrap().recycle(cmd_buf);
     }
 
     pub fn maintain(&self, device: &B::Device) {
@@ -87,12 +118,7 @@
         for i in (0..inner.pending.len()).rev() {
             if device.get_fence_status(&inner.pending[i].fence) {
                 let cmd_buf = inner.pending.swap_remove(i);
-                inner
-                    .pools
-                    .get_mut(&cmd_buf.recorded_thread_id)
-                    .unwrap()
-                    .available
-                    .push(cmd_buf);
+                inner.recycle(cmd_buf);
             }
         }
     }
diff --git a/wgpu-native/src/command/compute.rs b/wgpu-native/src/command/compute.rs
index 3f96a1774..05bcc7703 100644
--- a/wgpu-native/src/command/compute.rs
+++ b/wgpu-native/src/command/compute.rs
@@ -35,7 +35,8 @@ pub extern "C" fn wgpu_compute_pass_end_pass(
 
     HUB.command_buffers
         .lock()
         .get_mut(pass.cmb_id.0)
-        .raw = Some(pass.raw);
+        .raw
+        .push(pass.raw);
     pass.cmb_id.0
 }
diff --git a/wgpu-native/src/command/mod.rs b/wgpu-native/src/command/mod.rs
index 25c44a745..695a10277 100644
--- a/wgpu-native/src/command/mod.rs
+++ b/wgpu-native/src/command/mod.rs
@@ -72,7 +72,7 @@ pub struct TextureCopyView {
 }
 
 pub struct CommandBuffer<B: hal::Backend> {
-    pub(crate) raw: Option<B::CommandBuffer>,
+    pub(crate) raw: Vec<B::CommandBuffer>,
     fence: B::Fence,
     recorded_thread_id: ThreadId,
     device_id: Stored<DeviceId>,
 }
@@ -89,10 +89,11 @@ pub extern "C" fn wgpu_command_buffer_begin_render_pass(
     let mut cmb_guard = HUB.command_buffers.lock();
     let cmb = cmb_guard.get_mut(command_buffer_id);
-    let raw = cmb.raw.take().unwrap();
-
     let device_guard = HUB.devices.lock();
-    let _device = &device_guard.get(cmb.device_id.0).raw;
+    let device = device_guard.get(cmb.device_id.0);
+
+    let transit_comb = cmb.raw.pop().unwrap();
+    let current_comb = device.com_allocator.extend(cmb);
 
     //let render_pass = device.create_render_pass();
     //let framebuffer = device.create_framebuffer();
@@ -108,7 +109,11 @@ pub extern "C" fn wgpu_command_buffer_begin_render_pass(
 
     HUB.render_passes
         .lock()
-        .register(RenderPass::new(raw, command_buffer_id))
+        .register(RenderPass::new(
+            current_comb,
+            transit_comb,
+            command_buffer_id,
+        ))
 }
 
 #[no_mangle]
@@ -118,7 +123,7 @@ pub extern "C" fn wgpu_command_buffer_begin_compute_pass(
     let mut cmb_guard = HUB.command_buffers.lock();
     let cmb = cmb_guard.get_mut(command_buffer_id);
-    let raw = cmb.raw.take().unwrap();
+    let raw = cmb.raw.pop().unwrap();
 
     HUB.compute_passes
         .lock()
diff --git a/wgpu-native/src/command/render.rs b/wgpu-native/src/command/render.rs
index 79dcd91da..3e221d662 100644
--- a/wgpu-native/src/command/render.rs
+++ b/wgpu-native/src/command/render.rs
@@ -7,16 +7,24 @@ use {
 use hal;
 use hal::command::RawCommandBuffer;
 
+use std::iter;
+
 pub struct RenderPass<B: hal::Backend> {
     raw: B::CommandBuffer,
+    parent: B::CommandBuffer,
     cmb_id: Stored<CommandBufferId>,
 }
 
 impl<B: hal::Backend> RenderPass<B> {
-    pub fn new(raw: B::CommandBuffer, cmb_id: CommandBufferId) -> Self {
+    pub fn new(
+        raw: B::CommandBuffer,
+        parent: B::CommandBuffer,
+        cmb_id: CommandBufferId,
+    ) -> Self {
         RenderPass {
             raw,
+            parent,
             cmb_id: Stored(cmb_id),
         }
     }
@@ -31,9 +39,13 @@ pub extern "C" fn wgpu_render_pass_end_pass(
         .take(pass_id);
     pass.raw.end_render_pass();
 
+    let combs = iter::once(pass.parent)
+        .chain(iter::once(pass.raw));
     HUB.command_buffers
         .lock()
         .get_mut(pass.cmb_id.0)
-        .raw = Some(pass.raw);
+        .raw
+        .extend(combs);
+
     pass.cmb_id.0
 }
diff --git a/wgpu-native/src/device.rs b/wgpu-native/src/device.rs
index d2185a235..fd3716dc1 100644
--- a/wgpu-native/src/device.rs
+++ b/wgpu-native/src/device.rs
@@ -17,7 +17,7 @@ pub struct Device<B: hal::Backend> {
     pub(crate) raw: B::Device,
     queue_group: hal::QueueGroup<B, hal::General>,
     mem_allocator: Heaps<B::Memory>,
-    com_allocator: command::CommandAllocator<B>,
+    pub(crate) com_allocator: command::CommandAllocator<B>,
     mem_props: hal::MemoryProperties,
 }
 
@@ -219,7 +219,7 @@ pub extern "C" fn wgpu_device_create_command_buffer(
     let device = device_guard.get_mut(device_id);
 
     let mut cmd_buf = device.com_allocator.allocate(device_id, &device.raw);
-    cmd_buf.raw.as_mut().unwrap().begin(
+    cmd_buf.raw.last_mut().unwrap().begin(
         hal::command::CommandBufferFlags::ONE_TIME_SUBMIT,
         hal::command::CommandBufferInheritanceInfo::default(),
     );
@@ -249,7 +249,7 @@ pub extern "C" fn wgpu_queue_submit(
             command_buffer_guard
                 .get_mut(cmb_id)
                 .raw
-                .as_mut()
+                .last_mut()
                 .unwrap()
                 .finish();
         }
@@ -259,12 +259,8 @@ pub extern "C" fn wgpu_queue_submit(
         let submission = hal::queue::RawSubmission {
             cmd_buffers: command_buffer_ids
                 .iter()
-                .map(|&cmb_id| {
-                    command_buffer_guard
-                        .get(cmb_id)
-                        .raw
-                        .as_ref()
-                        .unwrap()
+                .flat_map(|&cmb_id| {
+                    &command_buffer_guard.get(cmb_id).raw
                 }),
             wait_semaphores: &[],
             signal_semaphores: &[],
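
For orientation, here is a minimal, self-contained sketch of the recycling scheme the patch moves to: each wgpu `CommandBuffer` now carries a `Vec` of raw command buffers plus one fence, raw buffers return to a per-thread pool once their fence signals, and fences are parked on a shared free list for reuse. Everything below uses invented stand-in types and names (`RawCommandBuffer`, `Fence`, the `counter` field, `allocate_raw`) instead of the gfx-hal `B::CommandPool`/`B::Device` calls the real allocator goes through, so it only illustrates the bookkeeping, not the actual wgpu-native API.

```rust
use std::collections::HashMap;
use std::sync::Mutex;
use std::thread::{self, ThreadId};

struct RawCommandBuffer(usize); // stand-in for B::CommandBuffer
struct Fence(usize); // stand-in for B::Fence

struct CommandBuffer {
    raw: Vec<RawCommandBuffer>, // several raw buffers per wgpu command buffer
    fence: Fence,
    recorded_thread_id: ThreadId,
}

#[derive(Default)]
struct Inner {
    pools: HashMap<ThreadId, Vec<RawCommandBuffer>>, // per-thread free lists
    fences: Vec<Fence>,          // parked fences, ready for reuse
    pending: Vec<CommandBuffer>, // submitted, fence not yet signalled
    counter: usize,              // fakes backend resource creation
}

impl Inner {
    // Hand out one raw buffer from the given thread's pool, growing it in
    // batches of 20 when it runs dry (mirrors CommandPool::allocate).
    fn allocate_raw(&mut self, thread_id: ThreadId) -> RawCommandBuffer {
        if self.pools.get(&thread_id).map_or(true, |p| p.is_empty()) {
            let start = self.counter;
            self.counter += 20;
            self.pools
                .entry(thread_id)
                .or_default()
                .extend((start..start + 20).map(RawCommandBuffer));
        }
        self.pools.get_mut(&thread_id).unwrap().pop().unwrap()
    }

    // Give every raw buffer back to its pool and park the fence for reuse
    // (mirrors Inner::recycle; a real backend would reset each buffer first).
    fn recycle(&mut self, cmd_buf: CommandBuffer) {
        let pool = self.pools.entry(cmd_buf.recorded_thread_id).or_default();
        pool.extend(cmd_buf.raw);
        self.fences.push(cmd_buf.fence);
    }
}

#[derive(Default)]
struct CommandAllocator {
    inner: Mutex<Inner>,
}

impl CommandAllocator {
    fn allocate(&self) -> CommandBuffer {
        let thread_id = thread::current().id();
        let mut inner = self.inner.lock().unwrap();
        // Reuse a parked fence when possible; a real backend would reset it here.
        let fence = match inner.fences.pop() {
            Some(fence) => fence,
            None => {
                inner.counter += 1;
                Fence(inner.counter)
            }
        };
        let init = inner.allocate_raw(thread_id);
        CommandBuffer {
            raw: vec![init],
            fence,
            recorded_thread_id: thread_id,
        }
    }

    // One extra raw buffer for an already started command buffer (render pass).
    fn extend(&self, cmd_buf: &CommandBuffer) -> RawCommandBuffer {
        self.inner.lock().unwrap().allocate_raw(cmd_buf.recorded_thread_id)
    }

    fn after_submit(&self, cmd_buf: CommandBuffer) {
        self.inner.lock().unwrap().pending.push(cmd_buf);
    }

    // The real maintain() polls get_fence_status; here every submission is
    // treated as finished so its buffers and fence can be recycled.
    fn maintain(&self) {
        let mut inner = self.inner.lock().unwrap();
        while let Some(cmd_buf) = inner.pending.pop() {
            inner.recycle(cmd_buf);
        }
    }
}

fn main() {
    let allocator = CommandAllocator::default();
    let mut cmd_buf = allocator.allocate();
    let pass_buf = allocator.extend(&cmd_buf); // a pass records into its own buffer
    cmd_buf.raw.push(pass_buf);
    allocator.after_submit(cmd_buf);
    allocator.maintain(); // "fence signalled": everything returns to the free lists
    println!("reused fence id: {}", allocator.allocate().fence.0);
}
```

Splitting the state this way (per-thread pools for raw buffers, one shared list for fences) is what lets `maintain` recycle a finished submission from any thread while its raw buffers still land back in the pool of the thread that recorded them, which is the same property the patch preserves with `HashMap<ThreadId, CommandPool<B>>` plus the new `fences` vector.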