diff --git a/.travis.yml b/.travis.yml
index 8df922446..cd675ae6d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,3 +11,9 @@ before_install:
   # Do not run bors builds against the nightly compiler.
   # We want to find out about nightly bugs, so they're done in master, but we don't block on them.
   - if [[ $TRAVIS_RUST_VERSION == "nightly" && $TRAVIS_BRANCH == "staging" ]]; then exit; fi
+
+script:
+  - cargo test
+  - cargo build --manifest-path wgpu-native/Cargo.toml --features remote
+  - cargo build
+  # TODO: also build the examples in CI: (cd examples && make)
diff --git a/examples/hello_triangle_c/main.c b/examples/hello_triangle_c/main.c
index ef451a0b4..59e81e055 100644
--- a/examples/hello_triangle_c/main.c
+++ b/examples/hello_triangle_c/main.c
@@ -38,5 +38,11 @@ int main()
         .code = read_file("./../data/hello_triangle.frag.spv"),
     };
     WGPUShaderModuleId _fs = wgpu_device_create_shader_module(device, fs_desc);
+
+    WGPUCommandBufferDescriptor cmd_buf_desc = {
+    };
+    WGPUCommandBufferId cmd_buf = wgpu_device_create_command_buffer(device, cmd_buf_desc);
+    WGPUQueueId queue = wgpu_device_get_queue(device);
+    wgpu_queue_submit(queue, &cmd_buf, 1);
     return 0;
 }
diff --git a/examples/hello_triangle_rust/main.rs b/examples/hello_triangle_rust/main.rs
index c6d078691..125771c30 100644
--- a/examples/hello_triangle_rust/main.rs
+++ b/examples/hello_triangle_rust/main.rs
@@ -17,4 +17,9 @@ fn main() {
     let _vs = device.create_shader_module(vs_bytes);
     let fs_bytes = include_bytes!("./../data/hello_triangle.frag.spv");
     let _fs = device.create_shader_module(fs_bytes);
+
+    let cmd_buf = device.create_command_buffer(wgpu::CommandBufferDescriptor {
+    });
+    let queue = device.get_queue();
+    queue.submit(&[cmd_buf]);
 }
diff --git a/wgpu-bindings/wgpu.h b/wgpu-bindings/wgpu.h
index 4c776af9e..ff5151a13 100644
--- a/wgpu-bindings/wgpu.h
+++ b/wgpu-bindings/wgpu.h
@@ -34,6 +34,10 @@ typedef WGPUId WGPUCommandBufferId;
 
 typedef WGPUId WGPUInstanceId;
 
+typedef struct {
+  /* No members yet. NOTE(review): an empty struct is a compiler extension in C, not ISO C. */
+} WGPUCommandBufferDescriptor;
+
 typedef WGPUId WGPUShaderModuleId;
 
 typedef struct {
@@ -45,19 +49,30 @@ typedef struct {
   WGPUByteArray code;
 } WGPUShaderModuleDescriptor;
 
+typedef WGPUId WGPUQueueId;
+
 typedef struct {
   WGPUPowerPreference power_preference;
 } WGPUAdapterDescriptor;
 
-WGPUDeviceId wgpu_adapter_create_device(WGPUAdapterId adapter_id, WGPUDeviceDescriptor desc);
+WGPUDeviceId wgpu_adapter_create_device(WGPUAdapterId adapter_id, WGPUDeviceDescriptor _desc);
 
 WGPUComputePassId wgpu_command_buffer_begin_compute_pass(void);
 
-WGPURenderPassId wgpu_command_buffer_begin_render_pass(WGPUCommandBufferId command_buffer);
+WGPURenderPassId wgpu_command_buffer_begin_render_pass(WGPUCommandBufferId _command_buffer);
 
 WGPUInstanceId wgpu_create_instance(void);
 
+WGPUCommandBufferId wgpu_device_create_command_buffer(WGPUDeviceId device_id,
+                                                      WGPUCommandBufferDescriptor desc);
+
 WGPUShaderModuleId wgpu_device_create_shader_module(WGPUDeviceId device_id,
                                                     WGPUShaderModuleDescriptor desc);
 
+WGPUQueueId wgpu_device_get_queue(WGPUDeviceId device_id);
+
 WGPUAdapterId wgpu_instance_get_adapter(WGPUInstanceId instance_id, WGPUAdapterDescriptor desc);
+
+void wgpu_queue_submit(WGPUQueueId queue_id,
+                       const WGPUCommandBufferId *command_buffer_ptr,
+                       uintptr_t command_buffer_count);
diff --git a/wgpu-native/src/command/allocator.rs b/wgpu-native/src/command/allocator.rs
new file mode 100644
index 000000000..ec5adbe34
--- /dev/null
+++ b/wgpu-native/src/command/allocator.rs
@@ -0,0 +1,101 @@
+use super::CommandBuffer;
+
+use hal::{self, Device};
+use hal::command::RawCommandBuffer;
+use hal::pool::RawCommandPool;
+
+use std::collections::HashMap;
+//TODO: use `parking_lot::Mutex`?
+use std::sync::Mutex;
+use std::thread;
+
+/// Per-thread command pool plus the buffers from it that are currently idle.
+struct CommandPool<B: hal::Backend> {
+    raw: B::CommandPool, // backend pool that new command buffers are allocated from
+    available: Vec<CommandBuffer<B>>, // idle buffers that `allocate` pops before growing the pool
+}
+
+pub struct Inner<B: hal::Backend> {
+    pools: HashMap<thread::ThreadId, CommandPool<B>>, // one pool per thread that has allocated
+    pending: Vec<CommandBuffer<B>>, // submitted buffers awaiting a fence check in `maintain`
+}
+
+pub struct CommandAllocator<B: hal::Backend> {
+    queue_family: hal::queue::QueueFamilyId, // family every lazily created pool is bound to
+    inner: Mutex<Inner<B>>, // guards the pools and the pending list
+}
+
+impl<B: hal::Backend> CommandAllocator<B> {
+    /// Creates an allocator with no pools; they are added as threads call `allocate`.
+    pub fn new(queue_family: hal::queue::QueueFamilyId) -> Self {
+        let inner = Inner {
+            pools: HashMap::new(),
+            pending: Vec::new(),
+        };
+        let inner = Mutex::new(inner);
+        CommandAllocator { queue_family, inner }
+    }
+
+    /// Returns a command buffer from the calling thread's pool, allocating a new batch if none is idle.
+    pub fn allocate(&self, device: &B::Device) -> CommandBuffer<B> {
+        let recorded_thread_id = thread::current().id();
+        let mut guard = self.inner.lock().unwrap();
+        let pool = guard.pools.entry(recorded_thread_id).or_insert_with(|| {
+            let flags = hal::pool::CommandPoolCreateFlags::RESET_INDIVIDUAL;
+            CommandPool {
+                raw: device.create_command_pool(self.queue_family, flags),
+                available: Vec::new(),
+            }
+        });
+
+        // A reused buffer gets its fence reset before it is handed out again.
+        if let Some(reused) = pool.available.pop() {
+            device.reset_fence(&reused.fence);
+            return reused;
+        }
+
+        // Nothing idle for this thread: allocate 20 primary buffers, each with its own new fence.
+        let fresh = pool.raw.allocate(20, hal::command::RawLevel::Primary);
+        pool.available.extend(fresh.into_iter().map(|raw| CommandBuffer {
+            raw,
+            fence: device.create_fence(false),
+            recorded_thread_id,
+        }));
+        pool.available.pop().unwrap()
+    }
+
+    /// Parks a submitted command buffer on the pending list.
+    ///
+    /// It stays there until `maintain` moves it back into a pool.
+    pub fn submit(&self, cmd_buf: CommandBuffer<B>) {
+        let mut guard = self.inner.lock().unwrap();
+        guard.pending.push(cmd_buf);
+    }
+
+    /// Resets `cmd_buf` and puts it back into the pool of the thread that recorded it.
+    ///
+    /// # Panics
+    /// Panics if the lock is poisoned or no pool exists for `cmd_buf.recorded_thread_id`.
+    pub fn recycle(&self, mut cmd_buf: CommandBuffer<B>) {
+        // The reset runs first, before the allocator lock is taken.
+        cmd_buf.raw.reset(false);
+        let mut guard = self.inner.lock().unwrap();
+        let pool = guard.pools.get_mut(&cmd_buf.recorded_thread_id).unwrap();
+        pool.available.push(cmd_buf);
+    }
+
+    /// Returns every pending buffer whose fence status check passes to its recording thread's pool.
+    pub fn maintain(&self, device: &B::Device) {
+        let mut inner = self.inner.lock().unwrap();
+        // Walk backwards so `swap_remove` only moves elements that were already visited.
+        for i in (0 .. inner.pending.len()).rev() {
+            if !device.get_fence_status(&inner.pending[i].fence) {
+                continue;
+            }
+            let mut cmd_buf = inner.pending.swap_remove(i);
+            // Same as `recycle`: reset the buffer before it can be handed out again.
+            cmd_buf.raw.reset(false);
+            inner.pools.get_mut(&cmd_buf.recorded_thread_id).unwrap().available.push(cmd_buf);
+        }
+    }
+}
diff --git a/wgpu-native/src/command/compute.rs b/wgpu-native/src/command/compute.rs
index daff85ff3..fdf5473d5 100644
--- a/wgpu-native/src/command/compute.rs
+++ b/wgpu-native/src/command/compute.rs
@@ -1,6 +1,6 @@
 use hal;
 
-use {CommandBuffer, CommandBufferId, ComputePassId};
+//use {CommandBuffer, CommandBufferId, ComputePassId};
 
 pub struct ComputePass<B: hal::Backend> {
     raw: B::CommandBuffer,
diff --git a/wgpu-native/src/command/mod.rs b/wgpu-native/src/command/mod.rs
index e70a017c1..73abf9a83 100644
--- a/wgpu-native/src/command/mod.rs
+++ b/wgpu-native/src/command/mod.rs
@@ -1,6 +1,8 @@
+mod allocator;
 mod compute;
 mod render;
 
+pub use self::allocator::*;
 pub use self::compute::*;
 pub use self::render::*;
 
@@ -11,6 +13,9 @@ use {
     TextureViewId,
 };
 
+use std::thread::ThreadId;
+
+
 #[repr(C)]
 pub enum LoadOp {
     Clear = 0,
@@ -65,15 +70,17 @@ pub struct TextureCopyView {
 }
 
 pub struct CommandBuffer<B: hal::Backend> {
-    raw: B::CommandBuffer,
+    pub(crate) raw: B::CommandBuffer, // crate-visible: `wgpu_queue_submit` borrows it for submission
+    fence: B::Fence, // reset in `CommandAllocator::allocate`, polled in `maintain`
+    recorded_thread_id: ThreadId, // selects the per-thread pool the buffer is returned to
 }
 
 #[repr(C)]
-pub struct CommandBufferDescriptor;
+pub struct CommandBufferDescriptor {} // currently has no fields
 
 #[no_mangle]
 pub extern "C" fn wgpu_command_buffer_begin_render_pass(
-    command_buffer: CommandBufferId,
+    _command_buffer: CommandBufferId, // underscore-prefixed: unused while the body is `unimplemented!()`
 ) -> RenderPassId {
     unimplemented!()
 }
diff --git a/wgpu-native/src/command/render.rs b/wgpu-native/src/command/render.rs
index f77d1edcd..4ab967a39 100644
--- a/wgpu-native/src/command/render.rs
+++ b/wgpu-native/src/command/render.rs
@@ -1,6 +1,6 @@
 use hal;
 
-use {CommandBuffer, CommandBufferId, RenderPassId};
+//use {CommandBuffer, CommandBufferId, RenderPassId};
 
 pub struct RenderPass<B: hal::Backend> {
     raw: B::CommandBuffer,
diff --git a/wgpu-native/src/device.rs b/wgpu-native/src/device.rs
index 15bc10085..3e881a793 100644
--- a/wgpu-native/src/device.rs
+++ b/wgpu-native/src/device.rs
@@ -1,28 +1,31 @@
-use hal::{self, Device as _Device, QueueGroup};
-use {conv, memory, pipeline, resource};
+use hal::{self, Device as _Device};
+use hal::queue::RawCommandQueue;
+use {command, conv, memory, pipeline, resource};
 
 use registry::{self, Registry};
-use {BufferId, CommandBufferId, DeviceId, ShaderModuleId};
+use {BufferId, CommandBufferId, DeviceId, QueueId, ShaderModuleId};
+
+use std::{iter, slice};
 
-#[repr(C)]
-pub struct CommandBufferDescriptor {}
 
 pub struct Device<B: hal::Backend> {
     device: B::Device,
-    queue_group: QueueGroup<B, hal::General>,
-    allocator: memory::SmartAllocator<B>,
+    queue_group: hal::QueueGroup<B, hal::General>, // `wgpu_queue_submit` uses `queues[0]`
+    mem_allocator: memory::SmartAllocator<B>, // memory allocator (renamed from `allocator`)
+    com_allocator: command::CommandAllocator<B>, // hands out and tracks command buffers
 }
 
 impl<B: hal::Backend> Device<B> {
     pub(crate) fn new(
         device: B::Device,
-        queue_group: QueueGroup<B, hal::General>,
+        queue_group: hal::QueueGroup<B, hal::General>,
         mem_props: hal::MemoryProperties,
     ) -> Self {
         Device {
             device,
+            mem_allocator: memory::SmartAllocator::new(mem_props, 1, 1, 1, 1),
+            com_allocator: command::CommandAllocator::new(queue_group.family()), // borrows `queue_group` before the move below
             queue_group,
-            allocator: memory::SmartAllocator::new(mem_props, 1, 1, 1, 1),
         }
     }
 }
@@ -40,7 +43,53 @@ pub extern "C" fn wgpu_device_create_shader_module(
     let shader = device
         .device
         .create_shader_module(unsafe {
-            ::std::slice::from_raw_parts(desc.code.bytes, desc.code.length)
+            slice::from_raw_parts(desc.code.bytes, desc.code.length)
         }).unwrap();
     registry::SHADER_MODULE_REGISTRY.register(ShaderModule { raw: shader })
 }
+
+/// Allocates a command buffer for `device_id` and registers it, returning the new id.
+#[no_mangle]
+pub extern "C" fn wgpu_device_create_command_buffer(
+    device_id: DeviceId,
+    _desc: command::CommandBufferDescriptor,
+) -> CommandBufferId {
+    let device = registry::DEVICE_REGISTRY.get_mut(device_id);
+    registry::COMMAND_BUFFER_REGISTRY.register(device.com_allocator.allocate(&device.device))
+}
+
+/// Returns the queue id for a device; the queue currently shares the device's own id.
+#[no_mangle]
+pub extern "C" fn wgpu_device_get_queue(device_id: DeviceId) -> QueueId {
+    // `wgpu_queue_submit` resolves a `QueueId` through the device registry.
+    device_id
+}
+
+/// Submits each listed command buffer to queue 0 of the device behind `queue_id`; every id is
+/// taken out of the registry and is invalid afterwards. A null pointer or zero count is a no-op.
+#[no_mangle]
+pub extern "C" fn wgpu_queue_submit(
+    queue_id: QueueId,
+    command_buffer_ptr: *const CommandBufferId,
+    command_buffer_count: usize,
+) {
+    // `slice::from_raw_parts` needs a non-null pointer even when the length is zero.
+    if command_buffer_ptr.is_null() || command_buffer_count == 0 {
+        return;
+    }
+    let mut device = registry::DEVICE_REGISTRY.get_mut(queue_id);
+    let command_buffer_ids = unsafe { slice::from_raw_parts(command_buffer_ptr, command_buffer_count) };
+    //TODO: submit at once, requires `get_all()`; NOTE(review): no fence is passed below — confirm
+    for &cmb_id in command_buffer_ids {
+        let cmd_buf = registry::COMMAND_BUFFER_REGISTRY.take(cmb_id);
+        {
+            let submission = hal::queue::RawSubmission {
+                cmd_buffers: iter::once(&cmd_buf.raw),
+                wait_semaphores: &[],
+                signal_semaphores: &[],
+            };
+            unsafe { device.queue_group.queues[0].as_raw_mut().submit_raw(submission, None) };
+        }
+        device.com_allocator.submit(cmd_buf);
+    }
+}
diff --git a/wgpu-native/src/instance.rs b/wgpu-native/src/instance.rs
index a6a43f288..5e4f79fe6 100644
--- a/wgpu-native/src/instance.rs
+++ b/wgpu-native/src/instance.rs
@@ -71,7 +71,7 @@ pub extern "C" fn wgpu_instance_get_adapter(
 #[no_mangle]
 pub extern "C" fn wgpu_adapter_create_device(
     adapter_id: AdapterId,
-    desc: DeviceDescriptor,
+    _desc: DeviceDescriptor,
 ) -> DeviceId {
     let mut adapter = registry::ADAPTER_REGISTRY.get_mut(adapter_id);
     let (device, queue_group) = adapter.open_with::<_, hal::General>(1, |_qf| true).unwrap();
diff --git a/wgpu-native/src/lib.rs b/wgpu-native/src/lib.rs
index db405f85a..bbae752c2 100644
--- a/wgpu-native/src/lib.rs
+++ b/wgpu-native/src/lib.rs
@@ -40,6 +40,7 @@ pub use self::resource::*;
 use back::Backend as B;
 use registry::Id;
 
+
 #[repr(C)]
 pub struct Color {
     pub r: f32,
@@ -69,11 +70,12 @@ pub struct ByteArray {
 }
 
 pub type InstanceId = Id;
-pub(crate) type InstanceHandle = back::Instance;
+type InstanceHandle = back::Instance;
 pub type AdapterId = Id;
-pub(crate) type AdapterHandle = hal::Adapter<B>;
+type AdapterHandle = hal::Adapter<B>;
 pub type DeviceId = Id;
-pub(crate) type DeviceHandle = Device<B>;
+type DeviceHandle = Device<B>;
+pub type QueueId = Id;
 pub type BufferId = Id;
 
 // Resource
@@ -90,11 +92,12 @@ pub type BlendStateId = Id;
 pub type DepthStencilStateId = Id;
 pub type InputStateId = Id;
 pub type ShaderModuleId = Id;
-pub(crate) type ShaderModuleHandle = ShaderModule<B>;
+type ShaderModuleHandle = ShaderModule<B>;
 pub type AttachmentStateId = Id;
 pub type ComputePipelineId = Id;
 pub type RenderPipelineId = Id;
 
 pub type CommandBufferId = Id;
+type CommandBufferHandle = CommandBuffer<B>;
 pub type RenderPassId = Id;
 pub type ComputePassId = Id;
diff --git a/wgpu-native/src/registry.rs b/wgpu-native/src/registry.rs
index 08a072ec9..a57bb6f08 100644
--- a/wgpu-native/src/registry.rs
+++ b/wgpu-native/src/registry.rs
@@ -1,13 +1,18 @@
+#[cfg(not(feature = "remote"))]
 use std::marker::PhantomData;
+#[cfg(not(feature = "remote"))]
 use std::os::raw::c_void;
+
 #[cfg(feature = "remote")]
 use std::sync::Arc;
 #[cfg(feature = "remote")]
 use parking_lot::{Mutex, MutexGuard, MappedMutexGuard};
-use std::{borrow, cmp, fmt, ops, ptr};
 
+#[cfg(feature = "remote")]
 use hal::backend::FastHashMap;
-use {AdapterHandle, DeviceHandle, InstanceHandle, ShaderModuleHandle};
+
+use {AdapterHandle, CommandBufferHandle, DeviceHandle, InstanceHandle, ShaderModuleHandle};
+
 
 #[cfg(not(feature = "remote"))]
 pub(crate) type Id = *mut c_void;
@@ -23,6 +28,7 @@ pub(crate) trait Registry<T> {
     fn new() -> Self;
     fn register(&self, handle: T) -> Id;
     fn get_mut(&self, id: Id) -> RegistryItem<T>;
+    fn take(&self, id: Id) -> T;
 }
 
 #[cfg(not(feature = "remote"))]
@@ -39,18 +45,25 @@ impl<T> Registry<T> for LocalRegistry<T> {
     }
 
     fn register(&self, handle: T) -> Id {
-        ::std::boxed::Box::into_raw(Box::new(handle)) as *mut _ as *mut c_void
+        Box::into_raw(Box::new(handle)) as *mut _ as *mut c_void // the box is leaked; `take` rebuilds and frees it
     }
 
     fn get_mut(&self, id: Id) -> RegistryItem<T> {
         unsafe { (id as *mut T).as_mut() }.unwrap()
     }
+
+    fn take(&self, id: Id) -> T {
+        // SAFETY: relies on `id` having come from `register` and not having been taken already.
+        let boxed = unsafe { Box::from_raw(id as *mut T) };
+        *boxed
+    }
 }
 
 #[cfg(feature = "remote")]
 struct Registrations<T> {
     next_id: Id,
     tracked: FastHashMap<Id, T>,
+    free: Vec<Id>, // ids released by `take`; `register` reuses these before minting new ones
 }
 
 #[cfg(feature = "remote")]
@@ -59,6 +72,7 @@ impl<T> Registrations<T> {
         Registrations {
             next_id: 0,
             tracked: FastHashMap::default(),
+            free: Vec::new(),
         }
     }
 }
@@ -78,15 +92,26 @@ impl<T> Registry<T> for RemoteRegistry<T> {
 
     fn register(&self, handle: T) -> Id {
         let mut registrations = self.registrations.lock();
-        let id = registrations.next_id;
+        let id = match registrations.free.pop() {
+            Some(id) => id,
+            None => {
+                registrations.next_id += 1;
+                registrations.next_id - 1
+            }
+        };
         registrations.tracked.insert(id, handle);
-        registrations.next_id += 1;
         id
     }
 
     fn get_mut(&self, id: Id) -> RegistryItem<T> {
         MutexGuard::map(self.registrations.lock(), |r| r.tracked.get_mut(&id).unwrap())
     }
+
+    fn take(&self, id: Id) -> T {
+        let mut registrations = self.registrations.lock();
+        // Remove first: an unknown id must panic before it can reach the free list.
+        registrations.tracked.remove(&id).map(|handle| { registrations.free.push(id); handle }).unwrap()
+    }
 }
 
 #[cfg(not(feature = "remote"))]
@@ -99,4 +124,5 @@ lazy_static! {
     pub(crate) static ref DEVICE_REGISTRY: ConcreteRegistry<DeviceHandle> = ConcreteRegistry::new();
     pub(crate) static ref INSTANCE_REGISTRY: ConcreteRegistry<InstanceHandle> = ConcreteRegistry::new();
     pub(crate) static ref SHADER_MODULE_REGISTRY: ConcreteRegistry<ShaderModuleHandle> = ConcreteRegistry::new();
+    pub(crate) static ref COMMAND_BUFFER_REGISTRY: ConcreteRegistry<CommandBufferHandle> = ConcreteRegistry::new();
 }
diff --git a/wgpu-rs/src/lib.rs b/wgpu-rs/src/lib.rs
index e8305c6bf..b9b448092 100644
--- a/wgpu-rs/src/lib.rs
+++ b/wgpu-rs/src/lib.rs
@@ -3,7 +3,7 @@ extern crate wgpu_native as wgn;
 pub use wgn::{
     Color, Origin3d, Extent3d,
     AdapterDescriptor, Extensions, DeviceDescriptor, PowerPreference,
-    ShaderModuleDescriptor,
+    ShaderModuleDescriptor, CommandBufferDescriptor,
 };
 
 
@@ -23,6 +23,14 @@ pub struct ShaderModule {
     id: wgn::ShaderModuleId,
 }
 
+pub struct CommandBuffer {
+    id: wgn::CommandBufferId, // id returned by `wgpu_device_create_command_buffer`
+}
+
+pub struct Queue {
+    id: wgn::QueueId, // id returned by `wgpu_device_get_queue`
+}
+
 
 impl Instance {
     pub fn new() -> Self {
@@ -58,4 +66,26 @@ impl Device {
             id: wgn::wgpu_device_create_shader_module(self.id, desc),
         }
     }
+
+    pub fn get_queue(&self) -> Queue {
+        // The native layer hands back an id; wrap it in the `Queue` type.
+        let id = wgn::wgpu_device_get_queue(self.id);
+        Queue { id }
+    }
+
+    pub fn create_command_buffer(&self, desc: CommandBufferDescriptor) -> CommandBuffer {
+        // Forward to the native entry point and wrap the returned id.
+        let id = wgn::wgpu_device_create_command_buffer(self.id, desc);
+        CommandBuffer { id }
+    }
+}
+
+impl Queue {
+    /// Submits `command_buffers` for execution on this queue.
+    pub fn submit(&self, command_buffers: &[CommandBuffer]) {
+        // Copy the ids out instead of casting the slice pointer: `CommandBuffer` has the
+        // default representation, so its layout is not guaranteed to match a bare id.
+        let ids: Vec<wgn::CommandBufferId> = command_buffers.iter().map(|cb| cb.id).collect();
+        wgn::wgpu_queue_submit(self.id, ids.as_ptr(), ids.len());
+    }
 }