Speed up the deletion of all files in an Azure container
author Michael Vrable <mvrable@cs.ucsd.edu>
Tue, 15 Mar 2011 03:56:08 +0000 (20:56 -0700)
committer Michael Vrable <mvrable@cs.ucsd.edu>
Tue, 15 Mar 2011 03:56:08 +0000 (20:56 -0700)
cleaner/azure.py

index 89b4f23..7a07694 100644 (file)
@@ -148,6 +148,34 @@ class AzureConnection:
         path = "/%s/%s" % (container, key)
         r = self._make_request(path, method='DELETE')
 
+def parallel_delete(container, keys):
+    import Queue
+    from threading import Lock, Thread
+
+    keys = list(iter(keys))
+
+    q = Queue.Queue(16384)
+    l = Lock()
+
+    def deletion_task():
+        conn = AzureConnection()
+        while True:
+            k = q.get()
+            l.acquire()
+            print k
+            l.release()
+            conn.delete(container, k)
+            q.task_done()
+
+    for i in range(128):
+        t = Thread(target=deletion_task)
+        t.setDaemon(True)
+        t.start()
+
+    for k in keys:
+        q.put(k)
+    q.join()
+
 if __name__ == '__main__':
     container = 'bluesky'
     conn = AzureConnection()
@@ -155,6 +183,4 @@ if __name__ == '__main__':
     conn.put(container, "testkey", "A" * 40)
     print "Fetch result:", conn.get(container, "testkey")
 
-    for k in list(iter(conn.list(container))):
-        print "Deleting", k
-        conn.delete(container, k)
+    parallel_delete(container, conn.list(container))