Skip to content

Commit

Permalink
fix(win): FIX and HIDE 2 win-errors remaining
Browse files Browse the repository at this point in the history
+ File-in-use errors were fixed with `gitdb.util.mman.collect()`!
+ This call is disabled when `gitdb.util.HIDE_WINDOWS_KNOWN_ERRORS == False`.
  • Loading branch information
ankostis committed Oct 24, 2016
1 parent 08b1f5f commit b17f91a
Show file tree
Hide file tree
Showing 13 changed files with 117 additions and 89 deletions.
7 changes: 4 additions & 3 deletions gitdb/db/loose.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@
rename,
dirname,
basename,
join
join,
is_win,
)

from gitdb.fun import (
Expand Down Expand Up @@ -71,7 +72,7 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
# On windows we need to keep it writable, otherwise it cannot be removed
# either
new_objects_mode = int("444", 8)
if os.name == 'nt':
if is_win:
new_objects_mode = int("644", 8)

def __init__(self, root_path):
Expand Down Expand Up @@ -226,7 +227,7 @@ def store(self, istream):
mkdir(obj_dir)
# END handle destination directory
# rename onto existing doesn't work on windows
if os.name == 'nt':
if is_win:
if isfile(obj_path):
remove(tmp_path)
else:
Expand Down
4 changes: 2 additions & 2 deletions gitdb/db/pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ def __init__(self, root_path):
# * hits - number of times the pack was hit with a request
# * entity - Pack entity instance
# * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
# self._entities = list() # lazy loaded list
# self._entities = [] # lazy loaded list
self._hit_count = 0 # amount of hits
self._st_mtime = 0 # last modification data of our root path

def _set_cache_(self, attr):
if attr == '_entities':
self._entities = list()
self._entities = []
self.update_cache(force=True)
# END handle entities initialization

Expand Down
2 changes: 1 addition & 1 deletion gitdb/pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
pass
# END try c module

from gitdb.base import ( # Amazing !
from gitdb.base import (
OInfo,
OStream,
OPackInfo,
Expand Down
5 changes: 3 additions & 2 deletions gitdb/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
write,
close,
suppress,
is_darwin,
)

from gitdb.const import NULL_BYTE, BYTE_SPACE
Expand Down Expand Up @@ -318,7 +319,7 @@ def read(self, size=-1):
# However, the zlib VERSIONs as well as the platform check is used to further match the entries in the
# table in the github issue. This is it ... it was the only way I could make this work everywhere.
# IT's CERTAINLY GOING TO BITE US IN THE FUTURE ... .
if PY26 or ((zlib.ZLIB_VERSION == '1.2.7' or zlib.ZLIB_VERSION == '1.2.5') and not sys.platform == 'darwin'):
if PY26 or ((zlib.ZLIB_VERSION == '1.2.7' or zlib.ZLIB_VERSION == '1.2.5') and not is_darwin):
unused_datalen = len(self._zip.unconsumed_tail)
else:
unused_datalen = len(self._zip.unconsumed_tail) + len(self._zip.unused_data)
Expand Down Expand Up @@ -447,7 +448,7 @@ def _set_cache_brute_(self, attr):
# TODO: There should be a special case if there is only one stream
# Then the default-git algorithm should perform a tad faster, as the
# delta is not peaked into, causing less overhead.
buffer_info_list = list()
buffer_info_list = []
max_target_size = 0
for dstream in self._dstreams:
buf = dstream.read(512) # read the header information + X
Expand Down
10 changes: 5 additions & 5 deletions gitdb/test/db/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class TestDBBase(TestBase):

# data
two_lines = b'1234\nhello world'
all_data = (two_lines, )
all_data = (two_lines,)

def _assert_object_writing_simple(self, db):
# write a bunch of objects and query their streams and info
Expand All @@ -56,10 +56,10 @@ def _assert_object_writing_simple(self, db):
assert isinstance(info, OInfo)
assert info.type == istream.type and info.size == istream.size

stream = db.stream(istream.binsha)
assert isinstance(stream, OStream)
assert stream.binsha == info.binsha and stream.type == info.type
assert stream.read() == data
with db.stream(istream.binsha) as stream:
assert isinstance(stream, OStream)
assert stream.binsha == info.binsha and stream.type == info.type
assert stream.read() == data
# END for each item

assert db.size() == null_objs + ni
Expand Down
4 changes: 2 additions & 2 deletions gitdb/test/db/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def test_reading(self):
# access should be possible
gitdb_sha = next(gdb.sha_iter())
assert isinstance(gdb.info(gitdb_sha), OInfo)
assert isinstance(gdb.stream(gitdb_sha), OStream)
ni = 50
with gdb.stream(gitdb_sha) as stream:
    assert isinstance(stream, OStream)
ni = 50
assert gdb.size() >= ni
sha_list = list(gdb.sha_iter())
assert len(sha_list) == gdb.size()
Expand Down
10 changes: 10 additions & 0 deletions gitdb/test/db/test_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,16 @@

import os
import random
from gitdb.util import mman, HIDE_WINDOWS_KNOWN_ERRORS


class TestPackDB(TestDBBase):

## Unless HIDE_WINDOWS_KNOWN_ERRORS, on Windows fails with:
# File "D:\Work\gitdb.git\gitdb\test\db\test_pack.py", line 41, in test_writing
# os.rename(pack_path, new_pack_path)
# PermissionError: [WinError 32] The process cannot access the file
# because it is being used by another process: 'pack-c0438c19fb16422b6bbcce24387b3264416d485b.packrenamed'
@with_rw_directory
@with_packs_rw
def test_writing(self, path):
Expand All @@ -30,6 +36,10 @@ def test_writing(self, path):
# packs removed - rename a file, should affect the glob
pack_path = pdb.entities()[0].pack().path()
new_pack_path = pack_path + "renamed"
## FIXME: Had to manually collect leaked files!!
if HIDE_WINDOWS_KNOWN_ERRORS:
leaked_mmaps = mman.collect()
self.assertEqual(leaked_mmaps, 6)
os.rename(pack_path, new_pack_path)

pdb.update_cache(force=True)
Expand Down
9 changes: 8 additions & 1 deletion gitdb/test/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import unittest

from gitdb import OStream
from gitdb.util import rmtree
from gitdb.util import rmtree, mman, HIDE_WINDOWS_KNOWN_ERRORS
from gitdb.utils.compat import xrange


Expand Down Expand Up @@ -96,6 +96,13 @@ def wrapper(self):
# memory maps closed, once objects go out of scope. For some reason
# though this is not the case here unless we collect explicitly.
if not keep:
if HIDE_WINDOWS_KNOWN_ERRORS:
## Or else 2 Windows TCs fail with:
# File "D:\Work\gitdb.git\gitdb\util.py", line 141, in onerror
# func(path) # Will scream if still not possible to delete.
# PermissionError: [WinError 32] The process cannot access the file
# because it is being used by another process: 'sss\\index_cc_wll5'
mman.collect()
gc.collect()
rmtree(path)
# END handle exception
Expand Down
5 changes: 4 additions & 1 deletion gitdb/test/performance/test_pack_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def test_pack_writing(self):
st = time()
for sha in pdb.sha_iter():
count += 1
pdb.stream(sha)
with pdb.stream(sha):
pass
if count == ni:
break
# END gather objects for pack-writing
Expand All @@ -55,6 +56,8 @@ def test_pack_writing(self):
(ni, elapsed, ni / (elapsed or 1)), file=sys.stderr)

st = time()
## We are leaking files here, but we don't care...
# and we need a `contextlib.ExitStack` to safely close them.
PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()), ostream.write, object_count=ni)
elapsed = time() - st
total_kb = ostream.bytes_written() / 1000
Expand Down
21 changes: 7 additions & 14 deletions gitdb/test/test_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,19 @@ def test_base(self):

for sha1 in ldb.sha_iter():
oinfo = ldb.info(sha1)
ostream = ldb.stream(sha1)
assert oinfo[:3] == ostream[:3]
with ldb.stream(sha1) as ostream:
assert oinfo[:3] == ostream[:3]

assert len(ostream.read()) == ostream.size
assert len(ostream.read()) == ostream.size
assert ldb.has_object(oinfo.binsha)
# END for each sha in database
# assure we close all files
try:
del(ostream)
del(oinfo)
except UnboundLocalError:
pass
# END ignore exception if there are no loose objects

data = "my data".encode("ascii")
istream = IStream("blob", len(data), BytesIO(data))

# the object does not yet have a sha
assert istream.binsha is None
ldb.store(istream)
# now the sha is set
assert len(istream.binsha) == 20
assert ldb.has_object(istream.binsha)
with ldb.store(istream):
# now the sha is set
assert len(istream.binsha) == 20
assert ldb.has_object(istream.binsha)
77 changes: 41 additions & 36 deletions gitdb/test/test_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,42 +88,42 @@ def _assert_pack_file(self, pack, version, size):

num_obj = 0
for obj in pack.stream_iter():
num_obj += 1
info = pack.info(obj.pack_offset)
stream = pack.stream(obj.pack_offset)

assert info.pack_offset == stream.pack_offset
assert info.type_id == stream.type_id
assert hasattr(stream, 'read')

# it should be possible to read from both streams
assert obj.read() == stream.read()

streams = pack.collect_streams(obj.pack_offset)
assert streams

# read the stream
try:
dstream = DeltaApplyReader.new(streams)
except ValueError:
# ignore these, old git versions use only ref deltas,
# which we havent resolved ( as we are without an index )
# Also ignore non-delta streams
continue
# END get deltastream

with dstream:
# read all
data = dstream.read()
assert len(data) == dstream.size

# test seek
dstream.seek(0)
assert dstream.read() == data

# read chunks
# NOTE: the current implementation is safe, it basically transfers
# all calls to the underlying memory map
with obj:
num_obj += 1
info = pack.info(obj.pack_offset)
with pack.stream(obj.pack_offset) as stream:
assert info.pack_offset == stream.pack_offset
assert info.type_id == stream.type_id
assert hasattr(stream, 'read')

# it should be possible to read from both streams
assert obj.read() == stream.read()

streams = pack.collect_streams(obj.pack_offset)
assert streams

# read the stream
try:
dstream = DeltaApplyReader.new(streams)
except ValueError:
# ignore these, old git versions use only ref deltas,
# which we havent resolved ( as we are without an index )
# Also ignore non-delta streams
continue
# END get deltastream

with dstream:
# read all
data = dstream.read()
assert len(data) == dstream.size

# test seek
dstream.seek(0)
assert dstream.read() == data

# read chunks
# NOTE: the current implementation is safe, it basically transfers
# all calls to the underlying memory map

# END for each object
assert num_obj == size
Expand All @@ -142,6 +142,11 @@ def test_pack(self):
self._assert_pack_file(pack, version, size)
# END for each pack to test

## Unless HIDE_WINDOWS_KNOWN_ERRORS, on Windows fails with:
# File "D:\Work\gitdb.git\gitdb\util.py", line 141, in onerror
# func(path) # Will scream if still not possible to delete.
# PermissionError: [WinError 32] The process cannot access the file
# because it is being used by another process: 'sss\\index_cc_wll5'
@with_rw_directory
def test_pack_entity(self, rw_dir):
pack_objs = list()
Expand Down
17 changes: 8 additions & 9 deletions gitdb/test/test_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,12 @@ def test_decompress_reader_special_case(self):
mdb = MemoryDB()
for sha in (b'888401851f15db0eed60eb1bc29dec5ddcace911',
b'7bb839852ed5e3a069966281bb08d50012fb309b',):
ostream = odb.stream(hex_to_bin(sha))

# if there is a bug, we will be missing one byte exactly !
data = ostream.read()
assert len(data) == ostream.size

# Putting it back in should yield nothing new - after all, we have
dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
assert dump.hexsha == sha
with odb.stream(hex_to_bin(sha)) as ostream:
# if there is a bug, we will be missing one byte exactly !
data = ostream.read()
assert len(data) == ostream.size

# Putting it back in should yield nothing new - after all, we have
dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
assert dump.hexsha == sha
# end for each loose object sha to test
Loading

0 comments on commit b17f91a

Please sign in to comment.