Skip to content

Commit

Permalink
fix(win): FIX and HIDE 2 win-errors remaining
Browse files Browse the repository at this point in the history
+ File-in-use errors were fixed with `gitdb.util.mman.collect()`!
+ This call is disabled when `gitdb.util.HIDE_WINDOWS_KNOWN_ERRORS == False`.
  • Loading branch information
ankostis committed Oct 24, 2016
1 parent 08b1f5f commit b17f91a
Show file tree
Hide file tree
Showing 13 changed files with 117 additions and 89 deletions.
7 changes: 4 additions & 3 deletions gitdb/db/loose.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@
rename,
dirname,
basename,
join
join,
is_win,
)

from gitdb.fun import (
Expand Down Expand Up @@ -71,7 +72,7 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
# On windows we need to keep it writable, otherwise it cannot be removed
# either
new_objects_mode = int("444", 8)
if os.name == 'nt':
if is_win:
new_objects_mode = int("644", 8)

def __init__(self, root_path):
Expand Down Expand Up @@ -226,7 +227,7 @@ def store(self, istream):
mkdir(obj_dir)
# END handle destination directory
# rename onto existing doesn't work on windows
if os.name == 'nt':
if is_win:
if isfile(obj_path):
remove(tmp_path)
else:
Expand Down
4 changes: 2 additions & 2 deletions gitdb/db/pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ def __init__(self, root_path):
# * hits - number of times the pack was hit with a request
# * entity - Pack entity instance
# * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
# self._entities = list() # lazy loaded list
# self._entities = [] # lazy loaded list
self._hit_count = 0 # amount of hits
self._st_mtime = 0 # last modification data of our root path

def _set_cache_(self, attr):
if attr == '_entities':
self._entities = list()
self._entities = []
self.update_cache(force=True)
# END handle entities initialization

Expand Down
2 changes: 1 addition & 1 deletion gitdb/pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
pass
# END try c module

from gitdb.base import ( # Amazing !
from gitdb.base import (
OInfo,
OStream,
OPackInfo,
Expand Down
5 changes: 3 additions & 2 deletions gitdb/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
write,
close,
suppress,
is_darwin,
)

from gitdb.const import NULL_BYTE, BYTE_SPACE
Expand Down Expand Up @@ -318,7 +319,7 @@ def read(self, size=-1):
# However, the zlib VERSIONs as well as the platform check is used to further match the entries in the
# table in the github issue. This is it ... it was the only way I could make this work everywhere.
# IT's CERTAINLY GOING TO BITE US IN THE FUTURE ... .
if PY26 or ((zlib.ZLIB_VERSION == '1.2.7' or zlib.ZLIB_VERSION == '1.2.5') and not sys.platform == 'darwin'):
if PY26 or ((zlib.ZLIB_VERSION == '1.2.7' or zlib.ZLIB_VERSION == '1.2.5') and not is_darwin):
unused_datalen = len(self._zip.unconsumed_tail)
else:
unused_datalen = len(self._zip.unconsumed_tail) + len(self._zip.unused_data)
Expand Down Expand Up @@ -447,7 +448,7 @@ def _set_cache_brute_(self, attr):
# TODO: There should be a special case if there is only one stream
# Then the default-git algorithm should perform a tad faster, as the
# delta is not peaked into, causing less overhead.
buffer_info_list = list()
buffer_info_list = []
max_target_size = 0
for dstream in self._dstreams:
buf = dstream.read(512) # read the header information + X
Expand Down
10 changes: 5 additions & 5 deletions gitdb/test/db/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class TestDBBase(TestBase):

# data
two_lines = b'1234\nhello world'
all_data = (two_lines, )
all_data = (two_lines,)

def _assert_object_writing_simple(self, db):
# write a bunch of objects and query their streams and info
Expand All @@ -56,10 +56,10 @@ def _assert_object_writing_simple(self, db):
assert isinstance(info, OInfo)
assert info.type == istream.type and info.size == istream.size

stream = db.stream(istream.binsha)
assert isinstance(stream, OStream)
assert stream.binsha == info.binsha and stream.type == info.type
assert stream.read() == data
with db.stream(istream.binsha) as stream:
assert isinstance(stream, OStream)
assert stream.binsha == info.binsha and stream.type == info.type
assert stream.read() == data
# END for each item

assert db.size() == null_objs + ni
Expand Down
4 changes: 2 additions & 2 deletions gitdb/test/db/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def test_reading(self):
# access should be possible
gitdb_sha = next(gdb.sha_iter())
assert isinstance(gdb.info(gitdb_sha), OInfo)
assert isinstance(gdb.stream(gitdb_sha), OStream)
ni = 50
with gdb.stream(gitdb_sha) as stream:
    assert isinstance(stream, OStream)
ni = 50
assert gdb.size() >= ni
sha_list = list(gdb.sha_iter())
assert len(sha_list) == gdb.size()
Expand Down
10 changes: 10 additions & 0 deletions gitdb/test/db/test_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,16 @@

import os
import random
from gitdb.util import mman, HIDE_WINDOWS_KNOWN_ERRORS


class TestPackDB(TestDBBase):

## Unless HIDE_WINDOWS_KNOWN_ERRORS, on Windows fails with:
# File "D:\Work\gitdb.git\gitdb\test\db\test_pack.py", line 41, in test_writing
# os.rename(pack_path, new_pack_path)
# PermissionError: [WinError 32] The process cannot access the file
# because it is being used by another process: 'pack-c0438c19fb16422b6bbcce24387b3264416d485b.packrenamed'
@with_rw_directory
@with_packs_rw
def test_writing(self, path):
Expand All @@ -30,6 +36,10 @@ def test_writing(self, path):
# packs removed - rename a file, should affect the glob
pack_path = pdb.entities()[0].pack().path()
new_pack_path = pack_path + "renamed"
## FIXME: Had to manually collect leaked files!!
if HIDE_WINDOWS_KNOWN_ERRORS:
leaked_mmaps = mman.collect()
self.assertEqual(leaked_mmaps, 6)
os.rename(pack_path, new_pack_path)

pdb.update_cache(force=True)
Expand Down
9 changes: 8 additions & 1 deletion gitdb/test/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import unittest

from gitdb import OStream
from gitdb.util import rmtree
from gitdb.util import rmtree, mman, HIDE_WINDOWS_KNOWN_ERRORS
from gitdb.utils.compat import xrange


Expand Down Expand Up @@ -96,6 +96,13 @@ def wrapper(self):
# memory maps closed, once objects go out of scope. For some reason
# though this is not the case here unless we collect explicitly.
if not keep:
if HIDE_WINDOWS_KNOWN_ERRORS:
## Or else 2 Windows TCs fail with:
# File "D:\Work\gitdb.git\gitdb\util.py", line 141, in onerror
# func(path) # Will scream if still not possible to delete.
# PermissionError: [WinError 32] The process cannot access the file
# because it is being used by another process: 'sss\\index_cc_wll5'
mman.collect()
gc.collect()
rmtree(path)
# END handle exception
Expand Down
5 changes: 4 additions & 1 deletion gitdb/test/performance/test_pack_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def test_pack_writing(self):
st = time()
for sha in pdb.sha_iter():
count += 1
pdb.stream(sha)
with pdb.stream(sha):
pass
if count == ni:
break
# END gather objects for pack-writing
Expand All @@ -55,6 +56,8 @@ def test_pack_writing(self):
(ni, elapsed, ni / (elapsed or 1)), file=sys.stderr)

st = time()
## We are leaking files here, but we don't care...
# and we need a `contextlib.ExitStack` to safely close them.
PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()), ostream.write, object_count=ni)
elapsed = time() - st
total_kb = ostream.bytes_written() / 1000
Expand Down
21 changes: 7 additions & 14 deletions gitdb/test/test_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,19 @@ def test_base(self):

for sha1 in ldb.sha_iter():
oinfo = ldb.info(sha1)
ostream = ldb.stream(sha1)
assert oinfo[:3] == ostream[:3]
with ldb.stream(sha1) as ostream:
assert oinfo[:3] == ostream[:3]

assert len(ostream.read()) == ostream.size
assert len(ostream.read()) == ostream.size
assert ldb.has_object(oinfo.binsha)
# END for each sha in database
# assure we close all files
try:
del(ostream)
del(oinfo)
except UnboundLocalError:
pass
# END ignore exception if there are no loose objects

data = "my data".encode("ascii")
istream = IStream("blob", len(data), BytesIO(data))

# the object does not yet have a sha
assert istream.binsha is None
ldb.store(istream)
# now the sha is set
assert len(istream.binsha) == 20
assert ldb.has_object(istream.binsha)
with ldb.store(istream):
# now the sha is set
assert len(istream.binsha) == 20
assert ldb.has_object(istream.binsha)
77 changes: 41 additions & 36 deletions gitdb/test/test_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,42 +88,42 @@ def _assert_pack_file(self, pack, version, size):

num_obj = 0
for obj in pack.stream_iter():
num_obj += 1
info = pack.info(obj.pack_offset)
stream = pack.stream(obj.pack_offset)

assert info.pack_offset == stream.pack_offset
assert info.type_id == stream.type_id
assert hasattr(stream, 'read')

# it should be possible to read from both streams
assert obj.read() == stream.read()

streams = pack.collect_streams(obj.pack_offset)
assert streams

# read the stream
try:
dstream = DeltaApplyReader.new(streams)
except ValueError:
# ignore these, old git versions use only ref deltas,
# which we havent resolved ( as we are without an index )
# Also ignore non-delta streams
continue
# END get deltastream

with dstream:
# read all
data = dstream.read()
assert len(data) == dstream.size

# test seek
dstream.seek(0)
assert dstream.read() == data

# read chunks
# NOTE: the current implementation is safe, it basically transfers
# all calls to the underlying memory map
with obj:
num_obj += 1
info = pack.info(obj.pack_offset)
with pack.stream(obj.pack_offset) as stream:
assert info.pack_offset == stream.pack_offset
assert info.type_id == stream.type_id
assert hasattr(stream, 'read')

# it should be possible to read from both streams
assert obj.read() == stream.read()

streams = pack.collect_streams(obj.pack_offset)
assert streams

# read the stream
try:
dstream = DeltaApplyReader.new(streams)
except ValueError:
# ignore these, old git versions use only ref deltas,
# which we havent resolved ( as we are without an index )
# Also ignore non-delta streams
continue
# END get deltastream

with dstream:
# read all
data = dstream.read()
assert len(data) == dstream.size

# test seek
dstream.seek(0)
assert dstream.read() == data

# read chunks
# NOTE: the current implementation is safe, it basically transfers
# all calls to the underlying memory map

# END for each object
assert num_obj == size
Expand All @@ -142,6 +142,11 @@ def test_pack(self):
self._assert_pack_file(pack, version, size)
# END for each pack to test

## Unless HIDE_WINDOWS_KNOWN_ERRORS, on Windows fails with:
# File "D:\Work\gitdb.git\gitdb\util.py", line 141, in onerror
# func(path) # Will scream if still not possible to delete.
# PermissionError: [WinError 32] The process cannot access the file
# because it is being used by another process: 'sss\\index_cc_wll5'
@with_rw_directory
def test_pack_entity(self, rw_dir):
pack_objs = list()
Expand Down
17 changes: 8 additions & 9 deletions gitdb/test/test_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,12 @@ def test_decompress_reader_special_case(self):
mdb = MemoryDB()
for sha in (b'888401851f15db0eed60eb1bc29dec5ddcace911',
b'7bb839852ed5e3a069966281bb08d50012fb309b',):
ostream = odb.stream(hex_to_bin(sha))

# if there is a bug, we will be missing one byte exactly !
data = ostream.read()
assert len(data) == ostream.size

# Putting it back in should yield nothing new - after all, we have
dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
assert dump.hexsha == sha
with odb.stream(hex_to_bin(sha)) as ostream:
# if there is a bug, we will be missing one byte exactly !
data = ostream.read()
assert len(data) == ostream.size

# Putting it back in should yield nothing new - after all, we have
dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
assert dump.hexsha == sha
# end for each loose object sha to test
Loading

0 comments on commit b17f91a

Please sign in to comment.