» Core Development > Code coverage > Lib/dumbdbm.py

Python code coverage for Lib/dumbdbm.py

# count content
1n/a"""A dumb and slow but simple dbm clone.
2n/a
3n/aFor database spam, spam.dir contains the index (a text file),
4n/aspam.bak *may* contain a backup of the index (also a text file),
5n/awhile spam.dat contains the data (a binary file).
6n/a
7n/aXXX TO DO:
8n/a
9n/a- seems to contain a bug when updating...
10n/a
11n/a- reclaim free space (currently, space once occupied by deleted or expanded
12n/aitems is never reused)
13n/a
14n/a- support concurrent access (currently, if two processes take turns making
15n/aupdates, they can mess up the index)
16n/a
17n/a- support efficient access to large databases (currently, the whole index
18n/ais read when the database is opened, and some updates rewrite the whole index)
19n/a
20n/a- support opening for read-only (flag = 'm')
21n/a
221"""
23n/a
241import os as _os
251import __builtin__
261import UserDict
27n/a
281_open = __builtin__.open
29n/a
301_BLOCKSIZE = 512
31n/a
321error = IOError # For anydbm
33n/a
342class _Database(UserDict.DictMixin):
35n/a
36n/a # The on-disk directory and data files can remain in mutually
37n/a # inconsistent states for an arbitrarily long time (see comments
38n/a # at the end of __setitem__). This is only repaired when _commit()
39n/a # gets called. One place _commit() gets called is from __del__(),
40n/a # and if that occurs at program shutdown time, module globals may
41n/a # already have gotten rebound to None. Since it's crucial that
42n/a # _commit() finish successfully, we can't ignore shutdown races
43n/a # here, and _commit() must not reference any globals.
441 _os = _os # for _commit()
451 _open = _open # for _commit()
46n/a
471 def __init__(self, filebasename, mode):
4825 self._mode = mode
49n/a
50n/a # The directory file is a text file. Each line looks like
51n/a # "%r, (%d, %d)\n" % (key, pos, siz)
52n/a # where key is the string key, pos is the offset into the dat
53n/a # file of the associated value's first byte, and siz is the number
54n/a # of bytes in the associated value.
5525 self._dirfile = filebasename + _os.extsep + 'dir'
56n/a
57n/a # The data file is a binary file pointed into by the directory
58n/a # file, and holds the values associated with keys. Each value
59n/a # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
60n/a # binary 8-bit string value.
6125 self._datfile = filebasename + _os.extsep + 'dat'
6225 self._bakfile = filebasename + _os.extsep + 'bak'
63n/a
64n/a # The index is an in-memory dict, mirroring the directory file.
6525 self._index = None # maps keys to (pos, siz) pairs
66n/a
67n/a # Mod by Jack: create data file if needed
6825 try:
6925 f = _open(self._datfile, 'r')
7010 except IOError:
7110 f = _open(self._datfile, 'w')
7210 self._chmod(self._datfile)
7325 f.close()
7425 self._update()
75n/a
76n/a # Read directory file into the in-memory index dict.
771 def _update(self):
7825 self._index = {}
7925 try:
8025 f = _open(self._dirfile)
8110 except IOError:
8210 pass
83n/a else:
84141 for line in f:
85126 line = line.rstrip()
86126 key, pos_and_siz_pair = eval(line)
87126 self._index[key] = pos_and_siz_pair
8815 f.close()
89n/a
90n/a # Write the index dict to the directory file. The original directory
91n/a # file (if any) is renamed with a .bak extension first. If a .bak
92n/a # file currently exists, it's deleted.
931 def _commit(self):
94n/a # CAUTION: It's vital that _commit() succeed, and _commit() can
95n/a # be called from __del__(). Therefore we must never reference a
96n/a # global in this routine.
97115 if self._index is None:
9813 return # nothing to do
99n/a
100102 try:
101102 self._os.unlink(self._bakfile)
10211 except self._os.error:
10311 pass
104n/a
105102 try:
106102 self._os.rename(self._dirfile, self._bakfile)
1072 except self._os.error:
1082 pass
109n/a
110102 f = self._open(self._dirfile, 'w')
111102 self._chmod(self._dirfile)
1121039 for key, pos_and_siz_pair in self._index.iteritems():
113937 f.write("%r, %r\n" % (key, pos_and_siz_pair))
114102 f.close()
115n/a
1161 sync = _commit
117n/a
1181 def __getitem__(self, key):
119491 pos, siz = self._index[key] # may raise KeyError
120491 f = _open(self._datfile, 'rb')
121491 f.seek(pos)
122491 dat = f.read(siz)
123491 f.close()
124491 return dat
125n/a
126n/a # Append val to the data file, starting at a _BLOCKSIZE-aligned
127n/a # offset. The data file is first padded with NUL bytes (if needed)
128n/a # to get to an aligned offset. Return pair
129n/a # (starting offset of val, len(val))
1301 def _addval(self, val):
131271 f = _open(self._datfile, 'rb+')
132271 f.seek(0, 2)
133271 pos = int(f.tell())
134271 npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
135271 f.write('\0'*(npos-pos))
136271 pos = npos
137271 f.write(val)
138271 f.close()
139271 return (pos, len(val))
140n/a
141n/a # Write val to the data file, starting at offset pos. The caller
142n/a # is responsible for ensuring that there's enough room starting at
143n/a # pos to hold val, without overwriting some other value. Return
144n/a # pair (pos, len(val)).
1451 def _setval(self, pos, val):
146173 f = _open(self._datfile, 'rb+')
147173 f.seek(pos)
148173 f.write(val)
149173 f.close()
150173 return (pos, len(val))
151n/a
152n/a # key is a new key whose associated value starts in the data file
153n/a # at offset pos and with length siz. Add an index record to
154n/a # the in-memory index dict, and append one to the directory file.
1551 def _addkey(self, key, pos_and_siz_pair):
156122 self._index[key] = pos_and_siz_pair
157122 f = _open(self._dirfile, 'a')
158122 self._chmod(self._dirfile)
159122 f.write("%r, %r\n" % (key, pos_and_siz_pair))
160122 f.close()
161n/a
1621 def __setitem__(self, key, val):
163444 if not type(key) == type('') == type(val):
1640 raise TypeError, "keys and values must be strings"
165444 if key not in self._index:
166122 self._addkey(key, self._addval(val))
167n/a else:
168n/a # See whether the new value is small enough to fit in the
169n/a # (padded) space currently occupied by the old value.
170322 pos, siz = self._index[key]
171322 oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
172322 newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
173322 if newblocks <= oldblocks:
174173 self._index[key] = self._setval(pos, val)
175n/a else:
176n/a # The new value doesn't fit in the (padded) space used
177n/a # by the old value. The blocks used by the old value are
178n/a # forever lost.
179149 self._index[key] = self._addval(val)
180n/a
181n/a # Note that _index may be out of synch with the directory
182n/a # file now: _setval() and _addval() don't update the directory
183n/a # file. This also means that the on-disk directory and data
184n/a # files are in a mutually inconsistent state, and they'll
185n/a # remain that way until _commit() is called. Note that this
186n/a # is a disaster (for the database) if the program crashes
187n/a # (so that _commit() never gets called).
188n/a
1891 def __delitem__(self, key):
190n/a # The blocks used by the associated value are lost.
19177 del self._index[key]
192n/a # XXX It's unclear why we do a _commit() here (the code always
193n/a # XXX has, so I'm not changing it). __setitem__ doesn't try to
194n/a # XXX keep the directory file in synch. Why should we? Or
195n/a # XXX why shouldn't __setitem__?
19677 self._commit()
197n/a
1981 def keys(self):
1995 return self._index.keys()
200n/a
2011 def has_key(self, key):
2020 return key in self._index
203n/a
2041 def __contains__(self, key):
2050 return key in self._index
206n/a
2071 def iterkeys(self):
2085 return self._index.iterkeys()
2091 __iter__ = iterkeys
210n/a
2111 def __len__(self):
2120 return len(self._index)
213n/a
2141 def close(self):
21538 self._commit()
21638 self._index = self._datfile = self._dirfile = self._bakfile = None
217n/a
2181 __del__ = close
219n/a
2201 def _chmod (self, file):
221234 if hasattr(self._os, 'chmod'):
222234 self._os.chmod(file, self._mode)
223n/a
224n/a
2251def open(file, flag=None, mode=0666):
226n/a """Open the database file, filename, and return corresponding object.
227n/a
228n/a The flag argument, used to control how the database is opened in the
229n/a other DBM implementations, is ignored in the dumbdbm module; the
230n/a database is always opened for update, and will be created if it does
231n/a not exist.
232n/a
233n/a The optional mode argument is the UNIX mode of the file, used only when
234n/a the database has to be created. It defaults to octal code 0666 (and
235n/a will be modified by the prevailing umask).
236n/a
237n/a """
238n/a # flag argument is currently ignored
239n/a
240n/a # Modify mode depending on the umask
24125 try:
24225 um = _os.umask(0)
24325 _os.umask(um)
2440 except AttributeError:
2450 pass
246n/a else:
247n/a # Turn off any bits that are set in the umask
24825 mode = mode & (~um)
249n/a
25025 return _Database(file, mode)