» Core Development > Code coverage > Lib/dbm/dumb.py

Python code coverage for Lib/dbm/dumb.py

# | count | content
1n/a"""A dumb and slow but simple dbm clone.
2n/a
3n/aFor database spam, spam.dir contains the index (a text file),
4n/aspam.bak *may* contain a backup of the index (also a text file),
5n/awhile spam.dat contains the data (a binary file).
6n/a
7n/aXXX TO DO:
8n/a
9n/a- seems to contain a bug when updating...
10n/a
11n/a- reclaim free space (currently, space once occupied by deleted or expanded
12n/aitems is never reused)
13n/a
14n/a- support concurrent access (currently, if two processes take turns making
15n/aupdates, they can mess up the index)
16n/a
17n/a- support efficient access to large databases (currently, the whole index
18n/ais read when the database is opened, and some updates rewrite the whole index)
19n/a
20n/a- support opening for read-only (flag = 'm')
21n/a
22n/a"""
23n/a
import ast as _ast
import collections
import collections.abc
import io as _io
import os as _os
28n/a
# Public API: the exception type and the open() factory function.
__all__ = ["error", "open"]

# Values in the .dat file start at _BLOCKSIZE-aligned byte offsets.
_BLOCKSIZE = 512

# Exception raised on database errors; aliased so all dbm backends
# expose a module-level `error` name.
error = OSError
34n/a
35n/aclass _Database(collections.MutableMapping):
36n/a
37n/a # The on-disk directory and data files can remain in mutually
38n/a # inconsistent states for an arbitrarily long time (see comments
39n/a # at the end of __setitem__). This is only repaired when _commit()
40n/a # gets called. One place _commit() gets called is from __del__(),
41n/a # and if that occurs at program shutdown time, module globals may
42n/a # already have gotten rebound to None. Since it's crucial that
43n/a # _commit() finish successfully, we can't ignore shutdown races
44n/a # here, and _commit() must not reference any globals.
45n/a _os = _os # for _commit()
46n/a _io = _io # for _commit()
47n/a
48n/a def __init__(self, filebasename, mode, flag='c'):
49n/a self._mode = mode
50n/a self._readonly = (flag == 'r')
51n/a
52n/a # The directory file is a text file. Each line looks like
53n/a # "%r, (%d, %d)\n" % (key, pos, siz)
54n/a # where key is the string key, pos is the offset into the dat
55n/a # file of the associated value's first byte, and siz is the number
56n/a # of bytes in the associated value.
57n/a self._dirfile = filebasename + '.dir'
58n/a
59n/a # The data file is a binary file pointed into by the directory
60n/a # file, and holds the values associated with keys. Each value
61n/a # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
62n/a # binary 8-bit string value.
63n/a self._datfile = filebasename + '.dat'
64n/a self._bakfile = filebasename + '.bak'
65n/a
66n/a # The index is an in-memory dict, mirroring the directory file.
67n/a self._index = None # maps keys to (pos, siz) pairs
68n/a
69n/a # Handle the creation
70n/a self._create(flag)
71n/a self._update(flag)
72n/a
73n/a def _create(self, flag):
74n/a if flag == 'n':
75n/a for filename in (self._datfile, self._bakfile, self._dirfile):
76n/a try:
77n/a _os.remove(filename)
78n/a except OSError:
79n/a pass
80n/a # Mod by Jack: create data file if needed
81n/a try:
82n/a f = _io.open(self._datfile, 'r', encoding="Latin-1")
83n/a except OSError:
84n/a if flag not in ('c', 'n'):
85n/a import warnings
86n/a warnings.warn("The database file is missing, the "
87n/a "semantics of the 'c' flag will be used.",
88n/a DeprecationWarning, stacklevel=4)
89n/a with _io.open(self._datfile, 'w', encoding="Latin-1") as f:
90n/a self._chmod(self._datfile)
91n/a else:
92n/a f.close()
93n/a
94n/a # Read directory file into the in-memory index dict.
95n/a def _update(self, flag):
96n/a self._index = {}
97n/a try:
98n/a f = _io.open(self._dirfile, 'r', encoding="Latin-1")
99n/a except OSError:
100n/a self._modified = not self._readonly
101n/a if flag not in ('c', 'n'):
102n/a import warnings
103n/a warnings.warn("The index file is missing, the "
104n/a "semantics of the 'c' flag will be used.",
105n/a DeprecationWarning, stacklevel=4)
106n/a else:
107n/a self._modified = False
108n/a with f:
109n/a for line in f:
110n/a line = line.rstrip()
111n/a key, pos_and_siz_pair = _ast.literal_eval(line)
112n/a key = key.encode('Latin-1')
113n/a self._index[key] = pos_and_siz_pair
114n/a
115n/a # Write the index dict to the directory file. The original directory
116n/a # file (if any) is renamed with a .bak extension first. If a .bak
117n/a # file currently exists, it's deleted.
118n/a def _commit(self):
119n/a # CAUTION: It's vital that _commit() succeed, and _commit() can
120n/a # be called from __del__(). Therefore we must never reference a
121n/a # global in this routine.
122n/a if self._index is None or not self._modified:
123n/a return # nothing to do
124n/a
125n/a try:
126n/a self._os.unlink(self._bakfile)
127n/a except OSError:
128n/a pass
129n/a
130n/a try:
131n/a self._os.rename(self._dirfile, self._bakfile)
132n/a except OSError:
133n/a pass
134n/a
135n/a with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
136n/a self._chmod(self._dirfile)
137n/a for key, pos_and_siz_pair in self._index.items():
138n/a # Use Latin-1 since it has no qualms with any value in any
139n/a # position; UTF-8, though, does care sometimes.
140n/a entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair)
141n/a f.write(entry)
142n/a
143n/a sync = _commit
144n/a
145n/a def _verify_open(self):
146n/a if self._index is None:
147n/a raise error('DBM object has already been closed')
148n/a
149n/a def __getitem__(self, key):
150n/a if isinstance(key, str):
151n/a key = key.encode('utf-8')
152n/a self._verify_open()
153n/a pos, siz = self._index[key] # may raise KeyError
154n/a with _io.open(self._datfile, 'rb') as f:
155n/a f.seek(pos)
156n/a dat = f.read(siz)
157n/a return dat
158n/a
159n/a # Append val to the data file, starting at a _BLOCKSIZE-aligned
160n/a # offset. The data file is first padded with NUL bytes (if needed)
161n/a # to get to an aligned offset. Return pair
162n/a # (starting offset of val, len(val))
163n/a def _addval(self, val):
164n/a with _io.open(self._datfile, 'rb+') as f:
165n/a f.seek(0, 2)
166n/a pos = int(f.tell())
167n/a npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
168n/a f.write(b'\0'*(npos-pos))
169n/a pos = npos
170n/a f.write(val)
171n/a return (pos, len(val))
172n/a
173n/a # Write val to the data file, starting at offset pos. The caller
174n/a # is responsible for ensuring that there's enough room starting at
175n/a # pos to hold val, without overwriting some other value. Return
176n/a # pair (pos, len(val)).
177n/a def _setval(self, pos, val):
178n/a with _io.open(self._datfile, 'rb+') as f:
179n/a f.seek(pos)
180n/a f.write(val)
181n/a return (pos, len(val))
182n/a
183n/a # key is a new key whose associated value starts in the data file
184n/a # at offset pos and with length siz. Add an index record to
185n/a # the in-memory index dict, and append one to the directory file.
186n/a def _addkey(self, key, pos_and_siz_pair):
187n/a self._index[key] = pos_and_siz_pair
188n/a with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
189n/a self._chmod(self._dirfile)
190n/a f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair))
191n/a
192n/a def __setitem__(self, key, val):
193n/a if self._readonly:
194n/a import warnings
195n/a warnings.warn('The database is opened for reading only',
196n/a DeprecationWarning, stacklevel=2)
197n/a if isinstance(key, str):
198n/a key = key.encode('utf-8')
199n/a elif not isinstance(key, (bytes, bytearray)):
200n/a raise TypeError("keys must be bytes or strings")
201n/a if isinstance(val, str):
202n/a val = val.encode('utf-8')
203n/a elif not isinstance(val, (bytes, bytearray)):
204n/a raise TypeError("values must be bytes or strings")
205n/a self._verify_open()
206n/a self._modified = True
207n/a if key not in self._index:
208n/a self._addkey(key, self._addval(val))
209n/a else:
210n/a # See whether the new value is small enough to fit in the
211n/a # (padded) space currently occupied by the old value.
212n/a pos, siz = self._index[key]
213n/a oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
214n/a newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
215n/a if newblocks <= oldblocks:
216n/a self._index[key] = self._setval(pos, val)
217n/a else:
218n/a # The new value doesn't fit in the (padded) space used
219n/a # by the old value. The blocks used by the old value are
220n/a # forever lost.
221n/a self._index[key] = self._addval(val)
222n/a
223n/a # Note that _index may be out of synch with the directory
224n/a # file now: _setval() and _addval() don't update the directory
225n/a # file. This also means that the on-disk directory and data
226n/a # files are in a mutually inconsistent state, and they'll
227n/a # remain that way until _commit() is called. Note that this
228n/a # is a disaster (for the database) if the program crashes
229n/a # (so that _commit() never gets called).
230n/a
231n/a def __delitem__(self, key):
232n/a if self._readonly:
233n/a import warnings
234n/a warnings.warn('The database is opened for reading only',
235n/a DeprecationWarning, stacklevel=2)
236n/a if isinstance(key, str):
237n/a key = key.encode('utf-8')
238n/a self._verify_open()
239n/a self._modified = True
240n/a # The blocks used by the associated value are lost.
241n/a del self._index[key]
242n/a # XXX It's unclear why we do a _commit() here (the code always
243n/a # XXX has, so I'm not changing it). __setitem__ doesn't try to
244n/a # XXX keep the directory file in synch. Why should we? Or
245n/a # XXX why shouldn't __setitem__?
246n/a self._commit()
247n/a
248n/a def keys(self):
249n/a try:
250n/a return list(self._index)
251n/a except TypeError:
252n/a raise error('DBM object has already been closed') from None
253n/a
254n/a def items(self):
255n/a self._verify_open()
256n/a return [(key, self[key]) for key in self._index.keys()]
257n/a
258n/a def __contains__(self, key):
259n/a if isinstance(key, str):
260n/a key = key.encode('utf-8')
261n/a try:
262n/a return key in self._index
263n/a except TypeError:
264n/a if self._index is None:
265n/a raise error('DBM object has already been closed') from None
266n/a else:
267n/a raise
268n/a
269n/a def iterkeys(self):
270n/a try:
271n/a return iter(self._index)
272n/a except TypeError:
273n/a raise error('DBM object has already been closed') from None
274n/a __iter__ = iterkeys
275n/a
276n/a def __len__(self):
277n/a try:
278n/a return len(self._index)
279n/a except TypeError:
280n/a raise error('DBM object has already been closed') from None
281n/a
282n/a def close(self):
283n/a try:
284n/a self._commit()
285n/a finally:
286n/a self._index = self._datfile = self._dirfile = self._bakfile = None
287n/a
288n/a __del__ = close
289n/a
290n/a def _chmod(self, file):
291n/a if hasattr(self._os, 'chmod'):
292n/a self._os.chmod(file, self._mode)
293n/a
294n/a def __enter__(self):
295n/a return self
296n/a
297n/a def __exit__(self, *args):
298n/a self.close()
299n/a
300n/a
def open(file, flag='c', mode=0o666):
    """Open the database file, filename, and return corresponding object.

    The flag argument, used to control how the database is opened in the
    other DBM implementations, supports only the semantics of 'c' and 'n'
    values.  Other values will default to the semantics of 'c' value:
    the database will always opened for update and will be created if it
    does not exist.

    The optional mode argument is the UNIX mode of the file, used only when
    the database has to be created.  It defaults to octal code 0o666 (and
    will be modified by the prevailing umask).

    """

    # Apply the process umask to the requested mode.  The umask can only
    # be read by setting it, so set-and-restore; platforms lacking
    # os.umask entirely just use mode unchanged.
    try:
        prevailing_umask = _os.umask(0)
        _os.umask(prevailing_umask)
    except AttributeError:
        pass
    else:
        # Turn off any bits that are set in the umask
        mode &= ~prevailing_umask

    if flag not in ('r', 'w', 'c', 'n'):
        import warnings
        warnings.warn("Flag must be one of 'r', 'w', 'c', or 'n'",
                      DeprecationWarning, stacklevel=2)

    return _Database(file, mode, flag=flag)