Python code coverage for Lib/dbm/dumb.py

#	count	content
1	n/a	"""A dumb and slow but simple dbm clone.
2	n/a
3	n/a	For database spam, spam.dir contains the index (a text file),
4	n/a	spam.bak may contain a backup of the index (also a text file),
5	n/a	while spam.dat contains the data (a binary file).
6	n/a
7	n/a	XXX TO DO:
8	n/a
9	n/a	- seems to contain a bug when updating...
10	n/a
11	n/a	- reclaim free space (currently, space once occupied by deleted or expanded
12	n/a	items is never reused)
13	n/a
14	n/a	- support concurrent access (currently, if two processes take turns making
15	n/a	updates, they can mess up the index)
16	n/a
17	n/a	- support efficient access to large databases (currently, the whole index
18	n/a	is read when the database is opened, and some updates rewrite the whole index)
19	n/a
20	n/a	- support opening for read-only (flag = 'm')
21	n/a
22	n/a	"""
23	n/a
import ast as _ast
import collections
import collections.abc
import io as _io
import os as _os
28	n/a
29	n/a	__all__ = ["error", "open"]  # names exported by "from <module> import *"
30	n/a
31	n/a	_BLOCKSIZE = 512  # values in the data file start at multiples of this many bytes
32	n/a
33	n/a	error = OSError  # exception raised when a closed database object is used
34	n/a
class _Database(collections.abc.MutableMapping):
    """Dict-like database persisted in three sibling files.

    For a base name ``spam`` the instance uses ``spam.dir`` (text index,
    one ``key, (pos, siz)`` entry per line), ``spam.dat`` (binary values,
    each starting at a ``_BLOCKSIZE``-aligned offset) and ``spam.bak``
    (the previous index, kept as a backup by ``_commit()``).

    Keys and values are stored as bytes; ``str`` keys and values are
    encoded as UTF-8 on the way in.  The whole index is held in memory in
    ``self._index``, which becomes ``None`` once the database is closed.

    The base class is taken from ``collections.abc``: the alias that used
    to live directly in ``collections`` was removed in Python 3.10.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _io = _io       # for _commit()

    def __init__(self, filebasename, mode, flag='c'):
        """Open (and create if necessary) the database files.

        filebasename -- path prefix; '.dir', '.dat' and '.bak' are appended.
        mode -- permission bits handed to os.chmod() for files we write.
        flag -- 'n' removes existing files first; 'r' marks the database
                read-only (writes then emit a DeprecationWarning); see
                _create() and _update() for the remaining handling.
        """
        self._mode = mode
        self._readonly = (flag == 'r')

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + '.dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + '.dat'
        self._bakfile = filebasename + '.bak'

        # The index is an in-memory dict, mirroring the directory file.
        # It is set before _create() runs so that __del__ -> _commit()
        # finds the attribute even if creation fails.
        self._index = None  # maps keys to (pos, siz) pairs

        # Handle the creation
        self._create(flag)
        self._update(flag)

    def _create(self, flag):
        """Ensure the data file exists; for flag 'n' start from scratch.

        If the data file is missing it is created.  When that happens for
        a flag other than 'c' or 'n', a DeprecationWarning is emitted.
        """
        if flag == 'n':
            for filename in (self._datfile, self._bakfile, self._dirfile):
                try:
                    _os.remove(filename)
                except OSError:
                    # Best effort: the file may simply not exist.
                    pass
        # Mod by Jack: create data file if needed
        try:
            f = _io.open(self._datfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                import warnings
                warnings.warn("The database file is missing, the "
                              "semantics of the 'c' flag will be used.",
                              DeprecationWarning, stacklevel=4)
            with _io.open(self._datfile, 'w', encoding="Latin-1") as f:
                self._chmod(self._datfile)
        else:
            f.close()

    # Read directory file into the in-memory index dict.
    def _update(self, flag):
        """Load the directory file into ``self._index``.

        A missing directory file leaves the index empty.  In that case the
        database is flagged as modified (unless read-only) so that the
        next _commit() writes the directory file, and a DeprecationWarning
        is emitted for flags other than 'c' and 'n'.
        """
        self._index = {}
        try:
            f = _io.open(self._dirfile, 'r', encoding="Latin-1")
        except OSError:
            self._modified = not self._readonly
            if flag not in ('c', 'n'):
                import warnings
                warnings.warn("The index file is missing, the "
                              "semantics of the 'c' flag will be used.",
                              DeprecationWarning, stacklevel=4)
        else:
            self._modified = False
            with f:
                for line in f:
                    line = line.rstrip()
                    key, pos_and_siz_pair = _ast.literal_eval(line)
                    key = key.encode('Latin-1')
                    self._index[key] = pos_and_siz_pair

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        """Rewrite the directory file from the in-memory index.

        Does nothing when the database is closed or has no uncommitted
        changes.  Also available under the public name ``sync``.
        """
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None or not self._modified:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except OSError:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except OSError:
            pass

        with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.items():
                # Use Latin-1 since it has no qualms with any value in any
                # position; UTF-8, though, does care sometimes.
                entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair)
                f.write(entry)

        # The directory file now mirrors the index.  Without this reset,
        # every later sync()/close() would rotate the .bak file and rewrite
        # an unchanged index, replacing the backup with a copy of the
        # current directory file.
        self._modified = False

    sync = _commit

    def _verify_open(self):
        """Raise ``error`` if the database has already been closed."""
        if self._index is None:
            raise error('DBM object has already been closed')

    def __getitem__(self, key):
        """Return the bytes stored for key (str keys are UTF-8 encoded).

        Raises KeyError for an unknown key and ``error`` if closed.
        """
        if isinstance(key, str):
            key = key.encode('utf-8')
        self._verify_open()
        pos, siz = self._index[key]     # may raise KeyError
        with _io.open(self._datfile, 'rb') as f:
            f.seek(pos)
            dat = f.read(siz)
        return dat

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(0, 2)    # whence=2: seek to the end of the file
            pos = int(f.tell())
            # Round pos up to the next multiple of _BLOCKSIZE.
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write(b'\0'*(npos-pos))
            pos = npos
            f.write(val)
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(pos)
            f.write(val)
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair))

    def __setitem__(self, key, val):
        """Store val under key.

        str keys and values are encoded as UTF-8; anything other than
        str, bytes or bytearray raises TypeError.  On a read-only database
        a DeprecationWarning is emitted and the write is still performed.
        Raises ``error`` if the database is closed.
        """
        if self._readonly:
            import warnings
            warnings.warn('The database is opened for reading only',
                          DeprecationWarning, stacklevel=2)
        if isinstance(key, str):
            key = key.encode('utf-8')
        elif not isinstance(key, (bytes, bytearray)):
            raise TypeError("keys must be bytes or strings")
        if isinstance(val, str):
            val = val.encode('utf-8')
        elif not isinstance(val, (bytes, bytearray)):
            raise TypeError("values must be bytes or strings")
        self._verify_open()
        self._modified = True
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove key from the index and commit the directory file.

        Raises KeyError for an unknown key and ``error`` if closed.  On a
        read-only database a DeprecationWarning is emitted and the
        deletion is still performed.
        """
        if self._readonly:
            import warnings
            warnings.warn('The database is opened for reading only',
                          DeprecationWarning, stacklevel=2)
        if isinstance(key, str):
            key = key.encode('utf-8')
        self._verify_open()
        self._modified = True
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return a list of all keys (bytes); raise ``error`` if closed."""
        try:
            return list(self._index)
        except TypeError:
            # self._index is None after close(), and list(None) fails.
            raise error('DBM object has already been closed') from None

    def items(self):
        """Return a list of (key, value) pairs; raise ``error`` if closed."""
        self._verify_open()
        return [(key, self[key]) for key in self._index.keys()]

    def __contains__(self, key):
        """Return True if key is in the index (str keys are UTF-8 encoded)."""
        if isinstance(key, str):
            key = key.encode('utf-8')
        try:
            return key in self._index
        except TypeError:
            # Either the database is closed (index is None) or the lookup
            # itself raised TypeError; only the former becomes ``error``.
            if self._index is None:
                raise error('DBM object has already been closed') from None
            else:
                raise

    def iterkeys(self):
        """Return an iterator over the keys; raise ``error`` if closed."""
        try:
            return iter(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None
    __iter__ = iterkeys

    def __len__(self):
        """Return the number of keys; raise ``error`` if closed."""
        try:
            return len(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def close(self):
        """Commit pending changes and mark the database as closed.

        Calling it again is harmless: _commit() returns immediately once
        the index is None.
        """
        try:
            self._commit()
        finally:
            self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        """Apply self._mode to file where os.chmod is available."""
        if hasattr(self._os, 'chmod'):
            self._os.chmod(file, self._mode)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
299	n/a
300	n/a
def open(file, flag='c', mode=0o666):
    """Open the database whose base name is *file* and return its object.

    The flag argument, used to control how the database is opened in the
    other DBM implementations, supports only the semantics of the 'c' and
    'n' values.  Other values default to the semantics of 'c': the
    database will always be opened for update and will be created if it
    does not exist.  A flag outside 'r', 'w', 'c' and 'n' additionally
    triggers a DeprecationWarning.

    The optional mode argument is the UNIX mode of the file, used only when
    the database has to be created.  It defaults to octal code 0o666 (and
    will be modified by the prevailing umask).
    """
    # The umask can only be read by setting it, so set it to 0 and put the
    # returned value straight back.
    try:
        current_umask = _os.umask(0)
        _os.umask(current_umask)
    except AttributeError:
        # _os.umask is not available: pass the mode through untouched.
        effective_mode = mode
    else:
        # Turn off any bits that are set in the umask
        effective_mode = mode & (~current_umask)

    known_flags = ('r', 'w', 'c', 'n')
    if flag not in known_flags:
        import warnings
        warnings.warn("Flag must be one of 'r', 'w', 'c', or 'n'",
                      DeprecationWarning, stacklevel=2)

    return _Database(file, effective_mode, flag=flag)