| 1 | n/a | r"""File-like objects that read from or write to a string buffer. |
|---|
| 2 | n/a | |
|---|
| 3 | n/a | This implements (nearly) all stdio methods. |
|---|
| 4 | n/a | |
|---|
| 5 | n/a | f = StringIO() # ready for writing |
|---|
| 6 | n/a | f = StringIO(buf) # ready for reading |
|---|
| 7 | n/a | f.close() # explicitly release resources held |
|---|
| 8 | n/a | flag = f.isatty() # always false |
|---|
| 9 | n/a | pos = f.tell() # get current position |
|---|
| 10 | n/a | f.seek(pos) # set current position |
|---|
| 11 | n/a | f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF |
|---|
| 12 | n/a | buf = f.read() # read until EOF |
|---|
| 13 | n/a | buf = f.read(n) # read up to n bytes |
|---|
| 14 | n/a | buf = f.readline() # read until end of line ('\n') or EOF |
|---|
| 15 | n/a | list = f.readlines()# list of f.readline() results until EOF |
|---|
| 16 | n/a | f.truncate([size]) # truncate file to at most size (default: current pos) |
|---|
| 17 | n/a | f.write(buf) # write at current position |
|---|
| 18 | n/a | f.writelines(list) # for line in list: f.write(line) |
|---|
| 19 | n/a | f.getvalue() # return whole file's contents as a string |
|---|
| 20 | n/a | |
|---|
| 21 | n/a | Notes: |
|---|
| 22 | n/a | - Using a real file is often faster (but less convenient). |
|---|
| 23 | n/a | - There's also a much faster implementation in C, called cStringIO, but |
|---|
| 24 | n/a | it's not subclassable. |
|---|
| 25 | n/a | - fileno() is left unimplemented so that code which uses it triggers |
|---|
| 26 | n/a | an exception early. |
|---|
| 27 | n/a | - Seeking far beyond EOF and then writing will insert real null |
|---|
| 28 | n/a | bytes that occupy space in the buffer. |
|---|
| 29 | n/a | - There's a simple test set (see end of this file). |
|---|
| 30 | n/a | """ |
|---|
| 31 | 0 | try: |
|---|
| 32 | 0 | from errno import EINVAL |
|---|
| 33 | 0 | except ImportError: |
|---|
| 34 | 0 | EINVAL = 22 |
|---|
| 35 | n/a | |
|---|
| 36 | 0 | __all__ = ["StringIO"] |
|---|
| 37 | n/a | |
|---|
try:
    # Python 2: basestring is the common base of str and unicode.
    _string_types = basestring
except NameError:
    # Python 3 has a single text type.
    _string_types = str


def _complain_ifclosed(closed):
    """Raise ValueError if *closed* is true.

    Shared guard used by the StringIO methods that are not allowed on a
    closed file.
    """
    if closed:
        # The call form of raise is valid on both Python 2 and Python 3.
        raise ValueError("I/O operation on closed file")


class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor. If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care. If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.

    Internal state:
      buf      -- the consolidated contents
      buflist  -- chunks written since the last consolidation; logically
                  they follow buf
      len      -- total length of buf plus everything in buflist
      pos      -- current file position (may exceed len after a seek)
    """

    def __init__(self, buf=''):
        """Create the object, optionally pre-loaded with *buf*.

        A non-string *buf* is converted with str().
        """
        # Force self.buf to be a string or unicode
        if not isinstance(buf, _string_types):
            buf = str(buf)
        self.buf = buf
        self.len = len(buf)
        self.buflist = []
        self.pos = 0
        self.closed = False
        self.softspace = 0

    def _merge_buflist(self):
        """Fold the pending chunks in self.buflist into self.buf."""
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []

    def __iter__(self):
        """Return self; a file object is its own iterator."""
        return self

    def next(self):
        """A file object is its own iterator, for example iter(f) returns f
        (unless f is closed). When a file is used as an iterator, typically
        in a for loop (for example, for line in f: print line), the next()
        method is called repeatedly. This method returns the next input line,
        or raises StopIteration when EOF is hit.
        """
        _complain_ifclosed(self.closed)
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Python 3 spells the iterator protocol method __next__.
    __next__ = next

    def close(self):
        """Free the memory buffer.

        Closing an already closed object is a no-op.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos
            # Pending chunks hold memory too; drop them as well.
            self.buflist = []

    def isatty(self):
        """Returns False because StringIO objects are not connected to a
        tty-like device.
        """
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode=0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).  A resulting
        negative position is clamped to 0.

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        self._merge_buflist()
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position."""
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n=-1):
        """Read at most n bytes from the file
        (less if the read hits EOF before obtaining n bytes).

        If the n argument is negative, None or omitted, read all data until
        EOF is reached. The bytes are returned as a string object. An empty
        string is returned when EOF is encountered immediately.
        """
        _complain_ifclosed(self.closed)
        self._merge_buflist()
        start = self.pos
        if n is None or n < 0:
            end = self.len
        else:
            end = min(start + n, self.len)
        # After a seek beyond EOF there is nothing to read; leave the
        # position where the caller put it instead of snapping back to EOF.
        end = max(start, end)
        data = self.buf[start:end]
        self.pos = end
        return data

    def readline(self, length=None):
        r"""Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line). If the length argument is
        present and non-negative, it is a maximum byte count (including the
        trailing newline) and an incomplete line may be returned.

        An empty string is returned when EOF is encountered immediately
        (or when a length of 0 is requested).

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\0') if they occurred in the input.
        """
        _complain_ifclosed(self.closed)
        self._merge_buflist()
        start = self.pos
        newline_at = self.buf.find('\n', start)
        if newline_at < 0:
            end = self.len
        else:
            end = newline_at + 1
        # A length of 0 is a valid limit and must yield an empty string.
        if length is not None and length >= 0:
            if start + length < end:
                end = start + length
        # Never move the position backwards when it is beyond EOF.
        end = max(start, end)
        line = self.buf[start:end]
        self.pos = end
        return line

    def readlines(self, sizehint=0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is present and positive, instead
        of reading up to EOF, whole lines totalling approximately sizehint
        bytes (or more to accommodate a final whole line) are read.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if sizehint is not None and 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size. The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises IOError (errno EINVAL) if size is negative.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Use the real length: size may be larger than the contents, and
        # self.len must always agree with what the buffer actually holds.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file.

        Writing at EOF appends; writing before EOF overwrites in place;
        writing after a seek beyond EOF pads the gap with null bytes.
        A non-string s is converted with str().

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        if not s: return
        # Force s to be a string or unicode
        if not isinstance(s, _string_types):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Fast path: plain append, just queue the chunk.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position is beyond EOF: fill the hole with real null bytes.
            self.buflist.append('\0' * (spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwrite inside existing data: splice s into the contents.
            if self.buflist:
                self.buf += ''.join(self.buflist)
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                slen = newpos
        else:
            self.buflist.append(s)
            slen = newpos
        self.len = slen
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file. The sequence can be any
        iterable object producing strings, typically a list of strings. There
        is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer.

        Nothing needs flushing; this only checks that the file is open.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care. If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.

        Raises ValueError if the object has been closed.
        """
        _complain_ifclosed(self.closed)
        self._merge_buflist()
        return self.buf
|---|
| 273 | n/a | |
|---|
| 274 | n/a | |
|---|
| 275 | n/a | # A little test suite |
|---|
| 276 | n/a | |
|---|
def test():
    """Exercise StringIO against the contents of a real text file.

    The file named by the first command-line argument (default
    '/etc/passwd') is copied into a StringIO object, then read back, seeked
    and truncated; progress is printed and RuntimeError is raised on the
    first mismatch.  The file needs several lines for the script to work:
    it indexes the second line and the last line of a trailing readlines().
    """
    import sys
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'
    # Read the file both as a list of lines and as one string, and make
    # sure the handle is closed even if reading fails.
    source = open(path, 'r')
    try:
        lines = source.readlines()
        source.seek(0)
        text = source.read()
    finally:
        source.close()
    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    print('File length = %s' % (length,))
    # Overwrite the second line with itself; contents must not change.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    print('First line = %s' % (repr(f.readline()),))
    print('Position = %s' % (f.tell(),))
    line = f.readline()
    print('Second line = %s' % (repr(line),))
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')
    f.seek(len(line2), 1)
    remaining = f.readlines()
    line = remaining[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    print('Read %s more lines' % (len(remaining),))
    print('File length = %s' % (f.tell(),))
    if f.tell() != length:
        raise RuntimeError('bad length')
    # Floor division keeps the size an integer on every Python version.
    half = length // 2
    f.truncate(half)
    f.seek(0, 2)
    print('Truncated length = %s' % (f.tell(),))
    if f.tell() != half:
        raise RuntimeError('truncate did not adjust length')
    f.close()


if __name__ == '__main__':
    test()
|---|