Skip to content

instamatic.formats

Functions

read_cbf(fname)

CBF reader not implemented.

Source code in src/instamatic/formats/__init__.py
119
120
121
def read_cbf(fname: str):
    """Placeholder for a CBF reader.

    fname: str,
        path or filename of the CBF image (unused)

    Raises:
        NotImplementedError, always; reading CBF files is not supported.
    """
    message = 'CBF reader not implemented.'
    raise NotImplementedError(message)

read_hdf5(fname)

Simple function to read a hdf5 file written by Instamatic.

fname: str, path or filename to image which should be opened

Returns: image: np.ndarray, header: dict a tuple of the image as numpy array and dictionary with all the tem parameters and image attributes

Source code in src/instamatic/formats/__init__.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def read_hdf5(fname: str) -> (np.array, dict):
    """Simple function to read a hdf5 file written by Instamatic.

    fname: str,
        path or filename to image which should be opened

    Returns:
        image: np.ndarray, header: dict
            a tuple of the image as numpy array and dictionary with all the tem parameters and image attributes

    Raises:
        FileNotFoundError, if `fname` does not exist
    """
    if not os.path.exists(fname):
        raise FileNotFoundError(f"No such file: '{fname}'")

    # The context manager guarantees the file handle is released; both the
    # pixel data and the attributes are copied into memory before it closes.
    with h5py.File(fname, 'r') as f:
        dataset = f['data']
        return np.array(dataset), dict(dataset.attrs)

read_image(fname)

Guess filetype by extension.

Source code in src/instamatic/formats/__init__.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
def read_image(fname: str) -> (np.array, dict):
    """Guess filetype by extension and dispatch to the matching reader.

    fname: str,
        path or filename to image which should be opened

    Returns:
        image: np.ndarray, header: dict
            whatever the format-specific reader returns

    Raises:
        OSError, if the (lower-cased) file extension is not recognized
    """
    ext = Path(fname).suffix.lower()
    if ext in ('.tif', '.tiff'):
        img, h = read_tiff(fname)
    elif ext in ('.h5', '.hdf5'):
        img, h = read_hdf5(fname)
    elif ext in ('.img', '.smv'):
        img, h = read_adsc(fname)
    # Compare with `==`: `ext in ('.mrc')` is a substring test on a plain
    # string, which also matches '' and '.m'.
    elif ext == '.mrc':
        img, h = read_mrc(fname)
    elif ext == '.cbf':
        img, h = read_cbf(fname)
    else:
        raise OSError(f'Cannot open file {fname}, unknown extension: {ext}')
    return img, h

read_tiff(fname)

Simple function to read a tiff file.

fname: str, path or filename to image which should be opened

Returns: image: np.ndarray, header: dict a tuple of the image as numpy array and dictionary with all the tem parameters and image attributes

Source code in src/instamatic/formats/__init__.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def read_tiff(fname: str) -> (np.array, dict):
    """Simple function to read a tiff file.

    fname: str,
        path or filename to image which should be opened

    Returns:
        image: np.ndarray, header: dict
            a tuple of the image as numpy array and dictionary with all the tem parameters and image attributes.
            The header is parsed from the ImageDescription tag for files written
            by instamatic, taken from the TVIPS metadata for TVIPS files, and
            empty otherwise.
    """
    # Use the context manager so the underlying file handle is always closed.
    with tifffile.TiffFile(fname) as tiff:
        page = tiff.pages[0]
        img = page.asarray()

        if page.software == 'instamatic':
            # NOTE(review): yaml.Loader can construct arbitrary Python objects;
            # only open TIFF files from trusted sources. Kept (not replaced by
            # a safe loader) so headers written with yaml.dump stay readable.
            header = yaml.load(page.tags['ImageDescription'].value, Loader=yaml.Loader)
        elif tiff.is_tvips:
            header = tiff.tvips_metadata
        else:
            header = {}

    return img, header

write_hdf5(fname, data, header=None)

Simple function to write data to hdf5 format using h5py.

fname: str, path or filename to which the image should be saved data: np.ndarray, numpy array containing image data (path="/data") header: dict, dictionary containing the metadata that should be saved key/value pairs are stored as attributes on the data

Source code in src/instamatic/formats/__init__.py
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def write_hdf5(fname: str, data, header: dict = None):
    """Simple function to write data to hdf5 format using h5py.

    fname: str,
        path or filename to which the image should be saved
        (the suffix is always replaced by `.h5`)
    data: np.ndarray,
        numpy array containing image data (path="/data")
    header: dict,
        dictionary containing the metadata that should be saved
        key/value pairs are stored as attributes on the data
    """
    fname = Path(fname).with_suffix('.h5')

    # The context manager closes (and flushes) the file even if writing the
    # dataset or its attributes raises.
    with h5py.File(fname, 'w') as f:
        h5data = f.create_dataset('data', data=data)
        if header:
            h5data.attrs.update(header)

write_tiff(fname, data, header=None)

Simple function to write a tiff file.

fname: str, path or filename to which the image should be saved data: np.ndarray, numpy array containing image data header: dict, dictionary containing the metadata that should be saved key/value pairs are stored as yaml in the TIFF ImageDescription tag

Source code in src/instamatic/formats/__init__.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def write_tiff(fname: str, data, header: dict = None):
    """Simple function to write a tiff file.

    fname: str,
        path or filename to which the image should be saved
        (the suffix is always replaced by `.tiff`)
    data: np.ndarray,
        numpy array containing image data
    header: dict,
        dictionary containing the metadata that should be saved
        key/value pairs are stored as yaml in the TIFF ImageDescription tag
    """
    if isinstance(header, dict):
        header = yaml.dump(header)
    if not header:
        header = ''

    fname = Path(fname).with_suffix('.tiff')

    with tifffile.TiffWriter(fname) as f:
        # Newer tifffile releases renamed `TiffWriter.save` to `write`; prefer
        # `write` and fall back to `save` so older installations keep working.
        write_page = getattr(f, 'write', None) or f.save
        write_page(data=data, software='instamatic', description=header)

Modules

adscimage

Functions

read_adsc(fname)

Read in the file.

Source code in src/instamatic/formats/adscimage.py
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def read_adsc(fname: str) -> (np.array, dict):
    """Read an ADSC/SMV image file.

    fname: str,
        path or filename to image which should be opened

    Returns:
        image: np.ndarray, header: dict
            the image as a uint16 array of shape (SIZE2, SIZE1) and the header
            key/value pairs (values are kept as strings)

    Raises:
        Exception, if the header cannot be parsed
        OSError, if the amount of image data does not match SIZE1 x SIZE2
    """
    infile = open(fname, 'rb', buffering=0)
    try:
        try:
            header = readheader(infile)
        except Exception as err:
            raise Exception('Error processing adsc header') from err
        # banned by bzip/gzip???
        try:
            infile.seek(int(header['HEADER_BYTES']), 0)
        except TypeError:
            # Gzipped does not allow a seek and read header is not
            # promising to stop in the right place
            infile.close()
            # Rebinding `infile` here means the `finally` below closes the
            # reopened handle as well.
            infile = open(fname, 'rb', buffering=0)
            infile.read(int(header['HEADER_BYTES']))
        binary = infile.read()
    finally:
        infile.close()

    # now read the data into the array
    dim1 = int(header['SIZE1'])
    dim2 = int(header['SIZE2'])
    data = np.frombuffer(binary, np.uint16)
    if swap_needed(header):
        # Arrays created by np.frombuffer on bytes are read-only, so swap into
        # a fresh array instead of in place.
        data = data.byteswap()
    try:
        data.shape = (dim2, dim1)
    except ValueError:
        raise OSError(f'Size spec in ADSC-header does not match size of image data field {dim1}x{dim2} != {data.size}')

    return data, header
readheader(infile)

Read an adsc header.

Source code in src/instamatic/formats/adscimage.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def readheader(infile):
    """Read an adsc header.

    Lines are consumed from `infile` up to and including the first line that
    contains the closing `}`. Every preceding line of the form `KEY=VALUE;` is
    stored in the returned dict (values are kept as strings).

    infile: binary file object positioned at the start of the header

    Returns:
        header: dict mapping header keys to their string values

    Raises:
        OSError, if the end of the file is reached before the closing `}`
    """
    header = {}
    while True:
        line = infile.readline()
        if not line:
            # readline() returns b'' forever at EOF; bail out rather than spin.
            raise OSError("Truncated adsc header: end of file reached before closing '}'")
        if b'}' in line:
            break
        string = line.decode().strip()
        if '=' in string:
            # Split on the first '=' only so values may themselves contain '='.
            key, val = string.split('=', 1)
            header[key.strip()] = val.strip(';')
    return header
write_adsc(fname, data, header={})

Write adsc format.

Source code in src/instamatic/formats/adscimage.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def write_adsc(fname: str, data: np.array, header: dict = None):
    """Write adsc format.

    fname: str,
        path or filename to which the image should be saved
    data: np.ndarray,
        2D image data; it is rounded and stored as uint16
    header: dict,
        optional header key/value pairs. Missing `SIZE1`/`SIZE2` entries are
        derived from `data.shape`; `HEADER_BYTES` is computed when absent.
        The dict passed in is not modified.
    """
    # Work on a private copy: the original used a mutable default argument
    # and wrote SIZE1/SIZE2 into it, leaking sizes from one call to the next.
    header = dict(header) if header else {}

    if 'SIZE1' not in header or 'SIZE2' not in header:
        dim2, dim1 = data.shape
        header.setdefault('SIZE1', dim1)
        header.setdefault('SIZE2', dim2)

    out = b'{\n' + ''.join(f'{key}={value};\n' for key, value in header.items()).encode()
    if 'HEADER_BYTES' in header:
        pad = int(header['HEADER_BYTES']) - len(out) - 2
    else:
        # Round up to a multiple of 512 with room for the HEADER_BYTES line.
        hsize = (len(out) + 533) & ~(512 - 1)
        out += f'HEADER_BYTES={hsize:d};\n'.encode()
        pad = hsize - len(out) - 2
    out += b'}' + (pad + 1) * b'\x00'
    assert len(out) % 512 == 0, 'Header is not multiple of 512'

    # NOTE: XDS can handle only "SMV" images of TYPE=unsigned_short.
    dtype = np.uint16
    data = np.round(data, 0).astype(dtype, copy=False)  # copy=False ensures that no copy is made if dtype is already satisfied
    if swap_needed(header):
        # Swap into a new array so the caller's data can never be altered.
        data = data.byteswap()

    with open(fname, 'wb') as outf:
        outf.write(out)
        outf.write(data.tobytes())

csvIO

Functions

read_csv(f)

Read a csv file into a pandas DataFrame.

Source code in src/instamatic/formats/csvIO.py
25
26
27
28
29
30
def read_csv(f):
    """Read a csv file into a pandas DataFrame.

    f: a path / file object, or a list or tuple of them. A list or tuple is
       read item by item and the resulting frames are concatenated.

    The first column is used as the index and date parsing of the index is
    attempted, matching the defaults of the removed `DataFrame.from_csv`.
    """
    if isinstance(f, (list, tuple)):
        return pd.concat([read_csv(csv) for csv in f])
    # `pd.DataFrame.from_csv` was removed in pandas 1.0; this is the
    # equivalent `read_csv` call.
    return pd.read_csv(f, index_col=0, parse_dates=True)
read_ycsv(f)

Read a file in ycsv format: https://blog.datacite.org/using-yaml-frontmatter-with-csv/

format:
---
$YAML_BLOCK
---
$CSV_BLOCK
Source code in src/instamatic/formats/csvIO.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def read_ycsv(f):
    """read file in ycsv format: https://blog.datacite.org/using-yaml-
    frontmatter-with-csv/

    format:
        ---
        $YAML_BLOCK
        ---
        $CSV_BLOCK

    f: str or open text file object. A str is treated as a path; the file is
       opened here and closed again before returning.

    Returns:
        df, d: the csv block as a pandas DataFrame (None if the csv block is
               empty) and the parsed yaml block
    """
    opened_here = isinstance(f, str)
    if opened_here:
        f = open(f)

    try:
        first_line = f.tell()

        in_yaml_block = False
        yaml_block = []

        for line in f:
            if line.strip() == '---':
                if in_yaml_block:
                    # second delimiter: the yaml block is complete
                    break
                in_yaml_block = True
                continue

            if in_yaml_block:
                yaml_block.append(line)

        # white space is important when reading yaml
        # NOTE(review): yaml.Loader can construct arbitrary Python objects;
        # only read ycsv files from trusted sources. An explicit Loader is
        # required by current PyYAML releases.
        d = yaml.load(io.StringIO(''.join(yaml_block)), Loader=yaml.Loader)

        # workaround to fix pandas crash when it is not at the first line for some reason
        f.seek(first_line)
        header = len(yaml_block) + 2
        try:
            # Equivalent of the removed `pd.DataFrame.from_csv(f, header=header)`.
            df = pd.read_csv(f, header=header, index_col=0, parse_dates=True)
        except pd.errors.EmptyDataError:
            df = None
    finally:
        # Only close handles that were opened in this function.
        if opened_here:
            f.close()

    return df, d
results2df(results, sort=True)

Convert a list of IndexingResult objects to pandas DataFrame.

Source code in src/instamatic/formats/csvIO.py
 8
 9
10
11
12
13
14
15
def results2df(results, sort=True):
    """Turn a collection of IndexingResult records into a pandas DataFrame.

    results: mapping whose values are namedtuple-like records (the code reads
             `results.values()` and the `_fields` of the first value)
    sort: bool, when true the rows are ordered by the `score` column,
          highest first

    Returns:
        a DataFrame with one row per entry of `results` and one column per
        record field
    """
    import pandas as pd

    table = pd.DataFrame(results).T
    first_record = list(results.values())[0]
    table.columns = first_record._fields
    if not sort:
        return table
    return table.sort_values('score', ascending=False)
write_csv(f, results)

Write a list of IndexingResult objects to a csv file.

Source code in src/instamatic/formats/csvIO.py
18
19
20
21
22
def write_csv(f, results):
    """Write IndexingResult records (or a ready-made DataFrame) to a csv file.

    f: path or file object handed to `to_csv`
    results: any object with a `to_csv` method is written as is; anything
             else is first converted with `results2df`
    """
    frame = results if hasattr(results, 'to_csv') else results2df(results)
    frame.to_csv(f)
write_ycsv(f, data, metadata)

Write a file in ycsv format: https://blog.datacite.org/using-yaml-frontmatter-with-csv/

format:
---
$YAML_BLOCK
---
$CSV_BLOCK
Source code in src/instamatic/formats/csvIO.py
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def write_ycsv(f, data, metadata):
    """write file in ycsv format: https://blog.datacite.org/using-yaml-
    frontmatter-with-csv/

    format:
        ---
        $YAML_BLOCK
        ---
        $CSV_BLOCK

    f: str or writable text file object. A str is treated as a path; the file
       is opened here and closed again before returning.
    data: passed on to `write_csv` for the csv block
    metadata: object dumped as yaml into the front-matter block
    """
    opened_here = isinstance(f, str)
    if opened_here:
        f = open(f, 'w')

    try:
        f.write('---\n')
        yaml.dump(metadata, f, default_flow_style=False, sort_keys=False)

        f.write('---\n')
        write_csv(f, data)
    finally:
        # Close (and thereby flush) only handles that were opened here;
        # a caller-supplied file object stays open.
        if opened_here:
            f.close()

mrc

Read and write images in the MRC format.

.. todo:: define arachnid header and map to mrc

.. note::

This code is a heavily modified version of the MRC parser/writer
found in the Scripps Appion program.

.. Created on Aug 9, 2012 .. codeauthor:: Robert Langlois rl2528@columbia.edu

Functions

array_from_header(header)

Convert header information to array parameters.

:Parameters:

header : header_dtype Header fields

:Returns:

header : dict File header dtype : dtype Data type shape : tuple Shape of the array order : str Order of the array offset : int Header offset swap : bool Swap byte order

Source code in src/instamatic/formats/mrc.py
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
def array_from_header(header):
    """Convert header information to array parameters.

    Not implemented: the body is empty, so the call always returns None.
    The values listed under Returns describe the intended result only.

    :Parameters:

    header : header_dtype
             Header fields

    :Returns:

    header : dict
             File header
    dtype : dtype
            Data type
    shape : tuple
            Shape of the array
    order : str
            Order of the array
    offset : int
             Header offset
    swap : bool
            Swap byte order
    """

    return None
cache_data()

Get keywords to be added as data cache.

:Returns:

extra : dict Keyword arguments

Source code in src/instamatic/formats/mrc.py
217
218
219
220
221
222
223
224
225
226
def cache_data():
    """Return the default keyword arguments used as the data cache.

    :Returns:

    extra : dict
            Keyword arguments `header`, `no_strict_mrc` and `force_volume`
            with their default values
    """

    return dict(header=None, no_strict_mrc=False, force_volume=False)
count_images(filename, no_strict_mrc=False)

Count the number of images in the file.

:Parameters:

filename : str or file object Filename or open stream for a file no_strict_mrc : bool Perform strict MRC header checking (recommended) - Only EPU MRC files and Yifan's frame alignment require this to be off.

:Returns:

out : int Number of images in the file

Source code in src/instamatic/formats/mrc.py
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
def count_images(filename, no_strict_mrc=False):
    """Count the number of images in the file.

    :Parameters:

    filename : str or file object
               Filename or open stream for a file. An object with a `dtype`
               attribute is treated as an already-read header array.
    no_strict_mrc : bool
                    Perform strict MRC header checking (recommended) - Only
                    EPU MRC files and Yifan's frame alignment require this
                    to be off.

    :Returns:

    out : int
          Number of images in the file (the `nz` header field)
    """

    if hasattr(filename, 'dtype'):
        h = filename
    else:
        # Pass by keyword: the second positional parameter of
        # read_mrc_header is `index`, not `no_strict_mrc`.
        h = read_mrc_header(filename, no_strict_mrc=no_strict_mrc)
    return h['nz'][0]
create_header(shape, dtype, order='C', header=None)

Create a header for the MRC image format.

@todo support header parameters

:Parameters:

shape : tuple Shape of the array dtype : numpy.dtype Data type for NumPy ndarray header : dict Header values for image :Returns:

h : dtype Data type for NumPy ndarray describing the header

Source code in src/instamatic/formats/mrc.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def create_header(shape, dtype, order='C', header=None):
    """Create a header for the MRC image format.

    Not implemented: the body is empty, so the call always returns None.

    @todo support header parameters

    :Parameters:

    shape : tuple
            Shape of the array
    dtype : numpy.dtype
            Data type for NumPy ndarray
    order : str
            Order of the array
    header : dict
             Header values for image

    :Returns:

    h : dtype
        Data type for NumPy ndarray describing the header
        (intended result; currently None)
    """

    return None
is_format_header(h)

Test if the given header has the proper format.

:Parameters:

h : array Header to test

:Returns:

val : bool Test if dtype matches format dtype

Source code in src/instamatic/formats/mrc.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
def is_format_header(h):
    """Check whether an array carries the MRC header dtype.

    :Parameters:

    h : array
        Header to test

    :Returns:

    val : bool
          True if the dtype of `h` equals `header_image_dtype` or its
          byte-swapped counterpart
    """

    expected = header_image_dtype
    if h.dtype == expected:
        return True
    return h.dtype == expected.newbyteorder()
is_readable(filename, no_strict_mrc=False)

Test if the file read has a valid MRC header.

:Parameters:

filename : str or file object Filename or open stream for a file no_strict_mrc : bool Perform strict MRC header checking (recommended) - Only EPU MRC files and Yifan's frame alignment require this to be off.

:Returns:

out : bool True if the header conforms to MRC

Source code in src/instamatic/formats/mrc.py
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
def is_readable(filename, no_strict_mrc=False):
    """Test if the file read has a valid MRC header.

    :Parameters:

    filename : str or file object
               Filename or open stream for a file. An object with a `dtype`
               attribute is treated as an already-read header array.
    no_strict_mrc : bool
                    Perform strict MRC header checking (recommended) - Only
                    EPU MRC files and Yifan's frame alignment require this
                    to be off.
                    NOTE: strict rejection of non-standard machine stamps was
                    disabled in the original code (guarded by `1 == 0`), so
                    this flag currently does not change the result.

    :Returns:

    out : bool
          True if the header conforms to MRC

    :Raises:

    ValueError
        If a header array is passed whose dtype is not the MRC header dtype
    """

    global bad_mrc_header

    if hasattr(filename, 'dtype'):
        h = filename
        if not is_format_header(h):
            raise ValueError('Array dtype incorrect')
    else:
        try:
            h = read_mrc_header(filename, no_strict_mrc=no_strict_mrc)
        except Exception:
            # Any read/parse failure means "not readable"; KeyboardInterrupt
            # and SystemExit are deliberately allowed to propagate.
            return False
    if _logger.isEnabledFor(logging.DEBUG):
        _logger.debug('Mode: %d - %d' % (h['mode'][0], (h['mode'][0] not in mrc2numpy)))
        _logger.debug('Byteorder: %d - %d' % (h['byteorder'][0], ((h['byteorder'][0] & -65536) not in intbyteorder)))
        _logger.debug('Byteorder-swap: %d - %d' % ((h['byteorder'][0].byteswap() & -65536), ((h['byteorder'][0].byteswap() & -65536) not in intbyteorder)))
        for name in ('alpha', 'beta', 'gamma'):
            _logger.debug('%s: %f - %d' % (name, h[name][0], (h[name][0] != 90.0)))
        for name in ('nx', 'ny', 'nz'):
            _logger.debug('%s: %d - %d' % (name, h[name][0], (h[name][0] > 0)))
    if h['mode'][0] not in mrc2numpy:
        _logger.debug('Failed to read proper mode - not MRC!')
        return False

    if (h['byteorder'][0] & -65536) not in intbyteorder and \
       (h['byteorder'][0].byteswap() & -65536) not in intbyteorder:
        # The machine stamp is not recognized in either byte order. Two known
        # non-standard writers are tolerated; the warning is emitted once per
        # process (tracked by the module-level `bad_mrc_header` flag).
        if h['alpha'][0] == 0.0 and h['beta'][0] == 0.0 and h['gamma'][0] == 0.0 and int(h['mode'][0]) == 6:  # this line hack for non-standard writers
            if not bad_mrc_header:
                bad_mrc_header = True
                _logger.warning('Assuming image is MRC format - format is not correct (Likely this image came from EPU)')
        elif h['alpha'][0] == 90.0 and h['beta'][0] == 90.0 and h['gamma'][0] == 90.0:  # this line hack for non-standard writers
            if not bad_mrc_header:
                bad_mrc_header = True
                _logger.warning("Assuming image is MRC format - format is not correct (Likely this image came from Yifan's GPU alignment)")
        else:
            # Logged only; the header is still accepted at this point.
            _logger.debug('Failed to read proper machine stamp - not MRC!')
    if not all(h[v][0] > 0 for v in ('nx', 'ny', 'nz')):
        _logger.debug('Failed to read proper dimensions - not MRC!')
        return False
    return True
is_writable(filename)

Test if the image extension of the given filename is understood as a writable format.

:Parameters:

filename : str Output filename to test

:Returns:

write : bool True if the format is recognized

Source code in src/instamatic/formats/mrc.py
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
def is_writable(filename):
    """Tell whether the extension of `filename` is a writable MRC-type format.

    :Parameters:

    filename : str
               Output filename to test

    :Returns:

    write : bool
            True if the extension (case-insensitive) is `mrc`, `ccp4` or `map`
    """

    _, dotted_suffix = os.path.splitext(filename)
    suffix = dotted_suffix[1:].lower()
    return suffix in ('mrc', 'ccp4', 'map')
iter_images(filename, index=None, header=None, no_strict_mrc=False)

Read a set of images from an MRC file.

:Parameters:

filename : str or file object Filename or open stream for a file index : int, optional Index of image to start, if None, start with the first image (Default: None) header : dict, optional Output dictionary to place header values no_strict_mrc : bool Perform strict MRC header checking (recommended) - Only EPU MRC files and Yifan's frame alignment require this to be off.

:Returns:

out : array Array with image information from the file

Source code in src/instamatic/formats/mrc.py
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
def iter_images(filename, index=None, header=None, no_strict_mrc=False):
    """Iterate over the images stored in an MRC file.

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int or sequence of int, optional
            Index of image to start, if None, start with the first image (Default: None).
            A sequence selects exactly the listed images.
    header : dict, optional
             Output dictionary to place header values
    no_strict_mrc : bool
                    Perform strict MRC header checking (recommended) - Only
                    EPU MRC files and Yifan's frame alignment require this
                    to be off.

    :Returns:

    out : array
          Array with image information from the file (one per iteration)
    """

    f = util.uopen(filename, 'rb')
    if index is None:
        index = 0
    try:
        # Keyword argument: the second positional parameter of
        # read_mrc_header is `index`, not `no_strict_mrc`.
        h = read_mrc_header(f, no_strict_mrc=no_strict_mrc)
        count = count_images(h)
        # if header is not None:  util.update_header(header, h, mrc2ara, 'mrc')
        tmp = read_header(h)
        if header is not None:
            header.update(tmp)
        # Plain Python ints keep the size arithmetic free of int32 overflow.
        nx, ny, nz = int(h['nx'][0]), int(h['ny'][0]), int(h['nz'][0])
        nsymbt = int(h['nsymbt'][0])
        d_len = nx * ny
        dtype = numpy.dtype(mrc2numpy[h['mode'][0]])
        offset = 1024 + nsymbt  # start of the image data
        try:
            f.seek(int(offset))
        except BaseException:
            _logger.error(f'{str(offset)} -- {str(offset.__class__.__name__)}')
            raise
        if not hasattr(index, '__iter__'):
            index = range(index, count)
        else:
            # `numpy.int` was removed from NumPy; the builtin is equivalent.
            # asarray() additionally accepts plain lists and tuples.
            index = numpy.asarray(index).astype(int)
        total = file_size(f)
        expected = 1024 + nsymbt + nx * ny * nz * dtype.itemsize
        if total != expected:
            raise util.InvalidHeaderException('file size != header: %d != %d -- %d' % (total, expected, nsymbt))
        # Index of the image read last; None forces a seek before the first
        # read, so the starting image is always the requested one.
        last = None
        for i in index:
            if last is None or i != (last + 1):
                f.seek(int(offset + int(i) * d_len * dtype.itemsize))
            out = util.fromfile(f, dtype=dtype, count=d_len)
            last = i

            out = reshape_data(out, h, index, count)
            if header_image_dtype.newbyteorder()[0] == h.dtype[0]:
                out = out.byteswap()
            yield out
    finally:
        util.close(filename, f)
read_header(filename, index=None, no_strict_mrc=False, force_volume=False)

Read the MRC header.

:Parameters:

filename : str or file object Filename or open stream for a file index : int, ignored Index of image to get the header, if None, the stack header (Default: None) no_strict_mrc : bool Perform strict MRC header checking (recommended) - Only EPU MRC files and Yifan's frame alignment require this to be off. force_volume : bool Force image to be treated as a volume

:Returns:

header : dict Dictionary with header information

Source code in src/instamatic/formats/mrc.py
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
def read_header(filename, index=None, no_strict_mrc=False, force_volume=False):
    """Build a dictionary view of the MRC header.

    :Parameters:

    filename : str or file object
               Filename or open stream for a file. An object with an `ndim`
               attribute is used directly as the header array.
    index : int, ignored
            Index of image to get the header, if None, the stack header (Default: None)
    no_strict_mrc : bool
                    Perform strict MRC header checking (recommended) - Only
                    EPU MRC files and Yifan's frame alignment require this
                    to be off.
    force_volume : bool
                   Force image to be treated as a volume

    :Returns:

    header : dict
             Dictionary with header information: `apix`, `count`, `nx`, `ny`,
             `nz`, one `mrc_<field>` entry per raw header field, and `format`
    """

    if hasattr(filename, 'ndim'):
        h = filename
    else:
        h = read_mrc_header(filename, index, no_strict_mrc)

    nx = int(h['nx'][0])
    ny = int(h['ny'][0])
    nz = int(h['nz'][0])
    # The data is a volume when nz equals nx or when the caller insists;
    # otherwise nz counts the images of a stack.
    is_volume = nz == nx or force_volume

    header = {
        'apix': float(h['xlen'][0]) / float(nx),
        'count': 1 if is_volume else nz,
        'nx': nx,
        'ny': ny,
        'nz': nz if is_volume else 1,
    }
    for field in h.dtype.fields.keys():
        header['mrc_' + field] = h[field][0]
    header['format'] = 'mrc'
    return header
read_image(filename, index=None, cache=None, no_strict_mrc=False, force_volume=False)

Read an image from the specified file in the MRC format.

:Parameters:

filename : str or file object
           Filename or open stream for a file
index : int, optional
        Index of image to get, if None, first image (Default: None)
no_strict_mrc : bool
                Perform strict MRC header checking (recommended) - Only
                EPU MRC files and Yifan's frame alignment require this
                to be off.
force_volume : bool
               Force image to be read as a volume

:Returns:

out : array
      Array with image information from the file
Source code in src/instamatic/formats/mrc.py
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
def read_image(filename, index=None, cache=None, no_strict_mrc=False, force_volume=False):
    """Read an image from the specified file in the MRC format.

    :Parameters:

        filename : str or file object
                   Filename or open stream for a file
        index : int, optional
                Index of image to get, if None, first image (Default: None)
        cache : unused
        no_strict_mrc : bool
                        Perform strict MRC header checking (recommended) - Only
                        EPU MRC files and Yifan's frame alignment require this
                        to be off.
        force_volume : bool
                       Force image to be read as a volume

    :Returns:

        out : array
              Array with image information from the file
        header : dict
                 Header dictionary as produced by `read_header`

    :Raises:

        OSError
            If `index` is not smaller than the number of images in the file
        util.InvalidHeaderException
            If the file size does not match the size implied by the header
    """

    idx = 0 if index is None else index
    f = util.uopen(filename, 'rb')
    try:
        # Keyword argument: the second positional parameter of
        # read_mrc_header is `index`, not `no_strict_mrc`.
        h = read_mrc_header(f, no_strict_mrc=no_strict_mrc)
        # if header is not None: util.update_header(header, h, mrc2ara, 'mrc')
        header = read_header(h, force_volume=force_volume)
        count = count_images(h)
        if idx >= count:
            raise OSError('Index exceeds number of images in stack: %d >= %d' % (idx, count))
        # Plain Python ints keep the size arithmetic free of int32 overflow.
        nx, ny, nz = int(h['nx'][0]), int(h['ny'][0]), int(h['nz'][0])
        nsymbt = int(h['nsymbt'][0])
        if index is None and (count == h['nx'][0] or force_volume):
            d_len = nx * ny * nz  # read the whole volume at once
        else:
            d_len = nx * ny  # read a single image
        dtype = numpy.dtype(mrc2numpy[h['mode'][0]])
        offset = 1024 + nsymbt + idx * d_len * dtype.itemsize
        total = file_size(f)
        expected = 1024 + nsymbt + nx * ny * nz * dtype.itemsize
        if total != expected:
            raise util.InvalidHeaderException('file size != header: %d != %d -- %s, %d' % (total, expected, str(idx), nsymbt))
        f.seek(int(offset))
        out = util.fromfile(f, dtype=dtype, count=d_len)
        out = reshape_data(out, h, index, count, force_volume)
        if header_image_dtype.newbyteorder()[0] == h.dtype[0]:
            out = out.byteswap()
    finally:
        util.close(filename, f)
    return out, header
read_mrc_header(filename, index=None, no_strict_mrc=False)

Read the MRC header.

:Parameters:

filename : str or file object Filename or open stream for a file index : int, ignored Index of image to get the header, if None, the stack header (Default: None) no_strict_mrc : bool Perform strict MRC header checking (recommended) - Only EPU MRC files and Yifan's frame alignment require this to be off.

:Returns:

out : array Array with header information in the file

Source code in src/instamatic/formats/mrc.py
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
def read_mrc_header(filename, index=None, no_strict_mrc=False):
    """Read the MRC header.

    The header is first read with the dtype `header_image_dtype`; if that does
    not yield a readable header, the same bytes are reinterpreted with the
    opposite byte order.

    :Parameters:

    filename : str or file object
               Filename or open stream for a file
    index : int, ignored
            Index of image to get the header, if None, the stack header (Default: None)
    no_strict_mrc : bool
                    Perform strict MRC header checking (recommended) - Only
                    EPU MRC files and Yifan's frame alignment require this
                    to be off.

    :Returns:

    out : array
          Array with header information in the file

    :Raises:

    OSError
        If the header is not a valid MRC header in either byte order
    """

    f = util.uopen(filename, 'rb')
    try:
        # curr = f.tell()
        h = util.fromfile(f, dtype=header_image_dtype, count=1)
        if not is_readable(h, no_strict_mrc):
            # `ndarray.newbyteorder()` was removed in NumPy 2.0; viewing the
            # data with the byte-swapped dtype is the documented replacement.
            h = h.view(h.dtype.newbyteorder())
        if not is_readable(h, no_strict_mrc):
            raise OSError('Not MRC header')
    finally:
        util.close(filename, f)
    return h
reshape_data(out, h, index, count, force_volume=False)

Reshape the data to the proper dimensions.

:Parameters:

out : array Array with image information from the file h : array Header information index : int Index of image count : int Number of images in file

:Returns:

out : array Array with image information from the file

Source code in src/instamatic/formats/mrc.py
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
def reshape_data(out, h, index, count, force_volume=False):
    """Reshape the data to the proper dimensions.

    :Parameters:

    out : array
          Array with image information from the file
    h : array
        Header information
    index : int
            Index of image; None requests the whole volume
    count : int
            Number of images in file
    force_volume : bool
                   Treat the data as a volume even if `count` differs from `nx`

    :Returns:

    out : array
          Array with image information from the file: 3D (nx, ny, nz) for a
          volume, 2D (ny, nx) for an image with more than one row, otherwise
          returned unchanged
    """

    nx, ny, nz = int(h['nx'][0]), int(h['ny'][0]), int(h['nz'][0])
    if index is None and nz > 1 and (count == h['nx'][0] or force_volume):
        out = out.reshape((nx, ny, nz))
        if h['mapc'][0] == 2 and h['mapr'][0] == 1:
            # Columns and rows are swapped in the file: transpose each slice.
            for i in range(out.shape[2]):
                out[:, :, i] = out[:, :, i].squeeze().T
    elif ny > 1:
        # NOTE: the original had a separate (identical) branch for
        # mapc == 2 and mapr == 1 with the remark ".transpose() # Test this!";
        # no transpose is applied to single images.
        out = out.reshape((ny, nx))
    return out
valid_image(filename, no_strict_mrc=False)

Test if the image is valid.

:Parameters:

filename : str
           Input filename to test
no_strict_mrc : bool
                Perform strict MRC header checking (recommended) - Only
                EPU MRC files and Yifan's frame alignment require this
                to be off.

:Returns:

flag : bool
       True if image is valid
Source code in src/instamatic/formats/mrc.py
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
def valid_image(filename, no_strict_mrc=False):
    """Test if the image is valid.

    The file is considered valid when its size equals the 1024-byte header
    plus the extended header (``nsymbt``) plus ``nx * ny * nz`` pixels of
    the data type named by the header's ``mode`` field.

    :Parameters:

        filename : str
                   Input filename to test
        no_strict_mrc : bool
                        Forwarded to `read_mrc_header`. Strict MRC header
                        checking is recommended - Only EPU MRC files and
                        Yifan's frame alignment require it to be off.

    :Returns:

        flag : bool
               True if image is valid
    """

    f = util.uopen(filename, 'rb')
    try:
        h = read_mrc_header(f, no_strict_mrc)
        total = file_size(f)
        dtype = numpy.dtype(mrc2numpy[h['mode'][0]])
        n_pixels = int(h['nx'][0]) * int(h['ny'][0]) * int(h['nz'][0])
        # Index element [0] like the other fields: int() on a whole
        # 1-element array is deprecated in recent NumPy releases.
        expected = 1024 + int(h['nsymbt'][0]) + n_pixels * dtype.itemsize
        return total == expected
    finally:
        util.close(filename, f)
write_image(filename, img, index=None, header=None, inplace=False)

Write an image array to a file in the MRC format.

:Parameters:

filename : str Name of the output file img : array Image array index : int, optional Index to write image in the stack header : dict, optional Dictionary of header values inplace : bool Write new image to stack without removing the stack

Source code in src/instamatic/formats/mrc.py
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
def write_image(filename, img, index=None, header=None, inplace=False):
    """Write an image array to a file in the MRC format.

    When `header` is not already a format header (as judged by
    `is_format_header`), a fresh one is built from the MRC defaults, the
    given `header` values and the properties of `img`. A ready-made format
    header is written as is.

    :Parameters:

    filename : str
               Name of the output file
    img : array
          Image array
    index : int, optional
            Index to write image in the stack
    header : dict, optional
             Dictionary of header values; falls back to ``img.header``
             when omitted and `img` has such an attribute
    inplace : bool
              Write new image to stack without removing the stack

    :Raises:

    TypeError
        If the dtype of `img` cannot be mapped to an MRC mode
    """

    if header is None and hasattr(img, 'header'):
        header = img.header
    try:
        img = img.astype(mrc2numpy[numpy2mrc[img.dtype.type]])
    except Exception as err:
        raise TypeError('Unsupported type for MRC writing: %s' % str(img.dtype)) from err

    # Existing stacks are opened for update; everything else is truncated.
    mode = 'rb+' if index is not None and (index > 0 or inplace and index > -1) else 'wb+'
    f = util.uopen(filename, mode)
    try:
        # Header construction happens inside the try block so the stream is
        # released even when building the header fails.
        if header is None or not hasattr(header, 'dtype') or not is_format_header(header):
            h = numpy.zeros(1, header_image_dtype)
            util.update_header(h, mrc_defaults, ara2mrc)
            pix = header.get('apix', 1.0) if header is not None else 1.0
            header = util.update_header(h, header, ara2mrc, 'mrc')
            header['nx'] = img.T.shape[0]
            header['ny'] = img.T.shape[1] if img.ndim > 1 else 1
            if header['nz'] == 0:
                header['nz'] = img.shape[2] if img.ndim > 2 else 1
            header['mode'] = numpy2mrc[img.dtype.type]
            header['mx'] = header['nx']
            header['my'] = header['ny']
            header['mz'] = header['nz']
            header['xlen'] = header['nx'] * pix
            header['ylen'] = header['ny'] * pix
            header['zlen'] = header['nz'] * pix
            header['alpha'] = 90
            header['beta'] = 90
            header['gamma'] = 90
            header['mapc'] = 1
            header['mapr'] = 2
            header['maps'] = 3
            header['amin'] = numpy.min(img)
            header['amax'] = numpy.max(img)
            header['amean'] = numpy.mean(img)

            header['map'] = 'MAP'
            header['byteorder'] = byteorderint2[sys.byteorder]  # 'DA\x00\x00'
            header['nlabels'] = 1
            header['label0'] = 'Created by Instamatic'

            if img.ndim == 3:
                header['nxstart'] = header['nx'] / -2
                header['nystart'] = header['ny'] / -2
                header['nzstart'] = header['nz'] / -2
            if index is not None:
                # Writing into a stack: the header must cover every slice
                # up to and including the one written now.
                stack_count = index + 1
                header['nz'] = stack_count
                header['mz'] = stack_count
                header['zlen'] = stack_count

        # Applies to both header paths; previously a ready-made format
        # header combined with index=None failed on the comparisons below.
        if index is None:
            index = 0

        def data_offset():
            # Read nsymbt from `header` (always defined here) rather than
            # the local `h`, which only exists when a header was built.
            nsymbt = int(numpy.asarray(header['nsymbt']).ravel()[0])
            return int(1024 + nsymbt + index * img.size * img.dtype.itemsize)

        if inplace:
            f.seek(data_offset())
        elif f != filename:
            f.seek(0)
            header.tofile(f)
            if index > 0:
                f.seek(data_offset())
        img.tofile(f)
    finally:
        util.close(filename, f)

Modules

util

Defines a set of utility functions.

.. Created on Jul 18, 2013 .. codeauthor:: Robert Langlois rl2528@columbia.edu

Classes

InvalidHeaderException

Bases: Exception

Thrown when the image file has an invalid header.

Functions

close(filename, fd)

Close the file descriptor (if it was opened by caller)

filename : str Name of the file fd : File File descriptor

Source code in src/instamatic/formats/util.py
63
64
65
66
67
68
69
70
71
72
def close(filename, fd):
    """Close `fd` unless it compares equal to `filename`.

    When the two compare equal, the caller handed in an already-open
    stream instead of a name, and the stream is left open.

    filename : str
               Name of the file (or the stream that was passed in its place)
    fd : File
         File descriptor
    """
    opened_here = fd != filename
    if opened_here:
        fd.close()
fromfile(fin, dtype, count, sep='')
Source code in src/instamatic/formats/util.py
24
25
26
27
28
29
def fromfile(fin, dtype, count, sep=''):
    """Read `count` items of type `dtype` from the stream `fin`.

    Streams exposing a ``fileno`` attribute are handed to `np.fromfile`
    (together with `sep`); any other stream is read through its ``read``
    method and decoded with `np.frombuffer`, in which case `sep` is unused.
    """
    if not hasattr(fin, 'fileno'):
        raw = fin.read(count * dtype.itemsize)
        return np.frombuffer(raw, dtype, count)
    return np.fromfile(fin, dtype, count, sep)
read_image(f, header, dtype, dlen, shape, swap, order='C')

Read an image from a file using random file access.

:Parameters:

f : stream Input file stream header : dict Header dtype : dtype Data type dlen : int Number of elements shape : tuple Shape of the array swap : bool Swap the byte order order : str Layout of a 2 or 3D array

:Returns:

out : ndarray Array of image data

Source code in src/instamatic/formats/util.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
def read_image(f, header, dtype, dlen, shape, swap, order='C'):
    """Read an image from a file using random file access.

    :Parameters:

    f : stream
        Input file stream
    header : dict
             Header
    dtype : dtype
            Data type
    dlen : int
           Number of elements
    shape : tuple
            Shape of the array
    swap : bool
           Swap the byte order
    order : str
            Layout of a 2 or 3D array

    :Returns:

    out : ndarray
          Array of image data
    """
    out = np.fromfile(f, dtype=dtype, count=dlen)
    out.shape = shape
    out = out.squeeze()
    if order == 'F':
        # Reinterpret the buffer with reversed axes, then transpose back so
        # the result has the requested axis order.
        out.shape = out.shape[::-1]
        out = out.transpose()
    if swap:
        # ndarray.newbyteorder() was removed in NumPy 2.0; viewing the
        # swapped data with the byte-swapped dtype is the equivalent
        # spelling and works on older releases as well.
        out = out.byteswap().view(out.dtype.newbyteorder())
    return ndimage(out, header)
uopen(filename, mode)

Open a stream to filename.

:Parameters:

filename : str Name of the file mode : str Mode to open file

:Returns:

fd : File File descriptor

Source code in src/instamatic/formats/util.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def uopen(filename, mode):
    """Open a stream to filename.

    Objects that are not path-like are assumed to be open streams already
    and are returned unchanged. Paths ending in ``.bz2`` are opened with
    `bz2.BZ2File`, all other paths with the built-in `open`.

    :Parameters:

    filename : str
               Name of the file
    mode : str
           Mode to open file

    :Returns:

    fd : File
         File descriptor
    """
    try:
        os.fspath(filename)
    except TypeError:
        # os.fspath raises TypeError for anything that is not str, bytes or
        # os.PathLike; catching only that keeps KeyboardInterrupt and
        # SystemExit from being silently swallowed here.
        return filename

    if os.path.splitext(filename)[1] == '.bz2':
        return bz2.BZ2File(filename, mode)

    try:
        return open(filename, mode)
    except Exception:
        # Record the requested mode to ease debugging, then re-raise.
        _logger.error('Mode: %s', mode)
        raise
update_header(dest, source, header_map, tag=None)

Map values from or to the format and the internal header.

:Parameters:

dest : array or dict Destination of the header values source : array or dict Source of the header values header_map : dict Map from destination to source tag : str Format specific attribute tag

:Returns:

dest : array or dict Destination of the header values

Source code in src/instamatic/formats/util.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def update_header(dest, source, header_map, tag=None):
    """Map values from or to the format and the internal header.

    For every key of `dest`, the value is looked up in `source` under the
    name given by `header_map` (or the key itself when unmapped). If that
    fails and `tag` is given, ``source[tag + '_' + key]`` is tried instead.
    Keys that cannot be resolved either way are left untouched.

    :Parameters:

    dest : array or dict
           Destination of the header values
    source : array or dict
             Source of the header values; None leaves `dest` unchanged
    header_map : dict
                 Map from destination to source
    tag : str
          Format specific attribute tag

    :Returns:

    dest : array or dict
           Destination of the header values
    """
    if source is None:
        return dest
    keys = dest.dtype.names if hasattr(dest, 'dtype') else dest.keys()
    # `tag` used to be reset to None at this point, which made the fallback
    # below unreachable no matter what the caller passed.
    for key in keys:
        try:
            dest[key] = source[header_map.get(key, key)]
        except Exception:
            # Copying is best-effort: missing or incompatible entries are
            # skipped rather than reported.
            if tag is None:
                continue
            try:
                dest[key] = source[tag + '_' + key]
            except Exception:
                pass
    return dest

xdscbf

Functions

compByteOffset(data)

Compress a dataset into a string using the byte_offset algorithm.

:param data: ndarray :return: string/bytes with compressed data

test = np.array([0,1,2,127,0,1,2,128,0,1,2,32767,0,1,2,32768,0,1,2,2147483647,0,1,2,2147483648,0,1,2,128,129,130,32767,32768,128,129,130,32768,2147483647,2147483648])

Source code in src/instamatic/formats/xdscbf.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def compByteOffset(data):
    """Compress a dataset into a string using the byte_offset algorithm.

    Each value is stored as the difference to its predecessor (the first
    value relative to zero). Differences within [-127, 127] take one signed
    byte. Larger ones are escaped with ``0x80`` and stored as little-endian
    int16, then escaped again with ``0x8000`` for int32 and with
    ``0x80000000`` for int64 when they do not fit.

    :param data: ndarray
    :return: string/bytes with compressed data (empty for empty input)

    test = np.array([0,1,2,127,0,1,2,128,0,1,2,32767,0,1,2,32768,0,1,2,2147483647,0,1,2,2147483648,0,1,2,128,129,130,32767,32768,128,129,130,32768,2147483647,2147483648])
    """
    flat = np.ascontiguousarray(data.ravel(), np.int64)
    if flat.size == 0:
        # Nothing to encode; avoids indexing element 0 of an empty array.
        return b''

    delta = np.empty_like(flat)
    delta[0] = flat[0]
    delta[1:] = flat[1:] - flat[:-1]

    # Positions whose delta does not fit into a single signed byte.
    exceptions = np.nonzero((delta > 127) | (delta < -127))[0]

    # Collect the pieces and join once instead of growing a bytes object.
    chunks = []
    start = 0
    for stop in exceptions:
        if stop > start:
            chunks.append(delta[start:stop].astype(np.int8).tobytes())
        exc = delta[stop:stop + 1]
        value = int(exc[0])
        # Explicit little-endian dtypes give the same bytes on any host.
        if not -2147483647 <= value <= 2147483647:  # beyond 2**31-1
            chunks.append(b'\x80\x00\x80\x00\x00\x00\x80')
            chunks.append(exc.astype('<i8').tobytes())
        elif not -32767 <= value <= 32767:  # beyond 2**15-1
            chunks.append(b'\x80\x00\x80')
            chunks.append(exc.astype('<i4').tobytes())
        else:  # beyond 127
            chunks.append(b'\x80')
            chunks.append(exc.astype('<i2').tobytes())
        start = stop + 1
    if start < delta.size:
        chunks.append(delta[start:].astype(np.int8).tobytes())
    return b''.join(chunks)
write(fname, data, header={})

Write the file in CBF format.

:param str fname: name of the file

Source code in src/instamatic/formats/xdscbf.py
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def write(fname, data, header=None):
    """Write the file in CBF format.

    The image is compressed with `compByteOffset` and wrapped in a fixed
    "XDS special" CBF text header whose lines are joined with CRLF.

    :param str fname: name of the file
    :param data: 2D ndarray with the image; its shape is unpacked as
        (second dimension, fastest dimension)
    :param header: currently unused
    :raises RuntimeError: if `data` is None
    """
    if data is None:
        raise RuntimeError('CBF image contains no data')
    dim2, dim1 = data.shape

    binary_blob = compByteOffset(data)

    # Name of the element type as listed in DATA_TYPES; when several
    # entries match, the last one wins.
    dtype = 'Unknown'
    for key, value in DATA_TYPES.items():
        if value == data.dtype:
            dtype = key

    def _line(text):
        # np.string_ was removed in NumPy 2.0; plain ASCII encoding yields
        # the same bytes for these header lines.
        return text.encode('ascii')

    binary_block = [b'###CBF: Version July 2008 generated by XDS',
                    b'',
                    b'data_a.cbf',
                    b'',
                    b'_array_data.header_convention "XDS special"',
                    b'_array_data.header_contents',
                    b';',
                    b';',
                    b'',
                    b'_array_data.data',
                    b';',
                    b'--CIF-BINARY-FORMAT-SECTION--',
                    b'Content-Type: application/octet-stream;',
                    b'     conversions="x-CBF_BYTE_OFFSET"',
                    b'Content-Transfer-Encoding: BINARY',
                    _line('X-Binary-Size: %d' % (len(binary_blob))),
                    b'X-Binary-ID: 1',
                    _line('X-Binary-Element-Type: "%s"' % (dtype)),
                    b'X-Binary-Element-Byte-Order: LITTLE_ENDIAN',
                    _line('X-Binary-Number-of-Elements: %d' % (dim1 * dim2)),
                    _line('X-Binary-Size-Fastest-Dimension: %d' % dim1),
                    _line('X-Binary-Size-Second-Dimension: %d' % dim2),
                    b'X-Binary-Size-Padding: 1',
                    b'',
                    STARTER + binary_blob,
                    b'',
                    b'--CIF-BINARY-FORMAT-SECTION----']

    cbf = b'\r\n'.join(binary_block)
    with open(fname, 'wb') as out_file:
        out_file.write(cbf)