# -*- coding: utf-8 -*- """ ========================================== Create a very large FITS file from scratch ========================================== This example demonstrates how to create a large file (larger than will fit in memory) from scratch using `astropy.io.fits`. ------------------- *By: Erik Bray* *License: BSD* ------------------- """ ############################################################################## # Normally to create a single image FITS file one would do something like: import os import numpy as np from astropy.io import fits data = np.zeros((40000, 40000), dtype=np.float64) hdu = fits.PrimaryHDU(data=data) ############################################################################## # Then use the `astropy.io.fits.writeto()` method to write out the new # file to disk hdu.writeto('large.fits') ############################################################################## # However, a 40000 x 40000 array of doubles is nearly twelve gigabytes! Most # systems won't be able to create that in memory just to write out to disk. In # order to create such a large file efficiently requires a little extra work, # and a few assumptions. # # First, it is helpful to anticipate about how large (as in, how many keywords) # the header will have in it. FITS headers must be written in 2880 byte # blocks, large enough for 36 keywords per block (including the END keyword in # the final block). Typical headers have somewhere between 1 and 4 blocks, # though sometimes more. # # Since the first thing we write to a FITS file is the header, we want to write # enough header blocks so that there is plenty of padding in which to add new # keywords without having to resize the whole file. Say you want the header to # use 4 blocks by default. Then, excluding the END card which Astropy will add # automatically, create the header and pad it out to 36 * 4 cards. # # Create a stub array to initialize the HDU; its # exact size is irrelevant, as long as it has the desired number of # dimensions data = np.zeros((100, 100), dtype=np.float64) hdu = fits.PrimaryHDU(data=data) header = hdu.header while len(header) < (36 * 4 - 1): header.append() # Adds a blank card to the end ############################################################################## # Now adjust the NAXISn keywords to the desired size of the array, and write # only the header out to a file. Using the ``hdu.writeto()`` method will cause # astropy to "helpfully" reset the NAXISn keywords to match the size of the # dummy array. That is because it works hard to ensure that only valid FITS # files are written. Instead, we can write just the header to a file using the # `astropy.io.fits.Header.tofile` method: header['NAXIS1'] = 40000 header['NAXIS2'] = 40000 header.tofile('large.fits') ############################################################################## # Finally, grow out the end of the file to match the length of the # data (plus the length of the header). This can be done very efficiently on # most systems by seeking past the end of the file and writing a single byte, # like so: with open('large.fits', 'rb+') as fobj: # Seek past the length of the header, plus the length of the # Data we want to write. # 8 is the number of bytes per value, i.e. abs(header['BITPIX'])/8 # (this example is assuming a 64-bit float) # The -1 is to account for the final byte that we are about to # write: fobj.seek(len(header.tostring()) + (40000 * 40000 * 8) - 1) fobj.write(b'\0') ############################################################################## # More generally, this can be written: shape = tuple(header['NAXIS{0}'.format(ii)] for ii in range(1, header['NAXIS']+1)) with open('large.fits', 'rb+') as fobj: fobj.seek(len(header.tostring()) + (np.product(shape) * np.abs(header['BITPIX']//8)) - 1) fobj.write(b'\0') ############################################################################## # On modern operating systems this will cause the file (past the header) to be # filled with zeros out to the ~12GB needed to hold a 40000 x 40000 image. On # filesystems that support sparse file creation (most Linux filesystems, but not # the HFS+ filesystem used by most Macs) this is a very fast, efficient # operation. On other systems your mileage may vary. # # This isn't the only way to build up a large file, but probably one of the # safest. This method can also be used to create large multi-extension FITS # files, with a little care. ############################################################################## # Finally, we'll remove the file we created: os.remove('large.fits')