public class GFFIndexer extends TextFileIndexer
Modifier and Type | Field and Description |
---|---|
GFFMetadataInfo |
file_info |
static java.lang.String |
marker_max_read_length |
static int |
MAX_TEXT_FILE_ROW_LENGTH |
static long |
min_gff_chunk_lenght |
protected static int |
progress_interval_factor |
Constructor and Description |
---|
GFFIndexer() |
Modifier and Type | Method and Description |
---|---|
protected java.lang.String |
buildCacheFileName(java.lang.String file_name,
int contig,
int num_chunks) |
GFFMetadataInfo |
getGFFFileIndexesAndMetadata(java.lang.String file_name,
int num_data_points)
Splits a large gff2 file into desired number of chunks,
and creates a list of indexes, offset in the file, and
corresponding genome position, e.g.: [file_offset] [genome_position]
0x1234 412 0x9876 3258 ....
|
GFFMetadataInfo |
getGFFFileRegionIndexesAndMetadata(java.lang.String file_name,
int num_chunks,
int genomicContig,
long fileStartOffset,
long fileEndOffset)
Splits a gff2 file region into desired number of chunks,
and creates a list of indexes (absolute in the file) and
corresponding genome position, e.g.: [file_offset] [genome_position]
0x1234 412 0x9876 3258 ....
|
FileIndexInfo |
getGFFFileRegionInfo(java.lang.String file_name,
int genomicContig,
long genomicStart,
long genomicEnd,
int num_cached_data_points) |
long |
getIndexSliceSize() |
java.io.Reader |
getReaderForGFFFileRegion(java.lang.String file_name,
int genomicContig,
long genomicStart,
long genomicEnd,
long data_size,
int num_cached_data_points)
Gets a reader for a specified region of a GFF file;
used for the details/sab view.
Allocates a block of contiguous memory.
|
boolean |
isSorted() |
protected void |
printProgress(int chunk_idx,
int num_chunks) |
boolean |
readFileInfoFromCacheFile(java.lang.String file_name,
int contig,
int num_chunks) |
protected void |
scanGFFV2Slice(java.util.List<GFFIndexInfo> indexes,
GFFMetadataInfoCalculator row_info,
java.lang.String slice,
int chunk_idx)
this method scans a slice of data to find an F3 read
and stores the info into the indexes array
|
void |
setIndexSliceSize(long l) |
boolean |
writeFileInfoToCacheFile(java.lang.String file_name,
int contig,
int num_chunks) |
printStatistics, printStatistics
public static final int MAX_TEXT_FILE_ROW_LENGTH
public static final long min_gff_chunk_lenght
public static final java.lang.String marker_max_read_length
protected static final int progress_interval_factor
public GFFMetadataInfo file_info
public long getIndexSliceSize()
getIndexSliceSize
in class TextFileIndexer
public void setIndexSliceSize(long l)
l
- sets slice size
public FileIndexInfo getGFFFileRegionInfo(java.lang.String file_name, int genomicContig, long genomicStart, long genomicEnd, int num_cached_data_points)
file_name -
genomicContig -
genomicStart -
genomicEnd -
data_size -
num_cached_data_points -
public java.io.Reader getReaderForGFFFileRegion(java.lang.String file_name, int genomicContig, long genomicStart, long genomicEnd, long data_size, int num_cached_data_points)
file_name -
start -
end -
public boolean isSorted()
public GFFMetadataInfo getGFFFileIndexesAndMetadata(java.lang.String file_name, int num_data_points)
file_name
- path to the GFF2 file
num_data_points
- number of slices to divide the file into; if 0, it is determined automatically based on file size
public GFFMetadataInfo getGFFFileRegionIndexesAndMetadata(java.lang.String file_name, int num_chunks, int genomicContig, long fileStartOffset, long fileEndOffset)
file_name
- path to the GFF2 file
num_chunks
- number of slices to divide the file region into
protected void scanGFFV2Slice(java.util.List<GFFIndexInfo> indexes, GFFMetadataInfoCalculator row_info, java.lang.String slice, int chunk_idx)
indexes
- array to store indexes
row_info
- helper class to accumulate/average row length
slice
- the slice as a string
chunk_idx
- current slice/chunk number
public boolean readFileInfoFromCacheFile(java.lang.String file_name, int contig, int num_chunks)
public boolean writeFileInfoToCacheFile(java.lang.String file_name, int contig, int num_chunks)
file_name -
contig -
num_chunks -
protected java.lang.String buildCacheFileName(java.lang.String file_name, int contig, int num_chunks)
protected void printProgress(int chunk_idx, int num_chunks)
chunk_idx -
num_chunks -
Copyright © 2010-2014 Pacific Biosciences. All Rights Reserved.