SeqAn3 3.2.0-rc.1
The Modern C++ library for sequence analysis.
input.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <cassert>
16#include <concepts>
17#include <filesystem>
18#include <fstream>
19#include <seqan3/std/ranges>
20#include <string>
21#include <variant>
22#include <vector>
23
47
48namespace seqan3
49{
50
51// ---------------------------------------------------------------------------------------------------------------------
52// sam_file_input_traits
53// ---------------------------------------------------------------------------------------------------------------------
54
113template <typename t>
114concept sam_file_input_traits = requires (t v)
115{
116 // field::seq
121
122 // field::id
124
125 // field::qual
128
129 // field::ref_seq
130 // either ref_info_not_given or a range over ranges over alphabet (e.g. std::vector<dna4_vector>)
131 requires std::same_as<typename t::ref_sequences, ref_info_not_given> || requires ()
132 {
134 };
135
136 // field::ref_id
138 (!std::same_as<typename t::ref_sequences, ref_info_not_given> ||
140 requires std::ranges::forward_range<std::ranges::range_reference_t<typename t::ref_ids>>;
141 requires std::ranges::forward_range<typename t::ref_ids>;
142
143 // field::offset is fixed to int32_t
144 // field::ref_offset is fixed to std::optional<int32_t>
145 // field::flag is fixed to seqan3::sam_flag
146 // field::mapq is fixed to uint8_t
147 // field::evalue is fixed to double
148 // field::bitscore is fixed to double
149 // field::mate is fixed to std::tuple<ref_id_container<ref_id_alphabet>, ref_offset_type, int32_t>
150
151 // field::alignment
152 // the alignment type cannot be configured.
153 // Type of tuple entry 1 (reference) is set to
154 // 1) a std::ranges::subrange over std::ranges::range_value_t<typename t::ref_sequences> if reference information was given
155 // or 2) a "dummy" sequence type:
156 // views::repeat_n(sequence_alphabet{}, size_t{}) | std::views::transform(detail::access_restrictor_fn{})
157 // Type of tuple entry 2 (query) is set to
158 // 1) a std::ranges::subrange over std::ranges::range_value_t<typename t::ref_sequences> if reference information was given
159 // or 2) a "dummy" sequence type:
160};
162
163// ---------------------------------------------------------------------------------------------------------------------
164// sam_file_input_default_traits
165// ---------------------------------------------------------------------------------------------------------------------
166
182template <typename ref_sequences_t = ref_info_not_given, typename ref_ids_t = std::deque<std::string>>
184{
192
195
197 template <typename _sequence_alphabet>
199
201 template <typename _id_alphabet>
203
206
208 template <typename _quality_alphabet>
210
212 using ref_sequences = ref_sequences_t;
213
215 using ref_ids = ref_ids_t;
217};
218
219// ---------------------------------------------------------------------------------------------------------------------
220// sam_file_input
221// ---------------------------------------------------------------------------------------------------------------------
222
238template <
240 detail::fields_specialisation selected_field_ids_ = fields<field::seq,
241 field::id,
255{
256public:
262 using traits_type = traits_type_;
264 using selected_field_ids = selected_field_ids_;
266 using valid_formats = valid_formats_;
268 using stream_char_type = char;
270
271private:
273 using dummy_ref_type = decltype(views::repeat_n(typename traits_type::sequence_alphabet{}, size_t{}) |
274 std::views::transform(detail::access_restrictor_fn{}));
275
278 detail::lazy_conditional_t<std::ranges::range<typename traits_type::ref_sequences const>,
279 detail::lazy<std::ranges::range_reference_t,
280 typename traits_type::ref_sequences const>,
282
284 using ref_sequence_sliced_type = decltype(std::declval<ref_sequence_unsliced_type>() | views::slice(0, 0));
285public:
292 using sequence_type = typename traits_type::template sequence_container<
293 typename traits_type::sequence_alphabet>;
295 using id_type = typename traits_type::template id_container<char>;
297 using offset_type = int32_t;
323 using mapq_type = uint8_t;
325 using quality_type = typename traits_type::template quality_container<
326 typename traits_type::quality_alphabet>;
335
336private:
341 decltype(std::declval<sequence_type &>() | views::slice(0, 0))>,
342 typename traits_type::template sequence_container<
344
345public:
348
351 id_type,
357 mapq_type,
359 flag_type,
360 mate_type,
362 header_type *>;
363
386 field::id,
398
399 static_assert([] () constexpr
400 {
401 for (field f : selected_field_ids::as_array)
402 if (!field_ids::contains(f))
403 return false;
404 return true;
405 }(),
406 "You selected a field that is not valid for alignment files, please refer to the documentation "
407 "of sam_file_input::field_ids for the accepted values.");
408
413
423 using const_reference = void;
425 using size_type = size_t;
431 using const_iterator = void;
433 using sentinel = std::default_sentinel_t;
435
440 sam_file_input() = delete;
450 ~sam_file_input() = default;
451
470 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
472 {
473 init_by_filename(std::move(filename));
474 }
475
495 template <input_stream stream_t, sam_file_input_format file_format>
497 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
499 sam_file_input(stream_t & stream,
500 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
501 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
503 {
504 init_by_format<file_format>();
505 }
506
// Constructs the file from a stream passed as an rvalue (a temporary or a std::move'd stream).
// The stream is moved into a heap-allocated stream_t that primary_stream holds together with
// stream_deleter_default, i.e. the file keeps its own stream object from here on.
// Afterwards init_by_format<file_format>() is called; nothing else happens in this constructor.
// format_tag and fields_tag are not read by the body (their names are wrapped in SEQAN3_DOXYGEN_ONLY);
// presumably they only serve to select file_format and selected_field_ids -- TODO confirm against the macro.
508 template <input_stream stream_t, sam_file_input_format file_format>
// Only streams whose char_type is exactly stream_char_type are accepted.
510 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
512 sam_file_input(stream_t && stream,
513 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
514 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
515 primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
516 {
517 init_by_format<file_format>();
518 }
519
544 typename traits_type::ref_ids & ref_ids,
545 typename traits_type::ref_sequences & ref_sequences,
546 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
548 {
549 // initialize reference information
550 set_references(ref_ids, ref_sequences);
551
552 init_by_filename(std::move(filename));
553 }
554
580 template <input_stream stream_t, sam_file_input_format file_format>
581 sam_file_input(stream_t & stream,
582 typename traits_type::ref_ids & ref_ids,
583 typename traits_type::ref_sequences & ref_sequences,
584 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
585 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
587 {
588 // initialize reference information
589 set_references(ref_ids, ref_sequences);
590
591 init_by_format<file_format>();
592 }
593
// Constructs the file from a stream passed as an rvalue plus reference information.
// The stream is moved into a heap-allocated stream_t that primary_stream holds together with
// stream_deleter_default. ref_ids and ref_sequences are taken by lvalue reference and handed to
// set_references(), which stores the address of ref_sequences rather than a copy, so both objects
// have to outlive their use by this file. init_by_format<file_format>() is called last.
// format_tag and fields_tag are not read by the body (their names are wrapped in SEQAN3_DOXYGEN_ONLY);
// presumably they only serve to select file_format and selected_field_ids -- TODO confirm against the macro.
595 template <input_stream stream_t, sam_file_input_format file_format>
596 sam_file_input(stream_t && stream,
597 typename traits_type::ref_ids & ref_ids,
598 typename traits_type::ref_sequences & ref_sequences,
599 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
600 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
601 primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
602 {
603 // initialize reference information
604 set_references(ref_ids, ref_sequences);
605
606 init_by_format<file_format>();
607 }
608
610 // explicitly delete rvalues for reference information
612 typename traits_type::ref_ids &&,
613 typename traits_type::ref_sequences &&,
614 selected_field_ids const &) = delete;
615
// Deleted overload: rejects reference identifiers and reference sequences that are passed as rvalues
// when constructing from a stream. set_references() keeps the address of the reference sequences
// instead of copying them, so binding a temporary here would leave the file with a dangling pointer.
616 template <input_stream stream_t, sam_file_input_format file_format>
617 sam_file_input(stream_t &&,
618 typename traits_type::ref_ids &&,
619 typename traits_type::ref_sequences &&,
620 file_format const &,
621 selected_field_ids const &) = delete;
624
646 {
647 // buffer first record
649 {
652 }
653
654 return {*this};
655 }
656
670 sentinel end() noexcept
671 {
672 return {};
673 }
674
698 reference front() noexcept
699 {
700 return *begin();
701 }
703
706
720 {
721 // make sure header is read
723 {
726 }
727
728 return *header_ptr;
729 }
730
731protected:
733
736 {
737 primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
738 static_cast<std::basic_ifstream<char> *>(primary_stream.get())->open(filename,
739 std::ios_base::in | std::ios::binary);
740 // open stream
741 if (!primary_stream->good())
742 throw file_open_error{"Could not open file " + filename.string() + " for reading."};
743
745 detail::set_format(format, filename);
746 }
747
749 template <typename format_type>
751 {
752 static_assert(list_traits::contains<format_type, valid_formats>,
753 "You selected a format that is not in the valid_formats of this file.");
754
757 }
758
761
772
783
788
792 bool at_end{false};
793
797
801
806 typename traits_type::ref_sequences const * reference_sequences_ptr{nullptr};
807
818 template <std::ranges::forward_range ref_sequences_t>
819 void set_references(typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences)
820 {
821 assert(std::ranges::distance(ref_ids) == std::ranges::distance(ref_sequences));
822
823 header_ptr = std::unique_ptr<header_type>{std::make_unique<header_type>(ref_ids)};
824 reference_sequences_ptr = &ref_sequences;
825
826 // initialise reference map and ref_dict if ref_ids are non-empty
827 for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx)
828 {
829 header_ptr->ref_id_info.emplace_back(std::ranges::distance(ref_sequences[idx]), "");
830
831 if constexpr (std::ranges::contiguous_range<std::ranges::range_reference_t<
832 typename traits_type::ref_ids>> &&
833 std::ranges::sized_range<std::ranges::range_reference_t<typename traits_type::ref_ids>> &&
834 std::ranges::borrowed_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>)
835 {
836 auto && id = header_ptr->ref_ids()[idx];
837 header_ptr->ref_dict[std::span{std::ranges::data(id), std::ranges::size(id)}] = idx;
838 }
839 else
840 {
841 header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
842 }
843 }
844 }
846
849 {
850 // clear the record
852 detail::get_or_ignore<field::header_ptr>(record_buffer) = header_ptr.get();
853
854 // at end if we could not read further
857 {
858 at_end = true;
859 return;
860 }
861
862 auto call_read_func = [this] (auto & ref_seq_info)
863 {
864 std::visit([&] (auto & f)
865 {
866 f.read_alignment_record(*secondary_stream,
867 options,
868 ref_seq_info,
869 *header_ptr,
871 detail::get_or_ignore<field::seq>(record_buffer),
872 detail::get_or_ignore<field::qual>(record_buffer),
873 detail::get_or_ignore<field::id>(record_buffer),
874 detail::get_or_ignore<field::offset>(record_buffer),
875 detail::get_or_ignore<field::ref_seq>(record_buffer),
876 detail::get_or_ignore<field::ref_id>(record_buffer),
877 detail::get_or_ignore<field::ref_offset>(record_buffer),
878 detail::get_or_ignore<field::alignment>(record_buffer),
879 detail::get_or_ignore<field::cigar>(record_buffer),
880 detail::get_or_ignore<field::flag>(record_buffer),
881 detail::get_or_ignore<field::mapq>(record_buffer),
882 detail::get_or_ignore<field::mate>(record_buffer),
883 detail::get_or_ignore<field::tags>(record_buffer),
884 detail::get_or_ignore<field::evalue>(record_buffer),
885 detail::get_or_ignore<field::bit_score>(record_buffer));
886 }, format);
887 };
888
889 assert(!format.valueless_by_exception());
890
891 if constexpr (!std::same_as<typename traits_type::ref_sequences, ref_info_not_given>)
892 call_read_func(*reference_sequences_ptr);
893 else
894 call_read_func(std::ignore);
895 }
896
898 friend iterator;
899};
900
906template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_ids>
907sam_file_input(stream_type && stream, file_format const &, selected_field_ids const &)
908 -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
911
913template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_ids>
914sam_file_input(stream_type & stream, file_format const &, selected_field_ids const &)
915 -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
918
920template <input_stream stream_type, sam_file_input_format file_format>
921sam_file_input(stream_type && stream, file_format const &)
922 -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
923 typename sam_file_input<>::selected_field_ids, // actually use the default
925
927template <input_stream stream_type, sam_file_input_format file_format>
928sam_file_input(stream_type & stream, file_format const &)
929 -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
930 typename sam_file_input<>::selected_field_ids, // actually use the default
932
934template <std::ranges::forward_range ref_ids_t,
935 std::ranges::forward_range ref_sequences_t,
937sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &, selected_field_ids const &)
941 typename sam_file_input<>::valid_formats>; // actually use the default
942
944template <std::ranges::forward_range ref_ids_t,
945 std::ranges::forward_range ref_sequences_t>
946sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &)
949 typename sam_file_input<>::selected_field_ids, // actually use the default
950 typename sam_file_input<>::valid_formats>; // actually use the default
951
953template <input_stream stream_type,
954 std::ranges::forward_range ref_ids_t,
955 std::ranges::forward_range ref_sequences_t,
956 sam_file_input_format file_format,
958sam_file_input(stream_type && stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &)
963
965template <input_stream stream_type,
966 std::ranges::forward_range ref_ids_t,
967 std::ranges::forward_range ref_sequences_t,
968 sam_file_input_format file_format,
970sam_file_input(stream_type & stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &)
975
977template <input_stream stream_type,
978 std::ranges::forward_range ref_ids_t,
979 std::ranges::forward_range ref_sequences_t,
980 sam_file_input_format file_format>
981sam_file_input(stream_type && stream, ref_ids_t &, ref_sequences_t &, file_format const &)
984 typename sam_file_input<>::selected_field_ids, // actually use the default
986
988template <input_stream stream_type,
989 std::ranges::forward_range ref_ids_t,
990 std::ranges::forward_range ref_sequences_t,
991 sam_file_input_format file_format>
992sam_file_input(stream_type & stream, ref_ids_t &, ref_sequences_t &, file_format const &)
995 typename sam_file_input<>::selected_field_ids, // actually use the default
998
999} // namespace seqan3
Provides seqan3::aa27, container aliases and string literals.
Provides the seqan3::cigar alphabet.
Provides alphabet adaptations for standard char types.
A combined alphabet that can hold values of either of its alternatives.
Definition: alphabet_variant.hpp:120
Input iterator necessary for providing a range-like interface in input file.
Definition: in_file_iterator.hpp:41
The 15 letter DNA alphabet, containing all IUPAC symbols minus the gap.
Definition: dna15.hpp:51
The five letter DNA alphabet of A,C,G,T and the unknown character N.
Definition: dna5.hpp:51
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequ...
Definition: gap_decorator.hpp:83
Quality type for traditional Sanger and modern Illumina Phred scores.
Definition: phred42.hpp:47
Stores the header information of alignment files.
Definition: header.hpp:34
A class for reading alignment files, e.g. SAM, BAM, BLAST ...
Definition: input.hpp:255
sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, typename sam_file_input<>::selected_field_ids, typename sam_file_input<>::valid_formats >
Deduce ref_sequences_t and ref_ids_t, default the rest.
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: input.hpp:670
size_t size_type
An unsigned integer type, usually std::size_t.
Definition: input.hpp:425
std::optional< int32_t > ref_id_type
The type of field::ref_id is fixed to std::optional<int32_t>.
Definition: input.hpp:314
void set_references(typename traits_type::ref_ids &ref_ids, ref_sequences_t &&ref_sequences)
Updates the reference information members and the header.
Definition: input.hpp:819
void const_reference
The const_reference type is void because files are not const-iterable.
Definition: input.hpp:423
sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, selected_field_ids, typename sam_file_input<>::valid_formats >
Deduce selected fields, ref_sequences_t and ref_ids_t, default the rest.
sam_file_input(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:499
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: input.hpp:266
decltype(views::repeat_n(typename traits_type::sequence_alphabet{}, size_t{})|std::views::transform(detail::access_restrictor_fn{})) dummy_ref_type
The dummy ref sequence type if no reference information was given.
Definition: input.hpp:274
char stream_char_type
Character type of the stream(s).
Definition: input.hpp:268
sam_file_input(stream_type &&stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, typename sam_file_input<>::selected_field_ids, type_list< file_format > >
Deduce ref_sequences_t and ref_ids_t, and file format.
typename traits_type::template sequence_container< typename traits_type::sequence_alphabet > sequence_type
The type of field::seq (default std::vector<seqan3::dna5>).
Definition: input.hpp:293
sam_file_input(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: input.hpp:469
bool at_end
File is one position behind the last record.
Definition: input.hpp:792
sam_file_input(stream_type &stream, file_format const &) -> sam_file_input< typename sam_file_input<>::traits_type, typename sam_file_input<>::selected_field_ids, type_list< file_format > >
Deduce file_format, and default the rest.
std::default_sentinel_t sentinel
The type returned by end().
Definition: input.hpp:433
void read_next_record()
Tell the format to move to the next record and update the buffer.
Definition: input.hpp:848
sam_file_input(stream_t &stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:581
std::vector< char > stream_buffer
A larger (compared to stl default) stream buffer to use when reading from a file.
Definition: input.hpp:768
stream_ptr_t primary_stream
The primary stream is the user provided stream or the file stream if constructed from filename.
Definition: input.hpp:785
format_type format
The actual std::variant holding a pointer to the detected/selected format.
Definition: input.hpp:799
std::optional< int32_t > ref_offset_type
The type of field::ref_offset is fixed to a std::optional<int32_t>.
Definition: input.hpp:321
traits_type_ traits_type
A traits type that defines aliases and template for storage of the fields.
Definition: input.hpp:262
int32_t offset_type
The type of field::offset is fixed to int32_t.
Definition: input.hpp:297
sam_file_input(stream_type &stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, typename sam_file_input<>::selected_field_ids, type_list< file_format > >
Deduce ref_sequences_t and ref_ids_t, and file format.
sam_file_input_options< typename traits_type::sequence_legal_alphabet > options
The options are public and its members can be set directly.
Definition: input.hpp:705
sam_file_input(stream_type &&stream, file_format const &) -> sam_file_input< typename sam_file_input<>::traits_type, typename sam_file_input<>::selected_field_ids, type_list< file_format > >
Deduce file_format, and default the rest.
static void stream_deleter_noop(std::basic_istream< stream_char_type > *)
Stream deleter that does nothing (no ownership assumed).
Definition: input.hpp:780
detail::lazy_conditional_t< std::ranges::range< typename traits_type::ref_sequences const >, detail::lazy< std::ranges::range_reference_t, typename traits_type::ref_sequences const >, dummy_ref_type > ref_sequence_unsliced_type
The unsliced ref sequence type if reference information was given.
Definition: input.hpp:281
typename traits_type::template id_container< char > id_type
The type of field::id (std::string by default).
Definition: input.hpp:295
sam_file_input & operator=(sam_file_input &&)=default
Move assignment is defaulted.
friend iterator
Befriend iterator so it can access the buffers.
Definition: input.hpp:898
stream_ptr_t secondary_stream
The secondary stream is a compression layer on the primary or just points to the primary (no compress...
Definition: input.hpp:787
typename traits_type::template quality_container< typename traits_type::quality_alphabet > quality_type
The type of field::qual (default std::vector<seqan3::phred42>).
Definition: input.hpp:326
sam_file_input(stream_t &&stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: input.hpp:596
std::tuple< gap_decorator< ref_sequence_type >, alignment_query_type > alignment_type
The type of field::alignment (default: std::pair<std::vector<gapped<dna5>>, std::vector<gapped<dna5>>...
Definition: input.hpp:347
sam_record< detail::select_types_with_ids_t< field_types, field_ids, selected_field_ids >, selected_field_ids > record_type
The type of the record, a specialisation of seqan3::record; acts as a tuple of the selected field typ...
Definition: input.hpp:411
typename detail::variant_from_tags< valid_formats, detail::sam_file_input_format_exposer >::type format_type
Type of the format, a std::variant over the valid_formats.
Definition: input.hpp:796
sam_file_input()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
iterator begin()
Returns an iterator to current position in the file.
Definition: input.hpp:645
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: input.hpp:264
sam_file_input(stream_type &stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, selected_field_ids, type_list< file_format > >
Deduce selected fields, ref_sequences_t and ref_ids_t, and file format.
bool first_record_was_read
Tracks whether the very first record is buffered when calling begin().
Definition: input.hpp:790
sam_file_input(std::filesystem::path filename, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename and given additional reference information.
Definition: input.hpp:543
sam_file_input & operator=(sam_file_input const &)=delete
Copy assignment is explicitly deleted because you cannot have multiple access to the same file.
record_type record_buffer
Buffer for a single record.
Definition: input.hpp:766
sam_file_input(sam_file_input &&)=default
Move construction is defaulted.
void init_by_format()
Initialisation based on a format (construction via stream).
Definition: input.hpp:750
void const_iterator
The const iterator type is void because files are not const-iterable.
Definition: input.hpp:431
std::streampos position_buffer
Buffer for the previous record position.
Definition: input.hpp:770
std::unique_ptr< header_type > header_ptr
The file header object.
Definition: input.hpp:760
header_type & header()
Access the file's header.
Definition: input.hpp:719
sam_file_input(sam_file_input const &)=delete
Copy construction is explicitly deleted because you cannot have multiple access to the same file.
uint8_t mapq_type
The type of field::mapq is fixed to uint8_t.
Definition: input.hpp:323
sam_flag flag_type
The type of field::flag is fixed to seqan3::sam_flag.
Definition: input.hpp:328
sam_file_input(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: input.hpp:512
decltype(std::declval< ref_sequence_unsliced_type >()|views::slice(0, 0)) ref_sequence_sliced_type
The ref sequence type if reference information was given.
Definition: input.hpp:284
sam_file_input(stream_type &&stream, file_format const &, selected_field_ids const &) -> sam_file_input< typename sam_file_input<>::traits_type, selected_field_ids, type_list< file_format > >
Deduce selected fields, file_format, and default the rest.
sam_file_input(stream_type &&stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, selected_field_ids, type_list< file_format > >
Deduce selected fields, ref_sequences_t and ref_ids_t, and file format.
static void stream_deleter_default(std::basic_istream< stream_char_type > *ptr)
Stream deleter with default behaviour (ownership assumed).
Definition: input.hpp:782
~sam_file_input()=default
Destructor is defaulted.
std::tuple< ref_id_type, ref_offset_type, int32_t > mate_type
The type of field::mate is fixed to std::tuple<ref_id_type, ref_offset_type, int32_t>.
Definition: input.hpp:332
void init_by_filename(std::filesystem::path filename)
Definition: input.hpp:735
reference front() noexcept
Return the record we are currently at in the file.
Definition: input.hpp:698
traits_type::ref_sequences const * reference_sequences_ptr
A pointer to the reference sequence information if given on construction.
Definition: input.hpp:806
sam_file_input(stream_type &stream, file_format const &, selected_field_ids const &) -> sam_file_input< typename sam_file_input<>::traits_type, selected_field_ids, type_list< file_format > >
Deduce selected fields, file_format, and default the rest.
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:336
Auxiliary concept that checks whether a type is a specialisation of seqan3::fields.
Definition: record.hpp:35
Auxiliary concept that checks whether a type is a seqan3::type_list and all types meet seqan3::sam_fi...
Definition: input_format_concept.hpp:255
T data(T... args)
Provides auxiliary data structures and functions for seqan3::record and seqan3::fields.
Provides seqan3::dna15, container aliases and string literals.
Provides seqan3::dna5, container aliases and string literals.
Provides the seqan3::format_bam.
Provides the seqan3::format_sam.
Provides seqan3::gap_decorator.
T get(T... args)
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: sam_flag.hpp:76
field
An enumerator for the fields used in file formats.
Definition: record.hpp:63
void set_format(format_variant_type &format, std::filesystem::path const &file_name)
Sets the file format according to the file name extension.
Definition: misc.hpp:68
auto make_secondary_istream(std::basic_istream< char_t > &primary_stream, std::filesystem::path &filename) -> std::unique_ptr< std::basic_istream< char_t >, std::function< void(std::basic_istream< char_t > *)> >
Depending on the magic bytes of the given stream, return a decompression stream or forward the primar...
Definition: misc_input.hpp:81
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ alignment
The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
@ cigar
The cigar vector (std::vector<seqan3::cigar>) representing the alignment in SAM/BAM format.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ header_ptr
A pointer to the seqan3::sam_file_header object storing header information.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ id
The identifier, usually a string.
@ tags
The optional tags in the SAM format, stored in a dictionary.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr bool contains
Whether a type occurs in a type list or not.
Definition: traits.hpp:255
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:495
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:183
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:91
Provides the seqan3::detail::in_file_iterator class template.
The generic alphabet concept that covers most data types used in ranges.
Checks whether from can be explicitly converted to to.
The generic concept for alignment file input formats.
The requirements a traits_type for seqan3::sam_file_input must meet.
A more refined container concept than seqan3::container.
Refines seqan3::alphabet and adds assignability.
A concept that indicates whether a writable alphabet represents quality scores.
Provides exceptions used in the I/O module.
Stream concepts.
Provides various utility functions required only for input.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides seqan3::phred42 quality scores.
Provides quality alphabet composites.
The <ranges> header from C++20's standard library.
Provides seqan3::views::repeat_n.
Provides seqan3::sam_file_input_format and auxiliary classes.
Provides seqan3::sam_record.
Provides helper data structures for the seqan3::sam_file_output.
T size(T... args)
Provides seqan3::views::slice.
An empty type whose only purpose is to hold an uninstantiated template plus its arguments.
Definition: lazy_conditional.hpp:33
Internal class used to expose the actual format interface to read alignment records from the file.
Definition: input_format_concept.hpp:47
Base class to deduce the std::variant type from format tags.
Definition: misc.hpp:31
A class template that holds a choice of seqan3::field.
Definition: record.hpp:128
static constexpr bool contains(field f)
Whether a field is contained in the parameter pack.
Definition: record.hpp:149
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem.
Definition: exception.hpp:39
void clear() noexcept(noexcept(std::apply(expander, std::declval< record & >())))
Clears containers that provide .clear() and (re-)initialises all other elements with = {}.
Definition: record.hpp:235
The default traits for seqan3::sam_file_input.
Definition: input.hpp:184
ref_ids_t ref_ids
The type of the reference identifiers is deduced on construction.
Definition: input.hpp:215
ref_sequences_t ref_sequences
The type of the reference sequences is deduced on construction.
Definition: input.hpp:212
Type that contains multiple types.
Definition: type_list.hpp:29
Provides seqan3::detail::transformation_trait_or.
Provides traits for seqan3::type_list.
Provides seqan3::tuple_like.
T visit(T... args)