1
- {- Copyright 2015-2019 NGLess Authors
1
+ {- Copyright 2015-2020 NGLess Authors
2
2
- License: MIT
3
3
-}
4
4
7
7
module Interpretation.Select
8
8
( executeSelect
9
9
, executeMappedReadMethod
10
- , splitSamlines3
11
10
, fixCigar
11
+ , reinjectSequences
12
12
) where
13
13
14
14
import qualified Data.ByteString as B
@@ -25,7 +25,7 @@ import qualified Data.Text.Encoding as TE
25
25
import Data.Bits (Bits (.. ))
26
26
import Control.Monad.Except (throwError )
27
27
import Data.Either.Combinators (fromRight )
28
- import Data.List (foldl' , find )
28
+ import Data.List (foldl' )
29
29
import Data.Either.Extra (eitherToMaybe )
30
30
import Data.Tuple.Extra (fst3 )
31
31
import Data.Ratio (Ratio , (%) )
@@ -34,7 +34,7 @@ import Data.Maybe
34
34
35
35
import Data.Sam
36
36
import FileManagement
37
- import FileOrStream
37
+ import FileOrStream ( asSamStream , FileOrStream ( .. ))
38
38
import Language
39
39
import Output
40
40
@@ -81,24 +81,52 @@ _parseConditions args = do
81
81
-- 2) we want to keep the full sequence, so we want to use soft trimming (if
82
82
-- at all possible)
83
83
matchConditions :: Bool -> MatchCondition -> [(SamLine ,B. ByteString )] -> NGLess [(SamLine , B. ByteString )]
84
- matchConditions doReinject conds sg = reinjectSequences doReinject (matchConditions' conds sg)
84
+ matchConditions doReinject conds sg =
85
+ let sg' = matchConditions' conds sg
86
+ in if doReinject
87
+ then reinjectSequencesIfNeeded sg sg'
88
+ else pure sg'
89
+
90
+ toStrictBS :: BB. Builder -> B. ByteString
91
+ toStrictBS = BL. toStrict . BB. toLazyByteString
92
+
93
+
94
+ reinjectSequencesIfNeeded :: [(SamLine ,B. ByteString )] -> [(SamLine ,B. ByteString )] -> NGLess [(SamLine ,B. ByteString )]
95
+ reinjectSequencesIfNeeded _ filtered@ ((SamHeader {},_): _) = return filtered
96
+ reinjectSequencesIfNeeded orig filtered =
97
+ if needsReinject (fmap fst filtered)
98
+ then do
99
+ filtered' <- reinjectSequences (fmap fst orig) (fmap fst filtered)
100
+ return $ fmap (\ s -> (s, toStrictBS $ encodeSamLine s)) filtered'
101
+ else return filtered
85
102
where
86
- reinjectSequences True f@ ((s@ SamLine {}, _): rs)
87
- | not (any (hasSequence . fst ) f) && any (hasSequence . fst ) sg
88
- = do
89
- s' <- addSequence s
90
- return ((s', toStrictBS $ encodeSamLine s'): rs)
91
- reinjectSequences _ f = return f
92
-
93
- toStrictBS :: BB. Builder -> B. ByteString
94
- toStrictBS = BL. toStrict . BB. toLazyByteString
95
-
96
- addSequence s = case find hasSequence (fst <$> sg) of
97
- Just s'@ SamLine {} -> do
98
- cigar' <- fixCigar (samCigar s) (B. length $ samSeq s')
99
- return s { samSeq = samSeq s', samQual = samQual s', samCigar = cigar' }
100
- _ -> return s
103
+ needsReinject :: [SamLine ] -> Bool
104
+ needsReinject fs = let (fs1, fs2, fs0) = splitSamlines3 fs
105
+ in needsReinject' fs1 || needsReinject' fs2 || needsReinject' fs0
106
+ needsReinject' :: [SamLine ] -> Bool
107
+ needsReinject' [] = False
108
+ needsReinject' xs = not (any hasSequence xs)
101
109
110
+ -- See note above on "Sequence reinjection" about why this function is necessary
111
+ reinjectSequences :: [SamLine ] -> [SamLine ] -> NGLess [SamLine ]
112
+ reinjectSequences original filtered = case (splitSamlines3 original, splitSamlines3 filtered) of
113
+ ((o1, o2, os), (f1, f2, fs)) -> do
114
+ r1 <- reinjectSequences' o1 f1
115
+ r2 <- reinjectSequences' o2 f2
116
+ ss <- reinjectSequences' os fs
117
+ return (r1 ++ r2 ++ ss)
118
+ reinjectSequences' :: [SamLine ] -> [SamLine ] -> NGLess [SamLine ]
119
+ reinjectSequences' original f@ (s@ SamLine {}: rs)
120
+ | not (any hasSequence f) = let
121
+ fixed = flip map (filter hasSequence original) $ \ s' -> do
122
+ cigar <- fixCigar (samCigar s) (samLength s')
123
+ return (s { samSeq = samSeq s', samQual = samQual s', samCigar = cigar }: rs)
124
+ asum' [] = return f
125
+ asum' (x: xs) = case x of
126
+ Right {} -> x
127
+ Left {} -> asum' xs
128
+ in asum' fixed
129
+ reinjectSequences' _ f = return f
102
130
-- See note above on "Sequence reinjection" about why this function is necessary
103
131
fixCigar :: B. ByteString -> Int -> NGLess B. ByteString
104
132
fixCigar prev n = do
0 commit comments