-
Notifications
You must be signed in to change notification settings - Fork 3
add Data.Align.Text #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| 0.2 | ||
|
|
||
| * Add 'text' dependency | ||
| * Add 'Data.Align.Text' |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| {-# language RecordWildCards #-} | ||
| {-# language OverloadedStrings #-} | ||
| module Data.Align.Text ( | ||
| -- * Text | ||
| align | ||
| ) where | ||
|
|
||
| import qualified Data.List as L (maximumBy) | ||
| import Data.Ord (comparing) | ||
|
|
||
| import qualified Data.Map as M | ||
| import qualified Data.Text as T (Text, length, index) | ||
| import Control.Monad.Trans.State.Strict | ||
|
|
||
|
|
||
| import Data.Align.Types | ||
|
|
||
|
|
||
| -- | 'Data.Align.align', specialized to 'Data.Text.Text' | ||
| align :: (Num s, Ord s) => | ||
| AlignConfig Char s | ||
| -> T.Text -- ^ Left sequence | ||
| -> T.Text -- ^ Right sequence | ||
| -> Trace Char s | ||
| align AlignConfig{..} as bs = revTrace $ evalState (go p) M.empty | ||
| where | ||
| p = (lastIndex as, lastIndex bs) | ||
| revTrace (Trace s t) = Trace s (reverse t) | ||
| lastIndex v = T.length v - 1 | ||
| -- | ||
| go ij = do | ||
| res <- gets (M.lookup ij) | ||
| case res of | ||
| Just r -> return r | ||
| Nothing -> do | ||
| newRes <- pgo ij | ||
| modify (M.insert ij newRes) | ||
| return newRes | ||
| -- | ||
| pgo (i, j) | ||
| | i == (-1) || j == (-1) = return $ | ||
| if i == j then Trace 0 [] | ||
| else if i == (-1) | ||
| then skipInit j stepRight bs | ||
| else skipInit i stepLeft as | ||
| | otherwise = do | ||
| let a = as `T.index` i | ||
| b = bs `T.index` j | ||
| diag <- go (i-1,j-1) `tappend` Trace (acPairScore a b) [stepBoth a b] | ||
| a_gap <- go (i-1, j) `tappend` Trace ac_gap_penalty [stepLeft a] | ||
| b_gap <- go ( i,j-1) `tappend` Trace ac_gap_penalty [stepRight b] | ||
| return $ L.maximumBy (comparing traceScore) [diag, a_gap, b_gap] | ||
| -- | ||
| skipInit idx stepFun xs = | ||
| let score = ac_initial_gap_penalty * fromIntegral (idx+1) | ||
| tr = reverse [stepFun (xs `T.index` xi) | xi <- [0..idx]] | ||
| in Trace score tr | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| module Data.Align.Types ( | ||
| AlignConfig(..) | ||
| , alignConfig | ||
| , localAlignConfig | ||
| , Step | ||
| , stepLeft, stepRight, stepBoth | ||
| , isMatch, isLeft, isRight | ||
| , Trace(..) | ||
| , tappend | ||
| ) where | ||
|
|
||
|
|
||
| data AlignConfig a s = AlignConfig | ||
| { acPairScore :: a -> a -> s | ||
| , ac_initial_gap_penalty :: s | ||
| , ac_gap_penalty :: s | ||
| } | ||
|
|
||
|
|
||
| -- | Configures the scores used when aligning. | ||
| -- | ||
| -- The gap scores should be negative in order to be penalties. | ||
| alignConfig :: (a -> a -> s) -- ^ Scoring function | ||
| -> s -- ^ Initial gap score | ||
| -> s -- ^ Gap score | ||
| -> AlignConfig a s | ||
| alignConfig = AlignConfig | ||
|
|
||
| -- | Configuration for local alignment. | ||
| localAlignConfig | ||
| :: Num s | ||
| => (a -> a -> s) -- ^ Scoring function. | ||
| -> s -- ^ Gap score. | ||
| -> AlignConfig a s | ||
| localAlignConfig f = alignConfig f 0 | ||
|
|
||
| -- | Either an unmatched item or a match. | ||
| type Step a = Either (Either a a) (a, a) | ||
|
|
||
| stepLeft :: a -> Either (Either a b1) b2 | ||
| stepLeft = Left . Left | ||
| stepRight :: b1 -> Either (Either a b1) b2 | ||
| stepRight = Left . Right | ||
| stepBoth :: a1 -> b -> Either a2 (a1, b) | ||
| stepBoth a b = Right (a,b) | ||
|
|
||
| isMatch :: Step a -> Bool | ||
| isMatch (Right _) = True | ||
| isMatch _ = False | ||
|
|
||
| isLeft :: Step a -> Bool | ||
| isLeft (Left (Left _)) = True | ||
| isLeft _ = False | ||
|
|
||
| isRight :: Step a -> Bool | ||
| isRight (Left (Right _)) = True | ||
| isRight _ = False | ||
|
|
||
| -- | The result of the alignment. | ||
| data Trace a s = Trace | ||
| { traceScore :: s | ||
| , trace :: [Step a] | ||
| } | ||
|
|
||
| instance Semigroup s => Semigroup (Trace a s) where | ||
| (Trace s1 t1) <> (Trace s2 t2) = Trace (s1 <> s2) (t1 ++ t2) | ||
|
|
||
| instance (Show a, Show s) => Show (Trace a s) where | ||
| show (Trace s t) = "Trace(score = " ++ show s ++ ", steps = " ++ show t ++ ")" | ||
|
|
||
| tappend :: (Functor f, Num s) => | ||
| f (Trace a s) -> Trace a s -> f (Trace a s) | ||
| mt `tappend` (Trace z (t:_)) = | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please move to be a local helper at callsite, as mentioned in #2 .
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point, but we're now using this in Data.Align.Text as well. Perhaps an INLINE pragma would work instead? |
||
| fmap (\(Trace s ts) -> Trace (s+z) (t:ts)) mt | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| resolver: lts-14.27 | ||
|
|
||
| packages: | ||
| - . |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you have even anecdotal knowledge, please add a few lines of comment about expectable gains vs using stringy version.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think better still would be to set up a benchmark suite.