Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
0.2

* Add 'text' dependency
* Add 'Data.Align.Text'
7 changes: 6 additions & 1 deletion align.cabal
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: align
version: 0.1.1.2
version: 0.2
synopsis: Sequence alignment algorithms.
description: Global or local sequence alignment, not exclusively for text.
license: BSD3
Expand All @@ -9,15 +9,20 @@ maintainer: palotai.robin@gmail.com
category: Data
build-type: Simple
cabal-version: >=1.10
extra-source-files: CHANGELOG.md

source-repository head
type: git
location: https://github.com/robinp/align.git

library
exposed-modules: Data.Align,
Data.Align.Text,
Data.Align.Demo
other-modules: Data.Align.Types
build-depends: base >=4.6 && <5,
containers,
text,
transformers,
vector >=0.10
hs-source-dirs: src
Expand Down
66 changes: 8 additions & 58 deletions src/Data/Align.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{-# LANGUAGE RecordWildCards #-}
{-# OPTIONS_GHC -Wno-name-shadowing #-}
-- | Collection of functions for global, local and multi-sequence alignment.
module Data.Align
(
Expand All @@ -20,66 +21,16 @@ module Data.Align
) where

import Control.Monad.Trans.State.Strict
import Data.Function (fix, on)
import Data.Function (on)
import qualified Data.List as L
import qualified Data.Map.Strict as M
import Data.Maybe (fromMaybe)
import Data.Ord
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G

data AlignConfig a s = AlignConfig
{ acPairScore :: a -> a -> s
, ac_initial_gap_penalty :: s
, ac_gap_penalty :: s
}

-- | Configures the scores used when aligning.
-- The gap scores should be negative in order to be penalties.
alignConfig :: (a -> a -> s) -- ^ Scoring function.
-> s -- ^ Initial gap score.
-> s -- ^ Gap score.
-> AlignConfig a s
alignConfig = AlignConfig

-- | Configuration for local alignment.
localAlignConfig
:: Num s
=> (a -> a -> s) -- ^ Scoring function.
-> s -- ^ Gap score.
-> AlignConfig a s
localAlignConfig f = alignConfig f 0

-- | Either an unmatched item or a match.
type Step a = Either (Either a a) (a, a)

stepLeft = Left . Left
stepRight = Left . Right
stepBoth a b = Right (a,b)

isMatch :: Step a -> Bool
isMatch (Right _) = True
isMatch _ = False

isLeft :: Step a -> Bool
isLeft (Left (Left _)) = True
isLeft _ = False

isRight :: Step a -> Bool
isRight (Left (Right _)) = True
isRight _ = False

-- | The result of the alignment.
data Trace a s = Trace
{ traceScore :: s
, trace :: [Step a]
}

instance (Show a, Show s) => Show (Trace a s) where
show (Trace s t) = "Trace(score = " ++ show s ++ ", steps = " ++ show t ++ ")"
import Data.Align.Types

mt `tappend` (Trace z (t:_)) =
fmap (\(Trace s ts) -> Trace (s+z) (t:ts)) mt

-- | Utility for displaying a Char-based alignment.
debugAlign :: [Step Char] -> String
Expand Down Expand Up @@ -108,18 +59,17 @@ debugAlign = go [] []
-- -applied
align :: (G.Vector v a, Num s, Ord s)
=> AlignConfig a s
-> v a -- ^ Left sequence.
-> v a -- ^ Right sequence.
-> v a -- ^ Left sequence
-> v a -- ^ Right sequence
-> Trace a s
align AlignConfig{..} as bs =
let p = (lastIndex as, lastIndex bs)
in revTrace $ evalState (go p) M.empty
align AlignConfig{..} as bs = revTrace $ evalState (go p) M.empty
where
p = (lastIndex as, lastIndex bs)
revTrace (Trace s t) = Trace s (reverse t)
lastIndex v = G.length v - 1
--
go p = do
res <- gets $ M.lookup p
res <- gets (M.lookup p)
case res of
Just r -> return r
Nothing -> do
Expand Down
10 changes: 10 additions & 0 deletions src/Data/Align/Demo.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,24 @@ sampleGlobalConfig = alignConfig
(-0.5)
(-1)

testIn1, testIn2 :: String
testIn1 = "dopple"
testIn2 = "applied"

alignedGlobal :: Trace Char Double
alignedGlobal =
align sampleGlobalConfig (V.fromList testIn1) (V.fromList testIn2)

debug :: Trace Char s -> IO ()
debug = putStrLn . debugAlign . trace

printAlignedGlobal :: IO ()
printAlignedGlobal = debug alignedGlobal

-- * Multi-sequence fun.

-- | Example from https://www.biostat.wisc.edu/bmi576/lectures/multiple-alignment.pdf
nucs :: [String]
nucs =
[ "ATTGCCATT"
, "ATGGCCATT"
Expand All @@ -33,15 +38,20 @@ nucs =
, "ATTGCCGATT"
]

alignNuc :: Eq a => [a] -> [a] -> Trace a Double
alignNuc a b = align sampleGlobalConfig (V.fromList a) (V.fromList b)

alignedNucPairs :: [Trace Char Double]
alignedNucPairs = do
n <- tail nucs
return $ alignNuc (head nucs) n

printAlignedNucPairs :: IO ()
printAlignedNucPairs = mapM_ (\x -> debug x >> putStrLn "") alignedNucPairs

alignedNucStar :: MultiTrace Integer Char Double
alignedNucStar =
centerStar sampleGlobalConfig $ zip [1..] (map V.fromList nucs)

printAlignedNucStar :: IO ()
printAlignedNucStar = putStrLn . debugMultiAlign . multiTrace $ alignedNucStar
57 changes: 57 additions & 0 deletions src/Data/Align/Text.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{-# language RecordWildCards #-}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you have even anecdotal knowledge, please add a few lines of comment about expectable gains vs using stringy version.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think better still would be to set up a benchmark suite.

{-# language OverloadedStrings #-}
module Data.Align.Text (
-- * Text
align
) where

import qualified Data.List as L (maximumBy)
import Data.Ord (comparing)

import qualified Data.Map as M
import qualified Data.Text as T (Text, length, index)
import Control.Monad.Trans.State.Strict


import Data.Align.Types


-- | 'Data.Align.align', specialized to 'Data.Text.Text'
align :: (Num s, Ord s) =>
AlignConfig Char s
-> T.Text -- ^ Left sequence
-> T.Text -- ^ Right sequence
-> Trace Char s
align AlignConfig{..} as bs = revTrace $ evalState (go p) M.empty
where
p = (lastIndex as, lastIndex bs)
revTrace (Trace s t) = Trace s (reverse t)
lastIndex v = T.length v - 1
--
go ij = do
res <- gets (M.lookup ij)
case res of
Just r -> return r
Nothing -> do
newRes <- pgo ij
modify (M.insert ij newRes)
return newRes
--
pgo (i, j)
| i == (-1) || j == (-1) = return $
if i == j then Trace 0 []
else if i == (-1)
then skipInit j stepRight bs
else skipInit i stepLeft as
| otherwise = do
let a = as `T.index` i
b = bs `T.index` j
diag <- go (i-1,j-1) `tappend` Trace (acPairScore a b) [stepBoth a b]
a_gap <- go (i-1, j) `tappend` Trace ac_gap_penalty [stepLeft a]
b_gap <- go ( i,j-1) `tappend` Trace ac_gap_penalty [stepRight b]
return $ L.maximumBy (comparing traceScore) [diag, a_gap, b_gap]
--
skipInit idx stepFun xs =
let score = ac_initial_gap_penalty * fromIntegral (idx+1)
tr = reverse [stepFun (xs `T.index` xi) | xi <- [0..idx]]
in Trace score tr
74 changes: 74 additions & 0 deletions src/Data/Align/Types.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
module Data.Align.Types (
AlignConfig(..)
, alignConfig
, localAlignConfig
, Step
, stepLeft, stepRight, stepBoth
, isMatch, isLeft, isRight
, Trace(..)
, tappend
) where


data AlignConfig a s = AlignConfig
{ acPairScore :: a -> a -> s
, ac_initial_gap_penalty :: s
, ac_gap_penalty :: s
}


-- | Configures the scores used when aligning.
--
-- The gap scores should be negative in order to be penalties.
alignConfig :: (a -> a -> s) -- ^ Scoring function
-> s -- ^ Initial gap score
-> s -- ^ Gap score
-> AlignConfig a s
alignConfig = AlignConfig

-- | Configuration for local alignment.
localAlignConfig
:: Num s
=> (a -> a -> s) -- ^ Scoring function.
-> s -- ^ Gap score.
-> AlignConfig a s
localAlignConfig f = alignConfig f 0

-- | Either an unmatched item or a match.
type Step a = Either (Either a a) (a, a)

stepLeft :: a -> Either (Either a b1) b2
stepLeft = Left . Left
stepRight :: b1 -> Either (Either a b1) b2
stepRight = Left . Right
stepBoth :: a1 -> b -> Either a2 (a1, b)
stepBoth a b = Right (a,b)

isMatch :: Step a -> Bool
isMatch (Right _) = True
isMatch _ = False

isLeft :: Step a -> Bool
isLeft (Left (Left _)) = True
isLeft _ = False

isRight :: Step a -> Bool
isRight (Left (Right _)) = True
isRight _ = False

-- | The result of the alignment.
data Trace a s = Trace
{ traceScore :: s
, trace :: [Step a]
}

instance Semigroup s => Semigroup (Trace a s) where
(Trace s1 t1) <> (Trace s2 t2) = Trace (s1 <> s2) (t1 ++ t2)

instance (Show a, Show s) => Show (Trace a s) where
show (Trace s t) = "Trace(score = " ++ show s ++ ", steps = " ++ show t ++ ")"

tappend :: (Functor f, Num s) =>
f (Trace a s) -> Trace a s -> f (Trace a s)
mt `tappend` (Trace z (t:_)) =
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please move to be a local helper at callsite, as mentioned in #2 .

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, but we're now using this in Data.Align.Text as well. Perhaps an INLINE pragma would work instead?

fmap (\(Trace s ts) -> Trace (s+z) (t:ts)) mt
4 changes: 4 additions & 0 deletions stack.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
resolver: lts-14.27

packages:
- .