module Data.ByteString.LCP where
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Unsafe as B
import Data.Function
import Data.Int
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as UM
import Data.ByteString.SuffixArray
import My.Prelude
newtype LCPArray = LCPArray {LCPArray -> Vector Int
getLCPArray :: U.Vector Int}
instance Show LCPArray where
show :: LCPArray -> String
show = Vector Int -> String
forall a. Show a => a -> String
show (Vector Int -> String)
-> (LCPArray -> Vector Int) -> LCPArray -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. LCPArray -> Vector Int
getLCPArray
viewLCPArray :: B.ByteString -> SuffixArray Int32 -> LCPArray -> [String]
viewLCPArray :: ByteString -> SuffixArray Int32 -> LCPArray -> [String]
viewLCPArray ByteString
bs (SuffixArray Vector Int32
sa) (LCPArray Vector Int
lcp) =
((Int, Int) -> String) -> [(Int, Int)] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (\(Int
i, Int
l) -> ByteString -> String
C.unpack (ByteString -> String)
-> (ByteString -> ByteString) -> ByteString -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ByteString -> ByteString
C.take Int
l (ByteString -> String) -> ByteString -> String
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
C.drop (Int32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int32 -> Int) -> Int32 -> Int
forall a b. (a -> b) -> a -> b
$ Vector Int32
sa Vector Int32 -> Int -> Int32
forall a. Unbox a => Vector a -> Int -> a
U.! Int
i) ByteString
bs)
([(Int, Int)] -> [String])
-> (Vector (Int, Int) -> [(Int, Int)])
-> Vector (Int, Int)
-> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Int, Int) -> [(Int, Int)]
forall a. Unbox a => Vector a -> [a]
U.toList
(Vector (Int, Int) -> [String]) -> Vector (Int, Int) -> [String]
forall a b. (a -> b) -> a -> b
$ Vector Int -> Vector (Int, Int)
forall a. Unbox a => Vector a -> Vector (Int, a)
U.indexed Vector Int
lcp
buildLCPArray :: B.ByteString -> SuffixArray Int32 -> LCPArray
buildLCPArray :: ByteString -> SuffixArray Int32 -> LCPArray
buildLCPArray ByteString
bs SuffixArray Int32
sa = Vector Int -> LCPArray
LCPArray (Vector Int -> LCPArray) -> Vector Int -> LCPArray
forall a b. (a -> b) -> a -> b
$
(forall s. ST s (MVector s Int)) -> Vector Int
forall a. Unbox a => (forall s. ST s (MVector s a)) -> Vector a
U.create ((forall s. ST s (MVector s Int)) -> Vector Int)
-> (forall s. ST s (MVector s Int)) -> Vector Int
forall a b. (a -> b) -> a -> b
$ do
MVector s Int
lcp <- Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
UM.unsafeNew Int
n
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
lcp Int
0 Int
0
(Int -> Int -> Int -> ST s Int) -> Int -> Vector Int -> ST s ()
forall (m :: * -> *) b a.
(Monad m, Unbox b) =>
(a -> Int -> b -> m a) -> a -> Vector b -> m ()
U.ifoldM'_
( \Int
h Int
i Int
r -> do
let !j :: Int
j = Int32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int32 -> Int) -> Int32 -> Int
forall a b. (a -> b) -> a -> b
$ SuffixArray Int32 -> Int -> Int32
forall a. Unbox a => SuffixArray a -> Int -> a
indexSA SuffixArray Int32
sa (Int
r Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
h' :: Int
h' =
((Int -> Int) -> Int -> Int) -> Int -> Int
forall a. (a -> a) -> a
fix
( \Int -> Int
loop !Int
d ->
if Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n
Bool -> Bool -> Bool
&& Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n
Bool -> Bool -> Bool
&& ByteString -> Int -> Word8
B.unsafeIndex ByteString
bs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
d) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString -> Int -> Word8
B.unsafeIndex ByteString
bs (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
d)
then Int -> Int
loop (Int
d Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
else Int
d
)
(Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
0 (Int
h Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1))
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
lcp (Int
r Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int
h'
Int -> ST s Int
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
h'
)
Int
0
(Vector Int -> ST s ()) -> Vector Int -> ST s ()
forall a b. (a -> b) -> a -> b
$ Vector Int -> Vector Int
forall a. Unbox a => Vector a -> Vector a
U.init Vector Int
rank
MVector s Int -> ST s (MVector s Int)
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Int
lcp
where
!n :: Int
n = ByteString -> Int
B.length ByteString
bs
!rank :: Vector Int
rank = (forall s. ST s (MVector s Int)) -> Vector Int
forall a. Unbox a => (forall s. ST s (MVector s a)) -> Vector a
U.create ((forall s. ST s (MVector s Int)) -> Vector Int)
-> (forall s. ST s (MVector s Int)) -> Vector Int
forall a b. (a -> b) -> a -> b
$ do
MVector s Int
buf <- Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
UM.unsafeNew (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Int -> (Int -> ST s ()) -> ST s ()
forall (m :: * -> *). Monad m => Int -> (Int -> m ()) -> m ()
rep (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) ((Int -> ST s ()) -> ST s ()) -> (Int -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
buf (Int32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int32 -> Int) -> Int32 -> Int
forall a b. (a -> b) -> a -> b
$ SuffixArray Int32 -> Int -> Int32
forall a. Unbox a => SuffixArray a -> Int -> a
indexSA SuffixArray Int32
sa Int
i) Int
i
MVector s Int -> ST s (MVector s Int)
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Int
buf
naiveLCP :: B.ByteString -> B.ByteString -> Int
naiveLCP :: ByteString -> ByteString -> Int
naiveLCP ByteString
xs ByteString
ys = Int -> Int
go Int
0
where
!n :: Int
n = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (ByteString -> Int
B.length ByteString
xs) (ByteString -> Int
B.length ByteString
ys)
go :: Int -> Int
go !Int
i
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n
, ByteString -> Int -> Word8
B.unsafeIndex ByteString
xs Int
i Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString -> Int -> Word8
B.unsafeIndex ByteString
ys Int
i =
Int -> Int
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
| Bool
otherwise = Int
i