{-# LANGUAGE BangPatterns #-}
module Config.LexerUtils
(
AlexInput
, alexGetByte
, LexerMode(..)
, startString
, nestMode
, endMode
, token
, token_
, section
, number
, untermString
, eofAction
, errorAction
) where
import Data.Char (GeneralCategory(..), generalCategory, isAscii, isSpace, ord)
import Data.Text (Text)
import Data.Word (Word8)
import qualified Data.Text as Text
import Config.Tokens
import qualified Config.NumberParser
type AlexInput = Located Text
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (Located Position
p Text
cs)
= do (Char
c,Text
cs') <- Text -> Maybe (Char, Text)
Text.uncons Text
cs
let !b :: Word8
b = Char -> Word8
byteForChar Char
c
!inp :: AlexInput
inp = Position -> Text -> AlexInput
forall a. Position -> a -> Located a
Located (Position -> Char -> Position
move Position
p Char
c) Text
cs'
(Word8, AlexInput) -> Maybe (Word8, AlexInput)
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Word8
b, AlexInput
inp)
move :: Position -> Char -> Position
move :: Position -> Char -> Position
move (Position Int
ix Int
line Int
column) Char
c =
case Char
c of
Char
'\t' -> Int -> Int -> Int -> Position
Position (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
line (((Int
column Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
8) Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Char
'\n' -> Int -> Int -> Int -> Position
Position (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
line Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
1
Char
_ -> Int -> Int -> Int -> Position
Position (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
line (Int
column Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
eofAction :: Position -> LexerMode -> [Located Token]
eofAction :: Position -> LexerMode -> [Located Token]
eofAction Position
eofPosn LexerMode
st =
case LexerMode
st of
InComment Position
posn LexerMode
_ -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermComment)]
InCommentString Position
posn LexerMode
_ -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermComment)]
InString Position
posn Text
_ -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermString)]
LexerMode
InNormal -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located (Position -> Position
park Position
eofPosn) Token
EOF]
park :: Position -> Position
park :: Position -> Position
park Position
pos
| Position -> Int
posColumn Position
pos Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = Position
pos { posColumn :: Int
posColumn = Int
0 }
| Bool
otherwise = Position
pos { posColumn :: Int
posColumn = Int
0, posLine :: Int
posLine = Position -> Int
posLine Position
pos Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 }
errorAction :: AlexInput -> [Located Token]
errorAction :: AlexInput -> [Located Token]
errorAction AlexInput
inp = [(Text -> Token) -> AlexInput -> Located Token
forall a b. (a -> b) -> Located a -> Located b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Error -> Token
Error (Error -> Token) -> (Text -> Error) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Error
NoMatch (Char -> Error) -> (Text -> Char) -> Text -> Error
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HasCallStack => Text -> Char
Text -> Char
Text.head) AlexInput
inp]
data LexerMode
= InNormal
| !Position !LexerMode
| !Position !LexerMode
| InString !Position !Text
type Action =
Int ->
Located Text ->
LexerMode ->
(LexerMode, [Located Token])
token :: (Text -> Token) -> Action
token :: (Text -> Token) -> Action
token Text -> Token
f Int
len AlexInput
match LexerMode
st = (LexerMode
st, [(Text -> Token) -> AlexInput -> Located Token
forall a b. (a -> b) -> Located a -> Located b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text -> Token
f (Text -> Token) -> (Text -> Text) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
Text.take Int
len) AlexInput
match])
token_ :: Token -> Action
token_ :: Token -> Action
token_ = (Text -> Token) -> Action
token ((Text -> Token) -> Action)
-> (Token -> Text -> Token) -> Token -> Action
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Token -> Text -> Token
forall a b. a -> b -> a
const
nestMode :: (Position -> LexerMode -> LexerMode) -> Action
nestMode :: (Position -> LexerMode -> LexerMode) -> Action
nestMode Position -> LexerMode -> LexerMode
f Int
_ AlexInput
match LexerMode
st = (Position -> LexerMode -> LexerMode
f (AlexInput -> Position
forall a. Located a -> Position
locPosition AlexInput
match) LexerMode
st, [])
startString :: Action
startString :: Action
startString Int
_ (Located Position
posn Text
text) LexerMode
_ = (Position -> Text -> LexerMode
InString Position
posn Text
text, [])
endMode :: Action
endMode :: Action
endMode Int
len (Located Position
endPosn Text
_) LexerMode
mode =
case LexerMode
mode of
LexerMode
InNormal -> (LexerMode
InNormal, [])
InCommentString Position
_ LexerMode
st -> (LexerMode
st, [])
InComment Position
_ LexerMode
st -> (LexerMode
st, [])
InString Position
startPosn Text
input ->
let n :: Int
n = Position -> Int
posIndex Position
endPosn Int -> Int -> Int
forall a. Num a => a -> a -> a
- Position -> Int
posIndex Position
startPosn Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len
badEscape :: Error
badEscape = Text -> Error
BadEscape (String -> Text
Text.pack String
"out of range")
in case ReadS String
forall a. Read a => ReadS a
reads (Text -> String
Text.unpack (Int -> Text -> Text
Text.take Int
n Text
input)) of
[(String
s,String
"")] -> (LexerMode
InNormal, [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
startPosn (Text -> Token
String (String -> Text
Text.pack String
s))])
[(String, String)]
_ -> (LexerMode
InNormal, [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
startPosn (Error -> Token
Error Error
badEscape)])
untermString :: Action
untermString :: Action
untermString Int
_ AlexInput
_ = \(InString Position
posn Text
_) ->
(LexerMode
InNormal, [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermString)])
number ::
Text ->
Token
number :: Text -> Token
number = Number -> Token
Number (Number -> Token) -> (Text -> Number) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Number
Config.NumberParser.number
(String -> Number) -> (Text -> String) -> Text -> Number
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
Text.unpack (Text -> String) -> (Text -> Text) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
Text.toUpper (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
Text.filter (Char
'_' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=)
section :: Text -> Token
section :: Text -> Token
section = Text -> Token
Section (Text -> Token) -> (Text -> Text) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
Text.dropWhileEnd Char -> Bool
isSpace (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HasCallStack => Text -> Text
Text -> Text
Text.init
byteForChar :: Char -> Word8
byteForChar :: Char -> Word8
byteForChar Char
c
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\6' = Word8
non_graphic
| Char -> Bool
isAscii Char
c = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c)
| Bool
otherwise = case Char -> GeneralCategory
generalCategory Char
c of
GeneralCategory
LowercaseLetter -> Word8
lower
GeneralCategory
OtherLetter -> Word8
lower
GeneralCategory
UppercaseLetter -> Word8
upper
GeneralCategory
TitlecaseLetter -> Word8
upper
GeneralCategory
DecimalNumber -> Word8
digit
GeneralCategory
OtherNumber -> Word8
digit
GeneralCategory
ConnectorPunctuation -> Word8
symbol
GeneralCategory
DashPunctuation -> Word8
symbol
GeneralCategory
OtherPunctuation -> Word8
symbol
GeneralCategory
MathSymbol -> Word8
symbol
GeneralCategory
CurrencySymbol -> Word8
symbol
GeneralCategory
ModifierSymbol -> Word8
symbol
GeneralCategory
OtherSymbol -> Word8
symbol
GeneralCategory
Space -> Word8
space
GeneralCategory
ModifierLetter -> Word8
other
GeneralCategory
NonSpacingMark -> Word8
other
GeneralCategory
SpacingCombiningMark -> Word8
other
GeneralCategory
EnclosingMark -> Word8
other
GeneralCategory
LetterNumber -> Word8
other
GeneralCategory
OpenPunctuation -> Word8
other
GeneralCategory
ClosePunctuation -> Word8
other
GeneralCategory
InitialQuote -> Word8
other
GeneralCategory
FinalQuote -> Word8
other
GeneralCategory
_ -> Word8
non_graphic
where
non_graphic :: Word8
non_graphic = Word8
0
upper :: Word8
upper = Word8
1
lower :: Word8
lower = Word8
2
digit :: Word8
digit = Word8
3
symbol :: Word8
symbol = Word8
4
space :: Word8
space = Word8
5
other :: Word8
other = Word8
6