-- | The CSV (comma-separated value) format is defined by RFC 4180,
--   \"Common Format and MIME Type for Comma-Separated Values (CSV) Files\",
--   <http://www.rfc-editor.org/rfc/rfc4180.txt>
--
--   This lazy parser can report all CSV formatting errors, whilst also
--   returning all the valid data, so the user can choose whether to
--   continue, to show warnings, or to halt on error.
--
--   Valid fields retain information about their original location in the
--   input, so a secondary parser from textual fields to typed values
--   can give intelligent error messages.
--
--   In a valid CSV file, all rows must have the same number of columns.
--   This parser will flag a row with the wrong number of columns as a error.
--   (But the error type contains the actual data, so the user can recover
--   it if desired.)  Completely blank lines are also treated as errors,
--   and again the user is free either to filter these out or convert them
--   to a row of actual null fields.

module Text.CSV.Lazy.String
  ( -- * CSV types
    CSVTable
  , CSVRow
  , CSVField(..)
    -- * CSV parsing
  , CSVError(..)
  , CSVResult
  , csvErrors
  , csvTable
  , csvTableFull
  , csvTableHeader
  , parseCSV
  , parseDSV
    -- * Pretty-printing
  , ppCSVError
  , ppCSVField
  , ppCSVTable
  , ppDSVTable
    -- * Conversion between standard and simple representations
  , fromCSVTable
  , toCSVTable
    -- * Selection, validation, and algebra of CSV tables
  , selectFields
  , expectFields
  , mkEmptyColumn
  , joinCSV
  , mkCSVField
  ) where

import Data.List (groupBy, partition, elemIndex, intercalate, takeWhile
                 ,deleteFirstsBy, nub)
import Data.Function (on)
import Data.Maybe (fromJust)

-- | A CSV table is a sequence of rows.  All rows have the same number
--   of fields.
type CSVTable   = [CSVRow]

-- | A CSV row is just a sequence of fields.
type CSVRow     = [CSVField]

-- | A CSV field's content is stored with its logical row and column number,
--   as well as its textual extent.  This information is necessary if you
--   want to generate good error messages in a secondary parsing stage,
--   should you choose to convert the textual fields to typed data values.
data CSVField   = CSVField       { CSVField -> Int
csvRowNum        :: !Int
                                 , CSVField -> Int
csvColNum        :: !Int
                                 , CSVField -> (Int, Int)
csvTextStart     :: !(Int,Int)
                                 , CSVField -> (Int, Int)
csvTextEnd       :: !(Int,Int)
                                 , CSVField -> String
csvFieldContent  :: !String
                                 , CSVField -> Bool
csvFieldQuoted   :: !Bool }
                | CSVFieldError  { csvRowNum        :: !Int
                                 , csvColNum        :: !Int
                                 , csvTextStart     :: !(Int,Int)
                                 , csvTextEnd       :: !(Int,Int)
                                 , CSVField -> String
csvFieldError    :: !String }
                                                    deriving (CSVField -> CSVField -> Bool
(CSVField -> CSVField -> Bool)
-> (CSVField -> CSVField -> Bool) -> Eq CSVField
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CSVField -> CSVField -> Bool
$c/= :: CSVField -> CSVField -> Bool
== :: CSVField -> CSVField -> Bool
$c== :: CSVField -> CSVField -> Bool
Eq,Int -> CSVField -> ShowS
[CSVField] -> ShowS
CSVField -> String
(Int -> CSVField -> ShowS)
-> (CSVField -> String) -> ([CSVField] -> ShowS) -> Show CSVField
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CSVField] -> ShowS
$cshowList :: [CSVField] -> ShowS
show :: CSVField -> String
$cshow :: CSVField -> String
showsPrec :: Int -> CSVField -> ShowS
$cshowsPrec :: Int -> CSVField -> ShowS
Show)

-- | A structured error type for CSV formatting mistakes.
data CSVError   = IncorrectRow   { CSVError -> Int
csvRow           :: !Int
                                 , CSVError -> Int
csvColsExpected  :: !Int
                                 , CSVError -> Int
csvColsActual    :: !Int
                                 , CSVError -> [CSVField]
csvFields        :: [CSVField] }
                | BlankLine      { csvRow           :: !Int
                                 , csvColsExpected  :: !Int
                                 , csvColsActual    :: !Int
                                 , CSVError -> CSVField
csvField         :: CSVField }
                | FieldError     { csvField         :: CSVField }
                | DuplicateHeader{ csvColsExpected  :: !Int
                                 , CSVError -> Int
csvHeaderSerial  :: !Int
                                 , CSVError -> String
csvDuplicate     :: !String }
                | NoData
                                                    deriving (CSVError -> CSVError -> Bool
(CSVError -> CSVError -> Bool)
-> (CSVError -> CSVError -> Bool) -> Eq CSVError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CSVError -> CSVError -> Bool
$c/= :: CSVError -> CSVError -> Bool
== :: CSVError -> CSVError -> Bool
$c== :: CSVError -> CSVError -> Bool
Eq,Int -> CSVError -> ShowS
[CSVError] -> ShowS
CSVError -> String
(Int -> CSVError -> ShowS)
-> (CSVError -> String) -> ([CSVError] -> ShowS) -> Show CSVError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CSVError] -> ShowS
$cshowList :: [CSVError] -> ShowS
show :: CSVError -> String
$cshow :: CSVError -> String
showsPrec :: Int -> CSVError -> ShowS
$cshowsPrec :: Int -> CSVError -> ShowS
Show)

-- | The result of parsing a CSV input is a mixed collection of errors
--   and valid rows.  This way of representing things is crucial to the
--   ability to parse lazily whilst still catching format errors.
type CSVResult  = [Either [CSVError] CSVRow]

-- | Extract just the valid portions of a CSV parse.
csvTable    :: CSVResult -> CSVTable
csvTable :: CSVResult -> CSVTable
csvTable  CSVResult
r  = [ [CSVField]
v | Right [CSVField]
v <- CSVResult
r ]
-- | Extract just the errors from a CSV parse.
csvErrors   :: CSVResult -> [CSVError]
csvErrors :: CSVResult -> [CSVError]
csvErrors CSVResult
r  = [[CSVError]] -> [CSVError]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [ [CSVError]
v | Left  [CSVError]
v <- CSVResult
r ]
-- | Extract the full table, including invalid rows, repaired with padding.
--   and de-duplicated headers.
csvTableFull:: CSVResult -> CSVTable
csvTableFull :: CSVResult -> CSVTable
csvTableFull = (Either [CSVError] [CSVField] -> [CSVField])
-> CSVResult -> CSVTable
forall a b. (a -> b) -> [a] -> [b]
map Either [CSVError] [CSVField] -> [CSVField]
beCareful (CSVResult -> CSVTable)
-> (CSVResult -> CSVResult) -> CSVResult -> CSVTable
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVResult -> CSVResult
deduplicate
    where beCareful :: Either [CSVError] [CSVField] -> [CSVField]
beCareful (Right [CSVField]
row) = [CSVField]
row
          beCareful (Left (r :: CSVError
r@IncorrectRow{}:[CSVError]
_)) =
              CSVError -> [CSVField]
csvFields CSVError
r [CSVField] -> [CSVField] -> [CSVField]
forall a. [a] -> [a] -> [a]
++
              Int -> CSVField -> [CSVField]
forall a. Int -> a -> [a]
replicate (CSVError -> Int
csvColsExpected CSVError
r Int -> Int -> Int
forall a. Num a => a -> a -> a
- CSVError -> Int
csvColsActual CSVError
r)
                        (Int -> Int -> String -> CSVField
mkCSVField (CSVError -> Int
csvRow CSVError
r) Int
0 String
"")
          beCareful (Left (r :: CSVError
r@BlankLine{}:[CSVError]
_)) =
              Int -> CSVField -> [CSVField]
forall a. Int -> a -> [a]
replicate (CSVError -> Int
csvColsExpected CSVError
r)
                        (Int -> Int -> String -> CSVField
mkCSVField (CSVError -> Int
csvRow CSVError
r) Int
0 String
"")
          beCareful (Left (r :: CSVError
r@DuplicateHeader{}:[CSVError]
_)) = -- obsolete with deduping
              Int -> CSVField -> [CSVField]
forall a. Int -> a -> [a]
replicate (CSVError -> Int
csvColsExpected CSVError
r)
                        (Int -> Int -> String -> CSVField
mkCSVField Int
0 Int
0 String
"")
          beCareful (Left (FieldError{}:[CSVError]
r)) = Either [CSVError] [CSVField] -> [CSVField]
beCareful ([CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left [CSVError]
r)
          beCareful (Left (CSVError
NoData:[CSVError]
_))       = []
          beCareful (Left [])               = []

          deduplicate :: CSVResult -> CSVResult
deduplicate (Left (errs :: [CSVError]
errs@(DuplicateHeader{}:[CSVError]
_)):Right [CSVField]
heads:CSVResult
rows) =
--               Right (reverse $ foldl replace [] heads)
                 [CSVField] -> Either [CSVError] [CSVField]
forall a b. b -> Either a b
Right ([CSVError] -> [(CSVField, Int)] -> [CSVField]
replaceInOrder [CSVError]
errs ([CSVField] -> [Int] -> [(CSVField, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [CSVField]
heads [Int
0..]))
                 Either [CSVError] [CSVField] -> CSVResult -> CSVResult
forall a. a -> [a] -> [a]
: CSVResult
rows
          deduplicate CSVResult
rows = CSVResult
rows
{-
          replace output header
              | headerName `elem` map csvFieldContent output
                          = header{ csvFieldContent=headerName++"_duplicate" }
                                  : output
              | otherwise = header: output
              where headerName = csvFieldContent header
-}
          replaceInOrder :: [CSVError] -> [(CSVField, Int)] -> [CSVField]
replaceInOrder []       [(CSVField, Int)]
headers        = ((CSVField, Int) -> CSVField) -> [(CSVField, Int)] -> [CSVField]
forall a b. (a -> b) -> [a] -> [b]
map (CSVField, Int) -> CSVField
forall a b. (a, b) -> a
fst [(CSVField, Int)]
headers
          replaceInOrder [CSVError]
_        []             = []
          replaceInOrder (CSVError
d:[CSVError]
dups) ((CSVField
h,Int
n):[(CSVField, Int)]
headers)
              | CSVError -> Int
csvHeaderSerial CSVError
d Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
n = CSVField
h{ csvFieldContent :: String
csvFieldContent =
                                                (CSVError -> String
csvDuplicate CSVError
dString -> ShowS
forall a. [a] -> [a] -> [a]
++String
"_"String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show Int
n) }
                                          CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: [CSVError] -> [(CSVField, Int)] -> [CSVField]
replaceInOrder [CSVError]
dups     [(CSVField, Int)]
headers
              | Bool
otherwise              = CSVField
hCSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: [CSVError] -> [(CSVField, Int)] -> [CSVField]
replaceInOrder (CSVError
dCSVError -> [CSVError] -> [CSVError]
forall a. a -> [a] -> [a]
:[CSVError]
dups) [(CSVField, Int)]
headers

-- | The header row of the CSV table, assuming it is non-empty.
csvTableHeader :: CSVResult -> [String]
csvTableHeader :: CSVResult -> [String]
csvTableHeader = (CSVField -> String) -> [CSVField] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> String
csvFieldContent ([CSVField] -> [String])
-> (CSVResult -> [CSVField]) -> CSVResult -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVResult -> [CSVField]
forall {a} {t}. [Either a t] -> t
firstRow
    where firstRow :: [Either a t] -> t
firstRow (Left a
_: [Either a t]
rest) = [Either a t] -> t
firstRow [Either a t]
rest
          firstRow (Right t
x: [Either a t]
_)   = t
x


-- | A first-stage parser for CSV (comma-separated values) data.
--   The individual fields remain as text, but errors in CSV formatting
--   are reported.  Errors (containing unrecognisable rows/fields) are
--   interspersed with the valid rows/fields.
parseCSV :: String -> CSVResult
parseCSV :: String -> CSVResult
parseCSV = Bool -> Char -> String -> CSVResult
parseDSV Bool
True Char
','

-- | Sometimes CSV is not comma-separated, but delimiter-separated
--   values (DSV).  The choice of delimiter is arbitrary, but semi-colon
--   is common in locales where comma is used as a decimal point, and tab
--   is also common.  The Boolean argument is
--   whether newlines should be accepted within quoted fields.  The CSV RFC
--   says newlines can occur in quotes, but other DSV formats might say
--   otherwise.  You can often get better error messages if newlines are
--   disallowed.
parseDSV :: Bool -> Char -> String -> CSVResult
parseDSV :: Bool -> Char -> String -> CSVResult
parseDSV Bool
qn Char
delim = CSVTable -> CSVResult
validate
                    (CSVTable -> CSVResult)
-> (String -> CSVTable) -> String -> CSVResult
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (CSVField -> CSVField -> Bool) -> [CSVField] -> CSVTable
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
(==)(Int -> Int -> Bool)
-> (CSVField -> Int) -> CSVField -> CSVField -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on`CSVField -> Int
csvRowNum)
                    ([CSVField] -> CSVTable)
-> (String -> [CSVField]) -> String -> CSVTable
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> Char -> String -> [CSVField]
lexCSV Bool
qn Char
delim

validate          :: [CSVRow] -> CSVResult
validate :: CSVTable -> CSVResult
validate []        = [[CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left [CSVError
NoData]]
validate xs :: CSVTable
xs@([CSVField]
x:CSVTable
_)  = [CSVField] -> CSVResult -> CSVResult
checkDuplicateHeaders [CSVField]
x (CSVResult -> CSVResult) -> CSVResult -> CSVResult
forall a b. (a -> b) -> a -> b
$ ([CSVField] -> Either [CSVError] [CSVField])
-> CSVTable -> CSVResult
forall a b. (a -> b) -> [a] -> [b]
map (Int -> [CSVField] -> Either [CSVError] [CSVField]
extractErrs ([CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
x)) CSVTable
xs

extractErrs       :: Int -> CSVRow -> Either [CSVError] CSVRow
extractErrs :: Int -> [CSVField] -> Either [CSVError] [CSVField]
extractErrs Int
size [CSVField]
row
    | [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
row0 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
size Bool -> Bool -> Bool
&& [CSVField] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CSVField]
errs0    = [CSVField] -> Either [CSVError] [CSVField]
forall a b. b -> Either a b
Right [CSVField]
row0
    | [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
row0 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1    Bool -> Bool -> Bool
&& CSVField -> Bool
empty CSVField
field0  = [CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left [CSVField -> CSVError
blankLine CSVField
field0]
    | Bool
otherwise                            = [CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left ((CSVField -> CSVError) -> [CSVField] -> [CSVError]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> CSVError
convert [CSVField]
errs0
                                                   [CSVError] -> [CSVError] -> [CSVError]
forall a. [a] -> [a] -> [a]
++ [CSVField] -> [CSVError]
validateColumns [CSVField]
row0)
  where
  ([CSVField]
row0,[CSVField]
errs0)   = (CSVField -> Bool) -> [CSVField] -> ([CSVField], [CSVField])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition CSVField -> Bool
isField [CSVField]
row
  (CSVField
field0:[CSVField]
_)     = [CSVField]
row0

  isField :: CSVField -> Bool
isField (CSVField{})      = Bool
True
  isField (CSVFieldError{}) = Bool
False

  empty :: CSVField -> Bool
empty   f :: CSVField
f@(CSVField{})    = String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (CSVField -> String
csvFieldContent CSVField
f)
  empty   CSVField
_                 = Bool
False

  convert :: CSVField -> CSVError
convert CSVField
err = FieldError :: CSVField -> CSVError
FieldError {csvField :: CSVField
csvField = CSVField
err}

  validateColumns :: [CSVField] -> [CSVError]
validateColumns [CSVField]
r  =
      if [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
r Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
size then []
      else [ IncorrectRow :: Int -> Int -> Int -> [CSVField] -> CSVError
IncorrectRow{ csvRow :: Int
csvRow  = if [CSVField] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CSVField]
r then Int
0 else CSVField -> Int
csvRowNum ([CSVField] -> CSVField
forall a. [a] -> a
head [CSVField]
r)
                         , csvColsExpected :: Int
csvColsExpected  = Int
size
                         , csvColsActual :: Int
csvColsActual    = [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
r
                         , csvFields :: [CSVField]
csvFields        = [CSVField]
r } ]
  blankLine :: CSVField -> CSVError
blankLine CSVField
f = BlankLine :: Int -> Int -> Int -> CSVField -> CSVError
BlankLine{ csvRow :: Int
csvRow           = CSVField -> Int
csvRowNum CSVField
f
                         , csvColsExpected :: Int
csvColsExpected  = Int
size
                         , csvColsActual :: Int
csvColsActual    = Int
1
                         , csvField :: CSVField
csvField         = CSVField
f }

checkDuplicateHeaders :: CSVRow -> CSVResult -> CSVResult
checkDuplicateHeaders :: [CSVField] -> CSVResult -> CSVResult
checkDuplicateHeaders [CSVField]
row CSVResult
result =
    let headers :: [CSVField]
headers = [ CSVField
f | f :: CSVField
f@(CSVField{}) <- [CSVField]
row ]
        dups :: [CSVField]
dups    = (CSVField -> CSVField -> Bool)
-> [CSVField] -> [CSVField] -> [CSVField]
forall a. (a -> a -> Bool) -> [a] -> [a] -> [a]
deleteFirstsBy (String -> String -> Bool
forall a. Eq a => a -> a -> Bool
(==)(String -> String -> Bool)
-> (CSVField -> String) -> CSVField -> CSVField -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on`CSVField -> String
csvFieldContent)
                                 [CSVField]
headers ([CSVField] -> [CSVField]
forall a. Eq a => [a] -> [a]
nub [CSVField]
headers)
        n :: Int
n       = [CSVField] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CSVField]
headers
    in if [CSVField] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CSVField]
dups then CSVResult
result
       else [CSVError] -> Either [CSVError] [CSVField]
forall a b. a -> Either a b
Left ((CSVField -> CSVError) -> [CSVField] -> [CSVError]
forall a b. (a -> b) -> [a] -> [b]
map (\CSVField
d-> DuplicateHeader :: Int -> Int -> String -> CSVError
DuplicateHeader
                              { csvColsExpected :: Int
csvColsExpected = Int
n
                              , csvHeaderSerial :: Int
csvHeaderSerial = CSVField -> Int
csvColNum CSVField
d
                              , csvDuplicate :: String
csvDuplicate    = CSVField -> String
csvFieldContent CSVField
d })
                      [CSVField]
dups)
            Either [CSVError] [CSVField] -> CSVResult -> CSVResult
forall a. a -> [a] -> [a]
: CSVResult
result


-- Reading CSV data is essentially lexical, and can be implemented with a
-- simple finite state machine.  We keep track of logical row number,
-- logical column number (in tabular terms), and textual position (row,col)
-- to enable good error messages.
-- Positional data is retained even after successful lexing, in case a
-- second-stage field parser wants to complain.
--
-- A double-quoted CSV field may contain commas, newlines, and double quotes.

data CSVState  = CSVState  { CSVState -> Int
tableRow, CSVState -> Int
tableCol  :: !Int
                           , CSVState -> Int
textRow,  CSVState -> Int
textCol   :: !Int }

incTableRow, incTableCol, incTextRow, incTextCol :: CSVState -> CSVState
incTableRow :: CSVState -> CSVState
incTableRow  CSVState
st = CSVState
st { tableRow :: Int
tableRow  = CSVState -> Int
tableRow  CSVState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 }
incTableCol :: CSVState -> CSVState
incTableCol  CSVState
st = CSVState
st { tableCol :: Int
tableCol  = CSVState -> Int
tableCol  CSVState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 }
incTextRow :: CSVState -> CSVState
incTextRow   CSVState
st = CSVState
st { textRow :: Int
textRow   = CSVState -> Int
textRow   CSVState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 }
incTextCol :: CSVState -> CSVState
incTextCol   CSVState
st = CSVState
st { textCol :: Int
textCol   = CSVState -> Int
textCol   CSVState
st Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 }

-- Lexer is a small finite state machine.
lexCSV :: Bool -> Char -> [Char] -> [CSVField]
lexCSV :: Bool -> Char -> String -> [CSVField]
lexCSV Bool
quotedNewline Char
delim =
    CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple CSVState :: Int -> Int -> Int -> Int -> CSVState
CSVState{tableRow :: Int
tableRow=Int
1,tableCol :: Int
tableCol=Int
1,textRow :: Int
textRow=Int
1,textCol :: Int
textCol=Int
1} (Int
1,Int
1) []
  where
  -- 'simple' recognises an unquoted field, and delimiter char as separator
  simple :: CSVState -> (Int,Int) -> String -> String -> [CSVField]
  simple :: CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple CSVState
_ (Int, Int)
_     []         []    = []
  simple CSVState
s (Int, Int)
begin String
acc        []    = CSVState -> (Int, Int) -> String -> Bool -> CSVField
mkField CSVState
s (Int, Int)
begin String
acc Bool
False CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: []
  simple CSVState
s (Int, Int)
begin String
acc     (Char
c:String
cs)
            | Bool -> Bool
not (Char -> Bool
interesting Char
c) = CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple (CSVState -> CSVState
incTextCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$! CSVState
s) (Int, Int)
begin (Char
cChar -> ShowS
forall a. a -> [a] -> [a]
:String
acc) String
cs
  simple CSVState
s (Int, Int)
begin String
acc  (Char
c:Char
'"':String
cs)
                      | Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
delim  = CSVState -> (Int, Int) -> String -> Bool -> CSVField
mkField CSVState
s (Int, Int)
begin String
acc Bool
False CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
string CSVState
s' (CSVState -> Int
textRow CSVState
s',CSVState -> Int
textCol CSVState
s') [] String
cs
                                    where s' :: CSVState
s' = CSVState -> CSVState
incTextCol (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextCol (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                                               CSVState -> CSVState
incTableCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$! CSVState
s
  simple CSVState
s (Int, Int)
begin String
acc  (Char
c:String
cs)
                      | Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
delim  = CSVState -> (Int, Int) -> String -> Bool -> CSVField
mkField CSVState
s (Int, Int)
begin String
acc Bool
False CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple CSVState
s' (CSVState -> Int
textRow CSVState
s',CSVState -> Int
textCol CSVState
s') [] String
cs
                                    where s' :: CSVState
s' = CSVState -> CSVState
incTableCol (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$! CSVState
s
  simple CSVState
s (Int, Int)
begin String
acc  (Char
'\r':Char
'\n':String
cs)
                                  = CSVState -> (Int, Int) -> String -> Bool -> CSVField
mkField CSVState
s (Int, Int)
begin String
acc Bool
False CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple CSVState
s' (CSVState -> Int
textRow CSVState
s',Int
1) [] String
cs
                                    where s' :: CSVState
s' = CSVState -> CSVState
incTableRow (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$!
                                               CSVState
s {tableCol :: Int
tableCol=Int
1, textCol :: Int
textCol=Int
1}
  simple CSVState
s (Int, Int)
begin String
acc  (Char
'\n' :String
cs)  = CSVState -> (Int, Int) -> String -> Bool -> CSVField
mkField CSVState
s (Int, Int)
begin String
acc Bool
False CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple CSVState
s' (CSVState -> Int
textRow CSVState
s',Int
1) [] String
cs
                                    where s' :: CSVState
s' = CSVState -> CSVState
incTableRow (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$!
                                               CSVState
s {tableCol :: Int
tableCol=Int
1, textCol :: Int
textCol=Int
1}
  simple CSVState
s (Int, Int)
begin String
acc  (Char
'\r' :String
cs)  = CSVState -> (Int, Int) -> String -> Bool -> CSVField
mkField CSVState
s (Int, Int)
begin String
acc Bool
False CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple CSVState
s' (CSVState -> Int
textRow CSVState
s',Int
1) [] String
cs
                                    where s' :: CSVState
s' = CSVState -> CSVState
incTableRow (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$!
                                               CSVState
s {tableCol :: Int
tableCol=Int
1, textCol :: Int
textCol=Int
1}
  simple CSVState
s (Int, Int)
begin []   (Char
'"'  :String
cs)  = CSVState -> (Int, Int) -> String -> String -> [CSVField]
string (CSVState -> CSVState
incTextCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$! CSVState
s) (Int, Int)
begin [] String
cs
  simple CSVState
s (Int, Int)
begin String
acc  (Char
'"'  :String
cs)  = CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
s (Int, Int)
begin
                                            String
"Start-quote not next to comma"CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
string (CSVState -> CSVState
incTextCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$! CSVState
s) (Int, Int)
begin String
acc String
cs

  -- 'string' recognises a double-quoted field containing commas and newlines
  string :: CSVState -> (Int,Int) -> String -> String -> [CSVField]
  string :: CSVState -> (Int, Int) -> String -> String -> [CSVField]
string CSVState
s (Int, Int)
begin []   []          = CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
s (Int, Int)
begin String
"Data ends at start-quote"CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    []
  string CSVState
s (Int, Int)
begin String
acc  []          = CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
s (Int, Int)
begin String
"Data ends in quoted field"CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    []
  string CSVState
s (Int, Int)
begin String
acc   (Char
c:String
cs)
    | Bool -> Bool
not (Char -> Bool
interestingInString Char
c) = CSVState -> (Int, Int) -> String -> String -> [CSVField]
string (CSVState -> CSVState
incTextCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$! CSVState
s) (Int, Int)
begin (Char
cChar -> ShowS
forall a. a -> [a] -> [a]
:String
acc) String
cs
  string CSVState
s (Int, Int)
begin String
acc (Char
'"':Char
'"':String
cs) = CSVState -> (Int, Int) -> String -> String -> [CSVField]
string CSVState
s' (Int, Int)
begin (Char
'"'Char -> ShowS
forall a. a -> [a] -> [a]
:String
acc) String
cs
                                    where s' :: CSVState
s' = CSVState -> CSVState
incTextCol (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$! CSVState
s
  string CSVState
s (Int, Int)
begin String
acc (Char
'"':Char
c:Char
'"':String
cs)
                       | Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
delim = CSVState -> (Int, Int) -> String -> Bool -> CSVField
mkField CSVState
s (Int, Int)
begin String
acc Bool
True CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
string CSVState
s' (CSVState -> Int
textRow CSVState
s',CSVState -> Int
textCol CSVState
s') [] String
cs
                                    where s' :: CSVState
s' = CSVState -> CSVState
incTextCol (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextCol (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                                               CSVState -> CSVState
incTextCol (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTableCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$! CSVState
s
  string CSVState
s (Int, Int)
begin String
acc (Char
'"':Char
c:String
cs)
                       | Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
delim = CSVState -> (Int, Int) -> String -> Bool -> CSVField
mkField CSVState
s (Int, Int)
begin String
acc Bool
True CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple CSVState
s' (CSVState -> Int
textRow CSVState
s',CSVState -> Int
textCol CSVState
s') [] String
cs
                                    where s' :: CSVState
s' = CSVState -> CSVState
incTextCol (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextCol (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                                               CSVState -> CSVState
incTableCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$! CSVState
s
  string CSVState
s (Int, Int)
begin String
acc (Char
'"':Char
'\n':String
cs)= CSVState -> (Int, Int) -> String -> Bool -> CSVField
mkField CSVState
s (Int, Int)
begin String
acc Bool
True CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple CSVState
s' (CSVState -> Int
textRow CSVState
s',Int
1) [] String
cs
                                    where s' :: CSVState
s' = CSVState -> CSVState
incTableRow (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$!
                                               CSVState
s {tableCol :: Int
tableCol=Int
1, textCol :: Int
textCol=Int
1}
  string CSVState
s (Int, Int)
begin String
acc (Char
'"':Char
'\r':Char
'\n':String
cs)
                                  = CSVState -> (Int, Int) -> String -> Bool -> CSVField
mkField CSVState
s (Int, Int)
begin String
acc Bool
True CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple CSVState
s' (CSVState -> Int
textRow CSVState
s',Int
1) [] String
cs
                                    where s' :: CSVState
s' = CSVState -> CSVState
incTableRow (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$!
                                               CSVState
s {tableCol :: Int
tableCol=Int
1, textCol :: Int
textCol=Int
1}
  string CSVState
s (Int, Int)
begin String
acc (Char
'"':[])     = CSVState -> (Int, Int) -> String -> Bool -> CSVField
mkField CSVState
s (Int, Int)
begin String
acc Bool
True CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
: []
  string CSVState
s (Int, Int)
begin String
acc (Char
'"':String
cs)     = CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
s (Int, Int)
begin
                                            String
"End-quote not followed by comma"CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple (CSVState -> CSVState
incTextCol (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$! CSVState
s) (Int, Int)
begin String
acc String
cs
  string CSVState
s (Int, Int)
begin String
acc (Char
'\r':Char
'\n':String
cs)
                  | Bool
quotedNewline = CSVState -> (Int, Int) -> String -> String -> [CSVField]
string CSVState
s' (Int, Int)
begin (Char
'\n'Char -> ShowS
forall a. a -> [a] -> [a]
:String
acc) String
cs
                  | Bool
otherwise     = CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
s (Int, Int)
begin
                                            String
"Found newline within quoted field"CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple CSVState
s'' (CSVState -> Int
textRow CSVState
s'',CSVState -> Int
textCol CSVState
s'') [] String
cs
                                    where s' :: CSVState
s'  = CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$! CSVState
s {textCol :: Int
textCol=Int
1}
                                          s'' :: CSVState
s'' = CSVState -> CSVState
incTableRow (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$!
                                                CSVState
s {textCol :: Int
textCol=Int
1, tableCol :: Int
tableCol=Int
1}
  string CSVState
s (Int, Int)
begin String
acc (Char
'\n' :String
cs)
                  | Bool
quotedNewline = CSVState -> (Int, Int) -> String -> String -> [CSVField]
string CSVState
s' (Int, Int)
begin (Char
'\n'Char -> ShowS
forall a. a -> [a] -> [a]
:String
acc) String
cs
                  | Bool
otherwise     = CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
s (Int, Int)
begin
                                            String
"Found newline within quoted field"CSVField -> [CSVField] -> [CSVField]
forall a. a -> [a] -> [a]
:
                                    CSVState -> (Int, Int) -> String -> String -> [CSVField]
simple CSVState
s'' (CSVState -> Int
textRow CSVState
s'',CSVState -> Int
textCol CSVState
s'') [] String
cs
                                    where s' :: CSVState
s'  = CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$! CSVState
s {textCol :: Int
textCol=Int
1}
                                          s'' :: CSVState
s'' = CSVState -> CSVState
incTableRow (CSVState -> CSVState)
-> (CSVState -> CSVState) -> CSVState -> CSVState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSVState -> CSVState
incTextRow (CSVState -> CSVState) -> CSVState -> CSVState
forall a b. (a -> b) -> a -> b
$!
                                                CSVState
s {textCol :: Int
textCol=Int
1, tableCol :: Int
tableCol=Int
1}

  interesting :: Char -> Bool
  interesting :: Char -> Bool
interesting Char
'\n' = Bool
True
  interesting Char
'\r' = Bool
True
  interesting Char
'"'  = Bool
True
  interesting Char
c    = Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
delim

  interestingInString :: Char -> Bool
  interestingInString :: Char -> Bool
interestingInString Char
'\n' = Bool
True
  interestingInString Char
'\r' = Bool
True
  interestingInString Char
'"'  = Bool
True
  interestingInString Char
_    = Bool
False

  -- generate the lexical tokens representing either a field or an error
  mkField :: CSVState -> (Int, Int) -> String -> Bool -> CSVField
mkField CSVState
st (Int, Int)
begin String
f Bool
q =    CSVField :: Int
-> Int -> (Int, Int) -> (Int, Int) -> String -> Bool -> CSVField
CSVField { csvRowNum :: Int
csvRowNum       = CSVState -> Int
tableRow CSVState
st
                                     , csvColNum :: Int
csvColNum       = CSVState -> Int
tableCol CSVState
st
                                     , csvTextStart :: (Int, Int)
csvTextStart    = (Int, Int)
begin
                                     , csvTextEnd :: (Int, Int)
csvTextEnd      = (CSVState -> Int
textRow CSVState
st,CSVState -> Int
textCol CSVState
st)
                                     , csvFieldContent :: String
csvFieldContent = ShowS
forall a. [a] -> [a]
reverse String
f
                                     , csvFieldQuoted :: Bool
csvFieldQuoted  = Bool
q }
  mkError :: CSVState -> (Int, Int) -> String -> CSVField
mkError CSVState
st (Int, Int)
begin String
e = CSVFieldError :: Int -> Int -> (Int, Int) -> (Int, Int) -> String -> CSVField
CSVFieldError { csvRowNum :: Int
csvRowNum       = CSVState -> Int
tableRow CSVState
st
                                     , csvColNum :: Int
csvColNum       = CSVState -> Int
tableCol CSVState
st
                                     , csvTextStart :: (Int, Int)
csvTextStart    = (Int, Int)
begin
                                     , csvTextEnd :: (Int, Int)
csvTextEnd      = (CSVState -> Int
textRow CSVState
st,CSVState -> Int
textCol CSVState
st)
                                     , csvFieldError :: String
csvFieldError   = String
e }

-- | Some pretty-printing for structured CSV errors.
ppCSVError :: CSVError -> String
ppCSVError :: CSVError -> String
ppCSVError (err :: CSVError
err@IncorrectRow{}) =
        String
"\nRow "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVError -> Int
csvRow CSVError
err)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
" has wrong number of fields."String -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
"\n    Expected "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVError -> Int
csvColsExpected CSVError
err)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
" but got "String -> ShowS
forall a. [a] -> [a] -> [a]
++
        Int -> String
forall a. Show a => a -> String
show (CSVError -> Int
csvColsActual CSVError
err)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
"."String -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
"\n    The fields are:"String -> ShowS
forall a. [a] -> [a] -> [a]
++
        Int -> ShowS
indent Int
8 ((CSVField -> String) -> [CSVField] -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap CSVField -> String
ppCSVField (CSVError -> [CSVField]
csvFields CSVError
err))
ppCSVError (err :: CSVError
err@BlankLine{})  =
        String
"\nRow "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVError -> Int
csvRow CSVError
err)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
" is blank."String -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
"\n    Expected "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVError -> Int
csvColsExpected CSVError
err)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
" fields."
ppCSVError (err :: CSVError
err@FieldError{}) = CSVField -> String
ppCSVField (CSVError -> CSVField
csvField CSVError
err)
ppCSVError (err :: CSVError
err@DuplicateHeader{}) =
        String
"\nThere are two (or more) identical column headers: "String -> ShowS
forall a. [a] -> [a] -> [a]
++
        ShowS
forall a. Show a => a -> String
show (CSVError -> String
csvDuplicate CSVError
err)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
"."String -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
"\n    Column number "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVError -> Int
csvHeaderSerial CSVError
err)
ppCSVError (err :: CSVError
err@NoData{})     =
        String
"\nNo usable data (after accounting for any other errors)."

-- | Pretty-printing for CSV fields, shows positional information in addition
--   to the textual content.
ppCSVField :: CSVField -> String
ppCSVField :: CSVField -> String
ppCSVField (f :: CSVField
f@CSVField{}) =
        String
"\n"String -> ShowS
forall a. [a] -> [a] -> [a]
++Bool -> ShowS
quoted (CSVField -> Bool
csvFieldQuoted CSVField
f) (CSVField -> String
csvFieldContent CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
"\nin row "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVField -> Int
csvRowNum CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
" at column "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVField -> Int
csvColNum CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
" (textually from "String -> ShowS
forall a. [a] -> [a] -> [a]
++(Int, Int) -> String
forall a. Show a => a -> String
show (CSVField -> (Int, Int)
csvTextStart CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
" to "String -> ShowS
forall a. [a] -> [a] -> [a]
++
        (Int, Int) -> String
forall a. Show a => a -> String
show (CSVField -> (Int, Int)
csvTextEnd CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
")"
ppCSVField (f :: CSVField
f@CSVFieldError{}) =
        String
"\n"String -> ShowS
forall a. [a] -> [a] -> [a]
++CSVField -> String
csvFieldError CSVField
fString -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
"\nin row "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVField -> Int
csvRowNum CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
" at column "String -> ShowS
forall a. [a] -> [a] -> [a]
++Int -> String
forall a. Show a => a -> String
show (CSVField -> Int
csvColNum CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
" (textually from "String -> ShowS
forall a. [a] -> [a] -> [a]
++(Int, Int) -> String
forall a. Show a => a -> String
show (CSVField -> (Int, Int)
csvTextStart CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
" to "String -> ShowS
forall a. [a] -> [a] -> [a]
++
        (Int, Int) -> String
forall a. Show a => a -> String
show (CSVField -> (Int, Int)
csvTextEnd CSVField
f)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
")"

-- | Turn a full CSV table back into text, as much like the original
--   input as possible,  e.g. preserving quoted/unquoted format of fields.
ppCSVTable :: CSVTable -> String
ppCSVTable :: CSVTable -> String
ppCSVTable = [String] -> String
unlines ([String] -> String)
-> (CSVTable -> [String]) -> CSVTable -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([CSVField] -> String) -> CSVTable -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
"," ([String] -> String)
-> ([CSVField] -> [String]) -> [CSVField] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (CSVField -> String) -> [CSVField] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> String
ppField)
  where ppField :: CSVField -> String
ppField CSVField
f = Bool -> ShowS
quoted (CSVField -> Bool
csvFieldQuoted CSVField
f) (CSVField -> String
csvFieldContent CSVField
f)

-- | Turn a full CSV table back into text, using the given delimiter
--   character.  Quoted/unquoted formatting of the original is preserved.
--   The Boolean argument is to repair fields containing newlines, by
--   replacing the nl with a space.
ppDSVTable :: Bool -> Char -> CSVTable -> String
ppDSVTable :: Bool -> Char -> CSVTable -> String
ppDSVTable Bool
nl Char
delim = [String] -> String
unlines ([String] -> String)
-> (CSVTable -> [String]) -> CSVTable -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([CSVField] -> String) -> CSVTable -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate [Char
delim] ([String] -> String)
-> ([CSVField] -> [String]) -> [CSVField] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (CSVField -> String) -> [CSVField] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> String
ppField)
  where ppField :: CSVField -> String
ppField CSVField
f = Bool -> ShowS
quoted (CSVField -> Bool
csvFieldQuoted CSVField
f) (ShowS
doNL ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ CSVField -> String
csvFieldContent CSVField
f)
        doNL :: ShowS
doNL | Bool
nl        = ShowS
replaceNL
             | Bool
otherwise = ShowS
forall a. a -> a
id


-- Some pp helpers - indent and quoted - should live elsewhere, in a
-- pretty-printing package.

indent :: Int -> String -> String
indent :: Int -> ShowS
indent Int
n = [String] -> String
unlines ([String] -> String) -> (String -> [String]) -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Char -> String
forall a. Int -> a -> [a]
replicate Int
n Char
' ' String -> ShowS
forall a. [a] -> [a] -> [a]
++) ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lines

quoted :: Bool -> String -> String
quoted :: Bool -> ShowS
quoted Bool
False  String
s  = String
s
quoted Bool
True   String
s  = Char
'"'Char -> ShowS
forall a. a -> [a] -> [a]
: ShowS
escape String
s String -> ShowS
forall a. [a] -> [a] -> [a]
++String
"\""
  where escape :: ShowS
escape (Char
'"':String
cs) = Char
'"'Char -> ShowS
forall a. a -> [a] -> [a]
:Char
'"'Char -> ShowS
forall a. a -> [a] -> [a]
: ShowS
escape String
cs
        escape (Char
c:String
cs)   = Char
cChar -> ShowS
forall a. a -> [a] -> [a]
: ShowS
escape String
cs
        escape  []      = []

replaceNL :: String -> String
replaceNL :: ShowS
replaceNL (Char
'\n':String
s) = Char
' 'Char -> ShowS
forall a. a -> [a] -> [a]
:ShowS
replaceNL String
s
replaceNL (Char
c:String
s)    = Char
cChar -> ShowS
forall a. a -> [a] -> [a]
: ShowS
replaceNL String
s
replaceNL []       = []


-- | Convert a CSV table to a simpler representation, by dropping all
--   the original location information.
fromCSVTable :: CSVTable -> [[String]]
fromCSVTable :: CSVTable -> [[String]]
fromCSVTable = ([CSVField] -> [String]) -> CSVTable -> [[String]]
forall a b. (a -> b) -> [a] -> [b]
map ((CSVField -> String) -> [CSVField] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> String
csvFieldContent)

-- | Convert a simple list of lists into a CSVTable by the addition of
--   logical locations.  (Textual locations are not so useful.)
--   Rows of varying lengths generate errors.  Fields that need
--   quotation marks are automatically marked as such.
toCSVTable   :: [[String]] -> ([CSVError], CSVTable)
toCSVTable :: [[String]] -> ([CSVError], CSVTable)
toCSVTable []         = ([CSVError
NoData], [])
toCSVTable rows :: [[String]]
rows@([String]
r:[[String]]
_) = (\ ([[CSVError]]
a,CSVTable
b)-> ([[CSVError]] -> [CSVError]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[CSVError]]
a, CSVTable
b)) (([[CSVError]], CSVTable) -> ([CSVError], CSVTable))
-> ([[CSVError]], CSVTable) -> ([CSVError], CSVTable)
forall a b. (a -> b) -> a -> b
$
                        [([CSVError], [CSVField])] -> ([[CSVError]], CSVTable)
forall a b. [(a, b)] -> ([a], [b])
unzip ((Int -> [String] -> ([CSVError], [CSVField]))
-> [Int] -> [[String]] -> [([CSVError], [CSVField])]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> [String] -> ([CSVError], [CSVField])
walk [Int
1..] [[String]]
rows)
  where
    n :: Int
n            = [String] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
r
    walk        :: Int -> [String] -> ([CSVError], CSVRow)
    walk :: Int -> [String] -> ([CSVError], [CSVField])
walk Int
rnum [] = ( [Int -> CSVError
blank Int
rnum]
                   , (Int -> CSVField) -> [Int] -> [CSVField]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
c-> Int -> Int -> String -> CSVField
mkCSVField Int
rnum Int
c String
"") [Int
1..Int
n])
    walk Int
rnum [String]
cs = ( if [String] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
cs Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
n then [Int -> [String] -> CSVError
bad Int
rnum [String]
cs] else []
                   , (Int -> String -> CSVField) -> [Int] -> [String] -> [CSVField]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Int -> Int -> String -> CSVField
mkCSVField Int
rnum) [Int
1..Int
n] [String]
cs )

    blank :: Int -> CSVError
blank Int
rnum =  BlankLine :: Int -> Int -> Int -> CSVField -> CSVError
BlankLine{ csvRow :: Int
csvRow          = Int
rnum
                           , csvColsExpected :: Int
csvColsExpected = Int
n
                           , csvColsActual :: Int
csvColsActual   = Int
0
                           , csvField :: CSVField
csvField        = Int -> Int -> String -> CSVField
mkCSVField Int
rnum Int
0 String
""
                           }
    bad :: Int -> [String] -> CSVError
bad Int
r [String]
cs = IncorrectRow :: Int -> Int -> Int -> [CSVField] -> CSVError
IncorrectRow{ csvRow :: Int
csvRow          = Int
r
                           , csvColsExpected :: Int
csvColsExpected = Int
n
                           , csvColsActual :: Int
csvColsActual   = [String] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
cs
                           , csvFields :: [CSVField]
csvFields       = (Int -> String -> CSVField) -> [Int] -> [String] -> [CSVField]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Int -> Int -> String -> CSVField
mkCSVField Int
r) [Int
1..] [String]
cs
                           }

-- | Select and/or re-arrange columns from a CSV table, based on names in the
--   header row of the table.  The original header row is re-arranged too.
--   The result is either a list of column names that were not present, or
--   the (possibly re-arranged) sub-table.
selectFields :: [String] -> CSVTable -> Either [String] CSVTable
selectFields :: [String] -> CSVTable -> Either [String] CSVTable
selectFields [String]
names CSVTable
table
    | CSVTable -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null CSVTable
table          = [String] -> Either [String] CSVTable
forall a b. a -> Either a b
Left [String]
names
    | Bool -> Bool
not ([String] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
missing)  = [String] -> Either [String] CSVTable
forall a b. a -> Either a b
Left [String]
missing
    | Bool
otherwise           = CSVTable -> Either [String] CSVTable
forall a b. b -> Either a b
Right (([CSVField] -> [CSVField]) -> CSVTable -> CSVTable
forall a b. (a -> b) -> [a] -> [b]
map [CSVField] -> [CSVField]
forall a. [a] -> [a]
select CSVTable
table)
  where
    header :: [String]
header         = (CSVField -> String) -> [CSVField] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> String
csvFieldContent (CSVTable -> [CSVField]
forall a. [a] -> a
head CSVTable
table)
    missing :: [String]
missing        = (String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [String]
header) [String]
names
    reordering :: [Int]
reordering     = (String -> Int) -> [String] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (Maybe Int -> Int
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe Int -> Int) -> (String -> Maybe Int) -> String -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\String
n-> String -> [String] -> Maybe Int
forall a. Eq a => a -> [a] -> Maybe Int
elemIndex String
n [String]
header)) [String]
names
    select :: [b] -> [b]
select [b]
fields  = (Int -> b) -> [Int] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map ([b]
fields[b] -> Int -> b
forall a. [a] -> Int -> a
!!) [Int]
reordering

-- | Validate that the named columns of a table have exactly the names and
--   ordering given in the argument.
expectFields :: [String] -> CSVTable -> Either [String] CSVTable
expectFields :: [String] -> CSVTable -> Either [String] CSVTable
expectFields [String]
names CSVTable
table
    | CSVTable -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null CSVTable
table          = [String] -> Either [String] CSVTable
forall a b. a -> Either a b
Left [String
"CSV table is empty"]
    | Bool -> Bool
not ([String] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
missing)  = [String] -> Either [String] CSVTable
forall a b. a -> Either a b
Left (ShowS -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (String
"CSV table is missing field: "String -> ShowS
forall a. [a] -> [a] -> [a]
++)
                                       [String]
missing)
    | [String]
header [String] -> [String] -> Bool
forall a. Eq a => a -> a -> Bool
/= [String]
names     = [String] -> Either [String] CSVTable
forall a b. a -> Either a b
Left [String
"CSV columns are in the wrong order"
                                 ,String
"Expected: "String -> ShowS
forall a. [a] -> [a] -> [a]
++String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
", " [String]
names
                                 ,String
"Found:    "String -> ShowS
forall a. [a] -> [a] -> [a]
++String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
", " [String]
header]
    | Bool
otherwise           = CSVTable -> Either [String] CSVTable
forall a b. b -> Either a b
Right CSVTable
table
  where
    header :: [String]
header         = (CSVField -> String) -> [CSVField] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map CSVField -> String
csvFieldContent (CSVTable -> [CSVField]
forall a. [a] -> a
head CSVTable
table)
    missing :: [String]
missing        = (String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [String]
header) [String]
names

-- | A join operator, adds the columns of two tables together.
--   Precondition: the tables have the same number of rows.
joinCSV :: CSVTable -> CSVTable -> CSVTable
joinCSV :: CSVTable -> CSVTable -> CSVTable
joinCSV = ([CSVField] -> [CSVField] -> [CSVField])
-> CSVTable -> CSVTable -> CSVTable
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith [CSVField] -> [CSVField] -> [CSVField]
forall a. [a] -> [a] -> [a]
(++)

-- | A generator for a new CSV column, of arbitrary length.
--   The result can be joined to an existing table if desired.
mkEmptyColumn :: String -> CSVTable
mkEmptyColumn :: String -> CSVTable
mkEmptyColumn String
header = [Int -> Int -> String -> CSVField
mkCSVField Int
1 Int
0 String
header] [CSVField] -> CSVTable -> CSVTable
forall a. a -> [a] -> [a]
:
                       (Int -> [CSVField]) -> [Int] -> CSVTable
forall a b. (a -> b) -> [a] -> [b]
map (\Int
n-> [Int -> Int -> String -> CSVField
mkCSVField Int
n Int
0 String
""]) [Int
2..]

-- | Generate a fresh field with the given textual content.
--   The quoting flag is set automatically based on the text.
--   Textual extents are not particularly useful, since there was no original
--   input to refer to.
mkCSVField :: Int -> Int -> String -> CSVField
mkCSVField :: Int -> Int -> String -> CSVField
mkCSVField Int
n Int
c String
text =
        CSVField :: Int
-> Int -> (Int, Int) -> (Int, Int) -> String -> Bool -> CSVField
CSVField { csvRowNum :: Int
csvRowNum       = Int
n
                 , csvColNum :: Int
csvColNum       = Int
c
                 , csvTextStart :: (Int, Int)
csvTextStart    = (Int
0,Int
0)
                 , csvTextEnd :: (Int, Int)
csvTextEnd      = (String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ((Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n') String
text)
                                     ,String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (String -> Int) -> ShowS -> String -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\n')
                                             ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
forall a. [a] -> [a]
reverse (String -> Int) -> String -> Int
forall a b. (a -> b) -> a -> b
$ String
text )
                 , csvFieldContent :: String
csvFieldContent = String
text
                 , csvFieldQuoted :: Bool
csvFieldQuoted  = (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`String
"\",\n\r") String
text
                 }