Hide this comment

I have been meaning to write a blog entry about this; maybe by March I will. In the meantime, here's a sample that is suggestive of what is possible. Look at the tiny code sample at the bottom. The key is to use operator-question-mark. Feel free to refine/improve this; I plan to, but probably won't have time for a couple of months yet.

(See also [link:stackoverflow.com] )

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
 

// This code is pretty hacky, but shows a skeleton of what can be done.
/// The value types that can be inferred for a CSV column.
type DataType =
    | String
    | Int
    | Float
/// A column header, represented as a plain string.
type ColumnName = string
/// A column described by its name paired with its data type.
type Column = ColumnName * DataType    

/// One record whose fields are fetched by column name through the dynamic lookup operator (row?Name).
/// `data` holds the boxed field values; `columns` holds the (name, type) pair for the field at the same index.
/// Raises System.ArgumentException when `data` and `columns` differ in length.
[<AllowNullLiteral>]
type DynamicRow(data:obj[], columns:Column[]) =
    // Use a real exception rather than `assert`: assertions are only checked when DEBUG is
    // defined, so a mismatched row would otherwise be accepted and fail later on lookup.
    do
        if data.Length <> columns.Length then
            invalidArg "columns" (sprintf "Expected one column per data item, but got %d data items and %d columns" data.Length columns.Length)
    member private this.Columns = columns
    member private this.Data = data
    /// Looks up the value stored under `columnName` and downcasts it to the type the caller expects.
    /// Raises System.InvalidOperationException when no column has that name.
    static member (?)(row:DynamicRow, columnName:string) =
        match row.Columns |> Array.tryFindIndex (fun (name,_) -> name = columnName) with
        | Some i ->
            // The value sits at the same index as its column description.
            downcast row.Data.[ i ]
        | None ->
            raise <| new System.InvalidOperationException(sprintf "Column name '%s' is not one of the legal column names for this CSV file" columnName)
        
// See http://stackoverflow.com/questions/1621250/f-dynamic-lookup-operator-overloading
// regarding a Beta2 bug that requires this workaround function.
// Forwards `o?prop` to the static member (?) declared on ^T, passing the property name as a
// string; the result type ^U is whatever that static member returns.
let inline (?) (o:^T) (prop:string) : ^U =
    (^T : (static member (?) : ^T * string -> ^U)(o,prop))

/// An in-memory CSV file: a header line of column names followed by data lines, with each
/// column's type (String, Int or Float) inferred from the first data line.
type CSVFile private (data:DynamicRow[], columns:Column[]) =
    /// Reads `filename`, splitting every line on `delim` (a comma when omitted).
    /// Raises System.InvalidOperationException when the file has fewer than two lines, when a
    /// line's field count differs from the header's, when a field that starts with a digit is
    /// neither an int nor a float, or when a field's inferred type differs from the type
    /// inferred for its column from the first data line.
    static member Read(filename:string, ?delim:char) =
        let separator = match delim with Some d -> d | None -> ','
        let lines = System.IO.File.ReadAllLines(filename)
        if lines.Length < 2 then
            raise (new System.InvalidOperationException(sprintf "CSV file '%s' must be at least 2 lines long" filename))
        let splitLine (idx:int) = lines.[idx].Split([|separator|])
        // Line 1 is the header; whitespace around a column name is dropped.
        let headers = splitLine 0 |> Array.map (fun (h:string) -> h.Trim())
        let expected = headers.Length
        let failColumnCount (lineNo:int) (found:int) : unit =
            raise (new System.InvalidOperationException(sprintf "CSV file '%s', line %d, has %d columns but the file header says %d columns are expected" filename lineNo found expected))
        // Line 2 is the first data line; it decides the type of every column.
        let firstCells = splitLine 1
        if firstCells.Length <> expected then failColumnCount 2 firstCells.Length
        let isDigit (ch:char) = System.Char.IsDigit(ch)
        // Infers a field's type from its trimmed text and returns that type with the boxed value.
        let parseCell (raw:string) (lineNo:int) (colNo:int) : DataType * obj =
            let cell = raw.Trim()
            if cell.Length = 0 || not (isDigit cell.[0]) then
                // Empty fields and fields that do not start with a digit are kept as text.
                String, box cell
            elif cell |> Seq.forall isDigit then
                Int, box (int cell)
            else
                // A float is digits with at most one '.' among them.
                let digitsAndDotsOnly = cell |> Seq.forall (fun ch -> isDigit ch || ch = '.')
                let dotCount = cell |> Seq.filter (fun ch -> ch = '.') |> Seq.length
                if digitsAndDotsOnly && dotCount <= 1 then
                    Float, box (float cell)
                else
                    raise (new System.InvalidOperationException(sprintf "Can't infer type of '%s' on line %d, column %d (starts with digit, but does not look like int or float)" cell lineNo colNo))
        let firstParsed = firstCells |> Array.mapi (fun c raw -> parseCell raw 2 (c + 1))
        let schema : Column[] = Array.zip headers (Array.map fst firstParsed)
        let firstRow = new DynamicRow(Array.map snd firstParsed, schema)
        // Parses the line at 0-based index `idx`, checking each field against the column types.
        let readRow (idx:int) =
            let lineNo = idx + 1
            let cells = splitLine idx
            if cells.Length <> expected then failColumnCount lineNo cells.Length
            let values =
                cells |> Array.mapi (fun c raw ->
                    let inferred, value = parseCell raw lineNo (c + 1)
                    let colName, colType = schema.[c]
                    if inferred <> colType then
                        raise (new System.InvalidOperationException(sprintf "CSV file line %d column %d was inferred to have type %A but previous rows had inferred this column named '%s' to be type %A" lineNo (c + 1) inferred colName colType))
                    value)
            new DynamicRow(values, schema)
        // Row k comes from line index k + 1, because the header occupies index 0.
        let rows = Array.init (lines.Length - 1) (fun k -> if k = 0 then firstRow else readRow (k + 1))
        new CSVFile(rows, schema)
    /// The data rows in file order; a fresh copy of the array is returned on each access.
    member this.Rows = Array.copy data

(* Let Data.txt contain:
Name,         Age,        QBRating
Joe,          28,            119.4
Bob,          31,             57.6
Fred,         32,             99.8
Old Man Ian,  9999,            0.3
*)

// Load Data.txt and print each record as "name - age - rating"; every row?Column lookup
// takes its result type from the matching format specifier.
let csv = CSVFile.Read("Data.txt")
csv.Rows |> Array.iter (fun row -> printfn "%s - %d - %f" row?Name row?Age row?QBRating)

By on 12/18/2009 1:01 PM ()Reply
Hide this comment

Thank you Julien and Brian, this gets me closer to my dream :) , I'll try and post a first draft soon.

By on 12/31/2009 7:57 AM ()Reply
Hide this comment

You might be interested in [link:filehelpers.sourceforge.net] which "can strong type your flat file (fixed or delimited) simply describing a class that maps to each record and later read/write your file as an strong typed .NET array"

not exactly your "dream", but it may be helpful

By on 12/18/2009 9:44 AM ()Reply
IntelliFactory Offices Copyright (c) 2011-2012 IntelliFactory. All rights reserved.
Home | Products | Consulting | Trainings | Blogs | Jobs | Contact Us | Terms of Use | Privacy Policy | Cookie Policy
Built with WebSharper

Logging in...