2 people like it.

log parsing.

Basically an extension of http://msdn.microsoft.com/en-us/magazine/gg983490.aspx. I can't remember why I wrote it now, but it should be more generic than MSDN's version.

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
101: 
102: 
103: 
104: 
105: 
106: 
107: 
108: 
109: 
110: 
111: 
112: 
113: 
114: 
115: 
116: 
117: 
118: 
119: 
120: 
// Currently, only uniform data is supported,
// meaning lines must be separated by a line break or line feed, and
// each line must have the same format, like a1,b1,c1,d1\na2,b2,c2,d2\n
 
open System
open System.IO

// Returns xs when its length lies within [minlen, maxlen] (inclusive);
// otherwise returns defaultArg.
let validate (xs : string) minlen maxlen defaultArg =
  match xs.Length with
  | len when len < minlen || len > maxlen -> defaultArg
  | _ -> xs

// Experimenting with arg parsing.
// Doesn't catch all conditions:
// - String.Empty for a file name will raise an exception later, when the
//   file is opened.
//
// Holds the sanitised command-line arguments:
//   file   - path of the log file (1..128 chars, else String.Empty)
//   delim  - single-character field delimiter (else ';')
//   filter - lines starting with this text are skipped
//            (1..128 chars, else String.Empty)
//   count  - how many top entries to show; any non-negative integer,
//            falling back to 10 when the text is not such a number
[<Struct>]
type userArgs = 
  val file   : string
  val delim  : char
  val filter : string
  val count  : int
  new (file, delim, filter, count : string) = 
    let file   = validate file   1 128 String.Empty
    let delim  = validate delim  1 1 ";"
    let filter = validate filter 1 128 String.Empty
    // The count used to be restricted to exactly one character, so "15"
    // silently became 10 and a single non-digit crashed in `int`.
    // TryParse accepts multi-digit counts and never throws.
    let count  =
      match Int32.TryParse count with
      | true, n when n >= 0 -> n
      | _ -> 10
    { file=file; delim=char delim; filter=filter; count=count }

// Lazily reads data.file line by line and yields each kept line split on
// data.delim. Lines that start with data.filter are skipped.
// The file is opened when enumeration starts and closed when it ends, so
// every enumeration of the result re-reads the file.
let readFileOf (data : userArgs) = seq {
  // An empty filter means "skip nothing". Every string starts with "",
  // so testing StartsWith unconditionally would drop the whole file.
  // Ordinal comparison keeps the prefix test culture-independent.
  let isSkipped (line : string) =
    not (String.IsNullOrEmpty data.filter)
    && line.StartsWith(data.filter, StringComparison.Ordinal)

  use fs = new StreamReader(File.OpenRead data.file)
  while not fs.EndOfStream do
    let line = fs.ReadLine()
    if not (isSkipped line) then
      yield line.Split data.delim
}

// Generic map reduce poached from MSDN magazine.
// Folds a sequence of (key, amount) pairs into a map from each key to the
// sum of its amounts.
let reduceData xs = 
  Seq.fold (fun (acc : Map<_, int>) (data, num) ->
    match Map.tryFind data acc with
    | Some total -> Map.add data (total + num) acc
    // First sighting of a key starts at its own amount. It used to start
    // at a hard-coded 1, which ignored the amount (including amount 0).
    | None       -> Map.add data num acc)
    Map.empty xs

// Applies f to x and wraps the result in Some; any exception raised by
// f becomes None.
let maybe f x =
  try
    Some (f x)
  with
  | _ -> None

// Not convinced by this question/answer flow, but it could be worse.
//
// Prints the numbered fields of a sample line, then prompts on stdin until
// a non-negative integer is entered. Returns (choice, lines) with lines
// passed through untouched.
// Raises EndOfStreamException when stdin is closed before a valid answer
// is given.
let rec askLineChoice lines =
  let printLinesWithNumber : string seq -> unit =
    Seq.iteri (printfn "[#%d] %s")

  let questionAnswer q =
    printf "[+] %s: " q; stdin.ReadLine()

  // The sample is the second line when there is one (as before), else the
  // only line, else nothing. Seq.truncate reads at most two lines in one
  // pass and does not throw on short input the way Seq.nth 1 did.
  let sample =
    lines
    |> Seq.truncate 2
    |> Seq.fold (fun _ fields -> Some fields) None

  match sample with
  | Some fields -> printLinesWithNumber fields
  | None        -> ()

  match questionAnswer "Enter integer choice #" with
  | null ->
    // ReadLine returns null once stdin is closed; retrying would loop
    // forever, so fail loudly instead.
    raise (EndOfStreamException "no choice entered: standard input was closed")
  | answer ->
    // Parse once and reuse the value instead of converting twice.
    match maybe int answer with
    | Some choice when choice >= 0 -> choice, lines
    | _ -> askLineChoice lines

// Projects every line onto the field at index `choice`.
// Returns a lazy sequence of (field, 1) for lines that have that field and
// (String.Empty, 0) for lines that do not. Each missing field is reported
// on stdout together with the zero-based position of the line.
// Missing fields are detected with a bounds check rather than by catching
// an exception, so many short lines no longer slow things down.
let maybeGetLine (choice,lines) =
  let f k (line : string []) =
    if line <> null && choice >= 0 && choice < line.Length then
      line.[choice], 1
    else
      printfn "[!] missing %d entry from line %d" choice k
      String.Empty, 0
      
  lines |> Seq.mapi f

// Full pipeline for one run: read the file, ask which field to use,
// extract that field from every line, and count the occurrences.
let cutFileAndReduce (data : userArgs) =
  data
  |> readFileOf
  |> askLineChoice
  |> maybeGetLine
  |> reduceData
   
// Errorless take. The default Seq.take raises an exception if the
// sequence holds fewer than `count` elements. Sequences are lazy, so that
// exception only surfaces while the result is being evaluated, which is
// messy to deal with. Since we don't care much if we can't print the
// top 50 out of a set of 20, we just take whatever we can.
//
// Returns at most `count` leading elements of xs; empty when count <= 0.
let takeOf count (xs : 'a seq) = 
  if count <= 0 then Seq.empty
  // Seq.truncate stops as soon as the source runs dry. The hand-written
  // loop kept calling MoveNext up to `count` times after the end.
  else Seq.truncate count xs
    
// Builds a function that turns a frequency map into its entries ordered
// by descending frequency, limited to data.count items.
let displayTopN (data : userArgs) =
  fun counts ->
    counts
    |> Map.toSeq
    |> Seq.sortBy (fun (_, freq) -> -freq)
    |> takeOf data.count

// Runs the whole analysis for `data` and writes the most frequent entries
// to stdout as a small count/data table.
let printTopEntries data =
  let printResults (rows : seq<_ * _>) =
    printfn "\ncount\t\tdata"
    for (entry, freq) in rows do
      Console.WriteLine("{0}\t-\t{1,3}", freq, entry)

  data
  |> cutFileAndReduce
  |> displayTopN data
  |> printResults

// Prints the usage text to stderr and terminates the process with exit
// code -1. Never returns.
let exitWithError () =
  fprintfn stderr "log.exe <file> <delim> <ignoreStartChar> <count>"
  // The example follows the argument order above: delimiter first, then
  // the skip prefix. It used to show them the other way round.
  fprintfn stderr "e.g. log.exe c:\\blah.log , # 15"
  exit -1

// Turns the raw argument array into a userArgs value. Exactly four
// arguments are required; anything else prints the usage text and exits.
let validateUserArgs (argv : string []) =
  match argv with
  | [| file; delim; skip; count |] -> userArgs (file, delim, skip, count)
  | _ -> exitWithError ()

// Program entry point: parse the arguments, run the report, and return
// exit code 0.
[<EntryPoint>]
let main argv =
  argv
  |> validateUserArgs
  |> printTopEntries
  0
namespace System
namespace System.IO
val validate : xs:string -> minlen:int -> maxlen:int -> defaultArg:string -> string

Full name: Script.validate
val xs : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
val minlen : int
val maxlen : int
val defaultArg : string
property String.Length: int
Multiple items
type StructAttribute =
  inherit Attribute
  new : unit -> StructAttribute

Full name: Microsoft.FSharp.Core.StructAttribute

--------------------
new : unit -> StructAttribute
Multiple items
type userArgs =
  struct
    new : file:string * delim:string * filter:string * count:string -> userArgs
    val file: string
    val delim: char
    val filter: string
    val count: int
  end

Full name: Script.userArgs

--------------------
userArgs()
new : file:string * delim:string * filter:string * count:string -> userArgs
userArgs.file: string
userArgs.delim: char
Multiple items
val char : value:'T -> char (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.char

--------------------
type char = Char

Full name: Microsoft.FSharp.Core.char
userArgs.filter: string
userArgs.count: int
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
val file : string
val delim : string
val filter : string
val count : string
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

Full name: System.String

--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : unit
field string.Empty
val readFileOf : data:userArgs -> seq<string []>

Full name: Script.readFileOf
val data : userArgs
Multiple items
val seq : sequence:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Core.Operators.seq

--------------------
type seq<'T> = Collections.Generic.IEnumerable<'T>

Full name: Microsoft.FSharp.Collections.seq<_>
val fs : StreamReader
Multiple items
type StreamReader =
  inherit TextReader
  new : stream:Stream -> StreamReader + 9 overloads
  member BaseStream : Stream
  member Close : unit -> unit
  member CurrentEncoding : Encoding
  member DiscardBufferedData : unit -> unit
  member EndOfStream : bool
  member Peek : unit -> int
  member Read : unit -> int + 1 overload
  member ReadLine : unit -> string
  member ReadToEnd : unit -> string
  ...

Full name: System.IO.StreamReader

--------------------
StreamReader(stream: Stream) : unit
StreamReader(path: string) : unit
StreamReader(stream: Stream, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(stream: Stream, encoding: Text.Encoding) : unit
StreamReader(path: string, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(path: string, encoding: Text.Encoding) : unit
StreamReader(stream: Stream, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(path: string, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(stream: Stream, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool, bufferSize: int) : unit
StreamReader(path: string, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool, bufferSize: int) : unit
type File =
  static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
  static member AppendAllText : path:string * contents:string -> unit + 1 overload
  static member AppendText : path:string -> StreamWriter
  static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
  static member Create : path:string -> FileStream + 3 overloads
  static member CreateText : path:string -> StreamWriter
  static member Decrypt : path:string -> unit
  static member Delete : path:string -> unit
  static member Encrypt : path:string -> unit
  static member Exists : path:string -> bool
  ...

Full name: System.IO.File
File.OpenRead(path: string) : FileStream
val not : value:bool -> bool

Full name: Microsoft.FSharp.Core.Operators.not
property StreamReader.EndOfStream: bool
val line : string
StreamReader.ReadLine() : string
String.StartsWith(value: string) : bool
String.StartsWith(value: string, comparisonType: StringComparison) : bool
String.StartsWith(value: string, ignoreCase: bool, culture: Globalization.CultureInfo) : bool
String.Split([<ParamArray>] separator: char []) : string []
String.Split(separator: string [], options: StringSplitOptions) : string []
String.Split(separator: char [], options: StringSplitOptions) : string []
String.Split(separator: char [], count: int) : string []
String.Split(separator: string [], count: int, options: StringSplitOptions) : string []
String.Split(separator: char [], count: int, options: StringSplitOptions) : string []
val reduceData : xs:seq<'a * int> -> Map<'a,int> (requires comparison)

Full name: Script.reduceData
val xs : seq<'a * int> (requires comparison)
module Seq

from Microsoft.FSharp.Collections
val fold : folder:('State -> 'T -> 'State) -> state:'State -> source:seq<'T> -> 'State

Full name: Microsoft.FSharp.Collections.Seq.fold
val acc : Map<'a,int> (requires comparison)
Multiple items
module Map

from Microsoft.FSharp.Collections

--------------------
type Map<'Key,'Value (requires comparison)> =
  interface IEnumerable
  interface IComparable
  interface IEnumerable<KeyValuePair<'Key,'Value>>
  interface ICollection<KeyValuePair<'Key,'Value>>
  interface IDictionary<'Key,'Value>
  new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
  member Add : key:'Key * value:'Value -> Map<'Key,'Value>
  member ContainsKey : key:'Key -> bool
  override Equals : obj -> bool
  member Remove : key:'Key -> Map<'Key,'Value>
  ...

Full name: Microsoft.FSharp.Collections.Map<_,_>

--------------------
new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
val data : 'a (requires comparison)
val num : int
val containsKey : key:'Key -> table:Map<'Key,'T> -> bool (requires comparison)

Full name: Microsoft.FSharp.Collections.Map.containsKey
val add : key:'Key -> value:'T -> table:Map<'Key,'T> -> Map<'Key,'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Map.add
val empty<'Key,'T (requires comparison)> : Map<'Key,'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Map.empty
val maybe : f:('a -> 'b) -> x:'a -> 'b option

Full name: Script.maybe
val f : ('a -> 'b)
val x : 'a
union case Option.Some: Value: 'T -> Option<'T>
union case Option.None: Option<'T>
val askLineChoice : lines:'a -> int * 'a (requires 'a :> seq<'b> and 'b :> seq<string>)

Full name: Script.askLineChoice
val lines : #seq<'b> (requires 'b :> seq<string>)
val printLinesWithNumber : (seq<string> -> unit)
type unit = Unit

Full name: Microsoft.FSharp.Core.unit
val iteri : action:(int -> 'T -> unit) -> source:seq<'T> -> unit

Full name: Microsoft.FSharp.Collections.Seq.iteri
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
val questionAnswer : (string -> string)
val q : string
val printf : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printf
val stdin<'T> : TextReader

Full name: Microsoft.FSharp.Core.Operators.stdin
val validateIntAnswer : (string -> bool)
val answer : string
val d : int
val nth : index:int -> source:seq<'T> -> 'T

Full name: Microsoft.FSharp.Collections.Seq.nth
val valid : bool
val maybeGetLine : choice:int * lines:seq<string []> -> seq<string * int>

Full name: Script.maybeGetLine
val choice : int
val lines : seq<string []>
val f : (int -> string [] -> string * int)
val k : int
val line : string []
type Array =
  member Clone : unit -> obj
  member CopyTo : array:Array * index:int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  member GetLength : dimension:int -> int
  member GetLongLength : dimension:int -> int64
  member GetLowerBound : dimension:int -> int
  member GetUpperBound : dimension:int -> int
  member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
  member Initialize : unit -> unit
  member IsFixedSize : bool
  ...

Full name: System.Array
val get : array:'T [] -> index:int -> 'T

Full name: Microsoft.FSharp.Collections.Array.get
val b : string
val mapi : mapping:(int -> 'T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.mapi
val cutFileAndReduce : (userArgs -> Map<string,int>)

Full name: Script.cutFileAndReduce
val takeOf : count:int -> xs:seq<'a> -> seq<'a>

Full name: Script.takeOf
val count : int
val xs : seq<'a>
val empty<'T> : seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.empty
val e : Collections.Generic.IEnumerator<'a>
Collections.Generic.IEnumerable.GetEnumerator() : Collections.Generic.IEnumerator<'a>
val i : int
Collections.IEnumerator.MoveNext() : bool
property Collections.Generic.IEnumerator.Current: 'a
val displayTopN : data:userArgs -> (Map<'a,int> -> seq<'a * int>) (requires comparison)

Full name: Script.displayTopN
val toSeq : table:Map<'Key,'T> -> seq<'Key * 'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Map.toSeq
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Seq.sortBy
val x : 'a (requires comparison)
val y : int
val printTopEntries : data:userArgs -> unit

Full name: Script.printTopEntries
val printResults : (seq<'a * 'b> -> unit)
val xs : seq<'a * 'b>
val iter : action:('T -> unit) -> source:seq<'T> -> unit

Full name: Microsoft.FSharp.Collections.Seq.iter
val freq : 'b
type Console =
  static member BackgroundColor : ConsoleColor with get, set
  static member Beep : unit -> unit + 1 overload
  static member BufferHeight : int with get, set
  static member BufferWidth : int with get, set
  static member CapsLock : bool
  static member Clear : unit -> unit
  static member CursorLeft : int with get, set
  static member CursorSize : int with get, set
  static member CursorTop : int with get, set
  static member CursorVisible : bool with get, set
  ...

Full name: System.Console
Console.WriteLine() : unit
   (+0 other overloads)
Console.WriteLine(value: string) : unit
   (+0 other overloads)
Console.WriteLine(value: obj) : unit
   (+0 other overloads)
Console.WriteLine(value: uint64) : unit
   (+0 other overloads)
Console.WriteLine(value: int64) : unit
   (+0 other overloads)
Console.WriteLine(value: uint32) : unit
   (+0 other overloads)
Console.WriteLine(value: int) : unit
   (+0 other overloads)
Console.WriteLine(value: float32) : unit
   (+0 other overloads)
Console.WriteLine(value: float) : unit
   (+0 other overloads)
Console.WriteLine(value: decimal) : unit
   (+0 other overloads)
val exitWithError : unit -> 'a

Full name: Script.exitWithError
val fprintfn : textWriter:TextWriter -> format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.fprintfn
val stderr<'T> : TextWriter

Full name: Microsoft.FSharp.Core.Operators.stderr
val exit : exitcode:int -> 'T

Full name: Microsoft.FSharp.Core.Operators.exit
val validateUserArgs : _arg1:string [] -> userArgs

Full name: Script.validateUserArgs
val skip : string
Multiple items
type EntryPointAttribute =
  inherit Attribute
  new : unit -> EntryPointAttribute

Full name: Microsoft.FSharp.Core.EntryPointAttribute

--------------------
new : unit -> EntryPointAttribute
val main : argv:string [] -> int

Full name: Script.main
val argv : string []
Raw view Test code New version

More information

Link:http://fssnip.net/iL
Posted:11 years ago
Author:David Klein
Tags: log , map reduce , user input