open HtmlAgilityPack
open System.Text

/// Recursively renders an HTML node tree as plain text, appending to `sb`.
///
/// Handling by node type:
///   * Document  - children are rendered in order.
///   * Text      - the text is entity-decoded and appended.
///   * Element   - `p` is wrapped in line breaks, `li` is followed by a line
///                 break, `div` is preceded by a line break, `ul` / `ol`
///                 start on a new line and prefix each `li` child with
///                 "• " or "<n>. " respectively; any other element just
///                 renders its children.
///   * Comment   - ignored.
///   * Otherwise - the node type is printed to stdout and nothing is appended.
///
/// Returns the StringBuilder holding the accumulated text so the function can
/// be used directly as a fold step.
let rec getText (sb: StringBuilder) (node: HtmlNode) : StringBuilder =
    // Only real <li> elements get a bullet/number. The child collection of a
    // list may also hold text or comment nodes, which must not be treated as
    // items (they would otherwise produce stray markers and skip numbers).
    let isListItem (child: HtmlNode) =
        child.NodeType = HtmlNodeType.Element && child.Name = "li"

    match node.NodeType with
    | HtmlNodeType.Document ->
        node.ChildNodes |> Seq.fold getText sb
    | HtmlNodeType.Text ->
        let textNode = node :?> HtmlTextNode
        let text = textNode.Text |> HtmlEntity.DeEntitize
        sb.Append(text)
    | HtmlNodeType.Element ->
        match node.Name with
        | "p" ->
            let sb = node.ChildNodes |> Seq.fold getText (sb.AppendLine())
            sb.AppendLine()
        | "li" ->
            let sb = node.ChildNodes |> Seq.fold getText sb
            sb.AppendLine()
        | "div" ->
            node.ChildNodes |> Seq.fold getText (sb.AppendLine())
        | "ul" ->
            node.ChildNodes
            |> Seq.fold
                (fun (acc: StringBuilder) (child: HtmlNode) ->
                    if isListItem child then
                        getText (acc.Append("• ")) child
                    else
                        getText acc child)
                (sb.AppendLine())
        | "ol" ->
            // The state carries the next item number alongside the builder;
            // the number only advances when an actual <li> is rendered.
            node.ChildNodes
            |> Seq.fold
                (fun (acc: StringBuilder, index: int) (child: HtmlNode) ->
                    if isListItem child then
                        let acc = acc.Append(sprintf "%i. " index)
                        getText acc child, index + 1
                    else
                        getText acc child, index)
                (sb.AppendLine(), 1)
            |> fst
        | _ ->
            node.ChildNodes |> Seq.fold getText sb
    | HtmlNodeType.Comment -> sb
    | unknown ->
        printfn "Unknown value: %A" unknown
        sb