package main
import (
"bufio"
"errors"
"html"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/jbowtie/gokogiri"
gokogirixml "github.com/jbowtie/gokogiri/xml"
"github.com/lunny/html2md"
"github.com/microcosm-cc/bluemonday"
"mvdan.cc/xurls/v2"
)
// FBPostData is the result record produced by the parsers in this file, both
// for Facebook posts (ParsePost) and for Blogspot posts (ParseBlogspotPost).
// Which fields are filled depends on the parser that produced the value.
type FBPostData struct {
// PostURL is the address of the post: the expanded canonical URL for
// Facebook posts (falling back to the URL handed to ParsePost), or the
// og:url meta value for Blogspot posts.
PostURL string
// TimeStamp is the publish time. ParsePost stores an RFC 3339 string;
// ParseBlogspotPost stores the raw title attribute of the timestamp <abbr>.
TimeStamp string
// ProfileLink is not populated by any parser visible in this file (the
// GetProfileLink call in ParsePost is disabled).
ProfileLink *ProfileLink
// GiftURL is the expanded gift link found by GetGiftURL (Facebook only).
GiftURL string
// ImageURL is the image address decoded by GetImageURL (Facebook only).
ImageURL string
// Content is the post body: Markdown for Facebook posts, indented HTML
// lines for Blogspot posts.
Content string
// Summary is the og:description meta value (Blogspot only).
Summary string
// Title is the keyword-derived title for Facebook posts, or the post
// heading for Blogspot posts.
Title string
// Author is the post author's name (Blogspot only).
Author string
// Tags is a ", "-separated list of post labels (Blogspot only).
Tags string
}
// ParsePost parses s, the HTML markup of a single Facebook post, and extracts
// its timestamp, image URL, gift URL, canonical post URL and content. PostURL
// is the address the markup was fetched from; it is kept as FBPostData.PostURL
// whenever the canonical URL cannot be determined.
//
// The title is derived from the known reward/action keywords that occur in the
// content. Keywords are checked in a fixed order and every match is prepended,
// so the last matching keyword ends up first, joined by "+".
//
// On failure the partially filled FBPostData is returned together with the
// error of the step that failed; fields of later steps are left at their zero
// value.
func ParsePost(s, PostURL string) (*FBPostData, error) {
	// GetTimeStamp stores the decoded data-ft attribute in the package-level
	// dataFT for GetPostURL to read. Clear it on every exit path (not only on
	// success) so no state of this post leaks into the next parse.
	defer func() {
		dataFT = nil
	}()

	fb := FBPostData{PostURL: PostURL}

	docKogiri, err := gokogiri.ParseHtml([]byte(s))
	if err != nil {
		return &fb, err
	}
	defer docKogiri.Free()

	// An empty or degenerate document has no root element or no child below
	// it; report that as an error instead of dereferencing nil.
	root := docKogiri.Root()
	if root == nil {
		return &fb, errors.New("no root element found in post html")
	}
	htmlNode := root.FirstChild()
	if htmlNode == nil {
		return &fb, errors.New("no post element found in post html")
	}

	// Must run before GetPostURL: it populates dataFT.
	fb.TimeStamp, err = GetTimeStamp(htmlNode)
	if err != nil {
		return &fb, err
	}

	fb.ImageURL, err = GetImageURL(htmlNode)
	if err != nil {
		return &fb, err
	}

	fb.GiftURL, err = GetGiftURL(htmlNode)
	if err != nil {
		return &fb, err
	}

	fb.PostURL, err = GetPostURL(htmlNode)
	if err != nil {
		// Keep the caller-supplied URL rather than an empty one.
		fb.PostURL = PostURL
		return &fb, err
	}

	fb.Content, err = GetContent(htmlNode)
	if err != nil {
		return &fb, err
	}

	// The order of this list determines the order of the title parts.
	titleKeywords := []string{
		FreeSilverCaskets,
		FreeTitanArtifact,
		FreeSoulStones,
		FreeSkinStones,
		ActionKeepTheAmount,
		FreeWinterfestBaubles,
		FreeTopFanPackage,
		FreeEnergyForFee,
		WinterfestRankingRewards,
	}
	for _, keyword := range titleKeywords {
		if !strings.Contains(fb.Content, keyword) {
			continue
		}
		if len(fb.Title) == 0 {
			fb.Title = keyword
		} else {
			fb.Title = keyword + "+" + fb.Title
		}
	}

	// For this action the post itself is used as the gift link.
	if strings.Contains(fb.Content, ActionKeepTheAmount) {
		fb.GiftURL = fb.PostURL
	}

	if len(fb.Title) == 0 {
		fb.Title = "unknown - need to implemented"
	}

	return &fb, nil
}
// Parse downloads the page at url and parses the post it contains.
//
// URLs containing ".blogspot." are handed to ParseBlogspotPost. For every
// other URL the post markup is taken from the first "div.hidden_elem > code"
// element or, when that yields no markup, from the first "div._427x" element,
// and then passed to ParsePost together with url.
//
// NOTE(review): goquery.NewDocument is marked deprecated upstream in favour of
// fetching with net/http and calling goquery.NewDocumentFromReader.
func Parse(url string) (*FBPostData, error) {
	page, err := goquery.NewDocument(url)
	if err != nil {
		return nil, err
	}

	if strings.Contains(url, ".blogspot.") {
		return ParseBlogspotPost(page)
	}

	// Per the original author's note, this is where the post sits when the
	// page is requested without being logged in.
	markup, err := QuerySelector(page, "div.hidden_elem > code").Html()
	if err != nil {
		return nil, err
	}

	if len(markup) == 0 {
		// Nothing in the hidden element: fall back to the regular container.
		markup, err = QuerySelector(page, "div._427x").Html()
		if err != nil {
			return nil, err
		}
	}

	return ParsePost(markup, url)
}
// ParseAll downloads the page at url and runs ParsePost on the markup of
// every "div._427x" element found in it.
//
// Posts whose markup cannot be rendered or parsed are skipped silently; only a
// failure to load the page itself is reported. The returned slice is non-nil
// (possibly empty) on success.
func ParseAll(url string) ([]*FBPostData, error) {
	page, err := goquery.NewDocument(url)
	if err != nil {
		return nil, err
	}

	posts := []*FBPostData{}
	collect := func(_ int, node *goquery.Selection) {
		markup, htmlErr := node.Html()
		if htmlErr != nil {
			return
		}
		if post, parseErr := ParsePost(markup, url); parseErr == nil {
			posts = append(posts, post)
		}
	}
	QuerySelectorEach(page, "div._427x", collect)

	return posts, nil
}
// GetContent returns the text of the post as Markdown.
//
// The inner HTML of the first node matching xPathArticleContent is sanitized
// so that only <p> elements and <a href> links with parseable URLs survive,
// whitespace runs are collapsed to single spaces, the result is converted to
// Markdown, HTML entities are unescaped and surrounding whitespace is trimmed.
//
// It returns the search error if the XPath lookup fails, and a
// "no content found" error if nothing matched or the match has no inner HTML.
func GetContent(htmlNode gokogirixml.Node) (string, error) {
	matches, err := htmlNode.Search(xPathArticleContent)
	if err != nil {
		return "", err
	}
	if len(matches) == 0 {
		return "", errors.New("no content found")
	}

	rawHTML := matches[0].InnerHtml()
	if rawHTML == "" {
		return "", errors.New("no content found")
	}

	policy := bluemonday.StrictPolicy()
	policy.AllowAttrs("href").OnElements("a")
	policy.AllowElements("p")
	policy.RequireParseableURLs(true)
	sanitized := string(policy.SanitizeBytes([]byte(rawHTML)))

	collapsed := strings.Join(strings.Fields(sanitized), " ")
	markdown := html2md.Convert(collapsed)
	return strings.TrimSpace(html.UnescapeString(markdown)), nil
}
// GetImageURL returns the image address embedded in the style attribute of
// the post's image element.
//
// xPathImageURL is tried first; xPathImageURL2 and then xPathImageURL3 (the
// video preview) are only consulted while no node has been found. From the
// first matching node the part of the style attribute matched by
// regexStyleImage is taken, the CSS escapes for ':', '=', '&' and '%' are
// decoded, and the surrounding url('…'); wrapper is stripped.
//
// A failing search returns its error. If no node matches, or the node has no
// style attribute, a "cannot find image url" error is returned.
func GetImageURL(htmlNode gokogirixml.Node) (string, error) {
	matches, err := htmlNode.Search(xPathImageURL)
	if err != nil {
		return "", err
	}

	for _, fallback := range []interface{}{xPathImageURL2, xPathImageURL3} {
		if len(matches) > 0 {
			break
		}
		if matches, err = htmlNode.Search(fallback); err != nil {
			return "", err
		}
	}

	if len(matches) == 0 {
		return "", errors.New("cannot find image url")
	}

	style := matches[0].Attribute("style")
	if style == nil {
		return "", errors.New("cannot find image url")
	}

	// The escape sequences cannot overlap or create one another, so a single
	// replacer pass yields the same text as replacing them one by one.
	cssUnescape := strings.NewReplacer(
		`\3a `, `:`,
		`\3d `, `=`,
		`\26 `, `&`,
		`\25 `, `%`,
	)
	imageURL := cssUnescape.Replace(regexStyleImage.FindString(style.Value()))
	imageURL = strings.TrimPrefix(imageURL, `url('`)
	imageURL = strings.TrimSuffix(imageURL, `');`)
	return imageURL, nil
}
// GetGiftURL returns the expanded gift link of a post.
//
// The link is taken from the href attribute of the first node matching
// xPathGiftURL. It is rebuilt with the https scheme, the original host and
// path, and only the nx_source and gift_id query parameters. When no usable
// href is available (no match, no href attribute, or an unparseable href) the
// first URL that xurls' relaxed matcher finds in the text of the article
// content is used instead. The candidate is finally passed through ExpandURL2
// together with fbGameURL.
//
// Errors: a failing search is returned as is; "cannot find gift url" when no
// candidate was found; "cannot find gift url - ExpandURL2" when expansion
// fails.
func GetGiftURL(htmlNode gokogirixml.Node) (string, error) {
	matches, err := htmlNode.Search(xPathGiftURL)
	if err != nil {
		// NOTE(review): the fallback searches xPathImageURL2, an image
		// expression — looks copy-pasted; confirm this is intended.
		if matches, err = htmlNode.Search(xPathImageURL2); err != nil {
			return "", err
		}
	}

	giftURL := ""
	hrefUsable := false
	if len(matches) > 0 {
		if href := matches[0].Attribute("href"); href != nil {
			parsed, parseErr := url.Parse(html.UnescapeString(href.Value()))
			if parseErr == nil {
				hrefUsable = true

				source := parsed.Query()
				kept := url.Values{}
				kept.Set("nx_source", source.Get("nx_source"))
				kept.Set("gift_id", source.Get("gift_id"))

				rebuilt := url.URL{
					Scheme:   "https",
					Host:     parsed.Host,
					Path:     parsed.Path,
					RawQuery: kept.Encode(),
				}
				giftURL = rebuilt.String()
			}
		}
	}

	if !hrefUsable {
		content, searchErr := htmlNode.Search(xPathArticleContent)
		if searchErr != nil {
			return "", searchErr
		}
		if len(content) > 0 {
			//TODO: Fix and check all URLs
			giftURL = xurls.Relaxed().FindString(content[0].Content())
		}
	}

	if giftURL == "" {
		return "", errors.New("cannot find gift url")
	}

	expanded, err := ExpandURL2(giftURL, fbGameURL)
	if err != nil {
		return "", errors.New("cannot find gift url - ExpandURL2")
	}
	return expanded, nil
}
// GetPostURL returns the expanded canonical URL of a post.
//
// If the first node matching xPathPostingURL carries a parseable href
// attribute, the canonical address
// https://www.facebook.com/herowarsgame/posts/<TopLevelPostID> is built from
// the package-level dataFT, which GetTimeStamp fills in. The href itself only
// acts as a presence/validity check: none of its components end up in the
// result. The candidate (empty when no usable href exists) is then passed
// through ExpandURL2 together with fbPageURL.
//
// Errors: a failing search is returned as is; an error is returned instead of
// panicking when dataFT has not been set; "cannot find post url - ExpandURL2"
// when expansion fails.
func GetPostURL(htmlNode gokogirixml.Node) (string, error) {
	results, err := htmlNode.Search(xPathPostingURL)
	if err != nil {
		return "", err
	}

	var genURL string
	if len(results) > 0 {
		if attrib := results[0].Attribute("href"); attrib != nil {
			_, parseErr := url.Parse(html.UnescapeString(attrib.Value()))
			if parseErr == nil {
				// dataFT is only set after a successful GetTimeStamp call.
				if dataFT == nil {
					return "", errors.New("cannot find post url - missing data-ft")
				}
				canonical := url.URL{
					Scheme: "https",
					Host:   "www.facebook.com",
					// URL.String inserts the leading slash between host and path.
					Path: `herowarsgame/posts/` + dataFT.TopLevelPostID,
				}
				genURL = canonical.String()
			}
		}
	}

	resultURL, err := ExpandURL2(genURL, fbPageURL)
	if err != nil {
		return "", errors.New("cannot find post url - ExpandURL2")
	}
	return resultURL, nil
}
// object is the minimal surface QuerySelector and QuerySelectorEach need:
// anything that can resolve a selector string to a goquery selection. In this
// file it is satisfied by the *goquery.Document values passed to those helpers.
type object interface {
Find(string) *goquery.Selection
}
// QuerySelector returns the first element below s that matches selector. The
// returned selection is empty when nothing matches.
func QuerySelector(s object, selector string) *goquery.Selection {
	matches := s.Find(selector)
	return matches.First()
}
// QuerySelectorEach calls mf once for every element below s that matches
// selector, passing the element's index and a selection holding just that
// element. It returns the selection of all matches.
func QuerySelectorEach(s object, selector string, mf func(i int, selection *goquery.Selection)) *goquery.Selection {
	matches := s.Find(selector)
	return matches.Each(mf)
}
// ParseTimeStamp formats utime, a Unix time in seconds, as an RFC 3339 string
// in the process's local time zone. The returned error is always nil.
func ParseTimeStamp(utime int64) (string, error) {
	return time.Unix(utime, 0).Format(time.RFC3339), nil
}
// GetTimeStamp returns the publish time of a post as an RFC 3339 string.
//
// It reads the data-ft attribute of the first node matching xPathArticle,
// unescapes its HTML entities, decodes it with UnmarshalDataFT and formats the
// publish time recorded in the page insights of the post's own page ID.
//
// Side effect: on successful decoding the result is stored in the
// package-level dataFT, which GetPostURL reads afterwards. dataFT is left
// untouched when decoding fails.
//
// Errors: a failing search or decode is returned as is; "cannot find
// timestamp" when no node matched or the node has no data-ft attribute.
func GetTimeStamp(htmlNode gokogirixml.Node) (string, error) {
	results, err := htmlNode.Search(xPathArticle)
	if err != nil {
		return "", err
	}
	// A search that matches nothing is not an error; guard the index below.
	if len(results) == 0 {
		return "", errors.New("cannot find timestamp")
	}

	attrib := results[0].Attribute("data-ft")
	if attrib == nil {
		return "", errors.New("cannot find timestamp")
	}

	normalJSON := html.UnescapeString(attrib.Value())
	parsed, err := UnmarshalDataFT([]byte(normalJSON))
	if err != nil {
		return "", err
	}
	// Publish only successfully decoded data to the shared state.
	dataFT = &parsed

	pid := parsed.PageID
	return ParseTimeStamp(parsed.PageInsights[pid].PostContext.PublishTime)
}
// ProfileLink is the author link of a post as extracted by GetProfileLink.
type ProfileLink struct {
// Name is the text of the profile anchor.
Name string
// URL is the value of the anchor's href attribute, stored unmodified.
URL string
}
// GetProfileLink extracts the author's profile link from doc.
//
// The first "a.profileLink" element is used or, if there is none, the first
// "span.fwb.fcg > a" element. An error is returned when the chosen element has
// no text or no href attribute.
func GetProfileLink(doc *goquery.Document) (*ProfileLink, error) {
	anchor := QuerySelector(doc, "a.profileLink")
	if anchor.Length() == 0 {
		anchor = QuerySelector(doc, "span.fwb.fcg > a")
	}

	name := anchor.Text()
	if name == "" {
		return nil, errors.New("cannot find name of profile link")
	}

	href, found := anchor.Attr("href")
	if !found {
		return nil, errors.New("cannot find url of profile link")
	}

	return &ProfileLink{Name: name, URL: href}, nil
}
// GetBlogspotTimeStamp returns the title attribute of the first
// "a.timestamp-link > abbr" element, unmodified. It returns an error when the
// element or the attribute is missing.
func GetBlogspotTimeStamp(doc *goquery.Document) (string, error) {
	stamp, found := QuerySelector(doc, "a.timestamp-link > abbr").Attr("title")
	if !found {
		return "", errors.New("cannot find timestamp")
	}
	return stamp, nil
}
// GetBlogspotTitle returns the whitespace-trimmed text of the first
// "h3.post-title" element, or "" when there is none. The error is always nil.
func GetBlogspotTitle(doc *goquery.Document) (string, error) {
	heading := QuerySelector(doc, "h3.post-title").Text()
	return strings.TrimSpace(heading), nil
}
// GetBlogspotContent returns the inner HTML of the first "div.post-body"
// element with every line prefixed by a single space. Lines are joined with
// "\n"; line terminators of the input ("\n" or "\r\n") are not kept. A missing
// or empty post body yields "".
//
// It returns an error when the HTML cannot be rendered or scanned.
func GetBlogspotContent(doc *goquery.Document) (string, error) {
	body := QuerySelector(doc, "div.post-body")
	markup, err := body.Html()
	if err != nil {
		return "", err
	}

	scanner := bufio.NewScanner(strings.NewReader(markup))
	// The scanner's default token limit is 64 KiB, which post bodies emitted
	// on a single long line exceed (bufio.ErrTooLong). No line can be longer
	// than the whole input, so allow exactly that much.
	scanner.Buffer(nil, len(markup)+1)

	var lines []string
	for scanner.Scan() {
		lines = append(lines, " "+scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}

	return strings.Join(lines, "\n"), nil
}
// GetBlogspotURL returns the content attribute of the page's og:url meta
// element. It returns an error when the element or the attribute is missing.
func GetBlogspotURL(doc *goquery.Document) (string, error) {
	address, found := QuerySelector(doc, "meta[property='og:url']").Attr("content")
	if !found {
		return "", errors.New("cannot find url")
	}
	return address, nil
}
// GetBlogspotSummary returns the whitespace-trimmed content attribute of the
// page's og:description meta element. It returns an error when the element or
// the attribute is missing.
func GetBlogspotSummary(doc *goquery.Document) (string, error) {
	description, found := QuerySelector(doc, "meta[property='og:description']").Attr("content")
	if !found {
		return "", errors.New("cannot find summary")
	}
	return strings.TrimSpace(description), nil
}
// GetBlogspotAuthor returns the text of the first "span.post-author > span.fn"
// element, or "" when there is none. The error is always nil.
func GetBlogspotAuthor(doc *goquery.Document) (string, error) {
	author := QuerySelector(doc, "span.post-author > span.fn")
	return author.Text(), nil
}
// GetBlogspotTags returns the texts of all "span.post-labels > a" elements in
// document order, separated by ", ". A separator is only written once some
// text has been collected, so leading empty labels produce no separator. The
// error is always nil.
func GetBlogspotTags(doc *goquery.Document) (string, error) {
	var joined strings.Builder
	doc.Find("span.post-labels > a").Each(func(_ int, link *goquery.Selection) {
		if joined.Len() > 0 {
			joined.WriteString(", ")
		}
		joined.WriteString(link.Text())
	})
	return joined.String(), nil
}
// ParseBlogspotPost fills an FBPostData from a Blogspot page.
//
// The fields are extracted in this order: TimeStamp, Title, Content, PostURL,
// Summary, Author, Tags. Extraction stops at the first step that fails; the
// record filled so far is returned together with that step's error.
func ParseBlogspotPost(doc *goquery.Document) (*FBPostData, error) {
	post := &FBPostData{}

	steps := []struct {
		target  *string
		extract func(*goquery.Document) (string, error)
	}{
		{&post.TimeStamp, GetBlogspotTimeStamp},
		{&post.Title, GetBlogspotTitle},
		{&post.Content, GetBlogspotContent},
		{&post.PostURL, GetBlogspotURL},
		{&post.Summary, GetBlogspotSummary},
		{&post.Author, GetBlogspotAuthor},
		{&post.Tags, GetBlogspotTags},
	}

	for _, step := range steps {
		value, err := step.extract(doc)
		// The field is assigned even when the step fails.
		*step.target = value
		if err != nil {
			return post, err
		}
	}

	return post, nil
}