first commit
This commit is contained in:
691
facebook.go
Normal file
691
facebook.go
Normal file
@@ -0,0 +1,691 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"html"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/jbowtie/gokogiri"
|
||||
gokogirixml "github.com/jbowtie/gokogiri/xml"
|
||||
"github.com/lunny/html2md"
|
||||
"github.com/microcosm-cc/bluemonday"
|
||||
"mvdan.cc/xurls/v2"
|
||||
)
|
||||
|
||||
// XPath expressions used to locate the parts of a single post inside the
// parsed HTML. Every expression is anchored on xPathArticle.
const (
	// xPathArticle selects the first <article> element whose class list
	// contains both "_55wo" and "_5rgr".
	xPathArticle string = `(//*/article[contains(concat(" ", normalize-space(@class)," "), " _55wo ") and contains(concat(" ", normalize-space(@class)," "), " _5rgr ")])[1]`
	// xPathArticleContent selects the <span> holding the post text, inside
	// the div carrying the classes "_5rgt" and "_5nk5".
	xPathArticleContent string = xPathArticle + `/div/div[contains(concat(" ", normalize-space(@class)," "), " _5rgt ") and contains(concat(" ", normalize-space(@class)," "), " _5nk5 ")]/span`
	// xPathImageURL is the first location probed for the <i> element whose
	// style attribute carries the image URL.
	xPathImageURL string = xPathArticle + `/div/div[2]/div[1]/a/div/div/i`
	// xPathImageURL2 is the second location probed for the image <i> element.
	xPathImageURL2 string = xPathArticle + `/div/div[2]/section/div/i`
	// xPathImageURL3 is the third location probed for the image <i> element.
	xPathImageURL3 string = xPathArticle + `/div/div[2]/section/div/div/i` // Video Preview
	// xPathGiftURL selects the anchor whose href is read as the gift link.
	xPathGiftURL string = xPathArticle + `/div/div[2]/section/a`
	// xPathPostingURL selects the anchor in the article header whose href is
	// read when building the post URL.
	xPathPostingURL string = xPathArticle + `/div/header/div[2]/div/div/div[1]/div/a`
)
|
||||
|
||||
// Internal URL constants.
const (
	// fbPageURL is passed to ExpandURL2 when resolving a post URL.
	fbPageURL string = "https://www.facebook.com/herowarsgame/"
	// Discord webhook endpoints; they are not referenced in this part of the
	// file. NOTE(review): presumably live/dev posting targets — confirm
	// against the code that uses them.
	webhookLive          string = "https://discordapp.com/api/webhooks/..."
	webhookLiveExcelsior string = "https://discordapp.com/api/webhooks/..."
	webhookDev           string = "https://discordapp.com/api/webhooks/..."

	// fbGameURL is the URL fragment searched for in expanded URLs; it is
	// passed to ExpandURL2 when resolving a gift link.
	fbGameURL string = "apps.facebook.com/mobaheroes"
)
|
||||
|
||||
var (
	// dataFT holds the decoded "data-ft" attribute of the post currently
	// being parsed. GetTimeStamp writes it, GetPostURL reads it and ParsePost
	// resets it to nil.
	dataFT *DataFT

	// lastUpdatePosted is not referenced in this part of the file.
	// NOTE(review): presumably tracks the most recently published post —
	// confirm against its users.
	lastUpdatePosted string
	// regexStyleImage matches a CSS "url(...);" declaration inside a style
	// attribute. The ".*" is greedy, so the match runs up to the last ");"
	// on the line.
	regexStyleImage = regexp.MustCompile(`(?m)url\((.*)\);`)
)
|
||||
|
||||
// Keywords searched for in the post text; every keyword found becomes part
// of the post title built by ParsePost.
const (
	FreeTitanArtifact string = "FREE Titan Artifact"
	FreeSilverCaskets string = "FREE Silver Caskets"
	FreeSoulStones    string = "Soul Stones"
	FreeSkinStones    string = "Skin Stones"
	// ActionKeepTheAmount additionally makes ParsePost use the post URL as
	// the gift URL.
	ActionKeepTheAmount      string = "Keep the amount"
	FreeWinterfestBaubles    string = "Winterfest Baubles"
	FreeTopFanPackage        string = "Top Fan"
	FreeEnergyForFee         string = "ENERGY FOR FREE"
	WinterfestRankingRewards string = "Winterfest ranking rewards"
)
|
||||
|
||||
// FBPostData is the result of parsing one post. ParsePost fills the
// Facebook-related fields; ParseBlogspotPost fills TimeStamp, Title, Content,
// PostURL, Summary, Author and Tags.
type FBPostData struct {
	PostURL     string       // URL of the post itself
	TimeStamp   string       // publish time as text (RFC 3339 for Facebook posts)
	ProfileLink *ProfileLink // not populated by ParsePost (the call is commented out there)
	GiftURL     string       // expanded gift link found in the post
	ImageURL    string       // image URL taken from the post's style attribute
	Content     string       // post text (Markdown for Facebook posts)
	Summary     string       // blogspot only: og:description
	Title       string       // matched keywords joined by "+", or the blogspot post title
	Author      string       // blogspot only
	Tags        string       // blogspot only: labels joined by ", "
}
|
||||
|
||||
// ParsePost ParsePost
|
||||
func ParsePost(s, PostURL string) (*FBPostData, error) {
|
||||
fb := FBPostData{PostURL: PostURL}
|
||||
|
||||
docKogiri, err := gokogiri.ParseHtml([]byte(s))
|
||||
if err != nil {
|
||||
return &fb, err
|
||||
}
|
||||
defer docKogiri.Free()
|
||||
htmlNode := docKogiri.Root().FirstChild()
|
||||
|
||||
//doc, err := goquery.NewDocumentFromReader(strings.NewReader(s))
|
||||
//if err != nil {
|
||||
// return &fb, err
|
||||
//}
|
||||
|
||||
fb.TimeStamp, err = GetTimeStamp(htmlNode)
|
||||
if err != nil {
|
||||
return &fb, err
|
||||
}
|
||||
|
||||
//fmt.Printf("\n\n%#v\n\n", dataFT)
|
||||
|
||||
//fb.ProfileLink, err = GetProfileLink(doc)
|
||||
//if err != nil {
|
||||
// return &fb, err
|
||||
//}
|
||||
|
||||
fb.ImageURL, err = GetImageURL(htmlNode)
|
||||
if err != nil {
|
||||
return &fb, err
|
||||
}
|
||||
|
||||
fb.GiftURL, err = GetGiftURL(htmlNode)
|
||||
if err != nil {
|
||||
return &fb, err
|
||||
}
|
||||
|
||||
fb.PostURL, err = GetPostURL(htmlNode)
|
||||
if err != nil {
|
||||
fb.PostURL = PostURL
|
||||
return &fb, err
|
||||
}
|
||||
|
||||
fb.Content, err = GetContent(htmlNode)
|
||||
if err != nil {
|
||||
return &fb, err
|
||||
}
|
||||
|
||||
if strings.Contains(fb.Content, FreeSilverCaskets) {
|
||||
if len(fb.Title) == 0 {
|
||||
fb.Title = FreeSilverCaskets
|
||||
} else {
|
||||
fb.Title = FreeSilverCaskets + "+" + fb.Title
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(fb.Content, FreeTitanArtifact) {
|
||||
if len(fb.Title) == 0 {
|
||||
fb.Title = FreeTitanArtifact
|
||||
} else {
|
||||
fb.Title = FreeTitanArtifact + "+" + fb.Title
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(fb.Content, FreeSoulStones) {
|
||||
if len(fb.Title) == 0 {
|
||||
fb.Title = FreeSoulStones
|
||||
} else {
|
||||
fb.Title = FreeSoulStones + "+" + fb.Title
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(fb.Content, FreeSkinStones) {
|
||||
if len(fb.Title) == 0 {
|
||||
fb.Title = FreeSkinStones
|
||||
} else {
|
||||
fb.Title = FreeSkinStones + "+" + fb.Title
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(fb.Content, ActionKeepTheAmount) {
|
||||
if len(fb.Title) == 0 {
|
||||
fb.Title = ActionKeepTheAmount
|
||||
} else {
|
||||
fb.Title = ActionKeepTheAmount + "+" + fb.Title
|
||||
}
|
||||
fb.GiftURL = fb.PostURL
|
||||
}
|
||||
|
||||
if strings.Contains(fb.Content, FreeWinterfestBaubles) {
|
||||
if len(fb.Title) == 0 {
|
||||
fb.Title = FreeWinterfestBaubles
|
||||
} else {
|
||||
fb.Title = FreeWinterfestBaubles + "+" + fb.Title
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(fb.Content, FreeTopFanPackage) {
|
||||
if len(fb.Title) == 0 {
|
||||
fb.Title = FreeTopFanPackage
|
||||
} else {
|
||||
fb.Title = FreeTopFanPackage + "+" + fb.Title
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(fb.Content, FreeEnergyForFee) {
|
||||
if len(fb.Title) == 0 {
|
||||
fb.Title = FreeEnergyForFee
|
||||
} else {
|
||||
fb.Title = FreeEnergyForFee + "+" + fb.Title
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(fb.Content, WinterfestRankingRewards) {
|
||||
if len(fb.Title) == 0 {
|
||||
fb.Title = WinterfestRankingRewards
|
||||
} else {
|
||||
fb.Title = WinterfestRankingRewards + "+" + fb.Title
|
||||
}
|
||||
}
|
||||
|
||||
if len(fb.Title) == 0 {
|
||||
fb.Title = "unknown - need to implemented"
|
||||
}
|
||||
defer func() {
|
||||
dataFT = nil
|
||||
}()
|
||||
|
||||
return &fb, nil
|
||||
}
|
||||
|
||||
// Parse Parse
|
||||
func Parse(url string) (*FBPostData, error) {
|
||||
doc, err := goquery.NewDocument(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if strings.Contains(url, ".blogspot.") {
|
||||
return ParseBlogspotPost(doc)
|
||||
}
|
||||
|
||||
// If not login, post looks like
|
||||
// <div class="hidden_elem"><code id="u_0_p"><!-- ... --></code></div>
|
||||
s := QuerySelector(doc, "div.hidden_elem > code")
|
||||
cmt, err := s.Html()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(cmt) != 0 {
|
||||
return ParsePost(cmt, url)
|
||||
}
|
||||
|
||||
s = QuerySelector(doc, "div._427x")
|
||||
cmt, err = s.Html()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ParsePost(cmt, url)
|
||||
}
|
||||
|
||||
// ParseAll ParseAll
|
||||
func ParseAll(url string) ([]*FBPostData, error) {
|
||||
doc, err := goquery.NewDocument(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allFbPosts := []*FBPostData{}
|
||||
|
||||
QuerySelectorEach(doc, "div._427x", func(i int, selected *goquery.Selection) {
|
||||
cmt, err := selected.Html()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
fbPost, err := ParsePost(cmt, url)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
allFbPosts = append(allFbPosts, fbPost)
|
||||
})
|
||||
|
||||
return allFbPosts, nil
|
||||
}
|
||||
|
||||
// GetContent GetContent
|
||||
func GetContent(htmlNode gokogirixml.Node) (string, error) {
|
||||
results, err := htmlNode.Search(xPathArticleContent)
|
||||
if err != nil {
|
||||
//fmt.Printf("ERR: %#v -- %#v\n", results, err)
|
||||
return "", err
|
||||
}
|
||||
//fmt.Printf("ERR: %#v -- %#v\n", results, err)
|
||||
if len(results) > 0 {
|
||||
if resultHTML := results[0].InnerHtml(); resultHTML != "" {
|
||||
|
||||
bmsanizer := bluemonday.StrictPolicy()
|
||||
bmsanizer.AllowAttrs("href").OnElements("a")
|
||||
bmsanizer.AllowElements("p")
|
||||
bmsanizer.RequireParseableURLs(true)
|
||||
htmlText := bmsanizer.SanitizeBytes([]byte(resultHTML))
|
||||
|
||||
content := strings.Join(strings.Fields(string(htmlText)), " ")
|
||||
content = html2md.Convert(content)
|
||||
content = html.UnescapeString(content)
|
||||
content = strings.TrimSpace(content)
|
||||
|
||||
return content, nil
|
||||
}
|
||||
}
|
||||
return "", errors.New("no content found")
|
||||
}
|
||||
|
||||
// GetImageURL GetImageURL
|
||||
func GetImageURL(htmlNode gokogirixml.Node) (string, error) {
|
||||
|
||||
results, err := htmlNode.Search(xPathImageURL)
|
||||
if err != nil {
|
||||
//fmt.Printf("ERR: %#v -- %#v\n", results, err)
|
||||
return "", err
|
||||
}
|
||||
//fmt.Printf("RESULT: %#v -- %#v\n", results, err)
|
||||
// search for the second possible image
|
||||
if len(results) == 0 {
|
||||
results, err = htmlNode.Search(xPathImageURL2)
|
||||
if err != nil {
|
||||
//fmt.Printf("ERR2: %#v -- %#v\n", results, err)
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
// Video Preview
|
||||
if len(results) == 0 {
|
||||
results, err = htmlNode.Search(xPathImageURL3)
|
||||
if err != nil {
|
||||
//fmt.Printf("ERR2: %#v -- %#v\n", results, err)
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
//fmt.Printf("RESULT2: %#v -- %#v\n", results, err)
|
||||
if len(results) > 0 {
|
||||
if attrib := results[0].Attribute("style"); attrib != nil {
|
||||
styleContent := attrib.Value()
|
||||
codedImageURL := regexStyleImage.FindString(styleContent)
|
||||
genURL := strings.ReplaceAll(codedImageURL, `\3a `, `:`)
|
||||
genURL = strings.ReplaceAll(genURL, `\3d `, `=`)
|
||||
genURL = strings.ReplaceAll(genURL, `\26 `, `&`)
|
||||
genURL = strings.ReplaceAll(genURL, `\25 `, `%`)
|
||||
genURL = strings.TrimPrefix(genURL, `url('`)
|
||||
genURL = strings.TrimSuffix(genURL, `');`)
|
||||
return genURL, nil
|
||||
|
||||
}
|
||||
}
|
||||
return "", errors.New("cannot find image url")
|
||||
//s := QuerySelector(doc, "img.scaledImageFitHeight")
|
||||
//if s.Length() == 0 {
|
||||
// s = QuerySelector(doc, "img.scaledImageFitWidth")
|
||||
//}
|
||||
//
|
||||
//url, ok := s.Attr("src")
|
||||
//if !ok {
|
||||
// return "", errors.New("cannot find image url")
|
||||
//}
|
||||
//
|
||||
//return url, nil
|
||||
}
|
||||
|
||||
// GetGiftURL GetGiftURL
|
||||
func GetGiftURL(htmlNode gokogirixml.Node) (string, error) {
|
||||
|
||||
//s := QuerySelector(doc, "div._6ks > a")
|
||||
//
|
||||
//url, ok := s.Attr("href")
|
||||
|
||||
results, err := htmlNode.Search(xPathGiftURL)
|
||||
if err != nil {
|
||||
// search for the second possible image
|
||||
results, err = htmlNode.Search(xPathImageURL2)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
var genURL string
|
||||
var attrib *gokogirixml.AttributeNode
|
||||
if len(results) > 0 {
|
||||
attrib = results[0].Attribute("href")
|
||||
if attrib != nil {
|
||||
tmpGiftURL := attrib.Value()
|
||||
tmpGiftURL = html.UnescapeString(tmpGiftURL)
|
||||
tmpGiftURL2, err := url.Parse(tmpGiftURL)
|
||||
if err != nil {
|
||||
attrib = nil
|
||||
goto nextGiftLinkChecker
|
||||
}
|
||||
|
||||
//fmt.Printf("%#v\n", tmpGiftURL2)
|
||||
|
||||
newQuery := url.Values{}
|
||||
oldQuery := tmpGiftURL2.Query()
|
||||
newQuery.Set("nx_source", oldQuery.Get("nx_source"))
|
||||
newQuery.Set("gift_id", oldQuery.Get("gift_id"))
|
||||
|
||||
tmpURLGen := url.URL{
|
||||
Scheme: tmpGiftURL2.Scheme,
|
||||
Host: tmpGiftURL2.Host,
|
||||
Path: tmpGiftURL2.Path,
|
||||
RawQuery: newQuery.Encode(),
|
||||
}
|
||||
if tmpURLGen.Scheme != "https" {
|
||||
tmpURLGen.Scheme = "https"
|
||||
}
|
||||
genURL = tmpURLGen.String()
|
||||
}
|
||||
}
|
||||
nextGiftLinkChecker:
|
||||
if attrib == nil {
|
||||
results, err := htmlNode.Search(xPathArticleContent)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(results) > 0 {
|
||||
//TODO: Fix and check all URLs
|
||||
rxRelaxed := xurls.Relaxed()
|
||||
genURL = rxRelaxed.FindString(results[0].Content())
|
||||
}
|
||||
}
|
||||
|
||||
if len(genURL) == 0 {
|
||||
return "", errors.New("cannot find gift url")
|
||||
}
|
||||
//fmt.Printf("%#v\n", genURL)
|
||||
resultURL, err := ExpandURL2(genURL, fbGameURL)
|
||||
if err != nil {
|
||||
return "", errors.New("cannot find gift url - ExpandURL2")
|
||||
}
|
||||
|
||||
return resultURL, nil
|
||||
}
|
||||
|
||||
// GetPostURL GetPostURL
|
||||
func GetPostURL(htmlNode gokogirixml.Node) (string, error) {
|
||||
|
||||
//s := QuerySelector(doc, "a._5pcq")
|
||||
//
|
||||
//url, ok := s.Attr("href")
|
||||
//if !ok {
|
||||
// //TODO: Fix and check all URLs
|
||||
// rxRelaxed := xurls.Relaxed()
|
||||
// url = rxRelaxed.FindString(doc.Text())
|
||||
//
|
||||
// if len(url) == 0 {
|
||||
// return "", errors.New("cannot find post url")
|
||||
// }
|
||||
//} else {
|
||||
// url = "https://www.facebook.com" + url
|
||||
//}
|
||||
results, err := htmlNode.Search(xPathPostingURL)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var genURL string
|
||||
var attrib *gokogirixml.AttributeNode
|
||||
if len(results) > 0 {
|
||||
attrib = results[0].Attribute("href")
|
||||
//fmt.Printf("%#v\n\n", attrib)
|
||||
if attrib != nil {
|
||||
tmpPostURL := attrib.Value()
|
||||
//fmt.Printf("%#v\n", tmpPostURL)
|
||||
tmpPostURL = html.UnescapeString(tmpPostURL)
|
||||
//fmt.Printf("%#v\n", tmpPostURL)
|
||||
tmpPostURL2, err := url.Parse(tmpPostURL)
|
||||
//fmt.Printf("%#v -- %#v\n", tmpPostURL2, err)
|
||||
if err != nil {
|
||||
attrib = nil
|
||||
goto nextPostLinkChecker
|
||||
}
|
||||
|
||||
//fmt.Printf("%#v\n", tmpPostURL2)
|
||||
|
||||
newQuery := url.Values{}
|
||||
oldQuery := tmpPostURL2.Query()
|
||||
newQuery.Set("nx_source", oldQuery.Get("nx_source"))
|
||||
newQuery.Set("gift_id", oldQuery.Get("gift_id"))
|
||||
|
||||
tmpURLGen := url.URL{
|
||||
Scheme: tmpPostURL2.Scheme,
|
||||
Host: tmpPostURL2.Host,
|
||||
Path: `herowarsgame/posts/` + dataFT.TopLevelPostID,
|
||||
}
|
||||
if tmpURLGen.Scheme != "https" {
|
||||
tmpURLGen.Scheme = "https"
|
||||
}
|
||||
if tmpURLGen.Host != "www.facebook.com" {
|
||||
tmpURLGen.Host = "www.facebook.com"
|
||||
}
|
||||
|
||||
// dataFT.
|
||||
|
||||
genURL = tmpURLGen.String()
|
||||
}
|
||||
//fmt.Printf("GENURL: %#v\n", genURL)
|
||||
}
|
||||
nextPostLinkChecker:
|
||||
|
||||
resultURL, err := ExpandURL2(genURL, fbPageURL)
|
||||
if err != nil {
|
||||
return "", errors.New("cannot find post url - ExpandURL2")
|
||||
}
|
||||
|
||||
return resultURL, nil
|
||||
}
|
||||
|
||||
// object is anything that can be searched with a selector string and returns
// a goquery selection; QuerySelector and QuerySelectorEach accept it.
type object interface {
	Find(string) *goquery.Selection
}
|
||||
|
||||
// QuerySelector QuerySelector
|
||||
func QuerySelector(s object, selector string) *goquery.Selection {
|
||||
return s.Find(selector).First()
|
||||
}
|
||||
|
||||
// QuerySelectorEach QuerySelectorEach
|
||||
func QuerySelectorEach(s object, selector string, mf func(i int, selection *goquery.Selection)) *goquery.Selection {
|
||||
return s.Find(selector).Each(mf)
|
||||
}
|
||||
|
||||
// ParseTimeStamp formats the Unix time utime (seconds since the epoch) as an
// RFC 3339 string in the local time zone. The returned error is always nil.
func ParseTimeStamp(utime int64) (string, error) {
	return time.Unix(utime, 0).Format(time.RFC3339), nil
}
|
||||
|
||||
// GetTimeStamp GetTimeStamp
|
||||
func GetTimeStamp(htmlNode gokogirixml.Node) (string, error) {
|
||||
//s := QuerySelector(doc, "._5ptz.timestamp.livetimestamp")
|
||||
//s := QuerySelector(doc, "abbr._5ptz")
|
||||
|
||||
results, err := htmlNode.Search(xPathArticle)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if attrib := results[0].Attribute("data-ft"); attrib != nil {
|
||||
json := attrib.Value()
|
||||
normalJSON := html.UnescapeString(json)
|
||||
dataFTinternal, err := UnmarshalDataFT([]byte(normalJSON))
|
||||
|
||||
dataFT = &dataFTinternal
|
||||
|
||||
if err != nil {
|
||||
|
||||
return "", err
|
||||
}
|
||||
pid := dataFTinternal.PageID
|
||||
return ParseTimeStamp(dataFTinternal.PageInsights[pid].PostContext.PublishTime)
|
||||
|
||||
}
|
||||
|
||||
return "", errors.New("cannot find timestamp")
|
||||
}
|
||||
|
||||
// ProfileLink is the name and link target of a post's profile link, as
// returned by GetProfileLink.
type ProfileLink struct {
	Name string // text of the link
	URL  string // value of the link's href attribute
}
|
||||
|
||||
// GetProfileLink GetProfileLink
|
||||
func GetProfileLink(doc *goquery.Document) (*ProfileLink, error) {
|
||||
s := QuerySelector(doc, "a.profileLink")
|
||||
if s.Length() == 0 {
|
||||
s = QuerySelector(doc, "span.fwb.fcg > a")
|
||||
}
|
||||
|
||||
pl := ProfileLink{}
|
||||
|
||||
pl.Name = s.Text()
|
||||
if pl.Name == "" {
|
||||
return nil, errors.New("cannot find name of profile link")
|
||||
}
|
||||
|
||||
url, ok := s.Attr("href")
|
||||
if !ok {
|
||||
return nil, errors.New("cannot find url of profile link")
|
||||
}
|
||||
pl.URL = url
|
||||
|
||||
return &pl, nil
|
||||
}
|
||||
|
||||
// GetBlogspotTimeStamp GetBlogspotTimeStamp
|
||||
func GetBlogspotTimeStamp(doc *goquery.Document) (string, error) {
|
||||
abbr := QuerySelector(doc, "a.timestamp-link > abbr")
|
||||
t, ok := abbr.Attr("title")
|
||||
if ok {
|
||||
return t, nil
|
||||
}
|
||||
|
||||
return "", errors.New("cannot find timestamp")
|
||||
}
|
||||
|
||||
// GetBlogspotTitle GetBlogspotTitle
|
||||
func GetBlogspotTitle(doc *goquery.Document) (string, error) {
|
||||
t := QuerySelector(doc, "h3.post-title")
|
||||
return strings.TrimSpace(t.Text()), nil
|
||||
}
|
||||
|
||||
//GetBlogspotContent GetBlogspotContent
|
||||
func GetBlogspotContent(doc *goquery.Document) (string, error) {
|
||||
c := QuerySelector(doc, "div.post-body")
|
||||
|
||||
s, err := c.Html()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var lines []string
|
||||
|
||||
scanner := bufio.NewScanner(strings.NewReader(s))
|
||||
for scanner.Scan() {
|
||||
lines = append(lines, " "+scanner.Text())
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return strings.Join(lines, "\n"), nil
|
||||
}
|
||||
|
||||
//GetBlogspotURL GetBlogspotURL
|
||||
func GetBlogspotURL(doc *goquery.Document) (string, error) {
|
||||
meta := QuerySelector(doc, "meta[property='og:url']")
|
||||
u, ok := meta.Attr("content")
|
||||
if ok {
|
||||
return u, nil
|
||||
}
|
||||
|
||||
return "", errors.New("cannot find url")
|
||||
}
|
||||
|
||||
// GetBlogspotSummary GetBlogspotSummary
|
||||
func GetBlogspotSummary(doc *goquery.Document) (string, error) {
|
||||
meta := QuerySelector(doc, "meta[property='og:description']")
|
||||
d, ok := meta.Attr("content")
|
||||
if ok {
|
||||
return strings.TrimSpace(d), nil
|
||||
}
|
||||
|
||||
return "", errors.New("cannot find summary")
|
||||
}
|
||||
|
||||
// GetBlogspotAuthor GetBlogspotAuthor
|
||||
func GetBlogspotAuthor(doc *goquery.Document) (string, error) {
|
||||
a := QuerySelector(doc, "span.post-author > span.fn")
|
||||
return a.Text(), nil
|
||||
}
|
||||
|
||||
//GetBlogspotTags GetBlogspotTags
|
||||
func GetBlogspotTags(doc *goquery.Document) (string, error) {
|
||||
s := doc.Find("span.post-labels > a")
|
||||
labels := ""
|
||||
s.Each(func(_ int, l *goquery.Selection) {
|
||||
if labels != "" {
|
||||
labels += ", "
|
||||
}
|
||||
labels += l.Text()
|
||||
})
|
||||
return labels, nil
|
||||
}
|
||||
|
||||
// ParseBlogspotPost ParseBlogspotPost
|
||||
func ParseBlogspotPost(doc *goquery.Document) (*FBPostData, error) {
|
||||
bs := FBPostData{}
|
||||
var err error
|
||||
|
||||
bs.TimeStamp, err = GetBlogspotTimeStamp(doc)
|
||||
if err != nil {
|
||||
return &bs, err
|
||||
}
|
||||
|
||||
bs.Title, err = GetBlogspotTitle(doc)
|
||||
if err != nil {
|
||||
return &bs, err
|
||||
}
|
||||
|
||||
bs.Content, err = GetBlogspotContent(doc)
|
||||
if err != nil {
|
||||
return &bs, err
|
||||
}
|
||||
|
||||
bs.PostURL, err = GetBlogspotURL(doc)
|
||||
if err != nil {
|
||||
return &bs, err
|
||||
}
|
||||
|
||||
bs.Summary, err = GetBlogspotSummary(doc)
|
||||
if err != nil {
|
||||
return &bs, err
|
||||
}
|
||||
|
||||
bs.Author, err = GetBlogspotAuthor(doc)
|
||||
if err != nil {
|
||||
return &bs, err
|
||||
}
|
||||
|
||||
bs.Tags, err = GetBlogspotTags(doc)
|
||||
if err != nil {
|
||||
return &bs, err
|
||||
}
|
||||
|
||||
return &bs, nil
|
||||
}
|
||||
Reference in New Issue
Block a user