From 25cdb9e9d96d4d7abfd5dfd7853a6c919a12457c Mon Sep 17 00:00:00 2001 From: Andreas Mieke Date: Sun, 28 Feb 2016 14:49:38 +0100 Subject: [PATCH] Adding and adjusting RegExps for more episode formats --- config/config.go | 1 + config/gronkh.json.example | 3 ++- gparser/episodeparser.go | 3 +++ gparser/feedparser.go | 3 +++ gparser/main.go | 2 ++ 5 files changed, 11 insertions(+), 1 deletion(-) diff --git a/config/config.go b/config/config.go index 328e489..9b2426b 100644 --- a/config/config.go +++ b/config/config.go @@ -8,6 +8,7 @@ import ( type Config struct { DatabaseConnection string EpisodeRegex string + RemoveRegex string YoutubeKey string GronkhUrl string ImageDirectory string diff --git a/config/gronkh.json.example b/config/gronkh.json.example index fc32122..7284cb4 100644 --- a/config/gronkh.json.example +++ b/config/gronkh.json.example @@ -1,5 +1,6 @@ { - "EpisodeRegex": "^(?:Folge |Folge |Special |Folge S01E|Folge ß|Folge ESIDENT EVIL REVELATIONS 2 \\[|Folge #)([0-9]+)", + "EpisodeRegex": "^(?:Folge |Folge |Special |Folge S01E|Folge ß|Folge ESIDENT EVIL REVELATIONS 2 \\[|Folge #|Folge 1 – Galaktischer Krie\\: |Folge 15; Zurück in der neuen Wel\\: |Folge 6½ \\[German\\] – Dr\\. Kleiners Vortra\\: |Folge ESIDENT EVIL REVELATIONS 2 \\[)([0-9]+)(?:½\\: |\\: | – |; |\\.5\\: |½ \\[German\\] – | Verwirrspiele & Hinterhalt\\: 18 |] |\\. KIPPLASTER RACING Schwadro\\: 95\\. |[,.][0-9a-zA-Z]+\\: | Titanenlord Ursego\\: 41 |[a-zA-Z]\\: )", + "RemoveRegex": "^(?:Folge Special\\: |Folge special\\: | – Let’s Play [1080][PS4\\: SIDENT EVIL REVELATIONS 2 [23] Getrennt..! – Let’s Play [1080][PS4]|Folge pecial\\: )", "YoutubeKey": "FOOOBAAAARRRRRRR", "DatabaseConnection": "host=localhost user=gronkhde dbname=gronkhde sslmode=disable", "GronkhUrl": "http://5.62.67.8", diff --git a/gparser/episodeparser.go b/gparser/episodeparser.go index 36a9b34..da9d1e6 100644 --- a/gparser/episodeparser.go +++ b/gparser/episodeparser.go @@ -98,9 +98,12 @@ func ParseEpisode(i int, s *goquery.Selection) { EP.Episode.Int64 = 0 } EP.Episode.Int64 = int64(num) + EP.Name.String = episodeRegex.ReplaceAllString(EP.Name.String, "") + EP.Name.String = removeRegex.ReplaceAllString(EP.Name.String, "") } else { log.Printf("WAR EP %s: Name does not match RegEx", slug) EP.Episode.Int64 = 0 + EP.Name.String = removeRegex.ReplaceAllString(EP.Name.String, "") } EP.Season.Int64 = 1 doc.Find(".article > p").Each(func(i int, s *goquery.Selection) { diff --git a/gparser/feedparser.go b/gparser/feedparser.go index 038453e..bab46a7 100644 --- a/gparser/feedparser.go +++ b/gparser/feedparser.go @@ -108,9 +108,12 @@ func ParseFeedEpisode(u string) { if match := episodeRegex.FindStringSubmatch(EP.Name.String); len(match) > 0 { num, _ := strconv.Atoi(match[1]) EP.Episode.Int64 = int64(num) + EP.Name.String = episodeRegex.ReplaceAllString(EP.Name.String, "") + EP.Name.String = removeRegex.ReplaceAllString(EP.Name.String, "") } else { log.Printf("WAR RSS %s: Name does not match RegEx", slug) EP.Episode.Int64 = 0 + EP.Name.String = removeRegex.ReplaceAllString(EP.Name.String, "") } doc.Find(".article > p").Each(func(i int, s *goquery.Selection) { EP.Descr.String += s.Text() + "\n" diff --git a/gparser/main.go b/gparser/main.go index 5a55827..1e3b23d 100644 --- a/gparser/main.go +++ b/gparser/main.go @@ -12,6 +12,7 @@ import ( var conf config.Config var episodeRegex *regexp.Regexp +var removeRegex *regexp.Regexp var wg sync.WaitGroup func main() { @@ -28,6 +29,7 @@ func main() { return } episodeRegex = regexp.MustCompile(config.C.EpisodeRegex) + removeRegex = regexp.MustCompile(config.C.RemoveRegex) if err = os.MkdirAll(config.C.ImageDirectory, 0775); err != nil { log.Fatalf("FAT Could not create ImageDirectory, error: %+v", err)