// 예전에 만들어봤던 카카오社 채용공고 크롤러이다.
// (A crawler for Kakao job postings that I put together a while ago.)
package main
import (
	"bufio"
	"fmt"
	"log"
	"net/http"
	"os"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
// jobCategory names one "part" filter of the job listing and how many result
// pages of it are crawled.
type jobCategory struct {
	part  string
	pages int
}

const (
	// kakaoIndexURL is the landing page whose navigation menu is inspected at startup.
	kakaoIndexURL = "https://careers.kakao.com/index"

	// jobsURLFormat takes the page number and the part name, in that order.
	jobsURLFormat = "https://careers.kakao.com/jobs?company=ALL&keyword=&page=%d&part=%s&skilset="

	// resultSeparator frames the list of matching postings on the console.
	resultSeparator = "〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓"

	// fetchTimeout bounds every HTTP request so a stalled server cannot hang the tool.
	fetchTimeout = 15 * time.Second
)

// jobCategories lists the crawled categories in the order they are visited.
// The page counts are fixed, not discovered from the site.
var jobCategories = []jobCategory{
	{part: "TECHNOLOGY", pages: 20},
	{part: "BRAND_MARKETING", pages: 1},
	{part: "BUSINESS_SERVICES", pages: 7},
	{part: "STAFF", pages: 3},
}

// httpClient is shared by all requests and carries the request timeout.
var httpClient = &http.Client{Timeout: fetchTimeout}

// main inspects the careers landing page, shows the start menu, and depending
// on the user's choice prints the patch note or crawls the job listings for a
// keyword and reports how many postings matched.
func main() {
	loc, err := time.LoadLocation("Asia/Seoul")
	if err != nil {
		// No tz database available: fall back to a fixed UTC+9 zone. Only the
		// wall-clock time is printed, so the formatted output is the same.
		loc = time.FixedZone("KST", 9*60*60)
	}
	startedAt := time.Now().In(loc).Format("2006-01-02 15:04:05")

	index, err := fetchDocument(kakaoIndexURL)
	if err != nil {
		log.Fatal(err)
	}
	reportNewMenus(index)

	// One buffered reader is used for every prompt so no input is lost
	// between reads.
	stdin := bufio.NewReader(os.Stdin)

	fmt.Println("\n********************************")
	fmt.Println("* 2020-05-08, 20:28 *")
	fmt.Println("* 카카오 채용공고 파싱 Ver.0.2 *")
	fmt.Println("* 패치노트를 보려면: PatchNote\n* 실행시키려면 : Go")
	fmt.Println("********************************")

	switch readLine(stdin) {
	case "PatchNote":
		fmt.Print("\n2020-05-08, 21 : 40 링크 중복 발생 제거\n\n")
		fmt.Println("How to ver.1.2 == href 파싱")
	case "Go":
		runSearch(stdin, startedAt)
	default:
		// Previously any other input ended the program without a word.
		fmt.Println("알 수 없는 입력입니다. PatchNote 또는 Go 를 입력해 주세요.")
	}
}

// readLine returns the next line from r with surrounding whitespace removed.
func readLine(r *bufio.Reader) string {
	// The error is deliberately ignored: on EOF or a read failure whatever
	// was read so far (possibly nothing) is treated as the user's answer.
	line, _ := r.ReadString('\n')
	return strings.TrimSpace(line)
}

// fetchDocument downloads pageURL and parses it into a goquery document.
// The response body is closed before returning. A transport failure, a
// non-200 status, or a parse failure is returned as an error.
func fetchDocument(pageURL string) (*goquery.Document, error) {
	resp, err := httpClient.Get(pageURL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("GET %s: unexpected status %s", pageURL, resp.Status)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("parsing %s: %w", pageURL, err)
	}
	return doc, nil
}

// reportNewMenus prints a notice for every navigation sub-menu on the landing
// page whose text matches the marker string.
func reportNewMenus(doc *goquery.Document) {
	doc.Find("ul.list_gnb").Find("div.list_sub").Each(func(_ int, sub *goquery.Selection) {
		menu := sub.Find("div.list_sub").Text()
		// NOTE(review): the marker looks like a CSS class list, not menu
		// text, so this probably never matches. Kept unchanged because the
		// intended selector is unclear.
		if strings.Contains(menu, "a.on link_sub fst") {
			fmt.Println("새로운 공지사항 메뉴가 올라왔슴!")
			fmt.Println(menu)
			fmt.Println()
		}
	})
}

// runSearch asks for a keyword, crawls every page of every category, prints
// the matching postings, and finishes with a summary and a wait for Enter.
// startedAt is the already formatted timestamp shown in the summary.
func runSearch(stdin *bufio.Reader, startedAt string) {
	fmt.Print("\n원하는 키워드 혹은 기술태그를 입력 : ")
	// The whole line is the keyword, so tags containing spaces work too.
	keyword := readLine(stdin)
	fmt.Printf("\n%s 에 대한 채용공고를 불러옵니다.\n\n", keyword)
	fmt.Println(resultSeparator)
	fmt.Println()

	total := 0
	for _, category := range jobCategories {
		for page := 1; page <= category.pages; page++ {
			pageURL := fmt.Sprintf(jobsURLFormat, page, category.part)
			matched, err := scanJobPage(pageURL, keyword)
			if err != nil {
				log.Fatal(err)
			}
			total += matched
		}
	}

	fmt.Println(resultSeparator)
	fmt.Println(startedAt, " 기준")
	fmt.Println("현재 모집중인 공고는 ", total, "건 입니다.")
	fmt.Println("Enter를 누르면 화면이 종료 됩니다.")
	readLine(stdin)
}

// scanJobPage fetches one listing page and prints each posting whose skill
// tags or title contain keyword, followed by the page URL. It returns the
// number of postings printed.
func scanJobPage(pageURL, keyword string) (int, error) {
	doc, err := fetchDocument(pageURL)
	if err != nil {
		return 0, err
	}

	matched := 0
	doc.Find("ul.list_notice").Find("li").Each(func(_ int, item *goquery.Selection) {
		skillTags := item.Find("a.link_tag").Text()
		tagTitle := item.Find("span.txt_tit").Text()
		title := item.Find("a.link_notice").Text()

		// NOTE(review): the href is read from the <li> itself; presumably the
		// posting link on a.link_notice was intended. Confirm before changing
		// the output. Print, not Printf, so scraped text is never a format string.
		if href, ok := item.Attr("href"); ok {
			fmt.Print(href)
		}

		switch {
		case strings.Contains(skillTags, keyword):
			fmt.Println(tagTitle)
		case strings.Contains(title, keyword):
			fmt.Println(title)
		default:
			return
		}
		fmt.Println(pageURL)
		fmt.Println()
		matched++
	})
	return matched, nil
}
728x90