728x90

예전에 만들어봤던 카카오社 채용공고 크롤러이다.

 

package main

import (
	"fmt"
	"log"
	"net/http"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)

// jobSection names one job category ("part" query parameter) on the Kakao
// careers listing and the number of result pages scanned for it.
type jobSection struct {
	part  string
	pages int
}

// main runs an interactive console crawler for the Kakao careers site.
//
// It first fetches the careers index page and inspects its navigation menu,
// then shows a banner and reads a command from stdin: "PatchNote" prints the
// patch notes, "Go" asks for a keyword and lists every posting whose skill
// tags or title contain it, followed by the number of matches. Any other
// input ends the program without further output.
func main() {
	// Asia/Seoul is UTC+9 with no daylight saving time, so when the tz
	// database is unavailable a fixed +09:00 zone yields the same timestamp.
	loc, err := time.LoadLocation("Asia/Seoul")
	if err != nil {
		loc = time.FixedZone("KST", 9*60*60)
	}
	startedAt := time.Now().In(loc).Format("2006-01-02 15:04:05")

	// A single client with a timeout, so a stalled server cannot hang the
	// program forever.
	client := &http.Client{Timeout: 30 * time.Second}

	indexDoc, err := fetchDocument(client, "https://careers.kakao.com/index")
	if err != nil {
		log.Fatal(err)
	}

	// Navigation menu check.
	indexDoc.Find("ul.list_gnb").Find("div.list_sub").Each(func(_ int, sel *goquery.Selection) {
		menu := sel.Find("div.list_sub").Text()

		// NOTE(review): the needle looks like a CSS selector rather than
		// visible menu text, so this branch presumably never fires — confirm
		// what the intended check is.
		if strings.Contains(menu, "a.on link_sub fst") {
			fmt.Println("새로운 공지사항 메뉴가 올라왔슴!")
			fmt.Println(menu)
			fmt.Println()
		}
	})

	const separator = "〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓〓"

	fmt.Println("\n********************************")
	fmt.Println("* 2020-05-08, 20:28            *")
	fmt.Println("* 카카오 채용공고 파싱 Ver.0.2 *")
	fmt.Println("* 패치노트를 보려면: PatchNote\n* 실행시키려면 : Go")
	fmt.Println("********************************")

	// A Scanln error (e.g. an empty line) leaves pick empty, which matches no
	// command below, so the program simply exits.
	var pick string
	fmt.Scanln(&pick)

	switch pick {
	case "PatchNote":
		// Print with explicit newlines instead of Println with a trailing
		// "\n": identical output, but clean under go vet.
		fmt.Print("\n2020-05-08, 21 : 40 링크 중복 발생 제거\n\n")
		fmt.Println("How to ver.1.2 == href 파싱")

	case "Go":
		var keyword string
		fmt.Print("\n원하는 키워드 혹은 기술태그를 입력 : ")
		fmt.Scanln(&keyword)

		fmt.Printf("\n%s 에 대한 채용공고를 불러옵니다.\n\n", keyword)
		fmt.Println(separator)
		fmt.Println()

		// Page counts are hard-coded per category, as in the original loops.
		sections := []jobSection{
			{part: "TECHNOLOGY", pages: 20},
			{part: "BRAND_MARKETING", pages: 1},
			{part: "BUSINESS_SERVICES", pages: 7},
			{part: "STAFF", pages: 3},
		}

		countNotice := 0
		for _, section := range sections {
			matched, err := scanSection(client, section, keyword)
			if err != nil {
				log.Fatal(err)
			}
			countNotice += matched
		}

		fmt.Println(separator)
		fmt.Println(startedAt, " 기준")
		fmt.Println("현재 모집중인 공고는 ", countNotice, "건 입니다.")

		fmt.Println("Enter를 누르면 화면이 종료 됩니다.")
		fmt.Scanln()
	}
}

// fetchDocument downloads pageURL with client and parses the response body as
// HTML. The body is closed before returning, so callers may invoke it in a
// loop without accumulating open connections. A non-200 status is logged as a
// warning and the body is still parsed.
func fetchDocument(client *http.Client, pageURL string) (*goquery.Document, error) {
	resp, err := client.Get(pageURL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		log.Printf("warning: %s responded with %s", pageURL, resp.Status)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("parsing %s: %w", pageURL, err)
	}
	return doc, nil
}

// scanSection walks every listing page of section and prints each posting
// whose skill tags or title contain keyword, together with the URL of the
// listing page it was found on. It returns the number of postings printed.
func scanSection(client *http.Client, section jobSection, keyword string) (int, error) {
	matched := 0

	for page := 1; page <= section.pages; page++ {
		pageURL := fmt.Sprintf(
			"https://careers.kakao.com/jobs?company=ALL&keyword=&page=%d&part=%s&skilset=",
			page, section.part,
		)

		doc, err := fetchDocument(client, pageURL)
		if err != nil {
			return matched, err
		}

		doc.Find("ul.list_notice").Find("li").Each(func(_ int, sel *goquery.Selection) {
			skillTags := sel.Find("a.link_tag").Text()
			tagTitle := sel.Find("span.txt_tit").Text()
			title := sel.Find("a.link_notice").Text()

			// NOTE(review): sel is the <li> matched above; whether it ever
			// carries an href attribute is not visible from this file.
			// Print (not Printf) so scraped text is never used as a format
			// string.
			if href, ok := sel.Attr("href"); ok {
				fmt.Print(href)
			}

			// Skill tags take precedence over the title, so a posting that
			// matches both is printed and counted once.
			switch {
			case strings.Contains(skillTags, keyword):
				fmt.Println(tagTitle)
				fmt.Println(pageURL)
				fmt.Println()
				matched++
			case strings.Contains(title, keyword):
				fmt.Println(title)
				fmt.Println(pageURL)
				fmt.Println()
				matched++
			}
		})
	}

	return matched, nil
}
728x90
복사했습니다!