파이썬 톺아보기 (5_1)

1. 네이버 급상승 검색어 크롤링

In [1]:
import requests
from bs4 import BeautifulSoup
# Plain GET using requests' default headers (no browser-like headers supplied).
res = requests.get(url='https://datalab.naver.com/keyword/realtimeList.naver')

# Build a parse tree from the raw response bytes with the 'html.parser' backend.
soup = BeautifulSoup(res.content, features='html.parser')
soup  # last expression of the cell, so the notebook renders the parsed document
Out[1]:
<!DOCTYPE HTML>

<html lang="ko">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<meta content="width=device-width,initial-scale=1.0,maximum-scale=1.0,minimum-scale=1.0,user-scalable=no" name="viewport"/>
<meta content="네이버 :: 서비스에 접속할 수 없습니다." lang="ko" name="description"/>
<title>[접근 오류] 서비스에 접속할 수 없습니다.</title>
<style type="text/css">
        /* PC */
        body,p,h1,h2,h3,h4,h5,h6,ul,ol,li,dl,dt,dd,table,th,td,form,fieldset,legend,input,textarea,button,select{margin:0;padding:0}
        body,input,textarea,select,button,table{font-size:12px;font-family:'굴림',Gulim,helvetica,sans-serif;color:#424242}
        body,html{height:100%}
        img,fieldset{border:0}
        img{vertical-align:middle}
        ul,ol{list-style:none}
        em,address{font-style:normal}
        a{color:#000;text-decoration:none}
        legend{display:none}
        hr{display:none !important}
        #u_skip{position:relative;width:100%;z-index:10}
        #u_skip a{position:absolute;top:-40px;left:0;width:auto;padding:0 10px;border:1px solid #4ec53d;background-color:#000;color:#fff;line-height:38px;white-space:nowrap;opacity:0;-webkit-transition:0.3s;transition:0.3s}
        #u_skip a:active, #u_skip a:focus{top:0;text-decoration:none;opacity:1}
        .wrap{width:600px;margin:0 auto}
        .header{overflow:hidden;padding-top:30px}
        .header .logo{float:left}
        .header .logo .logo_link{display:inline-block;vertical-align:top}
        .header .logo .logo_link img{width:90px;height:16px}
        .header .nav{float:right;margin-top:9px}
        .header .nav .nav_link:hover{text-decoration:underline}
        .header .nav .nav_link+.nav_link:before{content:'';display:inline-block;width:1px;height:10px;margin:0 9px 0 5px;background-color:#d6d6d6;vertical-align:0}
        .container{margin-top:66px}
        .container .content{overflow:hidden}
        .container .image_area{float:right;width:280px}
        .container .image_area .image_link{display:block}
        .container .image_area .image_author{overflow:hidden;display:block;text-align:center;white-space:nowrap;text-overflow:ellipsis;font-size:12px;line-height:17px;color:#8f8f8f}
        .container .image_area+.info_area{overflow:hidden;text-align:left}
        .container .info_area{margin-bottom:26px;text-align:center}
        .container .info_area .info_txt{margin-top:29px}
        .container .info_area .info_txt .tit{font-size:22px;line-height:25px}
        .container .info_area .info_txt .txt{margin-top:18px;line-height:20px}
        .container .info_area .info_link{margin-top:33px}
        .container .info_area .info_link [class^="link_"]{overflow:hidden;display:inline-block;width:101px;height:34px;border:1px solid #e3e6e7;background-color:#fafbfc;line-height:34px;text-align:center;vertical-align:top;white-space:nowrap}
        .container .info_area .info_link .link_home{margin-left:10px}
        .footer{margin-top:93px;padding:18px 0 24px;border-top:1px solid #eff3f6}
        .footer address, .footer .link_naver{text-align:center;font-family:verdana,sans-serif;font-size:11px;line-height:14px;color:#8f8f8f}
        /* 모바일 */
        body.mobile, body.mobile input,textarea,select,button,table{font-family:-apple-system,BlinkMacSystemFont,helvetica,Apple SD Gothic Neo,sans-serif;letter-spacing:-0.3px}
        body.mobile{background-color:#fafbfc;-webkit-text-size-adjust:none}
        body.mobile .wrap{width:100%}
        body.mobile .header{padding-top:15px;text-align:center}
        body.mobile .header .logo{float:none}
        body.mobile .header .logo .logo_link img{width:82px;height:15px}
        body.mobile .header .nav{display:none}
        body.mobile .container{margin-top:38px}
        body.mobile .container .content{position:relative}
        body.mobile .container .image_area{position:absolute;bottom:72px;left:50%;margin-left:-140px}
        body.mobile .container .image_area+.info_area{margin:0}
        body.mobile .container .image_area+.info_area .info_link{margin-top:280px}
        body.mobile .container .info_area{margin:40px 0 161px 0;text-align:center}
        body.mobile .container .info_area .info_txt{margin-top:0}
        body.mobile .container .info_area .info_txt .tit{font-size:26px;line-height:29px;color:#000}
        body.mobile .container .info_area .info_txt .txt{margin-top:13px;font-size:17px;line-height:25px;color:#666}
        body.mobile .container .info_area .info_link{margin-top:79px}
        body.mobile .container .info_area .info_link [class^="link_"]{width:139px;height:42px;background-color:#fff;font-size:15px;line-height:42px;color:#424242}
        body.mobile .container .info_area .info_link .link_home{margin-left:-1px}
        body.mobile .footer{margin-top:66px;border-top:0}
        body.mobile .footer address span{display:none}
    </style>
</head>
<!-- 모바일 노출시 .mobile 추가 -->
<body>
<div class="u_skip" id="u_skip">
<a href="#content">본문 바로가기</a>
</div>
<div class="wrap">
<div class="header" role="banner">
<h1 class="logo"><a class="logo_link" href="https://www.naver.com"><img alt="네이버" height="16" src="https://ssl.pstatic.net/static.datalab/202010150900/img/naver_logo.png" width="90"/></a></h1>
<div class="nav" role="navigation">
<a class="nav_link" href="https://www.naver.com">네이버홈</a>
<a class="nav_link" href="https://help.naver.com/support/service/main.nhn?serviceNo=14493">네이버 데이터랩 고객센터</a>
</div>
</div>
<hr/>
<div class="container" role="main">
<div class="content" id="content">
<div class="image_area _errorImage">
</div>
<div class="info_area">
<div class="info_txt">
<strong class="tit">다시 한번 확인해주세요!</strong>
<p class="txt">
                        지금 입력하신 주소의 페이지는<br/>
                        사라졌거나 다른 페이지로 변경되었습니다.<br/>
                        주소를 다시 확인해주세요.
                    </p>
</div>
<div class="info_link">
<a class="link_prev" href="javascript:history.back()">이전 페이지</a><a class="link_home" href="https://www.naver.com">네이버 홈</a>
</div>
</div>
</div>
</div>
<hr/>
<div class="footer" role="contentinfo">
<address>
<span>Copyright</span> ©<a class="link_naver" href="http://www.navercorp.com" target="_blank">NAVER Corp.</a> <span>All Rights Reserved.</span>
</address>
</div>
</div>
<script src="https://ssl.pstatic.net/static/fe/grafolio.js"></script>
</body>
</html>
  • '서비스에 접속할 수 없습니다'라는 오류 페이지가 반환된다?
    요청 헤더(User-Agent 등 브라우저 정보를 나타내는 값)를 함께 넣어서, 마치 정상적인 브라우저로 접속하는 것처럼 보이게 호출한다!
In [2]:
import requests
from bs4 import BeautifulSoup

# Request headers modelled on a real Chrome session, so the request looks like
# it comes from an ordinary browser instead of a script.
#
# When editing this dict: every key and value must be a quoted string (single
# or double quotes) and every entry must end with a comma.
# Notebook tip: select the text and press Shift+" to wrap it in quotes at once.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    # 'br' is intentionally not advertised: requests only decodes Brotli
    # responses when an optional brotli package is installed; without it
    # res.content would be raw compressed bytes that cannot be parsed as HTML.
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'ko,en;q=0.9,ja;q=0.8',
    'cache-control': 'max-age=0',
    # SECURITY: the personal login/session cookies (NID_AUT, NID_SES, NID_JKL)
    # and tracking identifiers that used to be pasted here were removed --
    # never commit account credentials to a notebook. Anyone whose cookies were
    # published this way should log out of that session to invalidate them.
    # Only non-credential preference cookies are kept.
    # NOTE(review): NRTK presumably carries the ranking options (e.g. ag#20s =
    # age group "20s") -- confirm the page still reflects them.
    'cookie': 'NRTK=ag#20s_gr#0_ma#-2_si#-2_en#-2_sp#-2; _datalab_cid=50000000',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'none',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
}

# Send the hand-built headers together with the HTTP request (they affect the
# request itself, not the later parsing step).
# requests applies no timeout by default, so set one explicitly to keep the
# cell from hanging forever if the server stops responding.
res = requests.get(
    'https://datalab.naver.com/keyword/realtimeList.naver',
    headers=headers,
    timeout=10,
)
In [3]:
# Parse the response body fetched in the previous cell into a BeautifulSoup tree.
soup = BeautifulSoup(res.content, features='html.parser')
soup  # trailing expression so the notebook displays the parsed HTML
Out[3]:
<!DOCTYPE html>

<html lang="ko">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=1200" name="viewport"/>
<title>급상승검색어 : 네이버 데이터랩</title>
<link href="https://ssl.pstatic.net/static.datalab/202010150900/css/datalab.css" rel="stylesheet" type="text/css"/>
<link href="https://ssl.pstatic.net/static.datalab/202010150900/css/graph.css" rel="stylesheet" type="text/css"/>
<link href="https://ssl.pstatic.net/static.datalab/202010150900/img/favicon/android_legacy_xxxhpdi_192x192.png" rel="icon" sizes="192x192"/>
<link href="https://ssl.pstatic.net/static.datalab/202010150900/img/favicon/android_legacy_xxhpdi_144x144.png" rel="icon" sizes="144x144"/>
<link href="https://ssl.pstatic.net/static.datalab/202010150900/img/favicon/android_legacy_xhdpi_96x96.png" rel="icon" sizes="96x96"/>
<link href="https://ssl.pstatic.net/static.datalab/202010150900/img/favicon/android_legacy_hdpi_72X72.png" rel="icon" sizes="72x72"/>
<link href="https://ssl.pstatic.net/static.datalab/202010150900/img/favicon/iOS_iPhone@3x_180X180.png" rel="apple-touch-icon-precomposed" sizes="180x180"/>
<link href="https://ssl.pstatic.net/static.datalab/202010150900/img/favicon/iOS_iPadPro@2x_167X167.png" rel="apple-touch-icon-precomposed" sizes="167x167"/>
<link href="https://ssl.pstatic.net/static.datalab/202010150900/img/favicon/iOS_iPad@2x_152X152.png" rel="apple-touch-icon-precomposed" sizes="152x152"/>
<link href="https://ssl.pstatic.net/static.datalab/202010150900/img/favicon/iOS_iPhone@2x_120X120.png" rel="apple-touch-icon-precomposed" sizes="120x120"/>
<meta content="네이버 데이터랩 : 급상승 검색어" property="og:title">
<meta content="https://datalab.naver.com/realtimeList.naver" property="og:url">
<meta content="https://ssl.pstatic.net/static.datalab/img2019/datalab_og_244x244.jpg" property="og:image">
<meta content="검색 횟수가 급상승한 검색어의 순위를 다양한 옵션을 통해 자세히 제공합니다." property="og:description"/>
<meta content="검색 횟수가 급상승한 검색어의 순위를 다양한 옵션을 통해 자세히 제공합니다." name="description"/>
</meta></meta></meta></head>
<!--[if lte IE 9]>
<body class="pop_opened">
<![endif]-->
<?if !IE?>
<body>
<?endif?>
<div class="wrap" id="wrap">
<div class="header" id="header">
<div class="inner">
<div class="srch_wrap">
<a class="logo_area" href="https://www.naver.com" target="_blank">
<span class="sp_header_logo"><span class="blind">NAVER</span></span>
</a>
<h1 class="sta_area">
<a class="sp_sta" href="/"><span class="blind">DataLab</span></a>
</h1>
</div>
<div class="gnb_wrap">
<div id="gnb"></div>
</div>
</div>
<div class="lnb" id="lnb">
<div class="lnb_innner">
<ul class="lnb_list">
<li class="list _home">
<a class="list_area" href="/">
<span class="sp_lnb_list nav1">
<span class="blind">데이터랩 홈</span>
</span>
</a>
</li>
<li class="list _keyword active">
<a class="list_area" href="/keyword/realtimeList.naver">
<span class="sp_lnb_list nav2">
<span class="blind">급상승검색어</span>
</span>
</a>
</li>
<li class="list _keyword">
<a class="list_area" href="/keyword/trendSearch.naver">
<span class="sp_lnb_list nav3">
<span class="blind">검색어트렌드</span>
</span>
</a>
</li>
<li class="list _shopping">
<a class="list_area" href="/shoppingInsight/sCategory.naver">
<span class="sp_lnb_list nav4">
<span class="blind">쇼핑인사이트</span>
</span>
</a>
</li>
<li class="list _local">
<a class="list_area" href="/local/trend.naver">
<span class="sp_lnb_list nav5">
<span class="blind">지역통계</span>
</span>
</a>
</li>
<li class="list _commentStat">
<a class="list_area" href="/commentStat/news.naver">
<span class="sp_lnb_list nav6">
<span class="blind">뉴스댓글통계</span>
</span>
</a>
</li>
</ul>
</div>
</div>
</div>
<div class="container" id="container" role="main">
<div class="content" id="content">
<div class="section_keyword">
<div class="com_title title_keyword">
<h3 class="title">급상승 검색어</h3>
<em class="desc">검색 횟수가 급상승한 검색어의 순위를 다양한 옵션을 통해 자세히 제공합니다.</em>
<!-- [D] 'btn_guide' 클릭시 class="on" 추가해주세요. -->
<div class="guide_box">
<button class="btn_guide" type="button"><span class="blind">급상승 검색어 옵션 조절 기능 안내</span></button>
<div class="tooltip_guide">
<div class="tooltip_wrap">
<p class="tooltip_txt">검색어 설정 기능은 검색어가 해당 주제에 어느 정도 관련성이 있는 지를 분석, 측정한 후, 사용자가 선택하는 옵션 단계에 따라 가중치를 가감하여 급상승검색어를 재정렬하는 기능입니다.
                    관련도의 분석, 측정은 AI 기반 검색어 추천 로직에 의해 이루어지며 일부 검색어는 실제 주제 연관성과 다르게 측정되고 정렬될 수 있습니다. </p>
<a class="btn_detail" href="https://blog.naver.com/naver_diary/221694954271" target="_blank">자세히 보기</a>
<button class="btn_close" type="button"><span class="blind">안내 문구 닫기</span></button>
</div>
</div>
</div>
</div>
<div class="selection_area">
<div class="selection_header">
<div class="selection_box">
<div class="box_wrap">
<strong class="title_txt">집계 주기</strong>
<div class="section_serch_area">
<div class="select_inbo _picker_component" data-datetime="2020-10-24T14:31:00" data-end="2020-10-24T14:31:00" data-start="2020-01-16T06:00:00">
<div class="date_indo" href="#">
<a class="date_btn _prev_day" href="#"><span class="blind">이전 날짜로 가기</span></a>
<a class="date_btn next _next_day btn_off" href="#"><span class="blind">다음 날짜로 가기</span></a>
<a class="date_box _date_trigger" href="#">
<span class="date_txt _title_ymd">2020.10.24. <em>(토)</em></span>
<span class="date_ico"><span class="blind">달력</span></span>
</a>
<!-- [D] 캘린더 활성화 display:block 추가 부탁 드립니다 -->
<div class="calendar" id="_calendar">
<div class="layer_area _date_wrapper" style="display:none">
<div class="calendar_area _leftMonth">
<div class="calendar_head">
<strong class="calendar-title _title_ym">2017.02.</strong>
<!--[D] 링크 비활성화 a 링크에 class="off" 추가 -->
<a class="calendar-btn-prev-mon _prev" data-date="" href="#"><span class="btn_prev">이전달</span></a>
<a class="calendar-btn-next-mon _next" data-date="" href="#"><span class="btn_next">다음달</span></a>
</div>
<div class="calendar_body _calendar"></div>
</div>
<div class="sub_area">
<a class="btn_today _today" href="#">오늘</a>
<a class="btn_check _confirm" href="#">확인</a>
</div>
<div class="calendar_popup">
<div class="ly_cont">
<p class="cont_txt">2020.01.16. 이전 데이터는<br/>아래의 버튼을 눌러 확인해주세요.</p>
</div>
<div class="ly_btn_bottom">
<a class="btn_data" href="realtimeList.naver?datetime=2020-01-16T05:59:00" target="_blank"><span class="data_txt">2020.01.16. 이전 데이터 조회</span></a>
</div>
<a class="btn_popup_close" href="#none"><span class="blind">팝업닫기</span></a>
</div>
</div>
</div>
</div><!-- [D] 비활성화시 class="time_indo" 에 v2를 입력해주세요 -->
<div class="time_indo" href="#">
<a class="time_btn _prev_tick" href="#"><span class="blind">이전 시간으로 가기</span></a>
<a class="time_btn next _next_tick btn_off" href="#"><span class="blind">다음 시간으로 가기</span></a>
<a class="time_box _time_trigger" href="#">
<span class="time_txt _title_hms">14:31</span>
<span class="time_ico"><span class="blind">시간</span></span>
</a>
<!-- [D] 시간탬 활성화 display:block 추가 부탁 드립니다. -->
<div class="layer_time _time_wrapper time_no_second" id="u.c.layer.time" style="display:none">
<div class="hour">
<!--[AU] data-max 에 입력가능 최대 숫자 설정-->
<!-- [AU] scrollbar-box 클래스 위치 변경 -->
<input class="time_input _tinput _hour" data-max="23" maxlength="2" type="text" value="14"/><span class="blind">시</span>
<div class="time_scroll scrollbar-box _tlist _hour" id="scrollbox" style="overflow: hidden;">
<div class="scrollbar-content" style="top: 0px; height: auto;">
<ul class="lst_time">
<li class="selected _item"><a href="#">00</a></li>
<li class="_item"><a href="#">01</a></li>
<li class="_item"><a href="#">02</a></li>
<li class="_item"><a href="#">03</a></li>
<li class="_item"><a href="#">04</a></li>
<li class="_item"><a href="#">05</a></li>
<li class="_item"><a href="#">06</a></li>
<li class="_item"><a href="#">07</a></li>
<li class="_item"><a href="#">08</a></li>
<li class="_item"><a href="#">09</a></li>
<li class="_item"><a href="#">10</a></li>
<li class="_item"><a href="#">11</a></li>
<li class="_item"><a href="#">12</a></li>
<li class="_item"><a href="#">13</a></li>
<li class="_item"><a href="#">14</a></li>
<li class="_item"><a href="#">15</a></li>
<li class="_item"><a href="#">16</a></li>
<li class="_item"><a href="#">17</a></li>
<li class="_item"><a href="#">18</a></li>
<li class="_item"><a href="#">19</a></li>
<li class="_item"><a href="#">20</a></li>
<li class="_item"><a href="#">21</a></li>
<li class="_item"><a href="#">22</a></li>
<li class="_item"><a href="#">23</a></li>
</ul>
</div>
<div class="iScrollVerticalScrollbar scrollbar-show">
<div class="scrollbar-button-up rollover"></div>
<div class="scrollbar-track rollover" style="height: 202px;">
<div class="iScrollIndicator S21614895 rollover S84675399" style="top: 0px; height: 61px;"></div>
</div>
<div class="scrollbar-button-down rollover"></div>
</div>
</div>
</div>
<div class="hour_minute">:</div>
<div class="minute">
<!--[AU] data-max 에 입력가능 최대 숫자 설정-->
<input class="time_input _tinput _minute" data-max="59" maxlength="2" type="text" value="00"/><span class="blind">분</span>
<div class="time_scroll scrollbar-box _tlist _minute" id="scrollbox2" style="overflow: hidden;">
<div class="scrollbar-content" style="top: 0px; height: auto;">
<ul class="lst_time">
<li class="selected _item"><a href="#">00</a></li>
<li class="_item"><a href="#">01</a></li>
<li class="_item"><a href="#">02</a></li>
<li class="_item"><a href="#">03</a></li>
<li class="_item"><a href="#">04</a></li>
<li class="_item"><a href="#">05</a></li>
<li class="_item"><a href="#">06</a></li>
<li class="_item"><a href="#">07</a></li>
<li class="_item"><a href="#">08</a></li>
<li class="_item"><a href="#">09</a></li>
<li class="_item"><a href="#">10</a></li>
<li class="_item"><a href="#">11</a></li>
<li class="_item"><a href="#">12</a></li>
<li class="_item"><a href="#">13</a></li>
<li class="_item"><a href="#">14</a></li>
<li class="_item"><a href="#">15</a></li>
<li class="_item"><a href="#">16</a></li>
<li class="_item"><a href="#">17</a></li>
<li class="_item"><a href="#">18</a></li>
<li class="_item"><a href="#">19</a></li>
<li class="_item"><a href="#">20</a></li>
<li class="_item"><a href="#">21</a></li>
<li class="_item"><a href="#">22</a></li>
<li class="_item"><a href="#">23</a></li>
<li class="_item"><a href="#">24</a></li>
<li class="_item"><a href="#">25</a></li>
<li class="_item"><a href="#">26</a></li>
<li class="_item"><a href="#">27</a></li>
<li class="_item"><a href="#">28</a></li>
<li class="_item"><a href="#">29</a></li>
<li class="_item"><a href="#">30</a></li>
<li class="_item"><a href="#">31</a></li>
<li class="_item"><a href="#">32</a></li>
<li class="_item"><a href="#">33</a></li>
<li class="_item"><a href="#">34</a></li>
<li class="_item"><a href="#">35</a></li>
<li class="_item"><a href="#">36</a></li>
<li class="_item"><a href="#">37</a></li>
<li class="_item"><a href="#">38</a></li>
<li class="_item"><a href="#">39</a></li>
<li class="_item"><a href="#">40</a></li>
<li class="_item"><a href="#">41</a></li>
<li class="_item"><a href="#">42</a></li>
<li class="_item"><a href="#">43</a></li>
<li class="_item"><a href="#">44</a></li>
<li class="_item"><a href="#">45</a></li>
<li class="_item"><a href="#">46</a></li>
<li class="_item"><a href="#">47</a></li>
<li class="_item"><a href="#">48</a></li>
<li class="_item"><a href="#">49</a></li>
<li class="_item"><a href="#">50</a></li>
<li class="_item"><a href="#">51</a></li>
<li class="_item"><a href="#">52</a></li>
<li class="_item"><a href="#">53</a></li>
<li class="_item"><a href="#">54</a></li>
<li class="_item"><a href="#">55</a></li>
<li class="_item"><a href="#">56</a></li>
<li class="_item"><a href="#">57</a></li>
<li class="_item"><a href="#">58</a></li>
<li class="_item"><a href="#">59</a></li>
</ul>
</div>
<div class="iScrollVerticalScrollbar scrollbar-show">
<div class="scrollbar-button-up rollover"></div>
<div class="scrollbar-track rollover" style="height: 202px;">
<div class="iScrollIndicator S6808276 rollover S39500592" style="top: 0px; height: 25px;"></div>
</div>
<div class="scrollbar-button-down rollover"></div>
</div>
</div>
</div>
<div class="hour_minute v2">:</div>
<div class="sec">
<!--[AU] data-max 에 입력가능 최대 숫자 설정-->
<input class="time_input _tinput _second" data-max="30" maxlength="2" type="text" value="00"/><span class="blind">초</span>
<div class="time_scroll v2 scrollbar-box _tlist _second" id="scrollbox3" style="overflow: hidden;">
<div class="scrollbar-box" style="width: 62px; height: 202px;">
<ul class="lst_time">
<li class="selected _item" data-value="00"><a href="#">00</a></li>
<li class="_item" data-value="30"><a href="#">30</a></li>
</ul>
</div>
</div>
</div>
<div class="sub_area v2">
<a class="btn_today _now" href="#">지금</a>
<a class="btn_check _confirm" href="#">확인</a>
</div>
</div>
</div> </div>
</div>
</div>
</div>
<div class="selection_box _age_filter" data-age="20s">
<div class="box_wrap">
<strong class="title_txt">연령</strong>
<div class="section_keyword_detail">
<div class="graph_tab_box">
<ul class="graph_tab_list">
<!-- [D] 선택된 li에 class="on" 추가 -->
<li class="">
<a href="#none">10대</a>
</li>
<li class="on">
<a href="#none">20대</a>
</li>
<li class="">
<a href="#none">30대</a>
</li>
<li class="">
<a href="#none">40대</a>
</li>
<li class="">
<a href="#none">50대 -</a>
</li>
<li class="">
<a href="#none">전체</a>
</li>
</ul>
</div>
</div>
</div>
</div>
</div>
<!-- 상세옵션 및 대표검색어,유사검색어 ranking -->
<div class="selection_content">
<div class="field_option">
<div class="option_box">
<strong class="box_title">상세 옵션</strong>
<div class="box_content">
<!-- filter -->
<div class="various_filter_area">
<ul class="filter_list">
<!--[D] 필터 주제에 따라 클래스 추가해주세요.
                                이슈별 묶어보기: default
                                이벤트・할인: item_event
                                시사: item_actualities
                                엔터: item_enter
                                스포츠: item_sports
                                -->
<li class="filter_item _opt_filter">
<strong class="item_title">이슈별 묶어보기</strong>
<div class="item_graph_wrap">
<div class="item_graph" role="radiogroup">
<!--[D] 활성화/비활성화시 aria-checked=true/false로 변경해주세요.-->
<a aria-checked="true" aria-label="0%" class="level" href="#" role="radio">
<span class="blind">1단계</span>
</a>
<a aria-checked="false" aria-label="25%" class="level" href="#" role="radio">
<span class="blind">2단계</span>
</a>
<a aria-checked="false" aria-label="50%" class="level" href="#" role="radio">
<span class="blind">3단계</span>
</a>
<a aria-checked="false" aria-label="75%" class="level" href="#" role="radio">
<span class="blind">4단계</span>
</a>
<a aria-checked="false" aria-label="100%" class="level" href="#" role="radio">
<span class="blind">5단계</span>
</a>
<div class="item_fill" style="width: 0%;"></div>
</div>
</div>
</li>
<li class="filter_item _opt_filter item_event">
<strong class="item_title">이벤트・할인</strong>
<div class="item_graph_wrap">
<div class="item_graph" role="radiogroup">
<!--[D] 활성화/비활성화시 aria-checked=true/false로 변경해주세요.-->
<a aria-checked="true" aria-label="0%" class="level" href="#" role="radio">
<span class="blind">1단계</span>
</a>
<a aria-checked="false" aria-label="25%" class="level" href="#" role="radio">
<span class="blind">2단계</span>
</a>
<a aria-checked="false" aria-label="50%" class="level" href="#" role="radio">
<span class="blind">3단계</span>
</a>
<a aria-checked="false" aria-label="75%" class="level" href="#" role="radio">
<span class="blind">4단계</span>
</a>
<a aria-checked="false" aria-label="100%" class="level" href="#" role="radio">
<span class="blind">5단계</span>
</a>
<div class="item_fill" style="width: 0%;"></div>
</div>
</div>
</li>
<li class="filter_item _opt_filter item_actualities">
<strong class="item_title">시사</strong>
<div class="item_graph_wrap">
<div class="item_graph" role="radiogroup">
<!--[D] 활성화/비활성화시 aria-checked=true/false로 변경해주세요.-->
<a aria-checked="true" aria-label="0%" class="level" href="#" role="radio">
<span class="blind">1단계</span>
</a>
<a aria-checked="false" aria-label="25%" class="level" href="#" role="radio">
<span class="blind">2단계</span>
</a>
<a aria-checked="false" aria-label="50%" class="level" href="#" role="radio">
<span class="blind">3단계</span>
</a>
<a aria-checked="false" aria-label="75%" class="level" href="#" role="radio">
<span class="blind">4단계</span>
</a>
<a aria-checked="false" aria-label="100%" class="level" href="#" role="radio">
<span class="blind">5단계</span>
</a>
<div class="item_fill" style="width: 0%;"></div>
</div>
</div>
</li>
<li class="filter_item _opt_filter item_enter">
<strong class="item_title">엔터</strong>
<div class="item_graph_wrap">
<div class="item_graph" role="radiogroup">
<!--[D] 활성화/비활성화시 aria-checked=true/false로 변경해주세요.-->
<a aria-checked="true" aria-label="0%" class="level" href="#" role="radio">
<span class="blind">1단계</span>
</a>
<a aria-checked="false" aria-label="25%" class="level" href="#" role="radio">
<span class="blind">2단계</span>
</a>
<a aria-checked="false" aria-label="50%" class="level" href="#" role="radio">
<span class="blind">3단계</span>
</a>
<a aria-checked="false" aria-label="75%" class="level" href="#" role="radio">
<span class="blind">4단계</span>
</a>
<a aria-checked="false" aria-label="100%" class="level" href="#" role="radio">
<span class="blind">5단계</span>
</a>
<div class="item_fill" style="width: 0%;"></div>
</div>
</div>
</li>
<li class="filter_item _opt_filter item_sports">
<strong class="item_title">스포츠</strong>
<div class="item_graph_wrap">
<div class="item_graph" role="radiogroup">
<!--[D] 활성화/비활성화시 aria-checked=true/false로 변경해주세요.-->
<a aria-checked="true" aria-label="0%" class="level" href="#" role="radio">
<span class="blind">1단계</span>
</a>
<a aria-checked="false" aria-label="25%" class="level" href="#" role="radio">
<span class="blind">2단계</span>
</a>
<a aria-checked="false" aria-label="50%" class="level" href="#" role="radio">
<span class="blind">3단계</span>
</a>
<a aria-checked="false" aria-label="75%" class="level" href="#" role="radio">
<span class="blind">4단계</span>
</a>
<a aria-checked="false" aria-label="100%" class="level" href="#" role="radio">
<span class="blind">5단계</span>
</a>
<div class="item_fill" style="width: 0%;"></div>
</div>
</div>
</li>
</ul>
</div>
</div>
</div>
<div class="option_box box_bottom">
<strong class="box_title">과거 데이터 조회하기</strong>
<div class="box_content">
<ul class="previous_data">
<li class="data_item"><a href="realtimeList.naver?datetime=2018-10-10T05:59:30" target="_blank">2017.03.29. ~ 2018.10.10.</a></li>
<li class="data_item"><a href="realtimeList.naver?datetime=2019-11-28T05:59:00" target="_blank">2018.10.10. ~ 2019.11.28.</a></li>
<li class="data_item"><a href="realtimeList.naver?datetime=2020-01-16T05:59:00" target="_blank">2019.11.28. ~ 2020.01.16.</a></li>
</ul>
</div>
</div>
</div>
<div class="field_list">
<div class="ranking_box">
<div class="list_group">
<ul class="ranking_list">
<li class="ranking_item">
<div class="item_box">
<span class="item_num">1</span>
<span class="item_title_wrap">
<span class="item_title">한능검</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">2</span>
<span class="item_title_wrap">
<span class="item_title">핵심능력 테스트</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">3</span>
<span class="item_title_wrap">
<span class="item_title">한국사능력검정시험</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">4</span>
<span class="item_title_wrap">
<span class="item_title">삼성채용</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">5</span>
<span class="item_title_wrap">
<span class="item_title">엘클라시코</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">6</span>
<span class="item_title_wrap">
<span class="item_title">장범준</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">7</span>
<span class="item_title_wrap">
<span class="item_title">서지혜 맥주</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">8</span>
<span class="item_title_wrap">
<span class="item_title">히든싱어 장범준</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">9</span>
<span class="item_title_wrap">
<span class="item_title">곰표 맥주</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">10</span>
<span class="item_title_wrap">
<span class="item_title">카이로스</span>
</span>
</div>
</li>
</ul>
<ul class="ranking_list">
<li class="ranking_item">
<div class="item_box">
<span class="item_num">11</span>
<span class="item_title_wrap">
<span class="item_title">슭곰발</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">12</span>
<span class="item_title_wrap">
<span class="item_title">서지혜</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">13</span>
<span class="item_title_wrap">
<span class="item_title">후쿠하라 모네</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">14</span>
<span class="item_title_wrap">
<span class="item_title">심지유</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">15</span>
<span class="item_title_wrap">
<span class="item_title">기믹</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">16</span>
<span class="item_title_wrap">
<span class="item_title">이강인</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">17</span>
<span class="item_title_wrap">
<span class="item_title">아쿠아맨</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">18</span>
<span class="item_title_wrap">
<span class="item_title">하빕 게이치</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">19</span>
<span class="item_title_wrap">
<span class="item_title">화사 별</span>
</span>
</div>
</li>
<li class="ranking_item">
<div class="item_box">
<span class="item_num">20</span>
<span class="item_title_wrap">
<span class="item_title">비비</span>
</span>
</div>
</li>
</ul>
</div>
</div>
</div>
</div>
<!-- //상세옵션 및 대표검색어,유사검색어 ranking -->
</div>
</div>
</div>
<div class="_data" style="display:none">
<span class="_subject_server"></span>
<span class="_keyword_server"></span>
</div>
</div>
<div class="footer" id="footer">
<div class="footer_inner">
<div class="footer_info">
<a href="https://policy.naver.com/rules/service.html" target="_blank">이용약관</a>
<a href="https://policy.naver.com/rules/privacy.html" target="_blank"><strong>개인정보처리방침</strong></a>
<a href="https://policy.naver.com/rules/disclaimer.html" target="_blank">책임의 한계와 법적고지</a>
<a href="https://help.naver.com/support/service/main.nhn?serviceNo=14493" target="_blank">고객센터</a>
</div>
<div class="footer_copyright">
<a class="sp_footer_logo" href="https://www.naver.com/" target="_blank"><span class="blind">NAVER</span></a>
<em class="copyright">Copyright ©</em>
<strong><a href="https://www.navercorp.com/ko/index.nhn" target="_blank">NAVER Corp.</a></strong> All Rights Reserved.
            </div>
</div>
</div>
</div>
<!--[if lte IE 9]>
<div class="com_browser_pop" style="display:none">
    <div class="pop_dimmed"></div>
    <div class="pop_inner type_browser">
        <strong class="browser_title">권장 브라우저 안내</strong>
        <p class="browser_desc">
            해당 사이트에 최적화된 브라우저는 Internet Explorer10 이상입니다<br>
            편리한 사이트 이용을 위하여 최신 브라우저로 업그레이드를 권장합니다
        </p>
        <div class="browser_list">
			<span class="list ie">
				<span class="title">Internet Explore</span>
			</span>
            <span class="list firefox">
				<span class="title">Firefox</span>
			</span>
            <span class="list chrome">
				<span class="title">Google Chrome</span>
			</span>
            <span class="list safari">
				<span class="title">Safari</span>
			</span>
        </div>
        <div class="browser_close_chk">
			<span class="select_chk _old_browser_cookie">
				<input type="checkbox" id="item_browser" class="chk" checked="">
				<label class="lbl" for="item_browser">이 메세지를 다시 표시하지 않습니다</label>
			</span>
        </div>
        <a href="#" class="sp_btn_pop_close">
            <span class="blind">권장 브라우저 안내 팝업 닫기</span>
        </a>
    </div>
</div>
<![endif]-->
<script type="text/javascript">
(function() {
    window.$DATALAB = (typeof $DATALAB != "undefined") ? $DATALAB : {};
    window.$DATALAB.CONFIG = (typeof $DATALAB.CONFIG != "undefined") ? $DATALAB.CONFIG : {};

    $DATALAB.CONFIG.request_url = "'http://datalab.naver.com/keyword/realtimeList.naver";
    $DATALAB.CONFIG.realTimeOpenDate = "2020-01-16T06:00";

    if(location.href.indexOf("dev") > 0) {
        window.lcs_SerName = "alpha-lcs.naver.com";
    }
})();
</script>
<script src="https://ssl.pstatic.net/static.datalab/202010150900/js/vendor.202010150900.js" type="text/javascript"></script>
<script src="https://ssl.pstatic.net/static.datalab/202010150900/js/realtime_v3.202010150900.js" type="text/javascript"></script>
</body>
</html>
In [4]:
rank_list = soup.select(".ranking_list .item_title") # Select the item_title elements nested under ranking_list
rank_list  # A bare expression on the last line of a cell displays its value
Out[4]:
[<span class="item_title">한능검</span>,
 <span class="item_title">핵심능력 테스트</span>,
 <span class="item_title">한국사능력검정시험</span>,
 <span class="item_title">삼성채용</span>,
 <span class="item_title">엘클라시코</span>,
 <span class="item_title">장범준</span>,
 <span class="item_title">서지혜 맥주</span>,
 <span class="item_title">히든싱어 장범준</span>,
 <span class="item_title">곰표 맥주</span>,
 <span class="item_title">카이로스</span>,
 <span class="item_title">슭곰발</span>,
 <span class="item_title">서지혜</span>,
 <span class="item_title">후쿠하라 모네</span>,
 <span class="item_title">심지유</span>,
 <span class="item_title">기믹</span>,
 <span class="item_title">이강인</span>,
 <span class="item_title">아쿠아맨</span>,
 <span class="item_title">하빕 게이치</span>,
 <span class="item_title">화사 별</span>,
 <span class="item_title">비비</span>]
In [5]:
# Print only the text content of each selected element, one keyword per line.
for title_tag in rank_list:
    keyword = title_tag.text
    print(keyword)
한능검
핵심능력 테스트
한국사능력검정시험
삼성채용
엘클라시코
장범준
서지혜 맥주
히든싱어 장범준
곰표 맥주
카이로스
슭곰발
서지혜
후쿠하라 모네
심지유
기믹
이강인
아쿠아맨
하빕 게이치
화사 별
비비
In [6]:
# strip() removes only leading and trailing whitespace,
# so whitespace inside a keyword is kept.
for title_tag in rank_list:
    trimmed = title_tag.text.strip()
    print(trimmed)
한능검
핵심능력 테스트
한국사능력검정시험
삼성채용
엘클라시코
장범준
서지혜 맥주
히든싱어 장범준
곰표 맥주
카이로스
슭곰발
서지혜
후쿠하라 모네
심지유
기믹
이강인
아쿠아맨
하빕 게이치
화사 별
비비
In [7]:
# split() with no arguments cuts the string on whitespace and drops it,
# but note that the result is a list of words, not a string.
for title_tag in rank_list:
    words = title_tag.text.split()
    print(words)
['한능검']
['핵심능력', '테스트']
['한국사능력검정시험']
['삼성채용']
['엘클라시코']
['장범준']
['서지혜', '맥주']
['히든싱어', '장범준']
['곰표', '맥주']
['카이로스']
['슭곰발']
['서지혜']
['후쿠하라', '모네']
['심지유']
['기믹']
['이강인']
['아쿠아맨']
['하빕', '게이치']
['화사', '별']
['비비']
In [8]:
# enumerate yields (counter, element) pairs: the first name in the loop target
# receives the counter (its name is arbitrary) and the second the element.
# start=1 makes the counter begin at 1, so it can be printed directly as the
# rank without adding 1 by hand.
for position, rank in enumerate(rank_list, start=1):
    print(position, rank.text)
1 한능검
2 핵심능력 테스트
3 한국사능력검정시험
4 삼성채용
5 엘클라시코
6 장범준
7 서지혜 맥주
8 히든싱어 장범준
9 곰표 맥주
10 카이로스
11 슭곰발
12 서지혜
13 후쿠하라 모네
14 심지유
15 기믹
16 이강인
17 아쿠아맨
18 하빕 게이치
19 화사 별
20 비비
In [13]:
rank_list = soup.select(".ranking_list .item_title")

# Store each entry as a dictionary ({"rank": ..., "value": ...}) so the data
# is easy to work with afterwards. enumerate(..., start=1) numbers the entries
# from 1, matching the rank shown on the page.
rank_result_list = [
    {"rank": position, "value": title_tag.text}
    for position, title_tag in enumerate(rank_list, start=1)
]

# Print the result to check that the values were stored as intended.
print(rank_result_list)
print("-"*100)
# Use the collected data. The selector may match nothing (for example when the
# page layout changes or an error page is returned), so guard the [0] access
# instead of failing with IndexError.
if rank_result_list:
    top = rank_result_list[0]
    print(f'실시간 급상승 검색어 {top["rank"]}위는 "{top["value"]}"입니다.')
else:
    print("실시간 급상승 검색어를 찾지 못했습니다.")
[{'rank': 1, 'value': '한능검'}, {'rank': 2, 'value': '핵심능력 테스트'}, {'rank': 3, 'value': '한국사능력검정시험'}, {'rank': 4, 'value': '삼성채용'}, {'rank': 5, 'value': '엘클라시코'}, {'rank': 6, 'value': '장범준'}, {'rank': 7, 'value': '서지혜 맥주'}, {'rank': 8, 'value': '히든싱어 장범준'}, {'rank': 9, 'value': '곰표 맥주'}, {'rank': 10, 'value': '카이로스'}, {'rank': 11, 'value': '슭곰발'}, {'rank': 12, 'value': '서지혜'}, {'rank': 13, 'value': '후쿠하라 모네'}, {'rank': 14, 'value': '심지유'}, {'rank': 15, 'value': '기믹'}, {'rank': 16, 'value': '이강인'}, {'rank': 17, 'value': '아쿠아맨'}, {'rank': 18, 'value': '하빕 게이치'}, {'rank': 19, 'value': '화사 별'}, {'rank': 20, 'value': '비비'}]
----------------------------------------------------------------------------------------------------
실시간 급상승 검색어 1위는 "한능검"입니다.

2. Selenium 설치 & Kernel restart

In [1]:
pip install selenium
Requirement already satisfied: selenium in /Users/daseul/opt/anaconda3/lib/python3.8/site-packages (3.141.0)
Requirement already satisfied: urllib3 in /Users/daseul/opt/anaconda3/lib/python3.8/site-packages (from selenium) (1.25.9)
Note: you may need to restart the kernel to use updated packages.
In [3]:
from selenium import webdriver
from bs4 import BeautifulSoup
# Start a Chrome WebDriver session, passing the path of the chromedriver
# executable in the current directory.
# NOTE(review): nothing in this cell closes the session; call driver.quit()
# once it is no longer needed — confirm whether leaving it open is intended.
driver = webdriver.Chrome('./chromedriver')
In [6]:
from selenium import webdriver
from bs4 import BeautifulSoup
# Read the page with Selenium's Chrome WebDriver instead of requests.
driver = webdriver.Chrome('./chromedriver')

try:
    # Implicit wait: element lookups (find_element...) keep retrying for up to
    # 3 seconds before failing. It does not delay get() or page_source, and no
    # element lookups are made below, so it only matters if such calls are
    # added later.
    driver.implicitly_wait(3)

    # Open the URL in a real Chrome window, just as if it were visited by hand.
    driver.get('https://datalab.naver.com/keyword/realtimeList.naver')

    # Take the HTML of the page as currently held by the browser.
    html = driver.page_source
finally:
    # quit() ends the whole WebDriver session (the browser windows and the
    # chromedriver process). Running it in finally releases them even when
    # loading the page fails.
    driver.quit()

# Parse the page source with BeautifulSoup.
soup = BeautifulSoup(html, 'html.parser')

# -----------------------
# From here on the processing is the same as in the requests + BeautifulSoup
# version.
rank_list = soup.select(".ranking_list .item_title")

# Store each entry as a dictionary ({"rank": ..., "value": ...}) so the data
# is easy to work with afterwards. enumerate(..., start=1) numbers the entries
# from 1, matching the rank shown on the page.
rank_result_list = [
    {"rank": position, "value": title_tag.text}
    for position, title_tag in enumerate(rank_list, start=1)
]

# Print the result to check that the values were stored as intended.
print(rank_result_list)
print("-"*100)
# Use the collected data. The selector may match nothing (for example when the
# page layout changes or an error page is returned), so guard the [0] access
# instead of failing with IndexError.
if rank_result_list:
    top = rank_result_list[0]
    print(f'실시간 급상승 검색어 {top["rank"]}위는 "{top["value"]}"입니다.')
else:
    print("실시간 급상승 검색어를 찾지 못했습니다.")
[{'rank': 1, 'value': '히든싱어 장범준'}, {'rank': 2, 'value': '장범준'}, {'rank': 3, 'value': '배정남'}, {'rank': 4, 'value': '서지혜 맥주'}, {'rank': 5, 'value': '아쿠아맨'}, {'rank': 6, 'value': '서지혜'}, {'rank': 7, 'value': '기믹'}, {'rank': 8, 'value': '양준혁 박현선'}, {'rank': 9, 'value': '김연경'}, {'rank': 10, 'value': '심지유'}, {'rank': 11, 'value': '비비'}, {'rank': 12, 'value': '라리가'}, {'rank': 13, 'value': '양준혁'}, {'rank': 14, 'value': '이재영'}, {'rank': 15, 'value': '이다영'}, {'rank': 16, 'value': '배정남 나이'}, {'rank': 17, 'value': '인현왕후의 남자'}, {'rank': 18, 'value': '화사 별'}, {'rank': 19, 'value': '나를 사랑한 스파이'}, {'rank': 20, 'value': '스파이'}]
----------------------------------------------------------------------------------------------------
실시간 급상승 검색어 1위는 "히든싱어 장범준"입니다.