Source code for advertools.serp

"""
.. _serp:

Import Search Engine Results Pages (SERPs) for Google and YouTube
=================================================================
Analyzing a single SERP is like getting one person to fill out a questionnaire
and calling it a survey.

Just like surveys, SERPs need to be collected in large-enough numbers that are
representative of the industry/market you want to understand. This is the main
feature of the ``serp_`` functions. They allow you to get the SERPs for a list
of queries, across several dimensions (like country, search type, start
position, and so on).

There are many parameters that can be used, and you can supply a list for each.
The function will get the SERPs for the *product* of all those lists. For
example, let's say you you provide the following arguments to the
:func:`serp_goog` function:

* `q`: ['serp tools', 'best serp tools', 'serp tool reviews']
* `gl`: ['us', 'ca', 'uk', 'au', 'nz']
* `start`: [1, 11, 21]

The function will produce:
3 (queries) x 5 (countries) x 3 (start positions) = 45 requests

You typically get ten results each, so in this case you would get 450 rows of
data.

All this is done in with one line of code. The result is a single DataFrame
with a row for each result, and columns for each attribute (title, snippet,
etc.), as well as meta data columns, like `queryTime` and the parameters you
selected (`q`, `gl`, and `start` in this case).


Before being able to run queries using :func:`serp_goog`, you will need to set
up some credentials as follows (you don't need a custom search engine for
:func:`serp_youtube`):

* `Create a custom search engine <https://cse.google.com/>`_: At first, you might be
  asked to enter a site to search. Enter any domain, then go to the control panel and
  remove it. Make sure you enable "Search the entire web" and image search. You will
  also need to get your search engine ID, which you can find on the control panel page.

* `Enable the custom search API <https://console.cloud.google.com/apis/library/customsearch.googleapis.com?pli=1>`_:
  The service will allow you to retrieve and display search results from your custom
  search engine programmatically. You will need to create a project for this first.

* `Create credentials for this project <https://console.developers.google.com/apis/api/customsearch.googleapis.com/credentials>`_:
  so you can get your key.

* `Enable billing for your project <https://console.cloud.google.com/billing/projects>`_
  if you want to run more than 100 queries per day. The first 100 queries are free; then
  for each additional 1,000 queries, you pay $5.


"""

__all__ = [
    "SERP_GOOG_VALID_VALS",
    "YOUTUBE_TOPIC_IDS",
    "YOUTUBE_VID_CATEGORY_IDS",
    "serp_goog",
    "serp_youtube",
    "set_logging_level",
    "youtube_channel_details",
    "youtube_video_details",
]

import datetime
import logging
from itertools import product

import pandas as pd

if int(pd.__version__[0]) >= 1:
    from pandas import json_normalize
else:
    from pandas.io.json import json_normalize

import requests

SERP_GOOG_LOG_FMT = (
    "%(asctime)s | %(levelname)s | %(filename)s:%(lineno)d "
    "| %(funcName)s | %(message)s"
)
logging.basicConfig(format=SERP_GOOG_LOG_FMT)


##############################################################################
# Google variables
##############################################################################


SERP_GOOG_VALID_VALS = dict(
    fileType={
        "bas",
        "c",
        "cc",
        "cpp",
        "cs",
        "cxx",
        "doc",
        "docx",
        "dwf",
        "gpx",
        "h",
        "hpp",
        "htm",
        "html",
        "hwp",
        "java",
        "kml",
        "kmz",
        "odp",
        "ods",
        "odt",
        "pdf",
        "pl",
        "ppt",
        "pptx",
        "ps",
        "py",
        "rtf",
        "svg",
        "swf",
        "tex",
        "text",
        "txt",
        "wap",
        "wml",
        "xls",
        "xlsx",
        "xml",
    },
    c2coff={0, 1},
    cr={
        "countryAF",
        "countryAL",
        "countryDZ",
        "countryAS",
        "countryAD",
        "countryAO",
        "countryAI",
        "countryAQ",
        "countryAG",
        "countryAR",
        "countryAM",
        "countryAW",
        "countryAU",
        "countryAT",
        "countryAZ",
        "countryBS",
        "countryBH",
        "countryBD",
        "countryBB",
        "countryBY",
        "countryBE",
        "countryBZ",
        "countryBJ",
        "countryBM",
        "countryBT",
        "countryBO",
        "countryBA",
        "countryBW",
        "countryBV",
        "countryBR",
        "countryIO",
        "countryBN",
        "countryBG",
        "countryBF",
        "countryBI",
        "countryKH",
        "countryCM",
        "countryCA",
        "countryCV",
        "countryKY",
        "countryCF",
        "countryTD",
        "countryCL",
        "countryCN",
        "countryCX",
        "countryCC",
        "countryCO",
        "countryKM",
        "countryCG",
        "countryCD",
        "countryCK",
        "countryCR",
        "countryCI",
        "countryHR",
        "countryCU",
        "countryCY",
        "countryCZ",
        "countryDK",
        "countryDJ",
        "countryDM",
        "countryDO",
        "countryTP",
        "countryEC",
        "countryEG",
        "countrySV",
        "countryGQ",
        "countryER",
        "countryEE",
        "countryET",
        "countryEU",
        "countryFK",
        "countryFO",
        "countryFJ",
        "countryFI",
        "countryFR",
        "countryFX",
        "countryGF",
        "countryPF",
        "countryTF",
        "countryGA",
        "countryGM",
        "countryGE",
        "countryDE",
        "countryGH",
        "countryGI",
        "countryGR",
        "countryGL",
        "countryGD",
        "countryGP",
        "countryGU",
        "countryGT",
        "countryGN",
        "countryGW",
        "countryGY",
        "countryHT",
        "countryHM",
        "countryVA",
        "countryHN",
        "countryHK",
        "countryHU",
        "countryIS",
        "countryIN",
        "countryID",
        "countryIR",
        "countryIQ",
        "countryIE",
        "countryIL",
        "countryIT",
        "countryJM",
        "countryJP",
        "countryJO",
        "countryKZ",
        "countryKE",
        "countryKI",
        "countryKP",
        "countryKR",
        "countryKW",
        "countryKG",
        "countryLA",
        "countryLV",
        "countryLB",
        "countryLS",
        "countryLR",
        "countryLY",
        "countryLI",
        "countryLT",
        "countryLU",
        "countryMO",
        "countryMK",
        "countryMG",
        "countryMW",
        "countryMY",
        "countryMV",
        "countryML",
        "countryMT",
        "countryMH",
        "countryMQ",
        "countryMR",
        "countryMU",
        "countryYT",
        "countryMX",
        "countryFM",
        "countryMD",
        "countryMC",
        "countryMN",
        "countryMS",
        "countryMA",
        "countryMZ",
        "countryMM",
        "countryNA",
        "countryNR",
        "countryNP",
        "countryNL",
        "countryAN",
        "countryNC",
        "countryNZ",
        "countryNI",
        "countryNE",
        "countryNG",
        "countryNU",
        "countryNF",
        "countryMP",
        "countryNO",
        "countryOM",
        "countryPK",
        "countryPW",
        "countryPS",
        "countryPA",
        "countryPG",
        "countryPY",
        "countryPE",
        "countryPH",
        "countryPN",
        "countryPL",
        "countryPT",
        "countryPR",
        "countryQA",
        "countryRE",
        "countryRO",
        "countryRU",
        "countryRW",
        "countrySH",
        "countryKN",
        "countryLC",
        "countryPM",
        "countryVC",
        "countryWS",
        "countrySM",
        "countryST",
        "countrySA",
        "countrySN",
        "countryCS",
        "countrySC",
        "countrySL",
        "countrySG",
        "countrySK",
        "countrySI",
        "countrySB",
        "countrySO",
        "countryZA",
        "countryGS",
        "countryES",
        "countryLK",
        "countrySD",
        "countrySR",
        "countrySJ",
        "countrySZ",
        "countrySE",
        "countryCH",
        "countrySY",
        "countryTW",
        "countryTJ",
        "countryTZ",
        "countryTH",
        "countryTG",
        "countryTK",
        "countryTO",
        "countryTT",
        "countryTN",
        "countryTR",
        "countryTM",
        "countryTC",
        "countryTV",
        "countryUG",
        "countryUA",
        "countryAE",
        "countryUK",
        "countryUS",
        "countryUM",
        "countryUY",
        "countryUZ",
        "countryVU",
        "countryVE",
        "countryVN",
        "countryVG",
        "countryVI",
        "countryWF",
        "countryEH",
        "countryYE",
        "countryYU",
        "countryZM",
        "countryZW",
    },
    gl={
        "ad",
        "ae",
        "af",
        "ag",
        "ai",
        "al",
        "am",
        "an",
        "ao",
        "aq",
        "ar",
        "as",
        "at",
        "au",
        "aw",
        "az",
        "ba",
        "bb",
        "bd",
        "be",
        "bf",
        "bg",
        "bh",
        "bi",
        "bj",
        "bm",
        "bn",
        "bo",
        "br",
        "bs",
        "bt",
        "bv",
        "bw",
        "by",
        "bz",
        "ca",
        "cc",
        "cd",
        "cf",
        "cg",
        "ch",
        "ci",
        "ck",
        "cl",
        "cm",
        "cn",
        "co",
        "cr",
        "cs",
        "cu",
        "cv",
        "cx",
        "cy",
        "cz",
        "de",
        "dj",
        "dk",
        "dm",
        "do",
        "dz",
        "ec",
        "ee",
        "eg",
        "eh",
        "er",
        "es",
        "et",
        "fi",
        "fj",
        "fk",
        "fm",
        "fo",
        "fr",
        "ga",
        "gd",
        "ge",
        "gf",
        "gh",
        "gi",
        "gl",
        "gm",
        "gn",
        "gp",
        "gq",
        "gr",
        "gs",
        "gt",
        "gu",
        "gw",
        "gy",
        "hk",
        "hm",
        "hn",
        "hr",
        "ht",
        "hu",
        "id",
        "ie",
        "il",
        "in",
        "io",
        "iq",
        "ir",
        "is",
        "it",
        "jm",
        "jo",
        "jp",
        "ke",
        "kg",
        "kh",
        "ki",
        "km",
        "kn",
        "kp",
        "kr",
        "kw",
        "ky",
        "kz",
        "la",
        "lb",
        "lc",
        "li",
        "lk",
        "lr",
        "ls",
        "lt",
        "lu",
        "lv",
        "ly",
        "ma",
        "mc",
        "md",
        "mg",
        "mh",
        "mk",
        "ml",
        "mm",
        "mn",
        "mo",
        "mp",
        "mq",
        "mr",
        "ms",
        "mt",
        "mu",
        "mv",
        "mw",
        "mx",
        "my",
        "mz",
        "na",
        "nc",
        "ne",
        "nf",
        "ng",
        "ni",
        "nl",
        "no",
        "np",
        "nr",
        "nu",
        "nz",
        "om",
        "pa",
        "pe",
        "pf",
        "pg",
        "ph",
        "pk",
        "pl",
        "pm",
        "pn",
        "pr",
        "ps",
        "pt",
        "pw",
        "py",
        "qa",
        "re",
        "ro",
        "ru",
        "rw",
        "sa",
        "sb",
        "sc",
        "sd",
        "se",
        "sg",
        "sh",
        "si",
        "sj",
        "sk",
        "sl",
        "sm",
        "sn",
        "so",
        "sr",
        "st",
        "sv",
        "sy",
        "sz",
        "tc",
        "td",
        "tf",
        "tg",
        "th",
        "tj",
        "tk",
        "tl",
        "tm",
        "tn",
        "to",
        "tr",
        "tt",
        "tv",
        "tw",
        "tz",
        "ua",
        "ug",
        "uk",
        "um",
        "us",
        "uy",
        "uz",
        "va",
        "vc",
        "ve",
        "vg",
        "vi",
        "vn",
        "vu",
        "wf",
        "ws",
        "ye",
        "yt",
        "za",
        "zm",
        "zw",
    },
    filter={0, 1},
    hl={
        "af",
        "sq",
        "sm",
        "ar",
        "az",
        "eu",
        "be",
        "bn",
        "bh",
        "bs",
        "bg",
        "ca",
        "zh-CN",
        "zh-TW",
        "hr",
        "cs",
        "da",
        "nl",
        "en",
        "eo",
        "et",
        "fo",
        "fi",
        "fr",
        "fy",
        "gl",
        "ka",
        "de",
        "el",
        "gu",
        "iw",
        "hi",
        "hu",
        "is",
        "id",
        "ia",
        "ga",
        "it",
        "ja",
        "jw",
        "kn",
        "ko",
        "la",
        "lv",
        "lt",
        "mk",
        "ms",
        "ml",
        "mt",
        "mr",
        "ne",
        "no",
        "nn",
        "oc",
        "fa",
        "pl",
        "pt-BR",
        "pt-PT",
        "pa",
        "ro",
        "ru",
        "gd",
        "sr",
        "si",
        "sk",
        "sl",
        "es",
        "su",
        "sw",
        "sv",
        "tl",
        "ta",
        "te",
        "th",
        "ti",
        "tr",
        "uk",
        "ur",
        "uz",
        "vi",
        "cy",
        "xh",
        "zu",
    },
    imgColorType={"color", "gray", "mono", "trans"},
    imgDominantColor={
        "black",
        "blue",
        "brown",
        "gray",
        "green",
        "orange",
        "pink",
        "purple",
        "red",
        "teal",
        "white",
        "yellow",
    },
    imgSize={
        "huge",
        "icon",
        "large",
        "medium",
        "small",
        "xlarge",
        "xxlarge",
    },
    imgType={"clipart", "face", "lineart", "stock", "photo", "animated"},
    lr={
        "lang_ar",
        "lang_bg",
        "lang_ca",
        "lang_zh-CN",
        "lang_zh-TW",
        "lang_hr",
        "lang_cs",
        "lang_da",
        "lang_nl",
        "lang_en",
        "lang_et",
        "lang_fi",
        "lang_fr",
        "lang_de",
        "lang_el",
        "lang_iw",
        "lang_hu",
        "lang_is",
        "lang_id",
        "lang_it",
        "lang_ja",
        "lang_ko",
        "lang_lv",
        "lang_lt",
        "lang_no",
        "lang_pl",
        "lang_pt",
        "lang_ro",
        "lang_ru",
        "lang_sr",
        "lang_sk",
        "lang_sl",
        "lang_es",
        "lang_sv",
        "lang_tr",
    },
    num={1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
    rights={
        "cc_publicdomain",
        "cc_attribute",
        "cc_sharealike",
        "cc_noncommercial",
        "cc_nonderived",
    },
    safe={"active", "off"},
    searchType={None, "image"},
    siteSearchFilter={"e", "i"},
    start=range(1, 92),
)


##############################################################################
# YouTube variables
##############################################################################


YOUTUBE_TOPIC_IDS = {
    "Entertainment topics": {
        "Entertainment (parent topic)": "/m/02jjt",
        "Humor": "/m/09kqc",
        "Movies": "/m/02vxn",
        "Performing arts": "/m/05qjc",
        "Professional wrestling": "/m/066wd",
        "TV shows": "/m/0f2f9",
    },
    "Gaming topics": {
        "Action game": "/m/025zzc",
        "Action-adventure game": "/m/02ntfj",
        "Casual game": "/m/0b1vjn",
        "Gaming (parent topic)": "/m/0bzvm2",
        "Music video game": "/m/02hygl",
        "Puzzle video game": "/m/04q1x3q",
        "Racing video game": "/m/01sjng",
        "Role-playing video game": "/m/0403l3g",
        "Simulation video game": "/m/021bp2",
        "Sports game": "/m/022dc6",
        "Strategy video game": "/m/03hf_rm",
    },
    "Lifestyle topics": {
        "Fashion": "/m/032tl",
        "Fitness": "/m/027x7n",
        "Food": "/m/02wbm",
        "Hobby": "/m/03glg",
        "Lifestyle (parent topic)": "/m/019_rr",
        "Pets": "/m/068hy",
        "Physical attractiveness [Beauty]": "/m/041xxh",
        "Technology": "/m/07c1v",
        "Tourism": "/m/07bxq",
        "Vehicles": "/m/07yv9",
    },
    "Music topics": {
        "Christian music": "/m/02mscn",
        "Classical music": "/m/0ggq0m",
        "Country": "/m/01lyv",
        "Electronic music": "/m/02lkt",
        "Hip hop music": "/m/0glt670",
        "Independent music": "/m/05rwpb",
        "Jazz": "/m/03_d0",
        "Music (parent topic)": "/m/04rlf",
        "Music of Asia": "/m/028sqc",
        "Music of Latin America": "/m/0g293",
        "Pop music": "/m/064t9",
        "Reggae": "/m/06cqb",
        "Rhythm and blues": "/m/06j6l",
        "Rock music": "/m/06by7",
        "Soul music": "/m/0gywn",
    },
    "Other topics": {"Knowledge": "/m/01k8wb"},
    "Society topics": {
        "Business": "/m/09s1f",
        "Health": "/m/0kt51",
        "Military": "/m/01h6rj",
        "Politics": "/m/05qt0",
        "Religion": "/m/06bvp",
        "Society (parent topic)": "/m/098wr",
    },
    "Sports topics": {
        "American football": "/m/0jm_",
        "Baseball": "/m/018jz",
        "Basketball": "/m/018w8",
        "Boxing": "/m/01cgz",
        "Cricket": "/m/09xp_",
        "Football": "/m/02vx4",
        "Golf": "/m/037hz",
        "Ice hockey": "/m/03tmr",
        "Mixed martial arts": "/m/01h7lh",
        "Motorsport": "/m/0410tth",
        "Sports (parent topic)": "/m/06ntj",
        "Tennis": "/m/07bs0",
        "Volleyball": "/m/07_53",
    },
}

YOUTUBE_VID_CATEGORY_IDS = {
    "Action/Adventure": "32",
    "Anime/Animation": "31",
    "Autos & Vehicles": "2",
    "Classics": "33",
    "Comedy": "34",
    "Documentary": "35",
    "Drama": "36",
    "Education": "27",
    "Entertainment": "24",
    "Family": "37",
    "Film & Animation": "1",
    "Foreign": "38",
    "Gaming": "20",
    "Horror": "39",
    "Howto & Style": "26",
    "Movies": "30",
    "Music": "10",
    "News & Politics": "25",
    "Nonprofits & Activism": "29",
    "People & Blogs": "22",
    "Pets & Animals": "15",
    "Sci-Fi/Fantasy": "40",
    "Science & Technology": "28",
    "Short Movies": "18",
    "Shorts": "42",
    "Shows": "43",
    "Sports": "17",
    "Thriller": "41",
    "Trailers": "44",
    "Travel & Events": "19",
    "Videoblogging": "21",
}

SERP_YTUBE_VALID_VALS = dict(
    channelType={"any", "show"},
    eventType={"completed", "live", "upcoming"},
    forContentOwner={True, False, "true", "false"},
    forDeveloper={True, False, "true", "false"},
    forMine={True, False, "true", "false"},
    maxResults=range(51),
    order={"date", "rating", "relevance", "title", "videoCount", "viewCount"},
    regionCode={
        "ad",
        "ae",
        "af",
        "ag",
        "ai",
        "al",
        "am",
        "an",
        "ao",
        "aq",
        "ar",
        "as",
        "at",
        "au",
        "aw",
        "az",
        "ba",
        "bb",
        "bd",
        "be",
        "bf",
        "bg",
        "bh",
        "bi",
        "bj",
        "bm",
        "bn",
        "bo",
        "br",
        "bs",
        "bt",
        "bv",
        "bw",
        "by",
        "bz",
        "ca",
        "cc",
        "cd",
        "cf",
        "cg",
        "ch",
        "ci",
        "ck",
        "cl",
        "cm",
        "cn",
        "co",
        "cr",
        "cs",
        "cu",
        "cv",
        "cx",
        "cy",
        "cz",
        "de",
        "dj",
        "dk",
        "dm",
        "do",
        "dz",
        "ec",
        "ee",
        "eg",
        "eh",
        "er",
        "es",
        "et",
        "fi",
        "fj",
        "fk",
        "fm",
        "fo",
        "fr",
        "ga",
        "gd",
        "ge",
        "gf",
        "gh",
        "gi",
        "gl",
        "gm",
        "gn",
        "gp",
        "gq",
        "gr",
        "gs",
        "gt",
        "gu",
        "gw",
        "gy",
        "hk",
        "hm",
        "hn",
        "hr",
        "ht",
        "hu",
        "id",
        "ie",
        "il",
        "in",
        "io",
        "iq",
        "ir",
        "is",
        "it",
        "jm",
        "jo",
        "jp",
        "ke",
        "kg",
        "kh",
        "ki",
        "km",
        "kn",
        "kp",
        "kr",
        "kw",
        "ky",
        "kz",
        "la",
        "lb",
        "lc",
        "li",
        "lk",
        "lr",
        "ls",
        "lt",
        "lu",
        "lv",
        "ly",
        "ma",
        "mc",
        "md",
        "mg",
        "mh",
        "mk",
        "ml",
        "mm",
        "mn",
        "mo",
        "mp",
        "mq",
        "mr",
        "ms",
        "mt",
        "mu",
        "mv",
        "mw",
        "mx",
        "my",
        "mz",
        "na",
        "nc",
        "ne",
        "nf",
        "ng",
        "ni",
        "nl",
        "no",
        "np",
        "nr",
        "nu",
        "nz",
        "om",
        "pa",
        "pe",
        "pf",
        "pg",
        "ph",
        "pk",
        "pl",
        "pm",
        "pn",
        "pr",
        "ps",
        "pt",
        "pw",
        "py",
        "qa",
        "re",
        "ro",
        "ru",
        "rw",
        "sa",
        "sb",
        "sc",
        "sd",
        "se",
        "sg",
        "sh",
        "si",
        "sj",
        "sk",
        "sl",
        "sm",
        "sn",
        "so",
        "sr",
        "st",
        "sv",
        "sy",
        "sz",
        "tc",
        "td",
        "tf",
        "tg",
        "th",
        "tj",
        "tk",
        "tl",
        "tm",
        "tn",
        "to",
        "tr",
        "tt",
        "tv",
        "tw",
        "tz",
        "ua",
        "ug",
        "uk",
        "um",
        "us",
        "uy",
        "uz",
        "va",
        "vc",
        "ve",
        "vg",
        "vi",
        "vn",
        "vu",
        "wf",
        "ws",
        "ye",
        "yt",
        "za",
        "zm",
        "zw",
    },
    relevanceLanguage={
        "af",
        "sq",
        "sm",
        "ar",
        "az",
        "eu",
        "be",
        "bn",
        "bh",
        "bs",
        "bg",
        "ca",
        "zh-CN",
        "zh-TW",
        "zh-Hans",
        "zh-Hant",
        "hr",
        "cs",
        "da",
        "nl",
        "en",
        "eo",
        "et",
        "fo",
        "fi",
        "fr",
        "fy",
        "gl",
        "ka",
        "de",
        "el",
        "gu",
        "iw",
        "hi",
        "hu",
        "is",
        "id",
        "ia",
        "ga",
        "it",
        "ja",
        "jw",
        "kn",
        "ko",
        "la",
        "lv",
        "lt",
        "mk",
        "ms",
        "ml",
        "mt",
        "mr",
        "ne",
        "no",
        "nn",
        "oc",
        "fa",
        "pl",
        "pt-BR",
        "pt-PT",
        "pa",
        "ro",
        "ru",
        "gd",
        "sr",
        "si",
        "sk",
        "sl",
        "es",
        "su",
        "sw",
        "sv",
        "tl",
        "ta",
        "te",
        "th",
        "ti",
        "tr",
        "uk",
        "ur",
        "uz",
        "vi",
        "cy",
        "xh",
        "zu",
    },
    safeSearch={"moderate", "none", "strict"},
    topicId={
        "/m/04rlf",
        "/m/02mscn",
        "/m/0ggq0m",
        "/m/01lyv",
        "/m/02lkt",
        "/m/0glt670",
        "/m/05rwpb",
        "/m/03_d0",
        "/m/028sqc",
        "/m/0g293",
        "/m/064t9",
        "/m/06cqb",
        "/m/06j6l",
        "/m/06by7",
        "/m/0gywn",
        "/m/0bzvm2",
        "/m/025zzc",
        "/m/02ntfj",
        "/m/0b1vjn",
        "/m/02hygl",
        "/m/04q1x3q",
        "/m/01sjng",
        "/m/0403l3g",
        "/m/021bp2",
        "/m/022dc6",
        "/m/03hf_rm",
        "/m/06ntj",
        "/m/0jm_",
        "/m/018jz",
        "/m/018w8",
        "/m/01cgz",
        "/m/09xp_",
        "/m/02vx4",
        "/m/037hz",
        "/m/03tmr",
        "/m/01h7lh",
        "/m/0410tth",
        "/m/07bs0",
        "/m/07_53",
        "/m/02jjt",
        "/m/09kqc",
        "/m/02vxn",
        "/m/05qjc",
        "/m/066wd",
        "/m/0f2f9",
        "/m/019_rr",
        "/m/032tl",
        "/m/027x7n",
        "/m/02wbm",
        "/m/03glg",
        "/m/068hy",
        "/m/041xxh",
        "/m/07c1v",
        "/m/07bxq",
        "/m/07yv9",
        "/m/098wr",
        "/m/09s1f",
        "/m/0kt51",
        "/m/01h6rj",
        "/m/05qt0",
        "/m/06bvp",
        "/m/01k8wb",
    },
    type={"channel", "playlist", "video"},
    videoCaption={"any", "closedCaption", "none"},
    videoCategoryId={
        "1",
        "2",
        "10",
        "15",
        "17",
        "18",
        "19",
        "20",
        "21",
        "22",
        "23",
        "24",
        "25",
        "26",
        "27",
        "28",
        "29",
        "30",
        "31",
        "32",
        "33",
        "34",
        "35",
        "36",
        "37",
        "38",
        "39",
        "40",
        "41",
        "42",
        "43",
        "44",
    },
    videoDefinition={"any", "high", "standard"},
    videoDimension={"2d", "3d", "any"},
    videoDuration={"any", "long", "medium", "short"},
    videoEmbeddable={"any", True, "true"},
    videoLicense={"any", "creativeCommon", "youtube"},
    videoSyndicated={"any", True, "true"},
    videoType={"any", "episode", "movie"},
)


def _split_by_comma(s, length=50):
    """Group a comma-separated string into a list of at-most
    ``length``-length words each."""
    str_split = s.split(",")
    str_list = []
    for i in range(0, len(str_split) + length, length):
        temp_str = ",".join(str_split[i : i + length])
        if temp_str:
            str_list.append(temp_str)
    return str_list



[docs]
def youtube_video_details(key, vid_ids):
    """Return details of videos for which the ids are given.
    Assumes ``ids`` is a comma-separated list of video ids with
    no spaces.

    Parameters
    ----------
    key : str
      Your Google Developer key.
    vid_ids : str
      A comma-separated list of video ID's, with no spaces.

    Returns
    -------
    video_df : pandas.DataFrame
    """
    base_url = (
        "https://www.googleapis.com/youtube/v3/videos?part="
        "contentDetails,id,liveStreamingDetails,localizations,player,"
        "recordingDetails,snippet,statistics,status,topicDetails"
    )
    vid_ids = _split_by_comma(vid_ids, length=50)
    final_df = pd.DataFrame()
    for vid_id in vid_ids:
        params = {"id": vid_id, "key": key}
        logging.info(msg="Requesting: " + "video details")
        video_resp = requests.get(base_url, params=params)
        if video_resp.status_code >= 400:
            raise Exception(video_resp.json())
        items_df = pd.DataFrame(video_resp.json()["items"])
        details = ["snippet", "topicDetails", "statistics", "status", "contentDetails"]
        detail_df = pd.DataFrame()
        for detail in details:
            try:
                detail_df = pd.concat(
                    [
                        detail_df,
                        pd.DataFrame([x[detail] for x in video_resp.json()["items"]]),
                    ],
                    axis=1,
                )
            except KeyError:
                continue
        temp_df = pd.concat([items_df, detail_df], axis=1)
        final_df = pd.concat([final_df, temp_df], sort=False, ignore_index=True)
    return final_df




[docs]
def youtube_channel_details(key, channel_ids):
    """Return details of channels for which the ids are given.
    Assumes ``ids`` is a comma-separated list of channel ids with
    no spaces.

    Parameters
    ----------
    key : str
      Your Google Developer key.
    channel_ids : str
      A comma-separated list of channel ID's, with no spaces.

    Returns
    -------
    channel_df : pandas.DataFrame
    """
    base_url = (
        "https://www.googleapis.com/youtube/v3/channels?part="
        "snippet,contentDetails,statistics"
    )
    channel_ids = _split_by_comma(channel_ids, length=50)
    final_df = pd.DataFrame()
    for channel_id in channel_ids:
        params = {"id": channel_id, "key": key}
        logging.info(msg="Requesting: " + "channel details")
        channel_resp = requests.get(base_url, params=params)
        if channel_resp.status_code >= 400:
            raise Exception(channel_resp.json())
        items_df = pd.DataFrame(channel_resp.json()["items"])
        details = ["snippet", "statistics", "contentDetails"]
        detail_df = pd.DataFrame()
        for detail in details:
            try:
                detail_df = pd.concat(
                    [
                        detail_df,
                        pd.DataFrame([x[detail] for x in channel_resp.json()["items"]]),
                    ],
                    axis=1,
                )
            except KeyError:
                continue
        temp_df = pd.concat([items_df, detail_df], axis=1)
        final_df = pd.concat([final_df, temp_df], sort=False, ignore_index=True)
    return final_df



def _dict_product(d):
    """Return the product of all values of a dict, while
    coupling each value with its key.
    This is used to generate multiple queries out of
        possibly multiple arguments in serp_goog.

    >>> d = {"a": [1], "b": [2, 3, 4], "c": [5, 6]}
    >>> _dict_product(d)
    >>> [{'a': 1, 'b': 2, 'c': 5},
         {'a': 1, 'b': 2, 'c': 6},
         {'a': 1, 'b': 3, 'c': 5},
         {'a': 1, 'b': 3, 'c': 6},
         {'a': 1, 'b': 4, 'c': 5},
         {'a': 1, 'b': 4, 'c': 6}]
    """
    items = list(d.items())
    keys = [x[0] for x in items]
    values = [x[1] for x in items]
    dicts = []
    for prod in product(*values):
        tempdict = dict(zip(keys, prod))
        dicts.append(tempdict)
    return dicts



[docs]
def serp_goog(
    q,
    cx,
    key,
    c2coff=None,
    cr=None,
    dateRestrict=None,
    exactTerms=None,
    excludeTerms=None,
    fileType=None,
    filter=None,
    gl=None,
    highRange=None,
    hl=None,
    hq=None,
    imgColorType=None,
    imgDominantColor=None,
    imgSize=None,
    imgType=None,
    linkSite=None,
    lowRange=None,
    lr=None,
    num=None,
    orTerms=None,
    rights=None,
    safe=None,
    searchType=None,
    siteSearch=None,
    siteSearchFilter=None,
    sort=None,
    start=None,
):
    """Query Google's search API and get search results in a DataFrame.

    For each parameter, you can supply single or multiple values / arguments.
        If you pass multiple arguments, all the possible combinations of
        arguments (the product) will be requested, and you will get one
        DataFrame combining all queries. See examples below.

    Parameters
    ----------
    q : str
      The search expression.
    cx : str
      The custom search engine ID to use for this request.
    key : str
      The API key of your custom search engine.
    c2coff : str
      Enables or disables Simplified and Traditional Chinese Search. The default value
      for this parameter is 0 (zero), meaning that the feature is enabled. Supported
      values are:1: Disabled0: Enabled (default)
    cr : str
      Restricts search results to documents originating in a particular country. You may
      use Boolean operators in the cr parameter's value.Google Search determines the
      country of a document by analyzing:the top- level domain (TLD) of the document's
      URLthe geographic location of the Web server's IP addressSee the Country Parameter
      Values page for a list of valid values for this parameter.
    dateRestrict : str
      Restricts results to URLs based on date.

      Supported values include:
        - d[number]: requests results from the specified number of past days.
        - w[number]: requests results from the specified number of past weeks.
        - m[number]: requests results from the specified number of past months.
        - y[number]: requests results from the specified number of past years.
    exactTerms : str
      Identifies a phrase that all documents in the search results must contain.
    excludeTerms : str
      Identifies a word or phrase that should not appear in any documents in the search
      results.
    fileType : str
      Restricts results to files of a specified extension. A list of file types
      indexable by Google can be found in Search Console Help Center.
    filter : str
      Controls turning on or off the duplicate content filter.See Automatic Filtering
      for more information about Google's search results filters. Note that host
      crowding filtering applies only to multi-site searches.By default, Google applies
      filtering to all search results to improve the quality of those results.
      Acceptable values are:  "0": Turns off duplicate content filter.  "1": Turns on
      duplicate content filter.
    gl : str
      Geolocation of end user. The gl parameter value is a two-letter country code. The
      gl parameter boosts search results whose country of origin matches the parameter
      value. See the Country Codes page for a list of valid values.Specifying a gl
      parameter value should lead to more relevant results. This is particularly true
      for international customers and, even more specifically, for customers in
      English- speaking countries other than the United States.
    highRange : str
      Specifies the ending value for a search range.Use lowRange and highRange to append
      an inclusive search range of lowRange...highRange to the query.
    hl : str
      Sets the user interface language. Explicitly setting this parameter improves the
      performance and the quality of your search results.See the Interface Languages
      section of Internationalizing Queries and Results Presentation for more
      information, and Supported Interface Languages for a list of supported languages.
    hq : str
      Appends the specified query terms to the query, as if they were combined with a
      logical AND operator.
    imgColorType : str
      Returns black and white, grayscale, or color images: mono, gray, and color.
      Acceptable values are:  "color": color  "gray": gray  "mono": mono
    imgDominantColor : str
      Returns images of a specific dominant color.  Acceptable values are:
      "black": black "blue": blue  "brown": brown  "gray": gray  "green": green
      "orange": orange  "pink": pink  "purple": purple  "red": red "teal": teal
      "white": white  "yellow": yellow
    imgSize : str
      Returns images of a specified size. Acceptable values are:  "huge": huge
      "icon": icon  "large": large  "medium": medium  "small": small  "xlarge": xlarge
      "xxlarge": xxlarge
    imgType : str
      Returns images of a type.  Acceptable values are:  "clipart": clipart
      "face": face  "lineart": lineart  "news": news  "photo": photo
    linkSite : str
      Specifies that all search results should contain a link to a particular URL
    lowRange : str
      Specifies the starting value for a search range. Use lowRange and highRange to
      append an inclusive search range of lowRange...highRange to the query.
    lr : str
      Restricts the search to documents written in a particular language
      (e.g., lr=lang_ja).  Acceptable values are:  "lang_ar": Arabic
      "lang_bg": Bulgarian  "lang_ca": Catalan  "lang_cs": Czech  "lang_da": Danish
      "lang_de": German  "lang_el": Greek  "lang_en": English  "lang_es": Spanish
      "lang_et": Estonian  "lang_fi": Finnish  "lang_fr": French  "lang_hr": Croatian
      "lang_hu": Hungarian "lang_id": Indonesian  "lang_is": Icelandic
      "lang_it": Italian  "lang_iw": Hebrew  "lang_ja": Japanese  "lang_ko": Korean
      "lang_lt": Lithuanian  "lang_lv": Latvian "lang_nl": Dutch  "lang_no": Norwegian
      "lang_pl": Polish "lang_pt": Portuguese  "lang_ro": Romanian  "lang_ru": Russian
      "lang_sk": Slovak  "lang_sl": Slovenian  "lang_sr": Serbian  "lang_sv": Swedish
      "lang_tr": Turkish  "lang_zh- CN": Chinese (Simplified)  "lang_zh-TW":
      Chinese (Traditional)
    num : int
      Number of search results to return.Valid values are integers between 1 and 10,
      inclusive.
    orTerms : str
      Provides additional search terms to check for in a document, where each document
      in the search results must contain at least one of the additional search terms.
    rights : str
      Filters based on licensing. Supported values include: cc_publicdomain,
      cc_attribute, cc_sharealike, cc_noncommercial, cc_nonderived, and combinations of
      these.
    safe : str
      Search safety level.  Acceptable values are:  "active": Enables SafeSearch
      filtering.  "off":Disables SafeSearch filtering.  (default)
    searchType : str
      Specifies the search type: image. If unspecified, results are limited to webpages.
      Acceptable values are:  "image": custom image search.
    siteSearch : str
      Specifies all search results should be pages from a given site.
    siteSearchFilter : str
      Controls whether to include or exclude results from the site named in the
      siteSearch parameter.  Acceptable values are:  "e": exclude  "i": include
    sort : str
      The sort expression to apply to the results.
    start : int
      The index of the first result to return.Valid value are integers starting 1
      (default) and the second result is 2 and so forth. For example &start=11 gives the
      second page of results with the default "num" value of 10 results per page.Note:
      No more than 100 results will ever be returned for any query with JSON API, even
      if more than 100 documents match the query, so setting (start + num) to more than
      100 will produce an error. Note that the maximum value for num is 10.

    Returns
    -------
    serp_df : pandas.DataFrame

    Examples
    --------
    The following function call will produce two queries:
    "hotel" in the USA, and "hotel" in France

    >>> serp_goog(q="hotel", gl=["us", "fr"], cx="YOUR_CX", key="YOUR_KEY")

    The below function call will prouce four queries and make four requests:

    * "fligts" in UK
    * "fligts" in Australia
    * "tickets" in UK
    * "tickets" in Australia

    'cr' here refers to 'country restrict', which focuses on content
    originating from the specified country.

    >>> serp_goog(q=['flights', 'tickets'], cr=['countryUK', 'countryAU'],
                  cx='YOUR_CX', key='YOUR_KEY')
    """
    params = locals()
    supplied_params = {k: v for k, v in params.items() if params[k] is not None}

    for p in supplied_params:
        if isinstance(supplied_params[p], (str, int)):
            supplied_params[p] = [supplied_params[p]]

    for p in supplied_params:
        if p in SERP_GOOG_VALID_VALS:
            if not set(supplied_params[p]).issubset(SERP_GOOG_VALID_VALS[p]):
                raise ValueError(
                    "Please make sure you provide a"
                    ' valid value for "{}", valid values:\n'
                    "{}".format(p, sorted(SERP_GOOG_VALID_VALS[p]))
                )
    params_list = _dict_product(supplied_params)
    base_url = "https://www.googleapis.com/customsearch/v1?"
    specified_cols = [
        "searchTerms",
        "rank",
        "title",
        "snippet",
        "displayLink",
        "link",
        "queryTime",
        "totalResults",
    ]
    responses = []
    for param in params_list:
        param_log = ", ".join([k + "=" + str(v) for k, v in param.items()])
        logging.info(msg="Requesting: " + param_log)
        resp = requests.get(base_url, params=param)
        if resp.status_code >= 400:
            raise Exception(resp.json())
        responses.append(resp)
    result_df = pd.DataFrame()
    for i, resp in enumerate(responses):
        request_metadata = resp.json()["queries"]["request"][0]
        del request_metadata["title"]
        search_info = resp.json()["searchInformation"]
        if int(search_info["totalResults"]) == 0:
            df = pd.DataFrame(columns=specified_cols, index=range(1))
            df["searchTerms"] = request_metadata["searchTerms"]
            # These keys don't appear in the response so they have to be
            # added manually
            for missing in ["lr", "num", "start", "c2coff"]:
                if missing in params_list[i]:
                    df[missing] = params_list[i][missing]
        else:
            df = pd.DataFrame(resp.json()["items"])
            df["cseName"] = resp.json()["context"]["title"]
            start_idx = request_metadata["startIndex"]
            df["rank"] = range(start_idx, start_idx + len(df))
            for missing in ["lr", "num", "start", "c2coff"]:
                if missing in params_list[i]:
                    df[missing] = params_list[i][missing]
        meta_columns = {**request_metadata, **search_info}
        df = df.assign(**meta_columns)
        df["queryTime"] = datetime.datetime.now(tz=datetime.timezone.utc)
        df["queryTime"] = pd.to_datetime(df["queryTime"])
        if "image" in df:
            img_df = json_normalize(df["image"])
            img_df.columns = ["image." + c for c in img_df.columns]
            df = pd.concat([df, img_df], axis=1)
        result_df = pd.concat([result_df, df], sort=False, ignore_index=True)
    ordered_cols = (
        list(set(params_list[i]).difference({"q", "key", "cx"})) + specified_cols
    )
    non_ordered = result_df.columns.difference(set(ordered_cols))
    final_df = result_df[ordered_cols + list(non_ordered)]
    if "pagemap" in final_df:
        pagemap_df = pd.DataFrame()
        for p in final_df["pagemap"]:
            try:
                temp_pagemap_df = json_normalize(p)
                pagemap_df = pd.concat([pagemap_df, temp_pagemap_df], sort=False)
            except Exception:
                temp_pagemap_df = pd.DataFrame({"delete_me": None}, index=range(1))
                pagemap_df = pd.concat([pagemap_df, temp_pagemap_df], sort=False)
        pagemap_df = pagemap_df.reset_index(drop=True)
        if "delete_me" in pagemap_df:
            del pagemap_df["delete_me"]
        for col in pagemap_df:
            if col in final_df:
                pagemap_df = pagemap_df.rename(columns={col: "pagemap_" + col})
        final_df = pd.concat([final_df, pagemap_df], axis=1)

        if "metatags" in pagemap_df:
            metatag_df = pd.DataFrame()
            for m in pagemap_df["metatags"]:
                try:
                    temp_metatags_df = json_normalize(m)
                    metatag_df = pd.concat([metatag_df, temp_metatags_df], sort=False)
                except Exception:
                    temp_metatags_df = pd.DataFrame({"delete_me": None}, index=range(1))
                    metatag_df = pd.concat([metatag_df, temp_metatags_df], sort=False)
            metatag_df = metatag_df.reset_index(drop=True)
            if "delete_me" in metatag_df:
                del metatag_df["delete_me"]
            for col in metatag_df:
                if col in final_df:
                    metatag_df = metatag_df.rename(columns={col: "metatag_" + col})

            final_df = pd.concat([final_df, metatag_df], axis=1)
    return final_df




[docs]
def serp_youtube(
    key,
    q=None,
    channelId=None,
    channelType=None,
    eventType=None,
    forContentOwner=None,
    forDeveloper=None,
    forMine=None,
    location=None,
    locationRadius=None,
    maxResults=None,
    onBehalfOfContentOwner=None,
    order=None,
    pageToken=None,
    publishedAfter=None,
    publishedBefore=None,
    regionCode=None,
    relatedToVideoId=None,
    relevanceLanguage=None,
    safeSearch=None,
    topicId=None,
    type=None,
    videoCaption=None,
    videoCategoryId=None,
    videoDefinition=None,
    videoDimension=None,
    videoDuration=None,
    videoEmbeddable=None,
    videoLicense=None,
    videoSyndicated=None,
    videoType=None,
):
    """Query the YouTube API and get search results in a DataFrame.

    For each parameter you can supply a single or multiple value(s).
    Looping and merging results is handled automatically in case of multiple
    values.

    Parameters
    ----------
    q : str
      The ``q`` parameter specifies the query term to search for. Your request can also
      use the Boolean NOT (-) and OR (|) operators to exclude videos or to find videos
      that are associated with one of several search terms. For example, to search for
      videos matching either "boating" or "sailing", set the ``q`` parameter value to
      boating|sailing. Similarly, to search for videos matching either "boating" or
      "sailing" but not "fishing", set the q parameter value to
      boating|sailing -fishing. Note that the pipe character must be URL- escaped when
      it is sent in your API request. The URL-escaped value for the pipe character is
      %7C.
    channelId : str
      The ``channelId`` parameter indicates that the API response should only contain
      resources created by the channel. Note: Search results are constrained to a
      maximum of 500 videos if your request specifies a value for the ``channelId``
      parameter and sets the ``type`` parameter value to video, but it does not also set
      one of the ``forContentOwner``, ``forDeveloper``, or ``forMine`` filters.
    channelType : str
      The ``channelType`` parameter lets you restrict a search to a particular type of
      channel. Acceptable values are:

        any - Return all channels.

        show - Only retrieve shows.
    eventType : str
      The ``eventType`` parameter restricts a search to broadcast events. If you specify
      a value for this parameter, you must also set the type parameter's value to video.
      Acceptable values are:

        completed - Only include completed broadcasts.

        live - Only include active broadcasts.

        upcoming - Only include upcoming broadcasts.

    forContentOwner : bool
      This parameter can only be used in a properly authorized request, and it is
      intended exclusively for YouTube content partners. The ``forContentOwner``
      parameter restricts the search to only retrieve videos owned by the content owner
      identified by the ``onBehalfOfContentOwner`` parameter. If ``forContentOwner`` is
      set to true, the request must also meet these requirements: The
      ``onBehalfOfContentOwner`` parameter is required.The user authorizing the request
      must be using an account linked to the specified content owner. The ``type``
      parameter value must be set to video.None of the following other parameters can be
      set: ``videoDefinition``, ``videoDimension``, ``videoDuration``, ``videoLicense``,
      ``videoEmbeddable``, ``videoSyndicated``, ``videoType``.
    forDeveloper : bool
      This parameter can only be used in a properly authorized request. The
      ``forDeveloper`` parameter restricts the search to only retrieve videos uploaded
      via the developer's application or website. The API server uses the request's
      authorization credentials to identify the developer. The ``forDeveloper``
      parameter can be used in conjunction with optional search parameters like the
      ``q`` parameter. For this feature, each uploaded video is automatically tagged
      with the project number that is associated with the developer's application in the
      Google Developers Console. When a search request subsequently sets the
      ``forDeveloper`` parameter to ``true`` the API server uses the request's
      authorization credentials to identify the developer. Therefore, a developer can
      restrict results to videos uploaded through the developer's own app or website but
      not to videos uploaded through other apps or sites.
    forMine : bool
      This parameter can only be used in a properly authorized request. The ``forMine``
      parameter restricts the search to only retrieve videos owned by the authenticated
      user. If you set this parameter to ``true``, then the ``type`` parameter's value
      must also be set to ``video``. In addition, none of the following other parameters
      can be set in the same request: ``videoDefinition``, ``videoDimension``,
      ``videoDuration``, ``videoLicense``, ``videoEmbeddable``, ``videoSyndicated``,
      ``videoType``.
    relatedToVideoId: str
      The ``relatedToVideoId`` parameter retrieves a list of videos that are related to
      the video that the parameter ``value`` identifies. The parameter ``value`` must be
      set to a YouTube video ID and, if you are using this parameter, the ``type``
      parameter must be set to video.Note that if the ``relatedToVideoId`` parameter is
      set, the only other supported parameters are ``part``, ``maxResults``,
      ``pageToken``, ``regionCode``, ``relevanceLanguage``, ``safeSearch``, ``type``
      (which must be set to video), and ``fields``.
    location : str
      The ``location`` parameter, in conjunction with the ``locationRadius`` parameter,
      defines a circular geographic area and also restricts a search to videos that
      specify, in their metadata, a geographic location that falls within that area. The
      parameter value is a string that specifies latitude/longitude coordinates e.g.
      (37.42307,-122.08427).The location parameter value identifies the point at the
      center of the area. The ``locationRadius`` parameter specifies the maximum
      distance that the location associated with a video can be from that point for the
      video to still be included in the search results. The API returns an error if your
      request specifies a value for the ``location`` parameter but does not also specify
      a value for the ``locationRadius`` parameter.
    locationRadius : str
      The ``locationRadius`` parameter, in conjunction with the ``location`` parameter,
      defines a circular geographic area. The parameter value must be a floating point
      number followed by a measurement unit. Valid measurement units are m, km, ft, and
      mi. For example, valid parameter values include 1500m, 5km, 10000ft, and 0.75mi.
      The API does not support ``locationRadius`` parameter values larger than 1000
      kilometers. Note: See the definition of the ``location`` parameter for more
      information.
    maxResults : int
      The ``maxResults`` parameter specifies the maximum number of items that should be
      returned in the result set. Acceptable values are 0 to 50, inclusive. The default
      value is 5.
    onBehalfOfContentOwner : str
      This parameter can only be used in a properly authorized request. Note: This
      parameter is intended exclusively for YouTube content partners.The
      ``onBehalfOfContentOwner`` parameter indicates that the request's authorization
      credentials identify a YouTube CMS user who is acting on behalf of the content
      owner specified in the parameter value. This parameter is intended for YouTube
      content partners that own and manage many different YouTube channels. It allows
      content owners to authenticate once and get access to all their video and channel
      data, without having to provide authentication credentials for each individual
      channel. The CMS account that the user authenticates with must be linked to the
      specified YouTube content owner.
    order : str
      The order parameter specifies the method that will be used to order resources in
      the API response. The default value is relevance. Acceptable values are:

        date - Resources are sorted in reverse chronological order based on the
        date they were created.

        rating - Resources are sorted from highest to lowest rating.

        relevance - Resources are sorted based on their relevance to the search
        query. This is the default value for this parameter.

        title - Resources are sorted alphabetically by title.

        videoCount - Channels are sorted in descending order of their number of
        uploaded videos.

        viewCount - Resources sorted from highest to lowest number of views.

        For live broadcasts, videos are sorted by number of concurrent viewers
        while the broadcasts are ongoing.
    pageToken : str
      The ``pageToken`` parameter identifies a specific page in the result set that
      should be returned. In an API response, the ``nextPageToken`` and
      ``prevPageToken`` properties identify other pages that could be retrieved.
    publishedAfter : datetime
      The ``publishedAfter`` parameter indicates that the API response should only
      contain resources created at or after the specified time. The value is an RFC 3339
      formatted date-time value (1970-01-01T00:00:00Z).
    publishedBefore : datetime
      The ``publishedBefore`` parameter indicates that the API response should only
      contain resources created before or at the specified time. The value is an RFC
      3339 formatted date-time value (1970-01-01T00:00:00Z).
    regionCode : str
      The ``regionCode`` parameter instructs the API to return search results for videos
      that can be viewed in the specified country. The parameter value is an ISO 3166-1
      alpha-2 country code.
    relevanceLanguage : str
      The ``relevanceLanguage`` parameter instructs the API to return search results
      that are most relevant to the specified language. The parameter value is typically
      an ISO 639-1 two-letter language code. However, you should use the values zh-Hans
      for simplified Chinese and zh-Hant for traditional Chinese. Please note that
      results in other languages will still be returned if they are highly relevant to
      the search query term.
    safeSearch : str
      The ``safeSearch`` parameter indicates whether the search results should include
      restricted content as well as standard content. Acceptable values are:

        moderate - YouTube will filter some content from search results and,
        at the least, will filter content that is restricted in your locale.
        Based on their content, search results could be removed from search
        results or demoted in search results. This is the default parameter
        value.

        none - YouTube will not filter the search result set.

        strict - YouTube will try to exclude all restricted content from the
        search result set.

        Based on their content, search results
        could be removed from search results or demoted in search
        results.
    topicId : str
      The ``topicId`` parameter indicates that the API response should only contain
      resources associated with the specified topic. The value identifies a Freebase
      topic ID.
    type : str
      The ``type`` parameter restricts a search query to only retrieve a particular type
      of resource. The value is a comma-separated list of resource types. The default
      value is video,channel,playlist. Acceptable values are: channel, playlist, and
      video.
    videoCaption : str
      The ``videoCaption`` parameter indicates whether the API should filter video
      search results based on whether they have captions. If you specify a value for
      this parameter, you must also set the ``type`` parameter's value to video.
      Acceptable values are:

        any - Do not filter results based on caption availability.

        closedCaption - Only include videos that have captions.

        none - Only include videos that do not have captions.
    videoCategoryId : str
      The ``videoCategoryId`` parameter filters video search results based on their
      category. If you specify a value for this parameter, you must also set the
      ``type`` parameter's value to video.
    videoDefinition : str
      The ``videoDefinition`` parameter lets you restrict a search to only include
      either high definition (HD) or standard definition (SD) videos. HD videos are
      available for playback in at least 720p, though higher resolutions, like 1080p,
      might also be available. If you specify a value for this parameter, you must also
      set the ``type`` parameter's value to video. Acceptable values are:

        any - Return all videos, regardless of their resolution.

        high - Only retrieve HD videos.

        standard - Only retrieve videos in standard definition.
    videoDimension : str
      The ``videoDimension`` parameter lets you restrict a search to only retrieve 2D or
      3D videos. If you specify a value for this parameter, you must also set the
      ``type`` parameter's value to video. Acceptable values are:

        2d - Restrict search results to exclude 3D videos.

        3d - Restrict search results to only include 3D videos.

        any - Include both 3D and non-3D videos in returned results. This is the default
        value.
    videoDuration : str
      The ``videoDuration`` parameter filters video search results based on their
      duration. If you specify a value for this parameter, you must also set the
      ``type`` parameter's value to video. Acceptable values are:

        any - Do not filter video search results based on their duration.
        This is the default value.

        long - Only include videos longer than 20 minutes.

        medium - Only include videos that are between four and 20 minutes
        long (inclusive).

        short - Only include videos that are less than four minutes long.
    videoEmbeddable : str
      The ``videoEmbeddable`` parameter lets you to restrict a search to only videos
      that can be embedded into a webpage. If you specify a value for this parameter,
      you must also set the ``type`` parameter's value to video. Acceptable values are:

        any - Return all videos, embeddable or not.

        true - Only retrieve embeddable videos.
    videoLicense : str
      The ``videoLicense`` parameter filters search results to only include videos with
      a particular license. YouTube lets video uploaders choose to attach either the
      Creative Commons license or the standard YouTube license to each of their videos.
      If you specify a value for this parameter, you must also set the ``type``
      parameter's value to video. Acceptable values are:

        any - Return all videos, regardless of which license they have,
        that match the query parameters.

        creativeCommon - Only return videos that have a Creative Commons
        license.
        Users can reuse videos with this license in other videos that they
        create.

        youtube - Only return videos that have the standard YouTube license.
    videoSyndicated : str
      The ``videoSyndicated`` parameter lets you to restrict a search to only videos
      that can be played outside youtube.com. If you specify a value for this parameter,
      you must also set the ``type`` parameter's value to video. Acceptable values are:

        any - Return all videos, syndicated or not.

        true - Only retrieve syndicated videos.
    videoType : str
      The ``videoType`` parameter lets you restrict a search to a particular type of
      videos. If you specify a value for this parameter, you must also set the ``type``
      parameter's value to video. Acceptable values are:

        any - Return all videos.

        episode - Only retrieve episodes of shows.

        movie - Only retrieve movies.
    Returns
    -------
    serp_df : pandas.DataFrame
    """
    params = locals()
    supplied_params = {k: v for k, v in params.items() if params[k]}

    type_vid_params = {
        "eventType",
        "relatedToVideoId",
        "videoCaption",
        "videoCategoryId",
        "videoDefinition",
        "videoDimension",
        "videoDuration",
        "videoEmbeddable",
        "videoLicense",
        "videoSyndicated",
        "videoType",
        "forMine",
        "forContentOwner",
    }

    if supplied_params.get("type") != "video" and type_vid_params.intersection(
        set(supplied_params.keys())
    ):
        raise ValueError(
            'You need to set type="video" if you want to set'
            " any of the following:" + str(type_vid_params)
        )

    for p in supplied_params:
        if isinstance(supplied_params[p], (str, int)):
            supplied_params[p] = [supplied_params[p]]

    for p in supplied_params:
        if p in SERP_YTUBE_VALID_VALS:
            if not set(supplied_params[p]).issubset(SERP_YTUBE_VALID_VALS[p]):
                raise ValueError(
                    "Please make sure you provide a"
                    ' valid value for "{}", valid values:\n{}'.format(
                        p, sorted([str(x) for x in SERP_YTUBE_VALID_VALS[p]])
                    )
                )

    params_list = _dict_product(supplied_params)
    base_url = "https://www.googleapis.com/youtube/v3/search?part=snippet"

    responses = []
    for param in params_list:
        param_log = ", ".join([k + "=" + str(v) for k, v in param.items()])
        logging.info(msg="Requesting: " + param_log)
        resp = requests.get(base_url, params=param)
        if resp.status_code >= 400:
            raise Exception(resp.json())
        responses.append(resp)

    result_df = pd.DataFrame()
    for i, resp in enumerate(responses):
        snippet_df = pd.DataFrame([x["snippet"] for x in resp.json()["items"]])
        id_df = pd.DataFrame([x["id"] for x in resp.json()["items"]])
        if "channelId" in id_df:
            id_df = id_df.drop("channelId", axis=1)

        if "thumbnails" in snippet_df:
            thumb_df = json_normalize(snippet_df["thumbnails"])
        else:
            thumb_df = pd.DataFrame()
        page_info = resp.json()["pageInfo"]
        temp_df = pd.concat([snippet_df, id_df, thumb_df], axis=1).assign(**page_info)
        temp_df["rank"] = range(1, len(temp_df) + 1)

        if len(temp_df) == 0:
            empty_df_cols = [
                "title",
                "description",
                "publishedAt",
                "channelTitle",
                "kind",
                "videoId",
                "channelId",
            ]
            temp_df = temp_df.assign(q=[params_list[i]["q"]])
            temp_df = temp_df.assign(**dict.fromkeys(empty_df_cols))

            temp_df = temp_df.assign(**page_info)
        del params_list[i]["key"]
        temp_df = temp_df.assign(**params_list[i])
        temp_df["nextPageToken"] = resp.json().get("nextPageToken")
        result_df = pd.concat([result_df, temp_df], sort=False, ignore_index=True)

    result_df["queryTime"] = datetime.datetime.now(tz=datetime.timezone.utc)
    result_df["queryTime"] = pd.to_datetime(result_df["queryTime"])

    specified_cols = [
        "queryTime",
        "rank",
        "title",
        "description",
        "publishedAt",
        "channelTitle",
        "totalResults",
        "kind",
    ]
    ordered_cols = list(params_list[i].keys()) + specified_cols
    non_ordered = result_df.columns.difference(set(ordered_cols))
    final_df = result_df[ordered_cols + list(non_ordered)]

    vid_ids = ",".join(final_df["videoId"].dropna())
    if vid_ids:
        vid_details_df = youtube_video_details(vid_ids=vid_ids, key=key)
        vid_details_df.columns = ["video." + x for x in vid_details_df.columns]
        final_df = pd.merge(
            final_df, vid_details_df, how="left", left_on="videoId", right_on="video.id"
        )

    channel_ids = ",".join(final_df["channelId"].dropna())
    if channel_ids:
        channel_details_df = youtube_channel_details(channel_ids=channel_ids, key=key)
        channel_details_df.columns = [
            "channel." + x for x in channel_details_df.columns
        ]

        final_df = pd.merge(
            final_df,
            channel_details_df,
            how="left",
            left_on="channelId",
            right_on="channel.id",
        )
    final_df = final_df.drop_duplicates(subset=["videoId"])
    return final_df.reset_index(drop=True)




[docs]
def set_logging_level(level_or_name):
    """Change the logging level during the session.
    Acceptable values are [0, 10, 20, 30, 40, 50,
    'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR',
    'CRITICAL']
    """
    lvl_names_values = [
        0,
        10,
        20,
        30,
        40,
        50,
        "NOTSET",
        "DEBUG",
        "INFO",
        "WARNING",
        "ERROR",
        "CRITICAL",
    ]
    if level_or_name not in lvl_names_values:
        raise ValueError(
            "Please make sure you supply" " a value from: {}".format(lvl_names_values)
        )
    logging.getLogger().setLevel(level_or_name)



logging.getLogger().setLevel("INFO")