diff --git a/Gemfile b/Gemfile
index 72d2ea1..bcec4da 100644
--- a/Gemfile
+++ b/Gemfile
@@ -16,6 +16,7 @@ gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] # Windows doe
 # gem 'kredis' # Use Kredis to get higher-level data types in Redis
 # gem 'bcrypt' # Use Active Model has_secure_password
 gem 'devise' # Flexible authentication solution for Rails with Warden
+gem 'httparty' # A HTTP client for Ruby.
 
 # Authentications & Authorizations
 gem 'pundit' # Minimal authorization through OO design and pure Ruby classes
diff --git a/Gemfile.lock b/Gemfile.lock
index ce57ccf..3aa0579 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -187,6 +187,9 @@ GEM
     globalid (1.1.0)
       activesupport (>= 5.0)
     hashdiff (1.0.1)
+    httparty (0.21.0)
+      mini_mime (>= 1.0.0)
+      multi_xml (>= 0.5.2)
     i18n (1.13.0)
       concurrent-ruby (~> 1.0)
     i18n-js (3.9.0)
@@ -226,6 +229,7 @@ GEM
     mini_mime (1.1.2)
     minitest (5.18.0)
     msgpack (1.7.1)
+    multi_xml (0.6.0)
     nap (1.1.0)
     net-imap (0.3.4)
       date
@@ -468,6 +472,7 @@ DEPENDENCIES
   ffaker
   figaro
   foreman
+  httparty
   i18n-js (= 3.9.0)
   jsbundling-rails
   json_matchers
diff --git a/app/jobs/google/search_keyword_job.rb b/app/jobs/google/search_keyword_job.rb
new file mode 100644
index 0000000..c022329
--- /dev/null
+++ b/app/jobs/google/search_keyword_job.rb
@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+
+module Google
+  # Raised when Google cannot be queried or answers with a non-success status.
+  #
+  # NOTE(review): Google::ClientService also references this constant although
+  # it is defined in the job file; presumably it deserves a file of its own
+  # (e.g. google/client_service_error.rb) -- confirm against the autoloader.
+  class ClientServiceError < StandardError; end
+
+  # Fetches the Google result page for the keyword of a SearchStat, parses it
+  # and stores the parsed counters and result links on that record.
+  class SearchKeywordJob < ApplicationJob
+    queue_as :default
+
+    # Marks the record as failed when fetching, parsing or saving goes wrong.
+    #
+    # @param search_stat_id [Integer] id of the SearchStat to process
+    # @return [void]
+    def perform(search_stat_id:)
+      # find_by rather than find: with a missing record there is nothing to
+      # flag as failed, so the rescue branch must never run against nil.
+      search_stat = SearchStat.find_by(id: search_stat_id)
+      unless search_stat
+        Rails.logger.error "Error: SearchStat '#{search_stat_id}' could not be found"
+        return
+      end
+
+      html_result = Google::ClientService.new(keyword: search_stat.keyword).call
+      parsed_attributes = ParserService.new(html_response: html_result).call
+      # ParserService#call returns nil when the response cannot be parsed.
+      raise ArgumentError, 'Google response could not be parsed' unless parsed_attributes
+
+      update_search_stat(search_stat, parsed_attributes)
+    rescue ClientServiceError, ArgumentError, ActiveRecord::RecordInvalid
+      update_search_stat_status search_stat, :failed
+    end
+
+    private
+
+    # Saves links and counters in one transaction; create! and update! raise
+    # on invalid data, so a failure rolls everything back.
+    def update_search_stat(search_stat, attributes)
+      SearchStat.transaction do
+        search_stat.result_links.create!(attributes[:result_links])
+
+        search_stat.update! attributes.except(:result_links)
+      end
+    end
+
+    # Reloads first so that state left in memory by a rolled-back save cannot
+    # make the status update fail validation again.
+    def update_search_stat_status(search_stat, status)
+      search_stat.reload.update! status: status
+    end
+  end
+end
diff --git a/app/services/google/client_service.rb b/app/services/google/client_service.rb
new file mode 100644
index 0000000..8b43ae7
--- /dev/null
+++ b/app/services/google/client_service.rb
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+module Google
+  # Fetches the Google search result page for a keyword over HTTP.
+  class ClientService
+    USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '\
+                 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36'
+
+    BASE_SEARCH_URL = 'https://www.google.com/search'
+
+    SUCCESS_STATUS_CODE = '200'
+
+    # @param keyword [String] search term; URL-escaped before use
+    # @param lang [String] sent as both the hl and gl query parameters
+    def initialize(keyword:, lang: 'en')
+      @escaped_keyword = CGI.escape(keyword)
+      @uri = URI("#{BASE_SEARCH_URL}?q=#{@escaped_keyword}&hl=#{lang}&gl=#{lang}")
+    end
+
+    # @return [HTTParty::Response] the response, returned only for a 200 status
+    # @raise [ClientServiceError] on a transport error or any other status
+    def call
+      result = HTTParty.get(@uri, headers: { 'User-Agent' => USER_AGENT })
+
+      raise ClientServiceError, "Unexpected response status #{result&.code}" unless valid_result?(result)
+
+      result
+    rescue HTTParty::Error, Timeout::Error, SocketError, SystemCallError, ClientServiceError => e
+      # SystemCallError covers the Errno::* family (refused or reset connections).
+      Rails.logger.error "Error: Query Google with '#{@escaped_keyword}' thrown an error: #{e}"
+
+      raise ClientServiceError, 'Error fetching HTML result'
+    end
+
+    private
+
+    # @return [Boolean] true only when the response carries the success status
+    def valid_result?(result)
+      result&.response&.code == SUCCESS_STATUS_CODE
+    end
+  end
+end
diff --git a/app/services/google/parser_service.rb b/app/services/google/parser_service.rb
new file mode 100644
index 0000000..7289e3c
--- /dev/null
+++ b/app/services/google/parser_service.rb
@@ -0,0 +1,113 @@
+# frozen_string_literal: true
+
+module Google
+  # Extracts ad and non-ad link statistics from a Google search result page.
+  class ParserService
+    NON_ADS_RESULT_SELECTOR = 'a[data-ved]:not([role]):not([jsaction]):not(.adwords):not(.footer-links)'
+    AD_CONTAINER_ID = 'tads'
+    ADWORDS_CLASS = 'adwords'
+
+    # @param html_response [#body, nil] response object whose body is the HTML page
+    def initialize(html_response:)
+      @html = html_response
+
+      # Parse the body string rather than the response wrapper; a nil response
+      # or a nil body leaves the document unset, which makes #call return nil.
+      body = html_response&.body
+      @document = Nokogiri::HTML.parse(body) if body
+    end
+
+    # Parse html data and return a hash with the results
+    #
+    # @return [Hash, nil] counters, result links and status; nil when there is
+    #   nothing to parse
+    def call
+      return unless valid?
+
+      mark_adword_links
+      mark_footer_links
+
+      present_parsed_data
+    end
+
+    private
+
+    attr_reader :html, :document
+
+    def valid?
+      html.present? && document.present?
+    end
+
+    # Add a class to all AdWords link for easier manipulation
+    def mark_adword_links
+      document.css('div[data-text-ad] a[data-ved]').add_class(ADWORDS_CLASS)
+    end
+
+    # Mark footer links to identify them
+    def mark_footer_links
+      document.css('#footcnt a').add_class('footer-links')
+    end
+
+    def present_parsed_data
+      {
+        top_ad_count: ads_top_count,
+        ad_count: ads_page_count,
+        non_ad_count: non_ads_result_count,
+        total_result_count: total_link_count,
+        raw_response: html,
+        result_links: result_links,
+        status: :completed
+      }
+    end
+
+    def top_ads_selector
+      "##{AD_CONTAINER_ID} .#{ADWORDS_CLASS}"
+    end
+
+    def all_ads_selector
+      ".#{ADWORDS_CLASS}"
+    end
+
+    def ads_top_count
+      document.css(top_ads_selector).count
+    end
+
+    def ads_page_count
+      document.css(all_ads_selector).count
+    end
+
+    def ads_top_urls
+      link_urls(top_ads_selector)
+    end
+
+    def ads_page_urls
+      link_urls(all_ads_selector)
+    end
+
+    def non_ads_result_count
+      non_ads_urls.count
+    end
+
+    def non_ads_urls
+      link_urls(NON_ADS_RESULT_SELECTOR)
+    end
+
+    def total_link_count
+      document.css('a').count
+    end
+
+    # Only top ads and non-ad links are reported as result links.
+    def result_links
+      result_link_map(ads_top_urls, :ads_top) + result_link_map(non_ads_urls, :non_ads)
+    end
+
+    def result_link_map(urls, type)
+      urls.map { |url| { url: url, link_type: type } }
+    end
+
+    # @return [Array<String>] non-blank href values of the nodes matching selector
+    def link_urls(selector)
+      document.css(selector).filter_map { |a_tag| a_tag['href'].presence }
+    end
+  end
+end
diff --git a/db/seeds.rb b/db/seeds.rb
index 72d0a27..b3f6bed 100644
--- a/db/seeds.rb
+++ b/db/seeds.rb
@@ -15,5 +15,5 @@ end 10.times do - Fabricate.times(100, :search_stat, user: user) + Fabricate.times(100, :search_stat_parsed_with_links, user: user) end
diff --git a/spec/fabricators/search_stat_fabricator.rb b/spec/fabricators/search_stat_fabricator.rb
index d39524c..f9b3429 100644
--- a/spec/fabricators/search_stat_fabricator.rb
+++ b/spec/fabricators/search_stat_fabricator.rb
@@ -1,8 +1,6 @@
 # frozen_string_literal: true
 
-demo_user = User.create(email: 'user@demo.com', password: 'Secret@11')
-
-Fabricator(:search_stat) do
+Fabricator(:search_stat, class_name: SearchStat) do
   keyword { FFaker::Lorem.word }
   ad_count { rand(1..10) }
   link_count { rand(1..60) }
@@ -11,5 +9,9 @@
   top_ad_count { rand(1..5) }
   status { rand(1..3) }
   raw_response { FFaker::HTMLIpsum.body }
-  user_id { demo_user.id }
+  user { Fabricate(:user) }
+end
+
+Fabricator(:search_stat_parsed_with_links, from: :search_stat) do
+  result_links(count: FFaker.rand(10) + 1)
 end
diff --git a/spec/fixtures/vcr/google_search/too_many_requests.yml b/spec/fixtures/vcr/google_search/too_many_requests.yml new file mode 100644 index 0000000..5804d9e --- /dev/null +++ b/spec/fixtures/vcr/google_search/too_many_requests.yml @@ -0,0 +1,63 @@ +--- +http_interactions: + - request: + method: get + uri: https://www.google.com/search?gl=en&hl=en&q=vpn + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, + like Gecko) Chrome/91.0.4472.77 Safari/537.36 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 429 + message: Too Many Requests + headers: + Content-Type: + - text/html; charset=UTF-8 + Date: + - Mon, 14 Jun 2021 08:39:12 GMT + Expires: + - "-1" + Cache-Control: + - private, max-age=0 + Strict-Transport-Security: + - max-age=31536000 + Bfcache-Opt-In: + - unload + P3p: + - CP="This is not a P3P policy! See g.co/p3phelp for more info."
+ Server: + - gws + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + Set-Cookie: + - 1P_JAR=2021-06-14-08; expires=Wed, 14-Jul-2021 08:39:12 GMT; path=/; domain=.google.com; + Secure; SameSite=none + - CGIC=IgMqLyo; expires=Sat, 11-Dec-2021 08:39:12 GMT; path=/complete/search; + domain=.google.com; HttpOnly + - CGIC=IgMqLyo; expires=Sat, 11-Dec-2021 08:39:12 GMT; path=/search; domain=.google.com; + HttpOnly + - NID=216=RIaFqvX4KKi9ZQ9qGAicOJwbAOtokQNW9gIxE67VedOJHU0vWABUDx3P_0KdnOfQgkFyh1X3aSZ_on3Q4G3HwNCevH3-dM-VdV-Kkz0jh4xpGZV0K8n1dm2BVDm341KMPj_luc32sxztW9pdoTU3YnXYADzv212zuQPwAfhoSFI; + expires=Tue, 14-Dec-2021 08:39:12 GMT; path=/; domain=.google.com; Secure; + HttpOnly; SameSite=none + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; + ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; + ma=2592000; v="46,43" + Transfer-Encoding: + - chunked + body: + encoding: ASCII-8BIT + string: !binary |- + + + recorded_at: Mon, 14 Jun 2021 08:39:13 GMT +recorded_with: VCR 6.0.0 diff --git a/spec/fixtures/vcr/google_search/top_ads_1.yml b/spec/fixtures/vcr/google_search/top_ads_1.yml new file mode 100644 index 0000000..8399579 --- /dev/null +++ b/spec/fixtures/vcr/google_search/top_ads_1.yml @@ -0,0 +1,3430 @@ +--- +http_interactions: +- request: + method: get + uri: https://google.com/search?gl=en&hl=en&q=squarespace + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, + like Gecko) Chrome/91.0.4472.77 Safari/537.36 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 301 + message: Moved Permanently + headers: + Location: + - https://www.google.com/search?q=squarespace&hl=en&gl=en + Content-Type: + - text/html; charset=UTF-8 + Bfcache-Opt-In: + - unload + Date: + - Tue, 15 Jun 2021 10:51:47 GMT + Expires: + - Thu, 15 Jul 2021 
10:51:47 GMT + Cache-Control: + - public, max-age=2592000 + Server: + - gws + Content-Length: + - '260' + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; + ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; + ma=2592000; v="46,43" + body: + encoding: UTF-8 + string: "\n301 + Moved\n

301 Moved

\nThe document has moved\nhere.\r\n\r\n" + recorded_at: Tue, 15 Jun 2021 10:51:47 GMT +- request: + method: get + uri: https://www.google.com/search?gl=en&hl=en&q=squarespace + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, + like Gecko) Chrome/91.0.4472.77 Safari/537.36 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Content-Type: + - text/html; charset=UTF-8 + Date: + - Tue, 15 Jun 2021 10:51:47 GMT + Expires: + - "-1" + Cache-Control: + - private, max-age=0 + Strict-Transport-Security: + - max-age=31536000 + Bfcache-Opt-In: + - unload + P3p: + - CP="This is not a P3P policy! See g.co/p3phelp for more info." + Server: + - gws + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + Set-Cookie: + - 1P_JAR=2021-06-15-10; expires=Thu, 15-Jul-2021 10:51:47 GMT; path=/; domain=.google.com; + Secure; SameSite=none + - CGIC=IgMqLyo; expires=Sun, 12-Dec-2021 10:51:47 GMT; path=/complete/search; + domain=.google.com; HttpOnly + - CGIC=IgMqLyo; expires=Sun, 12-Dec-2021 10:51:47 GMT; path=/search; domain=.google.com; + HttpOnly + - NID=217=bEtRaL9u0zhI4dFWNn938Mt3qkwUmQiprOOmjrz4qVu7151kNmlECny6XFh0Dv3vusy0PTwDnlbaKVAsjbzCFWc5CV_f_NbXVuoUr4JKldTvPYl-dG6csEC9UyvQlXC7UkMbqMET0NUZEaPjY4l6ANKVa9vqJ_nJOl2GYKtlQe0; + expires=Wed, 15-Dec-2021 10:51:47 GMT; path=/; domain=.google.com; Secure; + HttpOnly; SameSite=none + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; + ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; + ma=2592000; v="46,43" + Transfer-Encoding: + - chunked + body: + encoding: UTF-8 + string: "squarespace - Google Search

Accessibility links

Skip + to main contentAccessibility + help
Accessibility feedback
+
\"Google\"
Press / to jump to the search box
+ +
+ \
  • Remove
  • Report + inappropriate predictions
    +

    Search modes

    All
    Images
    News
    Videos
    More
    Settings
    Tools
      About + 96,100,000 results (0.69 seconds) 

      Ads


      Squarespace + (Official Website) - Create Your New Website Today
      Ad·www.squarespace.com/
      Why this ad?
      About + the advertiser
      Ad·www.squarespace.com/
      Why this ad?
      About + the advertiser
      Create a website with an award-winning template. + Get your unique domain today. No Plugins Necessary. Free 14-day Trial. 24/7 + Customer Support. Drag and Drop. Modern Templates. Custom Domains. Types: + Portfolio Websites, Business Websites, Ecommerce Websites.

      Try Our Website Builder

      Build your own website today.
      Make it yourself with Squarespace.

      Online Store Templates

      Everything You Need To Sell It
      Bring your products to the world

      Claim Your Domain

      Register a custom domain name.
      Fair pricing, no hidden fees

      Get Started

      Let us help you find the perfect
      website design for what you want!
      squarespace + pricing
      squarespace + login
      squarespace + vs wix
      squarespace + webmail
      squarespace + vs wordpress
      squarespace + ipo
      squarespace + crunchbase
      squarespace + templates

      People also search for

      Search Results

      Web result with site links


      Squarespace: Build a Website – Website Builder

      https://www.squarespace.com
      https://www.squarespace.com
      1. Cached
      2. Similar
      Squarespace + is the all-in-one solution for anyone looking to create a beautiful website. + Domains, eCommerce, hosting, galleries, analytics, and 24/7 support all​ ...

      Pricing

      You start with a free trial. We don't collect + your credit card until you ...

      Domain + Name Search

      Search for your name, + business, or brand name. · Check the ...

      Website + Templates

      All Templates - + Professional Services - Personal & CV - ...

      The + Leader in Website Design

      Help your + website stand out with award-winning website design ...

      An + Online Store

      Start selling with + an online store. Sell online with a professional ...

      Create + a Blog

      Stand out with a professionally-designed + template that can be ...
      More + results from squarespace.com »

      People also ask

      Why + Squarespace is bad?
      How + much does squarespace cost?
      Is + Wix or squarespace better?
      What + is better Shopify or squarespace?
      \"Loading...\"
      Feedback

      Twitter results


      Squarespace (@squarespace) · Twitter

      https://twitter.com/squarespace
      \"Media
      Join + us this afternoon at 3pm ET on the Squarespace Instagram channel for a #Juneteenth + celebration and IG Live conversation between #SQSP customers @evierobbie and + Miller!
      Twitter · 20 + hours ago
      \"Media
      Just + getting started on Squarespace? Tune in to our next free webinar. No site + is required to register – sign up right here: share.sqsp.link/getting… +
      Twitter · 7 + days ago
      Our + advice to prospective business owners? If you believe in something, launch + it. \U0001F680\n\nCheck out these websites that #JustLaunched on Squarespace + this month: share.sqsp.link/JustLau…
      Twitter · Jun 7, + 2021
      View on Twitter

      Web results


      Squarespace - Wikipedia

      https://en.wikipedia.org › wiki › Squarespace
      https://en.wikipedia.org › wiki › Squarespace
      1. Cached
      2. Similar
      Squarespace, + Inc. is an American website building and hosting company which is based in + New York City, United States. ... It provides software as a service for ...
      Revenue: + US$621.1Million(2020)
      Products: + Website builder and hosting service
      Industry: Internet
      Founder(s): + Anthony Casalena

      Squarespace | LinkedIn

      https://www.linkedin.com › company › squarespace
      https://www.linkedin.com › company › squarespace
      1. Similar
      Squarespace + empowers millions of dreamers, makers, and doers by providing them with the + tools they need to bring their creative ideas to life. On Squarespace's ...

      Videos

      \"Video
      37:33
      Squarespace Tutorial for Beginners (Full + Tutorial) - Create A ...
      YouTube + · Santrel Media
      Jan 15, 2020
      10 key + moments in this video
      From 01:30
      Step + 1
      From 06:41
      Site + Title
      From 11:33
      Add + a Button
      From 16:54
      Add + a New Page
      \"Video
      1:08
      5 to 9 by Dolly Parton – Extended | Big Game Commercial ...
      YouTube · Squarespace
      Feb + 2, 2021
      \"Video
      30:53
      How to Create a Website - Squarespace Tutorial + 2019
      YouTube · TheFigCo
      Mar 15, 2019
      6 key + moments in this video

      View all

      Web results


      Squarespace (@squarespace) • Instagram photos and + videos

      https://www.instagram.com › squarespace
      https://www.instagram.com › squarespace
      1. Similar
      280k + Followers, 10 Following, 703 Posts - See Instagram photos and videos from + Squarespace (@squarespace)

      Squarespace - Verified Page | Facebook

      https://www.facebook.com › ... › Squarespace
      https://www.facebook.com › ... › Squarespace
      Squarespace. + 364168 likes · 2584 talking about this. Everything needed to power your ideas: + websites, online ...
      10 hours ago

      Related searches

      \"\"
      Best website builder
      \"\"
      Squaresp...
      \"\"
      GoDaddy
      \"\"
      Weebly
      \"\"
      Shopify
      \"\"
      Webnode
      \"\"
      Strikingly
      See more
      \"\"
      Web design app
      \"\"
      Popular blog sites
      Feedback
      squarespace login
      squarespace vs + wix
      squarespace pricing
      squarespace vs + wordpress
      squarespace stock
      squarespace templates
      squarespace ipo
      squarespace crunchbase

      Page navigation

      12345678910Next

      Complementary results

      Images

      \"Image
      \"Image
      \"Image
      \"Image
      \"Image
      More images

      Squarespace

      Description

      Description

      Squarespace, Inc. is an American website + building and hosting company which is based in New York City, United States. + It provides software as a service for website building and hosting, and allows + users to use pre-built website templates and drag-and-drop elements to create + and modify webpages. Wikipedia
      Founder(s): + Anthony Casalena
      Employees: + 1,143 (2021)
      Launched: + January 2004; + 17 years ago
      CEO: Anthony Casalena
      Traded as: + NYSE: SQSP
      Headquarters: + New York City, + New York
      Registration: + Required; subscription needed for + certain features
      Choose what you’re giving + feedback on
      Or give general feedback
      Feedback

      See + results about

      Squarespace
      IT service management companyIT + service management company

      Footer links

      Vietnam
      - -  - Learn + more
      HelpSend + feedbackPrivacyTerms
      + + \
      " + recorded_at: Tue, 15 Jun 2021 10:51:48 GMT +recorded_with: VCR 6.0.0 diff --git a/spec/fixtures/vcr/google_search/top_ads_6.yml b/spec/fixtures/vcr/google_search/top_ads_6.yml new file mode 100644 index 0000000..aea86ce --- /dev/null +++ b/spec/fixtures/vcr/google_search/top_ads_6.yml @@ -0,0 +1,343 @@ +--- +http_interactions: +- request: + method: get + uri: https://google.com/search?gl=en&hl=en&q=vpn + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, + like Gecko) Chrome/91.0.4472.77 Safari/537.36 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 301 + message: Moved Permanently + headers: + Location: + - https://www.google.com/search?q=vpn&hl=en&gl=en + Content-Type: + - text/html; charset=UTF-8 + Bfcache-Opt-In: + - unload + Date: + - Tue, 15 Jun 2021 11:07:04 GMT + Expires: + - Thu, 15 Jul 2021 11:07:04 GMT + Cache-Control: + - public, max-age=2592000 + Server: + - gws + Content-Length: + - '252' + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; + ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; + ma=2592000; v="46,43" + body: + encoding: UTF-8 + string: "\n301 + Moved\n

      301 Moved

      \nThe document has moved\nhere.\r\n\r\n" + recorded_at: Tue, 15 Jun 2021 11:07:04 GMT +- request: + method: get + uri: https://www.google.com/search?gl=en&hl=en&q=vpn + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, + like Gecko) Chrome/91.0.4472.77 Safari/537.36 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Content-Type: + - text/html; charset=UTF-8 + Date: + - Tue, 15 Jun 2021 11:07:05 GMT + Expires: + - "-1" + Cache-Control: + - private, max-age=0 + Strict-Transport-Security: + - max-age=31536000 + Bfcache-Opt-In: + - unload + P3p: + - CP="This is not a P3P policy! See g.co/p3phelp for more info." + Server: + - gws + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + Set-Cookie: + - 1P_JAR=2021-06-15-11; expires=Thu, 15-Jul-2021 11:07:05 GMT; path=/; domain=.google.com; + Secure; SameSite=none + - CGIC=IgMqLyo; expires=Sun, 12-Dec-2021 11:07:05 GMT; path=/complete/search; + domain=.google.com; HttpOnly + - CGIC=IgMqLyo; expires=Sun, 12-Dec-2021 11:07:05 GMT; path=/search; domain=.google.com; + HttpOnly + - NID=217=R0nJsU55CouUI8tt-KPBFDcafgM0Hncl581iMqKFp74GMop1ZQAuvQTJRpyewJKYsk5Bt9CvGEOQBmHxdvngYP0DO1gU5EQDBB2euoon5N6GEzfuAG6oUDUskiaxJU0NapM-arm-y77YIWBvjAzQjcR_1tcuqI_jtA1zQoSXRSg; + expires=Wed, 15-Dec-2021 11:07:05 GMT; path=/; domain=.google.com; Secure; + HttpOnly; SameSite=none + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; + ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; + ma=2592000; v="46,43" + Transfer-Encoding: + - chunked + body: + encoding: UTF-8 + string: |- + vpn - Google Search

      Accessibility links

      Skip to main contentAccessibility help
      Accessibility feedback
      Google
      Press / to jump to the search box
      • Remove
      • Report inappropriate predictions

        Search modes

        All
        Images
        Videos
        News
        Books
        More
        Tools
          About 228,000,000 results (0.73 seconds) 

          Ads


          Google Cloud VPN - Sign Up For A Free Trial
          Ad·cloud.google.com/vpn
          Why this ad?
          About the advertiser
          Ad·cloud.google.com/vpn
          Why this ad?
          About the advertiser
          High-Performance VMs. Powered by Google's Private Network. Try Free.

          Platform Security

          Deploy on a platform protected
          by more than 750 security experts

          Cloud DNS

          Highly Available DNS Serving
          Scales on Google Cloud.

          Compute Engine

          Run high-performance scalable
          VMs on Google's infrastructure

          Machine Learning Engine

          Build superior models &
          deploy them into production.
          vpn free
          vpn meaning
          protonvpn
          vpn online
          vpn extension
          nordvpn
          vpn apk
          best vpn

          People also search for


          ExpressVPNâ„¢ - Official Site - The Best VPN Trial: ExpressVPN
          Ad·www.expressvpn.com/
          Why this ad?
          About the advertiser
          Ad·www.expressvpn.com/
          Why this ad?
          About the advertiser
          Powerful Speeds. Unlimited Bandwidth. ExpressVPN is the World’s Most Trusted VPN Provider.
          ‎Order ExpressVPN Today · ‎3,000+ VPN Servers · ‎How ExpressVPN Works
          vpn free
          vpn meaning
          protonvpn
          vpn online
          vpn extension
          nordvpn
          vpn apk
          best vpn

          People also search for


          Top 10 Best Free VPN Services - The Most Popular VPNs of 2021
          Ad·www.top10vpn.com/
          Why this ad?
          About the advertiser
          Ad·www.top10vpn.com/
          Why this ad?
          About the advertiser
          See Our Top 10 Best VPN Picks for Unrestricted Web Access. Compare & Find Your Ideal VPN.
          vpn free
          vpn meaning
          protonvpn
          vpn online
          vpn extension
          nordvpn
          vpn apk
          best vpn

          People also search for

          Search Results

          Web results


          Free VPN Download | NordVPN

          https://nordvpn.com › download
          https://nordvpn.com › download
          1. Cached
          2. Similar
          Free VPN client download for any operating system: Windows, macOS, Android, iOS & more. Compatible with computers, phones, routers and even gaming ...
          ‎The best VPN for Chromecast · ‎Best Firefox VPN browser... · ‎Chrome extension

          People also ask

          What is the VPN and how it works?
          Is VPN illegal?
          What is a VPN used for?
          Is a VPN free?
          Loading...
          Feedback

          Web results


          NordVPN: Best VPN service. Online security starts with a click.

          https://nordvpn.com
          https://nordvpn.com
          1. Cached
          2. Similar
          A good VPN service provides you a secure, encrypted tunnel for online traffic to flow. Nobody can see through the tunnel and get their hands on your internet ...

          OpenVPN: VPN Software Solutions & Services For Business

          https://openvpn.net
          https://openvpn.net
          1. Cached
          2. Similar
          OpenVPN provides flexible VPN solutions for businesses to secure all data communications and extend private network services while maintaining security.

          ExpressVPN: High-Speed, Secure & Anonymous VPN Service

          https://www.expressvpn.com
          https://www.expressvpn.com
          1. Cached
          2. Similar
          Top-rated VPN for 2021. Unblock sites & protect every device. 24/7 support. VPN for Windows, Mac, Android, iOS, routers & more. Try 30 days risk-free.

          Virtual private network - Wikipedia

          https://en.wikipedia.org › wiki › Virtual_private_network
          https://en.wikipedia.org › wiki › Virtual_private_network
          1. Cached
          2. Similar
          A virtual private network (VPN) extends a private network across a public network and enables users to send and receive data across shared or public networks ...

          Top stories

          CNET
          .
          No, Apple's Private Relay is not a VPN
          .

          3 days ago

          TechRadar
          .
          What is Apple Private Relay and is it worse than a VPN?
          .

          8 hours ago

          MarketWatch
          .
          This top-rated VPN is now on sale for just $3.10 a month
          .

          4 days ago


          View all

          Web results


          VPN extensions - Google Chrome - Download the Fast ...

          https://chrome.google.com › webstore › search › vpn
          https://chrome.google.com › webstore › search › vpn
          1. Similar
          No information is available for this page.
          Learn why

          ProtonVPN: Secure and Free VPN service for protecting your ...

          https://protonvpn.com
          https://protonvpn.com
          1. Cached
          2. Similar
          ProtonVPN is a security focused FREE VPN service, developed by CERN and MIT scientists. Use the web anonymously, unblock websites & encrypt your ...

          The Best VPN Services for 2021 | PCMag

          https://www.pcmag.com › Best Products › Security › VPN
          https://www.pcmag.com › Best Products › Security › VPN
          What Is a VPN? When you switch on a VPN, it sends your web traffic through an encrypted tunnel to a server controlled by the VPN company. From there, ...

          TunnelBear: Secure VPN Service

          https://www.tunnelbear.com
          https://www.tunnelbear.com
          1. Cached
          2. Similar
          Really simple VPN to browse the web privately & securely. Unblock websites around the world with applications for Mac, PC, iOS, Android & Chrome.

          Ads


          IPVanish® Official Site - VPN From $2.62/month
          Ad·www.ipvanish.com/
          Why this ad?
          Ad·www.ipvanish.com/
          Why this ad?
          The Fastest, Most Reliable VPN. Secure & Easy-to-Use. Money Back Guarantee. Voted Fastest VPN. Unmetered Connections. 24x7 Customer Support. Super-Fast Connections.
          ‎World's Best VPN · ‎World's Fastest VPN · ‎Secure Online Access

          CyberGhostâ„¢ VPN - Try It Risk-Free 45-Days
          Ad·www.cyberghostvpn.com/
          Why this ad?
          About the advertiser
          Ad·www.cyberghostvpn.com/
          Why this ad?
          About the advertiser
          Fast and reliable VPN service. Dedicated streaming & torrenting servers. Zero log policy. Access 6100+ servers from 90 countries. Get automatic Wi-Fi protection. No Logs Policy.
          ‎Massive Server Fleet · ‎Special Streaming Servers
          Deal: 85% off Only $1.99/Month

          Top 10 Best VPNs For 2021 - Top 10 Best VPN Offers
          Ad·www.top10vpn.guide/
          Why this ad?
          Ad·www.top10vpn.guide/
          Why this ad?
          Don't Let Them Snoop On You! Protect Your Devices From Governments, Advertisers & Hackers! Money Back Guarantee. Keep Your Identity Safe. Protect All Your Devices. Free Trial! The Most Secure VPNs. Test Run With 30d. Trial. Access Restricted Content.
          ‎NordVPN Review · ‎Best VPN For Streaming · ‎Best For Privacy · ‎Top 10 Best VPNs 2021
          Monthly Plan - from $12.99/mo - Free Trials Available Â· More
          1 Year Plan - from $3.99/mo
          Great Value
          3 Years + 3 Months Free - from $2.25/mo
          Save Up To 83%

          Related searches

          Best VPN Reddit
          ProtonVPN
          ExpressV...
          NordVPN
          TunnelBear
          CyberGhost VPN
          Mullvad
          See more
          VPN protocols
          Best tools for privacy
          Feedback
          vpn free
          nordvpn
          best vpn
          vpn apk
          vpn online
          protonvpn
          vpn app
          vpn extension

          Page navigation

          12345678910Next

          Complementary results

          See results about

          Virtual private network
          A virtual private network extends a private network across a ...A virtual private network extends a private network across a public network and ...
          NordVPN
          NordVPN is a VPN service. It has desktop applications for ...NordVPN is a VPN service. It has desktop applications for Windows, macOS, and ...

          Footer links

          Vietnam
          - -  - Learn more
          HelpSend feedbackPrivacyTerms
          + recorded_at: Tue, 15 Jun 2021 11:07:06 GMT +recorded_with: VCR 6.0.0
diff --git a/spec/jobs/google/search_keyword_job_spec.rb b/spec/jobs/google/search_keyword_job_spec.rb
new file mode 100644
index 0000000..96b0a19
--- /dev/null
+++ b/spec/jobs/google/search_keyword_job_spec.rb
@@ -0,0 +1,93 @@
+# frozen_string_literal: true
+
+require 'rails_helper'
+
+RSpec.describe Google::SearchKeywordJob, type: :job do
+  include ActiveJob::TestHelper
+
+  describe '#perform' do
+    context 'given a valid request' do
+      it 'queues the job', vcr: 'google_search/top_ads_1' do
+        search_stat = Fabricate(:search_stat)
+
+        # #perform only accepts the search_stat_id keyword, so enqueue with it.
+        expect { described_class.perform_later search_stat_id: search_stat.id }
+          .to have_enqueued_job(described_class).with(search_stat_id: search_stat.id)
+      end
+
+      it 'saves all result_links in the DataBase', vcr: 'google_search/top_ads_1' do
+        search_stat = Fabricate(:search_stat)
+
+        described_class.perform_now search_stat_id: search_stat.id
+
+        expect(search_stat.result_links.count).to eq(45)
+      end
+
+      it 'sets the search stat status as completed', vcr: 'google_search/top_ads_1' do
+        search_stat = Fabricate(:search_stat)
+
+        described_class.perform_now search_stat_id: search_stat.id
+
+        expect(search_stat.reload.status).to eq('completed')
+      end
+
+      it 'sets the links counts with the right values', vcr: 'google_search/top_ads_1' do
+        search_stat = Fabricate(:search_stat)
+
+        described_class.perform_now search_stat_id: search_stat.id
+
+        search_stat.reload
+
+        expect(search_stat.top_ad_count + search_stat.non_ad_count).to eq(45)
+      end
+    end
+
+    # The job rescues Google::ClientServiceError itself, so the expectations
+    # run right after perform_now; inside a rescue clause they would never be
+    # reached and the examples would pass without checking anything.
+    context 'given a 429 too many requests error' do
+      it 'sets the search stat status as failed', vcr: 'google_search/too_many_requests' do
+        search_stat = Fabricate(:search_stat)
+
+        described_class.perform_now(search_stat_id: search_stat.id)
+
+        expect(search_stat.reload.status).to eq('failed')
+      end
+
+      it 'does not save any result_links', vcr: 'google_search/too_many_requests' do
+        search_stat = Fabricate(:search_stat)
+
+        described_class.perform_now(search_stat_id: search_stat.id)
+
+        expect(search_stat.reload.result_links.count).to eq(0)
+      end
+
+      it 'does not change any result count', vcr: 'google_search/too_many_requests' do
+        search_stat = Fabricate(:search_stat)
+
+        expect { described_class.perform_now(search_stat_id: search_stat.id) }
+          .not_to(change { search_stat.reload.slice(:top_ad_count, :ad_count, :non_ad_count) })
+      end
+
+      it 'does not change the raw response', vcr: 'google_search/too_many_requests' do
+        search_stat = Fabricate(:search_stat)
+
+        expect { described_class.perform_now(search_stat_id: search_stat.id) }
+          .not_to(change { search_stat.reload.raw_response })
+      end
+
+      # NOTE(review): the job shown in this change never calls
+      # Google::SearchProgressJob and no spy is set up for it, so this example
+      # is skipped until that behaviour exists -- confirm the intent.
+      it 'performs a SearchProgress job with the right user id',
+         skip: 'SearchKeywordJob does not trigger Google::SearchProgressJob yet',
+         vcr: 'google_search/too_many_requests' do
+        search_stat = Fabricate(:search_stat)
+
+        described_class.perform_now(search_stat_id: search_stat.id)
+
+        expect(Google::SearchProgressJob).to have_received(:perform_now).with(search_stat.user_id).exactly(:once)
+      end
+    end
+  end
+end
diff --git a/spec/services/google/client_service.rb b/spec/services/google/client_service.rb
new file mode 100644
index 0000000..2ff926f
--- /dev/null
+++ b/spec/services/google/client_service.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+# NOTE(review): this looks like an earlier copy of
+# app/services/google/client_service.rb: it sits under spec/, its name does not
+# end in _spec.rb and it holds no examples -- confirm whether it was committed
+# by mistake and should be removed.
+module Google
+  class ClientService
+    USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '\
+                 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36'
+
+    BASE_SEARCH_URL = 'https://www.google.com/search'
+
+    def initialize(keyword:, lang: 'en')
+      @escaped_keyword = CGI.escape(keyword)
+      @uri = URI("#{BASE_SEARCH_URL}?q=#{@escaped_keyword}&hl=#{lang}&gl=#{lang}")
+    end
+
+    # @return [HTTParty::Response, false] the response for a 200 status, false
+    #   for any other status or when a rescued transport error occurs
+    def call
+      result = HTTParty.get(@uri, { headers: { 'User-Agent' => USER_AGENT } })
+
+      return false unless valid_result? result
+
+      result
+    rescue HTTParty::Error, Timeout::Error, SocketError => e
+      Rails.logger.error "Error: Query Google with '#{@escaped_keyword}' thrown an error: #{e}".colorize(:red)
+
+      false
+    end
+
+    private
+
+    # Inspect Http response status code
+    # Any non 200 response code will be logged
+    def valid_result?(result)
+      # Read the code once with safe navigation so a nil result is logged
+      # instead of raising NoMethodError while building the warning.
+      status_code = result&.response&.code
+      return true if status_code == '200'
+
+      Rails.logger.warn "Warning: Query Google with '#{@escaped_keyword}' return status code #{status_code}"
+        .colorize(:yellow)
+
+      false
+    end
+  end
+end
diff --git a/spec/services/google/parser_service_spec.rb b/spec/services/google/parser_service_spec.rb new file mode 100644 index 0000000..ff8806e --- /dev/null +++ b/spec/services/google/parser_service_spec.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe Google::ParserService, type: :service do + describe '#call' do + context 'when parsing a page having 1 top ad' do + it 'counts exactly 1 top ad', vcr: 'google_search/top_ads_1' do + result = Google::ClientService.new(keyword: 'squarespace').call + + expect(described_class.new(html_response: result).call[:top_ad_count]).to eq(1) + end + end + + context 'when parsing a page having 3 top ads, 3 bottom ads and 14 non ad links' do + it 'counts exactly 3 top ads', vcr: 'google_search/top_ads_6' do + result = Google::ClientService.new(keyword: 'vpn').call + + expect(described_class.new(html_response: result).call[:top_ad_count]).to eq(3) + end + + it 'counts exactly 6 ads in total', vcr: 'google_search/top_ads_6' do + result = Google::ClientService.new(keyword: 'vpn').call + + expect(described_class.new(html_response: result).call[:ad_count]).to eq(6) + end + + it 'finds exactly the 3 top ads urls', vcr: 'google_search/top_ads_6' do + result = Google::ClientService.new(keyword: 'vpn').call + + result_links = described_class.new(html_response: result).call[:result_links] + + top_ads_urls = result_links.select { |link| link[:link_type] == :ads_top
}.pluck(:url) + + expect(top_ads_urls).to contain_exactly('https://cloud.google.com/free', 'https://www.expressvpn.com/', 'https://www.top10vpn.com/best-vpn-for-vietnam/') + end + + it 'counts exactly 14 non ad results', vcr: 'google_search/top_ads_6' do + result = Google::ClientService.new(keyword: 'vpn').call + + expect(described_class.new(html_response: result).call[:non_ad_count]).to eq(14) + end + + it 'gets 14 non_ads result_links', vcr: 'google_search/top_ads_6' do + result = Google::ClientService.new(keyword: 'vpn').call + + result_links = described_class.new(html_response: result).call[:result_links] + + non_ads = result_links.select { |link| link[:link_type] == :non_ads } + + expect(non_ads.length).to eq(14) + end + + it 'gets exactly 113 links', vcr: 'google_search/top_ads_6' do + # Counted from cassette html raw code + result = Google::ClientService.new(keyword: 'vpn').call + + expect(described_class.new(html_response: result).call[:total_result_count]).to eq(113) + end + end + end +end