From 20b611e56cc9055a0d9500cdf77dd7d351736a47 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Mon, 12 Jun 2023 15:16:31 +0700 Subject: [PATCH 01/27] [#21] Add job to scrap search result from google --- Gemfile | 1 + Gemfile.lock | 5 + app/jobs/google/search_keyword_job.rb | 27 + app/models/result_link.rb | 9 + app/services/google/client_service.rb | 40 + app/services/google/parser_service.rb | 84 + lib/tasks/search_keyword.rake | 16 + spec/fabricators/search_stat_fabricator.rb | 8 +- spec/fixtures/vcr/google_search/top_ads_1.yml | 3430 +++++++++++++++++ spec/jobs/google/search_keyword_job_spec.rb | 43 + 10 files changed, 3660 insertions(+), 3 deletions(-) create mode 100644 app/jobs/google/search_keyword_job.rb create mode 100644 app/models/result_link.rb create mode 100644 app/services/google/client_service.rb create mode 100644 app/services/google/parser_service.rb create mode 100644 lib/tasks/search_keyword.rake create mode 100644 spec/fixtures/vcr/google_search/top_ads_1.yml create mode 100644 spec/jobs/google/search_keyword_job_spec.rb diff --git a/Gemfile b/Gemfile index 72d2ea1..bcec4da 100644 --- a/Gemfile +++ b/Gemfile @@ -16,6 +16,7 @@ gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] # Windows doe # gem 'kredis' # Use Kredis to get higher-level data types in Redis # gem 'bcrypt' # Use Active Model has_secure_password gem 'devise' # Flexible authentication solution for Rails with Warden +gem 'httparty' # A HTTP client for Ruby. # Authentications & Authorizations gem 'pundit' # Minimal authorization through OO design and pure Ruby classes diff --git a/Gemfile.lock b/Gemfile.lock index ce57ccf..3aa0579 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -187,6 +187,9 @@ GEM globalid (1.1.0) activesupport (>= 5.0) hashdiff (1.0.1) + httparty (0.21.0) + mini_mime (>= 1.0.0) + multi_xml (>= 0.5.2) i18n (1.13.0) concurrent-ruby (~> 1.0) i18n-js (3.9.0) @@ -226,6 +229,7 @@ GEM mini_mime (1.1.2) minitest (5.18.0) msgpack (1.7.1) + multi_xml (0.6.0) nap (1.1.0) net-imap (0.3.4) date @@ -468,6 +472,7 @@ DEPENDENCIES ffaker figaro foreman + httparty i18n-js (= 3.9.0) jsbundling-rails json_matchers diff --git a/app/jobs/google/search_keyword_job.rb b/app/jobs/google/search_keyword_job.rb new file mode 100644 index 0000000..456b586 --- /dev/null +++ b/app/jobs/google/search_keyword_job.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +module Google + class SearchKeywordJob < ApplicationJob + queue_as :default + + def perform(search_stat_id) + search_stat = SearchStat.find search_stat_id + + html_result = Google::ClientService.new(keyword: search_stat.keyword).call + + raise ClientServiceError unless html_result + + update_search_stat search_stat, ParserService.new(html_response: html_result).call + end + + def update_search_stat(search_stat, attributes) + SearchStat.transaction do + # rubocop:disable Rails/SkipsModelValidations + search_stat.result_links.insert_all attributes[:result_links] + # rubocop:enable Rails/SkipsModelValidations + + search_stat.update! attributes.except(:result_links) + end + end + end +end diff --git a/app/models/result_link.rb b/app/models/result_link.rb new file mode 100644 index 0000000..4c45846 --- /dev/null +++ b/app/models/result_link.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +class ResultLink < ApplicationRecord + enum link_type: { ads_top: 0, non_ads: 1 } + + belongs_to :search_stat, inverse_of: :result_links + + validates :url, presence: true +end diff --git a/app/services/google/client_service.rb b/app/services/google/client_service.rb new file mode 100644 index 0000000..2ff926f --- /dev/null +++ b/app/services/google/client_service.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module Google + class ClientService + USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '\ + 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36' + + BASE_SEARCH_URL = 'https://www.google.com/search' + + def initialize(keyword:, lang: 'en') + @escaped_keyword = CGI.escape(keyword) + @uri = URI("#{BASE_SEARCH_URL}?q=#{@escaped_keyword}&hl=#{lang}&gl=#{lang}") + end + + def call + result = HTTParty.get(@uri, { headers: { 'User-Agent' => USER_AGENT } }) + + return false unless valid_result? result + + result + rescue HTTParty::Error, Timeout::Error, SocketError => e + Rails.logger.error "Error: Query Google with '#{@escaped_keyword}' thrown an error: #{e}".colorize(:red) + + false + end + + private + + # Inspect Http response status code + # Any non 200 response code will be logged + def valid_result?(result) + return true if result&.response&.code == '200' + + Rails.logger.warn "Warning: Query Google with '#{@escaped_keyword}' return status code #{result.response.code}" + .colorize(:yellow) + + false + end + end +end diff --git a/app/services/google/parser_service.rb b/app/services/google/parser_service.rb new file mode 100644 index 0000000..ff1c431 --- /dev/null +++ b/app/services/google/parser_service.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +module Google + class ParserService + NON_ADS_RESULT_SELECTOR = 'a[data-ved]:not([role]):not([jsaction]):not(.adwords):not(.footer-links)' + AD_CONTAINER_ID = 'tads' + ADWORDS_CLASS = 'adwords' + + def initialize(html_response:) + raise ArgumentError, 'response.body cannot be blank' if html_response.body.blank? + + @html = html_response + + @document = Nokogiri::HTML.parse(html_response) + + # Add a class to all AdWords link for easier manipulation + document.css('div[data-text-ad] a[data-ved]').add_class(ADWORDS_CLASS) + + # Mark footer links to identify them + document.css('#footcnt a').add_class('footer-links') + end + + # Parse html data and return a hash with the results + def call + { + top_ad_count: ads_top_count, + ad_count: ads_page_count, + non_ad_count: non_ads_result_count, + total_result_count: total_link_count, + + raw_response: html, + + result_links: result_links, + + status: :completed + } + end + + private + + attr_reader :html, :document + + def ads_top_count + document.css("##{AD_CONTAINER_ID} .#{ADWORDS_CLASS}").count + end + + def ads_page_count + document.css(".#{ADWORDS_CLASS}").count + end + + def ads_top_urls + document.css("##{AD_CONTAINER_ID} .#{ADWORDS_CLASS}").filter_map { |a_tag| a_tag['href'].presence } + end + + def ads_page_urls + document.css(".#{ADWORDS_CLASS}").filter_map { |a_tag| a_tag['href'].presence } + end + + def non_ads_result_count + document.css(NON_ADS_RESULT_SELECTOR).count { |a_tag| a_tag['href'].presence } + end + + def non_ads_urls + document.css(NON_ADS_RESULT_SELECTOR).filter_map { |a_tag| a_tag['href'].presence } + end + + def total_link_count + document.css('a').count + end + + def result_links + results = result_link_map(ads_top_urls, 0) + results += result_link_map(non_ads_urls, 1) + + results + end + + def result_link_map(urls, type) + urls.map do |url| + { url: url, link_type: type } + end + end + end +end diff --git a/lib/tasks/search_keyword.rake b/lib/tasks/search_keyword.rake new file mode 100644 index 0000000..961c4d3 --- /dev/null +++ b/lib/tasks/search_keyword.rake @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +# lib/tasks/search_keyword.rake + +namespace :google do + desc 'Schedule the SearchKeywordJob' + task schedule_search_keyword_job: :environment do + # Fetch the keyword ID or keyword name that you want to process + search_stat_id = 1 # Replace with the actual keyword ID or name + + # Schedule the SearchKeywordJob for background processing + Google::SearchKeywordJob.perform_later(search_stat_id) + + puts 'SearchKeywordJob scheduled successfully.' + end +end diff --git a/spec/fabricators/search_stat_fabricator.rb b/spec/fabricators/search_stat_fabricator.rb index d39524c..3daad71 100644 --- a/spec/fabricators/search_stat_fabricator.rb +++ b/spec/fabricators/search_stat_fabricator.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -demo_user = User.create(email: 'user@demo.com', password: 'Secret@11') - Fabricator(:search_stat) do keyword { FFaker::Lorem.word } ad_count { rand(1..10) } @@ -11,5 +9,9 @@ top_ad_count { rand(1..5) } status { rand(1..3) } raw_response { FFaker::HTMLIpsum.body } - user_id { demo_user.id } + user { User.create(email: 'user@demo.com', password: 'Secret@11') } +end + +Fabricator(:search_stat_parsed_with_links, from: :search_stat) do + result_links(count: FFaker.rand(10) + 1) end diff --git a/spec/fixtures/vcr/google_search/top_ads_1.yml b/spec/fixtures/vcr/google_search/top_ads_1.yml new file mode 100644 index 0000000..8399579 --- /dev/null +++ b/spec/fixtures/vcr/google_search/top_ads_1.yml @@ -0,0 +1,3430 @@ +--- +http_interactions: +- request: + method: get + uri: https://google.com/search?gl=en&hl=en&q=squarespace + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, + like Gecko) Chrome/91.0.4472.77 Safari/537.36 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 301 + message: Moved Permanently + headers: + Location: + - https://www.google.com/search?q=squarespace&hl=en&gl=en + Content-Type: + - text/html; charset=UTF-8 + Bfcache-Opt-In: + - unload + Date: + - Tue, 15 Jun 2021 10:51:47 GMT + Expires: + - Thu, 15 Jul 2021 10:51:47 GMT + Cache-Control: + - public, max-age=2592000 + Server: + - gws + Content-Length: + - '260' + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; + ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; + ma=2592000; v="46,43" + body: + encoding: UTF-8 + string: "\n301 + Moved\n

301 Moved

\nThe document has moved\nhere.\r\n\r\n" + recorded_at: Tue, 15 Jun 2021 10:51:47 GMT +- request: + method: get + uri: https://www.google.com/search?gl=en&hl=en&q=squarespace + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, + like Gecko) Chrome/91.0.4472.77 Safari/537.36 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Content-Type: + - text/html; charset=UTF-8 + Date: + - Tue, 15 Jun 2021 10:51:47 GMT + Expires: + - "-1" + Cache-Control: + - private, max-age=0 + Strict-Transport-Security: + - max-age=31536000 + Bfcache-Opt-In: + - unload + P3p: + - CP="This is not a P3P policy! See g.co/p3phelp for more info." + Server: + - gws + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + Set-Cookie: + - 1P_JAR=2021-06-15-10; expires=Thu, 15-Jul-2021 10:51:47 GMT; path=/; domain=.google.com; + Secure; SameSite=none + - CGIC=IgMqLyo; expires=Sun, 12-Dec-2021 10:51:47 GMT; path=/complete/search; + domain=.google.com; HttpOnly + - CGIC=IgMqLyo; expires=Sun, 12-Dec-2021 10:51:47 GMT; path=/search; domain=.google.com; + HttpOnly + - NID=217=bEtRaL9u0zhI4dFWNn938Mt3qkwUmQiprOOmjrz4qVu7151kNmlECny6XFh0Dv3vusy0PTwDnlbaKVAsjbzCFWc5CV_f_NbXVuoUr4JKldTvPYl-dG6csEC9UyvQlXC7UkMbqMET0NUZEaPjY4l6ANKVa9vqJ_nJOl2GYKtlQe0; + expires=Wed, 15-Dec-2021 10:51:47 GMT; path=/; domain=.google.com; Secure; + HttpOnly; SameSite=none + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; + ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; + ma=2592000; v="46,43" + Transfer-Encoding: + - chunked + body: + encoding: UTF-8 + string: "squarespace - Google Search

Accessibility links

Skip + to main contentAccessibility + help
Accessibility feedback
+
\"Google\"
Press / to jump to the search box
+ +
+ \
  • Remove
  • Report + inappropriate predictions
    +

    Search modes

    All
    Images
    News
    Videos
    More
    Settings
    Tools
      About + 96,100,000 results (0.69 seconds) 

      Ads


      Squarespace + (Official Website) - Create Your New Website Today
      Ad·www.squarespace.com/
      Why this ad?
      About + the advertiser
      Ad·www.squarespace.com/
      Why this ad?
      About + the advertiser
      Create a website with an award-winning template. + Get your unique domain today. No Plugins Necessary. Free 14-day Trial. 24/7 + Customer Support. Drag and Drop. Modern Templates. Custom Domains. Types: + Portfolio Websites, Business Websites, Ecommerce Websites.

      Try Our Website Builder

      Build your own website today.
      Make it yourself with Squarespace.

      Online Store Templates

      Everything You Need To Sell It
      Bring your products to the world

      Claim Your Domain

      Register a custom domain name.
      Fair pricing, no hidden fees

      Get Started

      Let us help you find the perfect
      website design for what you want!
      squarespace + pricing
      squarespace + login
      squarespace + vs wix
      squarespace + webmail
      squarespace + vs wordpress
      squarespace + ipo
      squarespace + crunchbase
      squarespace + templates

      People also search for

      Search Results

      Web result with site links


      Squarespace: Build a Website – Website Builder

      https://www.squarespace.com
      https://www.squarespace.com
      1. Cached
      2. Similar
      Squarespace + is the all-in-one solution for anyone looking to create a beautiful website. + Domains, eCommerce, hosting, galleries, analytics, and 24/7 support all​ ...

      Pricing

      You start with a free trial. We don't collect + your credit card until you ...

      Domain + Name Search

      Search for your name, + business, or brand name. · Check the ...

      Website + Templates

      All Templates - + Professional Services - Personal & CV - ...

      The + Leader in Website Design

      Help your + website stand out with award-winning website design ...

      An + Online Store

      Start selling with + an online store. Sell online with a professional ...

      Create + a Blog

      Stand out with a professionally-designed + template that can be ...
      More + results from squarespace.com »

      People also ask

      Why + Squarespace is bad?
      How + much does squarespace cost?
      Is + Wix or squarespace better?
      What + is better Shopify or squarespace?
      \"Loading...\"
      Feedback

      Twitter results


      Squarespace (@squarespace) · Twitter

      https://twitter.com/squarespace
      \"Media
      Join + us this afternoon at 3pm ET on the Squarespace Instagram channel for a #Juneteenth + celebration and IG Live conversation between #SQSP customers @evierobbie and + Miller!
      Twitter · 20 + hours ago
      \"Media
      Just + getting started on Squarespace? Tune in to our next free webinar. No site + is required to register – sign up right here: share.sqsp.link/getting… +
      Twitter · 7 + days ago
      Our + advice to prospective business owners? If you believe in something, launch + it. \U0001F680\n\nCheck out these websites that #JustLaunched on Squarespace + this month: share.sqsp.link/JustLau…
      Twitter · Jun 7, + 2021
      View on Twitter

      Web results


      Squarespace - Wikipedia

      https://en.wikipedia.org › wiki › Squarespace
      https://en.wikipedia.org › wiki › Squarespace
      1. Cached
      2. Similar
      Squarespace, + Inc. is an American website building and hosting company which is based in + New York City, United States. ... It provides software as a service for ...
      Revenue: + US$621.1Million(2020)
      Products: + Website builder and hosting service
      Industry: Internet
      Founder(s): + Anthony Casalena

      Squarespace | LinkedIn

      https://www.linkedin.com › company › squarespace
      https://www.linkedin.com › company › squarespace
      1. Similar
      Squarespace + empowers millions of dreamers, makers, and doers by providing them with the + tools they need to bring their creative ideas to life. On Squarespace's ...

      Videos

      \"Video
      37:33
      Squarespace Tutorial for Beginners (Full + Tutorial) - Create A ...
      YouTube + · Santrel Media
      Jan 15, 2020
      10 key + moments in this video
      From 01:30
      Step + 1
      From 06:41
      Site + Title
      From 11:33
      Add + a Button
      From 16:54
      Add + a New Page
      \"Video
      1:08
      5 to 9 by Dolly Parton – Extended | Big Game Commercial ...
      YouTube · Squarespace
      Feb + 2, 2021
      \"Video
      30:53
      How to Create a Website - Squarespace Tutorial + 2019
      YouTube · TheFigCo
      Mar 15, 2019
      6 key + moments in this video

      View all

      Web results


      Squarespace (@squarespace) • Instagram photos and + videos

      https://www.instagram.com › squarespace
      https://www.instagram.com › squarespace
      1. Similar
      280k + Followers, 10 Following, 703 Posts - See Instagram photos and videos from + Squarespace (@squarespace)

      Squarespace - Verified Page | Facebook

      https://www.facebook.com › ... › Squarespace
      https://www.facebook.com › ... › Squarespace
      Squarespace. + 364168 likes · 2584 talking about this. Everything needed to power your ideas: + websites, online ...
      10 hours ago

      Related searches

      \"\"
      Best website builder
      \"\"
      Squaresp...
      \"\"
      GoDaddy
      \"\"
      Weebly
      \"\"
      Shopify
      \"\"
      Webnode
      \"\"
      Strikingly
      See more
      \"\"
      Web design app
      \"\"
      Popular blog sites
      Feedback
      squarespace login
      squarespace vs + wix
      squarespace pricing
      squarespace vs + wordpress
      squarespace stock
      squarespace templates
      squarespace ipo
      squarespace crunchbase

      Page navigation

      12345678910Next

      Complementary results

      Images

      \"Image
      \"Image
      \"Image
      \"Image
      \"Image
      More images

      Squarespace

      Description

      Description

      Squarespace, Inc. is an American website + building and hosting company which is based in New York City, United States. + It provides software as a service for website building and hosting, and allows + users to use pre-built website templates and drag-and-drop elements to create + and modify webpages. Wikipedia
      Founder(s): + Anthony Casalena
      Employees: + 1,143 (2021)
      Launched: + January 2004; + 17 years ago
      CEO: Anthony Casalena
      Traded as: + NYSE: SQSP
      Headquarters: + New York City, + New York
      Registration: + Required; subscription needed for + certain features
      Choose what you’re giving + feedback on
      Or give general feedback
      Feedback

      See + results about

      Squarespace
      IT service management companyIT + service management company

      Footer links

      Vietnam
      - -  - Learn + more
      HelpSend + feedbackPrivacyTerms
      + + \
      " + recorded_at: Tue, 15 Jun 2021 10:51:48 GMT +recorded_with: VCR 6.0.0 diff --git a/spec/jobs/google/search_keyword_job_spec.rb b/spec/jobs/google/search_keyword_job_spec.rb new file mode 100644 index 0000000..8fa720b --- /dev/null +++ b/spec/jobs/google/search_keyword_job_spec.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe Google::SearchKeywordJob, type: :job do + include ActiveJob::TestHelper + + describe '#perform' do + context 'given a valid request' do + it 'queues the job', vcr: 'google_search/top_ads_1' do + search_stat = Fabricate(:search_stat) + + expect { described_class.perform_later search_stat.id }.to have_enqueued_job(described_class) + end + + it 'saves all result_links in the DataBase', vcr: 'google_search/top_ads_1' do + search_stat = Fabricate(:search_stat) + + described_class.perform_now search_stat.id + + expect(search_stat.result_links.count).to eq(45) + end + + it 'sets the search stat status as completed', vcr: 'google_search/top_ads_1' do + search_stat = Fabricate(:search_stat) + + described_class.perform_now search_stat.id + + expect(search_stat.reload.status).to eq('completed') + end + + it 'sets the links counts with the right values', vcr: 'google_search/top_ads_1' do + search_stat = Fabricate(:search_stat) + + described_class.perform_now search_stat.id + + search_stat.reload + + expect(search_stat.top_ad_count + search_stat.non_ad_count).to eq(45) + end + end + end +end From 45c5304a3660da109dc30cf1ac2308e056ff69f1 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Tue, 13 Jun 2023 12:17:05 +0700 Subject: [PATCH 02/27] [#21] Use keyword argument for search keyword job --- app/jobs/google/search_keyword_job.rb | 2 +- db/schema.rb | 4 ++-- lib/tasks/search_keyword.rake | 5 +---- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/app/jobs/google/search_keyword_job.rb b/app/jobs/google/search_keyword_job.rb index 456b586..8323f2b 100644 --- a/app/jobs/google/search_keyword_job.rb +++ b/app/jobs/google/search_keyword_job.rb @@ -4,7 +4,7 @@ module Google class SearchKeywordJob < ApplicationJob queue_as :default - def perform(search_stat_id) + def perform(search_stat_id:) search_stat = SearchStat.find search_stat_id html_result = Google::ClientService.new(keyword: search_stat.keyword).call diff --git a/db/schema.rb b/db/schema.rb index 0989781..5f377a0 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2023_06_08_032015) do +ActiveRecord::Schema.define(version: 2023_06_12_121926) do # These are extensions that must be enabled in order to support this database enable_extension "citext" @@ -18,7 +18,7 @@ create_table "result_links", force: :cascade do |t| t.bigint "search_stat_id", null: false - t.integer "link_type", null: false + t.string "link_type", null: false t.string "url", null: false t.datetime "created_at", precision: 6, default: -> { "CURRENT_TIMESTAMP" }, null: false t.datetime "updated_at", precision: 6, default: -> { "CURRENT_TIMESTAMP" }, null: false diff --git a/lib/tasks/search_keyword.rake b/lib/tasks/search_keyword.rake index 961c4d3..a8973fe 100644 --- a/lib/tasks/search_keyword.rake +++ b/lib/tasks/search_keyword.rake @@ -5,11 +5,8 @@ namespace :google do desc 'Schedule the SearchKeywordJob' task schedule_search_keyword_job: :environment do - # Fetch the keyword ID or keyword name that you want to process - search_stat_id = 1 # Replace with the actual keyword ID or name - # Schedule the SearchKeywordJob for background processing - Google::SearchKeywordJob.perform_later(search_stat_id) + Google::SearchKeywordJob.perform_later(search_stat_id: 1) puts 'SearchKeywordJob scheduled successfully.' end From 4e6018370d523f3e965cd7624dc10105dc1c0a87 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Tue, 13 Jun 2023 21:35:44 +0700 Subject: [PATCH 03/27] [#21] Fix search stat job spec --- spec/jobs/google/search_keyword_job_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/jobs/google/search_keyword_job_spec.rb b/spec/jobs/google/search_keyword_job_spec.rb index 8fa720b..f3250d1 100644 --- a/spec/jobs/google/search_keyword_job_spec.rb +++ b/spec/jobs/google/search_keyword_job_spec.rb @@ -16,7 +16,7 @@ it 'saves all result_links in the DataBase', vcr: 'google_search/top_ads_1' do search_stat = Fabricate(:search_stat) - described_class.perform_now search_stat.id + described_class.perform_now search_stat_id: search_stat.id expect(search_stat.result_links.count).to eq(45) end @@ -24,7 +24,7 @@ it 'sets the search stat status as completed', vcr: 'google_search/top_ads_1' do search_stat = Fabricate(:search_stat) - described_class.perform_now search_stat.id + described_class.perform_now search_stat_id: search_stat.id expect(search_stat.reload.status).to eq('completed') end @@ -32,7 +32,7 @@ it 'sets the links counts with the right values', vcr: 'google_search/top_ads_1' do search_stat = Fabricate(:search_stat) - described_class.perform_now search_stat.id + described_class.perform_now search_stat_id: search_stat.id search_stat.reload From 7ee3d0137f03754b1ab45d12cf27ee76851ed0df Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Tue, 13 Jun 2023 21:36:28 +0700 Subject: [PATCH 04/27] [#21] Remove blank lines from parser service --- app/services/google/parser_service.rb | 3 --- 1 file changed, 3 deletions(-) diff --git a/app/services/google/parser_service.rb b/app/services/google/parser_service.rb index ff1c431..f3cd1c7 100644 --- a/app/services/google/parser_service.rb +++ b/app/services/google/parser_service.rb @@ -27,11 +27,8 @@ def call ad_count: ads_page_count, non_ad_count: non_ads_result_count, total_result_count: total_link_count, - raw_response: html, - result_links: result_links, - status: :completed } end From f8e2338a23cdf04cc26d908e387791189e4a6674 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Wed, 14 Jun 2023 10:31:13 +0700 Subject: [PATCH 05/27] [#21] Update url count methods of parser service --- app/services/google/parser_service.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/services/google/parser_service.rb b/app/services/google/parser_service.rb index f3cd1c7..d1f512a 100644 --- a/app/services/google/parser_service.rb +++ b/app/services/google/parser_service.rb @@ -46,19 +46,19 @@ def ads_page_count end def ads_top_urls - document.css("##{AD_CONTAINER_ID} .#{ADWORDS_CLASS}").filter_map { |a_tag| a_tag['href'].presence } + document.css("##{AD_CONTAINER_ID} .#{ADWORDS_CLASS}").filter_map { |a_tag| a_tag['href'].present? } end def ads_page_urls - document.css(".#{ADWORDS_CLASS}").filter_map { |a_tag| a_tag['href'].presence } + document.css(".#{ADWORDS_CLASS}").filter_map { |a_tag| a_tag['href'].present? } end def non_ads_result_count - document.css(NON_ADS_RESULT_SELECTOR).count { |a_tag| a_tag['href'].presence } + document.css(NON_ADS_RESULT_SELECTOR).count { |a_tag| a_tag['href'].present? } end def non_ads_urls - document.css(NON_ADS_RESULT_SELECTOR).filter_map { |a_tag| a_tag['href'].presence } + document.css(NON_ADS_RESULT_SELECTOR).filter_map { |a_tag| a_tag['href'].present? } end def total_link_count From 9aa642f9af642c0ae2fea5b573a6db1efb0eac5d Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Wed, 14 Jun 2023 10:39:50 +0700 Subject: [PATCH 06/27] [#21] Seed search stat with result links --- db/seeds.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/seeds.rb b/db/seeds.rb index 5ebbdc8..8996b03 100644 --- a/db/seeds.rb +++ b/db/seeds.rb @@ -12,5 +12,5 @@ user = User.where(email: 'user@demo.com').first_or_create(Fabricate.attributes_for(:user, email: 'user@demo.com')) 10.times do - Fabricate.times(100, :search_stat, user: user) + Fabricate.times(100, :search_stat_parsed_with_links, user: user) end From 3d70f82b61c973b3ff1901a07fe777e844e6346e Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Wed, 14 Jun 2023 18:23:52 +0700 Subject: [PATCH 07/27] [#21] Update result link type enum in parser service --- app/services/google/parser_service.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/services/google/parser_service.rb b/app/services/google/parser_service.rb index d1f512a..1ea94f4 100644 --- a/app/services/google/parser_service.rb +++ b/app/services/google/parser_service.rb @@ -66,8 +66,8 @@ def total_link_count end def result_links - results = result_link_map(ads_top_urls, 0) - results += result_link_map(non_ads_urls, 1) + results = result_link_map(ads_top_urls, :ads_top) + results += result_link_map(non_ads_urls, :non_ads) results end From 8a968728b03e4b79759d642db0b71b02634aad4b Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 15 Jun 2023 18:12:17 +0700 Subject: [PATCH 08/27] [#21] Refactor parser service Co-authored-by: Sang Huynh Thanh <63148598+sanG-github@users.noreply.github.com> --- app/services/google/parser_service.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/services/google/parser_service.rb b/app/services/google/parser_service.rb index 1ea94f4..f81c529 100644 --- a/app/services/google/parser_service.rb +++ b/app/services/google/parser_service.rb @@ -73,9 +73,7 @@ def result_links end def result_link_map(urls, type) - urls.map do |url| - { url: url, link_type: type } - end + urls.map { |url| { url: url, link_type: type } } end end end From 122de2f3e5a405dc611c32ecc76641b40e13951e Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 15 Jun 2023 18:25:17 +0700 Subject: [PATCH 09/27] [#21] Refactor search keyword job --- app/jobs/google/search_keyword_job.rb | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/app/jobs/google/search_keyword_job.rb b/app/jobs/google/search_keyword_job.rb index 8323f2b..a883432 100644 --- a/app/jobs/google/search_keyword_job.rb +++ b/app/jobs/google/search_keyword_job.rb @@ -6,14 +6,17 @@ class SearchKeywordJob < ApplicationJob def perform(search_stat_id:) search_stat = SearchStat.find search_stat_id - - html_result = Google::ClientService.new(keyword: search_stat.keyword).call - - raise ClientServiceError unless html_result - + html_result = fetch_html_result(search_stat.keyword) update_search_stat search_stat, ParserService.new(html_response: html_result).call end + def fetch_html_result(keyword) + Google::ClientService.new(keyword: keyword).call + rescue StandardError => e + Rails.logger.error("Error while fetching HTML result: #{e.message}") + raise ClientServiceError, 'Error fetching HTML result' + end + def update_search_stat(search_stat, attributes) SearchStat.transaction do # rubocop:disable Rails/SkipsModelValidations From 5c5eaad6d8b370373b7f1a7571de437b5adb1fd9 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Tue, 20 Jun 2023 12:50:33 +0700 Subject: [PATCH 10/27] [#21] Fabricate random user while fabricating search stat --- spec/fabricators/search_stat_fabricator.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/fabricators/search_stat_fabricator.rb b/spec/fabricators/search_stat_fabricator.rb index 3daad71..bbaab2b 100644 --- a/spec/fabricators/search_stat_fabricator.rb +++ b/spec/fabricators/search_stat_fabricator.rb @@ -9,7 +9,7 @@ top_ad_count { rand(1..5) } status { rand(1..3) } raw_response { FFaker::HTMLIpsum.body } - user { User.create(email: 'user@demo.com', password: 'Secret@11') } + user { Fabricate(:user) } end Fabricator(:search_stat_parsed_with_links, from: :search_stat) do From 40d3740b4676cac48edd2756440ae73603811750 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Tue, 20 Jun 2023 12:53:55 +0700 Subject: [PATCH 11/27] [#21] Refactor search keyword job --- app/jobs/google/search_keyword_job.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/jobs/google/search_keyword_job.rb b/app/jobs/google/search_keyword_job.rb index a883432..08fe679 100644 --- a/app/jobs/google/search_keyword_job.rb +++ b/app/jobs/google/search_keyword_job.rb @@ -7,7 +7,9 @@ class SearchKeywordJob < ApplicationJob def perform(search_stat_id:) search_stat = SearchStat.find search_stat_id html_result = fetch_html_result(search_stat.keyword) - update_search_stat search_stat, ParserService.new(html_response: html_result).call + parsed_attributes = ParserService.new(html_response: html_result).call + + update_search_stat(search_stat, parsed_attributes) end def fetch_html_result(keyword) From 8051e7cfa7e9d36bbedeefa8c0b7f8e0c374e9ff Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Tue, 20 Jun 2023 15:09:19 +0700 Subject: [PATCH 12/27] [#21] Refactor search keyword job --- app/jobs/google/search_keyword_job.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/jobs/google/search_keyword_job.rb b/app/jobs/google/search_keyword_job.rb index 08fe679..04bc38f 100644 --- a/app/jobs/google/search_keyword_job.rb +++ b/app/jobs/google/search_keyword_job.rb @@ -21,9 +21,7 @@ def fetch_html_result(keyword) def update_search_stat(search_stat, attributes) SearchStat.transaction do - # rubocop:disable Rails/SkipsModelValidations - search_stat.result_links.insert_all attributes[:result_links] - # rubocop:enable Rails/SkipsModelValidations + search_stat.result_links.create(attributes[:result_links]) search_stat.update! attributes.except(:result_links) end From bce3b3e8d965e1dc410a5eae5d0bd582c45c7c12 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Tue, 20 Jun 2023 15:47:56 +0700 Subject: [PATCH 13/27] [#21] Refactor client service --- app/services/google/client_service.rb | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/app/services/google/client_service.rb b/app/services/google/client_service.rb index 2ff926f..48d5513 100644 --- a/app/services/google/client_service.rb +++ b/app/services/google/client_service.rb @@ -7,6 +7,8 @@ class ClientService BASE_SEARCH_URL = 'https://www.google.com/search' + SUCCESS_STATUS_CODE = 200 + def initialize(keyword:, lang: 'en') @escaped_keyword = CGI.escape(keyword) @uri = URI("#{BASE_SEARCH_URL}?q=#{@escaped_keyword}&hl=#{lang}&gl=#{lang}") @@ -26,15 +28,9 @@ def call private - # Inspect Http response status code - # Any non 200 response code will be logged def valid_result?(result) - return true if result&.response&.code == '200' - - Rails.logger.warn "Warning: Query Google with '#{@escaped_keyword}' return status code #{result.response.code}" - .colorize(:yellow) - - false + return false unless result + return true if result.response.code == SUCCESS_STATUS_CODE end end end From e58486624cac5cc251572e8848d9eeeed71a3a6b Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Tue, 20 Jun 2023 17:36:02 +0700 Subject: [PATCH 14/27] [#21] Fix failing unit tests --- app/services/google/client_service.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/google/client_service.rb b/app/services/google/client_service.rb index 48d5513..da20b0b 100644 --- a/app/services/google/client_service.rb +++ b/app/services/google/client_service.rb @@ -7,7 +7,7 @@ class ClientService BASE_SEARCH_URL = 'https://www.google.com/search' - SUCCESS_STATUS_CODE = 200 + SUCCESS_STATUS_CODE = '200' def initialize(keyword:, lang: 'en') @escaped_keyword = CGI.escape(keyword) From b1fde069f669e03dbba3ee33a5efe6a79a5818f6 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Wed, 21 Jun 2023 14:55:59 +0700 Subject: [PATCH 15/27] [#21] In search keyword job handle case of non existant search stat --- app/jobs/google/search_keyword_job.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/jobs/google/search_keyword_job.rb b/app/jobs/google/search_keyword_job.rb index 04bc38f..3d4d36d 100644 --- a/app/jobs/google/search_keyword_job.rb +++ b/app/jobs/google/search_keyword_job.rb @@ -6,6 +6,8 @@ class SearchKeywordJob < ApplicationJob def perform(search_stat_id:) search_stat = SearchStat.find search_stat_id + return unless search_stat + html_result = fetch_html_result(search_stat.keyword) parsed_attributes = ParserService.new(html_response: html_result).call From 582a136239e71faefb57e750091a7db6474f68d0 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Wed, 21 Jun 2023 15:29:16 +0700 Subject: [PATCH 16/27] [#21] Handle exeption during search keyword job --- app/jobs/google/search_keyword_job.rb | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/app/jobs/google/search_keyword_job.rb b/app/jobs/google/search_keyword_job.rb index 3d4d36d..f8aa851 100644 --- a/app/jobs/google/search_keyword_job.rb +++ b/app/jobs/google/search_keyword_job.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true module Google + class ClientServiceError < StandardError; end + class SearchKeywordJob < ApplicationJob queue_as :default @@ -8,17 +10,15 @@ def perform(search_stat_id:) search_stat = SearchStat.find search_stat_id return unless search_stat - html_result = fetch_html_result(search_stat.keyword) + html_result = Google::ClientService.new(keyword: search_stat.keyword).call + + raise ClientServiceError unless html_result + parsed_attributes = ParserService.new(html_response: html_result).call update_search_stat(search_stat, parsed_attributes) - end - - def fetch_html_result(keyword) - Google::ClientService.new(keyword: keyword).call - rescue StandardError => e - Rails.logger.error("Error while fetching HTML result: #{e.message}") - raise ClientServiceError, 'Error fetching HTML result' + rescue ActiveRecord::RecordNotFound, ClientServiceError, ArgumentError + update_keyword_status search_stat, :failed end def update_search_stat(search_stat, attributes) @@ -28,5 +28,9 @@ def update_search_stat(search_stat, attributes) search_stat.update! attributes.except(:result_links) end end + + def update_search_stat_status(search_stat, status) + search_stat.update! status: status + end end end From 58605851a54bc806e83ebff3d8fa6b1966e75360 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 22 Jun 2023 09:44:39 +0700 Subject: [PATCH 17/27] [#21] Add explicit class name for search stat fabricator --- spec/fabricators/search_stat_fabricator.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/fabricators/search_stat_fabricator.rb b/spec/fabricators/search_stat_fabricator.rb index bbaab2b..f9b3429 100644 --- a/spec/fabricators/search_stat_fabricator.rb +++ b/spec/fabricators/search_stat_fabricator.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -Fabricator(:search_stat) do +Fabricator(:search_stat, class_name: SearchStat) do keyword { FFaker::Lorem.word } ad_count { rand(1..10) } link_count { rand(1..60) } From ef9224b7ed6cce79d56fe312d9d470639aa53822 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 22 Jun 2023 10:47:39 +0700 Subject: [PATCH 18/27] [#21] Add test for parser service top ad count --- spec/fixtures/vcr/google_search/top_ads_6.yml | 343 ++++++++++++++++++ spec/services/google/parser_service_spec.rb | 15 + 2 files changed, 358 insertions(+) create mode 100644 spec/fixtures/vcr/google_search/top_ads_6.yml create mode 100644 spec/services/google/parser_service_spec.rb diff --git a/spec/fixtures/vcr/google_search/top_ads_6.yml b/spec/fixtures/vcr/google_search/top_ads_6.yml new file mode 100644 index 0000000..aea86ce --- /dev/null +++ b/spec/fixtures/vcr/google_search/top_ads_6.yml @@ -0,0 +1,343 @@ +--- +http_interactions: +- request: + method: get + uri: https://google.com/search?gl=en&hl=en&q=vpn + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, + like Gecko) Chrome/91.0.4472.77 Safari/537.36 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 301 + message: Moved Permanently + headers: + Location: + - https://www.google.com/search?q=vpn&hl=en&gl=en + Content-Type: + - text/html; charset=UTF-8 + Bfcache-Opt-In: + - unload + Date: + - Tue, 15 Jun 2021 11:07:04 GMT + Expires: + - Thu, 15 Jul 2021 11:07:04 GMT + Cache-Control: + - public, max-age=2592000 + Server: + - gws + Content-Length: + - '252' + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; + ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; + ma=2592000; v="46,43" + body: + encoding: UTF-8 + string: "\n301 + Moved\n

      301 Moved

      \nThe document has moved\nhere.\r\n\r\n" + recorded_at: Tue, 15 Jun 2021 11:07:04 GMT +- request: + method: get + uri: https://www.google.com/search?gl=en&hl=en&q=vpn + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, + like Gecko) Chrome/91.0.4472.77 Safari/537.36 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Content-Type: + - text/html; charset=UTF-8 + Date: + - Tue, 15 Jun 2021 11:07:05 GMT + Expires: + - "-1" + Cache-Control: + - private, max-age=0 + Strict-Transport-Security: + - max-age=31536000 + Bfcache-Opt-In: + - unload + P3p: + - CP="This is not a P3P policy! See g.co/p3phelp for more info." + Server: + - gws + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + Set-Cookie: + - 1P_JAR=2021-06-15-11; expires=Thu, 15-Jul-2021 11:07:05 GMT; path=/; domain=.google.com; + Secure; SameSite=none + - CGIC=IgMqLyo; expires=Sun, 12-Dec-2021 11:07:05 GMT; path=/complete/search; + domain=.google.com; HttpOnly + - CGIC=IgMqLyo; expires=Sun, 12-Dec-2021 11:07:05 GMT; path=/search; domain=.google.com; + HttpOnly + - NID=217=R0nJsU55CouUI8tt-KPBFDcafgM0Hncl581iMqKFp74GMop1ZQAuvQTJRpyewJKYsk5Bt9CvGEOQBmHxdvngYP0DO1gU5EQDBB2euoon5N6GEzfuAG6oUDUskiaxJU0NapM-arm-y77YIWBvjAzQjcR_1tcuqI_jtA1zQoSXRSg; + expires=Wed, 15-Dec-2021 11:07:05 GMT; path=/; domain=.google.com; Secure; + HttpOnly; SameSite=none + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; + ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; + ma=2592000; v="46,43" + Transfer-Encoding: + - chunked + body: + encoding: UTF-8 + string: |- + vpn - Google Search

      Accessibility links

      Skip to main contentAccessibility help
      Accessibility feedback
      Google
      Press / to jump to the search box
      • Remove
      • Report inappropriate predictions

        Search modes

        All
        Images
        Videos
        News
        Books
        More
        Tools
          About 228,000,000 results (0.73 seconds) 

          Ads


          Google Cloud VPN - Sign Up For A Free Trial
          Ad·cloud.google.com/vpn
          Why this ad?
          About the advertiser
          Ad·cloud.google.com/vpn
          Why this ad?
          About the advertiser
          High-Performance VMs. Powered by Google's Private Network. Try Free.

          Platform Security

          Deploy on a platform protected
          by more than 750 security experts

          Cloud DNS

          Highly Available DNS Serving
          Scales on Google Cloud.

          Compute Engine

          Run high-performance scalable
          VMs on Google's infrastructure

          Machine Learning Engine

          Build superior models &
          deploy them into production.
          vpn free
          vpn meaning
          protonvpn
          vpn online
          vpn extension
          nordvpn
          vpn apk
          best vpn

          People also search for


          ExpressVPNâ„¢ - Official Site - The Best VPN Trial: ExpressVPN
          Ad·www.expressvpn.com/
          Why this ad?
          About the advertiser
          Ad·www.expressvpn.com/
          Why this ad?
          About the advertiser
          Powerful Speeds. Unlimited Bandwidth. ExpressVPN is the World’s Most Trusted VPN Provider.
          ‎Order ExpressVPN Today · ‎3,000+ VPN Servers · ‎How ExpressVPN Works
          vpn free
          vpn meaning
          protonvpn
          vpn online
          vpn extension
          nordvpn
          vpn apk
          best vpn

          People also search for


          Top 10 Best Free VPN Services - The Most Popular VPNs of 2021
          Ad·www.top10vpn.com/
          Why this ad?
          About the advertiser
          Ad·www.top10vpn.com/
          Why this ad?
          About the advertiser
          See Our Top 10 Best VPN Picks for Unrestricted Web Access. Compare & Find Your Ideal VPN.
          vpn free
          vpn meaning
          protonvpn
          vpn online
          vpn extension
          nordvpn
          vpn apk
          best vpn

          People also search for

          Search Results

          Web results


          Free VPN Download | NordVPN

          https://nordvpn.com › download
          https://nordvpn.com › download
          1. Cached
          2. Similar
          Free VPN client download for any operating system: Windows, macOS, Android, iOS & more. Compatible with computers, phones, routers and even gaming ...
          ‎The best VPN for Chromecast · ‎Best Firefox VPN browser... · ‎Chrome extension

          People also ask

          What is the VPN and how it works?
          Is VPN illegal?
          What is a VPN used for?
          Is a VPN free?
          Loading...
          Feedback

          Web results


          NordVPN: Best VPN service. Online security starts with a click.

          https://nordvpn.com
          https://nordvpn.com
          1. Cached
          2. Similar
          A good VPN service provides you a secure, encrypted tunnel for online traffic to flow. Nobody can see through the tunnel and get their hands on your internet ...

          OpenVPN: VPN Software Solutions & Services For Business

          https://openvpn.net
          https://openvpn.net
          1. Cached
          2. Similar
          OpenVPN provides flexible VPN solutions for businesses to secure all data communications and extend private network services while maintaining security.

          ExpressVPN: High-Speed, Secure & Anonymous VPN Service

          https://www.expressvpn.com
          https://www.expressvpn.com
          1. Cached
          2. Similar
          Top-rated VPN for 2021. Unblock sites & protect every device. 24/7 support. VPN for Windows, Mac, Android, iOS, routers & more. Try 30 days risk-free.

          Virtual private network - Wikipedia

          https://en.wikipedia.org › wiki › Virtual_private_network
          https://en.wikipedia.org › wiki › Virtual_private_network
          1. Cached
          2. Similar
          A virtual private network (VPN) extends a private network across a public network and enables users to send and receive data across shared or public networks ...

          Top stories

          CNET
          .
          No, Apple's Private Relay is not a VPN
          .

          3 days ago

          TechRadar
          .
          What is Apple Private Relay and is it worse than a VPN?
          .

          8 hours ago

          MarketWatch
          .
          This top-rated VPN is now on sale for just $3.10 a month
          .

          4 days ago


          View all

          Web results


          VPN extensions - Google Chrome - Download the Fast ...

          https://chrome.google.com › webstore › search › vpn
          https://chrome.google.com › webstore › search › vpn
          1. Similar
          No information is available for this page.
          Learn why

          ProtonVPN: Secure and Free VPN service for protecting your ...

          https://protonvpn.com
          https://protonvpn.com
          1. Cached
          2. Similar
          ProtonVPN is a security focused FREE VPN service, developed by CERN and MIT scientists. Use the web anonymously, unblock websites & encrypt your ...

          The Best VPN Services for 2021 | PCMag

          https://www.pcmag.com › Best Products › Security › VPN
          https://www.pcmag.com › Best Products › Security › VPN
          What Is a VPN? When you switch on a VPN, it sends your web traffic through an encrypted tunnel to a server controlled by the VPN company. From there, ...

          TunnelBear: Secure VPN Service

          https://www.tunnelbear.com
          https://www.tunnelbear.com
          1. Cached
          2. Similar
          Really simple VPN to browse the web privately & securely. Unblock websites around the world with applications for Mac, PC, iOS, Android & Chrome.

          Ads


          IPVanish® Official Site - VPN From $2.62/month
          Ad·www.ipvanish.com/
          Why this ad?
          Ad·www.ipvanish.com/
          Why this ad?
          The Fastest, Most Reliable VPN. Secure & Easy-to-Use. Money Back Guarantee. Voted Fastest VPN. Unmetered Connections. 24x7 Customer Support. Super-Fast Connections.
          ‎World's Best VPN · ‎World's Fastest VPN · ‎Secure Online Access

          CyberGhostâ„¢ VPN - Try It Risk-Free 45-Days
          Ad·www.cyberghostvpn.com/
          Why this ad?
          About the advertiser
          Ad·www.cyberghostvpn.com/
          Why this ad?
          About the advertiser
          Fast and reliable VPN service. Dedicated streaming & torrenting servers. Zero log policy. Access 6100+ servers from 90 countries. Get automatic Wi-Fi protection. No Logs Policy.
          ‎Massive Server Fleet · ‎Special Streaming Servers
          Deal: 85% off Only $1.99/Month

          Top 10 Best VPNs For 2021 - Top 10 Best VPN Offers
          Ad·www.top10vpn.guide/
          Why this ad?
          Ad·www.top10vpn.guide/
          Why this ad?
          Don't Let Them Snoop On You! Protect Your Devices From Governments, Advertisers & Hackers! Money Back Guarantee. Keep Your Identity Safe. Protect All Your Devices. Free Trial! The Most Secure VPNs. Test Run With 30d. Trial. Access Restricted Content.
          ‎NordVPN Review · ‎Best VPN For Streaming · ‎Best For Privacy · ‎Top 10 Best VPNs 2021
          Monthly Plan - from $12.99/mo - Free Trials Available Â· More
          1 Year Plan - from $3.99/mo
          Great Value
          3 Years + 3 Months Free - from $2.25/mo
          Save Up To 83%

          Related searches

          Best VPN Reddit
          ProtonVPN
          ExpressV...
          NordVPN
          TunnelBear
          CyberGhost VPN
          Mullvad
          See more
          VPN protocols
          Best tools for privacy
          Feedback
          vpn free
          nordvpn
          best vpn
          vpn apk
          vpn online
          protonvpn
          vpn app
          vpn extension

          Page navigation

          12345678910Next

          Complementary results

          See results about

          Virtual private network
          A virtual private network extends a private network across a ...A virtual private network extends a private network across a public network and ...
          NordVPN
          NordVPN is a VPN service. It has desktop applications for ...NordVPN is a VPN service. It has desktop applications for Windows, macOS, and ...

          Footer links

          Vietnam
          - -  - Learn more
          HelpSend feedbackPrivacyTerms
          + recorded_at: Tue, 15 Jun 2021 11:07:06 GMT +recorded_with: VCR 6.0.0 diff --git a/spec/services/google/parser_service_spec.rb b/spec/services/google/parser_service_spec.rb new file mode 100644 index 0000000..0278f36 --- /dev/null +++ b/spec/services/google/parser_service_spec.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe Google::ParserService, type: :service do + describe '#call' do + context 'when parsing a page having 1 top ad' do + it 'counts exactly 1 top ad', vcr: 'google_search/top_ads_1' do + result = Google::ClientService.new(keyword: 'squarespace').call + + expect(described_class.new(html_response: result).call[:top_ad_count]).to eq(1) + end + end + end +end From 57afc7efa269943e7313873f472da228d04be0ff Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 22 Jun 2023 11:05:24 +0700 Subject: [PATCH 19/27] [#21] Fix bugs in parser service --- app/services/google/parser_service.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/services/google/parser_service.rb b/app/services/google/parser_service.rb index f81c529..17bcac6 100644 --- a/app/services/google/parser_service.rb +++ b/app/services/google/parser_service.rb @@ -46,19 +46,19 @@ def ads_page_count end def ads_top_urls - document.css("##{AD_CONTAINER_ID} .#{ADWORDS_CLASS}").filter_map { |a_tag| a_tag['href'].present? } + document.css("##{AD_CONTAINER_ID} .#{ADWORDS_CLASS}").filter_map { |a_tag| a_tag['href'].presence } end def ads_page_urls - document.css(".#{ADWORDS_CLASS}").filter_map { |a_tag| a_tag['href'].present? } + document.css(".#{ADWORDS_CLASS}").filter_map { |a_tag| a_tag['href'].presence } end def non_ads_result_count - document.css(NON_ADS_RESULT_SELECTOR).count { |a_tag| a_tag['href'].present? } + document.css(NON_ADS_RESULT_SELECTOR).count { |a_tag| a_tag['href'].presence } end def non_ads_urls - document.css(NON_ADS_RESULT_SELECTOR).filter_map { |a_tag| a_tag['href'].present? } + document.css(NON_ADS_RESULT_SELECTOR).filter_map { |a_tag| a_tag['href'].presence } end def total_link_count From c190d21d94820e2d718e8cd688f92541e2747058 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 22 Jun 2023 11:05:51 +0700 Subject: [PATCH 20/27] [#21] Add tests for parser service --- spec/services/google/parser_service_spec.rb | 47 +++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/spec/services/google/parser_service_spec.rb b/spec/services/google/parser_service_spec.rb index 0278f36..ff8806e 100644 --- a/spec/services/google/parser_service_spec.rb +++ b/spec/services/google/parser_service_spec.rb @@ -11,5 +11,52 @@ expect(described_class.new(html_response: result).call[:top_ad_count]).to eq(1) end end + + context 'when parsing a page having 3 top ads, 3 bottom ads and 14 non ad links' do + it 'counts exactly 3 top ads', vcr: 'google_search/top_ads_6' do + result = Google::ClientService.new(keyword: 'vpn').call + + expect(described_class.new(html_response: result).call[:top_ad_count]).to eq(3) + end + + it 'counts exactly 6 ads in total', vcr: 'google_search/top_ads_6' do + result = Google::ClientService.new(keyword: 'vpn').call + + expect(described_class.new(html_response: result).call[:ad_count]).to eq(6) + end + + it 'finds exactly the 3 top ads urls', vcr: 'google_search/top_ads_6' do + result = Google::ClientService.new(keyword: 'vpn').call + + result_links = described_class.new(html_response: result).call[:result_links] + + top_ads_urls = result_links.select { |link| link[:link_type] == :ads_top }.pluck(:url) + + expect(top_ads_urls).to contain_exactly('https://cloud.google.com/free', 'https://www.expressvpn.com/', 'https://www.top10vpn.com/best-vpn-for-vietnam/') + end + + it 'counts exactly 14 non ad results', vcr: 'google_search/top_ads_6' do + result = Google::ClientService.new(keyword: 'vpn').call + + expect(described_class.new(html_response: result).call[:non_ad_count]).to eq(14) + end + + it 'gets 14 non_ads result_links', vcr: 'google_search/top_ads_6' do + result = Google::ClientService.new(keyword: 'vpn').call + + result_links = described_class.new(html_response: result).call[:result_links] + + non_ads = result_links.select { |link| link[:link_type] == :non_ads } + + expect(non_ads.length).to eq(14) + end + + it 'gets exactly 113 links', vcr: 'google_search/top_ads_6' do + # Counted from cassette html raw code + result = Google::ClientService.new(keyword: 'vpn').call + + expect(described_class.new(html_response: result).call[:total_result_count]).to eq(113) + end + end end end From 7dda9bb68f77101383b233bece3f91aa5b25f3a7 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 22 Jun 2023 14:34:23 +0700 Subject: [PATCH 21/27] [#21] Refactor parser service --- app/services/google/parser_service.rb | 51 +++++++++++++++++---------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/app/services/google/parser_service.rb b/app/services/google/parser_service.rb index 17bcac6..02aa701 100644 --- a/app/services/google/parser_service.rb +++ b/app/services/google/parser_service.rb @@ -7,30 +7,19 @@ class ParserService ADWORDS_CLASS = 'adwords' def initialize(html_response:) - raise ArgumentError, 'response.body cannot be blank' if html_response.body.blank? - @html = html_response - @document = Nokogiri::HTML.parse(html_response) - - # Add a class to all AdWords link for easier manipulation - document.css('div[data-text-ad] a[data-ved]').add_class(ADWORDS_CLASS) - - # Mark footer links to identify them - document.css('#footcnt a').add_class('footer-links') + @document = Nokogiri::HTML.parse(html_response) if html_response.body end # Parse html data and return a hash with the results def call - { - top_ad_count: ads_top_count, - ad_count: ads_page_count, - non_ad_count: non_ads_result_count, - total_result_count: total_link_count, - raw_response: html, - result_links: result_links, - status: :completed - } + return unless valid? + + mark_adword_links + mark_footer_links + + present_parsed_data end private @@ -75,5 +64,31 @@ def result_links def result_link_map(urls, type) urls.map { |url| { url: url, link_type: type } } end + + def valid? + html.present? && document.present? + end + + def mark_adword_links + # Add a class to all AdWords link for easier manipulation + document.css('div[data-text-ad] a[data-ved]').add_class(ADWORDS_CLASS) + end + + def mark_footer_links + # Mark footer links to identify them + document.css('#footcnt a').add_class('footer-links') + end + + def present_parsed_data + { + top_ad_count: ads_top_count, + ad_count: ads_page_count, + non_ad_count: non_ads_result_count, + total_result_count: total_link_count, + raw_response: html, + result_links: result_links, + status: :completed + } + end end end From cf825ffb339fcd06307c3a12ca552d64c892ee24 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 29 Jun 2023 09:45:43 +0700 Subject: [PATCH 22/27] [#21] Add tests for failing scenarios for google search keyword job --- app/jobs/google/search_keyword_job.rb | 2 +- .../vcr/google_search/too_many_requests.yml | 63 +++++++++++++++++++ spec/jobs/google/search_keyword_job_spec.rb | 49 +++++++++++++++ 3 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 spec/fixtures/vcr/google_search/too_many_requests.yml diff --git a/app/jobs/google/search_keyword_job.rb b/app/jobs/google/search_keyword_job.rb index f8aa851..e7f38ed 100644 --- a/app/jobs/google/search_keyword_job.rb +++ b/app/jobs/google/search_keyword_job.rb @@ -18,7 +18,7 @@ def perform(search_stat_id:) update_search_stat(search_stat, parsed_attributes) rescue ActiveRecord::RecordNotFound, ClientServiceError, ArgumentError - update_keyword_status search_stat, :failed + update_search_stat_status search_stat, :failed end def update_search_stat(search_stat, attributes) diff --git a/spec/fixtures/vcr/google_search/too_many_requests.yml b/spec/fixtures/vcr/google_search/too_many_requests.yml new file mode 100644 index 0000000..5804d9e --- /dev/null +++ b/spec/fixtures/vcr/google_search/too_many_requests.yml @@ -0,0 +1,63 @@ +--- +http_interactions: + - request: + method: get + uri: https://www.google.com/search?gl=en&hl=en&q=vpn + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, + like Gecko) Chrome/91.0.4472.77 Safari/537.36 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 429 + message: Too Many Requests + headers: + Content-Type: + - text/html; charset=UTF-8 + Date: + - Mon, 14 Jun 2021 08:39:12 GMT + Expires: + - "-1" + Cache-Control: + - private, max-age=0 + Strict-Transport-Security: + - max-age=31536000 + Bfcache-Opt-In: + - unload + P3p: + - CP="This is not a P3P policy! See g.co/p3phelp for more info." + Server: + - gws + X-Xss-Protection: + - '0' + X-Frame-Options: + - SAMEORIGIN + Set-Cookie: + - 1P_JAR=2021-06-14-08; expires=Wed, 14-Jul-2021 08:39:12 GMT; path=/; domain=.google.com; + Secure; SameSite=none + - CGIC=IgMqLyo; expires=Sat, 11-Dec-2021 08:39:12 GMT; path=/complete/search; + domain=.google.com; HttpOnly + - CGIC=IgMqLyo; expires=Sat, 11-Dec-2021 08:39:12 GMT; path=/search; domain=.google.com; + HttpOnly + - NID=216=RIaFqvX4KKi9ZQ9qGAicOJwbAOtokQNW9gIxE67VedOJHU0vWABUDx3P_0KdnOfQgkFyh1X3aSZ_on3Q4G3HwNCevH3-dM-VdV-Kkz0jh4xpGZV0K8n1dm2BVDm341KMPj_luc32sxztW9pdoTU3YnXYADzv212zuQPwAfhoSFI; + expires=Tue, 14-Dec-2021 08:39:12 GMT; path=/; domain=.google.com; Secure; + HttpOnly; SameSite=none + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; + ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; + ma=2592000; v="46,43" + Transfer-Encoding: + - chunked + body: + encoding: ASCII-8BIT + string: !binary |- + + + recorded_at: Mon, 14 Jun 2021 08:39:13 GMT +recorded_with: VCR 6.0.0 diff --git a/spec/jobs/google/search_keyword_job_spec.rb b/spec/jobs/google/search_keyword_job_spec.rb index f3250d1..96b0a19 100644 --- a/spec/jobs/google/search_keyword_job_spec.rb +++ b/spec/jobs/google/search_keyword_job_spec.rb @@ -39,5 +39,54 @@ expect(search_stat.top_ad_count + search_stat.non_ad_count).to eq(45) end end + + context 'given a 422 too many requests error' do + it 'sets the search stat status as failed', vcr: 'google_search/too_many_requests' do + search_stat = Fabricate(:search_stat) + + described_class.perform_now(search_stat_id: search_stat.id) + + rescue Google::ClientServiceError + expect(search_stat.reload.status).to eq('failed') + end + + it 'does not save any result_links', vcr: 'google_search/too_many_requests' do + search_stat = Fabricate(:search_stat) + + described_class.perform_now(search_stat_id: search_stat.id) + + rescue Google::ClientServiceError + expect(search_stat.reload.result_links.count).to eq(0) + end + + it 'does not set any result count', vcr: 'google_search/too_many_requests' do + search_stat = Fabricate(:search_stat) + + described_class.perform_now(search_stat_id: search_stat.id) + + rescue Google::ClientServiceError + search_stat.reload + + expect([search_stat.ads_top_count, search_stat.ads_page_count, search_stat.non_ads_result_count]).to all(be_nil) + end + + it 'does not set the html attribute', vcr: 'google_search/too_many_requests' do + search_stat = Fabricate(:search_stat) + + described_class.perform_now(search_stat_id: search_stat.id) + + rescue Google::ClientServiceError + expect(search_stat.reload.html).not_to be_present + end + + it 'performs a SearchProgress job with the right user id', vcr: 'google_search/too_many_requests' do + search_stat = Fabricate(:search_stat) + + described_class.perform_now(search_stat_id: search_stat.id) + + rescue Google::ClientServiceError + expect(Google::SearchProgressJob).to have_received(:perform_now).with(search_stat.user_id).exactly(:once) + end + end end end From 9691391ee0b413302ccf3f340597093140cd738c Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 29 Jun 2023 11:14:21 +0700 Subject: [PATCH 23/27] [#21] Add tests for client service --- spec/services/google/client_service.rb | 40 ++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 spec/services/google/client_service.rb diff --git a/spec/services/google/client_service.rb b/spec/services/google/client_service.rb new file mode 100644 index 0000000..2ff926f --- /dev/null +++ b/spec/services/google/client_service.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module Google + class ClientService + USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '\ + 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36' + + BASE_SEARCH_URL = 'https://www.google.com/search' + + def initialize(keyword:, lang: 'en') + @escaped_keyword = CGI.escape(keyword) + @uri = URI("#{BASE_SEARCH_URL}?q=#{@escaped_keyword}&hl=#{lang}&gl=#{lang}") + end + + def call + result = HTTParty.get(@uri, { headers: { 'User-Agent' => USER_AGENT } }) + + return false unless valid_result? result + + result + rescue HTTParty::Error, Timeout::Error, SocketError => e + Rails.logger.error "Error: Query Google with '#{@escaped_keyword}' thrown an error: #{e}".colorize(:red) + + false + end + + private + + # Inspect Http response status code + # Any non 200 response code will be logged + def valid_result?(result) + return true if result&.response&.code == '200' + + Rails.logger.warn "Warning: Query Google with '#{@escaped_keyword}' return status code #{result.response.code}" + .colorize(:yellow) + + false + end + end +end From ced1a0e2346f4976c2f9ba1b437b0a0020b92827 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 29 Jun 2023 11:53:16 +0700 Subject: [PATCH 24/27] [#21] Refactor search keyword job and client service --- app/jobs/google/search_keyword_job.rb | 5 ----- app/services/google/client_service.rb | 8 ++++---- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/app/jobs/google/search_keyword_job.rb b/app/jobs/google/search_keyword_job.rb index e7f38ed..79911d3 100644 --- a/app/jobs/google/search_keyword_job.rb +++ b/app/jobs/google/search_keyword_job.rb @@ -8,12 +8,7 @@ class SearchKeywordJob < ApplicationJob def perform(search_stat_id:) search_stat = SearchStat.find search_stat_id - return unless search_stat - html_result = Google::ClientService.new(keyword: search_stat.keyword).call - - raise ClientServiceError unless html_result - parsed_attributes = ParserService.new(html_response: html_result).call update_search_stat(search_stat, parsed_attributes) diff --git a/app/services/google/client_service.rb b/app/services/google/client_service.rb index da20b0b..8b43ae7 100644 --- a/app/services/google/client_service.rb +++ b/app/services/google/client_service.rb @@ -17,13 +17,13 @@ def initialize(keyword:, lang: 'en') def call result = HTTParty.get(@uri, { headers: { 'User-Agent' => USER_AGENT } }) - return false unless valid_result? result + raise ClientServiceError unless valid_result? result result - rescue HTTParty::Error, Timeout::Error, SocketError => e - Rails.logger.error "Error: Query Google with '#{@escaped_keyword}' thrown an error: #{e}".colorize(:red) + rescue HTTParty::Error, Timeout::Error, SocketError, ClientServiceError => e + Rails.logger.error "Error: Query Google with '#{@escaped_keyword}' thrown an error: #{e}" - false + raise ClientServiceError, 'Error fetching HTML result' end private From 6b8502e5752fad195f0f0958d408d75eb91dd36d Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 29 Jun 2023 12:02:12 +0700 Subject: [PATCH 25/27] [#21] Rescue active record transaction exception in search keyword job --- app/jobs/google/search_keyword_job.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/jobs/google/search_keyword_job.rb b/app/jobs/google/search_keyword_job.rb index 79911d3..c022329 100644 --- a/app/jobs/google/search_keyword_job.rb +++ b/app/jobs/google/search_keyword_job.rb @@ -12,7 +12,7 @@ def perform(search_stat_id:) parsed_attributes = ParserService.new(html_response: html_result).call update_search_stat(search_stat, parsed_attributes) - rescue ActiveRecord::RecordNotFound, ClientServiceError, ArgumentError + rescue ActiveRecord::RecordNotFound, ClientServiceError, ArgumentError, ActiveRecord::RecordInvalid update_search_stat_status search_stat, :failed end From 8f407693ee77c231d183942a11b2518a4ef5047c Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 29 Jun 2023 14:07:13 +0700 Subject: [PATCH 26/27] [#21] Remove unnecessary task class --- lib/tasks/search_keyword.rake | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 lib/tasks/search_keyword.rake diff --git a/lib/tasks/search_keyword.rake b/lib/tasks/search_keyword.rake deleted file mode 100644 index a8973fe..0000000 --- a/lib/tasks/search_keyword.rake +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -# lib/tasks/search_keyword.rake - -namespace :google do - desc 'Schedule the SearchKeywordJob' - task schedule_search_keyword_job: :environment do - # Schedule the SearchKeywordJob for background processing - Google::SearchKeywordJob.perform_later(search_stat_id: 1) - - puts 'SearchKeywordJob scheduled successfully.' - end -end From bd1df63230d99ee4c173bac6d94bb834bb6eec43 Mon Sep 17 00:00:00 2001 From: Md Mosharaf Hossan Date: Thu, 29 Jun 2023 18:11:41 +0700 Subject: [PATCH 27/27] [#21] Reorder parser service methods --- app/services/google/parser_service.rb | 52 +++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/app/services/google/parser_service.rb b/app/services/google/parser_service.rb index 02aa701..7289e3c 100644 --- a/app/services/google/parser_service.rb +++ b/app/services/google/parser_service.rb @@ -26,6 +26,32 @@ def call attr_reader :html, :document + def valid? + html.present? && document.present? + end + + def mark_adword_links + # Add a class to all AdWords link for easier manipulation + document.css('div[data-text-ad] a[data-ved]').add_class(ADWORDS_CLASS) + end + + def mark_footer_links + # Mark footer links to identify them + document.css('#footcnt a').add_class('footer-links') + end + + def present_parsed_data + { + top_ad_count: ads_top_count, + ad_count: ads_page_count, + non_ad_count: non_ads_result_count, + total_result_count: total_link_count, + raw_response: html, + result_links: result_links, + status: :completed + } + end + def ads_top_count document.css("##{AD_CONTAINER_ID} .#{ADWORDS_CLASS}").count end @@ -64,31 +90,5 @@ def result_links def result_link_map(urls, type) urls.map { |url| { url: url, link_type: type } } end - - def valid? - html.present? && document.present? - end - - def mark_adword_links - # Add a class to all AdWords link for easier manipulation - document.css('div[data-text-ad] a[data-ved]').add_class(ADWORDS_CLASS) - end - - def mark_footer_links - # Mark footer links to identify them - document.css('#footcnt a').add_class('footer-links') - end - - def present_parsed_data - { - top_ad_count: ads_top_count, - ad_count: ads_page_count, - non_ad_count: non_ads_result_count, - total_result_count: total_link_count, - raw_response: html, - result_links: result_links, - status: :completed - } - end end end