Skip to content

Full Setting Example

Elendil edited this page May 11, 2018 · 4 revisions
{
 "AppSettings": {
  "Scheduler": {
  "SchedulerType": "NScrapy.Scheduler.RedisExt.RedisScheduler"
  },//If the Scheduler section is disabled, NScrapy runs the crawl in InMemory mode
  "Scheduler.RedisExt": {
    "RedisServer": "192.168.0.107",
    "RedisPort": "6379",
    "ReceiverQueue": "NScrapy.Downloader",
    "ResponseTopic": "NScrapy.ResponseTopic"
  },
"DownloaderMiddlewares": [
  {
    "Middleware": "NScrapy.Downloader.Middleware.HttpUserAgentPoolMiddleware"
  },
  {
    "RemovedMiddleware": "test3"//Not yet implemented
  }
],
"DownloaderPoolCapbility": 4,//Number of Downloaders in the DownloaderPool. More is generally better, but throughput is still limited by your system's bandwidth
"HttpHeader": {
  "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
  "Accept-Encoding": "gzip,deflate,br",//Better not to use br encoding: NScrapy does not support br decoding/decompression yet
  "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36"
},
"SpiderProject": "NScrapy.Test",//Specify the Entry Assembly of the Spider
"UserAgentPool": [//Add User-Agent strings here. Please note: do not add mobile user agents, otherwise the Downloader will receive HTTP responses tailored for mobile devices
  {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko"
  },
  {
    "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
  },
  {
    "User-Agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)"
  },
  {
    "User-Agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)"
  }
]
}
}

Clone this wiki locally