If you're using a crawler and find yourself needing to share data between multiple pageFunction calls, or need to persist some information for a future crawler run, there's not much the default PhantomJS browser can offer.
However, persistence is exactly what our key-value store is built for. When using it from your crawler, you won't have the comfort of the Apify-client library available to Apify actors, but you can still use it quite easily.
First, we need to prepare a small "library" to use in our crawler:
/**
 * Minimal client for the Apify key-value store REST API, meant to be used
 * from a crawler page function where requests are made with jQuery's $.ajax.
 *
 * Every method is asynchronous and reports its outcome through an optional
 * callback: the callback receives the response handed over by $.ajax on
 * success, and null on failure (the failure reason is written to console.log).
 *
 * @param {string} name  Store name, sent as the `name` query parameter by initialize().
 * @param {string} token API token, sent as the `token` query parameter by initialize().
 */
function Store(name, token){
    var API_BASE = 'https://api.apify.com/v2/key-value-stores';

    this.id = null;
    this.name = name;
    this.token = token;

    // Builds the URL of one record. Returns null while the store has no id yet
    // (initialize() not called, still pending, or failed), so callers can bail
    // out instead of requesting ".../key-value-stores/null/records/...".
    function recordUrl(store, key){
        if(store.id === null || store.id === undefined){
            return null;
        }
        // Encode both path segments so keys containing '/', '?', spaces etc.
        // cannot change the structure of the URL.
        return API_BASE + '/' + encodeURIComponent(store.id) +
            '/records/' + encodeURIComponent(key);
    }

    /**
     * Sends the POST request for the store named `this.name` and remembers
     * the id from the response in `this.id`.
     *
     * @param {function(?Object)=} callback Called with the API response, or null on failure.
     */
    this.initialize = function(callback){
        var self = this;
        // Query values are encoded so that '&', '+', '/' or spaces in the
        // token or name do not corrupt the query string.
        var url = API_BASE + '?token=' + encodeURIComponent(this.token) +
            '&name=' + encodeURIComponent(this.name);
        $.ajax({
            url: url,
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            success: function(resp){
                self.id = resp.data.id;
                if(callback){callback(resp);}
            },
            error: function(xhr, e1, e2){
                console.log('createStore, ' + e1 + ': ' + e2);
                if(callback){callback(null);}
            }
        });
    };

    /**
     * Reads the record stored under `key`.
     *
     * @param {string} key Record key.
     * @param {function(*)=} callback Called with the record value, or null on failure.
     */
    this.getValue = function(key, callback){
        var url = recordUrl(this, key);
        if(url === null){
            console.log('getValue, error: store is not initialized');
            if(callback){callback(null);}
            return;
        }
        $.ajax({
            url: url,
            method: 'GET',
            headers: {'Content-Type': 'application/json'},
            success: function(resp){
                // Exceptions thrown by the caller's callback are logged
                // rather than propagated.
                try{
                    if(callback){callback(resp);}
                }
                catch(e){
                    console.log(e);
                }
            },
            error: function(xhr, e1, e2){
                console.log('getValue, ' + e1 + ': ' + e2);
                if(callback){callback(null);}
            }
        });
    };

    /**
     * Stores `value`, serialized as JSON, under `key`.
     *
     * @param {string} key Record key.
     * @param {*} value Any JSON-serializable value.
     * @param {function(*)=} callback Called with the API response, or null on failure.
     */
    this.putValue = function(key, value, callback){
        var url = recordUrl(this, key);
        if(url === null){
            console.log('putValue, error: store is not initialized');
            if(callback){callback(null);}
            return;
        }
        $.ajax({
            url: url,
            method: 'PUT',
            headers: {'Content-Type': 'application/json'},
            success: function(resp){
                if(callback){callback(resp);}
            },
            error: function(xhr, e1, e2){
                console.log('putValue, ' + e1 + ': ' + e2);
                if(callback){callback(null);}
            },
            data: JSON.stringify(value)
        });
    };
}
Now we can use it as follows:
// The store calls below are asynchronous, so the page function is finished
// explicitly with context.finish() once the last callback has run.
context.willFinishLater();
// create store
var store = new Store('my-store-name', 'my-api-token');
// initialize store
store.initialize(function(info){
    // initialize() passes null when the request failed; store.id is then
    // still null, so there is nothing to read from or write to.
    if(!info){
        console.log('store initialization failed');
        context.finish();
        return;
    }
    console.log('store initialized: ' + store.id);
    // save some data
    store.putValue('test-key', 'test-value', function(){
        console.log('data saved');
        // read the data
        store.getValue('test-key', function(data){
            console.log('data read: ' + data);
            context.finish();
        });
    });
});
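Make sure you have the "Disable web security" option checked in the crawler's advanced settings - otherwise the necessary cross-domain XHR calls won't work. The library also relies on jQuery's $.ajax, so jQuery must be available on the page (for example, by enabling the crawler's "Inject jQuery" option).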