In this article, we demonstrate how you can easily scrape data from a page behind a login using an Apify actor with Puppeteer. For this example, we'll use https://www.facebook.com/

First of all, let's find the login form and the submit button on a login page using Chrome Dev Tools.

You can see that the email field is an HTML element with the id email, the password field has the id pass, and the form submission button is inside the element with the id loginbutton. In the next step, we'll write the actor's source code, which uses the Puppeteer API to fill in the username and password on the page and click the submit button:

const Apify = require('apify');

// Actor entry point: logs in to Facebook with the credentials from the actor
// INPUT and then reuses the resulting session cookies in a second tab.
// INPUT is expected to be an object with `username` and `password` properties.
Apify.main(async () => {
    const input = await Apify.getValue('INPUT');
    if (!input || !input.username || !input.password) {
        // Fail early with a readable message instead of a TypeError deep inside page.type().
        throw new Error('INPUT must be a JSON object with "username" and "password" properties.');
    }

    const browser = await Apify.launchPuppeteer();
    try {
        const page = await browser.newPage();
        await page.goto('https://facebook.com');

        // Login
        await page.type('#email', input.username);
        await page.type('#pass', input.password);
        // Start waiting for the navigation BEFORE the click resolves. If the click
        // is awaited first, the navigation may already be finished by the time
        // waitForNavigation() starts listening, and the wait then runs into its timeout.
        await Promise.all([
            page.waitForNavigation(),
            page.click('#loginbutton input'),
        ]);

        // Get cookies
        const cookies = await page.cookies();

        // Use cookies in other tab or browser
        const page2 = await browser.newPage();
        await page2.setCookie(...cookies);
        await page2.goto('https://facebook.com'); // Opens the page as the logged-in user
    } finally {
        // Always release the browser, even when one of the steps above throws.
        await browser.close();
    }

    console.log('Done.');
});

Now you can call this actor and pass the Facebook login credentials as input JSON. For example:

{
    "username": "bob@example.com",
    "password": "my secret password"
}

For most pages, you will want to save the cookies and then reuse them in the next run. The following code lets you avoid logging in on every run. It uses a named key-value store in which the cookies are saved for the next run.

const Apify = require('apify');

/**
 * Reports whether the element `#bluebarRoot` shows up on the given page.
 * The caller uses the result as its "is the user logged in?" signal
 * (presumably that element is only rendered for logged-in users — verify
 * against the current Facebook markup).
 *
 * @param {object} page - Puppeteer page to inspect.
 * @returns {Promise<boolean>} `true` when `waitForSelector` resolves,
 *     `false` when it fails for any reason (e.g. the 10000 timeout elapses).
 */
const loggedCheck = async (page) => {
    let selectorFound = true;
    try {
        await page.waitForSelector('#bluebarRoot', { timeout: 10000 });
    } catch (ignored) {
        // Any failure while waiting is treated as "not logged in".
        selectorFound = false;
    }
    return selectorFound;
};

// Actor entry point: opens Facebook as a logged-in user, preferring session
// cookies cached in the named key-value store 'fcb-cache' and falling back to
// a regular login with the credentials from the actor INPUT.
// INPUT is expected to be an object with `username` and `password` properties;
// `password` is only needed when the cached cookies are missing or rejected.
// Throws when the user is still not logged in after the login attempt.
Apify.main(async () => {
    const input = await Apify.getValue('INPUT');
    if (!input || typeof input.username !== 'string' || input.username === '') {
        // Fail early with a readable message instead of a TypeError on input.username.
        throw new Error('INPUT must be a JSON object with a "username" property.');
    }

    const fcbCacheStore = await Apify.openKeyValueStore('fcb-cache');
    // One cache record per user; '@' is replaced to build the record key.
    const cookiesStoreKey = input.username.replace('@', '(at)');

    const browser = await Apify.launchPuppeteer();
    try {
        const page = await browser.newPage();

        let isLogged = false;
        const userCookies = await fcbCacheStore.getValue(cookiesStoreKey);
        if (userCookies) {
            console.log('Try to use cookies from cache..');
            await page.setCookie(...userCookies);
            await page.goto('https://facebook.com');
            isLogged = await loggedCheck(page);
        }

        if (!isLogged) {
            console.log(`Cookies from cache didn't work, try to login..`);
            if (!input.password) {
                throw new Error('INPUT must contain a "password" property to log in.');
            }
            await page.goto('https://facebook.com');
            await page.type('#email', input.username);
            await page.type('#pass', input.password);
            // Start waiting for the navigation BEFORE the click resolves. If the click
            // is awaited first, the navigation may already be finished by the time
            // waitForNavigation() starts listening, and the wait then runs into its timeout.
            await Promise.all([
                page.waitForNavigation(),
                page.click('#loginbutton input'),
            ]);
            isLogged = await loggedCheck(page);
        }

        if (!isLogged) {
            throw new Error('Incorrect username or password!');
        }

        // Get cookies and refresh them in store cache
        console.log(`Saving new cookies to cache..`);
        const cookies = await page.cookies();
        await fcbCacheStore.setValue(cookiesStoreKey, cookies);

        // Use cookies in other tab or browser
        const page2 = await browser.newPage();
        await page2.setCookie(...cookies);
        await page2.goto('https://facebook.com'); // Opens the page as the logged-in user
    } finally {
        // Always release the browser, including when the login check above throws.
        await browser.close();
    }

    console.log('Done.');
});

If you have any questions about Apify actors, just let us know.

Did this answer your question?