Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/current/2825-add-browserless-support.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Added

- Added support for browserless
14 changes: 14 additions & 0 deletions docs/dev/api/0.1.2/objects.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,18 @@ VisibleInfoBlocks:
example: false
description: Show the list of tools in the UI

# Connection settings for a Browserless instance used to fetch recipe pages.
# Both entries are nullable; they are null when nothing has been configured.
BrowserlessConfig:
type: object
properties:
# Address of the Browserless REST API
url:
type: string
example: http://localhost:3000
nullable: true
# Token that is sent to the Browserless API with each request
token:
type: string
example: ABCD
nullable: true

Config:
type: object
description: An object describing the configuration of the web app
Expand All @@ -66,6 +78,8 @@ Config:
description: True, if the user wished to print the recipe images with the rest of the recipes
visibleInfoBlocks:
$ref: "#/VisibleInfoBlocks"
browserless_config:
$ref: "#/BrowserlessConfig"

Error:
type: object
Expand Down
Binary file added docs/user/assets/settings-browserless.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 14 additions & 0 deletions docs/user/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,17 @@ Currently, the only way to share recipes is by sharing the Nextcloud folder that
### Public Sharing

At the moment it is not possible to share a public link to a recipe.

## Browserless configuration

By default, Cookbook fetches recipe pages with a plain HTTP request. Some websites detect this (bot detection) and block the request, which makes it impossible to import the recipe.
To work around this, you can configure Browserless. Browserless is a service that downloads a web page in a more user-like way and thus bypasses many bot-detection mechanisms on websites.

To set this up, you need to supply a URL and a token.

<img src="assets/settings-browserless.png" alt="Cookbook settings - Browserless" width="200px" />

You can create an account at https://www.browserless.io/ or set up a self-hosted instance.

For the URL, be sure to supply the REST API URL; see https://docs.browserless.io/overview/connection-urls.
For the Amsterdam region, for example, this would be: https://production-ams.browserless.io
6 changes: 6 additions & 0 deletions lib/Controller/Implementation/ConfigImplementation.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public function __construct(
}

protected const KEY_VISIBLE_INFO_BLOCKS = 'visibleInfoBlocks';
protected const KEY_BROWSERLESS_CONFIG = 'browserless_config';

/**
* Get the current configuration of the app
Expand All @@ -46,6 +47,7 @@ public function list() {
'update_interval' => $this->dbCacheService->getSearchIndexUpdateInterval(),
'print_image' => $this->service->getPrintImage(),
self::KEY_VISIBLE_INFO_BLOCKS => $this->service->getVisibleInfoBlocks(),
self::KEY_BROWSERLESS_CONFIG => $this->service->getBrowserlessConfig(),
], Http::STATUS_OK);
}

Expand Down Expand Up @@ -79,6 +81,10 @@ public function config() {
$this->service->setVisibleInfoBlocks($data[self::KEY_VISIBLE_INFO_BLOCKS]);
}

if (isset($data[self::KEY_BROWSERLESS_CONFIG])) {
$this->service->setBrowserlessConfig($data[self::KEY_BROWSERLESS_CONFIG]);
}

$this->dbCacheService->triggerCheck();

return new JSONResponse('OK', Http::STATUS_OK);
Expand Down
30 changes: 30 additions & 0 deletions lib/Helper/UserConfigHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public function __construct(
protected const KEY_PRINT_IMAGE = 'print_image';
protected const KEY_VISIBLE_INFO_BLOCKS = 'visible_info_blocks';
protected const KEY_FOLDER = 'folder';
protected const KEY_BROWSERLESS_CONFIG = 'browserless_config';

/**
* Checks if the user is logged in and the configuration can be obtained at all
Expand Down Expand Up @@ -226,4 +227,33 @@ public function getFolderName(): string {
public function setFolderName(string $value): void {
$this->setRawValue(self::KEY_FOLDER, $value);
}

/**
 * Gets the browserless config from the configuration
 *
 * The stored value is expected to be a JSON object. An empty, malformed or
 * incomplete stored value does not cause an error: every entry that cannot
 * be read as a string is reported as null.
 *
 * @return array<string, string | null> keys: url and token, values: url and token
 * @throws UserNotLoggedInException if no user is logged in
 */
public function getBrowserlessConfig(): array {
    // Defaults that are returned for everything that is not (validly) configured
    $config = [
        'url' => null,
        'token' => null,
    ];

    $rawValue = $this->getRawValue(self::KEY_BROWSERLESS_CONFIG);

    if ($rawValue === '') {
        return $config;
    }

    $decoded = json_decode($rawValue, true);

    if (!is_array($decoded)) {
        // json_decode yields null for invalid JSON and a scalar for JSON scalars.
        // Returning either would violate the declared array return type.
        return $config;
    }

    // Only take over the documented keys and only if they carry strings
    foreach (array_keys($config) as $key) {
        if (isset($decoded[$key]) && is_string($decoded[$key])) {
            $config[$key] = $decoded[$key];
        }
    }

    return $config;
}

/**
 * Stores the browserless config in the configuration
 *
 * The given array is serialized as JSON before it is written.
 *
 * @param array<string, string | null> $data keys: url and token, values: url and token
 * @throws UserNotLoggedInException if no user is logged in
 */
public function setBrowserlessConfig(array $data): void {
    $serialized = json_encode($data);

    $this->setRawValue(self::KEY_BROWSERLESS_CONFIG, $serialized);
}
}
78 changes: 76 additions & 2 deletions lib/Service/HtmlDownloadService.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
use OCA\Cookbook\Helper\HTMLFilter\HtmlEncodingFilter;
use OCA\Cookbook\Helper\HTMLFilter\HtmlEntityDecodeFilter;
use OCA\Cookbook\Helper\HtmlToDomParser;
use OCA\Cookbook\Helper\UserConfigHelper;
use OCP\IL10N;
use Psr\Log\LoggerInterface;

Expand Down Expand Up @@ -44,6 +45,9 @@ class HtmlDownloadService {
/** @var DownloadEncodingHelper */
private $downloadEncodingHelper;

/** @var UserConfigHelper */
private $userConfigHelper;

/**
* @var DOMDocument
*/
Expand All @@ -58,6 +62,7 @@ public function __construct(
DownloadHelper $downloadHelper,
EncodingGuessingHelper $encodingGuesser,
DownloadEncodingHelper $downloadEncodingHelper,
UserConfigHelper $userConfigHelper,
) {
$this->htmlFilters = [
$htmlEntityDecodeFilter,
Expand All @@ -69,6 +74,7 @@ public function __construct(
$this->downloadHelper = $downloadHelper;
$this->encodingGuesser = $encodingGuesser;
$this->downloadEncodingHelper = $downloadEncodingHelper;
$this->userConfigHelper = $userConfigHelper;
}

/**
Expand All @@ -82,7 +88,16 @@ public function __construct(
* @throws ImportException If obtaining of the URL was not possible
*/
public function downloadRecipe(string $url): int {
$html = $this->fetchHtmlPage($url);
$browserlessConfig = $this->userConfigHelper->getBrowserlessConfig();

// Check if a browserless configuration is available
if (!empty($browserlessConfig['url']) && !empty($browserlessConfig['token'])) {
// Use Browserless API if the url and token are set
$html = $this->fetchHtmlPageUsingBrowserless($url);
} else {
// Otherwise, use the standard method
$html = $this->fetchHtmlPage($url);
}

// Filter the HTML code
/** @var AbstractHtmlFilter $filter */
Expand All @@ -104,6 +119,61 @@ public function getDom(): ?DOMDocument {
return $this->dom;
}

/**
 * Fetch an HTML page from Browserless.io or a self-hosted Browserless instance (rendered HTML)
 *
 * The page is requested by sending a JSON payload via POST to the
 * `/chromium/content` endpoint below the configured Browserless address.
 * The configured token is passed as a query parameter.
 *
 * @param string $url The URL of the page to fetch
 *
 * @throws ImportException If Browserless is not fully configured or the given URL was not fetched
 *
 * @return string The rendered HTML content as a plain string
 */
private function fetchHtmlPageUsingBrowserless(string $url): string {
    // Get the browserless config from the user configuration
    $browserlessConfig = $this->userConfigHelper->getBrowserlessConfig();
    $browserlessAddress = $browserlessConfig['url'] ?? null;
    $browserlessToken = $browserlessConfig['token'] ?? null;

    if (empty($browserlessAddress) || !is_string($browserlessAddress)) {
        // Handle the case where Browserless address is not configured
        $this->logger->error('Browserless address is not set.');
        throw new ImportException($this->l->t('Browserless address is not configured.'));
    }

    if (empty($browserlessToken) || !is_string($browserlessToken)) {
        // Handle the case where Browserless token is not configured
        $this->logger->error('Browserless token is not set.');
        throw new ImportException($this->l->t('Browserless token is not configured.'));
    }

    // API endpoint for Browserless.
    // A trailing slash in the configured address would otherwise produce a double slash in the path.
    // The token is percent-encoded so that reserved characters (&, #, +, ...) cannot break the query string.
    // NOTE(review): fetchContent() puts the URL it is given into its ImportException message.
    // As the endpoint carries the token, that message contains the token as well.
    $apiEndpoint = rtrim($browserlessAddress, '/') . '/chromium/content?token=' . rawurlencode($browserlessToken);

    // Browserless expects a language tag with a dash (e.g. de-DE) instead of an underscore
    $langCode = $this->l->getLocaleCode();
    $langCode = str_replace('_', '-', $langCode);

    // Prepare the data to be sent in the POST request
    $data = json_encode([
        'url' => $url,
        'userAgent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0',
        'setExtraHTTPHeaders' => [
            'Accept-Language' => "$langCode,en;q=0.5",
        ],
    ]);

    // Options for the request to the Browserless API itself (not for the recipe page)
    $opt = [
        CURLOPT_USERAGENT => 'Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0',
        CURLOPT_POSTFIELDS => $data,
        CURLOPT_CUSTOMREQUEST => 'POST',
    ];

    $headers = [
        'Content-Type: application/json',
    ];

    return $this->fetchContent($apiEndpoint, $opt, $headers);
}

/**
* Fetch an HTML page from the internet
*
Expand Down Expand Up @@ -143,8 +213,12 @@ private function fetchHtmlPage(string $url): string {
'TE: trailers'
];

return $this->fetchContent($url, $opt, $headers);
}

private function fetchContent(string $url, array $options, array $headers): string {
try {
$this->downloadHelper->downloadFile($url, $opt, $headers);
$this->downloadHelper->downloadFile($url, $options, $headers);
} catch (NoDownloadWasCarriedOutException $ex) {
throw new ImportException($this->l->t('Exception while downloading recipe from %s.', [$url]), 0, $ex);
}
Expand Down
20 changes: 16 additions & 4 deletions lib/Service/RecipeService.php
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ public function addRecipe($json, $importedHtml = null) {

$recipe_folder->move($new_path);
}

} else {
// This is a new recipe, create it
$json['dateCreated'] = $now;
Expand Down Expand Up @@ -290,7 +289,6 @@ public function addRecipe($json, $importedHtml = null) {
$this->logger->warning('Failed to download an image using curl. Falling back to PHP default behavior.');
$full_image_data = file_get_contents($json['image']);
}

} else {
// The image is a local path
try {
Expand All @@ -301,7 +299,6 @@ public function addRecipe($json, $importedHtml = null) {
}
}
}

} else {
// The image field was empty, remove images in the recipe folder
$this->imageService->dropImage($recipe_folder);
Expand Down Expand Up @@ -419,7 +416,6 @@ private function migrateFolderStructure() {
$recipe_folder = $user_folder->newFolder($recipe_name);

$node->move($recipe_folder->getPath() . '/recipe.json');

} elseif ($node instanceof Folder && strpos($node->getName(), '.json')) {
// Rename folders with .json extensions (this was likely caused by a migration bug)
$node->move(str_replace('.json', '', $node->getPath()));
Expand Down Expand Up @@ -567,6 +563,22 @@ public function getVisibleInfoBlocks(): array {
return $this->userConfigHelper->getVisibleInfoBlocks();
}

/**
 * Read the browserless configuration from the user configuration helper
 *
 * @return array<string, string | null> keys: url and token, values: url and token
 */
public function getBrowserlessConfig(): array {
    $browserlessConfig = $this->userConfigHelper->getBrowserlessConfig();

    return $browserlessConfig;
}

/**
 * Hand the browserless configuration over to the user configuration helper for storing
 *
 * @param array<string, string | null> $data keys: url and token, values: url and token
 */
public function setBrowserlessConfig(array $data) {
    $configHelper = $this->userConfigHelper;

    $configHelper->setBrowserlessConfig($data);
}

/**
* Get recipe file contents as an array
*/
Expand Down
Loading