-
Notifications
You must be signed in to change notification settings - Fork 122
style: use dollar variables (getting links) #1844
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -43,16 +43,15 @@ if (response.ok) { | |||||
const html = await response.text(); | ||||||
const $ = cheerio.load(html); | ||||||
|
||||||
const data = []; | ||||||
$(".product-item").each((i, element) => { | ||||||
const productItem = $(element); | ||||||
const $items = $(".product-item").map((i, element) => { | ||||||
const $productItem = $(element); | ||||||
|
||||||
const title = productItem.find(".product-item__title"); | ||||||
const titleText = title.text().trim(); | ||||||
const $title = $productItem.find(".product-item__title"); | ||||||
const title = $title.text().trim(); | ||||||
|
||||||
const price = productItem.find(".price").contents().last(); | ||||||
const $price = $productItem.find(".price").contents().last(); | ||||||
const priceRange = { minPrice: null, price: null }; | ||||||
const priceText = price | ||||||
const priceText = $price | ||||||
.text() | ||||||
.trim() | ||||||
.replace("$", "") | ||||||
|
@@ -66,8 +65,9 @@ if (response.ok) { | |||||
priceRange.price = priceRange.minPrice; | ||||||
} | ||||||
|
||||||
data.push({ title: titleText, ...priceRange }); | ||||||
return { title, ...priceRange }; | ||||||
}); | ||||||
const data = $items.get(); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
const jsonData = JSON.stringify(data); | ||||||
await writeFile('products.json', jsonData); | ||||||
|
@@ -97,13 +97,13 @@ async function download(url) { | |||||
Next, we can put parsing into a `parseProduct()` function, which takes the product item element and returns the object with data: | ||||||
|
||||||
```js | ||||||
function parseProduct(productItem) { | ||||||
const title = productItem.find(".product-item__title"); | ||||||
const titleText = title.text().trim(); | ||||||
function parseProduct($productItem) { | ||||||
const $title = $productItem.find(".product-item__title"); | ||||||
const title = $title.text().trim(); | ||||||
|
||||||
const price = productItem.find(".price").contents().last(); | ||||||
const $price = $productItem.find(".price").contents().last(); | ||||||
const priceRange = { minPrice: null, price: null }; | ||||||
const priceText = price | ||||||
const priceText = $price | ||||||
.text() | ||||||
.trim() | ||||||
.replace("$", "") | ||||||
|
@@ -117,24 +117,18 @@ function parseProduct(productItem) { | |||||
priceRange.price = priceRange.minPrice; | ||||||
} | ||||||
|
||||||
return { title: titleText, ...priceRange }; | ||||||
return { title, ...priceRange }; | ||||||
} | ||||||
``` | ||||||
|
||||||
Now the JSON export. For better readability, let's make a small change here and set the indentation level to two spaces: | ||||||
|
||||||
```js | ||||||
async function exportJSON(data) { | ||||||
function exportJSON(data) { | ||||||
return JSON.stringify(data, null, 2); | ||||||
} | ||||||
``` | ||||||
|
||||||
:::note Why asynchronous? | ||||||
|
||||||
The `exportJSON()` function doesn't need to be `async` now, but keeping it makes future changes easier — like switching to an async JSON parser. It also stays consistent with the upcoming `exportCSV()` function, which must be asynchronous. | ||||||
|
||||||
::: | ||||||
|
||||||
The last function we'll add will take care of the CSV export: | ||||||
|
||||||
```js | ||||||
|
@@ -161,13 +155,13 @@ async function download(url) { | |||||
} | ||||||
} | ||||||
|
||||||
function parseProduct(productItem) { | ||||||
const title = productItem.find(".product-item__title"); | ||||||
const titleText = title.text().trim(); | ||||||
function parseProduct($productItem) { | ||||||
const $title = $productItem.find(".product-item__title"); | ||||||
const title = $title.text().trim(); | ||||||
|
||||||
const price = productItem.find(".price").contents().last(); | ||||||
const $price = $productItem.find(".price").contents().last(); | ||||||
const priceRange = { minPrice: null, price: null }; | ||||||
const priceText = price | ||||||
const priceText = $price | ||||||
.text() | ||||||
.trim() | ||||||
.replace("$", "") | ||||||
|
@@ -181,10 +175,10 @@ function parseProduct(productItem) { | |||||
priceRange.price = priceRange.minPrice; | ||||||
} | ||||||
|
||||||
return { title: titleText, ...priceRange }; | ||||||
return { title, ...priceRange }; | ||||||
} | ||||||
|
||||||
async function exportJSON(data) { | ||||||
function exportJSON(data) { | ||||||
return JSON.stringify(data, null, 2); | ||||||
} | ||||||
|
||||||
|
@@ -196,14 +190,14 @@ async function exportCSV(data) { | |||||
const listingURL = "https://warehouse-theme-metal.myshopify.com/collections/sales" | ||||||
const $ = await download(listingURL); | ||||||
|
||||||
const data = [] | ||||||
$(".product-item").each((i, element) => { | ||||||
const productItem = $(element); | ||||||
const item = parseProduct(productItem); | ||||||
data.push(item); | ||||||
const $items = $(".product-item").map((i, element) => { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
const $productItem = $(element); | ||||||
const item = parseProduct($productItem); | ||||||
return item; | ||||||
}); | ||||||
const data = $items.get(); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
await writeFile('products.json', await exportJSON(data)); | ||||||
await writeFile('products.json', exportJSON(data)); | ||||||
await writeFile('products.csv', await exportCSV(data)); | ||||||
``` | ||||||
|
||||||
|
@@ -232,14 +226,14 @@ Several methods exist for transitioning from one page to another, but the most c | |||||
In DevTools, we can see that each product title is, in fact, also a link element. We already locate the titles, so that makes our task easier. We just need to edit the code so that it extracts not only the text of the element but also the `href` attribute. Cheerio selections support accessing attributes using the `.attr()` method: | ||||||
|
||||||
```js | ||||||
function parseProduct(productItem) { | ||||||
const title = productItem.find(".product-item__title"); | ||||||
const titleText = title.text().trim(); | ||||||
const url = title.attr("href"); | ||||||
function parseProduct($productItem) { | ||||||
const $title = $productItem.find(".product-item__title"); | ||||||
const title = $title.text().trim(); | ||||||
const url = $title.attr("href"); | ||||||
|
||||||
... | ||||||
|
||||||
return { url, title: titleText, ...priceRange }; | ||||||
return { url, title, ...priceRange }; | ||||||
} | ||||||
``` | ||||||
|
||||||
|
@@ -274,15 +268,15 @@ We'll change the `parseProduct()` function so that it also takes the base URL as | |||||
|
||||||
```js | ||||||
// highlight-next-line | ||||||
function parseProduct(productItem, baseURL) { | ||||||
const title = productItem.find(".product-item__title"); | ||||||
const titleText = title.text().trim(); | ||||||
function parseProduct($productItem, baseURL) { | ||||||
const $title = $productItem.find(".product-item__title"); | ||||||
const title = $title.text().trim(); | ||||||
// highlight-next-line | ||||||
const url = new URL(title.attr("href"), baseURL).href; | ||||||
const url = new URL($title.attr("href"), baseURL).href; | ||||||
|
||||||
... | ||||||
|
||||||
return { url, title: titleText, ...priceRange }; | ||||||
return { url, title, ...priceRange }; | ||||||
} | ||||||
``` | ||||||
|
||||||
|
@@ -292,13 +286,13 @@ Now we'll pass the base URL to the function in the main body of our program: | |||||
const listingURL = "https://warehouse-theme-metal.myshopify.com/collections/sales" | ||||||
const $ = await download(listingURL); | ||||||
|
||||||
const data = [] | ||||||
$(".product-item").each((i, element) => { | ||||||
const productItem = $(element); | ||||||
const $items = $(".product-item").map((i, element) => { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
const $productItem = $(element); | ||||||
// highlight-next-line | ||||||
const item = parseProduct(productItem, listingURL); | ||||||
data.push(item); | ||||||
const item = parseProduct($productItem, listingURL); | ||||||
return item; | ||||||
}); | ||||||
const data = $items.get(); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
``` | ||||||
|
||||||
When we run the scraper now, we should see full URLs in our exports: | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
By adding the
`toArray()`
call, you get a "regular" map method that returns regular objects instead of Cheerio objects (less confusing), and you can remove the `const data = $items.get();`
later. Moreover, the `i`
index was unused.