Skip to content

Commit 0e7e8fa

Browse files
authored
Merge pull request #28 from till-tietz/develop
Develop
2 parents 85fe26c + ef32519 commit 0e7e8fa

7 files changed

Lines changed: 115 additions & 23 deletions

File tree

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Type: Package
22
Package: parsel
3-
Title: Parallelized Dynamic Web-Scraping Using 'RSelenium'
3+
Title: Parallel Dynamic Web-Scraping Using 'RSelenium'
44
Version: 0.1.1
55
Authors@R: c(
66
person("Till", "Tietz", email = "ttietz2014@gmail.com", role = c("cre","aut")))

R/constructors_elements.R

Lines changed: 92 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ gen_varname <- function(input){
2929
#' @param using character string specifying locator scheme to use to search elements. Available schemes: "class name", "css selector", "id", "name", "link text", "partial link text", "tag name", "xpath".
3030
#' @param value character string specifying the search target.
3131
#' @param name character string specifying the object name the RSelenium "wElement" class object should be saved to.
32+
#' @param new_page logical indicating if clickElement() action will resullt in a change in url.
3233
#' @param prev a placeholder for the output of functions being piped into click(). Defaults to NULL and should not be altered.
3334
#' @return a character string defining 'RSelenium' clicking instructions that can be pasted into a scraping function.
3435
#' @export
@@ -44,7 +45,7 @@ gen_varname <- function(input){
4445
#'
4546
#' }
4647

47-
click <- function(using, value, name = NULL, prev = NULL){
48+
click <- function(using, value, name = NULL, new_page = FALSE, prev = NULL){
4849

4950
if(!missing(using)){
5051
if(!is.character(using)){
@@ -74,10 +75,37 @@ click <- function(using, value, name = NULL, prev = NULL){
7475

7576
}
7677

78+
if(!is.logical(new_page)){
79+
stop("new_page is not of type logical")
80+
}
81+
82+
7783
finding <- paste(name, " <- ","remDr$findElement(using = '", using,"', '", value, "')", sep = "")
7884
clicking <- paste(name,"$clickElement()", sep = "")
7985

80-
out <- paste(finding, clicking, sep = "\n")
86+
if(new_page){
87+
88+
from <- "from <- seleniumPipes::getCurrentUrl(remDr)"
89+
90+
wait <- paste("not_changed <- TRUE",
91+
"while(not_changed){",
92+
"Sys.sleep(0.25)",
93+
"current <- seleniumPipes::getCurrentUrl(remDr)",
94+
"if(current != from){",
95+
"not_changed <- FALSE",
96+
"}",
97+
"}",
98+
sep = "\n")
99+
100+
out <- paste(finding, from, clicking, wait, sep = "\n")
101+
102+
} else {
103+
104+
wait <- "Sys.sleep(0.25)"
105+
106+
out <- paste(finding, clicking, wait, sep = "\n")
107+
108+
}
81109

82110
if(!is.null(prev)){
83111
out <- paste(prev, out, sep = " \n \n ")
@@ -97,6 +125,7 @@ click <- function(using, value, name = NULL, prev = NULL){
97125
#' @param name character string specifying the object name the RSelenium "wElement" class object should be saved to.If NULL a name will be generated automatically.
98126
#' @param text a character vector specifying the text to be typed.
99127
#' @param text_object a character string specifying the name of an external object holding the text to be typed. Note that the remDr$sendKeysToElement method only accepts list inputs.
128+
#' @param new_page logical indicating if sendKeysToElement() action will resullt in a change in url.
100129
#' @param prev a placeholder for the output of functions being piped into type(). Defaults to NULL and should not be altered.
101130
#' @return a character string defining 'RSelenium' typing instructions that can be pasted into a scraping function.
102131
#' @export
@@ -124,7 +153,7 @@ click <- function(using, value, name = NULL, prev = NULL){
124153
#'
125154
#' }
126155

127-
type <- function(using, value, name = NULL, text, text_object, prev = NULL){
156+
type <- function(using, value, name = NULL, text, text_object, new_page = FALSE, prev = NULL){
128157

129158
if(!missing(using)){
130159
if(!is.character(using)){
@@ -187,6 +216,10 @@ type <- function(using, value, name = NULL, text, text_object, prev = NULL){
187216

188217
}
189218

219+
if(!is.logical(new_page)){
220+
stop("new_page is not of type logical")
221+
}
222+
190223

191224
finding <- paste(name, " <- ","remDr$findElement(using = '", using,"', '", value, "')", sep = "")
192225

@@ -200,7 +233,29 @@ type <- function(using, value, name = NULL, text, text_object, prev = NULL){
200233

201234
}
202235

203-
out <- paste(finding, typing, sep = "\n")
236+
if(new_page){
237+
238+
from <- "from <- seleniumPipes::getCurrentUrl(remDr)"
239+
240+
wait <- paste("not_changed <- TRUE",
241+
"while(not_changed){",
242+
"Sys.sleep(0.25)",
243+
"current <- seleniumPipes::getCurrentUrl(remDr)",
244+
"if(current != from){",
245+
"not_changed <- FALSE",
246+
"}",
247+
"}",
248+
sep = "\n")
249+
250+
out <- paste(finding, from, typing, wait, sep = "\n")
251+
252+
} else {
253+
254+
wait <- "Sys.sleep(0.25)"
255+
256+
out <- paste(finding, typing, wait, sep = "\n")
257+
258+
}
204259

205260
if(!is.null(prev)){
206261
out <- paste(prev, out, sep = " \n \n ")
@@ -217,6 +272,7 @@ type <- function(using, value, name = NULL, text, text_object, prev = NULL){
217272
#' @param value character string specifying the search target.
218273
#' @param name character string specifying the object name the RSelenium "wElement" class object should be saved to. If NULL a name will be generated automatically.
219274
#' @param prev a placeholder for the output of functions being piped into get_element(). Defaults to NULL and should not be altered.
275+
#' @param multiple logical indicating whether multiple elements should be returned. If TRUE the findElements() method will be invoced.
220276
#' @return a character string defining 'RSelenium' getElementText() instructions that can be pasted into a scraping function.
221277
#' @export
222278
#'
@@ -251,7 +307,7 @@ type <- function(using, value, name = NULL, text, text_object, prev = NULL){
251307
#'
252308
#' }
253309

254-
get_element <- function(using, value, name = NULL, prev = NULL){
310+
get_element <- function(using, value, name = NULL, multiple = FALSE, prev = NULL){
255311

256312
if(!missing(using)){
257313
if(!is.character(using)){
@@ -271,6 +327,10 @@ get_element <- function(using, value, name = NULL, prev = NULL){
271327
}
272328
}
273329

330+
if(!is.logical(multiple)){
331+
stop("multiple not of class logical")
332+
}
333+
274334
if(is.null(name)){
275335

276336
if(is.null(prev)){
@@ -281,15 +341,34 @@ get_element <- function(using, value, name = NULL, prev = NULL){
281341

282342
}
283343

284-
finding <- paste(name, " <- ", "try(", "remDr$findElement(using = '", using,"', '", value, "')", ")", sep = "")
285344

286-
out <- paste(finding,
287-
paste("if(is(", name, ",'try-error')){", sep = ""),
288-
paste(name, " <- NA", sep = ""),
289-
"} else {",
290-
paste(name, " <- ", name,"$getElementText()", sep = ""),
291-
"}",
292-
sep = " \n")
345+
if(multiple == FALSE){
346+
347+
finding <- paste(name, " <- ", "try(", "remDr$findElement(using = '", using,"', '", value, "')", ")", sep = "")
348+
349+
out <- paste(finding,
350+
paste("if(is(", name, ",'try-error')){", sep = ""),
351+
paste(name, " <- NA", sep = ""),
352+
"} else {",
353+
paste(name, " <- ", name,"$getElementText()", sep = ""),
354+
"}",
355+
sep = " \n")
356+
357+
} else {
358+
359+
finding <- paste(name, " <- ", "try(", "remDr$findElements(using = '", using,"', '", value, "')", ")", sep = "")
360+
361+
out <- paste(finding,
362+
paste("if(is(", name, ",'try-error')){", sep = ""),
363+
paste(name, " <- NA", sep = ""),
364+
"} else {",
365+
paste(name, " <- ", "lapply(", name, ", function(i) ","i$getElementText())", sep = ""),
366+
"}",
367+
sep = " \n")
368+
369+
}
370+
371+
293372

294373
if(!is.null(prev)){
295374
out <- paste(prev, out, sep = " \n \n ")

man/click.Rd

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_element.Rd

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/parsel-package.Rd

Lines changed: 2 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/type.Rd

Lines changed: 11 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test_constructors.R

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ testthat::test_that(
1818
expect_error(click(using = 1, value = "a"))
1919
expect_error(click(using = "a", value = 1))
2020
expect_error(click(using = "a", value = "a", name = 1))
21+
expect_error(click(using = "a", value = "a", new_page = 1))
2122
}
2223
)
2324

@@ -32,6 +33,7 @@ testthat::test_that(
3233
expect_error(type(using = "a", value = "a", text = 1))
3334
expect_error(type(using = "a", value = "a", text_object = 1))
3435
expect_error(type(using = "a", value = "a", text_object = c("a","b")))
36+
expect_error(type(using = "a", value = "a", text = "a", new_page = 1))
3537
}
3638
)
3739

@@ -42,6 +44,7 @@ testthat::test_that(
4244
expect_error(get_element(using = 1, value = "a"))
4345
expect_error(get_element(using = "a", value = 1))
4446
expect_error(get_element(using = "a", value = "a", name = 1))
47+
expect_error(get_element(using = "a", value = "a", name = "a", multiple = 1))
4548
}
4649
)
4750

0 commit comments

Comments
 (0)