Title: | Spot R Functions & Packages |
---|---|
Description: | Helpers for parsing out the R functions and packages used in R scripts and notebooks. |
Authors: | Bryan Shalloway [aut, cre] |
Maintainer: | Bryan Shalloway <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.0.4 |
Built: | 2024-10-31 21:09:52 UTC |
Source: | https://github.com/brshallo/funspotr |
Check whether packages are available in current library.
check_pkgs_availability(pkgs, quietly = TRUE)
check_pkgs_availability(pkgs, quietly = TRUE)
pkgs |
Character vector of package names. (Typically the output from
spot_pkgs().) |
quietly |
logical: should progress and error messages be suppressed? |
Named logical vector indicating whether each package is available on the machine.
library(funspotr) library(dplyr) file_lines <- " library(dplyr) require(tidyr) library(madeUpPkg) as_tibble(mpg) %>% group_by(class) %>% nest() %>% mutate(stats = purrr::map(data, ~lm(cty ~ hwy, data = .x))) made_up_fun() " file_output <- tempfile(fileext = ".R") writeLines(file_lines, file_output) spot_pkgs(file_output) %>% check_pkgs_availability()
library(funspotr) library(dplyr) file_lines <- " library(dplyr) require(tidyr) library(madeUpPkg) as_tibble(mpg) %>% group_by(class) %>% nest() %>% mutate(stats = purrr::map(data, ~lm(cty ~ hwy, data = .x))) made_up_fun() " file_output <- tempfile(fileext = ".R") writeLines(file_lines, file_output) spot_pkgs(file_output) %>% check_pkgs_availability()
Given a username, return a dataframe with paths to all the gists by that user.
list_files_github_gists( user, pattern = stringr::regex("(r|rmd|rmarkdown|qmd)$", ignore_case = TRUE) )
list_files_github_gists( user, pattern = stringr::regex("(r|rmd|rmarkdown|qmd)$", ignore_case = TRUE) )
user |
Character string of username whose github gists you want to pull. |
pattern |
Regex pattern to keep only matching files. Default is a case-insensitive
regex matching paths that end in r, rmd, rmarkdown, or qmd. |
Dataframe with relative_paths
and absolute_paths
of file paths.
Because gists do not exist in a folder structure relative_paths
will
generally just be a file name. absolute_paths
will be a url to the raw file. See
unnest_results()
for helper to put into an easier to read format.
list_files_github_repo()
, list_files_wd()
library(dplyr) library(funspotr) # pulling and analyzing my R file github gists gists_urls <- list_files_github_gists("brshallo", pattern = ".") # Will just parse the first 2 files/gists # Note that is easy to hit the API limit if have lots of gists contents <- filter(gists_urls, str_detect_r_docs(absolute_paths)) %>% slice(1:2) %>% spot_funs_files() contents %>% unnest_results()
library(dplyr) library(funspotr) # pulling and analyzing my R file github gists gists_urls <- list_files_github_gists("brshallo", pattern = ".") # Will just parse the first 2 files/gists # Note that is easy to hit the API limit if have lots of gists contents <- filter(gists_urls, str_detect_r_docs(absolute_paths)) %>% slice(1:2) %>% spot_funs_files() contents %>% unnest_results()
Return a dataframe containing the paths of files in a github repository.
Generally used prior to spot_{funs/pkgs}_files()
.
list_files_github_repo( repo, branch = NULL, pattern = stringr::regex("(r|rmd|rmarkdown|qmd)$", ignore_case = TRUE), rmv_index = TRUE )
list_files_github_repo( repo, branch = NULL, pattern = stringr::regex("(r|rmd|rmarkdown|qmd)$", ignore_case = TRUE), rmv_index = TRUE )
repo |
Github repository, e.g. "brshallo/feat-eng-lags-presentation" |
branch |
Branch of github repository, default is "main". |
pattern |
Regex pattern to keep only matching files. Default is a case-insensitive
regex matching paths that end in r, rmd, rmarkdown, or qmd. |
rmv_index |
Logical, most repos containing blogdown sites will have an
index.R file at the root. Change to |
Dataframe with columns of relative_paths
and absolute_paths
for
file path locations. absolute_paths
will be urls to raw files.
list_files_wd()
, list_files_github_gists()
library(dplyr) library(funspotr) # pulling and analyzing my R file github gists gh_urls <- list_files_github_repo("brshallo/feat-eng-lags-presentation", branch = "main") # Will just parse the first 2 files/gists contents <- spot_funs_files(slice(gh_urls, 1:2)) contents %>% unnest_results()
library(dplyr) library(funspotr) # pulling and analyzing my R file github gists gh_urls <- list_files_github_repo("brshallo/feat-eng-lags-presentation", branch = "main") # Will just parse the first 2 files/gists contents <- spot_funs_files(slice(gh_urls, 1:2)) contents %>% unnest_results()
Return a dataframe containing the paths of files in the working directory.
Generally used prior to spot_{funs/pkgs}_files()
.
list_files_wd( path = ".", pattern = stringr::regex("(r|rmd|rmarkdown|qmd)$", ignore_case = TRUE), rmv_index = TRUE )
list_files_wd( path = ".", pattern = stringr::regex("(r|rmd|rmarkdown|qmd)$", ignore_case = TRUE), rmv_index = TRUE )
path |
Character vector or path. Default is "." which will set the
starting location for |
pattern |
Regex pattern to keep only matching files. Default is a case-insensitive
regex matching paths that end in r, rmd, rmarkdown, or qmd. |
rmv_index |
Logical, most repos containing blogdown sites will have an
index.R file at the root. Change to |
Can also be used outside of working directory if path
is specified.
Dataframe with columns of relative_paths
and absolute_paths
.
list_files_github_repo()
, list_files_github_gists()
library(dplyr) library(funspotr) # pulling and analyzing my R file github gists files_local <- list_files_wd() # Will just parse the first 2 files/gists contents <- spot_funs_files(slice(files_local, 2:3)) contents %>% unnest_results()
library(dplyr) library(funspotr) # pulling and analyzing my R file github gists files_local <- list_files_wd() # Will just parse the first 2 files/gists contents <- spot_funs_files(slice(files_local, 2:3)) contents %>% unnest_results()
Output simple network plot using
visNetwork connecting either
funs
or pkgs
to relative_paths
/absolute_paths
.
network_plot(df, to = .data$pkgs, show_each_use = FALSE)
network_plot(df, to = .data$pkgs, show_each_use = FALSE)
df |
Dataframe containing columns |
to |
|
show_each_use |
Binary, default is FALSE. |
visNetwork plot
library(dplyr) library(funspotr) gh_ex_pkgs <- list_files_github_repo( repo = "brshallo/feat-eng-lags-presentation", branch = "main") %>% spot_funs_files() gh_ex_pkgs %>% unnest_results() %>% network_plot(to = pkgs)
library(dplyr) library(funspotr) gh_ex_pkgs <- list_files_github_repo( repo = "brshallo/feat-eng-lags-presentation", branch = "main") %>% spot_funs_files() gh_ex_pkgs %>% unnest_results() %>% network_plot(to = pkgs)
Given file_path
extract all functions and their associated packages from
specified file.
spot_funs(file_path, ...)
spot_funs(file_path, ...)
file_path |
character vector of path to file. |
... |
This allows you to pass additional arguments through to
|
spot_funs()
uses spot_funs_custom()
to run – it is a less verbose
version and does not require passing in the packages separately. See README
and ?spot_funs_custom
for details on how the function works and arguments
that can be passed through (via ...
).
If code syntax is malformed and cannot be properly parsed, function will error.
Given default arguments and no missing packages, a dataframe with the following columns is returned:
funs
: specifying functions in file.
pkgs
: the package a function came from. If funs
is a custom function or
if it came from a package not installed on your machine, pkgs
will return
"(unknown)".
Note that any unused loaded packages / pkgs
are dropped from output.
Any functions without an available package are returned with the value
"(unknown)".
See README for further documentation.
spot_funs_custom()
, spot_funs_files()
library(funspotr) file_lines <- " library(dplyr) require(tidyr) library(madeUpPkg) as_tibble(mpg) %>% group_by(class) %>% nest() %>% mutate(stats = purrr::map(data, ~lm(cty ~ hwy, data = .x))) made_up_fun() " file_output <- tempfile(fileext = ".R") writeLines(file_lines, file_output) spot_funs(file_output)
library(funspotr) file_lines <- " library(dplyr) require(tidyr) library(madeUpPkg) as_tibble(mpg) %>% group_by(class) %>% nest() %>% mutate(stats = purrr::map(data, ~lm(cty ~ hwy, data = .x))) made_up_fun() " file_output <- tempfile(fileext = ".R") writeLines(file_lines, file_output) spot_funs(file_output)
Engine that runs spot_funs()
. spot_funs_custom()
has options for changing
returned output and for producing print statements and errors. It also
requires you to provide a character vector for pkgs
rather than identifying
these automatically via spot_pkgs()
.
spot_funs_custom( pkgs, file_path, show_each_use = FALSE, keep_search_list = FALSE, copy_local = TRUE, print_pkgs_load_status = FALSE, error_if_missing_pkg = FALSE, keep_in_multiple_pkgs = FALSE )
spot_funs_custom( pkgs, file_path, show_each_use = FALSE, keep_search_list = FALSE, copy_local = TRUE, print_pkgs_load_status = FALSE, error_if_missing_pkg = FALSE, keep_in_multiple_pkgs = FALSE )
pkgs |
Character vector of packages that are added to search space via
|
file_path |
character vector of path to file. |
show_each_use |
Logical, default is FALSE. |
keep_search_list |
Logical, default is FALSE. |
copy_local |
Logical, if changed to |
print_pkgs_load_status |
Logical, default is FALSE. |
error_if_missing_pkg |
Logical, default is FALSE. |
keep_in_multiple_pkgs |
Logical, default is FALSE. |
spot_funs_custom()
is also what you should use in cases where you don't
trust spot_pkgs()
to properly identify package dependencies from within the
same file and instead want to pass in your own character vector of packages.
See README for a description of how the function works.
If a package is not included in pkgs
, any functions called that should come
from that package will be assigned a value of "(unknown)" in the pkgs
column of the returned output. You can also use the print_pkgs_load_status
and error_if_missing_pkg
arguments to alter how output works in cases when
not all packages are on the machine.
Explicit calls to unexported functions i.e. pkg:::fun()
will have pkgs = "(unknown)"
in the returned dataframe.
Given default arguments and no missing packages, a dataframe with the following columns is returned:
funs
: specifying functions in file.
pkgs
: the package a function came from. If funs
is a custom function or
if it came from a package not installed on your machine, pkgs
will return
"(unknown)".
Note that any unused loaded packages / pkgs
are dropped from output.
Any functions without an available package are returned with the value
"(unknown)".
See README for further documentation.
library(funspotr) file_lines <- " library(dplyr) require(tidyr) library(madeUpPkg) as_tibble(mpg) %>% group_by(class) %>% nest() %>% mutate(stats = purrr::map(data, ~lm(cty ~ hwy, data = .x))) made_up_fun() " file_output <- tempfile(fileext = ".R") writeLines(file_lines, file_output) pkgs <- spot_pkgs(file_output) spot_funs_custom(pkgs, file_output) # If you'd rather it error when a pkg doesn't exist e.g. for {madeUpPkg} # set`error_if_missing_pkg = TRUE`
library(funspotr) file_lines <- " library(dplyr) require(tidyr) library(madeUpPkg) as_tibble(mpg) %>% group_by(class) %>% nest() %>% mutate(stats = purrr::map(data, ~lm(cty ~ hwy, data = .x))) made_up_fun() " file_output <- tempfile(fileext = ".R") writeLines(file_lines, file_output) pkgs <- spot_pkgs(file_output) spot_funs_custom(pkgs, file_output) # If you'd rather it error when a pkg doesn't exist e.g. for {madeUpPkg} # set`error_if_missing_pkg = TRUE`
spot_pkgs_files()
: Spot all packages that show up in R or Rmarkdown or
quarto documents in a dataframe of filepaths.
spot_funs_files()
: Spot all functions and their corresponding packages
that show up in R or Rmarkdown or quarto documents in a dataframe of
filepaths.
spot_funs_files(df, ..., .progress = TRUE) spot_pkgs_files(df, ..., .progress = TRUE)
spot_funs_files(df, ..., .progress = TRUE) spot_pkgs_files(df, ..., .progress = TRUE)
df |
Dataframe containing a column of |
... |
Arguments passed onto |
.progress |
Whether to show a progress bar. Use |
A purrr::safely()
wrapper for mapping spot_pkgs()
or spot_funs()
across
multiple filepaths. I.e. even if some files fail to parse the function will
continue on.
Default settings are meant for files where package libraries are referenced within the files themselves. See README for more details.
Dataframe with relative_paths
and absolute_paths
of file paths
along with a list-column spotted
containing purrr::safely()
named list
of "result" and "error" for each file parsed. Use unnest_results()
to
unnest only the "result" values.
spot_pkgs()
, spot_funs()
, unnest_results()
library(funspotr) library(dplyr) list_files_github_repo("brshallo/feat-eng-lags-presentation", branch = "main") %>% spot_funs_files()
library(funspotr) library(dplyr) list_files_github_repo("brshallo/feat-eng-lags-presentation", branch = "main") %>% spot_funs_files()
Extract all pkg
called in either library(pkg)
, require(pkg),
requireNamespace("pkg")
or pkg::fun()
. Will not identify packages loaded
in other ways not typically done in interactive R scripts (e.g. relying on a
DESCRIPTION file for a pkg or something like source("lib-calls.R")
).
Inspiration: blogdown#647.
spot_pkgs( file_path, show_explicit_funs = FALSE, copy_local = TRUE, as_yaml_tags = FALSE )
spot_pkgs( file_path, show_explicit_funs = FALSE, copy_local = TRUE, as_yaml_tags = FALSE )
file_path |
String of path to file of interest. |
show_explicit_funs |
In cases where a function is called explicitly,
show both the package dependency and the function together. For example a
script containing |
copy_local |
Logical, default is TRUE. |
as_yaml_tags |
Logical, default is FALSE. |
In cases where show_explicit_funs = TRUE
and there are explicit calls in
the package, "pkg::fun" is returned instead.
Packages are extracted solely based on text – not whether the package actually exists or not. Hence even packages that you do not have installed on your machine but show up in the script will be returned in the character vector.
Character vector of all packages loaded in file.
spot_pkgs_used()
, spot_pkgs_from_description()
,
spot_pkgs_files()
, renv::dependencies()
library(funspotr) file_lines <- " library(dplyr) require(tidyr) library(madeUpPkg) as_tibble(mpg) %>% group_by(class) %>% nest() %>% mutate(stats = purrr::map(data, ~lm(cty ~ hwy, data = .x))) made_up_fun() " file_output <- tempfile(fileext = ".R") writeLines(file_lines, file_output) spot_pkgs(file_output) # To view `purrr::map` as an explicit call spot_pkgs(file_output, show_explicit_funs = TRUE) # To output for blogdown post YAML header tags cat(spot_pkgs(file_output, as_yaml_tags = TRUE))
library(funspotr) file_lines <- " library(dplyr) require(tidyr) library(madeUpPkg) as_tibble(mpg) %>% group_by(class) %>% nest() %>% mutate(stats = purrr::map(data, ~lm(cty ~ hwy, data = .x))) made_up_fun() " file_output <- tempfile(fileext = ".R") writeLines(file_lines, file_output) spot_pkgs(file_output) # To view `purrr::map` as an explicit call spot_pkgs(file_output, show_explicit_funs = TRUE) # To output for blogdown post YAML header tags cat(spot_pkgs(file_output, as_yaml_tags = TRUE))
Primarily used for cases where you load metapackages like tidyverse
or
tidymodels
but only want to return those packages that have functions from
the package that are actually called. E.g. say you have a
library(tidyverse)
call but only end up using functions that are in dplyr
– in that case spot_pkgs()
would return "tidyverse"
whereas
spot_pkgs_used()
would return "dplyr"
.
spot_pkgs_used(file_path, as_yaml_tags = FALSE)
spot_pkgs_used(file_path, as_yaml_tags = FALSE)
file_path |
String of path to file of interest. |
as_yaml_tags |
Logical, default is FALSE. |
Also does not return uninstalled packages or those loaded when R starts up.
Is essentially just calling spot_funs() |> with(unique(pkgs))
in the
background. Does not have as many options as spot_pkgs()
though.
Character vector of all packages with functions used in the file.
Put a quoted inline R function call in your blogdown or quarto post's YAML header to
have the tags be the packages used in your post (wrapper around
funspotr::spot_pkgs()
).
spot_tags( file_path = knitr::current_input(), used = FALSE, drop_knitr = FALSE, yaml_bullet = NULL, ... )
spot_tags( file_path = knitr::current_input(), used = FALSE, drop_knitr = FALSE, yaml_bullet = NULL, ... )
file_path |
Default is the file being knitted but can change to some other file (e.g. in cases where the code for the post may reside in a different file). |
used |
Default is FALSE. |
drop_knitr |
Many blogdown posts have |
yaml_bullet |
Default is If its first occurrence happens on a line that contains a bracket
the value becomes See examples for how to hard-code. |
... |
Any additional arguments to pass to |
tags: - "`r funspotr::spot_tags()`"
OR
tags: ["`r funspotr::spot_tags()`"]
OR
categories: ["`r funspotr::spot_tags()`"]
Thanks Yihui for the suggestions and for getting this working (blogdown#647, blogdown#693).
Character vector in a format meant to be read while evaluating the YAML header when rendering.
# To review input interactively from within rstudio you might also try: ## Not run: funspotr::spot_tags(rstudioapi::getSourceEditorContext()$path) ## End(Not run)
# To review input interactively from within rstudio you might also try: ## Not run: funspotr::spot_tags(rstudioapi::getSourceEditorContext()$path) ## End(Not run)
Run after running list_files_*() |> spot_{funs|pkgs}_files()
to unnest the
spotted
list-column.
unnest_results(df)
unnest_results(df)
df |
Dataframe outputted by |
An unnested dataframe with what was in spotted
moved to the front.
spot_funs_files()
, spot_pkgs_files()
library(funspotr) library(dplyr) list_files_github_repo("brshallo/feat-eng-lags-presentation", branch = "main") %>% spot_funs_files() %>% unnest_results()
library(funspotr) library(dplyr) list_files_github_repo("brshallo/feat-eng-lags-presentation", branch = "main") %>% spot_funs_files() %>% unnest_results()