Thanks *VERY* much, this is great!
I realized there were a few more cases to handle; I think I've now got
something that covers all the possibilities:
library(stringr)
# Example: pull every number out of a sentence.
tmpstr <- "The first number is: 32. Another one is: 32.1.
Here's a number in scientific format, 0.3523e10, and
another, 0.3523e-10, and a negative, -313.1"

# Build one pattern from its parts instead of "|"-joining twelve separate
# alternatives. The twelve cases (positive/negative x integer/float x
# plain / e+power / e-power) are exactly: optional sign, digits, optional
# fraction, optional exponent. Because the optional groups are greedy, the
# longest form is always taken, so the result does not depend on how the
# regex engine orders alternatives (a leftmost-first engine given "(\\d+)"
# as its first alternative would split "32.1" into 32 and 1).
sign_part <- "-?"            # optional leading minus
int_part <- "\\d+"           # one or more digits
frac_part <- "(?:\\.\\d+)?"  # optional decimal fraction, e.g. ".1"
exp_part <- "(?:e-?\\d+)?"   # optional exponent, e.g. "e10" or "e-10"

pattern <- paste0(sign_part, int_part, frac_part, exp_part)
pattern

# Base R extraction (same role as stringr::str_extract_all()); [[1]] selects
# the matches for the first (and only) input string.
as.numeric(regmatches(tmpstr, gregexpr(pattern, tmpstr, perl = TRUE))[[1]])

# A more complex string
tmpstr <- "The first number is: 32. 342 342.1 -3234e-10
3234e-1 Another one is: 32.1. Here's a number in scientific
format, 0.3523e10, and another, 0.3523e-10, and a negative,
-313.1"

# Earlier, less complete pattern, kept for reference only (not used):
# pattern <- "(\\d)+|(-\\d)+|(\\d+\\.\\d+)|(-\\d+\\.\\d+)|(\\d+.\\d+e\\d+)|(\\d+\\.\\d+e-\\d+)|(-\\d+.\\d+e\\d+)|(-\\d+\\.\\d+e-\\d+)"

as.numeric(regmatches(tmpstr, gregexpr(pattern, tmpstr, perl = TRUE))[[1]])
Cheers!
Nick
PS: A function version:
# Extract numbers / get numbers / get all numbers from a text string.
#
# Recognised formats, each optionally preceded by "-":
#   integers                          32, -32
#   decimals                          32.1, -313.1
#   scientific notation (lower "e")   0.3523e10, 0.3523e-10, 3234e-1
#
# Args:
#   tmpstr: character string to search; only the first element is used.
#
# Returns:
#   Numeric vector of the numbers found, in order of appearance;
#   numeric(0) when the string contains none.
#
# Example:
#   getnums("The first number is: 32. 342 342.1 -3234e-10 3234e-1")
#   # 32, 342, 342.1, -3.234e-07, 323.4
getnums <- function(tmpstr) {
  # One pattern replaces the twelve "|"-separated alternatives
  # (positive/negative x integer/float x plain / e+power / e-power):
  # optional sign, digits, optional fraction, optional exponent.
  # The optional groups are greedy, so the longest form is always matched
  # and the result no longer depends on how the regex engine orders
  # alternatives (with a leftmost-first engine, listing "(\\d+)" first
  # splits "32.1" into 32 and 1).
  pattern <- "-?\\d+(?:\\.\\d+)?(?:e-?\\d+)?"

  # Base R extraction, so no package has to be attached from inside the
  # function. [[1]] selects the matches for the first input string.
  matches <- regmatches(tmpstr, gregexpr(pattern, tmpstr, perl = TRUE))[[1]]

  as.numeric(matches)
}
On 6/15/13 10:46 PM, arun wrote:
>
>
> HI,
> One way would be:
>
> library(stringr)
> tmpstr = "The first number is: 32. Another one is: 32.1.
> Here's a number in scientific format, 0.3523e10, and
> another, 0.3523e-10, and a negative, -313.1"
> pattern<- "(\\d)+|(\\d+\\.\\d+)|(-\\d+\\.\\d+)|(\\d+.\\d+e\\d+)|(\\d+\\.\\d+e-\\d+)"
> str_extract_all(tmpstr,pattern)[[1]]
> #[1] "32" "32.1" "0.3523e10" "0.3523e-10" "-313.1"
> as.numeric(str_extract_all(tmpstr,pattern)[[1]])
> A.K.
>
>
>
> ----- Original Message -----
> From: Nick Matzke <matzke at berkeley.edu>
> To: R-help at r-project.org
> Cc:
> Sent: Sunday, June 16, 2013 1:06 AM
> Subject: [R] extract all numbers from a string
>
> Hi all,
>
> I have been beating my head against this problem for a bit,
> but I can't figure it out.
>
> I have a series of strings of variable length, and each will
> have one or more numbers, of varying format. E.g., I might
> have:
>
>
> tmpstr = "The first number is: 32. Another one is: 32.1.
> Here's a number in scientific format, 0.3523e10, and
> another, 0.3523e-10, and a negative, -313.1"
>
> How could I get R to just give me a list of numerics
> containing the numbers therein?
>
> Thanks very much to the regexp wizards!
>
> Cheers,
> Nick
>
>
>
Professional Website of Dr. Zhuo Yao