Skip to content

Commit e4807da

Browse files
committed
run through docstrings
1 parent 62915eb commit e4807da

4 files changed

Lines changed: 92 additions & 37 deletions

File tree

src/stats/fasttree.jl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,16 @@ end
107107
Calculate a decision tree of `p` predictor variables and classes `1, 2, …, nclasses`.
108108
Nodes split when they reach `splitsize` observations until `maxsize` nodes are in the tree.
109109
Each variable is summarized by `stat`, which can be `FitNormal()` or `Hist(nbins)`.
110+
111+
# Example
112+
113+
x = randn(10^5, 10)
114+
y = rand([1,2], 10^5)
115+
116+
o = fit!(FastTree(10), (x,y))
117+
118+
xi = randn(10)
119+
classify(o, xi)
110120
"""
111121
struct FastTree{T<:FastNode} <: OnlineStat{XY}
112122
tree::Vector{T}
@@ -166,6 +176,13 @@ Calculate a random forest where each variable is summarized by `stat`.
166176
- `splitsize=5000`: Number of observations in any given node before splitting
167177
- `λ = .05`: Probability that each tree is updated on a new observation
168178
179+
# Example
180+
181+
x, y = randn(10^5, 10), rand(1:2, 10^5)
182+
183+
o = fit!(FastForest(10), (x,y))
184+
185+
classify(o, x[1,:])
169186
"""
170187
mutable struct FastForest{T<:FastTree} <: OnlineStat{XY}
171188
forest::Vector{T}

src/stats/hist.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,20 @@ make_alg(o::HistAlgorithm) = o
88
Hist(edges)
99
1010
Calculate a histogram over fixed `edges` or adaptive `nbins`.
11+
12+
# Example
13+
14+
using OnlineStats, Statistics
15+
y = randn(10^6)
16+
17+
o = fit!(Hist(20), y)
18+
quantile(o)
19+
mean(o)
20+
var(o)
21+
std(o)
22+
extrema(o)
23+
OnlineStats.pdf(o, 0.0)
24+
OnlineStats.cdf(o, 0.0)
1125
"""
1226
struct Hist{N, H <: HistAlgorithm{N}} <: OnlineStat{N}
1327
alg::H

src/stats/nbclassifier.jl

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,20 @@
22
"""
33
NBClassifier(p::Int, T::Type; stat = Hist(15))
44
5-
Calculate a naive bayes classifier for classes of type `T` and `p` predictors.
5+
Calculate a naive Bayes classifier for classes of type `T` and `p` predictors. For each
6+
class `K`, the predictor variables are summarized by `stat`.
7+
8+
# Example
9+
10+
x, y = randn(10^4, 10), rand(Bool, 10^4)
11+
12+
o = fit!(NBClassifier(10, Bool), (x,y))
13+
collect(keys(o))
14+
probs(o)
15+
16+
xi = randn(10)
17+
predict(o, xi)
18+
classify(o, xi)
619
"""
720
mutable struct NBClassifier{T, G<:Group} <: OnlineStat{XY}
821
d::OrderedDict{T, G}

src/stats/stats.jl

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#-----------------------------------------------------# StatCollection (Series and Group)
12
abstract type StatCollection{T} <: OnlineStat{T} end
23

34
function Base.show(io::IO, o::StatCollection)
@@ -20,7 +21,10 @@ Univariate variance.
2021
2122
# Example
2223
23-
@time fit!(Variance(), randn(10^6))
24+
o = fit!(Variance(), randn(10^6))
25+
mean(o)
26+
var(o)
27+
std(o)
2428
"""
2529
mutable struct Variance{W} <: OnlineStat{Number}
2630
σ2::Float64
@@ -58,7 +62,9 @@ Store the last `b` values for a data stream of type `T`. Values are stored as
5862
5963
# Example
6064
61-
fit!(Lag{Int}(10), 1:12)
65+
o = fit!(Lag{Int}(10), 1:12)
66+
o[1]
67+
o[end]
6268
"""
6369
mutable struct Lag{T} <: OnlineStat{T}
6470
circbuff::CircularBuffer{T}
@@ -70,6 +76,7 @@ value(o::Lag) = reverse(o.circbuff)
7076
_fit!(o::Lag, y) = (o.n += 1; push!(o.circbuff, y))
7177
Base.length(o::Lag) = length(o.circbuff)
7278
Base.getindex(o::Lag, i) = o.circbuff[end - i + 1]
79+
Base.lastindex(o::Lag) = length(o)
7380

7481
"""
7582
AutoCov(b, T = Float64; weight=EqualWeight())
@@ -139,7 +146,7 @@ any given replicate will be updated `rand(d)` times (default is double or nothin
139146
140147
o = Bootstrap(Variance())
141148
fit!(o, randn(1000))
142-
confint(o)
149+
confint(o, .95)
143150
"""
144151
struct Bootstrap{T, O <: OnlineStat{T}, D} <: OnlineStat{T}
145152
stat::O
@@ -181,7 +188,7 @@ Call `f(o)` every time the OnlineStat `o` gets updated.
181188
182189
# Example
183190
184-
o = CallFun(Mean(), info)
191+
o = CallFun(Mean(), println)
185192
fit!(o, [0,0,1,1])
186193
"""
187194
struct CallFun{T, O <: OnlineStat{T}, F <: Function} <: OnlineStat{T}
@@ -195,23 +202,6 @@ Base.show(io::IO, o::CallFun) = print(io, "CallFun: $(o.stat) |> $(o.f)")
195202
_fit!(o::CallFun, arg) = (_fit!(o.stat, arg); o.f(o.stat))
196203
_merge!(o::CallFun, o2::CallFun) = _merge!(o.stat, o2.stat)
197204

198-
# #-----------------------------------------------------------------------# Count
199-
# """
200-
# Count()
201-
202-
# The number of things observed.
203-
204-
# # Example
205-
206-
# fit!(Count(), 1:1000)
207-
# """
208-
# mutable struct Count <: OnlineStat{Nothing}
209-
# n::Int
210-
# Count() = new(0)
211-
# end
212-
# _fit!(o::Count, x) = (o.n += 1)
213-
# _merge!(o::Count, o2::Count) = (o.n += o2.n; o)
214-
215205
#-----------------------------------------------------------------------# CountMap
216206
"""
217207
CountMap(T::Type)
@@ -224,6 +214,9 @@ Track a dictionary that maps unique values to its number of occurrences. Simila
224214
225215
o = fit!(CountMap(Int), rand(1:10, 1000))
226216
value(o)
217+
probs(o)
218+
OnlineStats.pdf(o, 1)
219+
collect(keys(o))
227220
"""
228221
mutable struct CountMap{T, A <: AbstractDict{T, Int}} <: OnlineStat{T}
229222
value::A # OrderedDict by default
@@ -269,6 +262,9 @@ unknown, leave the default `p=0`.
269262
270263
o = fit!(CovMatrix(), randn(100, 4))
271264
cor(o)
265+
cov(o)
266+
mean(o)
267+
var(o)
272268
"""
273269
mutable struct CovMatrix{T,W} <: OnlineStat{VectorOb} where T<:Number
274270
value::Matrix{T}
@@ -384,7 +380,10 @@ Maximum and minimum.
384380
385381
# Example
386382
387-
fit!(Extrema(), rand(10^5))
383+
o = fit!(Extrema(), rand(10^5))
384+
extrema(o)
385+
maximum(o)
386+
minimum(o)
388387
"""
389388
mutable struct Extrema{T} <: OnlineStat{Number}
390389
min::T
@@ -418,8 +417,7 @@ fitted.
418417
419418
FTSeries(T, stats...; filter, transform)
420419
421-
If the transformed value has a different type than the original, provide an argument to
422-
the constructor to specify the type of an input observation.
420+
Create an FTSeries and specify the type `T` of the transformed values.
423421
424422
# Example
425423
@@ -472,7 +470,7 @@ end
472470
"""
473471
Group(stats::OnlineStat...)
474472
Group(; stats...)
475-
Group(tuple)
473+
Group(collection)
476474
477475
Create a vector-input stat from several scalar-input stats. For a new
478476
observation `y`, `y[i]` is sent to `stats[i]`.
@@ -484,7 +482,9 @@ observation `y`, `y[i]` is sent to `stats[i]`.
484482
fit!(Group(Mean(), Mean()), x)
485483
fit!(Group(Mean(), Variance()), x)
486484
487-
fit!(Group(m1 = Mean(), m2 = Mean()), x)
485+
o = fit!(Group(m1 = Mean(), m2 = Mean()), x)
486+
o.stats.m1
487+
o.stats.m2
488488
"""
489489
struct Group{T} <: StatCollection{VectorOb}
490490
stats::T
@@ -681,7 +681,7 @@ Approximate K-Means clustering of `k` clusters and `p` variables.
681681
682682
clusters = rand(Bool, 10^5)
683683
684-
x = [clusters[i] > .5 ? randn(): 5 + randn() for i in 1:10^5, j in 1:2]
684+
x = [clusters[i] > .5 ? randn() : 5 + randn() for i in 1:10^5, j in 1:2]
685685
686686
o = fit!(KMeans(2, 2), x)
687687
"""
@@ -750,6 +750,7 @@ First four non-central moments.
750750
o = fit!(Moments(), randn(1000))
751751
mean(o)
752752
var(o)
753+
std(o)
753754
skewness(o)
754755
kurtosis(o)
755756
"""
@@ -882,7 +883,8 @@ Average order statistics with batches of size `b`.
882883
883884
# Example
884885
885-
fit!(OrderStats(100), randn(10^5))
886+
o = fit!(OrderStats(100), randn(10^5))
887+
quantile(o, [.25, .5, .75])
886888
"""
887889
mutable struct OrderStats{T, W} <: OnlineStat{Number}
888890
value::Vector{T}
@@ -939,6 +941,7 @@ Track a dictionary that maps unique values to its probability. Similar to
939941
940942
o = ProbMap(Int)
941943
fit!(o, rand(1:10, 1000))
944+
probs(o)
942945
"""
943946
mutable struct ProbMap{T, A<:AbstractDict{T,Float64}, W} <: OnlineStat{T}
944947
value::A
@@ -1000,9 +1003,16 @@ function P2Quantile(τ::Real = 0.5)
10001003
nprime = [1, 1 + 2τ, 1 + 4τ, 3 + 2τ, 5]
10011004
P2Quantile(zeros(5), collect(1:5), nprime, τ, 0)
10021005
end
1003-
Base.show(io::IO, o::P2Quantile) = print(io, "P2Quantile($(o.τ), $(value(o)))")
1006+
Base.show(io::IO, o::P2Quantile) = print(io, "P2Quantile ($(o.τ)): n=$(nobs(o)) | value=$(value(o))")
10041007
value(o::P2Quantile) = o.q[3]
10051008
nobs(o::P2Quantile) = o.nobs
1009+
# function _merge!(a::P2Quantile, b::P2Quantile)
1010+
# a.τ == b.τ || error("Quantiles are not the same: $(a.τ) != $(b.τ)")
1011+
# a.nobs += b.nobs
1012+
# # q
1013+
# a.q[1] = min(a.q[1], b.q[1])
1014+
# a.q[5] = max(a.q[5], b.q[5])
1015+
# end
10061016
function _fit!(o::P2Quantile, y::Real)
10071017
o.nobs += 1
10081018
q = o.q
@@ -1061,10 +1071,11 @@ end
10611071

10621072
#-----------------------------------------------------------------------# Quantile
10631073
"""
1064-
Quantile(q = [.25, .5, .75]; alg=SGD(), rate=LearningRate(.6))
1074+
Quantile(q = [.25, .5, .75]; alg=OMAS(), rate=LearningRate(.6))
10651075
10661076
Calculate quantiles via a stochastic approximation algorithm `OMAS`, `SGD`, `ADAGRAD`, or
1067-
`MSPI`.
1077+
`MSPI`. For better (although slower) approximations, see [`P2Quantile`](@ref) and
1078+
[`Hist`](@ref).
10681079
10691080
# Example
10701081
@@ -1229,8 +1240,8 @@ _fit!(o::Sum{T}, x::Real) where {T<:AbstractFloat} = (o.sum += convert(T, x); o.
12291240
_fit!(o::Sum{T}, x::Real) where {T<:Integer} = (o.sum += round(T, x); o.n += 1)
12301241
_merge!(o::T, o2::T) where {T <: Sum} = (o.sum += o2.sum; o.n += o2.n; o)
12311242

1232-
#-----------------------------------------------------------------------# Summarizer
1233-
mutable struct Summarizer{T} <: OnlineStat{T}
1234-
group::Group
1235-
end
1236-
nobs(o::Summarizer) = nobs(o.group)
1243+
# #-----------------------------------------------------------------------# Summarizer
1244+
# mutable struct Summarizer{T} <: OnlineStat{T}
1245+
# group::Group
1246+
# end
1247+
# nobs(o::Summarizer) = nobs(o.group)

0 commit comments

Comments
 (0)