10 Nonparametric Real Spend
The previous chapter, Chapter 9 , gave us the estimated value of the real spend CDF at 101 points. For the parametric fitting step, we need the probability mass function (PMF) value of the real spend age distribution at each of Monero’s 100 million spendable RingCT outputs. In this chapter we will also produce summary statistics of the estimated real spend age distribution.
10.1 Reverse the transformation of output age
In the previous chapter, Chapter 9 , we obtained an estimate of the real spend age distribution after it was transformed by the CDF of the decoy distribution in Chapter 8 . To get back to the un-transformed distribution, we need to compute , the inverse CDF (i.e. the quantile function) of the decoy distribution. Let be the estimated CDF of the transformed real spend distribution, evaluated at support points . The value of the estimated un-transformed real spend distribution at the th support point is defined in Equation 10.1 :
10.2 Interpolation of the CDF
We have evaluated the CDF at only 101 points. We want a smooth interpolation through all points, i.e. every transaction output. Hippel, Hunter, and Drown (2017) suggest fitting a cubic spline (piecewise polynomial) with monotonicity restrictions. We use the splinebins()
function in the binsmooth
package to accomplish the interpolation. Later, the PMF can be computed by taking the first difference of the CDF.
10.3 Computing summary statistics
Having produced the estimated real spend CDF , we can compute summary statistics. Quantiles, including the median, are computed easily by evaluating the inverse CDF at the desired quantile. Other statistics, such as mean, standard deviation, skewness, and kurtosis, can be computed by evaluating a Riemann–Stieltjes integral with the appropriate integrand.
10.4 Code
This code can be run in a new R session.
library(decoyanalysis)
library(data.table)
<- "weekly-weighted-cdf"
cdf.dir <- "results"
results.dir
<- paste0(results.dir, "/results-01/")
results.dir.run
<- NULL
exclusion.weeks
<- list()
nonparametric.real.spend
for (label in c("all", "rucknium", "rucknium_isthmus")) {
<- paste0(results.dir.run, "bjr/", label, "/")
results.dir.run.label
<- list.files(results.dir.run.label)
week.set <- week.set[grepl(".qs", week.set)]
week.set
<- list()
weekly.real.spend.cdfs
<- data.table(week = week.set, mean = 0, median = 0, sd = 0, skewness = 0, kurtosis = 0)
summary.stats
<- data.table(week = week.set, component_1 = 0, component_2 = 0, component_3 = 0, component_4 = 0)
mixing.proportions
for (week.to.analyze in week.set) {
<- qs::qread(paste0(results.dir.run.label, week.to.analyze))
bjr.results cat(week.to.analyze, round(100 * rev(sort(bjr.results$mixing.proportions)), 3), "\n")
# print(round(100 * rev(sort(bjr.results$mixing.proportions)), 3))
== week.to.analyze, component_1 := rev(sort(bjr.results$mixing.proportions))[1]]
mixing.proportions[week == week.to.analyze, component_2 := rev(sort(bjr.results$mixing.proportions))[2]]
mixing.proportions[week == week.to.analyze, component_3 := rev(sort(bjr.results$mixing.proportions))[3]]
mixing.proportions[week == week.to.analyze, component_4 := rev(sort(bjr.results$mixing.proportions))[4]]
mixing.proportions[week
}
<- 0
support.max <- c()
all.weeks.weighted.v.mean
for (week.to.analyze in setdiff(week.set, exclusion.weeks)) {
if (week.to.analyze %in% summary.stats[mean != 0, week]) { next }
cat(week.to.analyze, "\n")
<- qs::qread(paste0(cdf.dir, week.to.analyze))
weekly.weighted.cdf
<- weekly.weighted.cdf[[1]]$weighted.v.mean
weighted.v.mean <- weighted.v.mean
all.weeks.weighted.v.mean[week.to.analyze] <- weekly.weighted.cdf[[1]]$weekly.z.max
weekly.z.max <- max(c(support.max, weekly.z.max))
support.max <- weekly.weighted.cdf[[1]]$weekly.weighted.cdf
weekly.weighted.cdf
<- stepfun(seq_along(weekly.weighted.cdf), c(0, weekly.weighted.cdf))
intermediate.stepfun
<- Vectorize(function(x) { gbutils::cdf2quantile(x, cdf = intermediate.stepfun) })
weekly.weighted.inv.cdf
<- qs::qread(paste0(results.dir.run.label, week.to.analyze))
bjr.results
<- which.max(bjr.results$mixing.proportions)
wallet2.dist.index <- bjr.results$cdf$CDF[, wallet2.dist.index]
Fn.hat.value <- bjr.results$cdf$cdf.points
supp.points
<- punif
decoy <- decoy
Fb = 16
M <- 1/M
alpha
<- patra.sen.bjr(Fn.hat.value, supp.points, Fb, alpha)
patra.sen.bjr.results
<- patra.sen.bjr.results$Fs.hat
Fs.hat
<- vector("numeric", length(supp.points))
supp.points.transformed
for (i in seq_along(supp.points)) {
if (supp.points[i] < 0.000001) {
<- 0
supp.points.transformed[i] next
}if (supp.points[i] > max(weekly.weighted.cdf)) {
# Sometimes max(weekly.weighted.cdf) is not equal to 1
<- length(weekly.weighted.cdf)
supp.points.transformed[i] next
}<- weekly.weighted.inv.cdf(supp.points[i])
supp.points.transformed[i]
}
<- aggregate(Fs.hat, by = list(Fs.hat.transformed = supp.points.transformed), FUN = max)
regularized.real.spend.cdf
<- regularized.real.spend.cdf$Fs.hat.transformed
Fs.hat.transformed.reg <- regularized.real.spend.cdf$x
supp.points.reg # Regularized eliminates duplicate support points
<- binsmooth::splinebins(
weekly.real.spend.cdfs[[week.to.analyze]] bEdges = c(Fs.hat.transformed.reg, max(Fs.hat.transformed.reg) + 1),
bCounts = c(diff(c(0, supp.points.reg)), 0))$splineCDF
<- stepfun(c(Fs.hat.transformed.reg), c(0, supp.points.reg))
real.spend.ecdf
<- weighted.v.mean/(60^2*24)
days.unit <- spatstat.univar::stieltjes(function(x){x * days.unit}, real.spend.ecdf)[[1]]
est.mean <- spatstat.univar::stieltjes(function(x){(x * days.unit - est.mean)^2}, real.spend.ecdf)[[1]]
est.var
if ( ! week.to.analyze %in% summary.stats$week) {
<- rbind(summary.stats, data.table(week = week.to.analyze), fill = TRUE)
summary.stats
}
== week.to.analyze, mean := est.mean]
summary.stats[week == week.to.analyze, median := Fs.hat.transformed.reg[findInterval(0.5, supp.points.reg)] * days.unit ]
summary.stats[week == week.to.analyze, percentile.25 := Fs.hat.transformed.reg[findInterval(0.25, supp.points.reg)] * days.unit ]
summary.stats[week == week.to.analyze, sd := sqrt(est.var)]
summary.stats[week == week.to.analyze, skewness := spatstat.univar::stieltjes(function(x){(x * days.unit - est.mean)^3}, real.spend.ecdf)[[1]] / est.var^(3/2)]
summary.stats[week == week.to.analyze, kurtosis := spatstat.univar::stieltjes(function(x){(x * days.unit - est.mean)^4}, real.spend.ecdf)[[1]] / est.var^(2)]
summary.stats[week
}
setorder(summary.stats, week)
print(summary.stats)
<- list(weekly.real.spend.cdfs = weekly.real.spend.cdfs, all.weeks.weighted.v.mean = all.weeks.weighted.v.mean, support.max = support.max,
nonparametric.real.spend[[label]] mixing.proportions = mixing.proportions, summary.stats = summary.stats)
}
::qsave(nonparametric.real.spend, file = paste0(results.dir.run, "nonparametric-real-spend.qs")) qs