Hi Paul, colleagues,
I have translated Paul’s recipe into a small bash script. As expected the bash script has a bad performance and is not suitable for a large scale / high-resolution analysis. Anyhow, maybe this workaround is useful for somebody else. I think slicing large domains into smaller tiles by a concurrent reduction of data points should increase the speed and may allow a simple form of parallelization. Any further ideas to improve the performance?
Here is the bash script
#Recipe of Paul
#-----------------------------------------------------------------------------------------------------
#Decide on your grid specifications and make a dummy empty grid with grdmath
#Dump the grid with grd2xyz to ascii
#Write a script that loops over the records in this list of nodes
#For each record, dump it to a point.txt file
#Run gmtselect -Cpoint.txt +dradius on the whole dataset, then count the number of records found
#Add the node location and the count to a growing output table
#Run xyz2grd on that table to make the final grid
function NumberOfDataPoints()
{
grdmath $reg $res -r 0 = dummy.grd
grd2xyz dummy.grd | awk ‘{print $1,$2}’ > Nodes
echo lon lat NoP > Density.txt
while read line; do
echo “$line” > point.txt
NumberOfPeaks=$(gmtselect peaks -Cpoint.txt+d"$radius"km -fg | wc -l)
echo “$line” $NumberOfPeaks >> Density.txt
echo “$line” $NumberOfPeaks
done < Nodes
xyz2grd Density.txt $reg $res -r -h1 -GDensity.grd
#clean up
rm dummy.grd Nodes point.txt
}
#Define Region of interest
W=0
E=20
S=40
N=50
reg=-R"$W"/"$E"/"$S"/"$N"
#Resolution of the resulting grid
res=-I30m
#Name of the input dataset
data=peaks
#Radius of the moving Window in [km]
radius=100
#Generate Subset of Input Data: Region of Interest + Buffer larger the Radius of gmtselect
#Buffer in degrees
Buffer=1
Win=$(echo “$W - $Buffer” | bc -l)
Ein=$(echo “$E + $Buffer” | bc -l)
Sin=$(echo “$S - $Buffer” | bc -l)
Nin=$(echo “$N + $Buffer” | bc -l)
regInputData=-R"$Win"/"$Ein"/"$Sin"/"$Nin"
#Preprocess the point dataset --> Col 1 : long; Col 2 : lat
cat Peaks.P500.Sorted | awk ‘BEGIN{FS=","}{print $5,$4,$6,$7}’ | gmtselect $regInputData > $data
#Call function to count data within the defined radius
NumberOfDataPoints