feat(ds): r2
This commit is contained in:
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
.Rproj.user
|
||||||
|
.Rhistory
|
||||||
|
.RData
|
||||||
|
.Ruserdata
|
||||||
49
ds/25-1/r2/2.R
Normal file
49
ds/25-1/r2/2.R
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# Exploratory analysis of household income by zip code.
#
# Reads the pipe-delimited file "./zipIncome.txt" (two columns, renamed to
# `zipCode` and `income`), prints summary statistics, removes rows with a
# missing income, trims extreme incomes and draws several diagnostic plots.

library(ggplot2)

# ---- Load ----
df <- read.csv("./zipIncome.txt", sep = "|")
colnames(df) <- c("zipCode", "income")

summary(df)
mean(df$income) # evaluates to NA if any income is missing

# ---- Drop rows with a missing income ----
# A logical mask is safe even when nothing is NA (a negative index built from
# an empty `which()` result would drop every row instead).
df <- df[!is.na(df$income), ]

mean(df$income)
median(df$income, na.rm = TRUE)

# ---- Raw and log-scale views ----
plot(x = df$income, y = df$zipCode, xlab = "income", ylab = "zipCode")
df$incomelog <- log10(df$income)
hist(df$incomelog, breaks = 80)

# Reference points on the log10 scale: the two cut-offs used below and the
# median of the log incomes (kept under its original name `avg_incomelog`).
print(min_incomelog <- log10(7e3))
print(max_incomelog <- log10(2e5))
print(avg_incomelog <- median(df$incomelog))

# ---- Trim outliers: keep 7e3 < income < 2e5 (both bounds exclusive) ----
df <- subset(df, income > 7e3 & income < 2e5)
hist(df$incomelog, breaks = 80)
summary(df)

# ---- Income by zip code ----
boxplot(
  incomelog ~ zipCode,
  data = df,
  main = "boxplot",
  xlab = "zipCode",
  ylab = "incomelog"
)

# `zipCode` is read as a number; mapping it directly to `x` makes ggplot2
# treat the axis as continuous and draw a single box. Converting it to a
# factor gives one box (and one colour) per zip code, matching the base
# boxplot above.
ggplot(df, aes(x = factor(zipCode), y = income, color = factor(zipCode))) +
  geom_point(
    position = position_jitter(width = 0.2),
    alpha = 0.2
  ) +
  geom_boxplot(
    alpha = 0.5,
    outlier.shape = NA, # raw points are already drawn by geom_point()
    width = 0.6,
    fill = "white",
    color = "black"
  ) +
  scale_y_log10(
    breaks = c(1e4, 25e3, 5e4, 1e5, 2e5, 5e5)
  ) +
  labs(
    title = "Распределение доходов по почтовым индексам",
    subtitle = "Scatter plot jitter",
    x = "zipCode",
    color = "zipCode"
  ) +
  theme_minimal()
|
||||||
|
|
||||||
13
ds/25-1/r2/r2.Rproj
Normal file
13
ds/25-1/r2/r2.Rproj
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
Version: 1.0
|
||||||
|
|
||||||
|
RestoreWorkspace: Default
|
||||||
|
SaveWorkspace: Default
|
||||||
|
AlwaysSaveHistory: Default
|
||||||
|
|
||||||
|
EnableCodeIndexing: Yes
|
||||||
|
UseSpacesForTab: Yes
|
||||||
|
NumSpacesForTab: 2
|
||||||
|
Encoding: UTF-8
|
||||||
|
|
||||||
|
RnwWeave: Sweave
|
||||||
|
LaTeX: pdfLaTeX
|
||||||
32040
ds/25-1/r2/zipIncome.txt
Normal file
32040
ds/25-1/r2/zipIncome.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user