feat(ds): r2

This commit is contained in:
2025-12-02 00:46:02 +03:00
parent b97a160ac1
commit 07a6cf6184
4 changed files with 32106 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata

49
ds/25-1/r2/2.R Normal file
View File

@ -0,0 +1,49 @@
# Exploratory analysis of income by zip code.
#
# Reads the pipe-separated file "./zipIncome.txt", drops rows whose income is
# missing, inspects the income distribution on a log10 scale, keeps only
# incomes strictly between 7e3 and 2e5, and draws per-zip-code box plots
# (base graphics and ggplot2).

library(ggplot2)

# Load data ----
df <- read.csv("./zipIncome.txt", sep = "|")
colnames(df) <- c("zipCode", "income")
summary(df)
# Computed before NA removal: this yields NA if any income is missing.
mean(df$income)

# Drop rows with a missing income ----
df <- df[!is.na(df$income), ]
mean(df$income)
median(df$income, na.rm = TRUE)
plot(x = df$income, y = df$zipCode, xlab = "income", ylab = "zipCode")

# Distribution on a log10 scale ----
df$incomelog <- log10(df$income)
hist(df$incomelog, breaks = 80)

# Income bounds (exclusive) used for the filter below; kept as named
# constants so the printed log10 values and the filter cannot drift apart.
income_min <- 7e3
income_max <- 2e5
min_incomelog <- log10(income_min)
print(min_incomelog)
max_incomelog <- log10(income_max)
print(max_incomelog)
# NOTE: despite the "avg" name this is the median of the log10 incomes.
avg_incomelog <- median(df$incomelog)
print(avg_incomelog)

# Keep only incomes inside the bounds ----
df <- subset(df, income > income_min & income < income_max)
hist(df$incomelog, breaks = 80)
summary(df)
boxplot(
  incomelog ~ zipCode,
  data = df,
  main = "boxplot",
  xlab = "zipCode",
  ylab = "incomelog"
)

# Jittered points with one box per zip code ----
# zipCode is wrapped in factor() so that ggplot2 treats it as a discrete
# grouping variable: with a numeric x, geom_boxplot() draws a single box for
# the whole data set and the colour scale becomes a continuous gradient.
# (Assumes read.csv() parsed zipCode as numeric -- factor() is harmless if it
# is already character.)
ggplot(df, aes(x = factor(zipCode), y = income, color = factor(zipCode))) +
  geom_point(
    position = position_jitter(width = 0.2),
    alpha = 0.2
  ) +
  geom_boxplot(
    alpha = 0.5,
    outlier.shape = NA, # outliers are already visible as jittered points
    width = 0.6,
    fill = "white",
    color = "black"
  ) +
  scale_y_log10(
    breaks = c(1e4, 25e3, 5e4, 1e5, 2e5, 5e5)
  ) +
  labs(
    title = "Распределение доходов по почтовым индексам",
    subtitle = "Scatter plot jitter",
    # Restore plain titles; otherwise they would read "factor(zipCode)".
    x = "zipCode",
    color = "zipCode"
  ) +
  theme_minimal()

13
ds/25-1/r2/r2.Rproj Normal file
View File

@ -0,0 +1,13 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX

32040
ds/25-1/r2/zipIncome.txt Normal file

File diff suppressed because it is too large Load Diff