Código
pacman::p_load(codebook, skimr, rlang, here, tidyverse)
Código
# Read the serialized R object stored at the path below into `base_madre`.
# The path is relative, so it is resolved against the working directory in
# effect when this chunk runs.
base_madre <- readRDS(
  file = "../../../../input/data/proc_data/base_madre.rds"
)
Código
# Keep every column of `base_madre` except those whose name starts with
# "curso_"; the result is the data frame described by the codebook.
# NOTE(review): RUT looks like a personal identifier and is summarised as a
# numeric variable (min/max included) in the rendered report -- confirm
# whether it should be excluded or anonymised before sharing the codebook.
libro_codigos <- select(base_madre, -starts_with("curso_"))

# Call codebook() directly on the named object (not through a pipe) so the
# report keeps showing "libro_codigos" as the dataset name.
codebook(libro_codigos)

A.0.1 Metadata

A.0.1.1 Description

Dataset name: libro_codigos

The dataset has N=1983 rows and 19 columns. 1605 rows have no missing values on any column.

Metadata for search engines
  • Date published: 2025-12-29
x
RUT
promedio_bruto
ingreso
sexo
NACIONALIDAD
ORIGEN_ETNICO
COMUNA_ORIGEN
SIT_ACADEMICA_CARRERA
carrera
cohorte
REGION_ORIGEN
colegio
EGRESO_EMEDIA
PROMEDIO_EM
nse
pct_paes_psu
decil_paes_psu
promedio_calculado
mean_dif

A.1 Variables

A.1.1 RUT

A.1.1.1 Distribution

Distribution of values for RUT

1 missing value.

A.1.1.2 Summary statistics

name data_type n_missing complete_rate min median max mean sd hist label
RUT numeric 1 0.9994957 8578025 2.1e+07 2.8e+07 21189279 1112119 ▁▁▁▇▁ NA

A.1.2 promedio_bruto

A.1.2.1 Distribution

Distribution of values for promedio_bruto

0 missing values.

A.1.2.2 Summary statistics

name data_type n_missing complete_rate min median max mean sd hist label
promedio_bruto numeric 0 1 2.7 6.5 7 6.403999 0.3343942 ▁▁▁▂▇ NA

A.1.3 ingreso

A.1.3.1 Distribution

Distribution of values for ingreso

0 missing values.

A.1.3.2 Summary statistics

name data_type n_missing complete_rate n_unique empty min max whitespace label
ingreso character 0 1 12 0 3 31 0 NA

A.1.4 sexo

A.1.4.1 Distribution

Distribution of values for sexo

0 missing values.

A.1.4.2 Summary statistics

name data_type ordered value_labels n_missing complete_rate n_unique top_counts label
sexo factor FALSE 1. 0,
2. 1
0 1 2 1: 1335, 0: 648 NA

A.1.5 NACIONALIDAD

A.1.5.1 Distribution

Distribution of values for NACIONALIDAD

0 missing values.

A.1.5.2 Summary statistics

name data_type n_missing complete_rate n_unique empty min max whitespace label
NACIONALIDAD character 0 1 21 0 7 28 0 NA

A.1.6 ORIGEN_ETNICO

A.1.6.1 Distribution

Distribution of values for ORIGEN_ETNICO

286 missing values.

A.1.6.2 Summary statistics

name data_type n_missing complete_rate n_unique empty min max whitespace label
ORIGEN_ETNICO character 286 0.8557741 10 0 5 24 0 NA

A.1.7 COMUNA_ORIGEN

A.1.7.1 Distribution

Distribution of values for COMUNA_ORIGEN

0 missing values.

A.1.7.2 Summary statistics

name data_type n_missing complete_rate n_unique empty min max whitespace label
COMUNA_ORIGEN character 0 1 147 0 3 20 0 NA

A.1.8 SIT_ACADEMICA_CARRERA

A.1.8.1 Distribution

Distribution of values for SIT_ACADEMICA_CARRERA

0 missing values.

A.1.8.2 Summary statistics

name data_type n_missing complete_rate n_unique empty min max whitespace label
SIT_ACADEMICA_CARRERA character 0 1 5 0 11 26 0 NA

A.1.9 carrera

A.1.9.1 Distribution

Distribution of values for carrera

0 missing values.

A.1.9.2 Summary statistics

name data_type n_missing complete_rate n_unique empty min max whitespace label
carrera character 0 1 5 0 10 33 0 NA

A.1.10 cohorte

A.1.10.1 Distribution

Distribution of values for cohorte

1 missing value.

A.1.10.2 Summary statistics

name data_type ordered value_labels n_missing complete_rate n_unique top_counts label
cohorte factor FALSE 1. 2020,
2. 2021,
3. 2022,
4. 2023,
5. 2024,
6. 2025
1 0.9994957 6 202: 452, 202: 421, 202: 385, 202: 362 NA

A.1.11 REGION_ORIGEN

A.1.11.1 Distribution

Distribution of values for REGION_ORIGEN

14 missing values.

A.1.11.2 Summary statistics

name data_type n_missing complete_rate n_unique empty min max whitespace label
REGION_ORIGEN character 14 0.99294 22 0 15 30 0 NA

A.1.12 colegio

A.1.12.1 Distribution

Distribution of values for colegio

0 missing values.

A.1.12.2 Summary statistics

name data_type n_missing complete_rate n_unique empty min max whitespace label
colegio character 0 1 4 0 5 24 0 NA

A.1.13 EGRESO_EMEDIA

A.1.13.1 Distribution

Distribution of values for EGRESO_EMEDIA

22 missing values.

A.1.13.2 Summary statistics

name data_type n_missing complete_rate min median max mean sd hist label
EGRESO_EMEDIA numeric 22 0.9889057 1986 2020 2023 2020.212 2.545078 ▁▁▁▁▇ NA

A.1.14 PROMEDIO_EM

A.1.14.1 Distribution

Distribution of values for PROMEDIO_EM

0 missing values.

A.1.14.2 Summary statistics

name data_type n_missing complete_rate min median max mean sd hist label
PROMEDIO_EM numeric 0 1 0 6.3 7 5.121377 2.591191 ▂▁▁▁▇ NA

A.1.15 nse

A.1.15.1 Distribution

Distribution of values for nse

0 missing values.

A.1.15.2 Summary statistics

name data_type ordered value_labels n_missing complete_rate n_unique top_counts label
nse factor FALSE 1. 0,
2. 1,
3. 2,
4. 3,
5. 4,
6. 5,
7. 6,
8. 7,
9. 8
0 1 9 5: 1034, 4: 313, 7: 305, 3: 120 NA

A.1.16 pct_paes_psu

A.1.16.1 Distribution

Distribution of values for pct_paes_psu

95 missing values.

A.1.16.2 Summary statistics

name data_type n_missing complete_rate min median max mean sd hist label
pct_paes_psu numeric 95 0.9520928 0.12 50 100 50.09019 28.91379 ▇▇▇▇▇ NA

A.1.17 decil_paes_psu

A.1.17.1 Distribution

Distribution of values for decil_paes_psu

95 missing values.

A.1.17.2 Summary statistics

name data_type n_missing complete_rate min median max mean sd hist label
decil_paes_psu numeric 95 0.9520928 1 5 10 5.481992 2.874646 ▇▇▇▇▇ NA

A.1.18 promedio_calculado

A.1.18.1 Distribution

Distribution of values for promedio_calculado

0 missing values.

A.1.18.2 Summary statistics

name data_type n_missing complete_rate min median max mean sd hist label
promedio_calculado numeric 0 1 1.5 6 6.8 5.924136 0.5872285 ▁▁▁▂▇ NA

A.1.19 mean_dif

A.1.19.1 Distribution

Distribution of values for mean_dif

0 missing values.

A.1.19.2 Summary statistics

name data_type n_missing complete_rate min median max mean sd hist label
mean_dif numeric 0 1 -1.9 0.41 4.8 0.4798627 0.5973724 ▁▇▂▁▁ NA

A.2 Missingness report

A.3 Codebook table

JSON-LD metadata

The following JSON-LD can be found by search engines, if you share this codebook publicly on the web.

{
  "name": "libro_codigos",
  "datePublished": "2025-12-29",
  "description": "The dataset has N=1983 rows and 19 columns.\n1605 rows have no missing values on any column.\n\n\n## Table of variables\nThis table contains variable names, labels, and number of missing values.\nSee the complete codebook for more.\n\n|name                  |label | n_missing|\n|:---------------------|:-----|---------:|\n|RUT                   |NA    |         1|\n|promedio_bruto        |NA    |         0|\n|ingreso               |NA    |         0|\n|sexo                  |NA    |         0|\n|NACIONALIDAD          |NA    |         0|\n|ORIGEN_ETNICO         |NA    |       286|\n|COMUNA_ORIGEN         |NA    |         0|\n|SIT_ACADEMICA_CARRERA |NA    |         0|\n|carrera               |NA    |         0|\n|cohorte               |NA    |         1|\n|REGION_ORIGEN         |NA    |        14|\n|colegio               |NA    |         0|\n|EGRESO_EMEDIA         |NA    |        22|\n|PROMEDIO_EM           |NA    |         0|\n|nse                   |NA    |         0|\n|pct_paes_psu          |NA    |        95|\n|decil_paes_psu        |NA    |        95|\n|promedio_calculado    |NA    |         0|\n|mean_dif              |NA    |         0|\n\n### Note\nThis dataset was automatically described using the [codebook R package](https://rubenarslan.github.io/codebook/) (version 0.9.5).",
  "keywords": ["RUT", "promedio_bruto", "ingreso", "sexo", "NACIONALIDAD", "ORIGEN_ETNICO", "COMUNA_ORIGEN", "SIT_ACADEMICA_CARRERA", "carrera", "cohorte", "REGION_ORIGEN", "colegio", "EGRESO_EMEDIA", "PROMEDIO_EM", "nse", "pct_paes_psu", "decil_paes_psu", "promedio_calculado", "mean_dif"],
  "@context": "https://schema.org/",
  "@type": "Dataset",
  "variableMeasured": [
    {
      "name": "RUT",
      "@type": "propertyValue"
    },
    {
      "name": "promedio_bruto",
      "@type": "propertyValue"
    },
    {
      "name": "ingreso",
      "@type": "propertyValue"
    },
    {
      "name": "sexo",
      "value": "1. 0,\n2. 1",
      "@type": "propertyValue"
    },
    {
      "name": "NACIONALIDAD",
      "@type": "propertyValue"
    },
    {
      "name": "ORIGEN_ETNICO",
      "@type": "propertyValue"
    },
    {
      "name": "COMUNA_ORIGEN",
      "@type": "propertyValue"
    },
    {
      "name": "SIT_ACADEMICA_CARRERA",
      "@type": "propertyValue"
    },
    {
      "name": "carrera",
      "@type": "propertyValue"
    },
    {
      "name": "cohorte",
      "value": "1. 2020,\n2. 2021,\n3. 2022,\n4. 2023,\n5. 2024,\n6. 2025",
      "@type": "propertyValue"
    },
    {
      "name": "REGION_ORIGEN",
      "@type": "propertyValue"
    },
    {
      "name": "colegio",
      "@type": "propertyValue"
    },
    {
      "name": "EGRESO_EMEDIA",
      "@type": "propertyValue"
    },
    {
      "name": "PROMEDIO_EM",
      "@type": "propertyValue"
    },
    {
      "name": "nse",
      "value": "1. 0,\n2. 1,\n3. 2,\n4. 3,\n5. 4,\n6. 5,\n7. 6,\n8. 7,\n9. 8",
      "@type": "propertyValue"
    },
    {
      "name": "pct_paes_psu",
      "@type": "propertyValue"
    },
    {
      "name": "decil_paes_psu",
      "@type": "propertyValue"
    },
    {
      "name": "promedio_calculado",
      "@type": "propertyValue"
    },
    {
      "name": "mean_dif",
      "@type": "propertyValue"
    }
  ]
}