diff --git a/scripts/world_bank/wdi/README.md b/scripts/world_bank/wdi/README.md index ef1f0f5dc6..239d0f7277 100644 --- a/scripts/world_bank/wdi/README.md +++ b/scripts/world_bank/wdi/README.md @@ -146,5 +146,24 @@ If you want to perform "only download", run the below command: python3 worldbank.py --mode=download ``` +### Golden files, increased threshold, and golden checks in validation_config.json + +The `GOLDENS_CHECK` validator confirms that the import includes a specific set of expected records. This is useful for verifying that critical StatVars, Places, or specific metadata combinations are consistently present in the output. + +The validator compares the input data (usually from the stats data source) against one or more "golden" files (MCF or CSV). + +If any combination of values in a golden file row is missing from the input, the validation fails. The missing golden rows are then listed in the validation report JSON. + +To generate the golden files, run the commands below: +```bash +# goldens from the output CSV +python3 validator_goldens.py --validate_goldens_input=../../scripts/world_bank/wdi/output/WorldBank.csv --generate_goldens=golden_data/golden_WorldBank.csv --goldens_must_include="ISO3166Alpha3:gs://unresolved_mcf/import_validation/top_100k_places.csv" --generate_goldens_property_sets="ISO3166Alpha3" +``` + +```bash +# goldens from the summary report +python3 validator_goldens.py --validate_goldens_input="summary_report.csv" --generate_goldens=golden_data/golden_summary_report.csv --generate_goldens_property_sets="StatVar|Units|MinDate|MeasurementMethods|observationPeriods" +``` + We highly recommend the use of the import validation tool for this import which you can find in https://github.com/datacommonsorg/tools/tree/master/import-validation-helper. 
diff --git a/scripts/world_bank/wdi/golden_data/golden_WorldBank.csv b/scripts/world_bank/wdi/golden_data/golden_WorldBank.csv new file mode 100644 index 0000000000..567688bb57 --- /dev/null +++ b/scripts/world_bank/wdi/golden_data/golden_WorldBank.csv @@ -0,0 +1,219 @@ +"ISO3166Alpha3","value" +"","{'ISO3166Alpha3': 'dcid:Earth'}" +"","{'ISO3166Alpha3': 'dcid:country/ABW'}" +"","{'ISO3166Alpha3': 'dcid:country/AFG'}" +"","{'ISO3166Alpha3': 'dcid:country/AGO'}" +"","{'ISO3166Alpha3': 'dcid:country/ALB'}" +"","{'ISO3166Alpha3': 'dcid:country/AND'}" +"","{'ISO3166Alpha3': 'dcid:country/ARE'}" +"","{'ISO3166Alpha3': 'dcid:country/ARG'}" +"","{'ISO3166Alpha3': 'dcid:country/ARM'}" +"","{'ISO3166Alpha3': 'dcid:country/ATG'}" +"","{'ISO3166Alpha3': 'dcid:country/AUS'}" +"","{'ISO3166Alpha3': 'dcid:country/AUT'}" +"","{'ISO3166Alpha3': 'dcid:country/AZE'}" +"","{'ISO3166Alpha3': 'dcid:country/BDI'}" +"","{'ISO3166Alpha3': 'dcid:country/BEL'}" +"","{'ISO3166Alpha3': 'dcid:country/BEN'}" +"","{'ISO3166Alpha3': 'dcid:country/BFA'}" +"","{'ISO3166Alpha3': 'dcid:country/BGD'}" +"","{'ISO3166Alpha3': 'dcid:country/BGR'}" +"","{'ISO3166Alpha3': 'dcid:country/BHR'}" +"","{'ISO3166Alpha3': 'dcid:country/BHS'}" +"","{'ISO3166Alpha3': 'dcid:country/BIH'}" +"","{'ISO3166Alpha3': 'dcid:country/BLR'}" +"","{'ISO3166Alpha3': 'dcid:country/BLZ'}" +"","{'ISO3166Alpha3': 'dcid:country/BMU'}" +"","{'ISO3166Alpha3': 'dcid:country/BOL'}" +"","{'ISO3166Alpha3': 'dcid:country/BRA'}" +"","{'ISO3166Alpha3': 'dcid:country/BRB'}" +"","{'ISO3166Alpha3': 'dcid:country/BRN'}" +"","{'ISO3166Alpha3': 'dcid:country/BTN'}" +"","{'ISO3166Alpha3': 'dcid:country/BWA'}" +"","{'ISO3166Alpha3': 'dcid:country/CAF'}" +"","{'ISO3166Alpha3': 'dcid:country/CAN'}" +"","{'ISO3166Alpha3': 'dcid:country/CHE'}" +"","{'ISO3166Alpha3': 'dcid:country/CHL'}" +"","{'ISO3166Alpha3': 'dcid:country/CHN'}" +"","{'ISO3166Alpha3': 'dcid:country/CIV'}" +"","{'ISO3166Alpha3': 'dcid:country/CMR'}" +"","{'ISO3166Alpha3': 
'dcid:country/COD'}" +"","{'ISO3166Alpha3': 'dcid:country/COG'}" +"","{'ISO3166Alpha3': 'dcid:country/COL'}" +"","{'ISO3166Alpha3': 'dcid:country/COM'}" +"","{'ISO3166Alpha3': 'dcid:country/CPV'}" +"","{'ISO3166Alpha3': 'dcid:country/CRI'}" +"","{'ISO3166Alpha3': 'dcid:country/CUW'}" +"","{'ISO3166Alpha3': 'dcid:country/CYM'}" +"","{'ISO3166Alpha3': 'dcid:country/CYP'}" +"","{'ISO3166Alpha3': 'dcid:country/CZE'}" +"","{'ISO3166Alpha3': 'dcid:country/DEU'}" +"","{'ISO3166Alpha3': 'dcid:country/DJI'}" +"","{'ISO3166Alpha3': 'dcid:country/DMA'}" +"","{'ISO3166Alpha3': 'dcid:country/DNK'}" +"","{'ISO3166Alpha3': 'dcid:country/DOM'}" +"","{'ISO3166Alpha3': 'dcid:country/DZA'}" +"","{'ISO3166Alpha3': 'dcid:country/ECU'}" +"","{'ISO3166Alpha3': 'dcid:country/EGY'}" +"","{'ISO3166Alpha3': 'dcid:country/ERI'}" +"","{'ISO3166Alpha3': 'dcid:country/ESP'}" +"","{'ISO3166Alpha3': 'dcid:country/EST'}" +"","{'ISO3166Alpha3': 'dcid:country/ETH'}" +"","{'ISO3166Alpha3': 'dcid:country/FIN'}" +"","{'ISO3166Alpha3': 'dcid:country/FJI'}" +"","{'ISO3166Alpha3': 'dcid:country/FRA'}" +"","{'ISO3166Alpha3': 'dcid:country/FRO'}" +"","{'ISO3166Alpha3': 'dcid:country/FSM'}" +"","{'ISO3166Alpha3': 'dcid:country/GAB'}" +"","{'ISO3166Alpha3': 'dcid:country/GBR'}" +"","{'ISO3166Alpha3': 'dcid:country/GEO'}" +"","{'ISO3166Alpha3': 'dcid:country/GHA'}" +"","{'ISO3166Alpha3': 'dcid:country/GIN'}" +"","{'ISO3166Alpha3': 'dcid:country/GMB'}" +"","{'ISO3166Alpha3': 'dcid:country/GNB'}" +"","{'ISO3166Alpha3': 'dcid:country/GNQ'}" +"","{'ISO3166Alpha3': 'dcid:country/GRC'}" +"","{'ISO3166Alpha3': 'dcid:country/GRD'}" +"","{'ISO3166Alpha3': 'dcid:country/GTM'}" +"","{'ISO3166Alpha3': 'dcid:country/GUY'}" +"","{'ISO3166Alpha3': 'dcid:country/HKG'}" +"","{'ISO3166Alpha3': 'dcid:country/HND'}" +"","{'ISO3166Alpha3': 'dcid:country/HRV'}" +"","{'ISO3166Alpha3': 'dcid:country/HTI'}" +"","{'ISO3166Alpha3': 'dcid:country/HUN'}" +"","{'ISO3166Alpha3': 'dcid:country/IDN'}" +"","{'ISO3166Alpha3': 
'dcid:country/IND'}" +"","{'ISO3166Alpha3': 'dcid:country/IRL'}" +"","{'ISO3166Alpha3': 'dcid:country/IRN'}" +"","{'ISO3166Alpha3': 'dcid:country/IRQ'}" +"","{'ISO3166Alpha3': 'dcid:country/ISL'}" +"","{'ISO3166Alpha3': 'dcid:country/ISR'}" +"","{'ISO3166Alpha3': 'dcid:country/ITA'}" +"","{'ISO3166Alpha3': 'dcid:country/JAM'}" +"","{'ISO3166Alpha3': 'dcid:country/JOR'}" +"","{'ISO3166Alpha3': 'dcid:country/JPN'}" +"","{'ISO3166Alpha3': 'dcid:country/KAZ'}" +"","{'ISO3166Alpha3': 'dcid:country/KEN'}" +"","{'ISO3166Alpha3': 'dcid:country/KGZ'}" +"","{'ISO3166Alpha3': 'dcid:country/KHM'}" +"","{'ISO3166Alpha3': 'dcid:country/KIR'}" +"","{'ISO3166Alpha3': 'dcid:country/KNA'}" +"","{'ISO3166Alpha3': 'dcid:country/KOR'}" +"","{'ISO3166Alpha3': 'dcid:country/KWT'}" +"","{'ISO3166Alpha3': 'dcid:country/LAO'}" +"","{'ISO3166Alpha3': 'dcid:country/LBN'}" +"","{'ISO3166Alpha3': 'dcid:country/LBR'}" +"","{'ISO3166Alpha3': 'dcid:country/LBY'}" +"","{'ISO3166Alpha3': 'dcid:country/LCA'}" +"","{'ISO3166Alpha3': 'dcid:country/LKA'}" +"","{'ISO3166Alpha3': 'dcid:country/LSO'}" +"","{'ISO3166Alpha3': 'dcid:country/LTU'}" +"","{'ISO3166Alpha3': 'dcid:country/LUX'}" +"","{'ISO3166Alpha3': 'dcid:country/LVA'}" +"","{'ISO3166Alpha3': 'dcid:country/MAC'}" +"","{'ISO3166Alpha3': 'dcid:country/MAR'}" +"","{'ISO3166Alpha3': 'dcid:country/MDA'}" +"","{'ISO3166Alpha3': 'dcid:country/MDG'}" +"","{'ISO3166Alpha3': 'dcid:country/MDV'}" +"","{'ISO3166Alpha3': 'dcid:country/MEX'}" +"","{'ISO3166Alpha3': 'dcid:country/MHL'}" +"","{'ISO3166Alpha3': 'dcid:country/MKD'}" +"","{'ISO3166Alpha3': 'dcid:country/MLI'}" +"","{'ISO3166Alpha3': 'dcid:country/MLT'}" +"","{'ISO3166Alpha3': 'dcid:country/MMR'}" +"","{'ISO3166Alpha3': 'dcid:country/MNE'}" +"","{'ISO3166Alpha3': 'dcid:country/MNG'}" +"","{'ISO3166Alpha3': 'dcid:country/MOZ'}" +"","{'ISO3166Alpha3': 'dcid:country/MRT'}" +"","{'ISO3166Alpha3': 'dcid:country/MUS'}" +"","{'ISO3166Alpha3': 'dcid:country/MWI'}" +"","{'ISO3166Alpha3': 
'dcid:country/MYS'}" +"","{'ISO3166Alpha3': 'dcid:country/NAM'}" +"","{'ISO3166Alpha3': 'dcid:country/NCL'}" +"","{'ISO3166Alpha3': 'dcid:country/NER'}" +"","{'ISO3166Alpha3': 'dcid:country/NGA'}" +"","{'ISO3166Alpha3': 'dcid:country/NIC'}" +"","{'ISO3166Alpha3': 'dcid:country/NLD'}" +"","{'ISO3166Alpha3': 'dcid:country/NOR'}" +"","{'ISO3166Alpha3': 'dcid:country/NPL'}" +"","{'ISO3166Alpha3': 'dcid:country/NRU'}" +"","{'ISO3166Alpha3': 'dcid:country/NZL'}" +"","{'ISO3166Alpha3': 'dcid:country/OMN'}" +"","{'ISO3166Alpha3': 'dcid:country/PAK'}" +"","{'ISO3166Alpha3': 'dcid:country/PAN'}" +"","{'ISO3166Alpha3': 'dcid:country/PER'}" +"","{'ISO3166Alpha3': 'dcid:country/PHL'}" +"","{'ISO3166Alpha3': 'dcid:country/PLW'}" +"","{'ISO3166Alpha3': 'dcid:country/PNG'}" +"","{'ISO3166Alpha3': 'dcid:country/POL'}" +"","{'ISO3166Alpha3': 'dcid:country/PRT'}" +"","{'ISO3166Alpha3': 'dcid:country/PRY'}" +"","{'ISO3166Alpha3': 'dcid:country/PSE'}" +"","{'ISO3166Alpha3': 'dcid:country/PYF'}" +"","{'ISO3166Alpha3': 'dcid:country/QAT'}" +"","{'ISO3166Alpha3': 'dcid:country/ROU'}" +"","{'ISO3166Alpha3': 'dcid:country/RUS'}" +"","{'ISO3166Alpha3': 'dcid:country/RWA'}" +"","{'ISO3166Alpha3': 'dcid:country/SAU'}" +"","{'ISO3166Alpha3': 'dcid:country/SDN'}" +"","{'ISO3166Alpha3': 'dcid:country/SEN'}" +"","{'ISO3166Alpha3': 'dcid:country/SGP'}" +"","{'ISO3166Alpha3': 'dcid:country/SLB'}" +"","{'ISO3166Alpha3': 'dcid:country/SLE'}" +"","{'ISO3166Alpha3': 'dcid:country/SLV'}" +"","{'ISO3166Alpha3': 'dcid:country/SMR'}" +"","{'ISO3166Alpha3': 'dcid:country/SOM'}" +"","{'ISO3166Alpha3': 'dcid:country/SRB'}" +"","{'ISO3166Alpha3': 'dcid:country/SSD'}" +"","{'ISO3166Alpha3': 'dcid:country/STP'}" +"","{'ISO3166Alpha3': 'dcid:country/SUR'}" +"","{'ISO3166Alpha3': 'dcid:country/SVK'}" +"","{'ISO3166Alpha3': 'dcid:country/SVN'}" +"","{'ISO3166Alpha3': 'dcid:country/SWE'}" +"","{'ISO3166Alpha3': 'dcid:country/SWZ'}" +"","{'ISO3166Alpha3': 'dcid:country/SXM'}" +"","{'ISO3166Alpha3': 
'dcid:country/SYC'}" +"","{'ISO3166Alpha3': 'dcid:country/SYR'}" +"","{'ISO3166Alpha3': 'dcid:country/TCA'}" +"","{'ISO3166Alpha3': 'dcid:country/TCD'}" +"","{'ISO3166Alpha3': 'dcid:country/TGO'}" +"","{'ISO3166Alpha3': 'dcid:country/THA'}" +"","{'ISO3166Alpha3': 'dcid:country/TJK'}" +"","{'ISO3166Alpha3': 'dcid:country/TKM'}" +"","{'ISO3166Alpha3': 'dcid:country/TLS'}" +"","{'ISO3166Alpha3': 'dcid:country/TON'}" +"","{'ISO3166Alpha3': 'dcid:country/TTO'}" +"","{'ISO3166Alpha3': 'dcid:country/TUN'}" +"","{'ISO3166Alpha3': 'dcid:country/TUR'}" +"","{'ISO3166Alpha3': 'dcid:country/TUV'}" +"","{'ISO3166Alpha3': 'dcid:country/TZA'}" +"","{'ISO3166Alpha3': 'dcid:country/UGA'}" +"","{'ISO3166Alpha3': 'dcid:country/UKR'}" +"","{'ISO3166Alpha3': 'dcid:country/URY'}" +"","{'ISO3166Alpha3': 'dcid:country/USA'}" +"","{'ISO3166Alpha3': 'dcid:country/UZB'}" +"","{'ISO3166Alpha3': 'dcid:country/VCT'}" +"","{'ISO3166Alpha3': 'dcid:country/VEN'}" +"","{'ISO3166Alpha3': 'dcid:country/VNM'}" +"","{'ISO3166Alpha3': 'dcid:country/VUT'}" +"","{'ISO3166Alpha3': 'dcid:country/WSM'}" +"","{'ISO3166Alpha3': 'dcid:country/XKS'}" +"","{'ISO3166Alpha3': 'dcid:country/YEM'}" +"","{'ISO3166Alpha3': 'dcid:country/ZAF'}" +"","{'ISO3166Alpha3': 'dcid:country/ZMB'}" +"","{'ISO3166Alpha3': 'dcid:country/ZWE'}" +"","{'ISO3166Alpha3': 'dcid:ChannelIslands'}" +"","{'ISO3166Alpha3': 'dcid:country/ASM'}" +"","{'ISO3166Alpha3': 'dcid:country/CUB'}" +"","{'ISO3166Alpha3': 'dcid:country/GIB'}" +"","{'ISO3166Alpha3': 'dcid:country/GRL'}" +"","{'ISO3166Alpha3': 'dcid:country/GUM'}" +"","{'ISO3166Alpha3': 'dcid:country/MAF'}" +"","{'ISO3166Alpha3': 'dcid:country/PRI'}" +"","{'ISO3166Alpha3': 'dcid:country/PRK'}" +"","{'ISO3166Alpha3': 'dcid:country/VGB'}" +"","{'ISO3166Alpha3': 'dcid:country/VIR'}" +"","{'ISO3166Alpha3': 'dcid:country/IMN'}" +"","{'ISO3166Alpha3': 'dcid:country/LIE'}" +"","{'ISO3166Alpha3': 'dcid:country/MNP'}" +"","{'ISO3166Alpha3': 'dcid:country/MCO'}" diff --git 
a/scripts/world_bank/wdi/golden_data/golden_summary_report.csv b/scripts/world_bank/wdi/golden_data/golden_summary_report.csv new file mode 100644 index 0000000000..cd7541999f --- /dev/null +++ b/scripts/world_bank/wdi/golden_data/golden_summary_report.csv @@ -0,0 +1,80 @@ +"StatVar","MinDate","Units","MeasurementMethods","observationPeriods" +"Count_Death_IntentionalSelfHarm_Male_AsFractionOf_Count_Person_Male","2000","[Per100000Males]","[]","[P1Y]" +"Amount_EconomicActivity_GrossNationalIncome_PurchasingPowerParity","1990","[InternationalDollar]","[]","[P1Y]" +"Count_Person_Upto4Years_Wasting_AsFractionOf_Count_Person_Upto4Years","1983","[Percent]","[JointChildMalnutritionEstimate]","[P1Y]" +"Count_Person_25OrMoreYears_DoctorateDegree_AsFractionOf_Count_Person_25OrMoreYears","1994","[]","[]","[P1Y]" +"Amount_Emissions_CarbonDioxide_PerCapita","1970","[MetricTon]","[]","[P1Y]" +"Count_Person_25OrMoreYears_Male_TertiaryEducation_AsFractionOf_Count_Person_25OrMoreYears_Male","1970","[]","[]","[P1Y]" +"LifeExpectancy_Person_Female","1960","[Year]","[]","[P1Y]" +"Count_Person_25OrMoreYears_Male_DoctorateDegree_AsFractionOf_Count_Person_25OrMoreYears_Male","1994","[]","[]","[P1Y]" +"Count_Death_0Years_Female_AsFractionOf_Count_BirthEvent_LiveBirth_Female","1960","[Per1000FemaleLiveBirths]","[UnitedNationsIGMEEstimate]","[P1Y]" +"Count_CriminalActivities_MurderAndNonNegligentManslaughter_AsFractionOf_Count_Person","1990","[Per100000Persons]","[]","[P1Y]" +"Amount_EconomicActivity_ExpenditureActivity_HealthcareExpenditure_AsFractionOf_Count_Person","2000","[InternationalDollar, USDollar]","[]","[P1Y]" +"Amount_EconomicActivity_ExpenditureActivity_EducationExpenditure_Government_AsFractionOf_Amount_EconomicActivity_ExpenditureActivity_Government","1980","[Percent]","[]","[P1Y]" +"Count_Person_25OrMoreYears_Male_BachelorsDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears_Male","1970","[]","[]","[P1Y]" +"FertilityRate_Person_Female","1960","[]","[]","[]" 
+"Count_Person_Rural","1960","[]","[WorldBankEstimate]","[P1Y]" +"Count_Person_25OrMoreYears_Female_TertiaryEducation_AsFractionOf_Count_Person_25OrMoreYears_Female","1970","[]","[]","[P1Y]" +"Count_Person_Urban","1960","[]","[WorldBankEstimate]","[P1Y]" +"Count_Person_Upto4Years_Overweight_AsFractionOf_Count_Person_Upto4Years","1983","[]","[]","[P1Y]" +"LifeExpectancy_Person_Male","1960","[Year]","[]","[P1Y]" +"Count_BirthEvent_LiveBirth_AsFractionOf_Count_Person","1960","[Per1000Persons]","[]","[P1Y]" +"MortalityRate_Person_Upto4Years_AsFractionOf_Count_BirthEvent_LiveBirth","1960","[Per1000LiveBirths]","[]","[P1Y]" +"Count_Person","1960","[]","[]","[P1Y]" +"Count_Person_7To14Years_Male_Employed_AsFractionOf_Count_Person_7To14Years_Male","1994","[Percent]","[]","[P1Y]" +"Count_Person_Upto4Years_Male_Wasting_AsFractionOf_Count_Person_Upto4Years_Male","1986","[Percent]","[JointChildMalnutritionEstimate]","[P1Y]" +"Amount_EconomicActivity_ExpenditureActivity_EducationExpenditure_Government_AsFractionOf_Amount_EconomicActivity_GrossDomesticProduction_Nominal","1970","[Percent]","[]","[P1Y]" +"Count_Person_25OrMoreYears_BachelorsDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears","1970","[]","[]","[P1Y]" +"Count_Person_15OrMoreYears_Female_Smoking_AsFractionOf_Count_Person_15OrMoreYears_Female","2000","[]","[AgeAdjustedPrevalence]","[P1Y]" +"Count_Person_15OrMoreYears_Smoking_AsFractionOf_Count_Person_15OrMoreYears","2000","[]","[AgeAdjustedPrevalence]","[P1Y]" +"Amount_EconomicActivity_GrossNationalIncome_PurchasingPowerParity_PerCapita","1990","[InternationalDollar]","[]","[P1Y]" +"Count_Person_Upto4Years_Male_Overweight_AsFractionOf_Count_Person_Upto4Years_Male","1986","[]","[]","[P1Y]" +"Count_Death_0Years","1960","[]","[UnitedNationsIGMEEstimate]","[P1Y]" +"Amount_EconomicActivity_ExpenditureActivity_TertiaryEducationExpenditure_Government_AsFractionOf_Amount_EconomicActivity_ExpenditureActivity_EducationExpenditure_Government","1970","[]","[]","[P1Y]" 
+"Count_Person_Upto4Years_Male_SevereWasting_AsFractionOf_Count_Person_Upto4Years_Male","1986","[Percent]","[JointChildMalnutritionEstimate]","[P1Y]" +"Amount_Consumption_Electricity_PerCapita","1990","[KilowattHour]","[]","[P1Y]" +"Count_Death_0Years_Male_AsFractionOf_Count_BirthEvent_LiveBirth_Male","1960","[Per1000MaleLiveBirths]","[UnitedNationsIGMEEstimate]","[P1Y]" +"Amount_Consumption_Energy_PerCapita","1990","[KilogramOfOilEquivalent]","[]","[P1Y]" +"Count_Death_IntentionalSelfHarm_Female_AsFractionOf_Count_Person_Female","2000","[Per100000Females]","[]","[P1Y]" +"Count_Person_15OrMoreYears_Male_Smoking_AsFractionOf_Count_Person_15OrMoreYears_Male","2000","[]","[AgeAdjustedPrevalence]","[P1Y]" +"Count_CriminalActivities_MurderAndNonNegligentManslaughter_Male_AsFractionOf_Count_Person_Male","1990","[Per100000Males]","[]","[P1Y]" +"Amount_Remittance_InwardRemittance_AsFractionOf_Amount_EconomicActivity_GrossDomesticProduction_Nominal","1970","[Percent]","[WorldBankEstimate]","[P1Y]" +"Count_Person_15To64Years_InLaborForce_AsFractionOf_Count_Person_15To64Years","1990","[]","[]","[P1Y]" +"Count_Person_7To14Years_Employed_AsFractionOf_Count_Person_7To14Years","1994","[Percent]","[]","[P1Y]" +"GiniIndex_EconomicActivity","1963","[]","[WorldBankEstimate]","[P1Y]" +"Count_Person_25OrMoreYears_Female_MastersDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears_Female","1990","[]","[]","[P1Y]" +"Count_Person_25OrMoreYears_MastersDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears","1990","[]","[]","[P1Y]" +"Count_CriminalActivities_MurderAndNonNegligentManslaughter_Female_AsFractionOf_Count_Person_Female","1990","[Per100000Females]","[]","[P1Y]" +"Count_Person_15To64Years_Female_InLaborForce_AsFractionOf_Count_Person_15To64Years_Female","1990","[]","[]","[P1Y]" +"Amount_Stock_AsFractionOf_Amount_EconomicActivity_GrossDomesticProduction_Nominal","1975","[Percent]","[]","[P1Y]" 
+"Count_Person_25OrMoreYears_Female_DoctorateDegree_AsFractionOf_Count_Person_25OrMoreYears_Female","1994","[]","[]","[P1Y]" +"GrowthRate_Amount_EconomicActivity_GrossDomesticProduction","1961","[]","[]","[P1Y]" +"Count_Death_AsAFractionOfCount_Person","1960","[Per1000Persons]","[WorldBankWeightedAverage]","[P1Y]" +"Amount_EconomicActivity_GrossDomesticProduction_Nominal","1960","[USDollar]","[]","[P1Y]" +"Count_Person_15To64Years_Male_InLaborForce_AsFractionOf_Count_Person_15To64Years_Male","1990","[]","[]","[P1Y]" +"Amount_Remittance_InwardRemittance","1970","[USDollar]","[WorldBankEstimate]","[P1Y]" +"Count_Person_Upto4Years_SevereWasting_AsFractionOf_Count_Person_Upto4Years","1983","[Percent]","[JointChildMalnutritionEstimate]","[P1Y]" +"Count_Person_25OrMoreYears_Female_BachelorsDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears_Female","1970","[]","[]","[P1Y]" +"Count_Person_7To14Years_Female_Employed_AsFractionOf_Count_Person_7To14Years_Female","1994","[Percent]","[]","[P1Y]" +"Count_Person_25OrMoreYears_Male_MastersDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears_Male","1990","[]","[]","[P1Y]" +"Amount_EconomicActivity_GrossDomesticProduction_Nominal_PerCapita","1960","[USDollar]","[]","[P1Y]" +"Amount_Consumption_Alcohol_15OrMoreYears_AsFractionOf_Count_Person_15OrMoreYears","2000","[Liter]","[WorldHealthOrganizationEstimates]","[P1Y]" +"Count_Person_15OrMoreYears_InLaborForce_Female_AsFractionOf_Count_Person_InLaborForce","1990","[]","[]","[P1Y]" +"Count_Person_ResidingLessThan5MetersAboveSeaLevel_AsFractionOf_Count_Person","1990","[]","[]","[P1Y]" +"Count_Product_MobileCellularSubscription_AsFractionOf_Count_Person","1960","[]","[]","[P1Y]" +"Count_Person_InLaborForce","1990","[]","[InternationalLaborOrganization]","[P1Y]" +"Count_Death_IntentionalSelfHarm_AsFractionOf_Count_Person","2000","[Per100000Persons]","[]","[P1Y]" 
+"Count_Death_0Years_AsFractionOf_Count_BirthEvent_LiveBirth","1960","[Per1000LiveBirths]","[UnitedNationsIGMEEstimate]","[P1Y]" +"Count_Person_Upto4Years_Female_Wasting_AsFractionOf_Count_Person_Upto4Years_Female","1986","[Percent]","[JointChildMalnutritionEstimate]","[P1Y]" +"Amount_Remittance_OutwardRemittance","1970","[USDollar]","[WorldBankEstimate]","[P1Y]" +"Count_Person_Upto4Years_Female_Overweight_AsFractionOf_Count_Person_Upto4Years_Female","1986","[]","[]","[P1Y]" +"Count_Person_IsInternetUser_PerCapita","1990","[]","[]","[P1Y]" +"Amount_Production_ElectricityFromNuclearSources_AsFractionOf_Amount_Production_Energy","1990","[]","[]","[P1Y]" +"Count_Person_Upto4Years_Female_SevereWasting_AsFractionOf_Count_Person_Upto4Years_Female","1986","[Percent]","[JointChildMalnutritionEstimate]","[P1Y]" +"Count_Person_25OrMoreYears_TertiaryEducation_AsFractionOf_Count_Person_25OrMoreYears","1970","[]","[]","[P1Y]" +"Amount_Production_ElectricityFromOilGasOrCoalSources_AsFractionOf_Amount_Production_Energy","1990","[]","[]","[P1Y]" +"GrowthRate_Count_Person","1961","[]","[]","[P1Y]" +"Amount_Consumption_RenewableEnergy_AsFractionOf_Amount_Consumption_Energy","1990","[]","[]","[P1Y]" +"Amount_Stock","1975","[USDollar]","[]","[P1Y]" +"LifeExpectancy_Person","1960","[Year]","[]","[]" +"Count_Person_20To79Years_Diabetes_AsFractionOf_Count_Person_20To79Years","2000","[]","[]","[P1Y]" diff --git a/scripts/world_bank/wdi/manifest.json b/scripts/world_bank/wdi/manifest.json index bc3927141e..eb427c0472 100644 --- a/scripts/world_bank/wdi/manifest.json +++ b/scripts/world_bank/wdi/manifest.json @@ -20,7 +20,8 @@ "WorldBankCountries.csv", "schema_csvs/WorldBankIndicators_prod.csv" ], - "cron_schedule": "0 11 * * 2" + "cron_schedule": "0 11 * * 2", + "validation_config_file": "validation_config.json" } ] } \ No newline at end of file diff --git a/scripts/world_bank/wdi/validation_config.json b/scripts/world_bank/wdi/validation_config.json new file mode 100644 index 
0000000000..7a7a9c70e3 --- /dev/null +++ b/scripts/world_bank/wdi/validation_config.json @@ -0,0 +1,28 @@ +{ + "schema_version": "1.0", + "rules": [ + { + "rule_id": "check_deleted_records_percent", + "description": "Checks that the percentage of deleted points is within the threshold.", + "validator": "DELETED_RECORDS_PERCENT", + "params": { + "threshold": 0.1 + } + }, + { + "rule_id": "check_goldens_output_csv", + "validator": "GOLDENS_CHECK", + "params": { + "golden_files": "golden_data/golden_WorldBank.csv", + "input_files": "output/WorldBank.csv" + } + }, + { + "rule_id": "check_goldens_summary_report", + "validator": "GOLDENS_CHECK", + "params": { + "golden_files": "golden_data/golden_summary_report.csv" + } + } + ] +} \ No newline at end of file diff --git a/tools/import_validation/Validations.md b/tools/import_validation/Validations.md index 4efebb3a55..d46ece74fc 100644 --- a/tools/import_validation/Validations.md +++ b/tools/import_validation/Validations.md @@ -72,6 +72,8 @@ To generate goldens for the summary_report.csv to verify that all the expected StatVars are generated with the corresponding number of places and dates, run the following: +By default, the golden files are compared against summary_report.csv as the input: + ```shell python3 validator_goldens.py \ --validate_goldens_input=summary_report.csv \