A dummy data set to illustrate various data cleaning functions.
dummy
A data frame with 4 columns:
Date: The date of the reading.
Variable: The name of the variable.
Value: The value of the reading.
Units: The units of the value.
demo(dummy)
#>
#>
#> demo(dummy)
#> ---- ~~~~~
#>
#> > # Copyright 2015 Province of British Columbia
#> > #
#> > # Licensed under the Apache License, Version 2.0 (the "License");
#> > # you may not use this file except in compliance with the License.
#> > # You may obtain a copy of the License at
#> > #
#> > # http://www.apache.org/licenses/LICENSE-2.0
#> > #
#> > # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
#> > # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#> > # See the License for the specific language governing permissions and limitations under the License.
#> >
#> > library(wqbc)
#>
#> > options(wqbc.messages = TRUE)
#>
#> > data(dummy)
#>
#> > print(dummy)
#> Date Variable Value Units
#> 1 2000-01-01 Aluminium Dissolved 20.0 mg/L
#> 2 2000-01-01 Aluminium Dissolved 1.0 mg/L
#> 3 2000-01-01 Aluminium Dissolved 2.0 mg/L
#> 4 2000-01-01 pH 8.0 PH UNITS
#> 5 2000-01-01 pH 9.5 PH UNITS
#> 6 2000-01-02 DISSOLVED ALUMINUM 1.0 MG/L
#> 7 2000-01-02 DISSOLVED ALUMINUM 2000.0 uG/L
#> 8 2000-01-04 Aluminium Dissolved 2000.0 ug/L
#> 9 2000-01-04 Aluminium Dissolved 1.0 mg/L
#> 10 2000-01-05 Aluminium Dissolved 1.0 mg/L
#> 11 2000-01-05 Aluminium Dissolved 40.0 mg/L
#> 12 2000-01-05 Aluminium Dissolved NA mg/L
#> 13 2000-01-06 Aluminium Dissolved 10.0 mg/L
#> 14 2000-01-06 Aluminium Dissolved 20.0 mg/L
#> 15 2000-01-06 Aluminium Dissolved NA mg/L
#> 16 1977-05-25 Zinc Total 1.0 ug/L
#> 17 1977-05-25 Zinc Total 1000.0 midichlorians
#> 18 1978-12-01 Kryptonite 1.0 ug/L
#> 19 1978-12-01 pH 7.0 PH UNITS
#>
#> > dummy$Units <- substitute_units(dummy$Units)
#> Substituted 'MG/L' with 'mg/L', 'PH UNITS' with 'pH' and 'uG/L' with 'ug/L'.
#> Failed to substitute 'midichlorians'.
#>
#> > dummy$Variable <- substitute_variables(dummy$Variable)
#> Substituted 'DISSOLVED ALUMINUM' with 'Aluminum Dissolved'.
#> Failed to substitute 'Aluminium Dissolved' and 'Kryptonite'.
#>
#> > print(dummy)
#> Date Variable Value Units
#> 1 2000-01-01 <NA> 20.0 mg/L
#> 2 2000-01-01 <NA> 1.0 mg/L
#> 3 2000-01-01 <NA> 2.0 mg/L
#> 4 2000-01-01 pH 8.0 pH
#> 5 2000-01-01 pH 9.5 pH
#> 6 2000-01-02 Aluminum Dissolved 1.0 mg/L
#> 7 2000-01-02 Aluminum Dissolved 2000.0 ug/L
#> 8 2000-01-04 <NA> 2000.0 ug/L
#> 9 2000-01-04 <NA> 1.0 mg/L
#> 10 2000-01-05 <NA> 1.0 mg/L
#> 11 2000-01-05 <NA> 40.0 mg/L
#> 12 2000-01-05 <NA> NA mg/L
#> 13 2000-01-06 <NA> 10.0 mg/L
#> 14 2000-01-06 <NA> 20.0 mg/L
#> 15 2000-01-06 <NA> NA mg/L
#> 16 1977-05-25 Zinc Total 1.0 ug/L
#> 17 1977-05-25 Zinc Total 1000.0 <NA>
#> 18 1978-12-01 <NA> 1.0 ug/L
#> 19 1978-12-01 pH 7.0 pH
#>
#> > dummy <- dplyr::filter(dummy, Units %in% lookup_units() & Variable %in% lookup_variables())
#>
#> > print(dummy)
#> Date Variable Value Units
#> 1 2000-01-01 pH 8.0 pH
#> 2 2000-01-01 pH 9.5 pH
#> 3 2000-01-02 Aluminum Dissolved 1.0 mg/L
#> 4 2000-01-02 Aluminum Dissolved 2000.0 ug/L
#> 5 1977-05-25 Zinc Total 1.0 ug/L
#> 6 1978-12-01 pH 7.0 pH
#>
#> > dummy <- standardize_wqdata(dummy)
#> Standardizing water quality data...
#> Standardized water quality data.
#>
#> > print(dummy)
#> # A tibble: 6 × 4
#> Date Variable Value Units
#> <date> <chr> <dbl> <chr>
#> 1 2000-01-02 Aluminum Dissolved 1 mg/L
#> 2 2000-01-02 Aluminum Dissolved 2 mg/L
#> 3 1977-05-25 Zinc Total 1 ug/L
#> 4 2000-01-01 pH 8 pH
#> 5 2000-01-01 pH 9.5 pH
#> 6 1978-12-01 pH 7 pH
#>
#> > dummy <- clean_wqdata(dummy)
#> Cleaning water quality data...
#> Identified 0 outliers in water quality data.
#> Cleansed water quality data.
#>
#> > print(dummy)
#> # A tibble: 4 × 6
#> Date Variable Value Units Outlier DetectionLimit
#> <date> <chr> <dbl> <chr> <lgl> <dbl>
#> 1 2000-01-02 Aluminum Dissolved 1.5 mg/L FALSE NA
#> 2 1977-05-25 Zinc Total 1 ug/L FALSE NA
#> 3 1978-12-01 pH 7 pH FALSE NA
#> 4 2000-01-01 pH 8.75 pH FALSE NA
#>
#> > dummy <- calc_limits(dummy, term = "short")
#> Cleaning water quality data...
#> Identified 0 outliers in water quality data.
#> Cleansed water quality data.
#> Standardizing water quality data...
#> Standardized water quality data.
#> Calculating short-term water quality limits...
#> Dropped 1 values for Zinc Total without limits
#> Dropped 2 values for pH without limits
#> Calculated short-term water quality limits.
#>
#> > print(dummy)
#> # A tibble: 1 × 7
#> Date Variable Value UpperLimit Units Outlier DetectionLimit
#> <date> <chr> <dbl> <dbl> <chr> <lgl> <dbl>
#> 1 2000-01-02 Aluminum Dissolved 1.5 0.1 mg/L NA NA