package dqcalculators.duplicate;

import dsd.elements.DSDElement;
import dsd.records.Record;
import dsd.records.RecordSet;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.jena.sparql.sse.Tags;
import quality.DataQualityStore;
import util.clustering.Cluster;
import util.clustering.LanceWilliamsClustering;
import util.clustering.LinkageStrategy;
import util.distances.Distance;

/* loaded from: input_file:dqcalculators/duplicate/ClusteringDuplicateDetector.class */
/**
 * Detects duplicate records by sorting them on one field and clustering
 * consecutive, non-overlapping windows of the sorted list.
 *
 * <p>Each window holds at most {@code range} records and is clustered with
 * {@link LanceWilliamsClustering} using {@link LinkageStrategy#UPGMA}. Every
 * resulting cluster that contains more than one record is reported to
 * {@link DataQualityStore} as a set of duplicates of the configured element.
 *
 * <p>Instances keep mutable per-run state (counters and the current
 * clustering), so a single instance should not be used for concurrent runs.
 */
public class ClusteringDuplicateDetector {
    /** Element the detected duplicate sets are registered for. */
    private final DSDElement element;
    /** Value handed to {@code LanceWilliamsClustering.getClusters} when extracting clusters. */
    private final double threshold;
    /** Distance measure handed to the clustering. */
    private final Distance<Record> distance;
    /** Clustering of the window that is currently being evaluated. */
    private LanceWilliamsClustering<Record> clustering = null;
    /** Total number of clusters found during the last run, over all windows. */
    int nrClusters;
    /** Number of clusters of size one (or less) found during the last run. */
    int unicates;
    /** Maximum number of records per window. */
    int range;

    /**
     * Creates a detector.
     *
     * @param dSDElement element the duplicate sets are registered for
     * @param distance   distance measure between two records
     * @param d          threshold passed to the clustering when clusters are extracted
     * @param i          window size, i.e. the maximum number of records clustered together
     */
    public ClusteringDuplicateDetector(DSDElement dSDElement, Distance<Record> distance, double d, int i) {
        this.element = dSDElement;
        this.threshold = d;
        this.distance = distance;
        this.range = i;
    }

    /**
     * Sorts the given records by the string form of the field {@code str} and
     * clusters them window by window, registering every multi-record cluster
     * as duplicates. The counters {@code nrClusters} and {@code unicates} are
     * reset at the start of each call.
     *
     * @param iterable records to examine
     * @param str      name of the field whose string form is the sort key
     * @throws IllegalArgumentException if there are records to process but the
     *                                  window size is not positive
     */
    public void detectDuplicates(Iterable<Record> iterable, final String str) {
        this.nrClusters = 0;
        this.unicates = 0;

        List<Record> records = new ArrayList<Record>();
        for (Record record : iterable) {
            records.add(record);
        }
        if (records.isEmpty()) {
            return;
        }
        // A window size of zero would never advance through the list.
        if (this.range <= 0) {
            throw new IllegalArgumentException("range must be positive, but was " + this.range);
        }

        Collections.sort(records, new Comparator<Record>() {
            @Override
            public int compare(Record record, Record record2) {
                // Each key must come from its own record; otherwise the sort is a no-op.
                return sortKey(record, str).compareTo(sortKey(record2, str));
            }
        });

        int total = records.size();
        int start = 0;
        while (start < total) {
            // Computed from the remaining count so that a very large range cannot overflow.
            int end = (total - start > this.range) ? start + this.range : total;
            // The last window may hold fewer than range records (possibly just one).
            // NOTE(review): assumes LanceWilliamsClustering accepts a single-element list - confirm.
            this.clustering = new LanceWilliamsClustering<>(records.subList(start, end), this.distance, LinkageStrategy.UPGMA);
            annoteClusters();
            start = end;
        }
    }

    /** Returns the sort key of a record: the field's string form, or {@code Tags.tagNull} if the field is null. */
    private static String sortKey(Record record, String fieldName) {
        Object value = record.getField(fieldName);
        return value == null ? Tags.tagNull : value.toString();
    }

    /**
     * Extracts the clusters of the current window, updates the counters and
     * registers every cluster with more than one record as a duplicate set.
     */
    private void annoteClusters() {
        List<Cluster<Record>> clusters = this.clustering.getClusters(this.threshold);
        this.nrClusters += clusters.size();
        for (Cluster<Record> cluster : clusters) {
            if (cluster.size() <= 1) {
                this.unicates++;
                continue;
            }
            RecordSet duplicates = new RecordSet();
            Iterator<Record> members = cluster.getElements().iterator();
            while (members.hasNext()) {
                duplicates.addRecord(members.next());
            }
            DataQualityStore.getDQRecords().addDuplicates(this.element, duplicates);
        }
    }
}
