(Analysis by Benjamin Qi)

Note: I index the cows as $0\ldots N-1$ rather than $1\ldots N$.

To solve this problem in $\mathcal{O}(N^2)$ time, fix $r$ and count the number of $l$ such that both cows and $l$ and $r$ can be leaders. We do this by iterating over all possible $l$ in decreasing order.

My code:

#include<bits/stdc++.h>
using namespace std;
 
int main() {
	int N; cin >> N;
	vector<int> B(N); for (int& b: B) cin >> b;
	int64_t ans = 0;
	for (int r = 0; r < N; ++r) {
		vector<bool> occ(N+1);
		for (int l = r-1; l >= 0; --l) {
			if (B[l] == B[r]) break;
			if (!occ[B[l]]) {
				++ans;
				occ[B[l]] = 1;
			}
		}
	}
	cout << ans << "\n";
}

To solve this problem in $\mathcal{O}(N\log N)$ time, for each $l\le r$ define $\texttt{active}_r[l]$ to equal $1$ if cow $l$'s breed is unique in the range $l\ldots r$, and $0$ otherwise. Also let $\texttt{prev_oc}[j]$ equal the maximum index $i<j$ such that $B_i=B_j$.

For each $r$, we need to sum $\texttt{active}_r[l]$ over all $l\in [\texttt{prev_oc}[r]+1,r-1]$. Note that $\texttt{active}_r[l]=\texttt{active}_{r-1}[l]$ for almost all $l$, aside from for $l=r$ and $l=\texttt{prev_oc}[r]$. Therefore, when transitioning from $r-1$ to $r$, we need to make up to two point updates while allowing range sum queries over $\texttt{active}$. This can be done using a data structure such as a binary indexed tree.

Danny Mittal's code:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.StringTokenizer;
 
public class PairsOfCows {
 
    public static void main(String[] args) throws IOException {
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        int n = Integer.parseInt(in.readLine());
        StringTokenizer tokenizer = new StringTokenizer(in.readLine());
        int[] last = new int[n + 1];
        long answer = 0;
        BinaryIndexTree bit = new BinaryIndexTree(n);
        for (int j = 1; j <= n; j++) {
            int k = Integer.parseInt(tokenizer.nextToken());
            if (last[k] != 0) {
                bit.update(last[k], -1L);
            }
            answer += bit.query(j) - bit.query(last[k]);
            last[k] = j;
            bit.update(j, 1L);
        }
        System.out.println(answer);
    }
 
    static class BinaryIndexTree {
        final int n;
        final long[] bit;
 
        BinaryIndexTree(int n) {
            this.n = n;
            this.bit = new long[n + 1];
        }
 
        void update(int j, long delta) {
            for (; j <= n; j += j & -j) {
                bit[j] += delta;
            }
        }
 
        long query(int j) {
            long res = 0;
            for (; j > 0; j -= j & -j) {
                res += bit[j];
            }
            return res;
        }
    }
}

Alternatively, use an order statistic tree (also mentioned in the link above).

My code:

#include <bits/stdc++.h>
using namespace std;
 
#include <ext/pb_ds/assoc_container.hpp>
using namespace __gnu_pbds;
template <class T> using Tree = tree<T, null_type, less<T>,
	rb_tree_tag, tree_order_statistics_node_update>;
 
int main() {
	int N; cin >> N;
	Tree<int> T;
	vector<int> last_oc(N+1,-1);
	int64_t ans = 0;
	for (int i = 0; i < N; ++i) {
		int b; cin >> b;
		ans += T.size()-T.order_of_key(last_oc[b]+1);
		T.insert(i);
		if (last_oc[b] != -1) T.erase(last_oc[b]);
		last_oc[b] = i;
	}
	cout << ans << "\n";
}