#if defined(_WIN32)
// MSVC compiler family
#define COMPILER_MSVC 1
#if _MSC_VER >= 1600
// <stdint.h> is only available in VS2010 and later
#define HAS_STDINT 1
#endif
#if _XBOX_VER >= 200
// Xbox 360
#define TARGET_XBOX_360 1
#define CPU_POWERPC 1
#define PTR_SIZE 4
#else
#if defined(_M_X64)
// x64
#define CPU_X64 1
#define PTR_SIZE 8
#elif defined(_M_IX86)
// x86
#define CPU_X86 1
#define PTR_SIZE 4
#else
#error Unrecognized platform!
#endif
#endif
#elif defined(__GNUC__)
// GCC compiler family
#define COMPILER_GCC 1
#define HAS_STDINT 1
#if defined(__llvm__)
// LLVM
#define COMPILER_LLVM 1
#if __has_feature(c_atomic)
// No need to mark atomic##_t members as volatile
#define HAS_C11_MEMORY_MODEL 1
#endif
#endif
#if defined(__APPLE__)
// Apple MacOS/iOS
#define IS_APPLE 1
#endif
#if defined(__x86_64__)
// x64
#define CPU_X64 1
#define PTR_SIZE 8
#elif defined(__i386__)
// x86
#define CPU_X86 1
#define PTR_SIZE 4
#elif defined(__arm__)
// ARM
#define CPU_ARM 1
#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7EM__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__)
// ARMv7
#define CPU_ARM_VERSION 7
#define PTR_SIZE 4
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__)
// ARMv6
#define CPU_ARM_VERSION 6
#define PTR_SIZE 4
#else
// Could support earlier ARM versions at some point using compiler barriers and swp instruction
#error Unrecognized ARM CPU architecture version!
#endif
#if defined(__thumb__)
// Thumb instruction set mode
#define CPU_ARM_THUMB 1
#endif
#else
#error Unrecognized target CPU!
#endif
#else
#error Unrecognized compiler!
#endif
#if defined COMPILER_MSVC
#define VC_EXTRALEAN
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#endif
#include <string>
#include <thread>
#include <cinttypes>
#include <cstdint>
#include <vector>
#include <cmath>
#include <cassert>
#include <mutex>
#include <memory>
#include <algorithm>
#include <iostream>
extern int DebugLevel;
void slog(int level, char const *format, ...);
// Return a C++ string
std::string StringPrintf(const char* format, ...);
// Store result into a supplied string and return it
std::string& SStringPrintf(std::string & dst, const char* format, ...);
// Append result to a supplied string
void StringAppendF(std::string & dst, const char* format, ...);
// Lower-level routine that takes a va_list and appends to a specified
// string. All other routines are just convenience wrappers around it.
void StringAppendV(std::string & dst, const char* format, va_list ap);
inline int32_t load32(volatile int32_t *value) {
return *value;
}
inline void store32(volatile int32_t *value, int32_t desired) {
*value = desired;
}
inline void store64(volatile int64_t *value, int64_t desired) {
#if defined COMPILER_MSVC
#if CPU_X64 || TARGET_XBOX_360
*value = desired;
#else
// On 32-bit x86, the most compatible way to get an atomic 64-bit store is with cmpxchg8b.
// Essentially, perform compare & exchange(object, object->_nonatomic, desired)
// in a loop until it returns the original value.
// According to the Linux kernel (atomic64_cx8_32.S), we don't need the "lock;" prefix
// on cmpxchg8b since aligned 64-bit writes are already atomic on 586 and newer.
__asm
{
mov esi, value;
mov ebx, dword ptr desired;
mov ecx, dword ptr desired[4];
retry:
cmpxchg8b[esi];
jne retry;
}
#endif
#elif defined COMPILER_GCC
#if CPU_X64
*value = desired;
#else
auto expected = *value;
asm volatile("1: cmpxchg8b %0\n"
" jne 1b"
: "=m"(*value)
: "b"((uint32_t)desired), "c"((uint32_t)(desired >> 32)), "A"(expected));
#endif
#endif
}
inline int64_t load64(volatile int64_t *value) {
#if defined COMPILER_MSVC
#if CPU_X64 || TARGET_XBOX_360
return *value;
#else
int64_t result;
__asm
{
mov esi, value;
mov ebx, eax;
mov ecx, edx;
lock cmpxchg8b[esi];
mov dword ptr result, eax;
mov dword ptr result[4], edx;
}
return result;
#endif
#elif defined COMPILER_GCC
#if CPU_X64
return *value;
#else
int64_t original;
asm volatile("movl %%ebx, %%eax\n"
"movl %%ecx, %%edx\n"
"lock; cmpxchg8b %1"
: "=&A"(original)
: "m"(*value));
return original;
#endif
#endif
}
inline int64_t compareAndSwapValue64(volatile int64_t *value, int64_t expected, int64_t desired) {
#if defined COMPILER_MSVC
return _InterlockedCompareExchange64(value, desired, expected);
#elif defined COMPILER_GCC
#if CPU_X64
int64_t original;
asm volatile("lock; cmpxchgq %2, %1"
: "=a"(original), "+m"(*value)
: "q"(desired), "0"(expected));
return original;
#else
int64_t original;
asm volatile("lock; cmpxchg8b %1"
: "=A"(original), "+m"(*value)
: "b"((uint32_t)desired), "c"((uint32_t)(desired >> 32)), "0"(expected));
return original;
#endif
#endif
}
inline int32_t compareAndSwapValue32(volatile int32_t *value, int32_t expected, int32_t desired) {
#if defined COMPILER_MSVC
return _InterlockedCompareExchange(
reinterpret_cast<volatile uint32_t*>(value),
static_cast<uint32_t>(desired),
static_cast<uint32_t>(expected));
#elif defined COMPILER_GCC
int32_t original;
asm volatile("lock; cmpxchgl %2, %1"
: "=a"(original), "+m"(*value)
: "q"(desired), "0"(expected));
return original;
#endif
}
inline bool compareAndSwapBool32(volatile int32_t *value, int32_t expected, int32_t desired) {
return compareAndSwapValue32(value, expected, desired) == expected;
}
inline bool compareAndSwapBool64(volatile int64_t *value, int64_t expected, int64_t desired) {
return compareAndSwapValue64(value, expected, desired) == expected;
}
inline int64_t fetchAdd64(volatile int64_t *value, int64_t operand) {
#if defined COMPILER_MSVC
#if CPU_X64 || TARGET_XBOX_360
return static_cast<int64_t>(
_InterlockedExchangeAdd64(
reinterpret_cast<volatile int64_t*>(value),
static_cast<uint64_t>(operand)));
#else
int64_t expected = *value;
for (;;)
{
int64_t original = _InterlockedCompareExchange64(
reinterpret_cast<volatile int64_t*>(value),
static_cast<uint64_t>(expected + operand),
static_cast<uint64_t>(expected));
if (original == expected)
return original;
expected = original;
}
#endif
#elif defined COMPILER_GCC
#if CPU_X64
int64_t original;
asm volatile("lock; xaddq %0, %1"
: "=r"(original), "+m"(*value)
: "0"(operand));
return original;
#else
// This implementation generates an unnecessary cmp instruction after the cmpxchg8b.
// Could be optimized further using inline assembly.
for (;;)
{
int64_t original = *value;
if (compareAndSwapBool64(value, original, original + operand)) {
return original;
}
}
#endif
#endif
}
inline int32_t fetchAdd32(volatile int32_t *value, int32_t operand) {
#if defined COMPILER_MSVC
return static_cast<int32_t>(
_InterlockedExchangeAdd(
reinterpret_cast<volatile uint32_t*>(value),
static_cast<uint32_t>(operand)));
#elif defined COMPILER_GCC
int32_t original;
asm volatile("lock; xaddl %0, %1"
: "=r"(original), "+m"(*value)
: "0"(operand));
return original;
#endif
}
long long tps();
double milliseconds();
long int timeSinceEpoch();
class Spinlock {
volatile int32_t _value;
public:
Spinlock() {
store32(&_value, 0);
}
bool trylock() {
return compareAndSwapBool32(&_value, 0, 1);
}
void lock() {
volatile auto valuePtr = &_value;
while (true) {
for (int i = 0; i < 1024; i++) {
if (!*valuePtr) {
if (compareAndSwapBool32(valuePtr, 0, 1)) {
return;
}
}
std::this_thread::yield();
}
}
}
void unlock() {
store32(&_value, 0);
}
};
class SharedSpinlock {
Spinlock _sharedLock;
Spinlock _exclusiveLock;
int32_t _sharedCount;
public:
SharedSpinlock() :
_sharedCount(0) { }
void lock() {
_exclusiveLock.lock();
}
void unlock() {
_exclusiveLock.unlock();
}
void sharedLock() {
_sharedLock.lock();
_sharedCount++;
if (_sharedCount == 1) {
_exclusiveLock.lock();
}
_sharedLock.unlock();
}
void sharedUnlock() {
_sharedLock.lock();
_sharedCount--;
if (_sharedCount == 0) {
_exclusiveLock.unlock();
}
_sharedLock.unlock();
}
};
// An implementation of a nonblocking hashmap. It's MT safe on platforms with strong ordering (x86)
template <typename ValueType>
struct HT {
struct E {
uint64_t Key;
int32_t Value;
E() {
Key = 0LL;
Value = 0;
}
};
std::vector <SharedSpinlock> _sharedLocks;
std::vector <ValueType> _values;
std::vector <E> _table;
volatile uint32_t _valueIndex;
uint32_t _size;
uint32_t _tableSize;
uint32_t _mask;
HT(uint32_t N) {
//Indexing starts from 1 because 0 is a deleted sync
_valueIndex = 1;
uint32_t n = N;
while (n & (n + 1)) {
n |= (n + 1);
}
// Make it bigger for sparser collisions
// minimum 65k elements will need to 255k buckets
// with 90% access of first time hash, 99% second next bucket...
_table.resize(size_t(n) + 1);
_tableSize = n + 1;
// The value found on position 0 is special and it means it doesn't exist.
_size = N + 1;
_values.resize(N + 1);
_sharedLocks.resize(N + 1);
_mask = n;
}
uint32_t size() {
return static_cast<uint32_t>(
load32(
reinterpret_cast<int32_t*>(&_valueIndex))) - 1;
}
ValueType* values() {
return _values.data() + 1;
}
inline static uint32_t hash(uint64_t h) {
h ^= h >> 33;
h *= 0xff51afd7ed558ccd;
h ^= h >> 33;
h *= 0xc4ceb9fe1a85ec53;
h ^= h >> 33;
return static_cast<uint32_t>(h);
}
ValueType *begin() {
return _values.data();
}
ValueType *end() {
return _values.data() + _values.size();
}
ValueType *operator[](uint64_t key) {
auto h = hash(key);
for (uint32_t steps = 0; steps < _size; steps++) {
h = h & _mask;
auto kv = load64(
reinterpret_cast<volatile int64_t*>(&_table[h].Key));
if (kv == 0LL) {
return nullptr;
}
else if (static_cast<uint64_t>(kv) == key) {
int32_t vindex = load32(&_table[h].Value);
if (vindex > 0) {
return &_values[static_cast<uint32_t>(vindex)];
}
return nullptr;
}
h++;
}
return nullptr;
}
void lock(ValueType *valuePtr) {
if (valuePtr > begin() && valuePtr < end()) {
auto lockPosition = valuePtr - begin();
_sharedLocks[lockPosition].lock();
}
}
void unlock(ValueType *valuePtr) {
if (valuePtr > begin() && valuePtr < end()) {
auto lockPosition = valuePtr - begin();
_sharedLocks[lockPosition].unlock();
}
}
void sharedLock(ValueType *valuePtr) {
if (valuePtr > begin() && valuePtr < end()) {
auto lockPosition = valuePtr - begin();
_sharedLocks[lockPosition].sharedLock();
}
}
void sharedUnlock(ValueType *valuePtr) {
if (valuePtr > begin() && valuePtr < end()) {
auto lockPosition = valuePtr - begin();
_sharedLocks[lockPosition].sharedUnlock();
}
}
bool contains(uint64_t key) {
auto h = hash(key);
for (uint32_t steps = 0; steps < _size; steps++) {
h = h & _mask;
int64_t kv = load64(
reinterpret_cast<volatile int64_t*>(&_table[h].Key));
if (kv == 0LL) {
return false;
}
else if (static_cast<uint64_t>(kv) == key) {
int32_t vindex = load32(&_table[h].Value);
if (vindex > 0) {
return true;
}
}
h++;
}
return false;
}
bool insert(uint64_t key, ValueType const &value) {
if (!key) {
return false;
}
auto h = hash(key);
for (uint32_t steps = 0; steps < _size; steps++) {
h = h & _mask;
// Make sure that we write the key into an empty slot atomically.
auto kv = static_cast<uint64_t>(compareAndSwapValue64(
reinterpret_cast<volatile int64_t*>(&_table[h].Key),
0LL,
static_cast<int64_t>(key)));
if (kv == 0LL) {
// This code is guaranteed to be executed only once for each bucket.
auto volatile prefetch =
static_cast<uint32_t>(load32(
reinterpret_cast<volatile int32_t*>(&_valueIndex)));
// Cannot insert, the table is full
if (prefetch >= _size) {
return false;
}
auto volatile vindex =
fetchAdd32(reinterpret_cast<volatile int32_t*>(&_valueIndex), 1);
store32(reinterpret_cast<volatile int32_t*>(&_table[h].Value), vindex);
_values[static_cast<uint32_t>(vindex)] = value;
return true;
}
else {
// We must be very careful here because someone else might want to either:
// 1. Insert and override the same key
// 2. Delete the key.
// If someone tries to insert here before vindex will be != 0 it's fine because we
// simply don't write the value. If someone tries to delete while being here
// we have to simply *undelete by setting the sign positive for vIndex.
// race conditions may happen if we insert/delete on the same key (as we expect).
auto vk = load32(
reinterpret_cast<volatile int32_t*>(&_table[h].Value));
if (vk < 0) {
auto vkz = compareAndSwapValue32(
reinterpret_cast<volatile int32_t*>(&_table[h].Value), vk, 0);
if (vkz == vk) {
store64(
reinterpret_cast<volatile int64_t*>(&_table[h].Key),
static_cast<int64_t>(key));
store32(reinterpret_cast<volatile int32_t*>(&_table[h].Value), -vk);
_values[static_cast<uint32_t>(-vk)] = value;
return true;
}
}
else if (kv == key) {
// Don't override the old value.
return false;
}
}
h++;
}
return false;
}
void remove(unsigned long long int key) {
auto h = hash(key);
for (uint32_t steps = 0; steps < _size; steps++) {
h = h & _mask;
// We didn't find the element.
int64_t kv = load64(reinterpret_cast<volatile int64_t*>(&_table[h].Key));
if (kv == 0LL) {
return;
}
// If the key matches we set the state to
// deleted by making the index negative
else if (static_cast<uint64_t>(kv) == key) {
auto volatile vindex = load32(&_table[h].Value);
if (vindex > 0) {
vindex = -vindex;
store32(
reinterpret_cast<volatile int32_t*>(&_table[h].Value),
vindex);
}
return;
}
h++;
}
}
};
template <int Size = 1>
class AdaptiveFilecache {
std::vector<int8_t> _bytes;
int _headOffset;
int _binCount;
std::vector<int> _chunkSizes;
std::vector<double> _weights;
std::vector<int> _chunkNumber;
std::vector<int> _startingOffsets;
std::vector<int> _currentOffsets;
// The adaptive behaviour is tweaked using these variables.
std::vector<double> _correctedWeigths;
std::vector<double> _sizeWeigths;
std::vector<double> _countWeigths;
double _numberToSizeInterpolator;
double _totalSize;
double _totalNumber;
struct Node {
int* Next;
int* Prev;
int* LRUScore;
Node() : Next(nullptr), Prev(nullptr), LRUScore(nullptr) { }
};
std::vector<std::pair<int, Node>> _freeListsHeads;
std::vector<std::pair<int, Node>> _freeListsTails;
std::vector<std::pair<int, Node>> _usedListsHeads;
std::vector<int> _usedListsSizes;
int _lruMax;
int readNode(int memIndex, Node &node) {
node.Next = reinterpret_cast<int *>(&_bytes[memIndex]);
node.Prev = node.Next + 1;
node.LRUScore = node.Next + 2;
return memIndex + 3 * sizeof(int);
}
void readNode(int8_t* address, Node &node) {
node.Next = reinterpret_cast<int *>(address);
node.Prev = node.Next + 1;
node.LRUScore = node.Next + 2;
}
void moveToFree(int bin, int iaddr) {
if (iaddr < 0 || iaddr == _freeListsTails[bin].first) {
return;
}
Node current, prev, next;
readNode(iaddr, current);
if (*current.Next != -1) {
readNode(*current.Next, next);
*next.Prev = *current.Prev;
}
if (*current.Prev != -1) {
readNode(*current.Prev, prev);
*prev.Next = *current.Next;
}
Node tail = _freeListsTails[bin].second;
*tail.Next = iaddr;
*current.Prev = _freeListsTails[bin].first;
*current.Next = -1;
readNode(iaddr, _freeListsTails[bin].second);
_freeListsTails[bin].first = iaddr;
}
void moveToUsed(int bin, int iaddr) {
if (iaddr < 0 || iaddr == _usedListsHeads[bin].first) {
return;
}
Node current, prev, next;
readNode(iaddr, current);
if (*current.Next != -1) {
readNode(*current.Next, next);
*next.Prev = *current.Prev;
}
else {
_freeListsTails[bin].first = *current.Prev;
readNode(*current.Prev, _freeListsTails[bin].second);
}
if (*current.Prev != -1) {
readNode(*current.Prev, prev);
*prev.Next = *current.Next;
}
Node head = _usedListsHeads[bin].second;
if (*head.Next != -1) {
readNode(*head.Next, next);
*next.Prev = iaddr;
}
*current.Next = *head.Next;
*current.Prev = _usedListsHeads[bin].first;
*head.Next = iaddr;
}
int8_t* allocatei(int bin) {
Node prev = _freeListsHeads[bin].second, node, next;
if (*prev.Next == -1) {
return nullptr;
}
int iaddr = *prev.Next;
moveToUsed(bin, iaddr);
return &_bytes[iaddr] + 3 * sizeof(int);
}
void freei(int bin, int8_t *address) {
moveToFree(bin, address - _bytes.data() - 3 * sizeof(int));
}
void initNodes() {
int k = 0;
int nodeAddress = 0;
while (k < _binCount) {
Node head;
auto prevAddress = nodeAddress;
nodeAddress = readNode(nodeAddress, head);
*head.Next = _startingOffsets[k];
*head.Prev = -1;
*head.LRUScore = _lruMax + 1;
_freeListsHeads[k].first = prevAddress;
_freeListsHeads[k].second = head;
Node it = head, prev = it;
auto counter = 0;
while (true) {
auto nextAddress = *it.Next;
if (nextAddress >= _startingOffsets[k + 1]) {
_freeListsTails[k].first = prevAddress;
_freeListsTails[k].second = it;
*it.Next = -1;
break;
}
readNode(nextAddress, it);
*it.Prev = prevAddress;
*it.Next = *prev.Next + _chunkSizes[k];
prev = it;
prevAddress = nextAddress;
counter++;
}
assert(counter == _chunkNumber[k]);
prevAddress = nodeAddress;
nodeAddress = readNode(nodeAddress, head);
*head.Next = -1;
*head.Prev = -1;
*head.LRUScore = _lruMax + 1;
_usedListsHeads[k].first = prevAddress;
_usedListsHeads[k].second = head;
k++;
}
}
template<typename TFunctor>
int8_t* lruGet(int bin, TFunctor const &invalidate) {
auto ptr = allocatei(bin);
if (ptr) {
Node node;
readNode(ptr, node);
if (*node.LRUScore == -1) {
invalidate(ptr);
}
}
else {
ptr = nullptr;
Node it = _usedListsHeads[bin].second;
Node min = it;
while (*it.Next != -1) {
readNode(*it.Next, it);
if (*min.LRUScore > *it.LRUScore) {
min = it;
}
}
int iaddr = reinterpret_cast<int8_t*>(min.Next) - _bytes.data();
if (iaddr != _usedListsHeads[bin].first) {
ptr = reinterpret_cast<int8_t*>(min.Next + 3);
invalidate(ptr);
}
}
hit(ptr);
return ptr;
}
int binOf(int8_t *address) {
if (address < _bytes.data() ||
address >= _bytes.data() + _bytes.size()) {
return -1;
}
for (int i = 0; i < _binCount + 1; i++) {
if (address - _bytes.data() < _startingOffsets[i]) {
return i - 1;
}
}
return -1;
}
public:
void hit(int8_t* address) {
address -= 3 * sizeof(int);
int bin = binOf(address);
if (bin == -1) {
return;
}
int iaddress = address - _bytes.data();
Node node;
readNode(iaddress, node);
if (*node.LRUScore == -1) {
moveToUsed(bin, iaddress);
_usedListsSizes[bin]++;
}
*node.LRUScore = _lruMax;
bool needToMove = _usedListsSizes[bin] > _lruMax;
Node it = _usedListsHeads[bin].second;
Node min = it;
while (*it.Next != -1) {
readNode(*it.Next, it);
if (*it.LRUScore > 0) {
(*it.LRUScore)--;
}
if (needToMove) {
if (*min.LRUScore > *it.LRUScore) {
min = it;
}
}
}
if (needToMove) {
int iaddr = reinterpret_cast<int8_t *>(min.Next) - _bytes.data();
if (iaddr != _usedListsHeads[bin].first) {
*min.LRUScore = -1;
moveToFree(bin, iaddr);
_usedListsSizes[bin]--;
}
}
}
static char const * getErrorMessage(int code) {
static char const *_sMessages[12] = {
"Ok",
"Freelist: Next pointer outside of expected range",
"Freelist: Next pointer doesn't point to a Node",
"Freelist: Prev pointer outside of expected range",
"Freelist: Prev pointer doesn't point to a Node",
"Freelist: Forward nodes != Backward nodes",
"Usedlist: Next pointer outside of expected range",
"Usedlist: Next pointer doesn't point to a Node",
"Usedlist: Prev pointer outside of expected range",
"Usedlist: Prev pointer doesn't point to a Node",
"Usedlist: Forward nodes != Backward nodes",
"Chunk number different from number of elements from both lists",
};
return _sMessages[code];
}
int verifyIntegrity() {
for (int i = 0; i < _binCount; i++) {
int nodeCount = 0;
int prevNodeCount = 0;
Node it = _freeListsHeads[i].second;
for (; *it.Next != -1; readNode(*it.Next, it)) {
if (*it.Next < _startingOffsets[i] || *it.Next >= _startingOffsets[i + 1]) {
return 1;
}
if ((*it.Next - _startingOffsets[i]) % _chunkSizes[i] != 0) {
return 2;
}
nodeCount++;
}
for (; *it.Prev != -1; readNode(*it.Prev, it)) {
if (*it.Prev != _freeListsHeads[i].first) {
if (*it.Prev < _startingOffsets[i] || *it.Prev >= _startingOffsets[i + 1]) {
return 3;
}
if ((*it.Prev - _startingOffsets[i]) % _chunkSizes[i] != 0) {
return 4;
}
}
prevNodeCount++;
}
if (prevNodeCount != nodeCount) {
return 5;
}
it = _usedListsHeads[i].second;
for (; *it.Next != -1; readNode(*it.Next, it)) {
if (*it.Next < _startingOffsets[i] || *it.Next >= _startingOffsets[i + 1]) {
return 6;
}
if ((*it.Next - _startingOffsets[i]) % _chunkSizes[i] != 0) {
return 7;
}
nodeCount++;
}
for (; *it.Prev != -1; readNode(*it.Prev, it)) {
if (*it.Prev != _usedListsHeads[i].first) {
if (*it.Prev < _startingOffsets[i] || *it.Prev >= _startingOffsets[i + 1]) {
return 8;
}
if ((*it.Prev - _startingOffsets[i]) % _chunkSizes[i] != 0) {
return 9;
}
}
prevNodeCount++;
}
if (prevNodeCount != nodeCount) {
return 10;
}
if (_chunkNumber[i] != nodeCount) {
return 11;
}
}
return 0;
}
template<typename TFunctor>
int8_t *lru(int size, TFunctor const &invalidator) {
int binLoc = binLocation(size);
if (binLoc != -1)
return lruGet<TFunctor>(binLoc, invalidator);
return nullptr;
}
int8_t *allocate(int size) {
int bin = binLocation(size);
if (bin != -1)
return allocatei(bin);
return nullptr;
}
void free(int8_t *ptr) {
int bin = binOf(ptr);
if (bin != -1) {
freei(bin, ptr);
}
}
AdaptiveFilecache(std::vector<std::pair<int, double>> const &chunkDescription) :
_bytes(chunkDescription.size() * (sizeof(int) * 6) + Size)
, _headOffset(0)
, _binCount(chunkDescription.size())
, _chunkSizes(chunkDescription.size())
, _weights(chunkDescription.size())
, _chunkNumber(chunkDescription.size())
, _startingOffsets(chunkDescription.size() + 1)
, _currentOffsets(chunkDescription.size() + 1, 0)
, _correctedWeigths(chunkDescription.size())
, _sizeWeigths(chunkDescription.size())
, _countWeigths(chunkDescription.size())
, _numberToSizeInterpolator(0.7)
, _totalSize(0.0)
, _totalNumber(0.0)
, _freeListsHeads(chunkDescription.size())
, _freeListsTails(chunkDescription.size())
, _usedListsHeads(chunkDescription.size())
, _usedListsSizes(chunkDescription.size(), 0)
, _lruMax(32)
{
for (int i = 0; i < _binCount; i++) {
_chunkSizes[i] = chunkDescription[i].first;
_weights[i] = chunkDescription[i].second;
}
_numberToSizeInterpolator = 0.7;
_headOffset = _bytes.size() - Size;
reorganize();
}
bool insertChunk(int size) {
auto binLoc = binLocation(size);
if (binLoc != -1) {
this->_countWeigths[binLoc] += 1.0;
this->_sizeWeigths[binLoc] += size;
_totalNumber += 1.0;
_totalSize += size;
// really insert here
return true;
}
return false;
}
void reorganize() {
int64_t sumAi = 0LL;
double sumWeights = 0.0;
double sumWeightsA = 0.0;
for (int i = 0; i < _binCount; i++) {
sumAi += _chunkSizes[i];
sumWeights += _weights[i];
sumWeightsA += _weights[i] * _chunkSizes[i];
}
assert(Size >= sumAi);
for (int i = 0; i < _binCount; i++) {
_chunkNumber[i] = 1 +
static_cast<int>(floor(_weights[i] / (sumWeights * _chunkSizes[i]) * (Size - sumAi)));
}
_startingOffsets[0] = _headOffset;
_currentOffsets[0] = _headOffset;
for (int i = 1; i <= _binCount; i++) {
_currentOffsets[i] = _startingOffsets[i] =
_startingOffsets[i - 1] + _chunkNumber[i - 1] * _chunkSizes[i - 1];
}
initNodes();
}
void correctWeights() {
for (int i = 0; i < _binCount; i++) {
_correctedWeigths[i] =
_countWeigths[i] / _totalNumber * _numberToSizeInterpolator +
_sizeWeigths[i] / _totalSize * (1.0 - _numberToSizeInterpolator);
_countWeigths[i] = 0.0;
_sizeWeigths[i] = 0.0;
}
_totalSize = 0;
_totalNumber = 0;
swap(_correctedWeigths, _weights);
}
inline int binLocation(int size) {
size += 3 * sizeof(int);
if (size < 0) {
return -1;
}
int binLoc = 0;
while (binLoc < _binCount && _chunkSizes[binLoc] < size) {
binLoc++;
}
if (binLoc != _binCount) {
return binLoc;
}
return -1;
}
double getLoad() {
return 1;
}
void print() {
using std::cout;
using std::endl;
using std::ios;
using std::mutex;
using std::lock_guard;
if (DebugLevel > 70) {
static mutex mtx;
lock_guard<mutex> lock(mtx);
cout << "Weights:\n";
for (int i = 0; i < _binCount; i++) {
cout << _weights[i] << endl;
}
cout << "Chunks:\n";
for (int i = 0; i < _binCount; i++) {
cout.precision(4);
cout.setf(ios::fixed);
cout << _chunkSizes[i] / 1024.0 << "kb x "
<< _chunkNumber[i] << " -> " << (_startingOffsets[i + 1] - _startingOffsets[i]) / 1024.0 / 1024.0 << "mb" <<
" offset: " << _startingOffsets[i] / 1024.0 / 1024.0 << "mb" << endl;
}
cout << _startingOffsets[_binCount] / 1024.0 / 1024.0 << "mb\n";
cout << "====================================\n";
}
}
std::vector<int8_t> &getBytes() {
return _bytes;
}
bool contains(int8_t *ptr) {
if (ptr < _bytes.data() ||
ptr >= _bytes.data() + _bytes.size()) {
return false;
}
return true;
}
};
template <int N>
class ThreadedAdaptiveFilecache {
int _threads;
std::vector<std::unique_ptr<AdaptiveFilecache<N>>> _caches;
std::vector<Spinlock> _locks;
public:
ThreadedAdaptiveFilecache(std::vector<std::pair<int, double>> const &chunkDescription, int threads)
: _threads(threads) {
for (int i = 0; i < threads; i++) {
_caches.push_back(
std::move(
std::unique_ptr<AdaptiveFilecache<N>>(new AdaptiveFilecache<N>(chunkDescription))));
_locks.push_back(Spinlock());
}
}
template<typename TFunctor>
int8_t *lru(int size, TFunctor const &invalidator) {
using namespace std;
using std::sort;
int minLoad = 0;
vector<pair<double, int>> load(_threads);
for (int i = 0; i < _threads; i++) {
load[i] = make_pair(_caches[minLoad]->getLoad(), i);
if (load[i].first < load[minLoad].first) {
minLoad = i;
}
}
sort(load.begin(), load.end(), [](
pair<double, int> const &left, pair<double, int> const &right) {
return left.first < right.first;
});
volatile int nullCount = 0;
volatile int i = 0;
std::vector<bool> lrud(_threads, false);
while (nullCount < _threads) {
volatile int index = load[i].second;
if (!lrud[index] && _locks[index].trylock()) {
volatile auto ptr = _caches[index]->lru(size, invalidator);
_locks[index].unlock();
lrud[index] = true;
if (ptr == nullptr) {
nullCount++;
}
else {
return ptr;
}
}
i = (i + 1) % _threads;
}
return nullptr;
}
void hit(int8_t * ptr) {
for (int i = 0; i < _threads; i++) {
if (_caches[i]->contains(ptr)) {
_locks[i].lock();
_caches[i]->hit(ptr);
_locks[i].unlock();
return;
}
}
}
std::vector<int> verifyIntegrity() {
int idx = 0;
std::vector<int> result;
std::vector<bool> verified(_threads, false);
while ((int)result.size() < _threads) {
if (!verified[idx] && _locks[idx].trylock()) {
result.push_back(_caches[idx]->verifyIntegrity());
_locks[idx].unlock();
verified[idx] = true;
}
idx = (idx + 1) % _threads;
}
return result;
}
int8_t *allocate(int size) {
volatile int nullCount = 0;
volatile int index = 0;
std::vector<bool> allocated(_threads, false);
while (nullCount < _threads) {
if (!allocated[index] && _locks[index].trylock()) {
volatile auto ptr = _caches[index]->allocate(size);
_locks[index].unlock();
allocated[index] = true;
if (ptr == nullptr) {
nullCount++;
}
else {
return ptr;
}
}
index = (index + 1) % _threads;
}
return nullptr;
}
void free(int8_t * ptr) {
for (int i = 0; i < _threads; i++) {
if (_caches[i]->contains(ptr)) {
_locks[i].lock();
_caches[i]->free(ptr);
_locks[i].unlock();
return;
}
}
}
};
using namespace std;
int main() {
FILE *out = fopen("hashuri.out", "w");
FILE *in = fopen("hashuri.in", "r");
int n;
fscanf(in, "%d", &n);
HT<bool> ht(n);
for (int i = 0; i < n; i++) {
unsigned long long int op, c;
fscanf(in, "%llu%llu", &op, &c);
if (op == 1) {
ht.insert(c, true);
}
else if (op == 2) {
ht.remove(c);
}
else if (op == 3) {
if (ht.contains(c))
fprintf(out, "1\n");
else
fprintf(out, "0\n");
}
}
return 0;
}