Farewell to Disks: Efficient Processing of Obstinate Data

61
1 1 Αποχαιρετισμός στους Δίσκους: Αποδοτική Επεξεργασία Περίπλοκων Δεδομένων Διομήδης Σπινέλλης Καθηγητής Τμήμα Διοικητικής Επιστήμης και Τεχνολογίας Οικονομικό Πανεπιστήμιο Αθηνών http://www.dmst.aueb.gr/dds

description

Professor Diomidis Spinellis gave the lecture "Farewell to Disks: Efficient Processing of Obstinate Data" in the Distinguished Lecturer Series "Leon the Mathematician".

Transcript of Farewell to Disks: Efficient Processing of Obstinate Data

Page 1: Farewell to Disks: Efficient Processing of Obstinate Data

1

1

Αποχαιρετισμός στους Δίσκους:

Αποδοτική Επεξεργασία

Περίπλοκων Δεδομένων

Διομήδης Σπινέλλης

Καθηγητής

Τμήμα Διοικητικής Επιστήμης και Τεχνολογίας

Οικονομικό Πανεπιστήμιο Αθηνών

http://www.dmst.aueb.gr/dds

Page 2: Farewell to Disks: Efficient Processing of Obstinate Data

2

Page 3: Farewell to Disks: Efficient Processing of Obstinate Data

3

Page 4: Farewell to Disks: Efficient Processing of Obstinate Data

4

Page 5: Farewell to Disks: Efficient Processing of Obstinate Data

5

Page 6: Farewell to Disks: Efficient Processing of Obstinate Data

6

1

Page 7: Farewell to Disks: Efficient Processing of Obstinate Data

7

Page 8: Farewell to Disks: Efficient Processing of Obstinate Data

8

Page 9: Farewell to Disks: Efficient Processing of Obstinate Data

9

Page 10: Farewell to Disks: Efficient Processing of Obstinate Data

10

2

Page 11: Farewell to Disks: Efficient Processing of Obstinate Data

11

Page 12: Farewell to Disks: Efficient Processing of Obstinate Data

12

3

Page 13: Farewell to Disks: Efficient Processing of Obstinate Data

13

Page 14: Farewell to Disks: Efficient Processing of Obstinate Data

14

L1 D cache

1.3 ns

L2 cache

9.7 ns

DDR RAM

28.5 ns

Hard disk

25.6 ms

Worst case latency (Log scale)

Page 15: Farewell to Disks: Efficient Processing of Obstinate Data

15

Page 16: Farewell to Disks: Efficient Processing of Obstinate Data

16

Page 17: Farewell to Disks: Efficient Processing of Obstinate Data

17

Page 18: Farewell to Disks: Efficient Processing of Obstinate Data

18

1

Page 19: Farewell to Disks: Efficient Processing of Obstinate Data

19

Function call

1.3ns

System call

1.9μs

Local IPC

4.3μs

Remote IPC

1.2ms

Time (Log scale)

select Locations.cc1, Divisions.name, avg(CO2), count(*), Locations.lat, Locations.long, POPDENSITY.DENSITY from Papers inner join Locations on Papers.confLocId = Locations.id inner join Divisions on Locations.cc1 = Divisions.country inner join POPDENSITY on Divisions.name = upper(POPDENSITY.name) where Divisions.code = '00' and CO2 notnull group by Locations.cc1 having count(*) > 20 order by avg(CO2) desc;

Page 20: Farewell to Disks: Efficient Processing of Obstinate Data

20

/* Get the data */ if (mcSet.dataLen) { data = xmalloc(mcSet.dataLen); if (lseek(fd, mcSet.data.off, SEEK_SET) == -1) CORRUPT(); if (read(fd, data, mcSet.dataLen) != mcSet.dataLen) CORRUPT(); if (lseek(fd, mcSet.u.firstMsg, SEEK_SET) == -1) CORRUPT(); for (i = 0; i < mcSet.numMsgs; ++i) { if (read(fd, &mcMsg, sizeof(mcMsg)) != sizeof(mcMsg)) CORRUPT(); if (mcMsg.invalid) { --i; continue; } msg = xmalloc(sizeof(msgT)); memset(msg, '\0', sizeof(*msg)); /* […] */ msg->msgId = mcMsg.msgId; msg->str = xstrdup((char *) (data + mcMsg.msg.off)); } free(data); }

2

Page 21: Farewell to Disks: Efficient Processing of Obstinate Data

21

MMAP(2) FreeBSD System Calls Manual MMAP(2) NAME mmap -- allocate memory, or map files or devices into memory SYNOPSIS #include <sys/mman.h> void * mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset); DESCRIPTION The mmap() system call causes the pages starting at addr and continuing for at most len bytes to be mapped from the object described by fd, starting at byte offset offset.

Page 22: Farewell to Disks: Efficient Processing of Obstinate Data

22

[dds@istlab /usr/src/sys/vm]$ ls default_pager.c uma_int.h vm_page.c device_pager.c vm.h vm_page.h memguard.c vm_contig.c vm_pageout.c memguard.h vm_extern.h vm_pageout.h phys_pager.c vm_fault.c vm_pager.c pmap.h vm_glue.c vm_pager.h redzone.c vm_init.c vm_param.h redzone.h vm_kern.c vm_phys.c sg_pager.c vm_kern.h vm_phys.h swap_pager.c vm_map.c vm_reserv.c swap_pager.h vm_map.h vm_reserv.h uma.h vm_meter.c vm_unix.c uma_core.c vm_mmap.c vm_zeroidle.c uma_dbg.c vm_object.c vnode_pager.c uma_dbg.h vm_object.h vnode_pager.h

Page 23: Farewell to Disks: Efficient Processing of Obstinate Data

23

3

Page 24: Farewell to Disks: Efficient Processing of Obstinate Data

24

$ ls -lh sparse -rw-r--r-- 1 dds dds 500G Mar 19 20:32 sparse $ du -h sparse 28K sparse

4 διεργασία 1 διεργασία 2

φυσική μνήμη

r/o r/o

Page 25: Farewell to Disks: Efficient Processing of Obstinate Data

25

r/w r/w

διεργασία 1 διεργασία 2

φυσική μνήμη

read read

διεργασία 1 διεργασία 2

φυσική μνήμη

read r/w

αντίγραφο

διεργασία 1 διεργασία 2

φυσική μνήμη

Page 26: Farewell to Disks: Efficient Processing of Obstinate Data

26

5

C++

Page 27: Farewell to Disks: Efficient Processing of Obstinate Data

27

e.g. 1

CC-BY 2.5 Claudio Rocchini

Page 28: Farewell to Disks: Efficient Processing of Obstinate Data

28

01110010011 0111101101101011 0000101101110011 00101 // romane 01110010011 0111101101101011 0000101101110011 1010101110011 // romanus 01110010011 0111101101101011 10101011011000111010101110011 // romulus 01110010011 10101011000100110 0101011 0111001110011 // rubens 01110010011 10101011000100110 0101011 10010 // ruber 01110010011 10101011000100110 100101100011011 0111101101110 // rubicon 01110010011 10101011000100110 100101100011011 1010101101110011001000111 010101110011 // rubicundus

Κατασκευή

δομής

δίσκου

Δομή δίσκου

Κατάλογος

άρθρων

Δομή

μνήμης

Κατασκευή

δένδρου

ριζών

Αρχική

ιστοσελίδα

Ιστοσελίδα

με νέους

δεσμούς

wikipedialize

for (;;) { i = bitpos; // Loop until the end of the current node or the end of the word while (i < p->end && i < len * 8) { // Covering whole byte? if (i % 8 == 0 && i + 8 <= p->end && (i + 8) / 8 <= len && data[i / 8] == p->data[i / 8]) { i += 8; continue; } // Split point if (getbit(data, i) != getbit(p->data, i)) { // Node with the new data struct pnode *n = new_node(data + i / 8, i % 8, (len - i / 8) * 8, NULL, NULL, true); // Tail of the current node struct pnode *t = new_node(p->data + i / 8, i % 8, p->end - (i & ~7), p->zero, p->one, p->is_terminal); // Head of current node if (getbit(data, i)) *p2 = new_node(p->data, bitpos, i, t, n, false); else *p2 = new_node(p->data, bitpos, i, n, t, false); free(p); return; } i++; } // while

Page 29: Farewell to Disks: Efficient Processing of Obstinate Data

29

// Write the given node to the specified file, returning its file offset. // On return the file's offset is set to the first free byte. static long write_node(struct pnode *p, FILE *f) { long my_offset = ftell(f); size_t ret; if (p->one) { struct pnode_disk_one pdo; size_t dlen = datalen(p->end); long len = sizeof(pdo) + dlen; fseek(f, len, SEEK_CUR); pdo.h.type = dt_one; pdo.h.is_terminal = p->is_terminal; pdo.h.has_zero = (p->zero != NULL); pdo.h.has_one = true; pdo.h.begin = p->begin; pdo.h.end = p->end; if (p->zero) write_node(p->zero, f); pdo.one = write_node(p->one, f); long saved_offset = ftell(f); fseek(f, my_offset, SEEK_SET); fwrite(&pdo, 1, sizeof(pdo), f); fwrite(p->data, 1, dlen, f); fseek(f, saved_offset, SEEK_SET); return my_offset; } else {

$ zcat enwiki-latest-all-titles-in-ns0.gz | wc -c 106,237,053 $ wc -c enwiki.pt 144,657,286 enwiki.pt

Page 30: Farewell to Disks: Efficient Processing of Obstinate Data

30

$ curl http://www.kiosek.com/dostoevsky/library/crimeandpunishment.txt | perl -pe 's/[\r\n]/ /g' >crimeandpunishment.txt

Page 31: Farewell to Disks: Efficient Processing of Obstinate Data

31

$ wc crimeandpunishment.txt 0 203,273 1,462,661 crimeandpunishment.txt

$ time ./wpltest en en_US.UTF-8 ISO-8859-1 data/enwiki.pt <crimeandpunishment.txt >/dev/null

$ time ./wpltest en en_US.UTF-8 ISO-8859-1 data/enwiki.pt <crimeandpunishment.txt >/dev/null Checked 406,225 prefixes real 0m5.859s # Cold cache real 0m1.876s # Warm cache user 0m1.780s sys 0m0.090s

Page 32: Farewell to Disks: Efficient Processing of Obstinate Data

32

// Prevent memory alignment problems

memcpy(&end, &(p.h->end), sizeof(end)); while (i < end && i < len * 8) { if (i % 8 == 0 && i + 8 <= end && data[i / 8] == pdata[i / 8]) {

i += 8; prefix += 8;

continue; }

// Split point if (getbit(data, i) != getbit(pdata, i))

return best / 8; i++;

prefix++; }

if (i == end && p.h->is_terminal) best = prefix;

if (i == len * 8) return best / 8; // Move to next node

bitpos = end % 8; int covered = end / 8;

if (getbit(data, end)) { if (!p.h->has_one)

return best / 8; switch (p.h->type) {

case dt_both: p.h = (struct pnode_disk_head *)(base + p.b->one);

break; case dt_one:

p.h = (struct pnode_disk_head *)(base + p.o->one); break;

case dt_short: default: assert(0);

} } else {

if (!p.h->has_zero) return best / 8;

switch (p.h->type) { case dt_both:

p.h = (struct pnode_disk_head *)(base + p.b->zero); break;

case dt_one: // Advance to the end of this node

p.h = (struct pnode_disk_head *)((char *)p.h + sizeof(struct pnode_disk_one) + datalen(end)); break;

case dt_short: // Advance to the end of this node p.h = (struct pnode_disk_head *)((char *)p.h + sizeof(struct pnode_disk_short) + datalen(end));

break; default:

assert(0); }

}

/*

* You are not expected

* to understand this

*/

Page 33: Farewell to Disks: Efficient Processing of Obstinate Data

33

e.g. 2

The problem with wikipedia

Page 34: Farewell to Disks: Efficient Processing of Obstinate Data

34

Λίστα ακμών Δομή

δεδομένων

γράφου

Κατασκευή

γράφου

Κορυφές

αρχής, τέλους Διαδρομή

BFS

Δομή

δεδομένων

γράφου

Λίστα ακμών

Κατασκευή

γράφου

Κορυφές

αρχής, τέλους Διαδρομή

BFS

Δομή

δεδομένων

γράφου

Page 35: Farewell to Disks: Efficient Processing of Obstinate Data

35

Λίστα ακμών Δομή

δεδομένων

γράφου

Κατασκευή

γράφου

// Loop through all lines, // adding them to the graph while (std::getline(in, line)) { int split = line.find('\001'); if (split == std::string::npos) { std::cerr << "No separator: " << line << std::endl; continue; } n.setName(line.substr(0, split)); NodesIter from(entries->insert(n).first); n.setName(line.substr(split + 1)); NodesIter to(entries->insert(n).first); (const_cast<Node &>(*from)).addEdge( const_cast<Node *>(&*to)); }

Λίστα ακμών

Κατασκευή

γράφου

Κορυφές

αρχής, τέλους Διαδρομή

BFS

Δομή

δεδομένων

γράφου

Page 36: Farewell to Disks: Efficient Processing of Obstinate Data

36

Διαδρομή

BFS

Δομή

δεδομένων

γράφου

Tacoma Narrows Bridge

p=

Suspension bridge

p=

Washington

p=

Geneva

p=

William Howard Taft

p=

Montana

p=

Ουρά

[]=

Tacoma Narrows Bridge

p=

Suspension bridge

p=

Washington

p=

Geneva

p=

William Howard Taft

p=

Montana

p=

Ουρά

[]=Tacoma Narrows Bridge

Page 37: Farewell to Disks: Efficient Processing of Obstinate Data

37

Tacoma Narrows Bridge

p=

Suspension bridge

p=

Washington

p=

Geneva

p=

William Howard Taft

p=

Montana

p=

Ουρά

[]=Tacoma Narrows Bridge

Tacoma Narrows Bridge

p=

Suspension bridge

p=

Washington

p=

Geneva

p=

William Howard Taft

p=

Montana

p=

Ουρά

[]=

Tacoma Narrows Bridge

p=

Suspension bridge

p=Tacoma Narrows Bridge

Washington

p=

Geneva

p=

William Howard Taft

p=

Montana

p=

Ουρά

[]= Suspension bridge

Page 38: Farewell to Disks: Efficient Processing of Obstinate Data

38

Suspension bridge

p=Tacoma Narrows Bridge

Washington

p=Tacoma Narrows Bridge

Geneva

p=

William Howard Taft

p=

Montana

p=

Ουρά

[]= Suspension bridge

Washington

Tacoma Narrows Bridge

p=

Tacoma Narrows Bridge

p=

Suspension bridge

p=Tacoma Narrows Bridge

Washington

p=Tacoma Narrows Bridge

Geneva

p=

William Howard Taft

p=

Montana

p=

Ουρά

[]= Suspension bridge

Washington

Suspension bridge

p=Tacoma Narrows Bridge

Washington

p=Tacoma Narrows Bridge

Geneva

p=Suspension bridge

William Howard Taft

p=

Montana

p=

Ουρά

[]= Washington

Geneva

Tacoma Narrows Bridge

p=

Page 39: Farewell to Disks: Efficient Processing of Obstinate Data

39

Washington

p=Tacoma Narrows Bridge

Geneva

p=Suspension bridge

William Howard Taft

p=

Montana

p=Washington

Ουρά

[]= Geneva

Montana

Tacoma Narrows Bridge

p=

Suspension bridge

p=Tacoma Narrows Bridge

Washington

p=Tacoma Narrows Bridge

Geneva

p=Suspension bridge

William Howard Taft

p=

Montana

p=Washington

Ουρά

[]=Montana

Tacoma Narrows Bridge

p=

Suspension bridge

p=Tacoma Narrows Bridge

Washington

p=Tacoma Narrows Bridge

Geneva

p=Suspension bridge

William Howard Taft

p=Geneva

Montana

p=Washington

Ουρά

[]=Montana

Tacoma Narrows Bridge

p=

Suspension bridge

p=Tacoma Narrows Bridge

Page 40: Farewell to Disks: Efficient Processing of Obstinate Data

40

/*
 * Breadth-first search of the graph, starting at "from" and looking for "to".
 *
 * Nodes are colored Gray when they are discovered and Black once all of
 * their edges have been scanned; each discovered node records the node it
 * was reached from as its predecessor.
 * Assumes every node is White on entry -- TODO confirm against the caller.
 *
 * Returns true as soon as "to" is discovered through an edge, false when
 * the queue drains without reaching it.  Because "from" is colored Gray
 * before the scan begins, a search where from == to returns false.
 *
 * The parameter n is not used by the code shown here; it is kept so that
 * existing callers continue to compile.
 */
static bool breadthFirstSearchFor(NodePtr from, NodePtr to,
    size_t n) {
	(void)n;			// Unused in this function

	std::queue<NodePtr> q;

	from->setColor(Node::Gray);
	q.push(from);

	while (!q.empty()) {
		NodePtr u = q.front();
		q.pop();

		// Bind by reference: copying the edge list for every
		// dequeued node is needless work.  A const reference is
		// valid whether getEdges() returns a reference or a value.
		const Edges &edges = u->getEdges();

		for (Edges::const_iterator j = edges.begin();
		    j != edges.end(); ++j)
			if ((*j)->getColor() == Node::White) {
				(*j)->setColor(Node::Gray);
				(*j)->setPredecessor(u);
				if (*j == to)
					return true;	// Found
				q.push(*j);
			}

		u->setColor(Node::Black);
	}
	return false;			// Not found
}

Λίστα ακμών

Κατασκευή

γράφου

Κορυφές

αρχής, τέλους Διαδρομή

BFS

Δομή

δεδομένων

γράφου

Δομή

δεδομένων

γράφου

Page 41: Farewell to Disks: Efficient Processing of Obstinate Data

41

Page 42: Farewell to Disks: Efficient Processing of Obstinate Data

42

Δομή

δεδομένων

γράφου

#include <string> #include <iostream> #include <queue> #include <list> #include <functional> #include <boost/interprocess/managed_mapped_file.hpp> #include <boost/interprocess/offset_ptr.hpp> #include <boost/interprocess/allocators/allocator.hpp> #include <boost/unordered_set.hpp> #include <boost/interprocess/containers/string.hpp> #include <boost/interprocess/containers/slist.hpp> #include <boost/filesystem.hpp> #include <boost/filesystem/operations.hpp>

Page 43: Farewell to Disks: Efficient Processing of Obstinate Data

43

#include <string> #include <iostream> #include <queue> #include <list> #include <functional> #include <boost/interprocess/managed_mapped_file.hpp> #include <boost/interprocess/offset_ptr.hpp> #include <boost/interprocess/allocators/allocator.hpp> #include <boost/unordered_set.hpp> #include <boost/interprocess/containers/string.hpp> #include <boost/interprocess/containers/slist.hpp> #include <boost/filesystem.hpp> #include <boost/filesystem/operations.hpp>

typedef managed_mapped_file::segment_manager SegmentManager; typedef allocator<char, SegmentManager> CharAllocator; typedef basic_string<char, std::char_traits<char>, CharAllocator> CharString; typedef allocator<Node, SegmentManager> NodeAllocator; typedef boost::unordered_set<Node, boost::hash<Node>, NodeEqual, NodeAllocator> Nodes; typedef offset_ptr<Node> NodePtr; typedef allocator<NodePtr, SegmentManager> NodePtrAllocator; typedef slist<NodePtr, NodePtrAllocator> Edges; typedef allocator<void, SegmentManager> VoidAllocator; typedef allocator<Edges, SegmentManager> EdgesAllocator;

// A graph node, suitable for performing a breadth-first search class Node { public: typedef enum {White, Gray, Black} Color; private: CharString name; // Node name Color color; // Color used during BFS NodePtr predecessor; // BFS predecessor Edges edges; // Node's edges public: // Since VoidAllocator is convertible to any other // allocator<T>, we can simplify the initialization // taking just one allocator for all inner containers. Node(const std::string &n, const VoidAllocator &voidAlloc) : name(n.begin(), n.end(), voidAlloc), color(White), predecessor(NULL), edges(voidAlloc) {} void addEdge(NodePtr p) { edges.push_front(p); } };

Page 44: Farewell to Disks: Efficient Processing of Obstinate Data

44

/* * Read ^A-separated nodes from the inputFile, storing the graph * structure in the specified backingFile. */ static void readData(const char *backingFile, const char *inputFile) { std::ifstream in(inputFile, std::ios::binary); if (in.fail()) { perror(inputFile); exit(1); } boost::filesystem::remove_all(backingFile); managed_mapped_file segment(create_only, backingFile, FileSize); // An allocator convertible to any allocator<T, SegmentManager> type VoidAllocator allocInst (segment.get_segment_manager()); // Construct the memory map and fill it Nodes *entries = segment.construct<Nodes>("entries")(Elements, boost::hash<Node>(), NodeEqual(), allocInst); std::string line; Node n(std::string(), allocInst); // To save construction costs

/* * Search and report the shortest graph path from "from" to "to" * The graph is stored in backingFile. */ static void searchData(const char *backingFile, const std::string &from, const std::string &to) { managed_mapped_file segment(open_copy_on_write, backingFile); // An allocator convertible to any allocator<T, SegmentManager> VoidAllocator allocInst(segment.get_segment_manager()); // Obtain the previously saved entries Nodes *entries = segment.find<Nodes>("entries").first; NodePtr toPtr; bool found = breadthFirstSearchFor( findNode(entries, Node(from, allocInst)), toPtr = findNode(entries, Node(to, allocInst)), entries->size());

Page 45: Farewell to Disks: Efficient Processing of Obstinate Data

45

Λίστα ακμών

Κατασκευή

γράφου

Κορυφές

αρχής, τέλους Διαδρομή

BFS

Δομή

δεδομένων

γράφου

Page 46: Farewell to Disks: Efficient Processing of Obstinate Data

46

$ ./smap -r graph.bin graph.txt

$ ./smap -s graph.bin 'Tacoma Narrows Bridge'\ 'William howard taft' 0% 10 20 30 40 50 60 70 80 90 100% |----|----|----|----|----|----|----|----|----|----| *

Tacoma Narrows Bridge Washington Montana William howard taft

$ ./smap -s graph.bin 'Tacoma Narrows Bridge'\ '24-hour analog dial' 0% 10 20 30 40 50 60 70 80 90 100% |----|----|----|----|----|----|----|----|----|----| **

Tacoma Narrows Bridge Suspension bridge Geneva Watch 24-hour analog dial

Page 47: Farewell to Disks: Efficient Processing of Obstinate Data

47

$ ./smap -s graph.bin 'Tacoma Narrows Bridge' 'Wet t-shirt contest' 0% 10 20 30 40 50 60 70 80 90 100% |----|----|----|----|----|----|----|----|----|----| *

Tacoma Narrows Bridge Washington Starbucks Toplessness Wet t-shirt contest

The problem with wikipedia

Page 48: Farewell to Disks: Efficient Processing of Obstinate Data

48

Performance

MySQL mmap

Server 15:59:43

Client system 03:16:59 00:04:32

Client user 00:52:48 00:04:52

00:00

06:00

12:00

18:00

Χρόνος (ω:λ)

Κατασκευή δομής δεδομένων

MySQL mmap

Waiting 348 3.886

Server 259

Client system 58 19

Client user 16 2

0 500 1.000 1.500 2.000 2.500 3.000 3.500 4.000 4.500

Χρόνος / κόμβο (μs)

Taft: Κρύα κρυφή μνήμη

Page 49: Farewell to Disks: Efficient Processing of Obstinate Data

49

MySQL mmap

Waiting 23 0

Server 305

Client system 59 5

Client user 15 3

0 50 100 150 200 250 300 350 400 450

Χρόνος / κόμβο (μs)

Taft: Ζεστή κρυφή μνήμη

MySQL mmap

Waiting 415 1.977

Server 472

Client system 103 10

Client user 26 4

0

500

1.000

1.500

2.000

2.500

Χρόνος / κόμβο (μs)

24h Clock: Κρύα κρυφή μνήμη

MySQL mmap

Waiting 120 0

Server 469

Client system 103 3

Client user 27 4

0 100 200 300 400 500 600 700 800

Χρόνος / κόμβο (μs)

24h Clock: Ζεστή κρυφή μνήμη

Page 50: Farewell to Disks: Efficient Processing of Obstinate Data

50

0

1

2

3

4

5

0 2000 4000 6000 8000

Χρόνος (ρ) / κόμβο (ms)

Αριθμός κόμβων

Χιλιάδες

Κλιμάκωση απόδοσης (κρύα μνήμη)

mmap

MySQL

Page 51: Farewell to Disks: Efficient Processing of Obstinate Data

51

ACID

A

Page 52: Farewell to Disks: Efficient Processing of Obstinate Data

52

C

I

D

Page 53: Farewell to Disks: Efficient Processing of Obstinate Data

53

SQL

Page 54: Farewell to Disks: Efficient Processing of Obstinate Data

54

A case…

Application code

vector<Customer> customers1;

Customer c1(d1,cd1,s1,p1);

customers1.push_back(c1);

vector<Truck> trucks;

Truck t1(cs1,dc1,pc1,rlp1, customers1);

trucks.push_back(t1);

….

ODBC

JDBC

Page 55: Farewell to Disks: Efficient Processing of Obstinate Data

55

register

L1 D cache

L2 cache

DRAM

HDD cache

HDD / SSD

L3 cache

Page 56: Farewell to Disks: Efficient Processing of Obstinate Data

56

534,681,000 εντολές ΚΜΕ

1

10

100

1,000

10,000

100,000

L1 D cache L2 cache DDR RAM Hard disk

Μέγιστη διεκπεραιωτικότητα (MB/s)

L1 D cache 1.3 ns

L2 cache 9.7 ns

DDR RAM 28.5 ns

Hard disk 25.6 ms

Χείριστη αναμονή (λογ. κλιμ.)

Page 57: Farewell to Disks: Efficient Processing of Obstinate Data

57

L1 D cache

1.3 ns

L2 cache

9.7 ns

DDR RAM

28.5 ns

Hard disk

25.6 ms

Χείριστη αναμονή (λογ. κλιμ.)

Page 58: Farewell to Disks: Efficient Processing of Obstinate Data

58

// Write the given node to the specified file, returning its file offset. // On return the file's offset is set to the first free byte. static long write_node(struct pnode *p, FILE *f) { long my_offset = ftell(f); size_t ret; if (p->one) { struct pnode_disk_one pdo; size_t dlen = datalen(p->end); long len = sizeof(pdo) + dlen; fseek(f, len, SEEK_CUR); pdo.h.type = dt_one; pdo.h.is_terminal = p->is_terminal; pdo.h.has_zero = (p->zero != NULL); pdo.h.has_one = true; pdo.h.begin = p->begin; pdo.h.end = p->end; if (p->zero) write_node(p->zero, f); pdo.one = write_node(p->one, f); long saved_offset = ftell(f); fseek(f, my_offset, SEEK_SET); fwrite(&pdo, 1, sizeof(pdo), f); fwrite(p->data, 1, dlen, f); fseek(f, saved_offset, SEEK_SET); return my_offset; } else {

Page 59: Farewell to Disks: Efficient Processing of Obstinate Data

59

#include <boost/interprocess/managed_mapped_file.hpp> #include <boost/interprocess/offset_ptr.hpp> #include <boost/interprocess/allocators/allocator.hpp> #include <boost/unordered_set.hpp> #include <boost/interprocess/containers/string.hpp> #include <boost/interprocess/containers/slist.hpp>

w r/o

βήμα 1 βήμα Ν

φυσική μνήμη

read r/w

αντίγραφο

διεργασία 1 διεργασία 2

φυσική μνήμη

Page 60: Farewell to Disks: Efficient Processing of Obstinate Data

60

www.spinellis.gr

twitter.com/CoolSWEng

[email protected]

Page 61: Farewell to Disks: Efficient Processing of Obstinate Data

61

www.spinellis.gr/wpl

www.spinellis.gr/blog/20101030/smap.cpp