allura
Revision | 0966df4efa0db839ec8bc6b9ef9028bf8e264615 (tree) |
---|---|
Date | 2011-10-06 02:14:55 |
Author | Rick Copeland <rcopeland@geek...> |
Committer | Rick Copeland |
[#2020] Integrating new repo refresh code with existing repo models
Signed-off-by: Rick Copeland <rcopeland@geek.net>
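The diff below moves these models from ming's declarative `MappedClass` style to the imperative `collection()` / `mapper()` style, so each MongoDB collection can be used both at the raw document level and through the ORM. The following minimal sketch of that pattern uses only calls that appear in the diff itself (`collection`, `Field`, `Index`, `mapper`, `.m.find`, `.query.get`); the `ExampleDoc` / `Example` names and the `example` collection are placeholders, not part of Allura.

```python
# Minimal sketch of the imperative ming mapping style adopted in this commit.
# ExampleDoc/Example and the 'example' collection are placeholders; the
# sessions are Allura's real ones from allura.model.session.
from ming import collection, Field, Index
from ming import schema as S
from ming.orm import mapper

from allura.model.session import main_doc_session, main_orm_session

# 1. Describe the collection layout (document level).
ExampleDoc = collection(
    'example', main_doc_session,
    Field('_id', S.ObjectId()),
    Field('name', str, index=True),
    Index('name', '_id'))

# 2. Behaviour lives on a plain Python class...
class Example(object):
    def greeting(self):
        return 'Hello, %s' % self.name

# 3. ...which is attached to the collection afterwards.
mapper(Example, ExampleDoc, main_orm_session)

# Document-level access:  ExampleDoc.m.find(dict(name='x'))
# ORM-level access:       Example.query.get(name='x')
```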
@@ -2,43 +2,47 @@ import re | ||
2 | 2 | import logging |
3 | 3 | from itertools import groupby |
4 | 4 | from cPickle import dumps, loads |
5 | -from datetime import datetime | |
6 | 5 | from collections import defaultdict |
7 | 6 | |
8 | 7 | import bson |
9 | 8 | import pymongo |
10 | -from pylons import c, g | |
9 | +from pylons import c | |
11 | 10 | |
12 | -import ming | |
11 | +from ming import collection, Field, Index | |
13 | 12 | from ming import schema as S |
14 | 13 | from ming.utils import LazyProperty |
15 | -from ming.orm import session | |
16 | -from ming.orm import FieldProperty, ForeignIdProperty, RelationProperty | |
17 | -from ming.orm.declarative import MappedClass | |
14 | +from ming.orm import session, mapper | |
15 | +from ming.orm import ForeignIdProperty, RelationProperty | |
18 | 16 | |
19 | 17 | from allura.lib import helpers as h |
20 | 18 | |
21 | -from .session import main_orm_session | |
19 | +from .session import main_doc_session, main_orm_session | |
22 | 20 | |
23 | 21 | log = logging.getLogger(__name__) |
24 | 22 | |
25 | -class ArtifactReference(MappedClass): | |
26 | - '''ArtifactReference manages the artifact graph. | |
27 | - | |
28 | - fields are all strs, corresponding to Solr index_ids | |
29 | - ''' | |
30 | - class __mongometa__: | |
31 | - session = main_orm_session | |
32 | - name = 'artifact_reference' | |
33 | - indexes = [ 'references' ] | |
34 | - | |
35 | - _id = FieldProperty(str) | |
36 | - artifact_reference = FieldProperty(S.Object(dict( | |
37 | - cls=S.Binary, | |
38 | - project_id=S.ObjectId, | |
39 | - app_config_id=S.ObjectId, | |
40 | - artifact_id=S.Anything(if_missing=None)))) | |
41 | - references = FieldProperty([str]) | |
23 | +# Collection definitions | |
24 | +ArtifactReferenceDoc = collection( | |
25 | + 'artifact_reference', main_doc_session, | |
26 | + Field('_id', str), | |
27 | + Field('artifact_reference', dict( | |
28 | + cls=S.Binary(), | |
29 | + project_id=S.ObjectId(), | |
30 | + app_config_id=S.ObjectId(), | |
31 | + artifact_id=S.Anything(if_missing=None))), | |
32 | + Field('references', [str], index=True)) | |
33 | + | |
34 | +ShortlinkDoc = collection( | |
35 | + 'shortlink', main_doc_session, | |
36 | + Field('_id', S.ObjectId()), | |
37 | + Field('ref_id', str, index=True), | |
38 | + Field('project_id', S.ObjectId()), | |
39 | + Field('app_config_id', S.ObjectId()), | |
40 | + Field('link', str), | |
41 | + Field('url', str), | |
42 | + Index('link, project_id', 'app_config_id')) | |
43 | + | |
44 | +# Class definitions | |
45 | +class ArtifactReference(object): | |
42 | 46 | |
43 | 47 | @classmethod |
44 | 48 | def from_artifact(cls, artifact): |
@@ -71,28 +75,8 @@ class ArtifactReference(MappedClass): | ||
71 | 75 | log.exception('Error loading artifact for %s: %r', |
72 | 76 | self._id, aref) |
73 | 77 | |
74 | -class Shortlink(MappedClass): | |
78 | +class Shortlink(object): | |
75 | 79 | '''Collection mapping shorthand_ids for artifacts to ArtifactReferences''' |
76 | - class __mongometa__: | |
77 | - session = main_orm_session | |
78 | - name = 'shortlink' | |
79 | - indexes = [ | |
80 | - ('link', 'project_id', 'app_config_id'), | |
81 | - ('ref_id',), | |
82 | - ] | |
83 | - | |
84 | - # Stored properties | |
85 | - _id = FieldProperty(S.ObjectId) | |
86 | - ref_id = ForeignIdProperty(ArtifactReference) | |
87 | - project_id = ForeignIdProperty('Project') | |
88 | - app_config_id = ForeignIdProperty('AppConfig') | |
89 | - link = FieldProperty(str) | |
90 | - url = FieldProperty(str) | |
91 | - | |
92 | - # Relation Properties | |
93 | - project = RelationProperty('Project') | |
94 | - app_config = RelationProperty('AppConfig') | |
95 | - ref = RelationProperty('ArtifactReference') | |
96 | 80 | |
97 | 81 | # Regexes used to find shortlinks |
98 | 82 | _core_re = r'''(\[ |
@@ -202,3 +186,12 @@ class Shortlink(MappedClass): | ||
202 | 186 | else: |
203 | 187 | return None |
204 | 188 | |
189 | +# Mapper definitions | |
190 | +mapper(ArtifactReference, ArtifactReferenceDoc, main_orm_session) | |
191 | +mapper(Shortlink, ShortlinkDoc, main_orm_session, properties=dict( | |
192 | + ref_id = ForeignIdProperty(ArtifactReference), | |
193 | + project_id = ForeignIdProperty('Project'), | |
194 | + app_config_id = ForeignIdProperty('AppConfig'), | |
195 | + project = RelationProperty('Project'), | |
196 | + app_config = RelationProperty('AppConfig'), | |
197 | + ref = RelationProperty(ArtifactReference))) |
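For illustration, the refresh code further down in this commit writes these two collections directly at the document level. Here is a hedged sketch of one commit's reference/shortlink pair built the same way `refresh_repo()` does; the commit id, ObjectIds and URL are made-up values, and the ming sessions must already be bound to a MongoDB datastore for the saves to succeed.

```python
# Hedged sketch: an ArtifactReferenceDoc/ShortlinkDoc pair for one commit,
# mirroring refresh_repo() later in this commit.  The oid, ObjectIds and URL
# are invented illustration values.
import bson
from cPickle import dumps

from allura.model.index import ArtifactReferenceDoc, ShortlinkDoc
from allura.model.repo import Commit

oid = 'deadbeefcafe'                       # hypothetical commit id
index_id = 'allura.model.repo.Commit#' + oid

aref = ArtifactReferenceDoc(dict(
    _id=index_id,
    artifact_reference=dict(
        cls=dumps(Commit),
        project_id=bson.ObjectId(),        # placeholder ids
        app_config_id=bson.ObjectId(),
        artifact_id=oid),
    references=[]))
link = ShortlinkDoc(dict(
    _id=bson.ObjectId(),
    ref_id=index_id,
    project_id=bson.ObjectId(),
    app_config_id=bson.ObjectId(),
    link='[%s]' % oid[:6],
    url='/p/test/code/ci/%s/' % oid))
aref.m.save(safe=False, validate=False)
link.m.save(safe=False, validate=False)
```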
@@ -1,77 +1,59 @@ | ||
1 | +import re | |
2 | +import sys | |
3 | +import logging | |
4 | +from hashlib import sha1 | |
5 | +from itertools import izip, chain | |
1 | 6 | from datetime import datetime |
7 | +from collections import defaultdict | |
2 | 8 | |
3 | -from ming import Document, Field | |
9 | +from pylons import g | |
10 | + | |
11 | +from ming import Field, Index, collection | |
4 | 12 | from ming import schema as S |
13 | +from ming.utils import LazyProperty | |
14 | +from ming.orm import mapper | |
5 | 15 | |
6 | -from .session import main_doc_session, project_doc_session | |
16 | +from allura.lib import utils | |
17 | +from allura.lib import helpers as h | |
7 | 18 | |
8 | -class Commit(Document): | |
9 | - class __mongometa__: | |
10 | - name = 'repo_ci' | |
11 | - session = main_doc_session | |
12 | - indexes = [ | |
13 | - ('parent_ids',), | |
14 | - ('child_ids',), | |
15 | - ('repo_ids',)] | |
16 | - User = dict(name=str, email=str, date=datetime) | |
17 | - | |
18 | - _id = Field(str) | |
19 | - tree_id = Field(str) | |
20 | - committed = Field(User) | |
21 | - authored = Field(User) | |
22 | - message = Field(str) | |
23 | - parent_ids = Field([str]) | |
24 | - child_ids = Field([str]) | |
25 | - repo_ids = Field([S.ObjectId()]) | |
19 | +from .auth import User | |
20 | +from .session import main_doc_session, project_doc_session | |
21 | +from .session import repository_orm_session | |
26 | 22 | |
27 | - def __repr__(self): | |
28 | - return '%s %s' % ( | |
29 | - self._id[:7], self.summary) | |
23 | +log = logging.getLogger(__name__) | |
30 | 24 | |
31 | - @property | |
32 | - def summary(self): | |
33 | - if self.message: | |
34 | - summary = [] | |
35 | - for line in self.message.splitlines(): | |
36 | - line = line.rstrip() | |
37 | - if line: summary.append(line) | |
38 | - else: return ' '.join(summary) | |
39 | - return ' '.join(summary) | |
40 | - return '' | |
25 | +SUser = dict(name=str, email=str, date=datetime) | |
26 | +SObjType=S.OneOf('blob', 'tree', 'submodule') | |
27 | +QSIZE = 100 | |
28 | +README_RE = re.compile('^README(\.[^.]*)?$', re.IGNORECASE) | |
41 | 29 | |
42 | - def url(self): | |
43 | - return '' | |
30 | +# Basic commit information | |
31 | +CommitDoc = collection( | |
32 | + 'repo_ci', main_doc_session, | |
33 | + Field('_id', str), | |
34 | + Field('tree_id', str), | |
35 | + Field('committed', SUser), | |
36 | + Field('authored', SUser), | |
37 | + Field('message', str), | |
38 | + Field('parent_ids', [str], index=True), | |
39 | + Field('child_ids', [str], index=True), | |
40 | + Field('repo_ids', [ S.ObjectId() ], index=True)) | |
44 | 41 | |
45 | - def shorthand_id(self): | |
46 | - return '' | |
42 | +# Basic tree information | |
43 | +TreeDoc = collection( | |
44 | + 'repo_tree', main_doc_session, | |
45 | + Field('_id', str), | |
46 | + Field('tree_ids', [dict(name=str, id=str)]), | |
47 | + Field('blob_ids', [dict(name=str, id=str)]), | |
48 | + Field('other_ids', [dict(name=str, id=str, type=SObjType)])) | |
47 | 49 | |
48 | - @property | |
49 | - def author_url(self): | |
50 | - return '' | |
51 | - | |
52 | -class Tree(Document): | |
53 | - class __mongometa__: | |
54 | - name = 'repo_tree' | |
55 | - session = main_doc_session | |
56 | - ObjType=S.OneOf('blob', 'tree', 'submodule') | |
57 | - | |
58 | - _id = Field(str) | |
59 | - tree_ids = Field([dict(name=str, id=str)]) | |
60 | - blob_ids = Field([dict(name=str, id=str)]) | |
61 | - other_ids = Field([dict(name=str, id=str, type=ObjType)]) | |
62 | - | |
63 | -class LastCommit(Document): | |
64 | - class __mongometa__: | |
65 | - name = 'repo_last_commit' | |
66 | - session = project_doc_session | |
67 | - indexes = [ | |
68 | - ( 'repo_id', 'object_id'), | |
69 | - ] | |
70 | - | |
71 | - _id = Field(str) | |
72 | - repo_id=Field(S.ObjectId()) | |
73 | - object_id=Field(str) | |
74 | - commit_info = Field(dict( | |
50 | +# Information about the last commit to touch a tree/blob | |
51 | +LastCommitDoc = collection( | |
52 | + 'repo_last_commit', project_doc_session, | |
53 | + Field('_id', str), | |
54 | + Field('repo_id', S.ObjectId()), | |
55 | + Field('object_id', str), | |
56 | + Field('commit_info', dict( | |
75 | 57 | id=str, |
76 | 58 | date=datetime, |
77 | 59 | author=str, |
@@ -79,58 +61,298 @@ class LastCommit(Document): | ||
79 | 61 | author_url=str, |
80 | 62 | href=str, |
81 | 63 | shortlink=str, |
82 | - summary=str)) | |
83 | - | |
84 | - @classmethod | |
85 | - def set_last_commit(cls, repo_id, oid, commit): | |
86 | - lc = cls(dict( | |
87 | - _id='%s:%s' % (repo_id, oid), | |
88 | - repo_id=repo_id, | |
89 | - object_id=oid, | |
90 | - commit_info=dict( | |
91 | - id=commit._id, | |
92 | - author=commit.authored.name, | |
93 | - author_email=commit.authored.email, | |
94 | - author_url=commit.author_url, | |
95 | - date=commit.authored.date, | |
96 | - href=commit.url(), | |
97 | - shortlink=commit.shorthand_id(), | |
98 | - summary=commit.summary))) | |
99 | - lc.m.save(safe=False) | |
100 | - return lc | |
101 | - | |
102 | -class Trees(Document): | |
103 | - class __mongometa__: | |
104 | - name = 'repo_trees' | |
105 | - session = main_doc_session | |
106 | - | |
107 | - _id = Field(str) # commit ID | |
108 | - tree_ids = Field([str]) # tree IDs | |
109 | - | |
110 | -class DiffInfo(Document): | |
111 | - class __mongometa__: | |
112 | - name = 'repo_diffinfo' | |
113 | - session = main_doc_session | |
114 | - | |
115 | - _id = Field(str) | |
116 | - differences = Field([dict(name=str, lhs_id=str, rhs_id=str)]) | |
117 | - | |
118 | -class BasicBlock(Document): | |
119 | - class __mongometa__: | |
120 | - name = 'repo_basic_block' | |
121 | - session = main_doc_session | |
122 | - indexes = [ | |
123 | - ('commit_ids',) ] | |
124 | - | |
125 | - _id = Field(str) | |
126 | - parent_commit_ids = Field([str]) | |
127 | - commit_ids = Field([str]) | |
128 | - commit_times = Field([datetime]) | |
64 | + summary=str)), | |
65 | + Index('repo_id', 'object_id')) | |
66 | + | |
67 | +# List of all trees contained within a commit | |
68 | +TreesDoc = collection( | |
69 | + 'repo_trees', main_doc_session, | |
70 | + Field('_id', str), | |
71 | + Field('tree_ids', [str])) | |
72 | + | |
73 | +# Information about which things were added/removed in commit | |
74 | +DiffInfoDoc = collection( | |
75 | + 'repo_diffinfo', main_doc_session, | |
76 | + Field('_id', str), | |
77 | + Field( | |
78 | + 'differences', | |
79 | + [ dict(name=str, lhs_id=str, rhs_id=str)])) | |
80 | + | |
81 | +# List of commit runs (a run is a linear series of single-parent commits) | |
82 | +CommitRunDoc = collection( | |
83 | + 'repo_commitrun', main_doc_session, | |
84 | + Field('_id', str), | |
85 | + Field('parent_commit_ids', [str]), | |
86 | + Field('commit_ids', [str], index=True), | |
87 | + Field('commit_times', [datetime])) | |
88 | + | |
89 | +class RepoObject(object): | |
129 | 90 | |
130 | 91 | def __repr__(self): |
131 | - return '%s: (P %s, T %s..%s (%d commits))' % ( | |
132 | - self._id[:6], | |
133 | - [ oid[:6] for oid in self.parent_commit_ids ], | |
134 | - self.commit_ids[0][:6], | |
135 | - self.commit_ids[-1][:6], | |
136 | - len(self.commit_ids)) | |
92 | + return '<%s %s>' % ( | |
93 | + self.__class__.__name__, self._id) | |
94 | + | |
95 | + def primary(self): | |
96 | + return self | |
97 | + | |
98 | + def index_id(self): | |
99 | + '''Globally unique artifact identifier. Used for | |
100 | + SOLR ID, shortlinks, and maybe elsewhere | |
101 | + ''' | |
102 | + id = '%s.%s#%s' % ( | |
103 | + self.__class__.__module__, | |
104 | + self.__class__.__name__, | |
105 | + self._id) | |
106 | + return id.replace('.', '/') | |
107 | + | |
108 | +class Commit(RepoObject): | |
109 | + # Ephemeral attrs | |
110 | + repo=None | |
111 | + | |
112 | + @LazyProperty | |
113 | + def author_url(self): | |
114 | + u = User.by_email_address(self.authored.email) | |
115 | + if u: return u.url() | |
116 | + | |
117 | + @LazyProperty | |
118 | + def committer_url(self): | |
119 | + u = User.by_email_address(self.committed.email) | |
120 | + if u: return u.url() | |
121 | + | |
122 | + @LazyProperty | |
123 | + def tree(self): | |
124 | + if self.tree_id is None: | |
125 | + self.tree_id = self.repo.compute_tree(self) | |
126 | + if self.tree_id is None: | |
127 | + return None | |
128 | + t = Tree.query.get(object_id=self.tree_id) | |
129 | + if t is None: | |
130 | + self.tree_id = self.repo.compute_tree(self) | |
131 | + t = Tree.query.get(object_id=self.tree_id) | |
132 | + if t is not None: t.set_context(self) | |
133 | + return t | |
134 | + | |
135 | + @LazyProperty | |
136 | + def summary(self): | |
137 | + message = h.really_unicode(self.message) | |
138 | + first_line = message.split('\n')[0] | |
139 | + return h.text.truncate(first_line, 50) | |
140 | + | |
141 | + def get_path(self, path): | |
142 | + '''Return the blob on the given path''' | |
143 | + if path.startswith('/'): path = path[1:] | |
144 | + path_parts = path.split('/') | |
145 | + return self.tree.get_blob(path_parts[-1], path_parts[:-1]) | |
146 | + | |
147 | + def shorthand_id(self): | |
148 | + return self.repo.shorthand_for_commit(self) | |
149 | + | |
150 | + @LazyProperty | |
151 | + def symbolic_ids(self): | |
152 | + return self.repo.symbolics_for_commit(self) | |
153 | + | |
154 | + def url(self): | |
155 | + return self.repo.url_for_commit(self) | |
156 | + | |
157 | + def log_iter(self, skip, count): | |
158 | + for oids in utils.chunked_iter(commitlog(self._id), QSIZE): | |
159 | + oids = list(oids) | |
160 | + commits = dict( | |
161 | + (ci._id, ci) for ci in self.query.find(dict( | |
162 | + _id={'$in': oids}))) | |
163 | + for oid in oids: | |
164 | + ci = commits[oid] | |
165 | + ci.set_context(self.repo) | |
166 | + yield ci | |
167 | + | |
168 | + def log(self, skip, count): | |
169 | + return list(self.log_iter(skip, count)) | |
170 | + | |
171 | + def count_revisions(self): | |
172 | + result = 0 | |
173 | + for oid in commitlog(self): result += 1 | |
174 | + return result | |
175 | + | |
176 | + def context(self): | |
177 | + result = dict(prev=None, next=None) | |
178 | + if self.parent_ids: | |
179 | + result['prev'] = self.query.get(_id=self.parent_ids[0]) | |
180 | + if self.child_ids: | |
181 | + result['next'] = self.query.get(_id=self.child_ids[0]) | |
182 | + return result | |
183 | + | |
184 | +class Tree(RepoObject): | |
185 | + # Ephemeral attrs | |
186 | + repo=None | |
187 | + commit=None | |
188 | + parent=None | |
189 | + name=None | |
190 | + | |
191 | + def compute_hash(self): | |
192 | + '''Compute a hash based on the contents of the tree. Note that this | |
193 | + hash does not necessarily correspond to any actual DVCS hash. | |
194 | + ''' | |
195 | + lines = ( | |
196 | + [ 'tree' + x.name + x.id for x in self.tree_ids ] | |
197 | + + [ 'blob' + x.name + x.id for x in self.blob_ids ] | |
198 | + + [ x.type + x.name + x.id for x in self.other_ids ]) | |
199 | + sha_obj = sha1() | |
200 | + for line in sorted(lines): | |
201 | + sha_obj.update(line) | |
202 | + return sha_obj.hexdigest() | |
203 | + | |
204 | + def set_context(self, commit_or_tree, name=None): | |
205 | + assert commit_or_tree is not self | |
206 | + self.repo = commit_or_tree.repo | |
207 | + if name: | |
208 | + self.commit = commit_or_tree.commit | |
209 | + self.parent = commit_or_tree | |
210 | + self.name = name | |
211 | + else: | |
212 | + self.commit = commit_or_tree | |
213 | + | |
214 | + def readme(self): | |
215 | + name = None | |
216 | + text = '' | |
217 | + for x in self.blob_ids: | |
218 | + if README_RE.match(x.name): | |
219 | + name = x.name | |
220 | + text = h.really_unicode(self.repo.blob_text(x.id)) | |
221 | + break | |
222 | + if text == '': | |
223 | + text = '<p><em>Empty File</em></p>' | |
224 | + else: | |
225 | + renderer = g.pypeline_markup.renderer(name) | |
226 | + if renderer[1]: | |
227 | + text = g.pypeline_markup.render(name,text) | |
228 | + else: | |
229 | + text = '<pre>%s</pre>' % text | |
230 | + return (name, text) | |
231 | + | |
232 | + def ls(self): | |
233 | + # Load last commit info | |
234 | + oids = [ x.id for x in chain(self.tree_ids, self.object_ids, self.other_ids) ] | |
235 | + lc_index = dict( | |
236 | + (lc.object_id, lc.commit) | |
237 | + for lc in LastCommitDoc.m.find(dict( | |
238 | + repo_id=self.repo._id, | |
239 | + object_id={'$in': oids}))) | |
240 | + results = [] | |
241 | + def _get_last_commit(oid): | |
242 | + lc = lc_index.get(oid) | |
243 | + if lc is None: | |
244 | + lc = dict( | |
245 | + author=None, | |
246 | + author_email=None, | |
247 | + author_url=None, | |
248 | + date=None, | |
249 | + id=None, | |
250 | + href=None, | |
251 | + shortlink=None, | |
252 | + summary=None) | |
253 | + return lc | |
254 | + for x in sorted(self.tree_ids, key=lambda x:x.name): | |
255 | + results.append(dict( | |
256 | + kind='DIR', | |
257 | + name=x.name, | |
258 | + href=x.name + '/', | |
259 | + last_commit=_get_last_commit(x.id))) | |
260 | + for x in sorted(self.blob_ids, key=lambda x:x.name): | |
261 | + results.append(dict( | |
262 | + kind='FILE', | |
263 | + name=x.name, | |
264 | + href=x.name + '/', | |
265 | + last_commit=_get_last_commit(x.id))) | |
266 | + for x in sorted(self.other_ids, key=lambda x:x.name): | |
267 | + results.append(dict( | |
268 | + kind=x.type, | |
269 | + name=x.name, | |
270 | + href=None, | |
271 | + last_commit=_get_last_commit(x.id))) | |
272 | + return results | |
273 | + | |
274 | + def path(self): | |
275 | + if self.parent: | |
276 | + assert self.parent is not self | |
277 | + return self.parent.path() + self.name + '/' | |
278 | + else: | |
279 | + return '/' | |
280 | + | |
281 | + def url(self): | |
282 | + return self.commit.url() + 'tree' + self.path() | |
283 | + | |
284 | + @LazyProperty | |
285 | + def by_name(self): | |
286 | + d = dict((x.name, x) for x in self.other_ids) | |
287 | + d.update( | |
288 | + (x.name, dict(x, type='tree')) | |
289 | + for x in self.tree_ids) | |
290 | + d.update( | |
291 | + (x.name, dict(x, type='blob')) | |
292 | + for x in self.blob_ids) | |
293 | + return d | |
294 | + | |
295 | + def is_blob(self, name): | |
296 | + return self.by_name[name].type == 'blob' | |
297 | + | |
298 | +mapper(Commit, CommitDoc, repository_orm_session) | |
299 | +mapper(Tree, TreeDoc, repository_orm_session) | |
300 | + | |
301 | +def commitlog(commit_id, skip=0, limit=sys.maxint): | |
302 | + | |
303 | + seen = set() | |
304 | + def _visit(commit_id): | |
305 | + if commit_id in seen: return | |
306 | + run = CommitRunDoc.m.get(commit_ids=commit_id) | |
307 | + if run is None: return | |
308 | + index = False | |
309 | + for pos, (oid, time) in enumerate(izip(run.commit_ids, run.commit_times)): | |
310 | + if oid == commit_id: index = True | |
311 | + elif not index: continue | |
312 | + seen.add(oid) | |
313 | + ci_times[oid] = time | |
314 | + if pos+1 < len(run.commit_ids): | |
315 | + ci_parents[oid] = [ run.commit_ids[pos+1] ] | |
316 | + else: | |
317 | + ci_parents[oid] = run.parent_commit_ids | |
318 | + for oid in run.parent_commit_ids: | |
319 | + _visit(oid) | |
320 | + | |
321 | + def _gen_ids(commit_id, skip, limit): | |
322 | + # Traverse the graph in topo order, yielding commit IDs | |
323 | + commits = set([commit_id]) | |
324 | + new_parent = None | |
325 | + while commits and limit: | |
326 | + # next commit is latest commit that's valid to log | |
327 | + if new_parent in commits: | |
328 | + ci = new_parent | |
329 | + else: | |
330 | + ci = max(commits, key=lambda ci:ci_times[ci]) | |
331 | + commits.remove(ci) | |
332 | + if skip: | |
333 | + skip -= 1 | |
334 | + continue | |
335 | + else: | |
336 | + limit -= 1 | |
337 | + yield ci | |
338 | + # remove this commit from its parents children and add any childless | |
339 | + # parents to the 'ready set' | |
340 | + new_parent = None | |
341 | + for oid in ci_parents[ci]: | |
342 | + children = ci_children[oid] | |
343 | + children.discard(ci) | |
344 | + if not children: | |
345 | + commits.add(oid) | |
346 | + new_parent = oid | |
347 | + | |
348 | + # Load all the runs to build a commit graph | |
349 | + ci_times = {} | |
350 | + ci_parents = {} | |
351 | + ci_children = defaultdict(set) | |
352 | + log.info('Build commit graph') | |
353 | + _visit(commit_id) | |
354 | + for oid, parents in ci_parents.iteritems(): | |
355 | + for ci_parent in parents: | |
356 | + ci_children[ci_parent].add(oid) | |
357 | + | |
358 | + return _gen_ids(commit_id, skip, limit) |
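The `commitlog()` generator above rebuilds a per-commit graph from the stored runs and walks it in topological order, newest ready commit first. Below is a self-contained toy (plain Python, no Allura or MongoDB) of the same idea on a hypothetical four-commit history; the run contents and timestamps are invented, and the run-continuation preference (`new_parent`) is omitted for brevity.

```python
# Toy illustration (not Allura code) of the commit-run log traversal above.
# A run is a linear chain of single-parent commits stored newest-first, plus
# the parent ids of its oldest commit and one timestamp per commit.
from collections import defaultdict

# Hypothetical history: D merges C and X; both branches fork from B.
#        +-- C <--+
#  A <-- B        D
#        +-- X <--+
runs = [
    dict(commit_ids=['D'], commit_times=[5], parent_commit_ids=['C', 'X']),
    dict(commit_ids=['C', 'B', 'A'], commit_times=[3, 2, 1], parent_commit_ids=[]),
    dict(commit_ids=['X'], commit_times=[4], parent_commit_ids=['B']),
]

# Rebuild the per-commit graph the way _visit() does.
ci_times, ci_parents, ci_children = {}, {}, defaultdict(set)
for run in runs:
    ids, times = run['commit_ids'], run['commit_times']
    for pos, (o, t) in enumerate(zip(ids, times)):
        ci_times[o] = t
        if pos + 1 < len(ids):
            ci_parents[o] = [ids[pos + 1]]
        else:
            ci_parents[o] = run['parent_commit_ids']
for o, parents in ci_parents.items():
    for p in parents:
        ci_children[p].add(o)

# Topological traversal, newest ready commit first (simplified _gen_ids:
# the 'stay on the current run' preference via new_parent is left out).
ready, order = set(['D']), []
while ready:
    ci = max(ready, key=lambda o: ci_times[o])
    ready.remove(ci)
    order.append(ci)
    for p in ci_parents[ci]:
        ci_children[p].discard(ci)
        if not ci_children[p]:
            ready.add(p)

print(order)   # ['D', 'X', 'C', 'B', 'A']
```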
@@ -49,6 +49,9 @@ class RepositoryImplementation(object): | ||
49 | 49 | def commit(self, revision): # pragma no cover |
50 | 50 | raise NotImplementedError, 'commit' |
51 | 51 | |
52 | + def all_commit_ids(self): # pragma no cover | |
53 | + raise NotImplementedError, 'all_commit_ids' | |
54 | + | |
52 | 55 | def new_commits(self, all_commits=False): # pragma no cover |
53 | 56 | '''Return a list of native commits in topological order (heads first). |
54 | 57 |
@@ -74,6 +77,10 @@ class RepositoryImplementation(object): | ||
74 | 77 | '''Refresh the data in the commit object 'ci' with data from the repo''' |
75 | 78 | raise NotImplementedError, 'refresh_commit' |
76 | 79 | |
80 | + def refresh_commit_info(self, oid): # pragma no cover | |
81 | + '''Refresh the data in the commit with id oid''' | |
82 | + raise NotImplementedError, 'refresh_commit_info' | |
83 | + | |
77 | 84 | def refresh_tree(self, tree): # pragma no cover |
78 | 85 | '''Refresh the data in the tree object 'tree' with data from the repo''' |
79 | 86 | raise NotImplementedError, 'refresh_tree' |
@@ -105,8 +112,8 @@ class RepositoryImplementation(object): | ||
105 | 112 | '''Return a file-like object that contains the contents of the blob''' |
106 | 113 | raise NotImplementedError, 'open_blob' |
107 | 114 | |
108 | - def shorthand_for_commit(self, commit): | |
109 | - return '[%s]' % commit.object_id[:6] | |
115 | + def shorthand_for_commit(self, oid): | |
116 | + return '[%s]' % oid | |
110 | 117 | |
111 | 118 | def symbolics_for_commit(self, commit): |
112 | 119 | '''Return symbolic branch and tag names for a commit. |
@@ -188,6 +195,10 @@ class Repository(Artifact): | ||
188 | 195 | return self._impl.init() |
189 | 196 | def commit(self, rev): |
190 | 197 | return self._impl.commit(rev) |
198 | + def all_commit_ids(self): | |
199 | + return self._impl.all_commit_ids() | |
200 | + def refresh_commit_info(self, oid, seen): | |
201 | + return self._impl.refresh_commit_info(oid, seen) | |
191 | 202 | def commit_context(self, commit): |
192 | 203 | return self._impl.commit_context(commit) |
193 | 204 | def open_blob(self, blob): |
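These two hooks split refreshing into a repo-specific half (`all_commit_ids`, `refresh_commit_info`) and the repo-agnostic half handled by the refresh script. Below is an illustrative stub, not a real SCM backend: `InMemoryImplementation` and its `_history` dict are invented for this sketch, the `refresh_commit_info` signature follows the Git implementation later in this commit, and only document calls already shown elsewhere in the diff (`CommitDoc.m.find`, `CommitDoc(...).m.insert`) are used.

```python
# Illustrative stub of a backend providing only the two new hooks.  History is
# a plain dict: oid -> dict(tree_id=..., parents=[...], authored=...,
# committed=..., message=...), where authored/committed are name/email/date dicts.
from allura import model as M
from allura.model.repo import CommitDoc

class InMemoryImplementation(M.RepositoryImplementation):

    def __init__(self, repo, history):
        self._repo = repo
        self._history = history

    def all_commit_ids(self):
        # Real backends yield ids in topological order, heads first.
        return list(self._history)

    def refresh_commit_info(self, oid, seen):
        if CommitDoc.m.find(dict(_id=oid)).count():
            return False
        info = self._history[oid]
        CommitDoc(dict(
            _id=oid,
            tree_id=info['tree_id'],
            committed=info['committed'],
            authored=info['authored'],
            message=info.get('message', ''),
            parent_ids=info['parents'],
            child_ids=[])).m.insert(safe=True)
        return True
```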
@@ -3,60 +3,49 @@ import logging | ||
3 | 3 | from collections import defaultdict |
4 | 4 | from itertools import chain, izip |
5 | 5 | from datetime import datetime |
6 | +from cPickle import dumps | |
6 | 7 | |
8 | +import bson | |
7 | 9 | from pylons import c |
8 | 10 | from pymongo.errors import DuplicateKeyError |
9 | 11 | |
10 | 12 | from ming.base import Object |
11 | 13 | |
12 | -from allura import model as M | |
13 | 14 | from allura.lib import helpers as h |
14 | 15 | from allura.lib import utils |
16 | +from allura.model.repo import CommitDoc, TreeDoc, TreesDoc, DiffInfoDoc | |
17 | +from allura.model.repo import LastCommitDoc, CommitRunDoc | |
18 | +from allura.model.repo import Commit | |
19 | +from allura.model.index import ArtifactReferenceDoc, ShortlinkDoc | |
15 | 20 | |
16 | 21 | log = logging.getLogger(__name__) |
17 | 22 | |
18 | 23 | QSIZE=100 |
19 | 24 | |
20 | -def dolog(): | |
21 | - h.set_context('test', 'code') | |
22 | - repo = c.app.repo._impl._git | |
23 | - oid = repo.commit(repo.heads[0]).hexsha | |
24 | - log.info('start') | |
25 | - for i, ci in enumerate(commitlog(oid)): | |
26 | - print repr(ci) | |
27 | - log.info('done') | |
28 | - | |
29 | 25 | def main(): |
30 | 26 | if len(sys.argv) > 1: |
31 | 27 | h.set_context('test') |
32 | 28 | c.project.install_app('Git', 'code', 'Code', init_from_url='/home/rick446/src/forge') |
33 | 29 | h.set_context('test', 'code') |
34 | - M.repo.Commit.m.remove({}) | |
35 | - M.repo.Tree.m.remove({}) | |
36 | - M.repo.Trees.m.remove({}) | |
37 | - M.repo.DiffInfo.m.remove({}) | |
38 | - M.repo.LastCommit.m.remove({}) | |
39 | - M.repo.BasicBlock.m.remove({}) | |
40 | - repo = c.app.repo._impl._git | |
41 | - | |
42 | - # Get all commits | |
43 | - seen = set() | |
44 | - all_commit_ids = [] | |
45 | - for head in repo.heads: | |
46 | - for ci in repo.iter_commits(head, topo_order=True): | |
47 | - if ci.binsha in seen: continue | |
48 | - seen.add(ci.binsha) | |
49 | - all_commit_ids.append(ci.hexsha) | |
50 | - | |
51 | - # Skip commits that are already in the DB | |
30 | + CommitDoc.m.remove({}) | |
31 | + TreeDoc.m.remove({}) | |
32 | + TreesDoc.m.remove({}) | |
33 | + DiffInfoDoc.m.remove({}) | |
34 | + LastCommitDoc.m.remove({}) | |
35 | + CommitRunDoc.m.remove({}) | |
36 | + | |
37 | + # Get all commits (repo-specific) | |
38 | + all_commit_ids = list(c.app.repo.all_commit_ids()) | |
39 | + | |
40 | + # Skip commits that are already in the DB (repo-agnostic) | |
52 | 41 | commit_ids = unknown_commit_ids(all_commit_ids) |
53 | 42 | # commit_ids = commit_ids[:500] |
54 | 43 | log.info('Refreshing %d commits', len(commit_ids)) |
55 | 44 | |
56 | - # Refresh commits | |
45 | + # Refresh commits (repo-specific) | |
46 | + seen = set() | |
57 | 47 | for i, oid in enumerate(commit_ids): |
58 | - ci = repo.rev_parse(oid) | |
59 | - refresh_commit_info(ci, seen) | |
48 | + c.app.repo.refresh_commit_info(oid, seen) | |
60 | 49 | if (i+1) % 100 == 0: |
61 | 50 | log.info('Refresh commit info %d: %s', (i+1), oid) |
62 | 51 |
@@ -64,14 +53,14 @@ def main(): | ||
64 | 53 | # Everything below here is repo-agnostic |
65 | 54 | ############################################# |
66 | 55 | |
67 | - refresh_repo(commit_ids, c.app.repo._id) | |
56 | + refresh_repo(commit_ids, c.app.repo) | |
68 | 57 | |
69 | 58 | # Refresh child references |
70 | 59 | seen = set() |
71 | 60 | parents = set() |
72 | 61 | |
73 | 62 | for i, oid in enumerate(commit_ids): |
74 | - ci = M.repo.Commit.m.find(dict(_id=oid), validate=False).next() | |
63 | + ci = CommitDoc.m.find(dict(_id=oid), validate=False).next() | |
75 | 64 | refresh_children(ci) |
76 | 65 | seen.add(ci._id) |
77 | 66 | parents.update(ci.parent_ids) |
@@ -79,28 +68,22 @@ def main(): | ||
79 | 68 | log.info('Refresh child (a) info %d: %s', (i+1), ci._id) |
80 | 69 | for j, oid in enumerate(parents-seen): |
81 | 70 | try: |
82 | - ci = M.repo.Commit.m.find(dict(_id=oid), validate=False).next() | |
71 | + ci = CommitDoc.m.find(dict(_id=oid), validate=False).next() | |
83 | 72 | except StopIteration: |
84 | 73 | continue |
85 | 74 | refresh_children(ci) |
86 | 75 | if (i + j + 1) % 100 == 0: |
87 | 76 | log.info('Refresh child (b) info %d: %s', (i + j + 1), ci._id) |
88 | 77 | |
89 | - # Refresh basic blocks | |
90 | - bbb = BasicBlockBuilder(commit_ids) | |
91 | - bbb.run() | |
92 | - bbb.cleanup() | |
93 | - | |
94 | - # Verify the log | |
95 | - log.info('Logging via basic blocks') | |
96 | - for i, ci in enumerate(commitlog(commit_ids[0])): | |
97 | - pass | |
98 | - log.info('... done (%d commits from %s)', i+1, commit_ids[0]) | |
78 | + # Refresh commit runs | |
79 | + rb = CommitRunBuilder(commit_ids) | |
80 | + rb.run() | |
81 | + rb.cleanup() | |
99 | 82 | |
100 | 83 | # Refresh trees |
101 | 84 | cache = {} |
102 | 85 | for i, oid in enumerate(commit_ids): |
103 | - ci = M.repo.Commit.m.find(dict(_id=oid), validate=False).next() | |
86 | + ci = CommitDoc.m.find(dict(_id=oid), validate=False).next() | |
104 | 87 | cache = refresh_commit_trees(ci, cache) |
105 | 88 | if (i+1) % 100 == 0: |
106 | 89 | log.info('Refresh commit trees %d: %s', (i+1), ci._id) |
@@ -108,13 +91,13 @@ def main(): | ||
108 | 91 | # Compute diffs |
109 | 92 | cache = {} |
110 | 93 | for i, oid in enumerate(commit_ids): |
111 | - ci = M.repo.Commit.m.find(dict(_id=oid), validate=False).next() | |
94 | + ci = CommitDoc.m.find(dict(_id=oid), validate=False).next() | |
112 | 95 | compute_diffs(c.app.repo._id, cache, ci) |
113 | 96 | if (i+1) % 100 == 0: |
114 | 97 | log.info('Compute diffs %d: %s', (i+1), ci._id) |
115 | 98 | |
116 | 99 | def refresh_commit_trees(ci, cache): |
117 | - trees_doc = M.repo.Trees(dict( | |
100 | + trees_doc = TreesDoc(dict( | |
118 | 101 | _id=ci._id, |
119 | 102 | tree_ids = list(trees(ci.tree_id, cache)))) |
120 | 103 | trees_doc.m.save(safe=False) |
@@ -124,10 +107,10 @@ def refresh_commit_trees(ci, cache): | ||
124 | 107 | return new_cache |
125 | 108 | |
126 | 109 | def refresh_commit_info(ci, seen): |
127 | - if M.repo.Commit.m.find(dict(_id=ci.hexsha)).count() != 0: | |
110 | + if CommitDoc.m.find(dict(_id=ci.hexsha)).count() != 0: | |
128 | 111 | return False |
129 | 112 | try: |
130 | - ci_doc = M.repo.Commit(dict( | |
113 | + ci_doc = CommitDoc(dict( | |
131 | 114 | _id=ci.hexsha, |
132 | 115 | tree_id=ci.tree.hexsha, |
133 | 116 | committed = Object( |
@@ -149,109 +132,128 @@ def refresh_commit_info(ci, seen): | ||
149 | 132 | refresh_tree(ci.tree, seen) |
150 | 133 | return True |
151 | 134 | |
152 | -def refresh_repo(commit_ids, repo_id): | |
135 | +def refresh_repo(commit_ids, repo): | |
153 | 136 | for oids in utils.chunked_iter(commit_ids, QSIZE): |
154 | 137 | oids = list(oids) |
155 | - M.repo.Commit.m.update_partial( | |
138 | + # Create shortlinks and artifactrefs | |
139 | + for oid in oids: | |
140 | + index_id = 'allura.model.repo.Commit#' + oid | |
141 | + ref = ArtifactReferenceDoc(dict( | |
142 | + _id=index_id, | |
143 | + artifact_reference=dict( | |
144 | + cls=dumps(Commit), | |
145 | + project_id=repo.app.config.project_id, | |
146 | + app_config_id=repo.app.config._id, | |
147 | + artifact_id=oid), | |
148 | + references=[])) | |
149 | + link = ShortlinkDoc(dict( | |
150 | + _id=bson.ObjectId(), | |
151 | + ref_id=index_id, | |
152 | + project_id=repo.app.config.project_id, | |
153 | + app_config_id=repo.app.config._id, | |
154 | + link=repo.shorthand_for_commit(oid), | |
155 | + url=repo.url() + 'ci/' + oid + '/')) | |
156 | + ref.m.save(safe=False, validate=False) | |
157 | + link.m.save(safe=False, validate=False) | |
158 | + CommitDoc.m.update_partial( | |
156 | 159 | dict( |
157 | 160 | _id={'$in': oids}, |
158 | - repo_ids={'$ne': repo_id}), | |
159 | - {'$addToSet': dict(repo_ids=repo_id)}, | |
161 | + repo_ids={'$ne': repo._id}), | |
162 | + {'$addToSet': dict(repo_ids=repo._id)}, | |
160 | 163 | multi=True) |
161 | 164 | |
162 | 165 | def refresh_children(ci): |
163 | - M.repo.Commit.m.update_partial( | |
166 | + CommitDoc.m.update_partial( | |
164 | 167 | dict(_id={'$in': ci.parent_ids}), |
165 | 168 | {'$addToSet': dict(child_ids=ci._id)}, |
166 | 169 | multi=True) |
167 | 170 | |
168 | -class BasicBlockBuilder(object): | |
171 | +class CommitRunBuilder(object): | |
169 | 172 | |
170 | 173 | def __init__(self, commit_ids): |
171 | 174 | self.commit_ids = commit_ids |
172 | - self.block_index = {} # by commit ID | |
173 | - self.blocks = {} # by block ID | |
174 | - self.reasons = {} # reasons to stop merging blocks | |
175 | + self.run_index = {} # by commit ID | |
176 | + self.runs = {} # by run ID | |
177 | + self.reasons = {} # reasons to stop merging runs | |
175 | 178 | |
176 | 179 | def run(self): |
177 | 180 | for oids in utils.chunked_iter(self.commit_ids, QSIZE): |
178 | 181 | oids = list(oids) |
179 | - commits = list(M.repo.Commit.m.find(dict(_id={'$in':oids}))) | |
182 | + commits = list(CommitDoc.m.find(dict(_id={'$in':oids}))) | |
180 | 183 | for ci in commits: |
181 | - if ci._id in self.block_index: continue | |
182 | - self.block_index[ci._id] = ci._id | |
183 | - self.blocks[ci._id] = M.repo.BasicBlock(dict( | |
184 | + if ci._id in self.run_index: continue | |
185 | + self.run_index[ci._id] = ci._id | |
186 | + self.runs[ci._id] = CommitRunDoc(dict( | |
184 | 187 | _id=ci._id, |
185 | 188 | parent_commit_ids=ci.parent_ids, |
186 | 189 | commit_ids=[ci._id], |
187 | 190 | commit_times=[ci.authored.date])) |
188 | - self.merge_blocks() | |
189 | - log.info('%d basic blocks', len(self.blocks)) | |
190 | - for bid, bb in sorted(self.blocks.items()): | |
191 | - log.info('%32s: %r', self.reasons.get(bid, 'none'), bb) | |
192 | - for bb in self.blocks.itervalues(): | |
193 | - bb.m.save() | |
194 | - return self.blocks | |
195 | - | |
196 | - def _all_blocks(self): | |
197 | - blocks = {} | |
191 | + self.merge_runs() | |
192 | + log.info('%d runs', len(self.runs)) | |
193 | + for rid, run in sorted(self.runs.items()): | |
194 | + log.info('%32s: %r', self.reasons.get(rid, 'none'), run._id) | |
195 | + for run in self.runs.itervalues(): | |
196 | + run.m.save() | |
197 | + return self.runs | |
198 | + | |
199 | + def _all_runs(self): | |
200 | + runs = {} | |
198 | 201 | for oids in utils.chunked_iter(self.commit_ids, QSIZE): |
199 | 202 | oids = list(oids) |
200 | - for bb in M.repo.BasicBlock.m.find(dict(commit_ids={'$in': oids})): | |
201 | - blocks[bb._id] = bb | |
202 | - seen_bids = set() | |
203 | - blocks = blocks.values() | |
204 | - while blocks: | |
205 | - bb = blocks.pop() | |
206 | - if bb._id in seen_bids: continue | |
207 | - seen_bids.add(bb._id) | |
208 | - yield bb | |
209 | - for bb in M.repo.BasicBlock.m.find( | |
210 | - dict(commit_ids={'$in':bb.parent_commit_ids})): | |
211 | - blocks.append(bb) | |
203 | + for run in CommitRunDoc.m.find(dict(commit_ids={'$in': oids})): | |
204 | + runs[run._id] = run | |
205 | + seen_run_ids = set() | |
206 | + runs = runs.values() | |
207 | + while runs: | |
208 | + run = runs.pop() | |
209 | + if run._id in seen_run_ids: continue | |
210 | + seen_run_ids.add(run._id) | |
211 | + yield run | |
212 | + for run in CommitRunDoc.m.find( | |
213 | + dict(commit_ids={'$in':run.parent_commit_ids})): | |
214 | + runs.append(run) | |
212 | 215 | |
213 | 216 | def cleanup(self): |
214 | - '''Delete non-maximal basic blocks''' | |
215 | - for bb1 in self._all_blocks(): | |
216 | - for bb2 in M.repo.BasicBlock.m.find(dict( | |
217 | - commit_ids=bb1.commit_ids[0])): | |
218 | - if bb2._id == bb1._id: continue | |
219 | - log.info('... delete %r (part of %r)', bb2, bb1) | |
220 | - import pdb; pdb.set_trace() | |
221 | - bb2.m.delete() | |
222 | - | |
223 | - def merge_blocks(self): | |
217 | + '''Delete non-maximal runs''' | |
218 | + for run1 in self._all_runs(): | |
219 | + for run2 in CommitRunDoc.m.find(dict( | |
220 | + commit_ids=run1.commit_ids[0])): | |
221 | + if run1._id == run2._id: continue | |
222 | + log.info('... delete %r (part of %r)', run2, run1) | |
223 | + run2.m.delete() | |
224 | + | |
225 | + def merge_runs(self): | |
224 | 226 | while True: |
225 | - for bid, bb in self.blocks.iteritems(): | |
226 | - if len(bb.parent_commit_ids) != 1: | |
227 | - self.reasons[bid] = '%d parents' % len(bb.parent_commit_ids) | |
227 | + for run_id, run in self.runs.iteritems(): | |
228 | + if len(run.parent_commit_ids) != 1: | |
229 | + self.reasons[run_id] = '%d parents' % len(run.parent_commit_ids) | |
228 | 230 | continue |
229 | - p_oid = bb.parent_commit_ids[0] | |
230 | - p_bid = self.block_index.get(p_oid) | |
231 | - if p_bid is None: | |
232 | - self.reasons[bid] = 'parent commit not found' | |
231 | + p_oid = run.parent_commit_ids[0] | |
232 | + p_run_id = self.run_index.get(p_oid) | |
233 | + if p_run_id is None: | |
234 | + self.reasons[run_id] = 'parent commit not found' | |
233 | 235 | continue |
234 | - p_bb = self.blocks.get(p_bid) | |
235 | - if p_bb is None: | |
236 | - self.reasons[bid] = 'parent block not found' | |
236 | + p_run = self.runs.get(p_run_id) | |
237 | + if p_run is None: | |
238 | + self.reasons[run_id] = 'parent run not found' | |
237 | 239 | continue |
238 | - if p_bb.commit_ids[0] != p_oid: | |
239 | - self.reasons[bid] = 'parent does not start with parent commit' | |
240 | + if p_run.commit_ids[0] != p_oid: | |
241 | + self.reasons[run_id] = 'parent does not start with parent commit' | |
240 | 242 | continue |
241 | - bb.commit_ids += p_bb.commit_ids | |
242 | - bb.commit_times += p_bb.commit_times | |
243 | - bb.parent_commit_ids = p_bb.parent_commit_ids | |
244 | - for oid in p_bb.commit_ids: | |
245 | - self.block_index[oid] = bid | |
243 | + run.commit_ids += p_run.commit_ids | |
244 | + run.commit_times += p_run.commit_times | |
245 | + run.parent_commit_ids = p_run.parent_commit_ids | |
246 | + for oid in p_run.commit_ids: | |
247 | + self.run_index[oid] = run_id | |
246 | 248 | break |
247 | 249 | else: |
248 | 250 | break |
249 | - del self.blocks[p_bid] | |
251 | + del self.runs[p_run_id] | |
250 | 252 | |
251 | 253 | def refresh_tree(t, seen): |
252 | 254 | if t.binsha in seen: return |
253 | 255 | seen.add(t.binsha) |
254 | - doc = M.repo.Tree(dict( | |
256 | + doc = TreeDoc(dict( | |
255 | 257 | _id=t.hexsha, |
256 | 258 | tree_ids=[], |
257 | 259 | blob_ids=[], |
@@ -274,7 +276,7 @@ def trees(id, cache): | ||
274 | 276 | yield id |
275 | 277 | entries = cache.get(id, None) |
276 | 278 | if entries is None: |
277 | - t = M.repo.Tree.m.get(_id=id) | |
279 | + t = TreeDoc.m.get(_id=id) | |
278 | 280 | entries = [ o.id for o in t.tree_ids ] |
279 | 281 | cache[id] = entries |
280 | 282 | for i in entries: |
@@ -284,7 +286,7 @@ def trees(id, cache): | ||
284 | 286 | def unknown_commit_ids(all_commit_ids): |
285 | 287 | result = [] |
286 | 288 | for chunk in utils.chunked_iter(all_commit_ids, QSIZE): |
287 | - q = M.repo.Commit.m.find(_id={'$in':chunk}) | |
289 | + q = CommitDoc.m.find(_id={'$in':chunk}) | |
288 | 290 | known_commit_ids = set(ci._id for ci in q) |
289 | 291 | result += [ oid for oid in chunk if oid not in known_commit_ids ] |
290 | 292 | return result |
@@ -298,20 +300,20 @@ def compute_diffs(repo_id, tree_cache, rhs_ci): | ||
298 | 300 | for xx in _walk_tree(tree_index[x.id], tree_index): |
299 | 301 | yield xx |
300 | 302 | |
301 | - rhs_tree_ids = M.repo.Trees.m.get(_id=rhs_ci._id).tree_ids | |
303 | + rhs_tree_ids = TreesDoc.m.get(_id=rhs_ci._id).tree_ids | |
302 | 304 | if rhs_ci.parent_ids: |
303 | - lhs_ci = M.repo.Commit.m.get(_id=rhs_ci.parent_ids[0]) | |
305 | + lhs_ci = CommitDoc.m.get(_id=rhs_ci.parent_ids[0]) | |
304 | 306 | else: |
305 | 307 | lhs_ci = None |
306 | 308 | if lhs_ci is not None: |
307 | - lhs_tree_ids = M.repo.Trees.m.get(_id=lhs_ci._id).tree_ids | |
309 | + lhs_tree_ids = TreesDoc.m.get(_id=lhs_ci._id).tree_ids | |
308 | 310 | else: |
309 | 311 | lhs_tree_ids = [] |
310 | 312 | new_tree_ids = [ |
311 | 313 | tid for tid in chain(lhs_tree_ids, rhs_tree_ids) |
312 | 314 | if tid not in tree_cache ] |
313 | 315 | tree_index = dict( |
314 | - (t._id, t) for t in M.repo.Tree.m.find(dict(_id={'$in': new_tree_ids}),validate=False)) | |
316 | + (t._id, t) for t in TreeDoc.m.find(dict(_id={'$in': new_tree_ids}),validate=False)) | |
315 | 317 | tree_index.update(tree_cache) |
316 | 318 | rhs_tree_ids_set = set(rhs_tree_ids) |
317 | 319 | tree_cache.clear() |
@@ -328,83 +330,17 @@ def compute_diffs(repo_id, tree_cache, rhs_ci): | ||
328 | 330 | dict(name=name, lhs_id=lhs_id, rhs_id=rhs_id)) |
329 | 331 | # Set last commit info |
330 | 332 | if rhs_id is not None: |
331 | - M.repo.LastCommit.set_last_commit(repo_id, rhs_id, rhs_ci) | |
333 | + _set_last_commit(repo_id, rhs_id, rhs_ci) | |
332 | 334 | rhs_tree = tree_index.get(rhs_id, None) |
333 | 335 | if rhs_tree is not None: |
334 | 336 | for oid in _walk_tree(rhs_tree, tree_index): |
335 | - M.repo.LastCommit.set_last_commit(repo_id, oid, rhs_ci) | |
336 | - di = M.repo.DiffInfo(dict( | |
337 | + _set_last_commit(repo_id, oid, rhs_ci) | |
338 | + di = DiffInfoDoc(dict( | |
337 | 339 | _id=rhs_ci._id, |
338 | 340 | differences=differences)) |
339 | 341 | di.m.save() |
340 | 342 | return tree_cache |
341 | 343 | |
342 | -def commitlog(commit_id, skip=0, limit=sys.maxint): | |
343 | - | |
344 | - seen = set() | |
345 | - def _visit(commit_id): | |
346 | - if commit_id in seen: return | |
347 | - bb = M.repo.BasicBlock.m.get(commit_ids=commit_id) | |
348 | - if bb is None: return | |
349 | - index = False | |
350 | - for pos, (oid, time) in enumerate(izip(bb.commit_ids, bb.commit_times)): | |
351 | - if oid == commit_id: index = True | |
352 | - elif not index: continue | |
353 | - seen.add(oid) | |
354 | - ci_times[oid] = time | |
355 | - if pos+1 < len(bb.commit_ids): | |
356 | - ci_parents[oid] = [ bb.commit_ids[pos+1] ] | |
357 | - else: | |
358 | - ci_parents[oid] = bb.parent_commit_ids | |
359 | - for oid in bb.parent_commit_ids: | |
360 | - _visit(oid) | |
361 | - | |
362 | - def _gen_ids(commit_id, skip, limit): | |
363 | - # Traverse the graph in topo order, yielding commit IDs | |
364 | - commits = set([commit_id]) | |
365 | - new_parent = None | |
366 | - while commits and limit: | |
367 | - # next commit is latest commit that's valid to log | |
368 | - if new_parent in commits: | |
369 | - ci = new_parent | |
370 | - else: | |
371 | - ci = max(commits, key=lambda ci:ci_times[ci]) | |
372 | - commits.remove(ci) | |
373 | - if skip: | |
374 | - skip -= 1 | |
375 | - continue | |
376 | - else: | |
377 | - limit -= 1 | |
378 | - yield ci | |
379 | - # remove this commit from its parents children and add any childless | |
380 | - # parents to the 'ready set' | |
381 | - new_parent = None | |
382 | - for oid in ci_parents[ci]: | |
383 | - children = ci_children[oid] | |
384 | - children.discard(ci) | |
385 | - if not children: | |
386 | - commits.add(oid) | |
387 | - new_parent = oid | |
388 | - | |
389 | - # Load all the blocks to build a commit graph | |
390 | - ci_times = {} | |
391 | - ci_parents = {} | |
392 | - ci_children = defaultdict(set) | |
393 | - log.info('Build commit graph') | |
394 | - _visit(commit_id) | |
395 | - for oid, parents in ci_parents.iteritems(): | |
396 | - for ci_parent in parents: | |
397 | - ci_children[ci_parent].add(oid) | |
398 | - | |
399 | - # Convert oids to commit objects | |
400 | - log.info('Traverse commit graph') | |
401 | - for oids in utils.chunked_iter(_gen_ids(commit_id, skip, limit), QSIZE): | |
402 | - oids = list(oids) | |
403 | - index = dict( | |
404 | - (ci._id, ci) for ci in M.repo.Commit.m.find(dict(_id={'$in': oids}))) | |
405 | - for oid in oids: | |
406 | - yield index[oid] | |
407 | - | |
408 | 344 | def _diff_trees(lhs, rhs, index, *path): |
409 | 345 | def _fq(name): |
410 | 346 | return '/'.join(reversed( |
@@ -441,6 +377,24 @@ def _diff_trees(lhs, rhs, index, *path): | ||
441 | 377 | for name, id in rhs_blob_ids.items(): |
442 | 378 | yield (_fq(name), None, id) |
443 | 379 | |
380 | +def _set_last_commit(repo_id, oid, commit): | |
381 | + lc = LastCommitDoc(dict( | |
382 | + _id='%s:%s' % (repo_id, oid), | |
383 | + repo_id=repo_id, | |
384 | + object_id=oid, | |
385 | + commit_info=dict( | |
386 | + id=commit._id, | |
387 | + author=commit.authored.name, | |
388 | + author_email=commit.authored.email, | |
389 | + date=commit.authored.date, | |
390 | + # author_url=commit.author_url, | |
391 | + # href=commit.url(), | |
392 | + # shortlink=commit.shorthand_id(), | |
393 | + # summary=commit.summary | |
394 | + ))) | |
395 | + lc.m.save(safe=False) | |
396 | + return lc | |
397 | + | |
444 | 398 | if __name__ == '__main__': |
445 | 399 | main() |
446 | 400 | # dolog() |
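The heart of `CommitRunBuilder` is its merge rule: a run absorbs its single parent's run only when that run begins with the parent commit. Below is a self-contained toy version (plain dicts, no MongoDB) of that rule, shown collapsing a hypothetical three-commit linear history A <- B <- C into a single run.

```python
# Toy, self-contained version (not Allura code) of the run-merging rule used
# by CommitRunBuilder.merge_runs() above.
def merge_runs(runs, run_index):
    while True:
        for rid, run in list(runs.items()):
            if len(run['parent_commit_ids']) != 1:
                continue                       # merges end a run
            p_oid = run['parent_commit_ids'][0]
            p_rid = run_index.get(p_oid)
            if p_rid is None or p_rid == rid:
                continue
            p_run = runs[p_rid]
            if p_run['commit_ids'][0] != p_oid:
                continue                       # parent run starts elsewhere
            run['commit_ids'] += p_run['commit_ids']
            run['parent_commit_ids'] = p_run['parent_commit_ids']
            for oid in p_run['commit_ids']:
                run_index[oid] = rid
            del runs[p_rid]
            break
        else:
            return runs                        # no merge happened this pass

# Linear history A <- B <- C: every commit starts as its own run...
runs = {
    'C': dict(commit_ids=['C'], parent_commit_ids=['B']),
    'B': dict(commit_ids=['B'], parent_commit_ids=['A']),
    'A': dict(commit_ids=['A'], parent_commit_ids=[]),
}
run_index = {'A': 'A', 'B': 'B', 'C': 'C'}
merge_runs(runs, run_index)
# ...and they collapse into one run headed by the newest commit:
print(runs['C']['commit_ids'])   # ['C', 'B', 'A']
```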
@@ -117,6 +117,14 @@ class GitImplementation(M.RepositoryImplementation): | ||
117 | 117 | result.set_context(self._repo) |
118 | 118 | return result |
119 | 119 | |
120 | + def all_commit_ids(self): | |
121 | + seen = set() | |
122 | + for head in self._git.heads: | |
123 | + for ci in self._git.iter_commits(head, topo_order=True): | |
124 | + if ci.binsha in seen: continue | |
125 | + seen.add(ci.binsha) | |
126 | + yield ci.hexsha | |
127 | + | |
120 | 128 | def new_commits(self, all_commits=False): |
121 | 129 | commits = list(self._git.iter_commits(topo_order=True)) |
122 | 130 | if all_commits: return commits |
@@ -184,6 +192,57 @@ class GitImplementation(M.RepositoryImplementation): | ||
184 | 192 | self._build_manifest(native_ci) |
185 | 193 | self.refresh_tree(root_entry) |
186 | 194 | |
195 | + def refresh_commit_info(self, oid, seen): | |
196 | + from allura.model.repo import CommitDoc | |
197 | + if CommitDoc.m.find(dict(_id=oid)).count(): | |
198 | + return False | |
199 | + try: | |
200 | + ci = self._git.rev_parse(oid) | |
201 | + ci_doc = CommitDoc(dict( | |
202 | + _id=ci.hexsha, | |
203 | + tree_id=ci.tree.hexsha, | |
204 | + committed = Object( | |
205 | + name=h.really_unicode(ci.committer.name), | |
206 | + email=h.really_unicode(ci.committer.email), | |
207 | + date=datetime.utcfromtimestamp( | |
208 | + ci.committed_date-ci.committer_tz_offset)), | |
209 | + authored = Object( | |
210 | + name=h.really_unicode(ci.author.name), | |
211 | + email=h.really_unicode(ci.author.email), | |
212 | + date=datetime.utcfromtimestamp( | |
213 | + ci.authored_date-ci.author_tz_offset)), | |
214 | + message=h.really_unicode(ci.message or ''), | |
215 | + child_ids=[], | |
216 | + parent_ids = [ p.hexsha for p in ci.parents ])) | |
217 | + ci_doc.m.insert(safe=True) | |
218 | + except DuplicateKeyError: | |
219 | + return False | |
220 | + self.refresh_tree_info(ci.tree, seen) | |
221 | + return True | |
222 | + | |
223 | + def refresh_tree_info(self, tree, seen): | |
224 | + from allura.model.repo import TreeDoc | |
225 | + if tree.binsha in seen: return | |
226 | + seen.add(tree.binsha) | |
227 | + doc = TreeDoc(dict( | |
228 | + _id=tree.hexsha, | |
229 | + tree_ids=[], | |
230 | + blob_ids=[], | |
231 | + other_ids=[])) | |
232 | + for o in tree: | |
233 | + obj = Object( | |
234 | + name=h.really_unicode(o.name), | |
235 | + id=o.hexsha) | |
236 | + if o.type == 'tree': | |
237 | + self.refresh_tree_info(o, seen) | |
238 | + doc.tree_ids.append(obj) | |
239 | + elif o.type == 'blob': | |
240 | + doc.blob_ids.append(obj) | |
241 | + else: | |
242 | + obj.type = o.type | |
243 | + doc.other_ids.append(obj) | |
244 | + doc.m.save(safe=False) | |
245 | + | |
187 | 246 | def _build_manifest(self, native_ci): |
188 | 247 | '''Build the manifest for this commit (map of all paths to trees/blobs)
189 | 248 |